1 /* Induction variable optimizations.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33      Note the interesting uses are categorized and handled in groups.
34      Generally, address type uses are grouped together if their iv bases
35      differ only by a constant offset (such as &a[i] and &a[i + 1]).
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49         The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and enable us to decide
66    costs more precisely, but getting all the interactions right would be
67    complicated.  */
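
/* As an illustrative sketch only (not literal GIMPLE produced by this
   pass), a loop such as

	for (i = 0; i < n; i++)
	  sum += a[i];

   computes the address a + i * sizeof (*a) in every iteration.  After
   this pass selects a pointer induction variable, the loop may instead
   become

	for (p = a; p < a + n; p++)
	  sum += *p;

   which combines strength reduction of the multiplication with
   elimination of the original counter i.  */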
68
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "tm_p.h"
79 #include "ssa.h"
80 #include "expmed.h"
81 #include "insn-config.h"
82 #include "emit-rtl.h"
83 #include "recog.h"
84 #include "cgraph.h"
85 #include "gimple-pretty-print.h"
86 #include "alias.h"
87 #include "fold-const.h"
88 #include "stor-layout.h"
89 #include "tree-eh.h"
90 #include "gimplify.h"
91 #include "gimple-iterator.h"
92 #include "gimplify-me.h"
93 #include "tree-cfg.h"
94 #include "tree-ssa-loop-ivopts.h"
95 #include "tree-ssa-loop-manip.h"
96 #include "tree-ssa-loop-niter.h"
97 #include "tree-ssa-loop.h"
98 #include "explow.h"
99 #include "expr.h"
100 #include "tree-dfa.h"
101 #include "tree-ssa.h"
102 #include "cfgloop.h"
103 #include "tree-scalar-evolution.h"
104 #include "params.h"
105 #include "tree-affine.h"
106 #include "tree-ssa-propagate.h"
107 #include "tree-ssa-address.h"
108 #include "builtins.h"
109 #include "tree-vectorizer.h"
110
111 /* FIXME: Expressions are expanded to RTL in this pass to determine the
112 cost of different addressing modes. This should be moved to a TBD
113 interface between the GIMPLE and RTL worlds. */
114
115 /* The infinite cost. */
116 #define INFTY 10000000
117
118 #define AVG_LOOP_NITER(LOOP) 5
119
120 /* Returns the expected number of loop iterations for LOOP.
121 The average trip count is computed from profile data if it
122 exists. */
123
124 static inline HOST_WIDE_INT
125 avg_loop_niter (struct loop *loop)
126 {
127 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
128 if (niter == -1)
129 {
130 niter = max_stmt_executions_int (loop);
131 if (niter == -1 || niter > AVG_LOOP_NITER (loop))
132 return AVG_LOOP_NITER (loop);
133 }
134
135 return niter;
136 }
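
/* For example (a sketch of the intended behavior of avg_loop_niter above):
   if the profile estimates 1000 iterations, 1000 is returned; if there is
   no estimate but a known maximum of 3 iterations, 3 is returned; with no
   information at all, the AVG_LOOP_NITER fallback of 5 is used.  */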
137
138 struct iv_use;
139
140 /* Representation of the induction variable. */
141 struct iv
142 {
143 tree base; /* Initial value of the iv. */
144   tree base_object;	/* A memory object to which the induction variable points.  */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147   struct iv_use *nonlin_use;	/* The nonlinear-expression use of this iv, if any.  */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
152 };
153
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
156 {
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
163 };
164
165 /* Types of uses. */
166 enum use_type
167 {
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_ADDRESS, /* Use in an address. */
170 USE_COMPARE /* Use is a compare. */
171 };
172
173 /* Cost of a computation. */
174 struct comp_cost
175 {
176 int cost; /* The runtime cost. */
177 unsigned complexity; /* The estimate of the complexity of the code for
178 the computation (in no concrete units --
179 complexity field should be larger for more
180 complex expressions and addressing modes). */
181 int scratch; /* Scratch used during cost computation. */
182 };
183
184 static const comp_cost no_cost = {0, 0, 0};
185 static const comp_cost infinite_cost = {INFTY, INFTY, INFTY};
186
187 struct iv_inv_expr_ent;
188
189 /* The candidate - cost pair. */
190 struct cost_pair
191 {
192 struct iv_cand *cand; /* The candidate. */
193 comp_cost cost; /* The cost. */
194 bitmap depends_on; /* The list of invariants that have to be
195 preserved. */
196 tree value; /* For final value elimination, the expression for
197 the final value of the iv. For iv elimination,
198 the new bound to compare with. */
199 enum tree_code comp; /* For iv elimination, the comparison. */
200 iv_inv_expr_ent *inv_expr; /* Loop invariant expression. */
201 };
202
203 /* Use. */
204 struct iv_use
205 {
206 unsigned id; /* The id of the use. */
207 unsigned group_id; /* The group id the use belongs to. */
208 enum use_type type; /* Type of the use. */
209 struct iv *iv; /* The induction variable it is based on. */
210 gimple *stmt; /* Statement in that it occurs. */
211 tree *op_p; /* The place where it occurs. */
212
213 tree addr_base; /* Base address with const offset stripped. */
214 unsigned HOST_WIDE_INT addr_offset;
215 /* Const offset stripped from base address. */
216 };
217
218 /* Group of uses. */
219 struct iv_group
220 {
221 /* The id of the group. */
222 unsigned id;
223 /* Uses of the group are of the same type. */
224 enum use_type type;
225 /* The set of "related" IV candidates, plus the important ones. */
226 bitmap related_cands;
227 /* Number of IV candidates in the cost_map. */
228 unsigned n_map_members;
229   /* The costs w.r.t. the iv candidates.  */
230 struct cost_pair *cost_map;
231 /* The selected candidate for the group. */
232 struct iv_cand *selected;
233 /* Uses in the group. */
234 vec<struct iv_use *> vuses;
235 };
236
237 /* The position where the iv is computed. */
238 enum iv_position
239 {
240 IP_NORMAL, /* At the end, just before the exit condition. */
241 IP_END, /* At the end of the latch block. */
242 IP_BEFORE_USE, /* Immediately before a specific use. */
243 IP_AFTER_USE, /* Immediately after a specific use. */
244 IP_ORIGINAL /* The original biv. */
245 };
246
247 /* The induction variable candidate. */
248 struct iv_cand
249 {
250 unsigned id; /* The number of the candidate. */
251 bool important; /* Whether this is an "important" candidate, i.e. such
252 that it should be considered by all uses. */
253 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
254 gimple *incremented_at;/* For original biv, the statement where it is
255 incremented. */
256 tree var_before; /* The variable used for it before increment. */
257 tree var_after; /* The variable used for it after increment. */
258 struct iv *iv; /* The value of the candidate. NULL for
259 "pseudocandidate" used to indicate the possibility
260 to replace the final value of an iv by direct
261 computation of the value. */
262 unsigned cost; /* Cost of the candidate. */
263 unsigned cost_step; /* Cost of the candidate's increment operation. */
264 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
265 where it is incremented. */
266 bitmap depends_on; /* The list of invariants that are used in step of the
267 biv. */
268 struct iv *orig_iv; /* The original iv if this cand is added from biv with
269 smaller type. */
270 };
271
272 /* Hashtable entry for common candidate derived from iv uses. */
273 struct iv_common_cand
274 {
275 tree base;
276 tree step;
277 /* IV uses from which this common candidate is derived. */
278 auto_vec<struct iv_use *> uses;
279 hashval_t hash;
280 };
281
282 /* Hashtable helpers. */
283
284 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
285 {
286 static inline hashval_t hash (const iv_common_cand *);
287 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
288 };
289
290 /* Hash function for possible common candidates. */
291
292 inline hashval_t
293 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
294 {
295 return ccand->hash;
296 }
297
298 /* Hash table equality function for common candidates. */
299
300 inline bool
301 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
302 const iv_common_cand *ccand2)
303 {
304 return (ccand1->hash == ccand2->hash
305 && operand_equal_p (ccand1->base, ccand2->base, 0)
306 && operand_equal_p (ccand1->step, ccand2->step, 0)
307 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
308 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
309 }
310
311 /* Loop invariant expression hashtable entry. */
312
313 struct iv_inv_expr_ent
314 {
315 /* Tree expression of the entry. */
316 tree expr;
317   /* Unique identifier.  */
318 int id;
319 /* Hash value. */
320 hashval_t hash;
321 };
322
323 /* Comparison function for sorting iv_inv_expr_ent entries A and B by their id fields.  */
324
325 static int
326 sort_iv_inv_expr_ent (const void *a, const void *b)
327 {
328 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
329 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
330
331 unsigned id1 = (*e1)->id;
332 unsigned id2 = (*e2)->id;
333
334 if (id1 < id2)
335 return -1;
336 else if (id1 > id2)
337 return 1;
338 else
339 return 0;
340 }
341
342 /* Hashtable helpers. */
343
344 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
345 {
346 static inline hashval_t hash (const iv_inv_expr_ent *);
347 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
348 };
349
350 /* Hash function for loop invariant expressions. */
351
352 inline hashval_t
353 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
354 {
355 return expr->hash;
356 }
357
358 /* Hash table equality function for expressions. */
359
360 inline bool
361 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
362 const iv_inv_expr_ent *expr2)
363 {
364 return expr1->hash == expr2->hash
365 && operand_equal_p (expr1->expr, expr2->expr, 0);
366 }
367
368 struct ivopts_data
369 {
370 /* The currently optimized loop. */
371 struct loop *current_loop;
372 source_location loop_loc;
373
374 /* Numbers of iterations for all exits of the current loop. */
375 hash_map<edge, tree_niter_desc *> *niters;
376
377   /* Number of registers used in the current loop.  */
378 unsigned regs_used;
379
380 /* The size of version_info array allocated. */
381 unsigned version_info_size;
382
383 /* The array of information for the ssa names. */
384 struct version_info *version_info;
385
386 /* The hashtable of loop invariant expressions created
387 by ivopt. */
388 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
389
390 /* Loop invariant expression id. */
391 int max_inv_expr_id;
392
393 /* The bitmap of indices in version_info whose value was changed. */
394 bitmap relevant;
395
396 /* The uses of induction variables. */
397 vec<iv_group *> vgroups;
398
399 /* The candidates. */
400 vec<iv_cand *> vcands;
401
402 /* A bitmap of important candidates. */
403 bitmap important_candidates;
404
405 /* Cache used by tree_to_aff_combination_expand. */
406 hash_map<tree, name_expansion *> *name_expansion_cache;
407
408 /* The hashtable of common candidates derived from iv uses. */
409 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
410
411 /* The common candidates. */
412 vec<iv_common_cand *> iv_common_cands;
413
414 /* The maximum invariant id. */
415 unsigned max_inv_id;
416
417 /* Number of no_overflow BIVs which are not used in memory address. */
418 unsigned bivs_not_used_in_addr;
419
420 /* Obstack for iv structure. */
421 struct obstack iv_obstack;
422
423 /* Whether to consider just related and important candidates when replacing a
424 use. */
425 bool consider_all_candidates;
426
427 /* Are we optimizing for speed? */
428 bool speed;
429
430 /* Whether the loop body includes any function calls. */
431 bool body_includes_call;
432
433 /* Whether the loop body can only be exited via single exit. */
434 bool loop_single_exit_p;
435 };
436
437 /* An assignment of iv candidates to uses. */
438
439 struct iv_ca
440 {
441 /* The number of uses covered by the assignment. */
442 unsigned upto;
443
444 /* Number of uses that cannot be expressed by the candidates in the set. */
445 unsigned bad_groups;
446
447 /* Candidate assigned to a use, together with the related costs. */
448 struct cost_pair **cand_for_group;
449
450 /* Number of times each candidate is used. */
451 unsigned *n_cand_uses;
452
453 /* The candidates used. */
454 bitmap cands;
455
456 /* The number of candidates in the set. */
457 unsigned n_cands;
458
459 /* Total number of registers needed. */
460 unsigned n_regs;
461
462 /* Total cost of expressing uses. */
463 comp_cost cand_use_cost;
464
465 /* Total cost of candidates. */
466 unsigned cand_cost;
467
468 /* Number of times each invariant is used. */
469 unsigned *n_invariant_uses;
470
471 /* Hash set with used invariant expression. */
472 hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
473
474 /* Total cost of the assignment. */
475 comp_cost cost;
476 };
477
478 /* Difference of two iv candidate assignments. */
479
480 struct iv_ca_delta
481 {
482 /* Changed group. */
483 struct iv_group *group;
484
485 /* An old assignment (for rollback purposes). */
486 struct cost_pair *old_cp;
487
488 /* A new assignment. */
489 struct cost_pair *new_cp;
490
491 /* Next change in the list. */
492 struct iv_ca_delta *next;
493 };
494
495 /* Bound on the number of candidates below which all candidates are considered.  */
496
497 #define CONSIDER_ALL_CANDIDATES_BOUND \
498 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
499
500 /* If there are more iv occurrences, we just give up (it is quite unlikely that
501 optimizing such a loop would help, and it would take ages). */
502
503 #define MAX_CONSIDERED_GROUPS \
504 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
505
506 /* If there are at most this number of ivs in the set, always try removing
507    unnecessary ivs from the set.  */
508
509 #define ALWAYS_PRUNE_CAND_SET_BOUND \
510 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
511
512 /* The list of trees for which the decl_rtl field must be reset is stored
513 here. */
514
515 static vec<tree> decl_rtl_to_reset;
516
517 static comp_cost force_expr_to_var_cost (tree, bool);
518
519 /* The single loop exit if it dominates the latch, NULL otherwise. */
520
521 edge
522 single_dom_exit (struct loop *loop)
523 {
524 edge exit = single_exit (loop);
525
526 if (!exit)
527 return NULL;
528
529 if (!just_once_each_iteration_p (loop, exit->src))
530 return NULL;
531
532 return exit;
533 }
534
535 /* Dumps information about the induction variable IV to FILE. Don't dump
536 variable's name if DUMP_NAME is FALSE. The information is dumped with
537 preceding spaces indicated by INDENT_LEVEL. */
538
539 void
540 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
541 {
542 const char *p;
543 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
544
545 if (indent_level > 4)
546 indent_level = 4;
547 p = spaces + 8 - (indent_level << 1);
548
549 fprintf (file, "%sIV struct:\n", p);
550 if (iv->ssa_name && dump_name)
551 {
552 fprintf (file, "%s SSA_NAME:\t", p);
553 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
554 fprintf (file, "\n");
555 }
556
557 fprintf (file, "%s Type:\t", p);
558 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
559 fprintf (file, "\n");
560
561 fprintf (file, "%s Base:\t", p);
562 print_generic_expr (file, iv->base, TDF_SLIM);
563 fprintf (file, "\n");
564
565 fprintf (file, "%s Step:\t", p);
566 print_generic_expr (file, iv->step, TDF_SLIM);
567 fprintf (file, "\n");
568
569 if (iv->base_object)
570 {
571 fprintf (file, "%s Object:\t", p);
572 print_generic_expr (file, iv->base_object, TDF_SLIM);
573 fprintf (file, "\n");
574 }
575
576 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
577
578 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
579 p, iv->no_overflow ? "No-overflow" : "Overflow");
580 }
581
582 /* Dumps information about the USE to FILE. */
583
584 void
585 dump_use (FILE *file, struct iv_use *use)
586 {
587 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
588 fprintf (file, " At stmt:\t");
589 print_gimple_stmt (file, use->stmt, 0, 0);
590 fprintf (file, " At pos:\t");
591 if (use->op_p)
592 print_generic_expr (file, *use->op_p, TDF_SLIM);
593 fprintf (file, "\n");
594 dump_iv (file, use->iv, false, 2);
595 }
596
597 /* Dumps information about the groups of uses to FILE.  */
598
599 void
600 dump_groups (FILE *file, struct ivopts_data *data)
601 {
602 unsigned i, j;
603 struct iv_group *group;
604
605 for (i = 0; i < data->vgroups.length (); i++)
606 {
607 group = data->vgroups[i];
608 fprintf (file, "Group %d:\n", group->id);
609 if (group->type == USE_NONLINEAR_EXPR)
610 fprintf (file, " Type:\tGENERIC\n");
611 else if (group->type == USE_ADDRESS)
612 fprintf (file, " Type:\tADDRESS\n");
613 else
614 {
615 gcc_assert (group->type == USE_COMPARE);
616 fprintf (file, " Type:\tCOMPARE\n");
617 }
618 for (j = 0; j < group->vuses.length (); j++)
619 dump_use (file, group->vuses[j]);
620 }
621 }
622
623 /* Dumps information about induction variable candidate CAND to FILE. */
624
625 void
626 dump_cand (FILE *file, struct iv_cand *cand)
627 {
628 struct iv *iv = cand->iv;
629
630 fprintf (file, "Candidate %d:\n", cand->id);
631 if (cand->depends_on)
632 {
633 fprintf (file, " Depend on: ");
634 dump_bitmap (file, cand->depends_on);
635 }
636
637 if (cand->var_before)
638 {
639 fprintf (file, " Var befor: ");
640 print_generic_expr (file, cand->var_before, TDF_SLIM);
641 fprintf (file, "\n");
642 }
643 if (cand->var_after)
644 {
645 fprintf (file, " Var after: ");
646 print_generic_expr (file, cand->var_after, TDF_SLIM);
647 fprintf (file, "\n");
648 }
649
650 switch (cand->pos)
651 {
652 case IP_NORMAL:
653 fprintf (file, " Incr POS: before exit test\n");
654 break;
655
656 case IP_BEFORE_USE:
657 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
658 break;
659
660 case IP_AFTER_USE:
661 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
662 break;
663
664 case IP_END:
665 fprintf (file, " Incr POS: at end\n");
666 break;
667
668 case IP_ORIGINAL:
669 fprintf (file, " Incr POS: orig biv\n");
670 break;
671 }
672
673 dump_iv (file, iv, false, 1);
674 }
675
676 /* Returns the info for ssa version VER. */
677
678 static inline struct version_info *
679 ver_info (struct ivopts_data *data, unsigned ver)
680 {
681 return data->version_info + ver;
682 }
683
684 /* Returns the info for ssa name NAME. */
685
686 static inline struct version_info *
687 name_info (struct ivopts_data *data, tree name)
688 {
689 return ver_info (data, SSA_NAME_VERSION (name));
690 }
691
692 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
693 emitted in LOOP. */
694
695 static bool
696 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
697 {
698 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
699
700 gcc_assert (bb);
701
702 if (sbb == loop->latch)
703 return true;
704
705 if (sbb != bb)
706 return false;
707
708 return stmt == last_stmt (bb);
709 }
710
711 /* Returns true if STMT is after the place where the original induction
712 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
713 if the positions are identical. */
714
715 static bool
716 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
717 {
718 basic_block cand_bb = gimple_bb (cand->incremented_at);
719 basic_block stmt_bb = gimple_bb (stmt);
720
721 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
722 return false;
723
724 if (stmt_bb != cand_bb)
725 return true;
726
727 if (true_if_equal
728 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
729 return true;
730 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
731 }
732
733 /* Returns true if STMT is after the place where the induction variable
734 CAND is incremented in LOOP. */
735
736 static bool
737 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
738 {
739 switch (cand->pos)
740 {
741 case IP_END:
742 return false;
743
744 case IP_NORMAL:
745 return stmt_after_ip_normal_pos (loop, stmt);
746
747 case IP_ORIGINAL:
748 case IP_AFTER_USE:
749 return stmt_after_inc_pos (cand, stmt, false);
750
751 case IP_BEFORE_USE:
752 return stmt_after_inc_pos (cand, stmt, true);
753
754 default:
755 gcc_unreachable ();
756 }
757 }
758
759 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
760
761 static bool
762 abnormal_ssa_name_p (tree exp)
763 {
764 if (!exp)
765 return false;
766
767 if (TREE_CODE (exp) != SSA_NAME)
768 return false;
769
770 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
771 }
772
773 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
774 abnormal phi node. Callback for for_each_index. */
775
776 static bool
777 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
778 void *data ATTRIBUTE_UNUSED)
779 {
780 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
781 {
782 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
783 return false;
784 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
785 return false;
786 }
787
788 return !abnormal_ssa_name_p (*index);
789 }
790
791 /* Returns true if EXPR contains a ssa name that occurs in an
792 abnormal phi node. */
793
794 bool
795 contains_abnormal_ssa_name_p (tree expr)
796 {
797 enum tree_code code;
798 enum tree_code_class codeclass;
799
800 if (!expr)
801 return false;
802
803 code = TREE_CODE (expr);
804 codeclass = TREE_CODE_CLASS (code);
805
806 if (code == SSA_NAME)
807 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
808
809 if (code == INTEGER_CST
810 || is_gimple_min_invariant (expr))
811 return false;
812
813 if (code == ADDR_EXPR)
814 return !for_each_index (&TREE_OPERAND (expr, 0),
815 idx_contains_abnormal_ssa_name_p,
816 NULL);
817
818 if (code == COND_EXPR)
819 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
820 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
821 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
822
823 switch (codeclass)
824 {
825 case tcc_binary:
826 case tcc_comparison:
827 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
828 return true;
829
830 /* Fallthru. */
831 case tcc_unary:
832 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
833 return true;
834
835 break;
836
837 default:
838 gcc_unreachable ();
839 }
840
841 return false;
842 }
843
844 /* Returns the structure describing number of iterations determined from
845 EXIT of DATA->current_loop, or NULL if something goes wrong. */
846
847 static struct tree_niter_desc *
848 niter_for_exit (struct ivopts_data *data, edge exit)
849 {
850 struct tree_niter_desc *desc;
851 tree_niter_desc **slot;
852
853 if (!data->niters)
854 {
855 data->niters = new hash_map<edge, tree_niter_desc *>;
856 slot = NULL;
857 }
858 else
859 slot = data->niters->get (exit);
860
861 if (!slot)
862 {
863 /* Try to determine number of iterations. We cannot safely work with ssa
864 names that appear in phi nodes on abnormal edges, so that we do not
865 create overlapping life ranges for them (PR 27283). */
866 desc = XNEW (struct tree_niter_desc);
867 if (!number_of_iterations_exit (data->current_loop,
868 exit, desc, true)
869 || contains_abnormal_ssa_name_p (desc->niter))
870 {
871 XDELETE (desc);
872 desc = NULL;
873 }
874 data->niters->put (exit, desc);
875 }
876 else
877 desc = *slot;
878
879 return desc;
880 }
881
882 /* Returns the structure describing number of iterations determined from
883 single dominating exit of DATA->current_loop, or NULL if something
884 goes wrong. */
885
886 static struct tree_niter_desc *
887 niter_for_single_dom_exit (struct ivopts_data *data)
888 {
889 edge exit = single_dom_exit (data->current_loop);
890
891 if (!exit)
892 return NULL;
893
894 return niter_for_exit (data, exit);
895 }
896
897 /* Initializes data structures used by the iv optimization pass, stored
898 in DATA. */
899
900 static void
901 tree_ssa_iv_optimize_init (struct ivopts_data *data)
902 {
903 data->version_info_size = 2 * num_ssa_names;
904 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
905 data->relevant = BITMAP_ALLOC (NULL);
906 data->important_candidates = BITMAP_ALLOC (NULL);
907 data->max_inv_id = 0;
908 data->niters = NULL;
909 data->vgroups.create (20);
910 data->vcands.create (20);
911 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
912 data->max_inv_expr_id = 0;
913 data->name_expansion_cache = NULL;
914 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
915 data->iv_common_cands.create (20);
916 decl_rtl_to_reset.create (20);
917 gcc_obstack_init (&data->iv_obstack);
918 }
919
920 /* Returns a memory object to which EXPR points.  In case we are able to
921 determine that it does not point to any such object, NULL is returned. */
922
923 static tree
924 determine_base_object (tree expr)
925 {
926 enum tree_code code = TREE_CODE (expr);
927 tree base, obj;
928
929   /* If this is a pointer cast to any type, we need to determine
930 the base object for the pointer; so handle conversions before
931 throwing away non-pointer expressions. */
932 if (CONVERT_EXPR_P (expr))
933 return determine_base_object (TREE_OPERAND (expr, 0));
934
935 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
936 return NULL_TREE;
937
938 switch (code)
939 {
940 case INTEGER_CST:
941 return NULL_TREE;
942
943 case ADDR_EXPR:
944 obj = TREE_OPERAND (expr, 0);
945 base = get_base_address (obj);
946
947 if (!base)
948 return expr;
949
950 if (TREE_CODE (base) == MEM_REF)
951 return determine_base_object (TREE_OPERAND (base, 0));
952
953 return fold_convert (ptr_type_node,
954 build_fold_addr_expr (base));
955
956 case POINTER_PLUS_EXPR:
957 return determine_base_object (TREE_OPERAND (expr, 0));
958
959 case PLUS_EXPR:
960 case MINUS_EXPR:
961 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
962 gcc_unreachable ();
963
964 default:
965 return fold_convert (ptr_type_node, expr);
966 }
967 }
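
/* Illustrative examples for determine_base_object above (a sketch, for
   typical trees): for &a[i] the base object is the address of the
   declaration a; for a POINTER_PLUS_EXPR p_1 + 4 it is whatever base
   object p_1 has (p_1 itself, converted to ptr_type_node, if p_1 is just
   an SSA pointer); for an integer constant or a non-pointer expression
   it is NULL_TREE.  */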
968
969 /* Return true if an address expression with a non-DECL_P operand
970    (e.g. &a[i], as opposed to &a) appears in EXPR.  */
971
972 static bool
973 contain_complex_addr_expr (tree expr)
974 {
975 bool res = false;
976
977 STRIP_NOPS (expr);
978 switch (TREE_CODE (expr))
979 {
980 case POINTER_PLUS_EXPR:
981 case PLUS_EXPR:
982 case MINUS_EXPR:
983 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
984 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
985 break;
986
987 case ADDR_EXPR:
988 return (!DECL_P (TREE_OPERAND (expr, 0)));
989
990 default:
991 return false;
992 }
993
994 return res;
995 }
996
997 /* Allocates an induction variable with given initial value BASE and step STEP
998 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
999
1000 static struct iv *
1001 alloc_iv (struct ivopts_data *data, tree base, tree step,
1002 bool no_overflow = false)
1003 {
1004 tree expr = base;
1005 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1006 sizeof (struct iv));
1007 gcc_assert (step != NULL_TREE);
1008
1009   /* Lower address expressions in base, except ones with a DECL_P operand.
1010 By doing this:
1011 1) More accurate cost can be computed for address expressions;
1012 2) Duplicate candidates won't be created for bases in different
1013 forms, like &a[0] and &a. */
1014 STRIP_NOPS (expr);
1015 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1016 || contain_complex_addr_expr (expr))
1017 {
1018 aff_tree comb;
1019 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1020 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1021 }
1022
1023 iv->base = base;
1024 iv->base_object = determine_base_object (base);
1025 iv->step = step;
1026 iv->biv_p = false;
1027 iv->nonlin_use = NULL;
1028 iv->ssa_name = NULL_TREE;
1029 iv->no_overflow = no_overflow;
1030 iv->have_address_use = false;
1031
1032 return iv;
1033 }
1034
1035 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1036 doesn't overflow. */
1037
1038 static void
1039 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1040 bool no_overflow)
1041 {
1042 struct version_info *info = name_info (data, iv);
1043
1044 gcc_assert (!info->iv);
1045
1046 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1047 info->iv = alloc_iv (data, base, step, no_overflow);
1048 info->iv->ssa_name = iv;
1049 }
1050
1051 /* Finds induction variable declaration for VAR. */
1052
1053 static struct iv *
1054 get_iv (struct ivopts_data *data, tree var)
1055 {
1056 basic_block bb;
1057 tree type = TREE_TYPE (var);
1058
1059 if (!POINTER_TYPE_P (type)
1060 && !INTEGRAL_TYPE_P (type))
1061 return NULL;
1062
1063 if (!name_info (data, var)->iv)
1064 {
1065 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1066
1067 if (!bb
1068 || !flow_bb_inside_loop_p (data->current_loop, bb))
1069 set_iv (data, var, var, build_int_cst (type, 0), true);
1070 }
1071
1072 return name_info (data, var)->iv;
1073 }
1074
1075 /* Return the first non-invariant ssa var found in EXPR. */
1076
1077 static tree
1078 extract_single_var_from_expr (tree expr)
1079 {
1080 int i, n;
1081 tree tmp;
1082 enum tree_code code;
1083
1084 if (!expr || is_gimple_min_invariant (expr))
1085 return NULL;
1086
1087 code = TREE_CODE (expr);
1088 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1089 {
1090 n = TREE_OPERAND_LENGTH (expr);
1091 for (i = 0; i < n; i++)
1092 {
1093 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1094
1095 if (tmp)
1096 return tmp;
1097 }
1098 }
1099 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1100 }
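
/* For instance (an illustrative sketch for extract_single_var_from_expr
   above): for the expression n_1 * 4 + 8 the result is n_1, the first
   non-invariant ssa var encountered; for a constant such as 16 the
   result is NULL.  */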
1101
1102 /* Finds basic ivs. */
1103
1104 static bool
1105 find_bivs (struct ivopts_data *data)
1106 {
1107 gphi *phi;
1108 affine_iv iv;
1109 tree step, type, base, stop;
1110 bool found = false;
1111 struct loop *loop = data->current_loop;
1112 gphi_iterator psi;
1113
1114 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1115 {
1116 phi = psi.phi ();
1117
1118 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1119 continue;
1120
1121 if (virtual_operand_p (PHI_RESULT (phi)))
1122 continue;
1123
1124 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1125 continue;
1126
1127 if (integer_zerop (iv.step))
1128 continue;
1129
1130 step = iv.step;
1131 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1132       /* Stop expanding the iv base at the first ssa var referred to by the
1133 	 iv step.  Ideally we should stop at any ssa var, but since that is
1134 	 expensive and such cases are unusual, we just do it for the first one.
1135
1136 See PR64705 for the rationale. */
1137 stop = extract_single_var_from_expr (step);
1138 base = expand_simple_operations (base, stop);
1139 if (contains_abnormal_ssa_name_p (base)
1140 || contains_abnormal_ssa_name_p (step))
1141 continue;
1142
1143 type = TREE_TYPE (PHI_RESULT (phi));
1144 base = fold_convert (type, base);
1145 if (step)
1146 {
1147 if (POINTER_TYPE_P (type))
1148 step = convert_to_ptrofftype (step);
1149 else
1150 step = fold_convert (type, step);
1151 }
1152
1153 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1154 found = true;
1155 }
1156
1157 return found;
1158 }
1159
1160 /* Marks basic ivs. */
1161
1162 static void
1163 mark_bivs (struct ivopts_data *data)
1164 {
1165 gphi *phi;
1166 gimple *def;
1167 tree var;
1168 struct iv *iv, *incr_iv;
1169 struct loop *loop = data->current_loop;
1170 basic_block incr_bb;
1171 gphi_iterator psi;
1172
1173 data->bivs_not_used_in_addr = 0;
1174 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1175 {
1176 phi = psi.phi ();
1177
1178 iv = get_iv (data, PHI_RESULT (phi));
1179 if (!iv)
1180 continue;
1181
1182 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1183 def = SSA_NAME_DEF_STMT (var);
1184       /* Don't mark an iv peeled from another one as a biv.  */
1185 if (def
1186 && gimple_code (def) == GIMPLE_PHI
1187 && gimple_bb (def) == loop->header)
1188 continue;
1189
1190 incr_iv = get_iv (data, var);
1191 if (!incr_iv)
1192 continue;
1193
1194 /* If the increment is in the subloop, ignore it. */
1195 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1196 if (incr_bb->loop_father != data->current_loop
1197 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1198 continue;
1199
1200 iv->biv_p = true;
1201 incr_iv->biv_p = true;
1202 if (iv->no_overflow)
1203 data->bivs_not_used_in_addr++;
1204 if (incr_iv->no_overflow)
1205 data->bivs_not_used_in_addr++;
1206 }
1207 }
1208
1209 /* Checks whether STMT defines a linear induction variable and stores its
1210 parameters to IV. */
1211
1212 static bool
1213 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1214 {
1215 tree lhs, stop;
1216 struct loop *loop = data->current_loop;
1217
1218 iv->base = NULL_TREE;
1219 iv->step = NULL_TREE;
1220
1221 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1222 return false;
1223
1224 lhs = gimple_assign_lhs (stmt);
1225 if (TREE_CODE (lhs) != SSA_NAME)
1226 return false;
1227
1228 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1229 return false;
1230
1231   /* Stop expanding the iv base at the first ssa var referred to by the
1232      iv step.  Ideally we should stop at any ssa var, but since that is
1233      expensive and such cases are unusual, we just do it for the first one.
1234
1235 See PR64705 for the rationale. */
1236 stop = extract_single_var_from_expr (iv->step);
1237 iv->base = expand_simple_operations (iv->base, stop);
1238 if (contains_abnormal_ssa_name_p (iv->base)
1239 || contains_abnormal_ssa_name_p (iv->step))
1240 return false;
1241
1242 /* If STMT could throw, then do not consider STMT as defining a GIV.
1243      While this will suppress optimizations, we cannot safely delete this
1244 GIV and associated statements, even if it appears it is not used. */
1245 if (stmt_could_throw_p (stmt))
1246 return false;
1247
1248 return true;
1249 }
1250
1251 /* Finds general ivs in statement STMT. */
1252
1253 static void
1254 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1255 {
1256 affine_iv iv;
1257
1258 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1259 return;
1260
1261 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1262 }
1263
1264 /* Finds general ivs in basic block BB. */
1265
1266 static void
1267 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1268 {
1269 gimple_stmt_iterator bsi;
1270
1271 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1272 find_givs_in_stmt (data, gsi_stmt (bsi));
1273 }
1274
1275 /* Finds general ivs. */
1276
1277 static void
1278 find_givs (struct ivopts_data *data)
1279 {
1280 struct loop *loop = data->current_loop;
1281 basic_block *body = get_loop_body_in_dom_order (loop);
1282 unsigned i;
1283
1284 for (i = 0; i < loop->num_nodes; i++)
1285 find_givs_in_bb (data, body[i]);
1286 free (body);
1287 }
1288
1289 /* For each ssa name defined in LOOP determines whether it is an induction
1290 variable and if so, its initial value and step. */
1291
1292 static bool
1293 find_induction_variables (struct ivopts_data *data)
1294 {
1295 unsigned i;
1296 bitmap_iterator bi;
1297
1298 if (!find_bivs (data))
1299 return false;
1300
1301 find_givs (data);
1302 mark_bivs (data);
1303
1304 if (dump_file && (dump_flags & TDF_DETAILS))
1305 {
1306 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1307
1308 if (niter)
1309 {
1310 fprintf (dump_file, " number of iterations ");
1311 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1312 if (!integer_zerop (niter->may_be_zero))
1313 {
1314 fprintf (dump_file, "; zero if ");
1315 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1316 }
1317 fprintf (dump_file, "\n");
1318 };
1319
1320 fprintf (dump_file, "\n<Induction Vars>:\n");
1321 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1322 {
1323 struct version_info *info = ver_info (data, i);
1324 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1325 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1326 }
1327 }
1328
1329 return true;
1330 }
1331
1332 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1333 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1334    is the const offset stripped from the IV base; for uses of other types, both
1335 are zero by default. */
1336
1337 static struct iv_use *
1338 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1339 gimple *stmt, enum use_type type, tree addr_base,
1340 unsigned HOST_WIDE_INT addr_offset)
1341 {
1342 struct iv_use *use = XCNEW (struct iv_use);
1343
1344 use->id = group->vuses.length ();
1345 use->group_id = group->id;
1346 use->type = type;
1347 use->iv = iv;
1348 use->stmt = stmt;
1349 use->op_p = use_p;
1350 use->addr_base = addr_base;
1351 use->addr_offset = addr_offset;
1352
1353 group->vuses.safe_push (use);
1354 return use;
1355 }
1356
1357 /* Checks whether OP is a loop-level invariant and if so, records it.
1358 NONLINEAR_USE is true if the invariant is used in a way we do not
1359 handle specially. */
1360
1361 static void
1362 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1363 {
1364 basic_block bb;
1365 struct version_info *info;
1366
1367 if (TREE_CODE (op) != SSA_NAME
1368 || virtual_operand_p (op))
1369 return;
1370
1371 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1372 if (bb
1373 && flow_bb_inside_loop_p (data->current_loop, bb))
1374 return;
1375
1376 info = name_info (data, op);
1377 info->name = op;
1378 info->has_nonlin_use |= nonlinear_use;
1379 if (!info->inv_id)
1380 info->inv_id = ++data->max_inv_id;
1381 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1382 }
1383
1384 static tree
1385 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1386
1387 /* Record a group of TYPE. */
1388
1389 static struct iv_group *
1390 record_group (struct ivopts_data *data, enum use_type type)
1391 {
1392 struct iv_group *group = XCNEW (struct iv_group);
1393
1394 group->id = data->vgroups.length ();
1395 group->type = type;
1396 group->related_cands = BITMAP_ALLOC (NULL);
1397 group->vuses.create (1);
1398
1399 data->vgroups.safe_push (group);
1400 return group;
1401 }
1402
1403 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1404    A new group will be created if there is no existing group for the use.  */
1405
1406 static struct iv_use *
1407 record_group_use (struct ivopts_data *data, tree *use_p,
1408 struct iv *iv, gimple *stmt, enum use_type type)
1409 {
1410 tree addr_base = NULL;
1411 struct iv_group *group = NULL;
1412 unsigned HOST_WIDE_INT addr_offset = 0;
1413
1414   /* An address type use may share an existing group with matching base and step; any other use gets a new group.  */
1415 if (type == USE_ADDRESS && iv->base_object)
1416 {
1417 unsigned int i;
1418
1419 addr_base = strip_offset (iv->base, &addr_offset);
1420 for (i = 0; i < data->vgroups.length (); i++)
1421 {
1422 struct iv_use *use;
1423
1424 group = data->vgroups[i];
1425 use = group->vuses[0];
1426 if (use->type != USE_ADDRESS || !use->iv->base_object)
1427 continue;
1428
1429 /* Check if it has the same stripped base and step. */
1430 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1431 && operand_equal_p (iv->step, use->iv->step, 0)
1432 && operand_equal_p (addr_base, use->addr_base, 0))
1433 break;
1434 }
1435 if (i == data->vgroups.length ())
1436 group = NULL;
1437 }
1438
1439 if (!group)
1440 group = record_group (data, type);
1441
1442 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1443 }
1444
1445 /* Checks whether the use OP is interesting and if so, records it. */
1446
1447 static struct iv_use *
1448 find_interesting_uses_op (struct ivopts_data *data, tree op)
1449 {
1450 struct iv *iv;
1451 gimple *stmt;
1452 struct iv_use *use;
1453
1454 if (TREE_CODE (op) != SSA_NAME)
1455 return NULL;
1456
1457 iv = get_iv (data, op);
1458 if (!iv)
1459 return NULL;
1460
1461 if (iv->nonlin_use)
1462 {
1463 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1464 return iv->nonlin_use;
1465 }
1466
1467 if (integer_zerop (iv->step))
1468 {
1469 record_invariant (data, op, true);
1470 return NULL;
1471 }
1472
1473 stmt = SSA_NAME_DEF_STMT (op);
1474 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1475
1476 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1477 iv->nonlin_use = use;
1478 return use;
1479 }
1480
1481 /* Given a condition in statement STMT, checks whether it is a compare
1482 of an induction variable and an invariant. If this is the case,
1483 CONTROL_VAR is set to location of the iv, BOUND to the location of
1484 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1485 induction variable descriptions, and true is returned. If this is not
1486 the case, CONTROL_VAR and BOUND are set to the arguments of the
1487 condition and false is returned. */
1488
1489 static bool
1490 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1491 tree **control_var, tree **bound,
1492 struct iv **iv_var, struct iv **iv_bound)
1493 {
1494 /* The objects returned when COND has constant operands. */
1495 static struct iv const_iv;
1496 static tree zero;
1497 tree *op0 = &zero, *op1 = &zero;
1498 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1499 bool ret = false;
1500
1501 if (gimple_code (stmt) == GIMPLE_COND)
1502 {
1503 gcond *cond_stmt = as_a <gcond *> (stmt);
1504 op0 = gimple_cond_lhs_ptr (cond_stmt);
1505 op1 = gimple_cond_rhs_ptr (cond_stmt);
1506 }
1507 else
1508 {
1509 op0 = gimple_assign_rhs1_ptr (stmt);
1510 op1 = gimple_assign_rhs2_ptr (stmt);
1511 }
1512
1513 zero = integer_zero_node;
1514 const_iv.step = integer_zero_node;
1515
1516 if (TREE_CODE (*op0) == SSA_NAME)
1517 iv0 = get_iv (data, *op0);
1518 if (TREE_CODE (*op1) == SSA_NAME)
1519 iv1 = get_iv (data, *op1);
1520
1521 /* Exactly one of the compared values must be an iv, and the other one must
1522 be an invariant. */
1523 if (!iv0 || !iv1)
1524 goto end;
1525
1526 if (integer_zerop (iv0->step))
1527 {
1528 /* Control variable may be on the other side. */
1529 std::swap (op0, op1);
1530 std::swap (iv0, iv1);
1531 }
1532 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1533
1534 end:
1535 if (control_var)
1536 *control_var = op0;
1537 if (iv_var)
1538 *iv_var = iv0;
1539 if (bound)
1540 *bound = op1;
1541 if (iv_bound)
1542 *iv_bound = iv1;
1543
1544 return ret;
1545 }
1546
1547 /* Checks whether the condition in STMT is interesting and if so,
1548 records it. */
1549
1550 static void
1551 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1552 {
1553 tree *var_p, *bound_p;
1554 struct iv *var_iv;
1555
1556 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1557 {
1558 find_interesting_uses_op (data, *var_p);
1559 find_interesting_uses_op (data, *bound_p);
1560 return;
1561 }
1562
1563 record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1564 }
1565
1566 /* Returns the outermost loop EXPR is obviously invariant in
1567 relative to the loop LOOP, i.e. if all its operands are defined
1568 outside of the returned loop. Returns NULL if EXPR is not
1569 even obviously invariant in LOOP. */
1570
1571 struct loop *
1572 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1573 {
1574 basic_block def_bb;
1575 unsigned i, len;
1576
1577 if (is_gimple_min_invariant (expr))
1578 return current_loops->tree_root;
1579
1580 if (TREE_CODE (expr) == SSA_NAME)
1581 {
1582 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1583 if (def_bb)
1584 {
1585 if (flow_bb_inside_loop_p (loop, def_bb))
1586 return NULL;
1587 return superloop_at_depth (loop,
1588 loop_depth (def_bb->loop_father) + 1);
1589 }
1590
1591 return current_loops->tree_root;
1592 }
1593
1594 if (!EXPR_P (expr))
1595 return NULL;
1596
1597 unsigned maxdepth = 0;
1598 len = TREE_OPERAND_LENGTH (expr);
1599 for (i = 0; i < len; i++)
1600 {
1601 struct loop *ivloop;
1602 if (!TREE_OPERAND (expr, i))
1603 continue;
1604
1605 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1606 if (!ivloop)
1607 return NULL;
1608 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1609 }
1610
1611 return superloop_at_depth (loop, maxdepth);
1612 }
1613
1614 /* Returns true if expression EXPR is obviously invariant in LOOP,
1615 i.e. if all its operands are defined outside of the LOOP. LOOP
1616 should not be the function body. */
1617
1618 bool
1619 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1620 {
1621 basic_block def_bb;
1622 unsigned i, len;
1623
1624 gcc_assert (loop_depth (loop) > 0);
1625
1626 if (is_gimple_min_invariant (expr))
1627 return true;
1628
1629 if (TREE_CODE (expr) == SSA_NAME)
1630 {
1631 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1632 if (def_bb
1633 && flow_bb_inside_loop_p (loop, def_bb))
1634 return false;
1635
1636 return true;
1637 }
1638
1639 if (!EXPR_P (expr))
1640 return false;
1641
1642 len = TREE_OPERAND_LENGTH (expr);
1643 for (i = 0; i < len; i++)
1644 if (TREE_OPERAND (expr, i)
1645 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1646 return false;
1647
1648 return true;
1649 }
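
/* As a sketch of expr_invariant_in_loop_p above: an expression such as
   n_1 * 4, where n_1 is defined before LOOP, is invariant in LOOP, while
   an expression using i_2, where i_2 is a PHI result in the loop header,
   is not.  */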
1650
1651 /* Given expression EXPR which computes inductive values with respect
1652    to the loop recorded in DATA, this function returns the biv from which
1653    EXPR is derived by tracing definition chains of ssa variables in EXPR.  */
1654
1655 static struct iv*
1656 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1657 {
1658 struct iv *iv;
1659 unsigned i, n;
1660 tree e2, e1;
1661 enum tree_code code;
1662 gimple *stmt;
1663
1664 if (expr == NULL_TREE)
1665 return NULL;
1666
1667 if (is_gimple_min_invariant (expr))
1668 return NULL;
1669
1670 code = TREE_CODE (expr);
1671 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1672 {
1673 n = TREE_OPERAND_LENGTH (expr);
1674 for (i = 0; i < n; i++)
1675 {
1676 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1677 if (iv)
1678 return iv;
1679 }
1680 }
1681
1682 /* Stop if it's not ssa name. */
1683 if (code != SSA_NAME)
1684 return NULL;
1685
1686 iv = get_iv (data, expr);
1687 if (!iv || integer_zerop (iv->step))
1688 return NULL;
1689 else if (iv->biv_p)
1690 return iv;
1691
1692 stmt = SSA_NAME_DEF_STMT (expr);
1693 if (gphi *phi = dyn_cast <gphi *> (stmt))
1694 {
1695 ssa_op_iter iter;
1696 use_operand_p use_p;
1697
1698 if (virtual_operand_p (gimple_phi_result (phi)))
1699 return NULL;
1700
1701 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1702 {
1703 tree use = USE_FROM_PTR (use_p);
1704 iv = find_deriving_biv_for_expr (data, use);
1705 if (iv)
1706 return iv;
1707 }
1708 return NULL;
1709 }
1710 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1711 return NULL;
1712
1713 e1 = gimple_assign_rhs1 (stmt);
1714 code = gimple_assign_rhs_code (stmt);
1715 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1716 return find_deriving_biv_for_expr (data, e1);
1717
1718 switch (code)
1719 {
1720 case MULT_EXPR:
1721 case PLUS_EXPR:
1722 case MINUS_EXPR:
1723 case POINTER_PLUS_EXPR:
1724 /* Increments, decrements and multiplications by a constant
1725 are simple. */
1726 e2 = gimple_assign_rhs2 (stmt);
1727 iv = find_deriving_biv_for_expr (data, e2);
1728 if (iv)
1729 return iv;
1730
1731 /* Fallthru. */
1732 CASE_CONVERT:
1733 /* Casts are simple. */
1734 return find_deriving_biv_for_expr (data, e1);
1735
1736 default:
1737 break;
1738 }
1739
1740 return NULL;
1741 }
1742
1743 /* Record that BIV, together with its predecessor and successor ivs, is
1744    used in address type uses.  */
1745
1746 static void
1747 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1748 {
1749 unsigned i;
1750 tree type, base_1, base_2;
1751 bitmap_iterator bi;
1752
1753 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1754 || biv->have_address_use || !biv->no_overflow)
1755 return;
1756
1757 type = TREE_TYPE (biv->base);
1758 if (!INTEGRAL_TYPE_P (type))
1759 return;
1760
1761 biv->have_address_use = true;
1762 data->bivs_not_used_in_addr--;
1763 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1764 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1765 {
1766 struct iv *iv = ver_info (data, i)->iv;
1767
1768 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1769 || iv->have_address_use || !iv->no_overflow)
1770 continue;
1771
1772 if (type != TREE_TYPE (iv->base)
1773 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1774 continue;
1775
1776 if (!operand_equal_p (biv->step, iv->step, 0))
1777 continue;
1778
1779 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1780 if (operand_equal_p (base_1, iv->base, 0)
1781 || operand_equal_p (base_2, biv->base, 0))
1782 {
1783 iv->have_address_use = true;
1784 data->bivs_not_used_in_addr--;
1785 }
1786 }
1787 }
1788
1789 /* Cumulates the steps of indices into DATA and replaces their values with the
1790 initial ones. Returns false when the value of the index cannot be determined.
1791 Callback for for_each_index. */
1792
1793 struct ifs_ivopts_data
1794 {
1795 struct ivopts_data *ivopts_data;
1796 gimple *stmt;
1797 tree step;
1798 };
1799
1800 static bool
1801 idx_find_step (tree base, tree *idx, void *data)
1802 {
1803 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1804 struct iv *iv;
1805 bool use_overflow_semantics = false;
1806 tree step, iv_base, iv_step, lbound, off;
1807 struct loop *loop = dta->ivopts_data->current_loop;
1808
1809 /* If base is a component ref, require that the offset of the reference
1810 be invariant. */
1811 if (TREE_CODE (base) == COMPONENT_REF)
1812 {
1813 off = component_ref_field_offset (base);
1814 return expr_invariant_in_loop_p (loop, off);
1815 }
1816
1817 /* If base is array, first check whether we will be able to move the
1818 reference out of the loop (in order to take its address in strength
1819 reduction). In order for this to work we need both lower bound
1820 and step to be loop invariants. */
1821 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1822 {
1823 /* Moreover, for a range, the size needs to be invariant as well. */
1824 if (TREE_CODE (base) == ARRAY_RANGE_REF
1825 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1826 return false;
1827
1828 step = array_ref_element_size (base);
1829 lbound = array_ref_low_bound (base);
1830
1831 if (!expr_invariant_in_loop_p (loop, step)
1832 || !expr_invariant_in_loop_p (loop, lbound))
1833 return false;
1834 }
1835
1836 if (TREE_CODE (*idx) != SSA_NAME)
1837 return true;
1838
1839 iv = get_iv (dta->ivopts_data, *idx);
1840 if (!iv)
1841 return false;
1842
1843   /* XXX  For a base of *D42 with iv->base being &x[0], we produce
1844 *&x[0], which is not folded and does not trigger the
1845 ARRAY_REF path below. */
1846 *idx = iv->base;
1847
1848 if (integer_zerop (iv->step))
1849 return true;
1850
1851 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1852 {
1853 step = array_ref_element_size (base);
1854
1855 /* We only handle addresses whose step is an integer constant. */
1856 if (TREE_CODE (step) != INTEGER_CST)
1857 return false;
1858 }
1859 else
1860     /* The step for pointer arithmetic is already 1 byte.  */
1861 step = size_one_node;
1862
1863 iv_base = iv->base;
1864 iv_step = iv->step;
1865 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1866 use_overflow_semantics = true;
1867
1868 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1869 sizetype, &iv_base, &iv_step, dta->stmt,
1870 use_overflow_semantics))
1871 {
1872 /* The index might wrap. */
1873 return false;
1874 }
1875
1876 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1877 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1878
1879 if (dta->ivopts_data->bivs_not_used_in_addr)
1880 {
1881 if (!iv->biv_p)
1882 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1883
1884 record_biv_for_address_use (dta->ivopts_data, iv);
1885 }
1886 return true;
1887 }
1888
1889 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1890 object is passed to it in DATA. */
1891
1892 static bool
1893 idx_record_use (tree base, tree *idx,
1894 void *vdata)
1895 {
1896 struct ivopts_data *data = (struct ivopts_data *) vdata;
1897 find_interesting_uses_op (data, *idx);
1898 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1899 {
1900 find_interesting_uses_op (data, array_ref_element_size (base));
1901 find_interesting_uses_op (data, array_ref_low_bound (base));
1902 }
1903 return true;
1904 }
1905
1906 /* If we can prove that TOP = cst * BOT for some constant cst,
1907 store cst to MUL and return true. Otherwise return false.
1908 The returned value is always sign-extended, regardless of the
1909 signedness of TOP and BOT. */
1910
1911 static bool
1912 constant_multiple_of (tree top, tree bot, widest_int *mul)
1913 {
1914 tree mby;
1915 enum tree_code code;
1916 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1917 widest_int res, p0, p1;
1918
1919 STRIP_NOPS (top);
1920 STRIP_NOPS (bot);
1921
1922 if (operand_equal_p (top, bot, 0))
1923 {
1924 *mul = 1;
1925 return true;
1926 }
1927
1928 code = TREE_CODE (top);
1929 switch (code)
1930 {
1931 case MULT_EXPR:
1932 mby = TREE_OPERAND (top, 1);
1933 if (TREE_CODE (mby) != INTEGER_CST)
1934 return false;
1935
1936 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1937 return false;
1938
1939 *mul = wi::sext (res * wi::to_widest (mby), precision);
1940 return true;
1941
1942 case PLUS_EXPR:
1943 case MINUS_EXPR:
1944 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1945 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1946 return false;
1947
1948 if (code == MINUS_EXPR)
1949 p1 = -p1;
1950 *mul = wi::sext (p0 + p1, precision);
1951 return true;
1952
1953 case INTEGER_CST:
1954 if (TREE_CODE (bot) != INTEGER_CST)
1955 return false;
1956
1957 p0 = widest_int::from (top, SIGNED);
1958 p1 = widest_int::from (bot, SIGNED);
1959 if (p1 == 0)
1960 return false;
1961 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1962 return res == 0;
1963
1964 default:
1965 return false;
1966 }
1967 }
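
/* Worked example (illustration only, not part of this pass): for
   TOP = n * 4 - n and BOT = n, the MULT_EXPR case yields 4 for the first
   operand, the recursion on the second operand yields 1, and the
   MINUS_EXPR case stores 4 - 1 = 3 into *MUL.  For TOP = 12 and BOT = 4,
   the INTEGER_CST case checks that the division leaves no remainder and
   stores 3.  */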
1968
1969 /* Return true if memory reference REF with step STEP may be unaligned. */
1970
1971 static bool
1972 may_be_unaligned_p (tree ref, tree step)
1973 {
1974 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1975 thus they are not misaligned. */
1976 if (TREE_CODE (ref) == TARGET_MEM_REF)
1977 return false;
1978
1979 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1980 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1981 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1982
1983 unsigned HOST_WIDE_INT bitpos;
1984 unsigned int ref_align;
1985 get_object_alignment_1 (ref, &ref_align, &bitpos);
1986 if (ref_align < align
1987 || (bitpos % align) != 0
1988 || (bitpos % BITS_PER_UNIT) != 0)
1989 return true;
1990
1991 unsigned int trailing_zeros = tree_ctz (step);
1992 if (trailing_zeros < HOST_BITS_PER_INT
1993 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1994 return true;
1995
1996 return false;
1997 }
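
/* Worked example (illustration only, assuming BITS_PER_UNIT of 8): for a
   reference whose type requires 64-bit alignment and whose address advances
   by 4 bytes per iteration, tree_ctz (step) is 2, so
   (1U << 2) * BITS_PER_UNIT == 32 < 64 and the function conservatively
   returns true.  */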
1998
1999 /* Return true if EXPR may be non-addressable. */
2000
2001 bool
2002 may_be_nonaddressable_p (tree expr)
2003 {
2004 switch (TREE_CODE (expr))
2005 {
2006 case TARGET_MEM_REF:
2007 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2008 target, thus they are always addressable. */
2009 return false;
2010
2011 case MEM_REF:
2012 /* Likewise for MEM_REFs, modulo the storage order. */
2013 return REF_REVERSE_STORAGE_ORDER (expr);
2014
2015 case BIT_FIELD_REF:
2016 if (REF_REVERSE_STORAGE_ORDER (expr))
2017 return true;
2018 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2019
2020 case COMPONENT_REF:
2021 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2022 return true;
2023 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2024 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2025
2026 case ARRAY_REF:
2027 case ARRAY_RANGE_REF:
2028 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2029 return true;
2030 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2031
2032 case VIEW_CONVERT_EXPR:
2033 /* This kind of view-conversion may wrap non-addressable objects
2034 and make them look addressable. After some processing the
2035 non-addressability may be uncovered again, causing ADDR_EXPRs
2036 of inappropriate objects to be built. */
2037 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2038 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2039 return true;
2040 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2041
2042 CASE_CONVERT:
2043 return true;
2044
2045 default:
2046 break;
2047 }
2048
2049 return false;
2050 }
2051
2052 /* Finds addresses in *OP_P inside STMT. */
2053
2054 static void
2055 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2056 tree *op_p)
2057 {
2058 tree base = *op_p, step = size_zero_node;
2059 struct iv *civ;
2060 struct ifs_ivopts_data ifs_ivopts_data;
2061
2062 /* Do not play with volatile memory references. A bit too conservative,
2063 perhaps, but safe. */
2064 if (gimple_has_volatile_ops (stmt))
2065 goto fail;
2066
2067 /* Ignore bitfields for now. Not really something terribly complicated
2068 to handle. TODO. */
2069 if (TREE_CODE (base) == BIT_FIELD_REF)
2070 goto fail;
2071
2072 base = unshare_expr (base);
2073
2074 if (TREE_CODE (base) == TARGET_MEM_REF)
2075 {
2076 tree type = build_pointer_type (TREE_TYPE (base));
2077 tree astep;
2078
2079 if (TMR_BASE (base)
2080 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2081 {
2082 civ = get_iv (data, TMR_BASE (base));
2083 if (!civ)
2084 goto fail;
2085
2086 TMR_BASE (base) = civ->base;
2087 step = civ->step;
2088 }
2089 if (TMR_INDEX2 (base)
2090 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2091 {
2092 civ = get_iv (data, TMR_INDEX2 (base));
2093 if (!civ)
2094 goto fail;
2095
2096 TMR_INDEX2 (base) = civ->base;
2097 step = civ->step;
2098 }
2099 if (TMR_INDEX (base)
2100 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2101 {
2102 civ = get_iv (data, TMR_INDEX (base));
2103 if (!civ)
2104 goto fail;
2105
2106 TMR_INDEX (base) = civ->base;
2107 astep = civ->step;
2108
2109 if (astep)
2110 {
2111 if (TMR_STEP (base))
2112 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2113
2114 step = fold_build2 (PLUS_EXPR, type, step, astep);
2115 }
2116 }
2117
2118 if (integer_zerop (step))
2119 goto fail;
2120 base = tree_mem_ref_addr (type, base);
2121 }
2122 else
2123 {
2124 ifs_ivopts_data.ivopts_data = data;
2125 ifs_ivopts_data.stmt = stmt;
2126 ifs_ivopts_data.step = size_zero_node;
2127 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2128 || integer_zerop (ifs_ivopts_data.step))
2129 goto fail;
2130 step = ifs_ivopts_data.step;
2131
2132 /* Check that the base expression is addressable. This needs
2133 to be done after substituting bases of IVs into it. */
2134 if (may_be_nonaddressable_p (base))
2135 goto fail;
2136
2137 /* Moreover, on strict alignment platforms, check that it is
2138 sufficiently aligned. */
2139 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2140 goto fail;
2141
2142 base = build_fold_addr_expr (base);
2143
2144 /* Substituting bases of IVs into the base expression might
2145 have caused folding opportunities. */
2146 if (TREE_CODE (base) == ADDR_EXPR)
2147 {
2148 tree *ref = &TREE_OPERAND (base, 0);
2149 while (handled_component_p (*ref))
2150 ref = &TREE_OPERAND (*ref, 0);
2151 if (TREE_CODE (*ref) == MEM_REF)
2152 {
2153 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2154 TREE_OPERAND (*ref, 0),
2155 TREE_OPERAND (*ref, 1));
2156 if (tem)
2157 *ref = tem;
2158 }
2159 }
2160 }
2161
2162 civ = alloc_iv (data, base, step);
2163 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2164 return;
2165
2166 fail:
2167 for_each_index (op_p, idx_record_use, data);
2168 }
2169
2170 /* Finds and records invariants used in STMT. */
2171
2172 static void
2173 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2174 {
2175 ssa_op_iter iter;
2176 use_operand_p use_p;
2177 tree op;
2178
2179 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2180 {
2181 op = USE_FROM_PTR (use_p);
2182 record_invariant (data, op, false);
2183 }
2184 }
2185
2186 /* Finds interesting uses of induction variables in the statement STMT. */
2187
2188 static void
2189 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2190 {
2191 struct iv *iv;
2192 tree op, *lhs, *rhs;
2193 ssa_op_iter iter;
2194 use_operand_p use_p;
2195 enum tree_code code;
2196
2197 find_invariants_stmt (data, stmt);
2198
2199 if (gimple_code (stmt) == GIMPLE_COND)
2200 {
2201 find_interesting_uses_cond (data, stmt);
2202 return;
2203 }
2204
2205 if (is_gimple_assign (stmt))
2206 {
2207 lhs = gimple_assign_lhs_ptr (stmt);
2208 rhs = gimple_assign_rhs1_ptr (stmt);
2209
2210 if (TREE_CODE (*lhs) == SSA_NAME)
2211 {
2212 /* If the statement defines an induction variable, the uses are not
2213 interesting by themselves. */
2214
2215 iv = get_iv (data, *lhs);
2216
2217 if (iv && !integer_zerop (iv->step))
2218 return;
2219 }
2220
2221 code = gimple_assign_rhs_code (stmt);
2222 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2223 && (REFERENCE_CLASS_P (*rhs)
2224 || is_gimple_val (*rhs)))
2225 {
2226 if (REFERENCE_CLASS_P (*rhs))
2227 find_interesting_uses_address (data, stmt, rhs);
2228 else
2229 find_interesting_uses_op (data, *rhs);
2230
2231 if (REFERENCE_CLASS_P (*lhs))
2232 find_interesting_uses_address (data, stmt, lhs);
2233 return;
2234 }
2235 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2236 {
2237 find_interesting_uses_cond (data, stmt);
2238 return;
2239 }
2240
2241 /* TODO -- we should also handle address uses of type
2242
2243 memory = call (whatever);
2244
2245 and
2246
2247 call (memory). */
2248 }
2249
2250 if (gimple_code (stmt) == GIMPLE_PHI
2251 && gimple_bb (stmt) == data->current_loop->header)
2252 {
2253 iv = get_iv (data, PHI_RESULT (stmt));
2254
2255 if (iv && !integer_zerop (iv->step))
2256 return;
2257 }
2258
2259 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2260 {
2261 op = USE_FROM_PTR (use_p);
2262
2263 if (TREE_CODE (op) != SSA_NAME)
2264 continue;
2265
2266 iv = get_iv (data, op);
2267 if (!iv)
2268 continue;
2269
2270 find_interesting_uses_op (data, op);
2271 }
2272 }
2273
2274 /* Finds interesting uses of induction variables outside of loops
2275 on loop exit edge EXIT. */
2276
2277 static void
2278 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2279 {
2280 gphi *phi;
2281 gphi_iterator psi;
2282 tree def;
2283
2284 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2285 {
2286 phi = psi.phi ();
2287 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2288 if (!virtual_operand_p (def))
2289 find_interesting_uses_op (data, def);
2290 }
2291 }
2292
2293 /* Compute maximum offset of [base + offset] addressing mode
2294 for memory reference represented by USE. */
2295
2296 static HOST_WIDE_INT
2297 compute_max_addr_offset (struct iv_use *use)
2298 {
2299 int width;
2300 rtx reg, addr;
2301 HOST_WIDE_INT i, off;
2302 unsigned list_index, num;
2303 addr_space_t as;
2304 machine_mode mem_mode, addr_mode;
2305 static vec<HOST_WIDE_INT> max_offset_list;
2306
2307 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2308 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2309
2310 num = max_offset_list.length ();
2311 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2312 if (list_index >= num)
2313 {
2314 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2315 for (; num < max_offset_list.length (); num++)
2316 max_offset_list[num] = -1;
2317 }
2318
2319 off = max_offset_list[list_index];
2320 if (off != -1)
2321 return off;
2322
2323 addr_mode = targetm.addr_space.address_mode (as);
2324 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2325 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2326
2327 width = GET_MODE_BITSIZE (addr_mode) - 1;
2328 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2329 width = HOST_BITS_PER_WIDE_INT - 1;
2330
2331 for (i = width; i > 0; i--)
2332 {
2333 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2334 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2335 if (memory_address_addr_space_p (mem_mode, addr, as))
2336 break;
2337
2338 /* For some strict-alignment targets, the offset must be naturally
2339 aligned. Try an aligned offset if mem_mode is not QImode. */
2340 off = ((unsigned HOST_WIDE_INT) 1 << i);
2341 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2342 {
2343 off -= GET_MODE_SIZE (mem_mode);
2344 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2345 if (memory_address_addr_space_p (mem_mode, addr, as))
2346 break;
2347 }
2348 }
2349 if (i == 0)
2350 off = 0;
2351
2352 max_offset_list[list_index] = off;
2353 return off;
2354 }
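
/* Illustration (assumed target, not derived from this file): on a machine
   whose [base + offset] form accepts 12-bit signed immediates, the probing
   loop above first succeeds for i == 11, so the cached maximum offset for
   that mode and address space is (1 << 11) - 1 == 2047.  */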
2355
2356 /* Comparison function to sort group in ascending order of addr_offset. */
2357
2358 static int
2359 group_compare_offset (const void *a, const void *b)
2360 {
2361 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2362 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2363
2364 if ((*u1)->addr_offset != (*u2)->addr_offset)
2365 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2366 else
2367 return 0;
2368 }
2369
2370 /* Check if small groups should be split. Return true if no group
2371 contains more than two uses with distinct addr_offsets. Return
2372 false otherwise. We want to split such groups because:
2373
2374 1) Small groups don't have much benefit and may interfere with
2375 general candidate selection.
2376 2) The problem size with only small groups is usually small and
2377 the general algorithm can handle it well.
2378
2379 TODO -- The above claim may not hold when we want to merge memory
2380 accesses with consecutive addresses. */
2381
2382 static bool
2383 split_small_address_groups_p (struct ivopts_data *data)
2384 {
2385 unsigned int i, j, distinct = 1;
2386 struct iv_use *pre;
2387 struct iv_group *group;
2388
2389 for (i = 0; i < data->vgroups.length (); i++)
2390 {
2391 group = data->vgroups[i];
2392 if (group->vuses.length () == 1)
2393 continue;
2394
2395 gcc_assert (group->type == USE_ADDRESS);
2396 if (group->vuses.length () == 2)
2397 {
2398 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2399 std::swap (group->vuses[0], group->vuses[1]);
2400 }
2401 else
2402 group->vuses.qsort (group_compare_offset);
2403
2404 if (distinct > 2)
2405 continue;
2406
2407 distinct = 1;
2408 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2409 {
2410 if (group->vuses[j]->addr_offset != pre->addr_offset)
2411 {
2412 pre = group->vuses[j];
2413 distinct++;
2414 }
2415
2416 if (distinct > 2)
2417 break;
2418 }
2419 }
2420
2421 return (distinct <= 2);
2422 }
2423
2424 /* For each group of address type uses, this function further groups
2425 these uses according to the maximum offset supported by the target's
2426 [base + offset] addressing mode. */
2427
2428 static void
2429 split_address_groups (struct ivopts_data *data)
2430 {
2431 unsigned int i, j;
2432 HOST_WIDE_INT max_offset = -1;
2433
2434 /* Reset max offset to split all small groups. */
2435 if (split_small_address_groups_p (data))
2436 max_offset = 0;
2437
2438 for (i = 0; i < data->vgroups.length (); i++)
2439 {
2440 struct iv_group *group = data->vgroups[i];
2441 struct iv_use *use = group->vuses[0];
2442
2443 use->id = 0;
2444 use->group_id = group->id;
2445 if (group->vuses.length () == 1)
2446 continue;
2447
2448 if (max_offset != 0)
2449 max_offset = compute_max_addr_offset (use);
2450
2451 for (j = 1; j < group->vuses.length (); j++)
2452 {
2453 struct iv_use *next = group->vuses[j];
2454
2455 /* Only uses whose offset relative to the first use fits in the
2456 offset part can be grouped together. */
2457 if (next->addr_offset - use->addr_offset
2458 > (unsigned HOST_WIDE_INT) max_offset)
2459 break;
2460
2461 next->id = j;
2462 next->group_id = group->id;
2463 }
2464 /* Split group. */
2465 if (j < group->vuses.length ())
2466 {
2467 struct iv_group *new_group = record_group (data, group->type);
2468 new_group->vuses.safe_splice (group->vuses);
2469 new_group->vuses.block_remove (0, j);
2470 group->vuses.truncate (j);
2471 }
2472 }
2473 }
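
/* Worked example (illustration only): with address uses at offsets 0, 4 and
   8192 from the same base and a maximum supported offset of 4095, the first
   two uses stay in one group, while the third is moved to a newly recorded
   group because 8192 - 0 exceeds 4095.  */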
2474
2475 /* Finds uses of the induction variables that are interesting. */
2476
2477 static void
2478 find_interesting_uses (struct ivopts_data *data)
2479 {
2480 basic_block bb;
2481 gimple_stmt_iterator bsi;
2482 basic_block *body = get_loop_body (data->current_loop);
2483 unsigned i;
2484 edge e;
2485
2486 for (i = 0; i < data->current_loop->num_nodes; i++)
2487 {
2488 edge_iterator ei;
2489 bb = body[i];
2490
2491 FOR_EACH_EDGE (e, ei, bb->succs)
2492 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2493 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2494 find_interesting_uses_outside (data, e);
2495
2496 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2497 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2498 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2499 if (!is_gimple_debug (gsi_stmt (bsi)))
2500 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2501 }
2502
2503 split_address_groups (data);
2504
2505 if (dump_file && (dump_flags & TDF_DETAILS))
2506 {
2507 bitmap_iterator bi;
2508
2509 fprintf (dump_file, "\n<Invariant Vars>:\n");
2510 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2511 {
2512 struct version_info *info = ver_info (data, i);
2513 if (info->inv_id)
2514 {
2515 fprintf (dump_file, "Inv %d:\t", info->inv_id);
2516 print_generic_expr (dump_file, info->name, TDF_SLIM);
2517 fprintf (dump_file, "%s\n",
2518 info->has_nonlin_use ? "" : "\t(eliminable)");
2519 }
2520 }
2521
2522 fprintf (dump_file, "\n<IV Groups>:\n");
2523 dump_groups (dump_file, data);
2524 fprintf (dump_file, "\n");
2525 }
2526
2527 free (body);
2528 }
2529
2530 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2531 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2532 we are at the top-level of the processed address. */
2533
2534 static tree
2535 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2536 HOST_WIDE_INT *offset)
2537 {
2538 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2539 enum tree_code code;
2540 tree type, orig_type = TREE_TYPE (expr);
2541 HOST_WIDE_INT off0, off1, st;
2542 tree orig_expr = expr;
2543
2544 STRIP_NOPS (expr);
2545
2546 type = TREE_TYPE (expr);
2547 code = TREE_CODE (expr);
2548 *offset = 0;
2549
2550 switch (code)
2551 {
2552 case INTEGER_CST:
2553 if (!cst_and_fits_in_hwi (expr)
2554 || integer_zerop (expr))
2555 return orig_expr;
2556
2557 *offset = int_cst_value (expr);
2558 return build_int_cst (orig_type, 0);
2559
2560 case POINTER_PLUS_EXPR:
2561 case PLUS_EXPR:
2562 case MINUS_EXPR:
2563 op0 = TREE_OPERAND (expr, 0);
2564 op1 = TREE_OPERAND (expr, 1);
2565
2566 op0 = strip_offset_1 (op0, false, false, &off0);
2567 op1 = strip_offset_1 (op1, false, false, &off1);
2568
2569 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2570 if (op0 == TREE_OPERAND (expr, 0)
2571 && op1 == TREE_OPERAND (expr, 1))
2572 return orig_expr;
2573
2574 if (integer_zerop (op1))
2575 expr = op0;
2576 else if (integer_zerop (op0))
2577 {
2578 if (code == MINUS_EXPR)
2579 expr = fold_build1 (NEGATE_EXPR, type, op1);
2580 else
2581 expr = op1;
2582 }
2583 else
2584 expr = fold_build2 (code, type, op0, op1);
2585
2586 return fold_convert (orig_type, expr);
2587
2588 case MULT_EXPR:
2589 op1 = TREE_OPERAND (expr, 1);
2590 if (!cst_and_fits_in_hwi (op1))
2591 return orig_expr;
2592
2593 op0 = TREE_OPERAND (expr, 0);
2594 op0 = strip_offset_1 (op0, false, false, &off0);
2595 if (op0 == TREE_OPERAND (expr, 0))
2596 return orig_expr;
2597
2598 *offset = off0 * int_cst_value (op1);
2599 if (integer_zerop (op0))
2600 expr = op0;
2601 else
2602 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2603
2604 return fold_convert (orig_type, expr);
2605
2606 case ARRAY_REF:
2607 case ARRAY_RANGE_REF:
2608 if (!inside_addr)
2609 return orig_expr;
2610
2611 step = array_ref_element_size (expr);
2612 if (!cst_and_fits_in_hwi (step))
2613 break;
2614
2615 st = int_cst_value (step);
2616 op1 = TREE_OPERAND (expr, 1);
2617 op1 = strip_offset_1 (op1, false, false, &off1);
2618 *offset = off1 * st;
2619
2620 if (top_compref
2621 && integer_zerop (op1))
2622 {
2623 /* Strip the component reference completely. */
2624 op0 = TREE_OPERAND (expr, 0);
2625 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2626 *offset += off0;
2627 return op0;
2628 }
2629 break;
2630
2631 case COMPONENT_REF:
2632 {
2633 tree field;
2634
2635 if (!inside_addr)
2636 return orig_expr;
2637
2638 tmp = component_ref_field_offset (expr);
2639 field = TREE_OPERAND (expr, 1);
2640 if (top_compref
2641 && cst_and_fits_in_hwi (tmp)
2642 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2643 {
2644 HOST_WIDE_INT boffset, abs_off;
2645
2646 /* Strip the component reference completely. */
2647 op0 = TREE_OPERAND (expr, 0);
2648 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2649 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2650 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2651 if (boffset < 0)
2652 abs_off = -abs_off;
2653
2654 *offset = off0 + int_cst_value (tmp) + abs_off;
2655 return op0;
2656 }
2657 }
2658 break;
2659
2660 case ADDR_EXPR:
2661 op0 = TREE_OPERAND (expr, 0);
2662 op0 = strip_offset_1 (op0, true, true, &off0);
2663 *offset += off0;
2664
2665 if (op0 == TREE_OPERAND (expr, 0))
2666 return orig_expr;
2667
2668 expr = build_fold_addr_expr (op0);
2669 return fold_convert (orig_type, expr);
2670
2671 case MEM_REF:
2672 /* ??? Offset operand? */
2673 inside_addr = false;
2674 break;
2675
2676 default:
2677 return orig_expr;
2678 }
2679
2680 /* Default handling of expressions for which we want to recurse into
2681 the first operand. */
2682 op0 = TREE_OPERAND (expr, 0);
2683 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2684 *offset += off0;
2685
2686 if (op0 == TREE_OPERAND (expr, 0)
2687 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2688 return orig_expr;
2689
2690 expr = copy_node (expr);
2691 TREE_OPERAND (expr, 0) = op0;
2692 if (op1)
2693 TREE_OPERAND (expr, 1) = op1;
2694
2695 /* Inside an address, we might strip the top-level component references,
2696 thus changing the type of the expression. Handling of ADDR_EXPR
2697 will fix that. */
2698 expr = fold_convert (orig_type, expr);
2699
2700 return expr;
2701 }
2702
2703 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2704
2705 static tree
2706 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2707 {
2708 HOST_WIDE_INT off;
2709 tree core = strip_offset_1 (expr, false, false, &off);
2710 *offset = off;
2711 return core;
2712 }
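
/* A minimal usage sketch (EXPR stands for some tree, not a variable of
   this pass):

     unsigned HOST_WIDE_INT off;
     tree core = strip_offset (expr, &off);

   For EXPR == &a[i + 3] with 4-byte array elements this returns &a[i]
   and sets off to 12.  */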
2713
2714 /* Returns variant of TYPE that can be used as base for different uses.
2715 We return unsigned type with the same precision, which avoids problems
2716 with overflows. */
2717
2718 static tree
2719 generic_type_for (tree type)
2720 {
2721 if (POINTER_TYPE_P (type))
2722 return unsigned_type_for (type);
2723
2724 if (TYPE_UNSIGNED (type))
2725 return type;
2726
2727 return unsigned_type_for (type);
2728 }
2729
2730 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2731 the bitmap in which we should store it. */
2732
2733 static struct ivopts_data *fd_ivopts_data;
2734 static tree
2735 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2736 {
2737 bitmap *depends_on = (bitmap *) data;
2738 struct version_info *info;
2739
2740 if (TREE_CODE (*expr_p) != SSA_NAME)
2741 return NULL_TREE;
2742 info = name_info (fd_ivopts_data, *expr_p);
2743
2744 if (!info->inv_id || info->has_nonlin_use)
2745 return NULL_TREE;
2746
2747 if (!*depends_on)
2748 *depends_on = BITMAP_ALLOC (NULL);
2749 bitmap_set_bit (*depends_on, info->inv_id);
2750
2751 return NULL_TREE;
2752 }
2753
2754 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2755 position to POS. If USE is not NULL, the candidate is set as related to
2756 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2757 replacement of the final value of the iv by a direct computation. */
2758
2759 static struct iv_cand *
2760 add_candidate_1 (struct ivopts_data *data,
2761 tree base, tree step, bool important, enum iv_position pos,
2762 struct iv_use *use, gimple *incremented_at,
2763 struct iv *orig_iv = NULL)
2764 {
2765 unsigned i;
2766 struct iv_cand *cand = NULL;
2767 tree type, orig_type;
2768
2769 gcc_assert (base && step);
2770
2771 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2772 live, but the ivopts code may replace a real pointer with one
2773 pointing before or after the memory block that is then adjusted
2774 into the memory block during the loop. FIXME: It would likely be
2775 better to actually force the pointer live and still use ivopts;
2776 for example, it would be enough to write the pointer into memory
2777 and keep it there until after the loop. */
2778 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2779 return NULL;
2780
2781 /* For non-original variables, make sure their values are computed in a type
2782 that does not invoke undefined behavior on overflows (since in general,
2783 we cannot prove that these induction variables are non-wrapping). */
2784 if (pos != IP_ORIGINAL)
2785 {
2786 orig_type = TREE_TYPE (base);
2787 type = generic_type_for (orig_type);
2788 if (type != orig_type)
2789 {
2790 base = fold_convert (type, base);
2791 step = fold_convert (type, step);
2792 }
2793 }
2794
2795 for (i = 0; i < data->vcands.length (); i++)
2796 {
2797 cand = data->vcands[i];
2798
2799 if (cand->pos != pos)
2800 continue;
2801
2802 if (cand->incremented_at != incremented_at
2803 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2804 && cand->ainc_use != use))
2805 continue;
2806
2807 if (operand_equal_p (base, cand->iv->base, 0)
2808 && operand_equal_p (step, cand->iv->step, 0)
2809 && (TYPE_PRECISION (TREE_TYPE (base))
2810 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2811 break;
2812 }
2813
2814 if (i == data->vcands.length ())
2815 {
2816 cand = XCNEW (struct iv_cand);
2817 cand->id = i;
2818 cand->iv = alloc_iv (data, base, step);
2819 cand->pos = pos;
2820 if (pos != IP_ORIGINAL)
2821 {
2822 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2823 cand->var_after = cand->var_before;
2824 }
2825 cand->important = important;
2826 cand->incremented_at = incremented_at;
2827 data->vcands.safe_push (cand);
2828
2829 if (TREE_CODE (step) != INTEGER_CST)
2830 {
2831 fd_ivopts_data = data;
2832 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2833 }
2834
2835 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2836 cand->ainc_use = use;
2837 else
2838 cand->ainc_use = NULL;
2839
2840 cand->orig_iv = orig_iv;
2841 if (dump_file && (dump_flags & TDF_DETAILS))
2842 dump_cand (dump_file, cand);
2843 }
2844
2845 cand->important |= important;
2846
2847 /* Relate candidate to the group for which it is added. */
2848 if (use)
2849 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
2850
2851 return cand;
2852 }
2853
2854 /* Returns true if incrementing the induction variable at the end of the LOOP
2855 is allowed.
2856
2857 The purpose is to avoid splitting the latch edge with a biv increment, thus
2858 creating a jump, possibly confusing other optimization passes and leaving
2859 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2860 is not available (so we do not have a better alternative), or if the latch
2861 edge is already nonempty. */
2862
2863 static bool
2864 allow_ip_end_pos_p (struct loop *loop)
2865 {
2866 if (!ip_normal_pos (loop))
2867 return true;
2868
2869 if (!empty_block_p (ip_end_pos (loop)))
2870 return true;
2871
2872 return false;
2873 }
2874
2875 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2876 Important field is set to IMPORTANT. */
2877
2878 static void
2879 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2880 bool important, struct iv_use *use)
2881 {
2882 basic_block use_bb = gimple_bb (use->stmt);
2883 machine_mode mem_mode;
2884 unsigned HOST_WIDE_INT cstepi;
2885
2886 /* If we insert the increment in any position other than the standard
2887 ones, we must ensure that it is incremented once per iteration.
2888 It must not be in an inner nested loop, or one side of an if
2889 statement. */
2890 if (use_bb->loop_father != data->current_loop
2891 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2892 || stmt_could_throw_p (use->stmt)
2893 || !cst_and_fits_in_hwi (step))
2894 return;
2895
2896 cstepi = int_cst_value (step);
2897
2898 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2899 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2900 || USE_STORE_PRE_INCREMENT (mem_mode))
2901 && GET_MODE_SIZE (mem_mode) == cstepi)
2902 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2903 || USE_STORE_PRE_DECREMENT (mem_mode))
2904 && GET_MODE_SIZE (mem_mode) == -cstepi))
2905 {
2906 enum tree_code code = MINUS_EXPR;
2907 tree new_base;
2908 tree new_step = step;
2909
2910 if (POINTER_TYPE_P (TREE_TYPE (base)))
2911 {
2912 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2913 code = POINTER_PLUS_EXPR;
2914 }
2915 else
2916 new_step = fold_convert (TREE_TYPE (base), new_step);
2917 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2918 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2919 use->stmt);
2920 }
2921 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2922 || USE_STORE_POST_INCREMENT (mem_mode))
2923 && GET_MODE_SIZE (mem_mode) == cstepi)
2924 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2925 || USE_STORE_POST_DECREMENT (mem_mode))
2926 && GET_MODE_SIZE (mem_mode) == -cstepi))
2927 {
2928 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2929 use->stmt);
2930 }
2931 }
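
/* Worked example (illustration only): on a target with post-increment
   addressing, a 4-byte load from *p inside the loop where p advances by
   4 bytes each iteration allows the IP_AFTER_USE candidate above, so the
   address computation and the step of p can later be combined into a
   single post-increment access.  */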
2932
2933 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2934 position to POS. If USE is not NULL, the candidate is set as related to
2935 it. The candidate computation is scheduled before the exit condition
2936 and at the end of the loop. */
2937
2938 static void
2939 add_candidate (struct ivopts_data *data,
2940 tree base, tree step, bool important, struct iv_use *use,
2941 struct iv *orig_iv = NULL)
2942 {
2943 if (ip_normal_pos (data->current_loop))
2944 add_candidate_1 (data, base, step, important,
2945 IP_NORMAL, use, NULL, orig_iv);
2946 if (ip_end_pos (data->current_loop)
2947 && allow_ip_end_pos_p (data->current_loop))
2948 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2949 }
2950
2951 /* Adds standard iv candidates. */
2952
2953 static void
2954 add_standard_iv_candidates (struct ivopts_data *data)
2955 {
2956 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2957
2958 /* The same for the long type if it is still fast enough. */
2959 if (TYPE_PRECISION
2960 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2961 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2962 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2963 build_int_cst (long_integer_type_node, 1), true, NULL);
2964
2965 /* The same for the long long type if it is still fast enough. */
2966 if (TYPE_PRECISION
2967 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2968 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2969 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2970 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2971 }
2972
2973
2974 /* Adds candidates based on the old induction variable IV. */
2975
2976 static void
2977 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2978 {
2979 gimple *phi;
2980 tree def;
2981 struct iv_cand *cand;
2982
2983 /* Check if this biv is used in address type use. */
2984 if (iv->no_overflow && iv->have_address_use
2985 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
2986 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
2987 {
2988 tree base = fold_convert (sizetype, iv->base);
2989 tree step = fold_convert (sizetype, iv->step);
2990
2991 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
2992 add_candidate (data, base, step, true, NULL, iv);
2993 /* Add iv cand of the original type only if it has nonlinear use. */
2994 if (iv->nonlin_use)
2995 add_candidate (data, iv->base, iv->step, true, NULL);
2996 }
2997 else
2998 add_candidate (data, iv->base, iv->step, true, NULL);
2999
3000 /* The same, but with initial value zero. */
3001 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3002 add_candidate (data, size_int (0), iv->step, true, NULL);
3003 else
3004 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3005 iv->step, true, NULL);
3006
3007 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3008 if (gimple_code (phi) == GIMPLE_PHI)
3009 {
3010 /* Additionally record the possibility of leaving the original iv
3011 untouched. */
3012 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3013 /* Don't add candidate if it's from another PHI node because
3014 it's an affine iv appearing in the form of PEELED_CHREC. */
3015 phi = SSA_NAME_DEF_STMT (def);
3016 if (gimple_code (phi) != GIMPLE_PHI)
3017 {
3018 cand = add_candidate_1 (data,
3019 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3020 SSA_NAME_DEF_STMT (def));
3021 if (cand)
3022 {
3023 cand->var_before = iv->ssa_name;
3024 cand->var_after = def;
3025 }
3026 }
3027 else
3028 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3029 }
3030 }
3031
3032 /* Adds candidates based on the old induction variables. */
3033
3034 static void
3035 add_iv_candidate_for_bivs (struct ivopts_data *data)
3036 {
3037 unsigned i;
3038 struct iv *iv;
3039 bitmap_iterator bi;
3040
3041 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3042 {
3043 iv = ver_info (data, i)->iv;
3044 if (iv && iv->biv_p && !integer_zerop (iv->step))
3045 add_iv_candidate_for_biv (data, iv);
3046 }
3047 }
3048
3049 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3050
3051 static void
3052 record_common_cand (struct ivopts_data *data, tree base,
3053 tree step, struct iv_use *use)
3054 {
3055 struct iv_common_cand ent;
3056 struct iv_common_cand **slot;
3057
3058 ent.base = base;
3059 ent.step = step;
3060 ent.hash = iterative_hash_expr (base, 0);
3061 ent.hash = iterative_hash_expr (step, ent.hash);
3062
3063 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3064 if (*slot == NULL)
3065 {
3066 *slot = new iv_common_cand ();
3067 (*slot)->base = base;
3068 (*slot)->step = step;
3069 (*slot)->uses.create (8);
3070 (*slot)->hash = ent.hash;
3071 data->iv_common_cands.safe_push ((*slot));
3072 }
3073
3074 gcc_assert (use != NULL);
3075 (*slot)->uses.safe_push (use);
3076 return;
3077 }
3078
3079 /* Comparison function used to sort common candidates. */
3080
3081 static int
3082 common_cand_cmp (const void *p1, const void *p2)
3083 {
3084 unsigned n1, n2;
3085 const struct iv_common_cand *const *const ccand1
3086 = (const struct iv_common_cand *const *)p1;
3087 const struct iv_common_cand *const *const ccand2
3088 = (const struct iv_common_cand *const *)p2;
3089
3090 n1 = (*ccand1)->uses.length ();
3091 n2 = (*ccand2)->uses.length ();
3092 return n2 - n1;
3093 }
3094
3095 /* Adds IV candidates based on the common candidates recorded. */
3096
3097 static void
3098 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3099 {
3100 unsigned i, j;
3101 struct iv_cand *cand_1, *cand_2;
3102
3103 data->iv_common_cands.qsort (common_cand_cmp);
3104 for (i = 0; i < data->iv_common_cands.length (); i++)
3105 {
3106 struct iv_common_cand *ptr = data->iv_common_cands[i];
3107
3108 /* Only add IV candidate if it's derived from multiple uses. */
3109 if (ptr->uses.length () <= 1)
3110 break;
3111
3112 cand_1 = NULL;
3113 cand_2 = NULL;
3114 if (ip_normal_pos (data->current_loop))
3115 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3116 false, IP_NORMAL, NULL, NULL);
3117
3118 if (ip_end_pos (data->current_loop)
3119 && allow_ip_end_pos_p (data->current_loop))
3120 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3121 false, IP_END, NULL, NULL);
3122
3123 /* Bind deriving uses and the new candidates. */
3124 for (j = 0; j < ptr->uses.length (); j++)
3125 {
3126 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3127 if (cand_1)
3128 bitmap_set_bit (group->related_cands, cand_1->id);
3129 if (cand_2)
3130 bitmap_set_bit (group->related_cands, cand_2->id);
3131 }
3132 }
3133
3134 /* Release data since it is useless from this point. */
3135 data->iv_common_cand_tab->empty ();
3136 data->iv_common_cands.truncate (0);
3137 }
3138
3139 /* Adds candidates based on the value of USE's iv. */
3140
3141 static void
3142 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3143 {
3144 unsigned HOST_WIDE_INT offset;
3145 tree base;
3146 tree basetype;
3147 struct iv *iv = use->iv;
3148
3149 add_candidate (data, iv->base, iv->step, false, use);
3150
3151 /* Record common candidate for use in case it can be shared by others. */
3152 record_common_cand (data, iv->base, iv->step, use);
3153
3154 /* Record common candidate with initial value zero. */
3155 basetype = TREE_TYPE (iv->base);
3156 if (POINTER_TYPE_P (basetype))
3157 basetype = sizetype;
3158 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3159
3160 /* Record common candidate with constant offset stripped in base.
3161 Like the use itself, we also add candidate directly for it. */
3162 base = strip_offset (iv->base, &offset);
3163 if (offset || base != iv->base)
3164 {
3165 record_common_cand (data, base, iv->step, use);
3166 add_candidate (data, base, iv->step, false, use);
3167 }
3168
3169 /* Record common candidate with base_object removed in base. */
3170 if (iv->base_object != NULL)
3171 {
3172 unsigned i;
3173 aff_tree aff_base;
3174 tree step, base_object = iv->base_object;
3175
3176 base = iv->base;
3177 step = iv->step;
3178 STRIP_NOPS (base);
3179 STRIP_NOPS (step);
3180 STRIP_NOPS (base_object);
3181 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3182 for (i = 0; i < aff_base.n; i++)
3183 {
3184 if (aff_base.elts[i].coef != 1)
3185 continue;
3186
3187 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3188 break;
3189 }
3190 if (i < aff_base.n)
3191 {
3192 aff_combination_remove_elt (&aff_base, i);
3193 base = aff_combination_to_tree (&aff_base);
3194 basetype = TREE_TYPE (base);
3195 if (POINTER_TYPE_P (basetype))
3196 basetype = sizetype;
3197
3198 step = fold_convert (basetype, step);
3199 record_common_cand (data, base, step, use);
3200 /* Also record common candidate with offset stripped. */
3201 base = strip_offset (base, &offset);
3202 if (offset)
3203 record_common_cand (data, base, step, use);
3204 }
3205 }
3206
3207 /* Finally, add auto-increment candidates. Make such variables
3208 important since other iv uses with the same base object may be
3209 based on them. */
3210 if (use != NULL && use->type == USE_ADDRESS)
3211 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3212 }
3213
3214 /* Adds candidates based on the uses. */
3215
3216 static void
3217 add_iv_candidate_for_groups (struct ivopts_data *data)
3218 {
3219 unsigned i;
3220
3221 /* Only add candidates for the first use in each group. */
3222 for (i = 0; i < data->vgroups.length (); i++)
3223 {
3224 struct iv_group *group = data->vgroups[i];
3225
3226 gcc_assert (group->vuses[0] != NULL);
3227 add_iv_candidate_for_use (data, group->vuses[0]);
3228 }
3229 add_iv_candidate_derived_from_uses (data);
3230 }
3231
3232 /* Record important candidates and add them to related_cands bitmaps. */
3233
3234 static void
3235 record_important_candidates (struct ivopts_data *data)
3236 {
3237 unsigned i;
3238 struct iv_group *group;
3239
3240 for (i = 0; i < data->vcands.length (); i++)
3241 {
3242 struct iv_cand *cand = data->vcands[i];
3243
3244 if (cand->important)
3245 bitmap_set_bit (data->important_candidates, i);
3246 }
3247
3248 data->consider_all_candidates = (data->vcands.length ()
3249 <= CONSIDER_ALL_CANDIDATES_BOUND);
3250
3251 /* Add important candidates to groups' related_cands bitmaps. */
3252 for (i = 0; i < data->vgroups.length (); i++)
3253 {
3254 group = data->vgroups[i];
3255 bitmap_ior_into (group->related_cands, data->important_candidates);
3256 }
3257 }
3258
3259 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3260 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3261 we allocate a simple list to every use. */
3262
3263 static void
3264 alloc_use_cost_map (struct ivopts_data *data)
3265 {
3266 unsigned i, size, s;
3267
3268 for (i = 0; i < data->vgroups.length (); i++)
3269 {
3270 struct iv_group *group = data->vgroups[i];
3271
3272 if (data->consider_all_candidates)
3273 size = data->vcands.length ();
3274 else
3275 {
3276 s = bitmap_count_bits (group->related_cands);
3277
3278 /* Round up to a power of two, so that computing the modulo by it is fast. */
3279 size = s ? (1 << ceil_log2 (s)) : 1;
3280 }
3281
3282 group->n_map_members = size;
3283 group->cost_map = XCNEWVEC (struct cost_pair, size);
3284 }
3285 }
3286
3287 /* Returns description of computation cost of expression whose runtime
3288 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3289
3290 static comp_cost
3291 new_cost (unsigned runtime, unsigned complexity)
3292 {
3293 comp_cost cost;
3294
3295 cost.cost = runtime;
3296 cost.complexity = complexity;
3297
3298 return cost;
3299 }
3300
3301 /* Returns true if COST is infinite. */
3302
3303 static bool
3304 infinite_cost_p (comp_cost cost)
3305 {
3306 return cost.cost == INFTY;
3307 }
3308
3309 /* Adds costs COST1 and COST2. */
3310
3311 static comp_cost
3312 add_costs (comp_cost cost1, comp_cost cost2)
3313 {
3314 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3315 return infinite_cost;
3316
3317 cost1.cost += cost2.cost;
3318 cost1.complexity += cost2.complexity;
3319
3320 return cost1;
3321 }
3322 /* Subtracts COST2 from COST1. */
3323
3324 static comp_cost
3325 sub_costs (comp_cost cost1, comp_cost cost2)
3326 {
3327 cost1.cost -= cost2.cost;
3328 cost1.complexity -= cost2.complexity;
3329
3330 return cost1;
3331 }
3332
3333 /* Returns a negative number if COST1 < COST2, a positive number if
3334 COST1 > COST2, and 0 if COST1 = COST2. */
3335
3336 static int
3337 compare_costs (comp_cost cost1, comp_cost cost2)
3338 {
3339 if (cost1.cost == cost2.cost)
3340 return cost1.complexity - cost2.complexity;
3341
3342 return cost1.cost - cost2.cost;
3343 }
3344
3345 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it
3346 depends on invariants DEPENDS_ON, that the value used in expressing it
3347 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3348
3349 static void
3350 set_group_iv_cost (struct ivopts_data *data,
3351 struct iv_group *group, struct iv_cand *cand,
3352 comp_cost cost, bitmap depends_on, tree value,
3353 enum tree_code comp, iv_inv_expr_ent *inv_expr)
3354 {
3355 unsigned i, s;
3356
3357 if (infinite_cost_p (cost))
3358 {
3359 BITMAP_FREE (depends_on);
3360 return;
3361 }
3362
3363 if (data->consider_all_candidates)
3364 {
3365 group->cost_map[cand->id].cand = cand;
3366 group->cost_map[cand->id].cost = cost;
3367 group->cost_map[cand->id].depends_on = depends_on;
3368 group->cost_map[cand->id].value = value;
3369 group->cost_map[cand->id].comp = comp;
3370 group->cost_map[cand->id].inv_expr = inv_expr;
3371 return;
3372 }
3373
3374 /* n_map_members is a power of two, so this computes modulo. */
3375 s = cand->id & (group->n_map_members - 1);
3376 for (i = s; i < group->n_map_members; i++)
3377 if (!group->cost_map[i].cand)
3378 goto found;
3379 for (i = 0; i < s; i++)
3380 if (!group->cost_map[i].cand)
3381 goto found;
3382
3383 gcc_unreachable ();
3384
3385 found:
3386 group->cost_map[i].cand = cand;
3387 group->cost_map[i].cost = cost;
3388 group->cost_map[i].depends_on = depends_on;
3389 group->cost_map[i].value = value;
3390 group->cost_map[i].comp = comp;
3391 group->cost_map[i].inv_expr = inv_expr;
3392 }
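
/* Illustrative sketch (names invented, not part of this pass): the cost map
   acts as a small open-addressed hash table whose size is a power of two,
   so the starting slot is computed with a mask rather than a division.  A
   standalone version of the probe used above and in get_group_iv_cost:  */
#if 0
/* Return the slot index for ID in a table of SIZE entries (SIZE a power of
   two) whose occupied slots are marked in USED, or -1 if the table is full.  */
static int
probe_slot (const bool *used, unsigned size, unsigned id)
{
  unsigned start = id & (size - 1), i;

  for (i = start; i < size; i++)
    if (!used[i])
      return (int) i;
  for (i = 0; i < start; i++)
    if (!used[i])
      return (int) i;
  return -1;
}
#endif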
3393
3394 /* Gets cost of (GROUP, CAND) pair. */
3395
3396 static struct cost_pair *
3397 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3398 struct iv_cand *cand)
3399 {
3400 unsigned i, s;
3401 struct cost_pair *ret;
3402
3403 if (!cand)
3404 return NULL;
3405
3406 if (data->consider_all_candidates)
3407 {
3408 ret = group->cost_map + cand->id;
3409 if (!ret->cand)
3410 return NULL;
3411
3412 return ret;
3413 }
3414
3415 /* n_map_members is a power of two, so this computes modulo. */
3416 s = cand->id & (group->n_map_members - 1);
3417 for (i = s; i < group->n_map_members; i++)
3418 if (group->cost_map[i].cand == cand)
3419 return group->cost_map + i;
3420 else if (group->cost_map[i].cand == NULL)
3421 return NULL;
3422 for (i = 0; i < s; i++)
3423 if (group->cost_map[i].cand == cand)
3424 return group->cost_map + i;
3425 else if (group->cost_map[i].cand == NULL)
3426 return NULL;
3427
3428 return NULL;
3429 }
3430
3431 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3432 static rtx
3433 produce_memory_decl_rtl (tree obj, int *regno)
3434 {
3435 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3436 machine_mode address_mode = targetm.addr_space.address_mode (as);
3437 rtx x;
3438
3439 gcc_assert (obj);
3440 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3441 {
3442 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3443 x = gen_rtx_SYMBOL_REF (address_mode, name);
3444 SET_SYMBOL_REF_DECL (x, obj);
3445 x = gen_rtx_MEM (DECL_MODE (obj), x);
3446 set_mem_addr_space (x, as);
3447 targetm.encode_section_info (obj, x, true);
3448 }
3449 else
3450 {
3451 x = gen_raw_REG (address_mode, (*regno)++);
3452 x = gen_rtx_MEM (DECL_MODE (obj), x);
3453 set_mem_addr_space (x, as);
3454 }
3455
3456 return x;
3457 }
3458
3459 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3460 walk_tree. DATA contains the actual fake register number. */
3461
3462 static tree
3463 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3464 {
3465 tree obj = NULL_TREE;
3466 rtx x = NULL_RTX;
3467 int *regno = (int *) data;
3468
3469 switch (TREE_CODE (*expr_p))
3470 {
3471 case ADDR_EXPR:
3472 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3473 handled_component_p (*expr_p);
3474 expr_p = &TREE_OPERAND (*expr_p, 0))
3475 continue;
3476 obj = *expr_p;
3477 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3478 x = produce_memory_decl_rtl (obj, regno);
3479 break;
3480
3481 case SSA_NAME:
3482 *ws = 0;
3483 obj = SSA_NAME_VAR (*expr_p);
3484 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3485 if (!obj)
3486 return NULL_TREE;
3487 if (!DECL_RTL_SET_P (obj))
3488 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3489 break;
3490
3491 case VAR_DECL:
3492 case PARM_DECL:
3493 case RESULT_DECL:
3494 *ws = 0;
3495 obj = *expr_p;
3496
3497 if (DECL_RTL_SET_P (obj))
3498 break;
3499
3500 if (DECL_MODE (obj) == BLKmode)
3501 x = produce_memory_decl_rtl (obj, regno);
3502 else
3503 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3504
3505 break;
3506
3507 default:
3508 break;
3509 }
3510
3511 if (x)
3512 {
3513 decl_rtl_to_reset.safe_push (obj);
3514 SET_DECL_RTL (obj, x);
3515 }
3516
3517 return NULL_TREE;
3518 }
3519
3520 /* Determines cost of the computation of EXPR. */
3521
3522 static unsigned
3523 computation_cost (tree expr, bool speed)
3524 {
3525 rtx_insn *seq;
3526 rtx rslt;
3527 tree type = TREE_TYPE (expr);
3528 unsigned cost;
3529 /* Avoid using hard regs in ways which may be unsupported. */
3530 int regno = LAST_VIRTUAL_REGISTER + 1;
3531 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3532 enum node_frequency real_frequency = node->frequency;
3533
3534 node->frequency = NODE_FREQUENCY_NORMAL;
3535 crtl->maybe_hot_insn_p = speed;
3536 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3537 start_sequence ();
3538 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3539 seq = get_insns ();
3540 end_sequence ();
3541 default_rtl_profile ();
3542 node->frequency = real_frequency;
3543
3544 cost = seq_cost (seq, speed);
3545 if (MEM_P (rslt))
3546 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3547 TYPE_ADDR_SPACE (type), speed);
3548 else if (!REG_P (rslt))
3549 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3550
3551 return cost;
3552 }
3553
3554 /* Returns variable containing the value of candidate CAND at statement AT. */
3555
3556 static tree
3557 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3558 {
3559 if (stmt_after_increment (loop, cand, stmt))
3560 return cand->var_after;
3561 else
3562 return cand->var_before;
3563 }
3564
3565 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3566 same precision that is at least as wide as the precision of TYPE, stores
3567 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3568 type of A and B. */
3569
3570 static tree
3571 determine_common_wider_type (tree *a, tree *b)
3572 {
3573 tree wider_type = NULL;
3574 tree suba, subb;
3575 tree atype = TREE_TYPE (*a);
3576
3577 if (CONVERT_EXPR_P (*a))
3578 {
3579 suba = TREE_OPERAND (*a, 0);
3580 wider_type = TREE_TYPE (suba);
3581 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3582 return atype;
3583 }
3584 else
3585 return atype;
3586
3587 if (CONVERT_EXPR_P (*b))
3588 {
3589 subb = TREE_OPERAND (*b, 0);
3590 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3591 return atype;
3592 }
3593 else
3594 return atype;
3595
3596 *a = suba;
3597 *b = subb;
3598 return wider_type;
3599 }
3600
3601 /* Determines the expression by which USE is expressed from induction variable
3602 CAND at statement AT in LOOP. The expression is stored in a decomposed
3603 form into AFF. Returns false if USE cannot be expressed using CAND. */
3604
3605 static bool
3606 get_computation_aff (struct loop *loop,
3607 struct iv_use *use, struct iv_cand *cand, gimple *at,
3608 struct aff_tree *aff)
3609 {
3610 tree ubase = use->iv->base;
3611 tree ustep = use->iv->step;
3612 tree cbase = cand->iv->base;
3613 tree cstep = cand->iv->step, cstep_common;
3614 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3615 tree common_type, var;
3616 tree uutype;
3617 aff_tree cbase_aff, var_aff;
3618 widest_int rat;
3619
3620 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3621 {
3622 /* We do not have a precision to express the values of use. */
3623 return false;
3624 }
3625
3626 var = var_at_stmt (loop, cand, at);
3627 uutype = unsigned_type_for (utype);
3628
3629 /* If the conversion is not noop, perform it. */
3630 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3631 {
3632 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3633 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3634 {
3635 tree inner_base, inner_step, inner_type;
3636 inner_base = TREE_OPERAND (cbase, 0);
3637 if (CONVERT_EXPR_P (cstep))
3638 inner_step = TREE_OPERAND (cstep, 0);
3639 else
3640 inner_step = cstep;
3641
3642 inner_type = TREE_TYPE (inner_base);
3643 /* If candidate is added from a biv whose type is smaller than
3644 ctype, we know both candidate and the biv won't overflow.
3645 In this case, it's safe to skip the conversion in the candidate.
3646 As an example, (unsigned short)((unsigned long)A) equals
3647 (unsigned short)A, if A has a type no larger than short. */
3648 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3649 {
3650 cbase = inner_base;
3651 cstep = inner_step;
3652 }
3653 }
3654 cstep = fold_convert (uutype, cstep);
3655 cbase = fold_convert (uutype, cbase);
3656 var = fold_convert (uutype, var);
3657 }
3658
3659 /* Ratio is 1 when computing the value of biv cand by itself.
3660 We can't rely on constant_multiple_of in this case because the
3661 use is created after the original biv is selected. The call
3662 could fail because of inconsistent fold behavior. See PR68021
3663 for more information. */
3664 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3665 {
3666 gcc_assert (is_gimple_assign (use->stmt));
3667 gcc_assert (use->iv->ssa_name == cand->var_after);
3668 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3669 rat = 1;
3670 }
3671 else if (!constant_multiple_of (ustep, cstep, &rat))
3672 return false;
3673
3674 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3675 type, we achieve better folding by computing their difference in this
3676 wider type, and casting the result to UUTYPE. We do not need to worry about
3677 overflows, as all the arithmetic will in the end be performed in UUTYPE
3678 anyway. */
3679 common_type = determine_common_wider_type (&ubase, &cbase);
3680
3681 /* use = ubase - ratio * cbase + ratio * var. */
3682 tree_to_aff_combination (ubase, common_type, aff);
3683 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3684 tree_to_aff_combination (var, uutype, &var_aff);
3685
3686 /* We need to shift the value if we are after the increment. */
3687 if (stmt_after_increment (loop, cand, at))
3688 {
3689 aff_tree cstep_aff;
3690
3691 if (common_type != uutype)
3692 cstep_common = fold_convert (common_type, cstep);
3693 else
3694 cstep_common = cstep;
3695
3696 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3697 aff_combination_add (&cbase_aff, &cstep_aff);
3698 }
3699
3700 aff_combination_scale (&cbase_aff, -rat);
3701 aff_combination_add (aff, &cbase_aff);
3702 if (common_type != uutype)
3703 aff_combination_convert (aff, uutype);
3704
3705 aff_combination_scale (&var_aff, rat);
3706 aff_combination_add (aff, &var_aff);
3707
3708 return true;
3709 }
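
/* Worked example (illustration only): expressing a use with base &a[0] and
   step 4 by a candidate with base 0 and step 1 gives ratio 4, so the
   affine form built above is &a[0] - 4 * 0 + 4 * var, i.e. &a[0] + 4 * var,
   where var is the candidate's value at the statement.  */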
3710
3711 /* Return the type of USE. */
3712
3713 static tree
3714 get_use_type (struct iv_use *use)
3715 {
3716 tree base_type = TREE_TYPE (use->iv->base);
3717 tree type;
3718
3719 if (use->type == USE_ADDRESS)
3720 {
3721 /* The base_type may be a void pointer. Create a pointer type based on
3722 the mem_ref instead. */
3723 type = build_pointer_type (TREE_TYPE (*use->op_p));
3724 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3725 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3726 }
3727 else
3728 type = base_type;
3729
3730 return type;
3731 }
3732
3733 /* Determines the expression by which USE is expressed from induction variable
3734 CAND at statement AT in LOOP. The computation is unshared. */
3735
3736 static tree
3737 get_computation_at (struct loop *loop,
3738 struct iv_use *use, struct iv_cand *cand, gimple *at)
3739 {
3740 aff_tree aff;
3741 tree type = get_use_type (use);
3742
3743 if (!get_computation_aff (loop, use, cand, at, &aff))
3744 return NULL_TREE;
3745 unshare_aff_combination (&aff);
3746 return fold_convert (type, aff_combination_to_tree (&aff));
3747 }
3748
3749 /* Determines the expression by which USE is expressed from induction variable
3750 CAND in LOOP. The computation is unshared. */
3751
3752 static tree
3753 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3754 {
3755 return get_computation_at (loop, use, cand, use->stmt);
3756 }
3757
3758 /* Adjust the cost COST for being in loop setup rather than loop body.
3759 If we're optimizing for space, the loop setup overhead is constant;
3760 if we're optimizing for speed, amortize it over the per-iteration cost. */
3761 static unsigned
3762 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3763 {
3764 if (cost == INFTY)
3765 return cost;
3766 else if (optimize_loop_for_speed_p (data->current_loop))
3767 return cost / avg_loop_niter (data->current_loop);
3768 else
3769 return cost;
3770 }
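
/* Worked example (illustration only): with an average of 10 iterations and
   a loop optimized for speed, a setup cost of 30 is accounted as 3; when
   optimizing for size it stays 30, since the setup code is emitted once
   regardless of the trip count.  */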
3771
3772 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3773 validity for a memory reference accessing memory of mode MODE in
3774 address space AS. */
3775
3776
3777 bool
3778 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3779 addr_space_t as)
3780 {
3781 #define MAX_RATIO 128
3782 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3783 static vec<sbitmap> valid_mult_list;
3784 sbitmap valid_mult;
3785
3786 if (data_index >= valid_mult_list.length ())
3787 valid_mult_list.safe_grow_cleared (data_index + 1);
3788
3789 valid_mult = valid_mult_list[data_index];
3790 if (!valid_mult)
3791 {
3792 machine_mode address_mode = targetm.addr_space.address_mode (as);
3793 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3794 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3795 rtx addr, scaled;
3796 HOST_WIDE_INT i;
3797
3798 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3799 bitmap_clear (valid_mult);
3800 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3801 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3802 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3803 {
3804 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3805 if (memory_address_addr_space_p (mode, addr, as)
3806 || memory_address_addr_space_p (mode, scaled, as))
3807 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3808 }
3809
3810 if (dump_file && (dump_flags & TDF_DETAILS))
3811 {
3812 fprintf (dump_file, " allowed multipliers:");
3813 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3814 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3815 fprintf (dump_file, " %d", (int) i);
3816 fprintf (dump_file, "\n");
3817 fprintf (dump_file, "\n");
3818 }
3819
3820 valid_mult_list[data_index] = valid_mult;
3821 }
3822
3823 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3824 return false;
3825
3826 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3827 }
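
/* Illustration (assumed target, not derived from this file): on x86_64,
   scaled index addressing accepts factors 1, 2, 4 and 8, so only those
   ratios end up set in the bitmap and e.g. ratio 3 makes this function
   return false.  */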
3828
3829 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3830 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3831 variable is omitted. Compute the cost for a memory reference that accesses
3832 a memory location of mode MEM_MODE in address space AS.
3833
3834 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3835 size of MEM_MODE / RATIO) is available. To make this determination, we
3836 look at the size of the increment to be made, which is given in CSTEP.
3837 CSTEP may be zero if the step is unknown.
3838 STMT_AFTER_INC is true iff the statement we're looking at is after the
3839 increment of the original biv.
3840
3841 TODO -- there must be some better way. This all is quite crude. */
3842
3843 enum ainc_type
3844 {
3845 AINC_PRE_INC, /* Pre increment. */
3846 AINC_PRE_DEC, /* Pre decrement. */
3847 AINC_POST_INC, /* Post increment. */
3848 AINC_POST_DEC, /* Post decrement. */
3849 AINC_NONE /* Also the number of auto increment types. */
3850 };
3851
3852 struct address_cost_data
3853 {
3854 HOST_WIDE_INT min_offset, max_offset;
3855 unsigned costs[2][2][2][2];
3856 unsigned ainc_costs[AINC_NONE];
3857 };
3858
3859
3860 static comp_cost
3861 get_address_cost (bool symbol_present, bool var_present,
3862 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3863 HOST_WIDE_INT cstep, machine_mode mem_mode,
3864 addr_space_t as, bool speed,
3865 bool stmt_after_inc, bool *may_autoinc)
3866 {
3867 machine_mode address_mode = targetm.addr_space.address_mode (as);
3868 static vec<address_cost_data *> address_cost_data_list;
3869 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3870 address_cost_data *data;
3871 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3872 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3873 unsigned cost, acost, complexity;
3874 enum ainc_type autoinc_type;
3875 bool offset_p, ratio_p, autoinc;
3876 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3877 unsigned HOST_WIDE_INT mask;
3878 unsigned bits;
3879
3880 if (data_index >= address_cost_data_list.length ())
3881 address_cost_data_list.safe_grow_cleared (data_index + 1);
3882
3883 data = address_cost_data_list[data_index];
3884 if (!data)
3885 {
3886 HOST_WIDE_INT i;
3887 HOST_WIDE_INT rat, off = 0;
3888 int old_cse_not_expected, width;
3889 unsigned sym_p, var_p, off_p, rat_p, add_c;
3890 rtx_insn *seq;
3891 rtx addr, base;
3892 rtx reg0, reg1;
3893
3894 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3895
3896 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3897
3898 width = GET_MODE_BITSIZE (address_mode) - 1;
3899 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3900 width = HOST_BITS_PER_WIDE_INT - 1;
3901 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3902
3903 for (i = width; i >= 0; i--)
3904 {
3905 off = -((unsigned HOST_WIDE_INT) 1 << i);
3906 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3907 if (memory_address_addr_space_p (mem_mode, addr, as))
3908 break;
3909 }
3910 data->min_offset = (i == -1? 0 : off);
3911
3912 for (i = width; i >= 0; i--)
3913 {
3914 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3915 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3916 if (memory_address_addr_space_p (mem_mode, addr, as))
3917 break;
3918 /* For some strict-alignment targets, the offset must be naturally
3919 aligned. Try an aligned offset if mem_mode is not QImode. */
3920 off = mem_mode != QImode
3921 ? ((unsigned HOST_WIDE_INT) 1 << i)
3922 - GET_MODE_SIZE (mem_mode)
3923 : 0;
3924 if (off > 0)
3925 {
3926 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3927 if (memory_address_addr_space_p (mem_mode, addr, as))
3928 break;
3929 }
3930 }
3931 if (i == -1)
3932 off = 0;
3933 data->max_offset = off;
3934
3935 if (dump_file && (dump_flags & TDF_DETAILS))
3936 {
3937 fprintf (dump_file, "get_address_cost:\n");
3938 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3939 GET_MODE_NAME (mem_mode),
3940 data->min_offset);
3941 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3942 GET_MODE_NAME (mem_mode),
3943 data->max_offset);
3944 }
3945
3946 rat = 1;
3947 for (i = 2; i <= MAX_RATIO; i++)
3948 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3949 {
3950 rat = i;
3951 break;
3952 }
3953
3954 /* Compute the cost of various addressing modes. */
3955 acost = 0;
3956 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3957 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3958
3959 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3960 || USE_STORE_PRE_DECREMENT (mem_mode))
3961 {
3962 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3963 has_predec[mem_mode]
3964 = memory_address_addr_space_p (mem_mode, addr, as);
3965
3966 if (has_predec[mem_mode])
3967 data->ainc_costs[AINC_PRE_DEC]
3968 = address_cost (addr, mem_mode, as, speed);
3969 }
3970 if (USE_LOAD_POST_DECREMENT (mem_mode)
3971 || USE_STORE_POST_DECREMENT (mem_mode))
3972 {
3973 addr = gen_rtx_POST_DEC (address_mode, reg0);
3974 has_postdec[mem_mode]
3975 = memory_address_addr_space_p (mem_mode, addr, as);
3976
3977 if (has_postdec[mem_mode])
3978 data->ainc_costs[AINC_POST_DEC]
3979 = address_cost (addr, mem_mode, as, speed);
3980 }
3981 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3982 || USE_STORE_PRE_INCREMENT (mem_mode))
3983 {
3984 addr = gen_rtx_PRE_INC (address_mode, reg0);
3985 has_preinc[mem_mode]
3986 = memory_address_addr_space_p (mem_mode, addr, as);
3987
3988 if (has_preinc[mem_mode])
3989 data->ainc_costs[AINC_PRE_INC]
3990 = address_cost (addr, mem_mode, as, speed);
3991 }
3992 if (USE_LOAD_POST_INCREMENT (mem_mode)
3993 || USE_STORE_POST_INCREMENT (mem_mode))
3994 {
3995 addr = gen_rtx_POST_INC (address_mode, reg0);
3996 has_postinc[mem_mode]
3997 = memory_address_addr_space_p (mem_mode, addr, as);
3998
3999 if (has_postinc[mem_mode])
4000 data->ainc_costs[AINC_POST_INC]
4001 = address_cost (addr, mem_mode, as, speed);
4002 }
4003 for (i = 0; i < 16; i++)
4004 {
4005 sym_p = i & 1;
4006 var_p = (i >> 1) & 1;
4007 off_p = (i >> 2) & 1;
4008 rat_p = (i >> 3) & 1;
4009
4010 addr = reg0;
4011 if (rat_p)
4012 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4013 gen_int_mode (rat, address_mode));
4014
4015 if (var_p)
4016 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4017
4018 if (sym_p)
4019 {
4020 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4021 /* ??? We can run into trouble with some backends by presenting
4022 it with symbols which haven't been properly passed through
4023 targetm.encode_section_info. By setting the local bit, we
4024 enhance the probability of things working. */
4025 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4026
4027 if (off_p)
4028 base = gen_rtx_fmt_e (CONST, address_mode,
4029 gen_rtx_fmt_ee
4030 (PLUS, address_mode, base,
4031 gen_int_mode (off, address_mode)));
4032 }
4033 else if (off_p)
4034 base = gen_int_mode (off, address_mode);
4035 else
4036 base = NULL_RTX;
4037
4038 if (base)
4039 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4040
4041 start_sequence ();
4042 /* To avoid splitting addressing modes, pretend that no cse will
4043 follow. */
4044 old_cse_not_expected = cse_not_expected;
4045 cse_not_expected = true;
4046 addr = memory_address_addr_space (mem_mode, addr, as);
4047 cse_not_expected = old_cse_not_expected;
4048 seq = get_insns ();
4049 end_sequence ();
4050
4051 acost = seq_cost (seq, speed);
4052 acost += address_cost (addr, mem_mode, as, speed);
4053
4054 if (!acost)
4055 acost = 1;
4056 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4057 }
4058
4059 /* On some targets, it is quite expensive to load a symbol into a register,
4060 which makes addresses that contain symbols look much more expensive.
4061 However, the symbol will have to be loaded before the loop in any case
4062 (and quite likely we already have it in a register), so it does not
4063 make much sense to penalize it too heavily. So make some final
4064 tweaks for the SYMBOL_PRESENT modes:
4065
4066 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4067 var is cheaper, use this mode with small penalty.
4068 If VAR_PRESENT is true, try whether the mode with
4069 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4070 if this is the case, use it. */
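/* Illustrative example of the adjustment below: if the sym + cst
   combination was costed at 10 because materializing the symbol is
   expensive, but var + cst costs only 4, the sym + cst entry is lowered
   to 4 + 1 == 5, modelling that the symbol is loaded into a register
   once before the loop.  */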
4071 add_c = add_cost (speed, address_mode);
4072 for (i = 0; i < 8; i++)
4073 {
4074 var_p = i & 1;
4075 off_p = (i >> 1) & 1;
4076 rat_p = (i >> 2) & 1;
4077
4078 acost = data->costs[0][1][off_p][rat_p] + 1;
4079 if (var_p)
4080 acost += add_c;
4081
4082 if (acost < data->costs[1][var_p][off_p][rat_p])
4083 data->costs[1][var_p][off_p][rat_p] = acost;
4084 }
4085
4086 if (dump_file && (dump_flags & TDF_DETAILS))
4087 {
4088 fprintf (dump_file, "<Address Costs>:\n");
4089
4090 for (i = 0; i < 16; i++)
4091 {
4092 sym_p = i & 1;
4093 var_p = (i >> 1) & 1;
4094 off_p = (i >> 2) & 1;
4095 rat_p = (i >> 3) & 1;
4096
4097 fprintf (dump_file, " ");
4098 if (sym_p)
4099 fprintf (dump_file, "sym + ");
4100 if (var_p)
4101 fprintf (dump_file, "var + ");
4102 if (off_p)
4103 fprintf (dump_file, "cst + ");
4104 if (rat_p)
4105 fprintf (dump_file, "rat * ");
4106
4107 acost = data->costs[sym_p][var_p][off_p][rat_p];
4108 fprintf (dump_file, "index costs %d\n", acost);
4109 }
4110 if (has_predec[mem_mode] || has_postdec[mem_mode]
4111 || has_preinc[mem_mode] || has_postinc[mem_mode])
4112 fprintf (dump_file, " May include autoinc/dec\n");
4113 fprintf (dump_file, "\n");
4114 }
4115
4116 address_cost_data_list[data_index] = data;
4117 }
4118
4119 bits = GET_MODE_BITSIZE (address_mode);
4120 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4121 offset &= mask;
4122 if ((offset >> (bits - 1) & 1))
4123 offset |= ~mask;
4124 s_offset = offset;
4125
4126 autoinc = false;
4127 autoinc_type = AINC_NONE;
4128 msize = GET_MODE_SIZE (mem_mode);
4129 autoinc_offset = offset;
4130 if (stmt_after_inc)
4131 autoinc_offset += ratio * cstep;
4132 if (symbol_present || var_present || ratio != 1)
4133 autoinc = false;
4134 else
4135 {
4136 if (has_postinc[mem_mode] && autoinc_offset == 0
4137 && msize == cstep)
4138 autoinc_type = AINC_POST_INC;
4139 else if (has_postdec[mem_mode] && autoinc_offset == 0
4140 && msize == -cstep)
4141 autoinc_type = AINC_POST_DEC;
4142 else if (has_preinc[mem_mode] && autoinc_offset == msize
4143 && msize == cstep)
4144 autoinc_type = AINC_PRE_INC;
4145 else if (has_predec[mem_mode] && autoinc_offset == -msize
4146 && msize == -cstep)
4147 autoinc_type = AINC_PRE_DEC;
4148
4149 if (autoinc_type != AINC_NONE)
4150 autoinc = true;
4151 }
4152
4153 cost = 0;
4154 offset_p = (s_offset != 0
4155 && data->min_offset <= s_offset
4156 && s_offset <= data->max_offset);
4157 ratio_p = (ratio != 1
4158 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4159
4160 if (ratio != 1 && !ratio_p)
4161 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4162
4163 if (s_offset && !offset_p && !symbol_present)
4164 cost += add_cost (speed, address_mode);
4165
4166 if (may_autoinc)
4167 *may_autoinc = autoinc;
4168 if (autoinc)
4169 acost = data->ainc_costs[autoinc_type];
4170 else
4171 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4172 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4173 return new_cost (cost + acost, complexity);
4174 }
4175
4176 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4177 EXPR operand holding the shift. COST0 and COST1 are the costs for
4178 calculating the operands of EXPR. Returns true if successful, and returns
4179 the cost in COST. */
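/* Illustrative example: for EXPR == a + b * 8 the multiplication can be
   done as a shift by m == 3, so the code below compares the cost of a
   separate shift followed by an add (as_cost) against a combined
   shift-and-add instruction (sa_cost), e.g. an "add r0, r1, r2, lsl #3"
   style insn on targets that have one, and uses the cheaper of the two.  */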
4180
4181 static bool
4182 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4183 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4184 {
4185 comp_cost res;
4186 tree op1 = TREE_OPERAND (expr, 1);
4187 tree cst = TREE_OPERAND (mult, 1);
4188 tree multop = TREE_OPERAND (mult, 0);
4189 int m = exact_log2 (int_cst_value (cst));
4190 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4191 int as_cost, sa_cost;
4192 bool mult_in_op1;
4193
4194 if (!(m >= 0 && m < maxm))
4195 return false;
4196
4197 STRIP_NOPS (op1);
4198 mult_in_op1 = operand_equal_p (op1, mult, 0);
4199
4200 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4201
4202 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4203 use that in preference to a shift insn followed by an add insn. */
4204 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4205 ? shiftadd_cost (speed, mode, m)
4206 : (mult_in_op1
4207 ? shiftsub1_cost (speed, mode, m)
4208 : shiftsub0_cost (speed, mode, m)));
4209
4210 res = new_cost (MIN (as_cost, sa_cost), 0);
4211 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4212
4213 STRIP_NOPS (multop);
4214 if (!is_gimple_val (multop))
4215 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4216
4217 *cost = res;
4218 return true;
4219 }
4220
4221 /* Estimates cost of forcing expression EXPR into a variable. */
4222
4223 static comp_cost
4224 force_expr_to_var_cost (tree expr, bool speed)
4225 {
4226 static bool costs_initialized = false;
4227 static unsigned integer_cost [2];
4228 static unsigned symbol_cost [2];
4229 static unsigned address_cost [2];
4230 tree op0, op1;
4231 comp_cost cost0, cost1, cost;
4232 machine_mode mode;
4233
4234 if (!costs_initialized)
4235 {
4236 tree type = build_pointer_type (integer_type_node);
4237 tree var, addr;
4238 rtx x;
4239 int i;
4240
4241 var = create_tmp_var_raw (integer_type_node, "test_var");
4242 TREE_STATIC (var) = 1;
4243 x = produce_memory_decl_rtl (var, NULL);
4244 SET_DECL_RTL (var, x);
4245
4246 addr = build1 (ADDR_EXPR, type, var);
4247
4248
4249 for (i = 0; i < 2; i++)
4250 {
4251 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4252 2000), i);
4253
4254 symbol_cost[i] = computation_cost (addr, i) + 1;
4255
4256 address_cost[i]
4257 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4258 if (dump_file && (dump_flags & TDF_DETAILS))
4259 {
4260 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4261 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4262 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4263 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4264 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4265 fprintf (dump_file, "\n");
4266 }
4267 }
4268
4269 costs_initialized = true;
4270 }
4271
4272 STRIP_NOPS (expr);
4273
4274 if (SSA_VAR_P (expr))
4275 return no_cost;
4276
4277 if (is_gimple_min_invariant (expr))
4278 {
4279 if (TREE_CODE (expr) == INTEGER_CST)
4280 return new_cost (integer_cost [speed], 0);
4281
4282 if (TREE_CODE (expr) == ADDR_EXPR)
4283 {
4284 tree obj = TREE_OPERAND (expr, 0);
4285
4286 if (TREE_CODE (obj) == VAR_DECL
4287 || TREE_CODE (obj) == PARM_DECL
4288 || TREE_CODE (obj) == RESULT_DECL)
4289 return new_cost (symbol_cost [speed], 0);
4290 }
4291
4292 return new_cost (address_cost [speed], 0);
4293 }
4294
4295 switch (TREE_CODE (expr))
4296 {
4297 case POINTER_PLUS_EXPR:
4298 case PLUS_EXPR:
4299 case MINUS_EXPR:
4300 case MULT_EXPR:
4301 op0 = TREE_OPERAND (expr, 0);
4302 op1 = TREE_OPERAND (expr, 1);
4303 STRIP_NOPS (op0);
4304 STRIP_NOPS (op1);
4305 break;
4306
4307 CASE_CONVERT:
4308 case NEGATE_EXPR:
4309 op0 = TREE_OPERAND (expr, 0);
4310 STRIP_NOPS (op0);
4311 op1 = NULL_TREE;
4312 break;
4313
4314 default:
4315 /* Just an arbitrary value, FIXME. */
4316 return new_cost (target_spill_cost[speed], 0);
4317 }
4318
4319 if (op0 == NULL_TREE
4320 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4321 cost0 = no_cost;
4322 else
4323 cost0 = force_expr_to_var_cost (op0, speed);
4324
4325 if (op1 == NULL_TREE
4326 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4327 cost1 = no_cost;
4328 else
4329 cost1 = force_expr_to_var_cost (op1, speed);
4330
4331 mode = TYPE_MODE (TREE_TYPE (expr));
4332 switch (TREE_CODE (expr))
4333 {
4334 case POINTER_PLUS_EXPR:
4335 case PLUS_EXPR:
4336 case MINUS_EXPR:
4337 case NEGATE_EXPR:
4338 cost = new_cost (add_cost (speed, mode), 0);
4339 if (TREE_CODE (expr) != NEGATE_EXPR)
4340 {
4341 tree mult = NULL_TREE;
4342 comp_cost sa_cost;
4343 if (TREE_CODE (op1) == MULT_EXPR)
4344 mult = op1;
4345 else if (TREE_CODE (op0) == MULT_EXPR)
4346 mult = op0;
4347
4348 if (mult != NULL_TREE
4349 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4350 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4351 speed, &sa_cost))
4352 return sa_cost;
4353 }
4354 break;
4355
4356 CASE_CONVERT:
4357 {
4358 tree inner_mode, outer_mode;
4359 outer_mode = TREE_TYPE (expr);
4360 inner_mode = TREE_TYPE (op0);
4361 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4362 TYPE_MODE (inner_mode), speed), 0);
4363 }
4364 break;
4365
4366 case MULT_EXPR:
4367 if (cst_and_fits_in_hwi (op0))
4368 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4369 mode, speed), 0);
4370 else if (cst_and_fits_in_hwi (op1))
4371 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4372 mode, speed), 0);
4373 else
4374 return new_cost (target_spill_cost [speed], 0);
4375 break;
4376
4377 default:
4378 gcc_unreachable ();
4379 }
4380
4381 cost = add_costs (cost, cost0);
4382 cost = add_costs (cost, cost1);
4383
4384 /* Bound the cost by target_spill_cost. The parts of complicated
4385 computations often are either loop invariant or at least can
4386 be shared between several iv uses, so letting this grow without
4387 limits would not give reasonable results. */
4388 if (cost.cost > (int) target_spill_cost [speed])
4389 cost.cost = target_spill_cost [speed];
4390
4391 return cost;
4392 }
4393
4394 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4395 invariants the computation depends on. */
4396
4397 static comp_cost
4398 force_var_cost (struct ivopts_data *data,
4399 tree expr, bitmap *depends_on)
4400 {
4401 if (depends_on)
4402 {
4403 fd_ivopts_data = data;
4404 walk_tree (&expr, find_depends, depends_on, NULL);
4405 }
4406
4407 return force_expr_to_var_cost (expr, data->speed);
4408 }
4409
4410 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4411 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4412 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4413 invariants the computation depends on. */
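/* Illustrative example: for ADDR referring to g.f, where g is a
   file-scope (TREE_STATIC) struct and f lies 8 bytes into it, the address
   splits into symbol g plus offset 8 with no variable part; the same
   field of a non-static local would be classified as VAR_PRESENT
   instead.  */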
4414
4415 static comp_cost
4416 split_address_cost (struct ivopts_data *data,
4417 tree addr, bool *symbol_present, bool *var_present,
4418 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4419 {
4420 tree core;
4421 HOST_WIDE_INT bitsize;
4422 HOST_WIDE_INT bitpos;
4423 tree toffset;
4424 machine_mode mode;
4425 int unsignedp, reversep, volatilep;
4426
4427 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4428 &unsignedp, &reversep, &volatilep, false);
4429
4430 if (toffset != 0
4431 || bitpos % BITS_PER_UNIT != 0
4432 || reversep
4433 || TREE_CODE (core) != VAR_DECL)
4434 {
4435 *symbol_present = false;
4436 *var_present = true;
4437 fd_ivopts_data = data;
4438 if (depends_on)
4439 walk_tree (&addr, find_depends, depends_on, NULL);
4440
4441 return new_cost (target_spill_cost[data->speed], 0);
4442 }
4443
4444 *offset += bitpos / BITS_PER_UNIT;
4445 if (TREE_STATIC (core)
4446 || DECL_EXTERNAL (core))
4447 {
4448 *symbol_present = true;
4449 *var_present = false;
4450 return no_cost;
4451 }
4452
4453 *symbol_present = false;
4454 *var_present = true;
4455 return no_cost;
4456 }
4457
4458 /* Estimates cost of expressing difference of addresses E1 - E2 as
4459 var + symbol + offset. The value of offset is added to OFFSET,
4460 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4461 part is missing. DEPENDS_ON is a set of the invariants the computation
4462 depends on. */
4463
4464 static comp_cost
4465 ptr_difference_cost (struct ivopts_data *data,
4466 tree e1, tree e2, bool *symbol_present, bool *var_present,
4467 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4468 {
4469 HOST_WIDE_INT diff = 0;
4470 aff_tree aff_e1, aff_e2;
4471 tree type;
4472
4473 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4474
4475 if (ptr_difference_const (e1, e2, &diff))
4476 {
4477 *offset += diff;
4478 *symbol_present = false;
4479 *var_present = false;
4480 return no_cost;
4481 }
4482
4483 if (integer_zerop (e2))
4484 return split_address_cost (data, TREE_OPERAND (e1, 0),
4485 symbol_present, var_present, offset, depends_on);
4486
4487 *symbol_present = false;
4488 *var_present = true;
4489
4490 type = signed_type_for (TREE_TYPE (e1));
4491 tree_to_aff_combination (e1, type, &aff_e1);
4492 tree_to_aff_combination (e2, type, &aff_e2);
4493 aff_combination_scale (&aff_e2, -1);
4494 aff_combination_add (&aff_e1, &aff_e2);
4495
4496 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4497 }
4498
4499 /* Estimates cost of expressing difference E1 - E2 as
4500 var + symbol + offset. The value of offset is added to OFFSET,
4501 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4502 part is missing. DEPENDS_ON is a set of the invariants the computation
4503 depends on. */
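/* Illustrative example: for E1 == p + 16 and E2 == p, strip_offset below
   reduces both to p and adds 16 to *OFFSET, so the difference itself is
   free; for two unrelated variables a and b the affine combination a - b
   has to be forced into a new variable, which is what costs.  */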
4504
4505 static comp_cost
4506 difference_cost (struct ivopts_data *data,
4507 tree e1, tree e2, bool *symbol_present, bool *var_present,
4508 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4509 {
4510 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4511 unsigned HOST_WIDE_INT off1, off2;
4512 aff_tree aff_e1, aff_e2;
4513 tree type;
4514
4515 e1 = strip_offset (e1, &off1);
4516 e2 = strip_offset (e2, &off2);
4517 *offset += off1 - off2;
4518
4519 STRIP_NOPS (e1);
4520 STRIP_NOPS (e2);
4521
4522 if (TREE_CODE (e1) == ADDR_EXPR)
4523 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4524 offset, depends_on);
4525 *symbol_present = false;
4526
4527 if (operand_equal_p (e1, e2, 0))
4528 {
4529 *var_present = false;
4530 return no_cost;
4531 }
4532
4533 *var_present = true;
4534
4535 if (integer_zerop (e2))
4536 return force_var_cost (data, e1, depends_on);
4537
4538 if (integer_zerop (e1))
4539 {
4540 comp_cost cost = force_var_cost (data, e2, depends_on);
4541 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4542 return cost;
4543 }
4544
4545 type = signed_type_for (TREE_TYPE (e1));
4546 tree_to_aff_combination (e1, type, &aff_e1);
4547 tree_to_aff_combination (e2, type, &aff_e2);
4548 aff_combination_scale (&aff_e2, -1);
4549 aff_combination_add (&aff_e1, &aff_e2);
4550
4551 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4552 }
4553
4554 /* Returns true if AFF1 and AFF2 are identical. */
4555
4556 static bool
4557 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4558 {
4559 unsigned i;
4560
4561 if (aff1->n != aff2->n)
4562 return false;
4563
4564 for (i = 0; i < aff1->n; i++)
4565 {
4566 if (aff1->elts[i].coef != aff2->elts[i].coef)
4567 return false;
4568
4569 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4570 return false;
4571 }
4572 return true;
4573 }
4574
4575 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
4576
4577 static iv_inv_expr_ent *
4578 record_inv_expr (struct ivopts_data *data, tree expr)
4579 {
4580 struct iv_inv_expr_ent ent;
4581 struct iv_inv_expr_ent **slot;
4582
4583 ent.expr = expr;
4584 ent.hash = iterative_hash_expr (expr, 0);
4585 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4586
4587 if (!*slot)
4588 {
4589 *slot = XNEW (struct iv_inv_expr_ent);
4590 (*slot)->expr = expr;
4591 (*slot)->hash = ent.hash;
4592 (*slot)->id = data->max_inv_expr_id++;
4593 }
4594
4595 return *slot;
4596 }
4597
4598 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4599 requires a new compiler-generated temporary. Returns NULL otherwise.
4600 ADDRESS_P is a flag indicating if the expression is for address
4601 computation. */
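/* Illustrative example: with UBASE == base_1 + 8, CBASE == base_1 and
   RATIO == 1 the bases differ only in a constant, so no temporary is
   needed and NULL is returned; with UBASE == p, CBASE == q and RATIO == 2
   the loop-invariant expression p - 2 * q must be materialized, so it is
   recorded in the hash table and its entry returned.  */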
4602
4603 static iv_inv_expr_ent *
4604 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4605 tree cbase, HOST_WIDE_INT ratio,
4606 bool address_p)
4607 {
4608 aff_tree ubase_aff, cbase_aff;
4609 tree expr, ub, cb;
4610
4611 STRIP_NOPS (ubase);
4612 STRIP_NOPS (cbase);
4613 ub = ubase;
4614 cb = cbase;
4615
4616 if ((TREE_CODE (ubase) == INTEGER_CST)
4617 && (TREE_CODE (cbase) == INTEGER_CST))
4618 return NULL;
4619
4620 /* Strips the constant part. */
4621 if (TREE_CODE (ubase) == PLUS_EXPR
4622 || TREE_CODE (ubase) == MINUS_EXPR
4623 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4624 {
4625 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4626 ubase = TREE_OPERAND (ubase, 0);
4627 }
4628
4629 /* Strips the constant part. */
4630 if (TREE_CODE (cbase) == PLUS_EXPR
4631 || TREE_CODE (cbase) == MINUS_EXPR
4632 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4633 {
4634 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4635 cbase = TREE_OPERAND (cbase, 0);
4636 }
4637
4638 if (address_p)
4639 {
4640 if (((TREE_CODE (ubase) == SSA_NAME)
4641 || (TREE_CODE (ubase) == ADDR_EXPR
4642 && is_gimple_min_invariant (ubase)))
4643 && (TREE_CODE (cbase) == INTEGER_CST))
4644 return NULL;
4645
4646 if (((TREE_CODE (cbase) == SSA_NAME)
4647 || (TREE_CODE (cbase) == ADDR_EXPR
4648 && is_gimple_min_invariant (cbase)))
4649 && (TREE_CODE (ubase) == INTEGER_CST))
4650 return NULL;
4651 }
4652
4653 if (ratio == 1)
4654 {
4655 if (operand_equal_p (ubase, cbase, 0))
4656 return NULL;
4657
4658 if (TREE_CODE (ubase) == ADDR_EXPR
4659 && TREE_CODE (cbase) == ADDR_EXPR)
4660 {
4661 tree usym, csym;
4662
4663 usym = TREE_OPERAND (ubase, 0);
4664 csym = TREE_OPERAND (cbase, 0);
4665 if (TREE_CODE (usym) == ARRAY_REF)
4666 {
4667 tree ind = TREE_OPERAND (usym, 1);
4668 if (TREE_CODE (ind) == INTEGER_CST
4669 && tree_fits_shwi_p (ind)
4670 && tree_to_shwi (ind) == 0)
4671 usym = TREE_OPERAND (usym, 0);
4672 }
4673 if (TREE_CODE (csym) == ARRAY_REF)
4674 {
4675 tree ind = TREE_OPERAND (csym, 1);
4676 if (TREE_CODE (ind) == INTEGER_CST
4677 && tree_fits_shwi_p (ind)
4678 && tree_to_shwi (ind) == 0)
4679 csym = TREE_OPERAND (csym, 0);
4680 }
4681 if (operand_equal_p (usym, csym, 0))
4682 return NULL;
4683 }
4684 /* Now do a more complex comparison. */
4685 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4686 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4687 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4688 return NULL;
4689 }
4690
4691 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4692 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4693
4694 aff_combination_scale (&cbase_aff, -1 * ratio);
4695 aff_combination_add (&ubase_aff, &cbase_aff);
4696 expr = aff_combination_to_tree (&ubase_aff);
4697 return record_inv_expr (data, expr);
4698 }
4699
4700
4701
4702 /* Determines the cost of the computation by which USE is expressed
4703 from induction variable CAND. If ADDRESS_P is true, we just need
4704 to create an address from it, otherwise we want to get it into a
4705 register. A set of invariants we depend on is stored in
4706 DEPENDS_ON. AT is the statement at which the value is computed.
4707 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4708 addressing is likely. */
4709
4710 static comp_cost
4711 get_computation_cost_at (struct ivopts_data *data,
4712 struct iv_use *use, struct iv_cand *cand,
4713 bool address_p, bitmap *depends_on, gimple *at,
4714 bool *can_autoinc,
4715 iv_inv_expr_ent **inv_expr)
4716 {
4717 tree ubase = use->iv->base, ustep = use->iv->step;
4718 tree cbase, cstep;
4719 tree utype = TREE_TYPE (ubase), ctype;
4720 unsigned HOST_WIDE_INT cstepi, offset = 0;
4721 HOST_WIDE_INT ratio, aratio;
4722 bool var_present, symbol_present, stmt_is_after_inc;
4723 comp_cost cost;
4724 widest_int rat;
4725 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4726 machine_mode mem_mode = (address_p
4727 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4728 : VOIDmode);
4729
4730 if (depends_on)
4731 *depends_on = NULL;
4732
4733 /* Only consider real candidates. */
4734 if (!cand->iv)
4735 return infinite_cost;
4736
4737 cbase = cand->iv->base;
4738 cstep = cand->iv->step;
4739 ctype = TREE_TYPE (cbase);
4740
4741 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4742 {
4743 /* We do not have a precision to express the values of use. */
4744 return infinite_cost;
4745 }
4746
4747 if (address_p
4748 || (use->iv->base_object
4749 && cand->iv->base_object
4750 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4751 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4752 {
4753 /* Do not try to express address of an object with computation based
4754 on address of a different object. This may cause problems in rtl
4755 level alias analysis (that does not expect this to be happening,
4756 as this is illegal in C), and would be unlikely to be useful
4757 anyway. */
4758 if (use->iv->base_object
4759 && cand->iv->base_object
4760 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4761 return infinite_cost;
4762 }
4763
4764 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4765 {
4766 /* TODO -- add direct handling of this case. */
4767 goto fallback;
4768 }
4769
4770 /* CSTEPI is removed from the offset in case the statement is after the
4771 increment. If the step is not constant, we use zero instead.
4772 This is a bit imprecise (there is the extra addition), but
4773 redundancy elimination is likely to transform the code so that
4774 it uses the value of the variable before the increment anyway,
4775 so this is not too unrealistic. */
4776 if (cst_and_fits_in_hwi (cstep))
4777 cstepi = int_cst_value (cstep);
4778 else
4779 cstepi = 0;
4780
4781 if (!constant_multiple_of (ustep, cstep, &rat))
4782 return infinite_cost;
4783
4784 if (wi::fits_shwi_p (rat))
4785 ratio = rat.to_shwi ();
4786 else
4787 return infinite_cost;
4788
4789 STRIP_NOPS (cbase);
4790 ctype = TREE_TYPE (cbase);
4791
4792 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4793
4794 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4795 or ratio == 1, it is better to handle this like
4796
4797 ubase - ratio * cbase + ratio * var
4798
4799 (this also holds in the case ratio == -1, TODO). */
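/* Illustrative instance: if the use runs as ubase + 4 * n and the
   candidate as cbase + n (so ratio == 4), the rewrite yields
   (ubase - 4 * cbase) + 4 * var, where ubase - 4 * cbase is loop
   invariant and is costed as setup below, while only 4 * var remains
   to be computed at the use itself.  */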
4800
4801 if (cst_and_fits_in_hwi (cbase))
4802 {
4803 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4804 cost = difference_cost (data,
4805 ubase, build_int_cst (utype, 0),
4806 &symbol_present, &var_present, &offset,
4807 depends_on);
4808 cost.cost /= avg_loop_niter (data->current_loop);
4809 }
4810 else if (ratio == 1)
4811 {
4812 tree real_cbase = cbase;
4813
4814 /* Check to see if any adjustment is needed. */
4815 if (cstepi == 0 && stmt_is_after_inc)
4816 {
4817 aff_tree real_cbase_aff;
4818 aff_tree cstep_aff;
4819
4820 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4821 &real_cbase_aff);
4822 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4823
4824 aff_combination_add (&real_cbase_aff, &cstep_aff);
4825 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4826 }
4827
4828 cost = difference_cost (data,
4829 ubase, real_cbase,
4830 &symbol_present, &var_present, &offset,
4831 depends_on);
4832 cost.cost /= avg_loop_niter (data->current_loop);
4833 }
4834 else if (address_p
4835 && !POINTER_TYPE_P (ctype)
4836 && multiplier_allowed_in_address_p
4837 (ratio, mem_mode,
4838 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4839 {
4840 tree real_cbase = cbase;
4841
4842 if (cstepi == 0 && stmt_is_after_inc)
4843 {
4844 if (POINTER_TYPE_P (ctype))
4845 real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4846 else
4847 real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4848 }
4849 real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4850 build_int_cst (ctype, ratio));
4851 cost = difference_cost (data,
4852 ubase, real_cbase,
4853 &symbol_present, &var_present, &offset,
4854 depends_on);
4855 cost.cost /= avg_loop_niter (data->current_loop);
4856 }
4857 else
4858 {
4859 cost = force_var_cost (data, cbase, depends_on);
4860 cost = add_costs (cost,
4861 difference_cost (data,
4862 ubase, build_int_cst (utype, 0),
4863 &symbol_present, &var_present,
4864 &offset, depends_on));
4865 cost.cost /= avg_loop_niter (data->current_loop);
4866 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4867 }
4868
4869 /* Record setup cost in scratch field. */
4870 cost.scratch = cost.cost;
4871
4872 if (inv_expr && depends_on && *depends_on)
4873 {
4874 *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4875 address_p);
4876 /* Clear depends on. */
4877 if (*inv_expr != NULL)
4878 bitmap_clear (*depends_on);
4879 }
4880
4881 /* If we are after the increment, the value of the candidate is higher by
4882 one iteration. */
4883 if (stmt_is_after_inc)
4884 offset -= ratio * cstepi;
4885
4886 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4887 (symbol/var1/const parts may be omitted). If we are looking for an
4888 address, find the cost of addressing this. */
4889 if (address_p)
4890 return add_costs (cost,
4891 get_address_cost (symbol_present, var_present,
4892 offset, ratio, cstepi,
4893 mem_mode,
4894 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4895 speed, stmt_is_after_inc,
4896 can_autoinc));
4897
4898 /* Otherwise estimate the costs for computing the expression. */
4899 if (!symbol_present && !var_present && !offset)
4900 {
4901 if (ratio != 1)
4902 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4903 return cost;
4904 }
4905
4906 /* Symbol + offset should be compile-time computable so consider that they
4907 are added once to the variable, if present. */
4908 if (var_present && (symbol_present || offset))
4909 cost.cost += adjust_setup_cost (data,
4910 add_cost (speed, TYPE_MODE (ctype)));
4911
4912 /* Having offset does not affect runtime cost in case it is added to
4913 symbol, but it increases complexity. */
4914 if (offset)
4915 cost.complexity++;
4916
4917 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4918
4919 aratio = ratio > 0 ? ratio : -ratio;
4920 if (aratio != 1)
4921 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4922 return cost;
4923
4924 fallback:
4925 if (can_autoinc)
4926 *can_autoinc = false;
4927
4928 {
4929 /* Just get the expression, expand it and measure the cost. */
4930 tree comp = get_computation_at (data->current_loop, use, cand, at);
4931
4932 if (!comp)
4933 return infinite_cost;
4934
4935 if (address_p)
4936 comp = build_simple_mem_ref (comp);
4937
4938 cost = new_cost (computation_cost (comp, speed), 0);
4939 cost.scratch = 0;
4940 return cost;
4941 }
4942 }
4943
4944 /* Determines the cost of the computation by which USE is expressed
4945 from induction variable CAND. If ADDRESS_P is true, we just need
4946 to create an address from it, otherwise we want to get it into a
4947 register. A set of invariants we depend on is stored in
4948 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4949 autoinc addressing is likely. */
4950
4951 static comp_cost
4952 get_computation_cost (struct ivopts_data *data,
4953 struct iv_use *use, struct iv_cand *cand,
4954 bool address_p, bitmap *depends_on,
4955 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4956 {
4957 return get_computation_cost_at (data,
4958 use, cand, address_p, depends_on, use->stmt,
4959 can_autoinc, inv_expr);
4960 }
4961
4962 /* Determines cost of computing the use in GROUP with CAND in a generic
4963 expression. */
4964
4965 static bool
4966 determine_group_iv_cost_generic (struct ivopts_data *data,
4967 struct iv_group *group, struct iv_cand *cand)
4968 {
4969 comp_cost cost;
4970 iv_inv_expr_ent *inv_expr = NULL;
4971 bitmap depends_on = NULL;
4972 struct iv_use *use = group->vuses[0];
4973
4974 /* The simple case first -- if we need to express the value of the preserved
4975 original biv, the cost is 0. This also prevents us from counting the
4976 cost of increment twice -- once at this use and once in the cost of
4977 the candidate. */
4978 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4979 cost = no_cost;
4980 else
4981 cost = get_computation_cost (data, use, cand, false,
4982 &depends_on, NULL, &inv_expr);
4983
4984 set_group_iv_cost (data, group, cand, cost, depends_on,
4985 NULL_TREE, ERROR_MARK, inv_expr);
4986 return !infinite_cost_p (cost);
4987 }
4988
4989 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4990
4991 static bool
4992 determine_group_iv_cost_address (struct ivopts_data *data,
4993 struct iv_group *group, struct iv_cand *cand)
4994 {
4995 unsigned i;
4996 bitmap depends_on;
4997 bool can_autoinc, first = true;
4998 iv_inv_expr_ent *inv_expr = NULL;
4999 struct iv_use *use = group->vuses[0];
5000 comp_cost sum_cost = no_cost, cost;
5001
5002 cost = get_computation_cost (data, use, cand, true,
5003 &depends_on, &can_autoinc, &inv_expr);
5004
5005 sum_cost = cost;
5006 if (!infinite_cost_p (sum_cost) && cand->ainc_use == use)
5007 {
5008 if (can_autoinc)
5009 sum_cost.cost -= cand->cost_step;
5010 /* If we generated the candidate solely for exploiting autoincrement
5011 opportunities, and it turns out it can't be used, set the cost to
5012 infinity to make sure we ignore it. */
5013 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5014 sum_cost = infinite_cost;
5015 }
5016
5017 /* Uses in a group can share setup code, so only add setup cost once. */
5018 cost.cost -= cost.scratch;
5019 /* Compute and add costs for the rest of the uses in this group. */
5020 for (i = 1; i < group->vuses.length () && !infinite_cost_p (sum_cost); i++)
5021 {
5022 struct iv_use *next = group->vuses[i];
5023
5024 /* Compute the cost for the first use whose offset differs from that of
5025 the main use and add it afterwards. Costs for these uses could be quite
5026 different. Given the uses below in a group:
5027 use 0 : {base + A + offset_0, step}
5028 use 0.1: {base + A + offset_0, step}
5029 use 0.2: {base + A + offset_1, step}
5030 use 0.3: {base + A + offset_2, step}
5031 when we need to compute costs with candidate:
5032 cand 1 : {base + B + offset_0, step}
5033
5034 The first use with different offset is use 0.2, its cost is larger
5035 than cost of use 0/0.1 because we need to compute:
5036 A - B + offset_1 - offset_0
5037 rather than:
5038 A - B. */
5039 if (first && next->addr_offset != use->addr_offset)
5040 {
5041 first = false;
5042 cost = get_computation_cost (data, next, cand, true,
5043 NULL, &can_autoinc, NULL);
5044 /* Remove setup cost. */
5045 if (!infinite_cost_p (cost))
5046 cost.cost -= cost.scratch;
5047 }
5048 sum_cost = add_costs (sum_cost, cost);
5049 }
5050 set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5051 NULL_TREE, ERROR_MARK, inv_expr);
5052
5053 return !infinite_cost_p (sum_cost);
5054 }
5055
5056 /* Computes value of candidate CAND at position AT in iteration NITER, and
5057 stores it to VAL. */
5058
5059 static void
5060 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5061 aff_tree *val)
5062 {
5063 aff_tree step, delta, nit;
5064 struct iv *iv = cand->iv;
5065 tree type = TREE_TYPE (iv->base);
5066 tree steptype = type;
5067 if (POINTER_TYPE_P (type))
5068 steptype = sizetype;
5069 steptype = unsigned_type_for (type);
5070
5071 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5072 aff_combination_convert (&step, steptype);
5073 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5074 aff_combination_convert (&nit, steptype);
5075 aff_combination_mult (&nit, &step, &delta);
5076 if (stmt_after_increment (loop, cand, at))
5077 aff_combination_add (&delta, &step);
5078
5079 tree_to_aff_combination (iv->base, type, val);
5080 if (!POINTER_TYPE_P (type))
5081 aff_combination_convert (val, steptype);
5082 aff_combination_add (val, &delta);
5083 }
5084
5085 /* Returns period of induction variable iv. */
5086
5087 static tree
5088 iv_period (struct iv *iv)
5089 {
5090 tree step = iv->step, period, type;
5091 tree pow2div;
5092
5093 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5094
5095 type = unsigned_type_for (TREE_TYPE (step));
5096 /* Period of the iv is lcm (step, type_range)/step -1,
5097 i.e., N*type_range/step - 1. Since type range is power
5098 of two, N == step >> num_of_ending_zeros_binary (step),
5099 so the final result is
5100
5101 (type_range >> num_of_ending_zeros_binary (step)) - 1
5102
5103 */
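/* Worked example (illustrative): for an 8-bit unsigned type and
   step == 4, num_ending_zeros (step) == 2, so the mask built below is
   (256 >> 2) - 1 == 63: the candidate takes 64 distinct values before
   wrapping and can therefore distinguish at most 63 further
   iterations.  */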
5104 pow2div = num_ending_zeros (step);
5105
5106 period = build_low_bits_mask (type,
5107 (TYPE_PRECISION (type)
5108 - tree_to_uhwi (pow2div)));
5109
5110 return period;
5111 }
5112
5113 /* Returns the comparison operator used when eliminating the iv USE. */
5114
5115 static enum tree_code
5116 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5117 {
5118 struct loop *loop = data->current_loop;
5119 basic_block ex_bb;
5120 edge exit;
5121
5122 ex_bb = gimple_bb (use->stmt);
5123 exit = EDGE_SUCC (ex_bb, 0);
5124 if (flow_bb_inside_loop_p (loop, exit->dest))
5125 exit = EDGE_SUCC (ex_bb, 1);
5126
5127 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5128 }
5129
5130 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5131 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5132 calculation is performed in non-wrapping type.
5133
5134 TODO: More generally, we could test for the situation that
5135 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5136 This would require knowing the sign of OFFSET. */
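/* Illustrative example: if BASE is defined by p_1 = q_2 + off_3 (a
   POINTER_PLUS_EXPR in a non-wrapping pointer type) and OFFSET expands
   to the same off_3, then BASE - OFFSET is exactly q_2, which trivially
   cannot overflow, so the function returns true.  */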
5137
5138 static bool
5139 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5140 {
5141 enum tree_code code;
5142 tree e1, e2;
5143 aff_tree aff_e1, aff_e2, aff_offset;
5144
5145 if (!nowrap_type_p (TREE_TYPE (base)))
5146 return false;
5147
5148 base = expand_simple_operations (base);
5149
5150 if (TREE_CODE (base) == SSA_NAME)
5151 {
5152 gimple *stmt = SSA_NAME_DEF_STMT (base);
5153
5154 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5155 return false;
5156
5157 code = gimple_assign_rhs_code (stmt);
5158 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5159 return false;
5160
5161 e1 = gimple_assign_rhs1 (stmt);
5162 e2 = gimple_assign_rhs2 (stmt);
5163 }
5164 else
5165 {
5166 code = TREE_CODE (base);
5167 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5168 return false;
5169 e1 = TREE_OPERAND (base, 0);
5170 e2 = TREE_OPERAND (base, 1);
5171 }
5172
5173 /* Use affine expansion as deeper inspection to prove the equality. */
5174 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5175 &aff_e2, &data->name_expansion_cache);
5176 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5177 &aff_offset, &data->name_expansion_cache);
5178 aff_combination_scale (&aff_offset, -1);
5179 switch (code)
5180 {
5181 case PLUS_EXPR:
5182 aff_combination_add (&aff_e2, &aff_offset);
5183 if (aff_combination_zero_p (&aff_e2))
5184 return true;
5185
5186 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5187 &aff_e1, &data->name_expansion_cache);
5188 aff_combination_add (&aff_e1, &aff_offset);
5189 return aff_combination_zero_p (&aff_e1);
5190
5191 case POINTER_PLUS_EXPR:
5192 aff_combination_add (&aff_e2, &aff_offset);
5193 return aff_combination_zero_p (&aff_e2);
5194
5195 default:
5196 return false;
5197 }
5198 }
5199
5200 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5201 comparison with CAND. NITER describes the number of iterations of
5202 the loops. If successful, the comparison in COMP_P is altered accordingly.
5203
5204 We aim to handle the following situation:
5205
5206 sometype *base, *p;
5207 int a, b, i;
5208
5209 i = a;
5210 p = p_0 = base + a;
5211
5212 do
5213 {
5214 bla (*p);
5215 p++;
5216 i++;
5217 }
5218 while (i < b);
5219
5220 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5221 We aim to optimize this to
5222
5223 p = p_0 = base + a;
5224 do
5225 {
5226 bla (*p);
5227 p++;
5228 }
5229 while (p < p_0 - a + b);
5230
5231 This preserves the correctness, since the pointer arithmetics does not
5232 overflow. More precisely:
5233
5234 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5235 overflow in computing it or the values of p.
5236 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5237 overflow. To prove this, we use the fact that p_0 = base + a. */
5238
5239 static bool
5240 iv_elimination_compare_lt (struct ivopts_data *data,
5241 struct iv_cand *cand, enum tree_code *comp_p,
5242 struct tree_niter_desc *niter)
5243 {
5244 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5245 struct aff_tree nit, tmpa, tmpb;
5246 enum tree_code comp;
5247 HOST_WIDE_INT step;
5248
5249 /* We need to know that the candidate induction variable does not overflow.
5250 While more complex analysis may be used to prove this, for now just
5251 check that the variable appears in the original program and that it
5252 is computed in a type that guarantees no overflows. */
5253 cand_type = TREE_TYPE (cand->iv->base);
5254 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5255 return false;
5256
5257 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5258 the calculation of the BOUND could overflow, making the comparison
5259 invalid. */
5260 if (!data->loop_single_exit_p)
5261 return false;
5262
5263 /* We need to be able to decide whether candidate is increasing or decreasing
5264 in order to choose the right comparison operator. */
5265 if (!cst_and_fits_in_hwi (cand->iv->step))
5266 return false;
5267 step = int_cst_value (cand->iv->step);
5268
5269 /* Check that the number of iterations matches the expected pattern:
5270 a + 1 > b ? 0 : b - a - 1. */
5271 mbz = niter->may_be_zero;
5272 if (TREE_CODE (mbz) == GT_EXPR)
5273 {
5274 /* Handle a + 1 > b. */
5275 tree op0 = TREE_OPERAND (mbz, 0);
5276 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5277 {
5278 a = TREE_OPERAND (op0, 0);
5279 b = TREE_OPERAND (mbz, 1);
5280 }
5281 else
5282 return false;
5283 }
5284 else if (TREE_CODE (mbz) == LT_EXPR)
5285 {
5286 tree op1 = TREE_OPERAND (mbz, 1);
5287
5288 /* Handle b < a + 1. */
5289 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5290 {
5291 a = TREE_OPERAND (op1, 0);
5292 b = TREE_OPERAND (mbz, 0);
5293 }
5294 else
5295 return false;
5296 }
5297 else
5298 return false;
5299
5300 /* Expected number of iterations is B - A - 1. Check that it matches
5301 the actual number, i.e., that B - A - NITER = 1. */
5302 tree_to_aff_combination (niter->niter, nit_type, &nit);
5303 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5304 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5305 aff_combination_scale (&nit, -1);
5306 aff_combination_scale (&tmpa, -1);
5307 aff_combination_add (&tmpb, &tmpa);
5308 aff_combination_add (&tmpb, &nit);
5309 if (tmpb.n != 0 || tmpb.offset != 1)
5310 return false;
5311
5312 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5313 overflow. */
5314 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5315 cand->iv->step,
5316 fold_convert (TREE_TYPE (cand->iv->step), a));
5317 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5318 return false;
5319
5320 /* Determine the new comparison operator. */
5321 comp = step < 0 ? GT_EXPR : LT_EXPR;
5322 if (*comp_p == NE_EXPR)
5323 *comp_p = comp;
5324 else if (*comp_p == EQ_EXPR)
5325 *comp_p = invert_tree_comparison (comp, false);
5326 else
5327 gcc_unreachable ();
5328
5329 return true;
5330 }
5331
5332 /* Check whether it is possible to express the condition in USE by comparison
5333 of candidate CAND. If so, store the value compared with to BOUND, and the
5334 comparison operator to COMP. */
5335
5336 static bool
5337 may_eliminate_iv (struct ivopts_data *data,
5338 struct iv_use *use, struct iv_cand *cand, tree *bound,
5339 enum tree_code *comp)
5340 {
5341 basic_block ex_bb;
5342 edge exit;
5343 tree period;
5344 struct loop *loop = data->current_loop;
5345 aff_tree bnd;
5346 struct tree_niter_desc *desc = NULL;
5347
5348 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5349 return false;
5350
5351 /* For now this works only for exits that dominate the loop latch.
5352 TODO: extend to other conditions inside loop body. */
5353 ex_bb = gimple_bb (use->stmt);
5354 if (use->stmt != last_stmt (ex_bb)
5355 || gimple_code (use->stmt) != GIMPLE_COND
5356 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5357 return false;
5358
5359 exit = EDGE_SUCC (ex_bb, 0);
5360 if (flow_bb_inside_loop_p (loop, exit->dest))
5361 exit = EDGE_SUCC (ex_bb, 1);
5362 if (flow_bb_inside_loop_p (loop, exit->dest))
5363 return false;
5364
5365 desc = niter_for_exit (data, exit);
5366 if (!desc)
5367 return false;
5368
5369 /* Determine whether we can use the variable to test the exit condition.
5370 This is the case iff the period of the induction variable is greater
5371 than the number of iterations for which the exit condition is true. */
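/* Illustrative example: an 8-bit candidate with step 1 has period 255,
   so it may replace the exit test of a loop known to run at most 255
   iterations; if the loop may iterate more often the candidate wraps
   around, two different iterations would produce the same value, and
   the elimination must be refused.  */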
5372 period = iv_period (cand->iv);
5373
5374 /* If the number of iterations is constant, compare against it directly. */
5375 if (TREE_CODE (desc->niter) == INTEGER_CST)
5376 {
5377 /* See cand_value_at. */
5378 if (stmt_after_increment (loop, cand, use->stmt))
5379 {
5380 if (!tree_int_cst_lt (desc->niter, period))
5381 return false;
5382 }
5383 else
5384 {
5385 if (tree_int_cst_lt (period, desc->niter))
5386 return false;
5387 }
5388 }
5389
5390 /* If not, and if this is the only possible exit of the loop, see whether
5391 we can get a conservative estimate on the number of iterations of the
5392 entire loop and compare against that instead. */
5393 else
5394 {
5395 widest_int period_value, max_niter;
5396
5397 max_niter = desc->max;
5398 if (stmt_after_increment (loop, cand, use->stmt))
5399 max_niter += 1;
5400 period_value = wi::to_widest (period);
5401 if (wi::gtu_p (max_niter, period_value))
5402 {
5403 /* See if we can take advantage of inferred loop bound
5404 information. */
5405 if (data->loop_single_exit_p)
5406 {
5407 if (!max_loop_iterations (loop, &max_niter))
5408 return false;
5409 /* The loop bound is already adjusted by adding 1. */
5410 if (wi::gtu_p (max_niter, period_value))
5411 return false;
5412 }
5413 else
5414 return false;
5415 }
5416 }
5417
5418 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5419
5420 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5421 aff_combination_to_tree (&bnd));
5422 *comp = iv_elimination_compare (data, use);
5423
5424 /* It is unlikely that computing the number of iterations using division
5425 would be more profitable than keeping the original induction variable. */
5426 if (expression_expensive_p (*bound))
5427 return false;
5428
5429 /* Sometimes it is possible to handle the situation that the number of
5430 iterations may be zero unless additional assumptions hold, by using <
5431 instead of != in the exit condition.
5432
5433 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5434 base the exit condition on it. However, that is often too
5435 expensive. */
5436 if (!integer_zerop (desc->may_be_zero))
5437 return iv_elimination_compare_lt (data, cand, comp, desc);
5438
5439 return true;
5440 }
5441
5442 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5443 be copied, if it is used in the loop body and DATA->body_includes_call. */
5444
5445 static int
5446 parm_decl_cost (struct ivopts_data *data, tree bound)
5447 {
5448 tree sbound = bound;
5449 STRIP_NOPS (sbound);
5450
5451 if (TREE_CODE (sbound) == SSA_NAME
5452 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5453 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5454 && data->body_includes_call)
5455 return COSTS_N_INSNS (1);
5456
5457 return 0;
5458 }
5459
5460 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5461
5462 static bool
5463 determine_group_iv_cost_cond (struct ivopts_data *data,
5464 struct iv_group *group, struct iv_cand *cand)
5465 {
5466 tree bound = NULL_TREE;
5467 struct iv *cmp_iv;
5468 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5469 comp_cost elim_cost, express_cost, cost, bound_cost;
5470 bool ok;
5471 iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5472 tree *control_var, *bound_cst;
5473 enum tree_code comp = ERROR_MARK;
5474 struct iv_use *use = group->vuses[0];
5475
5476 gcc_assert (cand->iv);
5477
5478 /* Try iv elimination. */
5479 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5480 {
5481 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5482 if (elim_cost.cost == 0)
5483 elim_cost.cost = parm_decl_cost (data, bound);
5484 else if (TREE_CODE (bound) == INTEGER_CST)
5485 elim_cost.cost = 0;
5486 /* If we replace a loop condition 'i < n' with 'p < base + n',
5487 depends_on_elim will have 'base' and 'n' set, which implies
5488 that both 'base' and 'n' will be live during the loop. More likely,
5489 'base + n' will be loop invariant, resulting in only one live value
5490 during the loop. So in that case we clear depends_on_elim and set
5491 elim_inv_expr instead. */
5492 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5493 {
5494 elim_inv_expr = record_inv_expr (data, bound);
5495 bitmap_clear (depends_on_elim);
5496 }
5497 /* The bound is a loop invariant, so it will be only computed
5498 once. */
5499 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5500 }
5501 else
5502 elim_cost = infinite_cost;
5503
5504 /* Try expressing the original giv. If it is compared with an invariant,
5505 note that we cannot get rid of it. */
5506 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5507 NULL, &cmp_iv);
5508 gcc_assert (ok);
5509
5510 /* When the condition is a comparison of the candidate IV against
5511 zero, prefer this IV.
5512
5513 TODO: The constant that we're subtracting from the cost should
5514 be target-dependent. This information should be added to the
5515 target costs for each backend. */
5516 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5517 && integer_zerop (*bound_cst)
5518 && (operand_equal_p (*control_var, cand->var_after, 0)
5519 || operand_equal_p (*control_var, cand->var_before, 0)))
5520 elim_cost.cost -= 1;
5521
5522 express_cost = get_computation_cost (data, use, cand, false,
5523 &depends_on_express, NULL,
5524 &express_inv_expr);
5525 fd_ivopts_data = data;
5526 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5527
5528 /* Count the cost of the original bound as well. */
5529 bound_cost = force_var_cost (data, *bound_cst, NULL);
5530 if (bound_cost.cost == 0)
5531 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5532 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5533 bound_cost.cost = 0;
5534 express_cost.cost += bound_cost.cost;
5535
5536 /* Choose the better approach, preferring the eliminated IV. */
5537 if (compare_costs (elim_cost, express_cost) <= 0)
5538 {
5539 cost = elim_cost;
5540 depends_on = depends_on_elim;
5541 depends_on_elim = NULL;
5542 inv_expr = elim_inv_expr;
5543 }
5544 else
5545 {
5546 cost = express_cost;
5547 depends_on = depends_on_express;
5548 depends_on_express = NULL;
5549 bound = NULL_TREE;
5550 comp = ERROR_MARK;
5551 inv_expr = express_inv_expr;
5552 }
5553
5554 set_group_iv_cost (data, group, cand, cost,
5555 depends_on, bound, comp, inv_expr);
5556
5557 if (depends_on_elim)
5558 BITMAP_FREE (depends_on_elim);
5559 if (depends_on_express)
5560 BITMAP_FREE (depends_on_express);
5561
5562 return !infinite_cost_p (cost);
5563 }
5564
5565 /* Determines cost of computing uses in GROUP with CAND. Returns false
5566 if the uses in GROUP cannot be represented with CAND. */
5567
5568 static bool
5569 determine_group_iv_cost (struct ivopts_data *data,
5570 struct iv_group *group, struct iv_cand *cand)
5571 {
5572 switch (group->type)
5573 {
5574 case USE_NONLINEAR_EXPR:
5575 return determine_group_iv_cost_generic (data, group, cand);
5576
5577 case USE_ADDRESS:
5578 return determine_group_iv_cost_address (data, group, cand);
5579
5580 case USE_COMPARE:
5581 return determine_group_iv_cost_cond (data, group, cand);
5582
5583 default:
5584 gcc_unreachable ();
5585 }
5586 }
5587
5588 /* Return true if get_computation_cost indicates that autoincrement is
5589 a possibility for the pair of USE and CAND, false otherwise. */
5590
5591 static bool
5592 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5593 struct iv_cand *cand)
5594 {
5595 bitmap depends_on;
5596 bool can_autoinc;
5597 comp_cost cost;
5598
5599 if (use->type != USE_ADDRESS)
5600 return false;
5601
5602 cost = get_computation_cost (data, use, cand, true, &depends_on,
5603 &can_autoinc, NULL);
5604
5605 BITMAP_FREE (depends_on);
5606
5607 return !infinite_cost_p (cost) && can_autoinc;
5608 }
5609
5610 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5611 use that allows autoincrement, and set their AINC_USE if possible. */
5612
5613 static void
5614 set_autoinc_for_original_candidates (struct ivopts_data *data)
5615 {
5616 unsigned i, j;
5617
5618 for (i = 0; i < data->vcands.length (); i++)
5619 {
5620 struct iv_cand *cand = data->vcands[i];
5621 struct iv_use *closest_before = NULL;
5622 struct iv_use *closest_after = NULL;
5623 if (cand->pos != IP_ORIGINAL)
5624 continue;
5625
5626 for (j = 0; j < data->vgroups.length (); j++)
5627 {
5628 struct iv_group *group = data->vgroups[j];
5629 struct iv_use *use = group->vuses[0];
5630 unsigned uid = gimple_uid (use->stmt);
5631
5632 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5633 continue;
5634
5635 if (uid < gimple_uid (cand->incremented_at)
5636 && (closest_before == NULL
5637 || uid > gimple_uid (closest_before->stmt)))
5638 closest_before = use;
5639
5640 if (uid > gimple_uid (cand->incremented_at)
5641 && (closest_after == NULL
5642 || uid < gimple_uid (closest_after->stmt)))
5643 closest_after = use;
5644 }
5645
5646 if (closest_before != NULL
5647 && autoinc_possible_for_pair (data, closest_before, cand))
5648 cand->ainc_use = closest_before;
5649 else if (closest_after != NULL
5650 && autoinc_possible_for_pair (data, closest_after, cand))
5651 cand->ainc_use = closest_after;
5652 }
5653 }
5654
5655 /* Finds the candidates for the induction variables. */
5656
5657 static void
5658 find_iv_candidates (struct ivopts_data *data)
5659 {
5660 /* Add commonly used ivs. */
5661 add_standard_iv_candidates (data);
5662
5663 /* Add old induction variables. */
5664 add_iv_candidate_for_bivs (data);
5665
5666 /* Add induction variables derived from uses. */
5667 add_iv_candidate_for_groups (data);
5668
5669 set_autoinc_for_original_candidates (data);
5670
5671 /* Record the important candidates. */
5672 record_important_candidates (data);
5673
5674 if (dump_file && (dump_flags & TDF_DETAILS))
5675 {
5676 unsigned i;
5677
5678 fprintf (dump_file, "\n<Important Candidates>:\t");
5679 for (i = 0; i < data->vcands.length (); i++)
5680 if (data->vcands[i]->important)
5681 fprintf (dump_file, " %d,", data->vcands[i]->id);
5682 fprintf (dump_file, "\n");
5683
5684 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5685 for (i = 0; i < data->vgroups.length (); i++)
5686 {
5687 struct iv_group *group = data->vgroups[i];
5688
5689 if (group->related_cands)
5690 {
5691 fprintf (dump_file, " Group %d:\t", group->id);
5692 dump_bitmap (dump_file, group->related_cands);
5693 }
5694 }
5695 fprintf (dump_file, "\n");
5696 }
5697 }
5698
5699 /* Determines the cost of computing each group of uses with each iv candidate. */
5700
5701 static void
5702 determine_group_iv_costs (struct ivopts_data *data)
5703 {
5704 unsigned i, j;
5705 struct iv_cand *cand;
5706 struct iv_group *group;
5707 bitmap to_clear = BITMAP_ALLOC (NULL);
5708
5709 alloc_use_cost_map (data);
5710
5711 for (i = 0; i < data->vgroups.length (); i++)
5712 {
5713 group = data->vgroups[i];
5714
5715 if (data->consider_all_candidates)
5716 {
5717 for (j = 0; j < data->vcands.length (); j++)
5718 {
5719 cand = data->vcands[j];
5720 determine_group_iv_cost (data, group, cand);
5721 }
5722 }
5723 else
5724 {
5725 bitmap_iterator bi;
5726
5727 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5728 {
5729 cand = data->vcands[j];
5730 if (!determine_group_iv_cost (data, group, cand))
5731 bitmap_set_bit (to_clear, j);
5732 }
5733
5734 /* Remove the candidates for which the cost is infinite from
5735 the list of related candidates. */
5736 bitmap_and_compl_into (group->related_cands, to_clear);
5737 bitmap_clear (to_clear);
5738 }
5739 }
5740
5741 BITMAP_FREE (to_clear);
5742
5743 if (dump_file && (dump_flags & TDF_DETAILS))
5744 {
5745 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5746 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5747
5748 for (hash_table<iv_inv_expr_hasher>::iterator it
5749 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5750 ++it)
5751 list.safe_push (*it);
5752
5753 list.qsort (sort_iv_inv_expr_ent);
5754
5755 for (i = 0; i < list.length (); ++i)
5756 {
5757 fprintf (dump_file, "inv_expr %d: \t", i);
5758 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5759 fprintf (dump_file, "\n");
5760 }
5761
5762 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5763
5764 for (i = 0; i < data->vgroups.length (); i++)
5765 {
5766 group = data->vgroups[i];
5767
5768 fprintf (dump_file, "Group %d:\n", i);
5769 fprintf (dump_file, " cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5770 for (j = 0; j < group->n_map_members; j++)
5771 {
5772 if (!group->cost_map[j].cand
5773 || infinite_cost_p (group->cost_map[j].cost))
5774 continue;
5775
5776 fprintf (dump_file, " %d\t%d\t%d\t",
5777 group->cost_map[j].cand->id,
5778 group->cost_map[j].cost.cost,
5779 group->cost_map[j].cost.complexity);
5780 if (group->cost_map[j].inv_expr != NULL)
5781 fprintf (dump_file, "%d\t",
5782 group->cost_map[j].inv_expr->id);
5783 else
5784 fprintf (dump_file, "\t");
5785 if (group->cost_map[j].depends_on)
5786 bitmap_print (dump_file,
5787 group->cost_map[j].depends_on, "","");
5788 fprintf (dump_file, "\n");
5789 }
5790
5791 fprintf (dump_file, "\n");
5792 }
5793 fprintf (dump_file, "\n");
5794 }
5795 }
5796
5797 /* Determines cost of the candidate CAND. */
5798
5799 static void
5800 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5801 {
5802 comp_cost cost_base;
5803 unsigned cost, cost_step;
5804 tree base;
5805
5806 if (!cand->iv)
5807 {
5808 cand->cost = 0;
5809 return;
5810 }
5811
5812 /* There are two costs associated with the candidate -- its increment
5813 and its initialization. The second is almost negligible for any loop
5814 that rolls enough, so we take it into account only a little. */
5815
5816 base = cand->iv->base;
5817 cost_base = force_var_cost (data, base, NULL);
5818 /* It will be exceptional that the iv register happens to be initialized with
5819 the proper value at no cost. In general, there will at least be a regcopy
5820 or a const set. */
5821 if (cost_base.cost == 0)
5822 cost_base.cost = COSTS_N_INSNS (1);
5823 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5824
5825 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5826
5827 /* Prefer the original ivs unless we may gain something by replacing them;
5828 the reason is to make debugging simpler. This is not relevant for
5829 artificial ivs created by other optimization passes. */
5830 if (cand->pos != IP_ORIGINAL
5831 || !SSA_NAME_VAR (cand->var_before)
5832 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5833 cost++;
5834
5835 /* Prefer not to insert statements into latch unless there are some
5836 already (so that we do not create unnecessary jumps). */
5837 if (cand->pos == IP_END
5838 && empty_block_p (ip_end_pos (data->current_loop)))
5839 cost++;
5840
5841 cand->cost = cost;
5842 cand->cost_step = cost_step;
5843 }
5844
5845 /* Determines costs of computation of the candidates. */
5846
5847 static void
5848 determine_iv_costs (struct ivopts_data *data)
5849 {
5850 unsigned i;
5851
5852 if (dump_file && (dump_flags & TDF_DETAILS))
5853 {
5854 fprintf (dump_file, "<Candidate Costs>:\n");
5855 fprintf (dump_file, " cand\tcost\n");
5856 }
5857
5858 for (i = 0; i < data->vcands.length (); i++)
5859 {
5860 struct iv_cand *cand = data->vcands[i];
5861
5862 determine_iv_cost (data, cand);
5863
5864 if (dump_file && (dump_flags & TDF_DETAILS))
5865 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5866 }
5867
5868 if (dump_file && (dump_flags & TDF_DETAILS))
5869 fprintf (dump_file, "\n");
5870 }
5871
5872 /* Calculates cost for having SIZE induction variables. */
5873
5874 static unsigned
5875 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5876 {
5877 /* We add size to the cost, so that we prefer eliminating ivs
5878 if possible. */
5879 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5880 data->body_includes_call);
5881 }
5882
5883 /* For each size of the induction variable set determine the penalty. */
5884
5885 static void
5886 determine_set_costs (struct ivopts_data *data)
5887 {
5888 unsigned j, n;
5889 gphi *phi;
5890 gphi_iterator psi;
5891 tree op;
5892 struct loop *loop = data->current_loop;
5893 bitmap_iterator bi;
5894
5895 if (dump_file && (dump_flags & TDF_DETAILS))
5896 {
5897 fprintf (dump_file, "<Global Costs>:\n");
5898 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5899 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5900 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5901 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5902 }
5903
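/* Count the PHI results in the loop header that are neither virtual operands
nor induction variables; these values are live across the loop and each is
likely to need its own register. */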
5904 n = 0;
5905 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5906 {
5907 phi = psi.phi ();
5908 op = PHI_RESULT (phi);
5909
5910 if (virtual_operand_p (op))
5911 continue;
5912
5913 if (get_iv (data, op))
5914 continue;
5915
5916 n++;
5917 }
5918
5919 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5920 {
5921 struct version_info *info = ver_info (data, j);
5922
5923 if (info->inv_id && info->has_nonlin_use)
5924 n++;
5925 }
5926
5927 data->regs_used = n;
5928 if (dump_file && (dump_flags & TDF_DETAILS))
5929 fprintf (dump_file, " regs_used %d\n", n);
5930
5931 if (dump_file && (dump_flags & TDF_DETAILS))
5932 {
5933 fprintf (dump_file, " cost for size:\n");
5934 fprintf (dump_file, " ivs\tcost\n");
5935 for (j = 0; j <= 2 * target_avail_regs; j++)
5936 fprintf (dump_file, " %d\t%d\n", j,
5937 ivopts_global_cost_for_size (data, j));
5938 fprintf (dump_file, "\n");
5939 }
5940 }
5941
5942 /* Returns true if A is a cheaper cost pair than B. */
5943
5944 static bool
5945 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5946 {
5947 int cmp;
5948
5949 if (!a)
5950 return false;
5951
5952 if (!b)
5953 return true;
5954
5955 cmp = compare_costs (a->cost, b->cost);
5956 if (cmp < 0)
5957 return true;
5958
5959 if (cmp > 0)
5960 return false;
5961
5962 /* In case the costs are the same, prefer the cheaper candidate. */
5963 if (a->cand->cost < b->cand->cost)
5964 return true;
5965
5966 return false;
5967 }
5968
5969
5970 /* Returns the cost pair through which GROUP is expressed in IVS. */
5971
5972 static struct cost_pair *
5973 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5974 {
5975 return ivs->cand_for_group[group->id];
5976 }
5977
5978 /* Computes the cost field of IVS structure. */
5979
5980 static void
5981 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5982 {
5983 comp_cost cost = ivs->cand_use_cost;
5984
5985 cost.cost += ivs->cand_cost;
5986
5987 cost.cost += ivopts_global_cost_for_size (data,
5988 ivs->n_regs
5989 + ivs->used_inv_exprs->elements ());
5990
5991 ivs->cost = cost;
5992 }
5993
5994 /* Removes the invariants in set INVS from set IVS. */
5995
5996 static void
5997 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5998 {
5999 bitmap_iterator bi;
6000 unsigned iid;
6001
6002 if (!invs)
6003 return;
6004
6005 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6006 {
6007 ivs->n_invariant_uses[iid]--;
6008 if (ivs->n_invariant_uses[iid] == 0)
6009 ivs->n_regs--;
6010 }
6011 }
6012
6013 /* Set GROUP not to be expressed by any candidate in IVS. */
6014
6015 static void
6016 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6017 struct iv_group *group)
6018 {
6019 unsigned gid = group->id, cid;
6020 struct cost_pair *cp;
6021
6022 cp = ivs->cand_for_group[gid];
6023 if (!cp)
6024 return;
6025 cid = cp->cand->id;
6026
6027 ivs->bad_groups++;
6028 ivs->cand_for_group[gid] = NULL;
6029 ivs->n_cand_uses[cid]--;
6030
6031 if (ivs->n_cand_uses[cid] == 0)
6032 {
6033 bitmap_clear_bit (ivs->cands, cid);
6034 /* Do not count the pseudocandidates. */
6035 if (cp->cand->iv)
6036 ivs->n_regs--;
6037 ivs->n_cands--;
6038 ivs->cand_cost -= cp->cand->cost;
6039
6040 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6041 }
6042
6043 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6044
6045 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6046
6047 if (cp->inv_expr != NULL)
6048 {
6049 unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6050 --(*slot);
6051 if (*slot == 0)
6052 ivs->used_inv_exprs->remove (cp->inv_expr);
6053 }
6054 iv_ca_recount_cost (data, ivs);
6055 }
6056
6057 /* Add invariants in set INVS to set IVS. */
6058
6059 static void
6060 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6061 {
6062 bitmap_iterator bi;
6063 unsigned iid;
6064
6065 if (!invs)
6066 return;
6067
6068 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6069 {
6070 ivs->n_invariant_uses[iid]++;
6071 if (ivs->n_invariant_uses[iid] == 1)
6072 ivs->n_regs++;
6073 }
6074 }
6075
6076 /* Set cost pair for GROUP in set IVS to CP. */
6077
6078 static void
6079 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6080 struct iv_group *group, struct cost_pair *cp)
6081 {
6082 unsigned gid = group->id, cid;
6083
6084 if (ivs->cand_for_group[gid] == cp)
6085 return;
6086
6087 if (ivs->cand_for_group[gid])
6088 iv_ca_set_no_cp (data, ivs, group);
6089
6090 if (cp)
6091 {
6092 cid = cp->cand->id;
6093
6094 ivs->bad_groups--;
6095 ivs->cand_for_group[gid] = cp;
6096 ivs->n_cand_uses[cid]++;
6097 if (ivs->n_cand_uses[cid] == 1)
6098 {
6099 bitmap_set_bit (ivs->cands, cid);
6100 /* Do not count the pseudocandidates. */
6101 if (cp->cand->iv)
6102 ivs->n_regs++;
6103 ivs->n_cands++;
6104 ivs->cand_cost += cp->cand->cost;
6105
6106 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6107 }
6108
6109 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6110 iv_ca_set_add_invariants (ivs, cp->depends_on);
6111
6112 if (cp->inv_expr != NULL)
6113 {
6114 unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6115 ++(*slot);
6116 }
6117 iv_ca_recount_cost (data, ivs);
6118 }
6119 }
6120
6121 /* Extend set IVS by expressing GROUP by some of the candidates in it
6122 if possible. Consider all important candidates if the candidates in
6123 set IVS don't give any result. */
6124
6125 static void
6126 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6127 struct iv_group *group)
6128 {
6129 struct cost_pair *best_cp = NULL, *cp;
6130 bitmap_iterator bi;
6131 unsigned i;
6132 struct iv_cand *cand;
6133
6134 gcc_assert (ivs->upto >= group->id);
6135 ivs->upto++;
6136 ivs->bad_groups++;
6137
6138 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6139 {
6140 cand = data->vcands[i];
6141 cp = get_group_iv_cost (data, group, cand);
6142 if (cheaper_cost_pair (cp, best_cp))
6143 best_cp = cp;
6144 }
6145
6146 if (best_cp == NULL)
6147 {
6148 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6149 {
6150 cand = data->vcands[i];
6151 cp = get_group_iv_cost (data, group, cand);
6152 if (cheaper_cost_pair (cp, best_cp))
6153 best_cp = cp;
6154 }
6155 }
6156
6157 iv_ca_set_cp (data, ivs, group, best_cp);
6158 }
6159
6160 /* Get cost for assignment IVS. */
6161
6162 static comp_cost
6163 iv_ca_cost (struct iv_ca *ivs)
6164 {
6165 /* This was a conditional expression but it triggered a bug in
6166 Sun C 5.5. */
6167 if (ivs->bad_groups)
6168 return infinite_cost;
6169 else
6170 return ivs->cost;
6171 }
6172
6173 /* Returns true if all dependences of CP are among invariants in IVS. */
6174
6175 static bool
6176 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6177 {
6178 unsigned i;
6179 bitmap_iterator bi;
6180
6181 if (!cp->depends_on)
6182 return true;
6183
6184 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6185 {
6186 if (ivs->n_invariant_uses[i] == 0)
6187 return false;
6188 }
6189
6190 return true;
6191 }
6192
6193 /* Creates a change record expressing GROUP by NEW_CP instead of OLD_CP and
6194 chains it before NEXT. */
6195
6196 static struct iv_ca_delta *
6197 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6198 struct cost_pair *new_cp, struct iv_ca_delta *next)
6199 {
6200 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6201
6202 change->group = group;
6203 change->old_cp = old_cp;
6204 change->new_cp = new_cp;
6205 change->next = next;
6206
6207 return change;
6208 }
6209
6210 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6211 are rewritten. */
6212
6213 static struct iv_ca_delta *
6214 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6215 {
6216 struct iv_ca_delta *last;
6217
6218 if (!l2)
6219 return l1;
6220
6221 if (!l1)
6222 return l2;
6223
6224 for (last = l1; last->next; last = last->next)
6225 continue;
6226 last->next = l2;
6227
6228 return l1;
6229 }
6230
6231 /* Reverse the list of changes DELTA, forming the inverse to it. */
6232
6233 static struct iv_ca_delta *
6234 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6235 {
6236 struct iv_ca_delta *act, *next, *prev = NULL;
6237
6238 for (act = delta; act; act = next)
6239 {
6240 next = act->next;
6241 act->next = prev;
6242 prev = act;
6243
6244 std::swap (act->old_cp, act->new_cp);
6245 }
6246
6247 return prev;
6248 }
6249
6250 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6251 reverted instead. */
6252
6253 static void
6254 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6255 struct iv_ca_delta *delta, bool forward)
6256 {
6257 struct cost_pair *from, *to;
6258 struct iv_ca_delta *act;
6259
6260 if (!forward)
6261 delta = iv_ca_delta_reverse (delta);
6262
6263 for (act = delta; act; act = act->next)
6264 {
6265 from = act->old_cp;
6266 to = act->new_cp;
6267 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6268 iv_ca_set_cp (data, ivs, act->group, to);
6269 }
6270
6271 if (!forward)
6272 iv_ca_delta_reverse (delta);
6273 }
6274
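/* A common pattern with deltas is a trial evaluation: commit a delta, read
the resulting cost with iv_ca_cost, then commit the same delta again with
FORWARD false to restore the previous assignment (see e.g. iv_ca_extend
and iv_ca_narrow below). */
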
6275 /* Returns true if CAND is used in IVS. */
6276
6277 static bool
6278 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6279 {
6280 return ivs->n_cand_uses[cand->id] > 0;
6281 }
6282
6283 /* Returns number of induction variable candidates in the set IVS. */
6284
6285 static unsigned
6286 iv_ca_n_cands (struct iv_ca *ivs)
6287 {
6288 return ivs->n_cands;
6289 }
6290
6291 /* Free the list of changes DELTA. */
6292
6293 static void
6294 iv_ca_delta_free (struct iv_ca_delta **delta)
6295 {
6296 struct iv_ca_delta *act, *next;
6297
6298 for (act = *delta; act; act = next)
6299 {
6300 next = act->next;
6301 free (act);
6302 }
6303
6304 *delta = NULL;
6305 }
6306
6307 /* Allocates a new iv candidate assignment. */
6308
6309 static struct iv_ca *
6310 iv_ca_new (struct ivopts_data *data)
6311 {
6312 struct iv_ca *nw = XNEW (struct iv_ca);
6313
6314 nw->upto = 0;
6315 nw->bad_groups = 0;
6316 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6317 data->vgroups.length ());
6318 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6319 nw->cands = BITMAP_ALLOC (NULL);
6320 nw->n_cands = 0;
6321 nw->n_regs = 0;
6322 nw->cand_use_cost = no_cost;
6323 nw->cand_cost = 0;
6324 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6325 nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6326 nw->cost = no_cost;
6327
6328 return nw;
6329 }
6330
6331 /* Free memory occupied by the set IVS. */
6332
6333 static void
6334 iv_ca_free (struct iv_ca **ivs)
6335 {
6336 free ((*ivs)->cand_for_group);
6337 free ((*ivs)->n_cand_uses);
6338 BITMAP_FREE ((*ivs)->cands);
6339 free ((*ivs)->n_invariant_uses);
6340 delete ((*ivs)->used_inv_exprs);
6341 free (*ivs);
6342 *ivs = NULL;
6343 }
6344
6345 /* Dumps IVS to FILE. */
6346
6347 static void
6348 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6349 {
6350 unsigned i;
6351 comp_cost cost = iv_ca_cost (ivs);
6352
6353 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6354 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6355 ivs->cand_cost, ivs->cand_use_cost.cost,
6356 ivs->cand_use_cost.complexity);
6357 bitmap_print (file, ivs->cands, " candidates: ","\n");
6358
6359 for (i = 0; i < ivs->upto; i++)
6360 {
6361 struct iv_group *group = data->vgroups[i];
6362 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6363 if (cp)
6364 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6365 group->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6366 else
6367 fprintf (file, " group:%d --> ??\n", group->id);
6368 }
6369
6370 const char *pref = "";
6371 fprintf (file, " invariant variables: ");
6372 for (i = 1; i <= data->max_inv_id; i++)
6373 if (ivs->n_invariant_uses[i])
6374 {
6375 fprintf (file, "%s%d", pref, i);
6376 pref = ", ";
6377 }
6378
6379 pref = "";
6380 fprintf (file, "\n invariant expressions: ");
6381 for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6382 = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6383 {
6384 fprintf (file, "%s%d", pref, (*it).first->id);
6385 pref = ", ";
6386 }
6387
6388 fprintf (file, "\n\n");
6389 }
6390
6391 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6392 of the new set, and store the differences in DELTA. The number of induction
6393 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6394 true, the function tries to find a solution with minimal iv candidates. */
6395
6396 static comp_cost
6397 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6398 struct iv_cand *cand, struct iv_ca_delta **delta,
6399 unsigned *n_ivs, bool min_ncand)
6400 {
6401 unsigned i;
6402 comp_cost cost;
6403 struct iv_group *group;
6404 struct cost_pair *old_cp, *new_cp;
6405
6406 *delta = NULL;
6407 for (i = 0; i < ivs->upto; i++)
6408 {
6409 group = data->vgroups[i];
6410 old_cp = iv_ca_cand_for_group (ivs, group);
6411
6412 if (old_cp
6413 && old_cp->cand == cand)
6414 continue;
6415
6416 new_cp = get_group_iv_cost (data, group, cand);
6417 if (!new_cp)
6418 continue;
6419
6420 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6421 continue;
6422
6423 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6424 continue;
6425
6426 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6427 }
6428
6429 iv_ca_delta_commit (data, ivs, *delta, true);
6430 cost = iv_ca_cost (ivs);
6431 if (n_ivs)
6432 *n_ivs = iv_ca_n_cands (ivs);
6433 iv_ca_delta_commit (data, ivs, *delta, false);
6434
6435 return cost;
6436 }
6437
6438 /* Try narrowing set IVS by removing CAND. Return the cost of
6439 the new set and store the differences in DELTA. START is
6440 the candidate with which we start narrowing. */
6441
6442 static comp_cost
6443 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6444 struct iv_cand *cand, struct iv_cand *start,
6445 struct iv_ca_delta **delta)
6446 {
6447 unsigned i, ci;
6448 struct iv_group *group;
6449 struct cost_pair *old_cp, *new_cp, *cp;
6450 bitmap_iterator bi;
6451 struct iv_cand *cnd;
6452 comp_cost cost, best_cost, acost;
6453
6454 *delta = NULL;
6455 for (i = 0; i < data->vgroups.length (); i++)
6456 {
6457 group = data->vgroups[i];
6458
6459 old_cp = iv_ca_cand_for_group (ivs, group);
6460 if (old_cp->cand != cand)
6461 continue;
6462
6463 best_cost = iv_ca_cost (ivs);
6464 /* Start narrowing with START. */
6465 new_cp = get_group_iv_cost (data, group, start);
6466
6467 if (data->consider_all_candidates)
6468 {
6469 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6470 {
6471 if (ci == cand->id || (start && ci == start->id))
6472 continue;
6473
6474 cnd = data->vcands[ci];
6475
6476 cp = get_group_iv_cost (data, group, cnd);
6477 if (!cp)
6478 continue;
6479
6480 iv_ca_set_cp (data, ivs, group, cp);
6481 acost = iv_ca_cost (ivs);
6482
6483 if (compare_costs (acost, best_cost) < 0)
6484 {
6485 best_cost = acost;
6486 new_cp = cp;
6487 }
6488 }
6489 }
6490 else
6491 {
6492 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6493 {
6494 if (ci == cand->id || (start && ci == start->id))
6495 continue;
6496
6497 cnd = data->vcands[ci];
6498
6499 cp = get_group_iv_cost (data, group, cnd);
6500 if (!cp)
6501 continue;
6502
6503 iv_ca_set_cp (data, ivs, group, cp);
6504 acost = iv_ca_cost (ivs);
6505
6506 if (compare_costs (acost, best_cost) < 0)
6507 {
6508 best_cost = acost;
6509 new_cp = cp;
6510 }
6511 }
6512 }
6513 /* Restore the old cost pair for this group. */
6514 iv_ca_set_cp (data, ivs, group, old_cp);
6515
6516 if (!new_cp)
6517 {
6518 iv_ca_delta_free (delta);
6519 return infinite_cost;
6520 }
6521
6522 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6523 }
6524
6525 iv_ca_delta_commit (data, ivs, *delta, true);
6526 cost = iv_ca_cost (ivs);
6527 iv_ca_delta_commit (data, ivs, *delta, false);
6528
6529 return cost;
6530 }
6531
6532 /* Try optimizing the set of candidates IVS by removing candidates other
6533 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6534 differences in DELTA. */
6535
6536 static comp_cost
6537 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6538 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6539 {
6540 bitmap_iterator bi;
6541 struct iv_ca_delta *act_delta, *best_delta;
6542 unsigned i;
6543 comp_cost best_cost, acost;
6544 struct iv_cand *cand;
6545
6546 best_delta = NULL;
6547 best_cost = iv_ca_cost (ivs);
6548
6549 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6550 {
6551 cand = data->vcands[i];
6552
6553 if (cand == except_cand)
6554 continue;
6555
6556 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6557
6558 if (compare_costs (acost, best_cost) < 0)
6559 {
6560 best_cost = acost;
6561 iv_ca_delta_free (&best_delta);
6562 best_delta = act_delta;
6563 }
6564 else
6565 iv_ca_delta_free (&act_delta);
6566 }
6567
6568 if (!best_delta)
6569 {
6570 *delta = NULL;
6571 return best_cost;
6572 }
6573
6574 /* Recurse to possibly remove other unnecessary ivs. */
6575 iv_ca_delta_commit (data, ivs, best_delta, true);
6576 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6577 iv_ca_delta_commit (data, ivs, best_delta, false);
6578 *delta = iv_ca_delta_join (best_delta, *delta);
6579 return best_cost;
6580 }
6581
6582 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6583 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6584 the corresponding cost_pair; otherwise just return BEST_CP. */
6585
6586 static struct cost_pair*
6587 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6588 unsigned int cand_idx, struct iv_cand *old_cand,
6589 struct cost_pair *best_cp)
6590 {
6591 struct iv_cand *cand;
6592 struct cost_pair *cp;
6593
6594 gcc_assert (old_cand != NULL && best_cp != NULL);
6595 if (cand_idx == old_cand->id)
6596 return best_cp;
6597
6598 cand = data->vcands[cand_idx];
6599 cp = get_group_iv_cost (data, group, cand);
6600 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6601 return cp;
6602
6603 return best_cp;
6604 }
6605
6606 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6607 that are used by more than one iv use. For each of those candidates,
6608 this function tries to represent the iv uses under that candidate using
6609 other ones with lower local cost, then tries to prune the new set.
6610 If the new set has a lower cost, it returns the new cost after recording
6611 the candidate replacement in list DELTA. */
6612
6613 static comp_cost
6614 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6615 struct iv_ca_delta **delta)
6616 {
6617 bitmap_iterator bi, bj;
6618 unsigned int i, j, k;
6619 struct iv_cand *cand;
6620 comp_cost orig_cost, acost;
6621 struct iv_ca_delta *act_delta, *tmp_delta;
6622 struct cost_pair *old_cp, *best_cp = NULL;
6623
6624 *delta = NULL;
6625 orig_cost = iv_ca_cost (ivs);
6626
6627 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6628 {
6629 if (ivs->n_cand_uses[i] == 1
6630 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6631 continue;
6632
6633 cand = data->vcands[i];
6634
6635 act_delta = NULL;
6636 /* Represent uses under current candidate using other ones with
6637 lower local cost. */
6638 for (j = 0; j < ivs->upto; j++)
6639 {
6640 struct iv_group *group = data->vgroups[j];
6641 old_cp = iv_ca_cand_for_group (ivs, group);
6642
6643 if (old_cp->cand != cand)
6644 continue;
6645
6646 best_cp = old_cp;
6647 if (data->consider_all_candidates)
6648 for (k = 0; k < data->vcands.length (); k++)
6649 best_cp = cheaper_cost_with_cand (data, group, k,
6650 old_cp->cand, best_cp);
6651 else
6652 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6653 best_cp = cheaper_cost_with_cand (data, group, k,
6654 old_cp->cand, best_cp);
6655
6656 if (best_cp == old_cp)
6657 continue;
6658
6659 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6660 }
6661 /* No need for further pruning. */
6662 if (!act_delta)
6663 continue;
6664
6665 /* Prune the new candidate set. */
6666 iv_ca_delta_commit (data, ivs, act_delta, true);
6667 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6668 iv_ca_delta_commit (data, ivs, act_delta, false);
6669 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6670
6671 if (compare_costs (acost, orig_cost) < 0)
6672 {
6673 *delta = act_delta;
6674 return acost;
6675 }
6676 else
6677 iv_ca_delta_free (&act_delta);
6678 }
6679
6680 return orig_cost;
6681 }
6682
6683 /* Tries to extend the set IVS in the best possible way in order to
6684 express GROUP. If ORIGINALP is true, prefer candidates from
6685 the original set of IVs, otherwise favor important candidates not
6686 based on any memory object. */
6687
6688 static bool
6689 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6690 struct iv_group *group, bool originalp)
6691 {
6692 comp_cost best_cost, act_cost;
6693 unsigned i;
6694 bitmap_iterator bi;
6695 struct iv_cand *cand;
6696 struct iv_ca_delta *best_delta = NULL, *act_delta;
6697 struct cost_pair *cp;
6698
6699 iv_ca_add_group (data, ivs, group);
6700 best_cost = iv_ca_cost (ivs);
6701 cp = iv_ca_cand_for_group (ivs, group);
6702 if (cp)
6703 {
6704 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6705 iv_ca_set_no_cp (data, ivs, group);
6706 }
6707
6708 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6709 first try important candidates not based on any memory object. Only if
6710 this fails, try the specific ones. Rationale -- in loops with many
6711 variables the best choice often is to use just one generic biv. If we
6712 added here many ivs specific to the uses, the optimization algorithm later
6713 would be likely to get stuck in a local minimum, thus causing us to create
6714 too many ivs. The approach from few ivs to more seems more likely to be
6715 successful -- starting from few ivs, replacing an expensive use by a
6716 specific iv should always be a win. */
6717 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6718 {
6719 cand = data->vcands[i];
6720
6721 if (originalp && cand->pos != IP_ORIGINAL)
6722 continue;
6723
6724 if (!originalp && cand->iv->base_object != NULL_TREE)
6725 continue;
6726
6727 if (iv_ca_cand_used_p (ivs, cand))
6728 continue;
6729
6730 cp = get_group_iv_cost (data, group, cand);
6731 if (!cp)
6732 continue;
6733
6734 iv_ca_set_cp (data, ivs, group, cp);
6735 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6736 true);
6737 iv_ca_set_no_cp (data, ivs, group);
6738 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6739
6740 if (compare_costs (act_cost, best_cost) < 0)
6741 {
6742 best_cost = act_cost;
6743
6744 iv_ca_delta_free (&best_delta);
6745 best_delta = act_delta;
6746 }
6747 else
6748 iv_ca_delta_free (&act_delta);
6749 }
6750
6751 if (infinite_cost_p (best_cost))
6752 {
6753 for (i = 0; i < group->n_map_members; i++)
6754 {
6755 cp = group->cost_map + i;
6756 cand = cp->cand;
6757 if (!cand)
6758 continue;
6759
6760 /* Already tried this. */
6761 if (cand->important)
6762 {
6763 if (originalp && cand->pos == IP_ORIGINAL)
6764 continue;
6765 if (!originalp && cand->iv->base_object == NULL_TREE)
6766 continue;
6767 }
6768
6769 if (iv_ca_cand_used_p (ivs, cand))
6770 continue;
6771
6772 act_delta = NULL;
6773 iv_ca_set_cp (data, ivs, group, cp);
6774 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6775 iv_ca_set_no_cp (data, ivs, group);
6776 act_delta = iv_ca_delta_add (group,
6777 iv_ca_cand_for_group (ivs, group),
6778 cp, act_delta);
6779
6780 if (compare_costs (act_cost, best_cost) < 0)
6781 {
6782 best_cost = act_cost;
6783
6784 if (best_delta)
6785 iv_ca_delta_free (&best_delta);
6786 best_delta = act_delta;
6787 }
6788 else
6789 iv_ca_delta_free (&act_delta);
6790 }
6791 }
6792
6793 iv_ca_delta_commit (data, ivs, best_delta, true);
6794 iv_ca_delta_free (&best_delta);
6795
6796 return !infinite_cost_p (best_cost);
6797 }
6798
6799 /* Finds an initial assignment of candidates to uses. */
6800
6801 static struct iv_ca *
6802 get_initial_solution (struct ivopts_data *data, bool originalp)
6803 {
6804 unsigned i;
6805 struct iv_ca *ivs = iv_ca_new (data);
6806
6807 for (i = 0; i < data->vgroups.length (); i++)
6808 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6809 {
6810 iv_ca_free (&ivs);
6811 return NULL;
6812 }
6813
6814 return ivs;
6815 }
6816
6817 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6818 points to a bool variable; if it is true, this function also tries to
6819 break the local optimal fixed-point by replacing candidates in IVS. */
6820
6821 static bool
6822 try_improve_iv_set (struct ivopts_data *data,
6823 struct iv_ca *ivs, bool *try_replace_p)
6824 {
6825 unsigned i, n_ivs;
6826 comp_cost acost, best_cost = iv_ca_cost (ivs);
6827 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6828 struct iv_cand *cand;
6829
6830 /* Try extending the set of induction variables by one. */
6831 for (i = 0; i < data->vcands.length (); i++)
6832 {
6833 cand = data->vcands[i];
6834
6835 if (iv_ca_cand_used_p (ivs, cand))
6836 continue;
6837
6838 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6839 if (!act_delta)
6840 continue;
6841
6842 /* If we successfully added the candidate and the set is small enough,
6843 try optimizing it by removing other candidates. */
6844 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6845 {
6846 iv_ca_delta_commit (data, ivs, act_delta, true);
6847 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6848 iv_ca_delta_commit (data, ivs, act_delta, false);
6849 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6850 }
6851
6852 if (compare_costs (acost, best_cost) < 0)
6853 {
6854 best_cost = acost;
6855 iv_ca_delta_free (&best_delta);
6856 best_delta = act_delta;
6857 }
6858 else
6859 iv_ca_delta_free (&act_delta);
6860 }
6861
6862 if (!best_delta)
6863 {
6864 /* Try removing the candidates from the set instead. */
6865 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6866
6867 if (!best_delta && *try_replace_p)
6868 {
6869 *try_replace_p = false;
6870 /* So far the candidate selection algorithm tends to choose fewer IVs,
6871 so that it can handle cases in which loops have many variables
6872 but the best choice is often to use only one general biv. One
6873 weakness is that it can't handle the opposite cases, in which different
6874 candidates should be chosen with respect to each use. To solve
6875 the problem, we replace candidates in the manner described in the
6876 comment of iv_ca_replace, thus giving the general algorithm a chance
6877 to break the local optimal fixed-point in these cases. */
6878 best_cost = iv_ca_replace (data, ivs, &best_delta);
6879 }
6880
6881 if (!best_delta)
6882 return false;
6883 }
6884
6885 iv_ca_delta_commit (data, ivs, best_delta, true);
6886 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6887 iv_ca_delta_free (&best_delta);
6888 return true;
6889 }
6890
6891 /* Attempts to find the optimal set of induction variables. We use a simple
6892 greedy heuristic -- we try to replace at most one candidate in the selected
6893 solution and remove the unused ivs while this improves the cost. */
6894
6895 static struct iv_ca *
6896 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6897 {
6898 struct iv_ca *set;
6899 bool try_replace_p = true;
6900
6901 /* Get the initial solution. */
6902 set = get_initial_solution (data, originalp);
6903 if (!set)
6904 {
6905 if (dump_file && (dump_flags & TDF_DETAILS))
6906 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6907 return NULL;
6908 }
6909
6910 if (dump_file && (dump_flags & TDF_DETAILS))
6911 {
6912 fprintf (dump_file, "Initial set of candidates:\n");
6913 iv_ca_dump (data, dump_file, set);
6914 }
6915
6916 while (try_improve_iv_set (data, set, &try_replace_p))
6917 {
6918 if (dump_file && (dump_flags & TDF_DETAILS))
6919 {
6920 fprintf (dump_file, "Improved to:\n");
6921 iv_ca_dump (data, dump_file, set);
6922 }
6923 }
6924
6925 return set;
6926 }
6927
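/* Finds the optimal set of induction variables, running the search twice --
once preferring the original ivs and once preferring candidates not based
on any memory object -- and keeping the cheaper of the two results. */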
6928 static struct iv_ca *
6929 find_optimal_iv_set (struct ivopts_data *data)
6930 {
6931 unsigned i;
6932 comp_cost cost, origcost;
6933 struct iv_ca *set, *origset;
6934
6935 /* Determine the cost based on a strategy that starts with the original IVs,
6936 and try again using a strategy that prefers candidates not based
6937 on any memory object. */
6938 origset = find_optimal_iv_set_1 (data, true);
6939 set = find_optimal_iv_set_1 (data, false);
6940
6941 if (!origset && !set)
6942 return NULL;
6943
6944 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6945 cost = set ? iv_ca_cost (set) : infinite_cost;
6946
6947 if (dump_file && (dump_flags & TDF_DETAILS))
6948 {
6949 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6950 origcost.cost, origcost.complexity);
6951 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6952 cost.cost, cost.complexity);
6953 }
6954
6955 /* Choose the one with the best cost. */
6956 if (compare_costs (origcost, cost) <= 0)
6957 {
6958 if (set)
6959 iv_ca_free (&set);
6960 set = origset;
6961 }
6962 else if (origset)
6963 iv_ca_free (&origset);
6964
6965 for (i = 0; i < data->vgroups.length (); i++)
6966 {
6967 struct iv_group *group = data->vgroups[i];
6968 group->selected = iv_ca_cand_for_group (set, group)->cand;
6969 }
6970
6971 return set;
6972 }
6973
6974 /* Creates a new induction variable corresponding to CAND. */
6975
6976 static void
6977 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6978 {
6979 gimple_stmt_iterator incr_pos;
6980 tree base;
6981 struct iv_use *use;
6982 struct iv_group *group;
6983 bool after = false;
6984
6985 if (!cand->iv)
6986 return;
6987
6988 switch (cand->pos)
6989 {
6990 case IP_NORMAL:
6991 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6992 break;
6993
6994 case IP_END:
6995 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6996 after = true;
6997 break;
6998
6999 case IP_AFTER_USE:
7000 after = true;
7001 /* fall through */
7002 case IP_BEFORE_USE:
7003 incr_pos = gsi_for_stmt (cand->incremented_at);
7004 break;
7005
7006 case IP_ORIGINAL:
7007 /* Mark that the iv is preserved. */
7008 name_info (data, cand->var_before)->preserve_biv = true;
7009 name_info (data, cand->var_after)->preserve_biv = true;
7010
7011 /* Rewrite the increment so that it uses var_before directly. */
7012 use = find_interesting_uses_op (data, cand->var_after);
7013 group = data->vgroups[use->group_id];
7014 group->selected = cand;
7015 return;
7016 }
7017
7018 gimple_add_tmp_var (cand->var_before);
7019
7020 base = unshare_expr (cand->iv->base);
7021
7022 create_iv (base, unshare_expr (cand->iv->step),
7023 cand->var_before, data->current_loop,
7024 &incr_pos, after, &cand->var_before, &cand->var_after);
7025 }
7026
7027 /* Creates new induction variables described in SET. */
7028
7029 static void
7030 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7031 {
7032 unsigned i;
7033 struct iv_cand *cand;
7034 bitmap_iterator bi;
7035
7036 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7037 {
7038 cand = data->vcands[i];
7039 create_new_iv (data, cand);
7040 }
7041
7042 if (dump_file && (dump_flags & TDF_DETAILS))
7043 {
7044 fprintf (dump_file, "Selected IV set for loop %d",
7045 data->current_loop->num);
7046 if (data->loop_loc != UNKNOWN_LOCATION)
7047 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7048 LOCATION_LINE (data->loop_loc));
7049 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7050 avg_loop_niter (data->current_loop));
7051 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7052 (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7053 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7054 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7055 {
7056 cand = data->vcands[i];
7057 dump_cand (dump_file, cand);
7058 }
7059 fprintf (dump_file, "\n");
7060 }
7061 }
7062
7063 /* Rewrites USE (definition of iv used in a nonlinear expression)
7064 using candidate CAND. */
7065
7066 static void
7067 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7068 struct iv_use *use, struct iv_cand *cand)
7069 {
7070 tree comp;
7071 tree op, tgt;
7072 gassign *ass;
7073 gimple_stmt_iterator bsi;
7074
7075 /* An important special case -- if we are asked to express value of
7076 the original iv by itself, just exit; there is no need to
7077 introduce a new computation (that might also need casting the
7078 variable to unsigned and back). */
7079 if (cand->pos == IP_ORIGINAL
7080 && cand->incremented_at == use->stmt)
7081 {
7082 enum tree_code stmt_code;
7083
7084 gcc_assert (is_gimple_assign (use->stmt));
7085 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7086
7087 /* Check whether we may leave the computation unchanged.
7088 This is the case only if it does not rely on other
7089 computations in the loop -- otherwise, the computation
7090 we rely upon may be removed in remove_unused_ivs,
7091 thus leading to ICE. */
7092 stmt_code = gimple_assign_rhs_code (use->stmt);
7093 if (stmt_code == PLUS_EXPR
7094 || stmt_code == MINUS_EXPR
7095 || stmt_code == POINTER_PLUS_EXPR)
7096 {
7097 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7098 op = gimple_assign_rhs2 (use->stmt);
7099 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7100 op = gimple_assign_rhs1 (use->stmt);
7101 else
7102 op = NULL_TREE;
7103 }
7104 else
7105 op = NULL_TREE;
7106
7107 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7108 return;
7109 }
7110
7111 comp = get_computation (data->current_loop, use, cand);
7112 gcc_assert (comp != NULL_TREE);
7113
7114 switch (gimple_code (use->stmt))
7115 {
7116 case GIMPLE_PHI:
7117 tgt = PHI_RESULT (use->stmt);
7118
7119 /* If we should keep the biv, do not replace it. */
7120 if (name_info (data, tgt)->preserve_biv)
7121 return;
7122
7123 bsi = gsi_after_labels (gimple_bb (use->stmt));
7124 break;
7125
7126 case GIMPLE_ASSIGN:
7127 tgt = gimple_assign_lhs (use->stmt);
7128 bsi = gsi_for_stmt (use->stmt);
7129 break;
7130
7131 default:
7132 gcc_unreachable ();
7133 }
7134
7135 if (!valid_gimple_rhs_p (comp)
7136 || (gimple_code (use->stmt) != GIMPLE_PHI
7137 /* We can't allow re-allocating the stmt as it might be pointed
7138 to still. */
7139 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7140 >= gimple_num_ops (gsi_stmt (bsi)))))
7141 {
7142 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7143 true, GSI_SAME_STMT);
7144 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7145 {
7146 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7147 /* As this isn't a plain copy we have to reset alignment
7148 information. */
7149 if (SSA_NAME_PTR_INFO (comp))
7150 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7151 }
7152 }
7153
7154 if (gimple_code (use->stmt) == GIMPLE_PHI)
7155 {
7156 ass = gimple_build_assign (tgt, comp);
7157 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7158
7159 bsi = gsi_for_stmt (use->stmt);
7160 remove_phi_node (&bsi, false);
7161 }
7162 else
7163 {
7164 gimple_assign_set_rhs_from_tree (&bsi, comp);
7165 use->stmt = gsi_stmt (bsi);
7166 }
7167 }
7168
7169 /* Performs a peephole optimization to reorder the iv update statement with
7170 a mem ref to enable instruction combining in later phases. The mem ref uses
7171 the iv value before the update, so the reordering transformation requires
7172 adjustment of the offset. CAND is the selected IV_CAND.
7173
7174 Example:
7175
7176 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7177 iv2 = iv1 + 1;
7178
7179 if (t < val) (1)
7180 goto L;
7181 goto Head;
7182
7183
7184 directly propagating t over to (1) will introduce an overlapping live range,
7185 thus increasing register pressure. This peephole transforms it into:
7186
7187
7188 iv2 = iv1 + 1;
7189 t = MEM_REF (base, iv2, 8, 8);
7190 if (t < val)
7191 goto L;
7192 goto Head;
7193 */
7194
7195 static void
7196 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7197 {
7198 tree var_after;
7199 gimple *iv_update, *stmt;
7200 basic_block bb;
7201 gimple_stmt_iterator gsi, gsi_iv;
7202
7203 if (cand->pos != IP_NORMAL)
7204 return;
7205
7206 var_after = cand->var_after;
7207 iv_update = SSA_NAME_DEF_STMT (var_after);
7208
7209 bb = gimple_bb (iv_update);
7210 gsi = gsi_last_nondebug_bb (bb);
7211 stmt = gsi_stmt (gsi);
7212
7213 /* Only handle conditional statements for now. */
7214 if (gimple_code (stmt) != GIMPLE_COND)
7215 return;
7216
7217 gsi_prev_nondebug (&gsi);
7218 stmt = gsi_stmt (gsi);
7219 if (stmt != iv_update)
7220 return;
7221
7222 gsi_prev_nondebug (&gsi);
7223 if (gsi_end_p (gsi))
7224 return;
7225
7226 stmt = gsi_stmt (gsi);
7227 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7228 return;
7229
7230 if (stmt != use->stmt)
7231 return;
7232
7233 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7234 return;
7235
7236 if (dump_file && (dump_flags & TDF_DETAILS))
7237 {
7238 fprintf (dump_file, "Reordering \n");
7239 print_gimple_stmt (dump_file, iv_update, 0, 0);
7240 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7241 fprintf (dump_file, "\n");
7242 }
7243
7244 gsi = gsi_for_stmt (use->stmt);
7245 gsi_iv = gsi_for_stmt (iv_update);
7246 gsi_move_before (&gsi_iv, &gsi);
7247
7248 cand->pos = IP_BEFORE_USE;
7249 cand->incremented_at = use->stmt;
7250 }
7251
7252 /* Rewrites USE (address that is an iv) using candidate CAND. */
7253
7254 static void
7255 rewrite_use_address (struct ivopts_data *data,
7256 struct iv_use *use, struct iv_cand *cand)
7257 {
7258 aff_tree aff;
7259 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7260 tree base_hint = NULL_TREE;
7261 tree ref, iv;
7262 bool ok;
7263
7264 adjust_iv_update_pos (cand, use);
7265 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7266 gcc_assert (ok);
7267 unshare_aff_combination (&aff);
7268
7269 /* To avoid undefined overflow problems, all IV candidates use unsigned
7270 integer types. The drawback is that this makes it impossible for
7271 create_mem_ref to distinguish an IV that is based on a memory object
7272 from one that represents simply an offset.
7273
7274 To work around this problem, we pass a hint to create_mem_ref that
7275 indicates which variable (if any) in aff is an IV based on a memory
7276 object. Note that we only consider the candidate. If this is not
7277 based on an object, the base of the reference is in some subexpression
7278 of the use -- but these will use pointer types, so they are recognized
7279 by the create_mem_ref heuristics anyway. */
7280 if (cand->iv->base_object)
7281 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7282
7283 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7284 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7285 reference_alias_ptr_type (*use->op_p),
7286 iv, base_hint, data->speed);
7287 copy_ref_info (ref, *use->op_p);
7288 *use->op_p = ref;
7289 }
7290
7291 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7292 candidate CAND. */
7293
7294 static void
7295 rewrite_use_compare (struct ivopts_data *data,
7296 struct iv_use *use, struct iv_cand *cand)
7297 {
7298 tree comp, *var_p, op, bound;
7299 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7300 enum tree_code compare;
7301 struct iv_group *group = data->vgroups[use->group_id];
7302 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7303 bool ok;
7304
7305 bound = cp->value;
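/* If induction variable elimination succeeded for this use, CP carries the
precomputed bound and comparison code; rewrite the exit test to compare
the candidate directly against that bound. */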
7306 if (bound)
7307 {
7308 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7309 tree var_type = TREE_TYPE (var);
7310 gimple_seq stmts;
7311
7312 if (dump_file && (dump_flags & TDF_DETAILS))
7313 {
7314 fprintf (dump_file, "Replacing exit test: ");
7315 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7316 }
7317 compare = cp->comp;
7318 bound = unshare_expr (fold_convert (var_type, bound));
7319 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7320 if (stmts)
7321 gsi_insert_seq_on_edge_immediate (
7322 loop_preheader_edge (data->current_loop),
7323 stmts);
7324
7325 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7326 gimple_cond_set_lhs (cond_stmt, var);
7327 gimple_cond_set_code (cond_stmt, compare);
7328 gimple_cond_set_rhs (cond_stmt, op);
7329 return;
7330 }
7331
7332 /* The induction variable elimination failed; just express the original
7333 giv. */
7334 comp = get_computation (data->current_loop, use, cand);
7335 gcc_assert (comp != NULL_TREE);
7336
7337 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7338 gcc_assert (ok);
7339
7340 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7341 true, GSI_SAME_STMT);
7342 }
7343
7344 /* Rewrite the groups using the selected induction variables. */
7345
7346 static void
7347 rewrite_groups (struct ivopts_data *data)
7348 {
7349 unsigned i, j;
7350
7351 for (i = 0; i < data->vgroups.length (); i++)
7352 {
7353 struct iv_group *group = data->vgroups[i];
7354 struct iv_cand *cand = group->selected;
7355
7356 gcc_assert (cand);
7357
7358 if (group->type == USE_NONLINEAR_EXPR)
7359 {
7360 for (j = 0; j < group->vuses.length (); j++)
7361 {
7362 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7363 update_stmt (group->vuses[j]->stmt);
7364 }
7365 }
7366 else if (group->type == USE_ADDRESS)
7367 {
7368 for (j = 0; j < group->vuses.length (); j++)
7369 {
7370 rewrite_use_address (data, group->vuses[j], cand);
7371 update_stmt (group->vuses[j]->stmt);
7372 }
7373 }
7374 else
7375 {
7376 gcc_assert (group->type == USE_COMPARE);
7377
7378 for (j = 0; j < group->vuses.length (); j++)
7379 {
7380 rewrite_use_compare (data, group->vuses[j], cand);
7381 update_stmt (group->vuses[j]->stmt);
7382 }
7383 }
7384 }
7385 }
7386
7387 /* Removes the ivs that are not used after rewriting. */
7388
7389 static void
7390 remove_unused_ivs (struct ivopts_data *data)
7391 {
7392 unsigned j;
7393 bitmap_iterator bi;
7394 bitmap toremove = BITMAP_ALLOC (NULL);
7395
7396 /* Figure out an order in which to release SSA DEFs so that we don't
7397 release something that we'd have to propagate into a debug stmt
7398 afterwards. */
7399 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7400 {
7401 struct version_info *info;
7402
7403 info = ver_info (data, j);
7404 if (info->iv
7405 && !integer_zerop (info->iv->step)
7406 && !info->inv_id
7407 && !info->iv->nonlin_use
7408 && !info->preserve_biv)
7409 {
7410 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7411
7412 tree def = info->iv->ssa_name;
7413
7414 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7415 {
7416 imm_use_iterator imm_iter;
7417 use_operand_p use_p;
7418 gimple *stmt;
7419 int count = 0;
7420
7421 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7422 {
7423 if (!gimple_debug_bind_p (stmt))
7424 continue;
7425
7426 /* We just want to determine whether to do nothing
7427 (count == 0), to substitute the computed
7428 expression into a single use of the SSA DEF by
7429 itself (count == 1), or to use a debug temp
7430 because the SSA DEF is used multiple times or as
7431 part of a larger expression (count > 1). */
7432 count++;
7433 if (gimple_debug_bind_get_value (stmt) != def)
7434 count++;
7435
7436 if (count > 1)
7437 BREAK_FROM_IMM_USE_STMT (imm_iter);
7438 }
7439
7440 if (!count)
7441 continue;
7442
7443 struct iv_use dummy_use;
7444 struct iv_cand *best_cand = NULL, *cand;
7445 unsigned i, best_pref = 0, cand_pref;
7446
7447 memset (&dummy_use, 0, sizeof (dummy_use));
7448 dummy_use.iv = info->iv;
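/* Score each selected candidate: prefer one with the same step (weight 4),
the same mode (weight 2) and a constant base (weight 1), and use the best
scoring candidate to express the removed iv in debug statements. */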
7449 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7450 {
7451 cand = data->vgroups[i]->selected;
7452 if (cand == best_cand)
7453 continue;
7454 cand_pref = operand_equal_p (cand->iv->step,
7455 info->iv->step, 0)
7456 ? 4 : 0;
7457 cand_pref
7458 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7459 == TYPE_MODE (TREE_TYPE (info->iv->base))
7460 ? 2 : 0;
7461 cand_pref
7462 += TREE_CODE (cand->iv->base) == INTEGER_CST
7463 ? 1 : 0;
7464 if (best_cand == NULL || best_pref < cand_pref)
7465 {
7466 best_cand = cand;
7467 best_pref = cand_pref;
7468 }
7469 }
7470
7471 if (!best_cand)
7472 continue;
7473
7474 tree comp = get_computation_at (data->current_loop,
7475 &dummy_use, best_cand,
7476 SSA_NAME_DEF_STMT (def));
7477 if (!comp)
7478 continue;
7479
7480 if (count > 1)
7481 {
7482 tree vexpr = make_node (DEBUG_EXPR_DECL);
7483 DECL_ARTIFICIAL (vexpr) = 1;
7484 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7485 if (SSA_NAME_VAR (def))
7486 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7487 else
7488 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7489 gdebug *def_temp
7490 = gimple_build_debug_bind (vexpr, comp, NULL);
7491 gimple_stmt_iterator gsi;
7492
7493 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7494 gsi = gsi_after_labels (gimple_bb
7495 (SSA_NAME_DEF_STMT (def)));
7496 else
7497 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7498
7499 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7500 comp = vexpr;
7501 }
7502
7503 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7504 {
7505 if (!gimple_debug_bind_p (stmt))
7506 continue;
7507
7508 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7509 SET_USE (use_p, comp);
7510
7511 update_stmt (stmt);
7512 }
7513 }
7514 }
7515 }
7516
7517 release_defs_bitset (toremove);
7518
7519 BITMAP_FREE (toremove);
7520 }
7521
7522 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7523 for hash_map::traverse. */
7524
7525 bool
7526 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7527 {
7528 free (value);
7529 return true;
7530 }
7531
7532 /* Frees data allocated by the optimization of a single loop. */
7533
7534 static void
7535 free_loop_data (struct ivopts_data *data)
7536 {
7537 unsigned i, j;
7538 bitmap_iterator bi;
7539 tree obj;
7540
7541 if (data->niters)
7542 {
7543 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7544 delete data->niters;
7545 data->niters = NULL;
7546 }
7547
7548 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7549 {
7550 struct version_info *info;
7551
7552 info = ver_info (data, i);
7553 info->iv = NULL;
7554 info->has_nonlin_use = false;
7555 info->preserve_biv = false;
7556 info->inv_id = 0;
7557 }
7558 bitmap_clear (data->relevant);
7559 bitmap_clear (data->important_candidates);
7560
7561 for (i = 0; i < data->vgroups.length (); i++)
7562 {
7563 struct iv_group *group = data->vgroups[i];
7564
7565 for (j = 0; j < group->vuses.length (); j++)
7566 free (group->vuses[j]);
7567 group->vuses.release ();
7568
7569 BITMAP_FREE (group->related_cands);
7570 for (j = 0; j < group->n_map_members; j++)
7571 if (group->cost_map[j].depends_on)
7572 BITMAP_FREE (group->cost_map[j].depends_on);
7573
7574 free (group->cost_map);
7575 free (group);
7576 }
7577 data->vgroups.truncate (0);
7578
7579 for (i = 0; i < data->vcands.length (); i++)
7580 {
7581 struct iv_cand *cand = data->vcands[i];
7582
7583 if (cand->depends_on)
7584 BITMAP_FREE (cand->depends_on);
7585 free (cand);
7586 }
7587 data->vcands.truncate (0);
7588
7589 if (data->version_info_size < num_ssa_names)
7590 {
7591 data->version_info_size = 2 * num_ssa_names;
7592 free (data->version_info);
7593 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7594 }
7595
7596 data->max_inv_id = 0;
7597
7598 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7599 SET_DECL_RTL (obj, NULL_RTX);
7600
7601 decl_rtl_to_reset.truncate (0);
7602
7603 data->inv_expr_tab->empty ();
7604 data->max_inv_expr_id = 0;
7605
7606 data->iv_common_cand_tab->empty ();
7607 data->iv_common_cands.truncate (0);
7608 }
7609
7610 /* Finalizes data structures used by the iv optimization pass, freeing the
7611 memory allocated for them. */
7612
7613 static void
7614 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7615 {
7616 free_loop_data (data);
7617 free (data->version_info);
7618 BITMAP_FREE (data->relevant);
7619 BITMAP_FREE (data->important_candidates);
7620
7621 decl_rtl_to_reset.release ();
7622 data->vgroups.release ();
7623 data->vcands.release ();
7624 delete data->inv_expr_tab;
7625 data->inv_expr_tab = NULL;
7626 free_affine_expand_cache (&data->name_expansion_cache);
7627 delete data->iv_common_cand_tab;
7628 data->iv_common_cand_tab = NULL;
7629 data->iv_common_cands.release ();
7630 obstack_free (&data->iv_obstack, NULL);
7631 }
7632
7633 /* Returns true if the loop body BODY includes any function calls. */
7634
7635 static bool
7636 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7637 {
7638 gimple_stmt_iterator gsi;
7639 unsigned i;
7640
7641 for (i = 0; i < num_nodes; i++)
7642 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7643 {
7644 gimple *stmt = gsi_stmt (gsi);
7645 if (is_gimple_call (stmt)
7646 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7647 return true;
7648 }
7649 return false;
7650 }
7651
7652 /* Optimizes the LOOP. Returns true if anything changed. */
7653
7654 static bool
7655 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7656 {
7657 bool changed = false;
7658 struct iv_ca *iv_ca;
7659 edge exit = single_dom_exit (loop);
7660 basic_block *body;
7661
7662 gcc_assert (!data->niters);
7663 data->current_loop = loop;
7664 data->loop_loc = find_loop_location (loop);
7665 data->speed = optimize_loop_for_speed_p (loop);
7666
7667 if (dump_file && (dump_flags & TDF_DETAILS))
7668 {
7669 fprintf (dump_file, "Processing loop %d", loop->num);
7670 if (data->loop_loc != UNKNOWN_LOCATION)
7671 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7672 LOCATION_LINE (data->loop_loc));
7673 fprintf (dump_file, "\n");
7674
7675 if (exit)
7676 {
7677 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7678 exit->src->index, exit->dest->index);
7679 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7680 fprintf (dump_file, "\n");
7681 }
7682
7683 fprintf (dump_file, "\n");
7684 }
7685
7686 body = get_loop_body (loop);
7687 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7688 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7689 free (body);
7690
7691 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7692
7693 /* For each ssa name determines whether it behaves as an induction variable
7694 in some loop. */
7695 if (!find_induction_variables (data))
7696 goto finish;
7697
7698 /* Finds interesting uses (item 1). */
7699 find_interesting_uses (data);
7700 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7701 goto finish;
7702
7703 /* Finds candidates for the induction variables (item 2). */
7704 find_iv_candidates (data);
7705
7706 /* Calculates the costs (item 3, part 1). */
7707 determine_iv_costs (data);
7708 determine_group_iv_costs (data);
7709 determine_set_costs (data);
7710
7711 /* Find the optimal set of induction variables (item 3, part 2). */
7712 iv_ca = find_optimal_iv_set (data);
7713 if (!iv_ca)
7714 goto finish;
7715 changed = true;
7716
7717 /* Create the new induction variables (item 4, part 1). */
7718 create_new_ivs (data, iv_ca);
7719 iv_ca_free (&iv_ca);
7720
7721 /* Rewrite the uses (item 4, part 2). */
7722 rewrite_groups (data);
7723
7724 /* Remove the ivs that are unused after rewriting. */
7725 remove_unused_ivs (data);
7726
7727 /* We have changed the structure of induction variables; it might happen
7728 that definitions in the scev database refer to some of them that were
7729 eliminated. */
7730 scev_reset ();
7731
7732 finish:
7733 free_loop_data (data);
7734
7735 return changed;
7736 }
7737
7738 /* Main entry point. Optimizes induction variables in loops. */
7739
7740 void
7741 tree_ssa_iv_optimize (void)
7742 {
7743 struct loop *loop;
7744 struct ivopts_data data;
7745
7746 tree_ssa_iv_optimize_init (&data);
7747
7748 /* Optimize the loops starting with the innermost ones. */
7749 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7750 {
7751 if (dump_file && (dump_flags & TDF_DETAILS))
7752 flow_loop_dump (loop, dump_file, NULL, 1);
7753
7754 tree_ssa_iv_optimize_loop (&data, loop);
7755 }
7756
7757 tree_ssa_iv_optimize_finalize (&data);
7758 }