gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "memmodel.h"
  79 #include "tm_p.h"
  80 #include "ssa.h"
  81 #include "expmed.h"
  82 #include "insn-config.h"
  83 #include "emit-rtl.h"
  84 #include "recog.h"
  85 #include "cgraph.h"
  86 #include "gimple-pretty-print.h"
  87 #include "alias.h"
  88 #include "fold-const.h"
  89 #include "stor-layout.h"
  90 #include "tree-eh.h"
  91 #include "gimplify.h"
  92 #include "gimple-iterator.h"
  93 #include "gimplify-me.h"
  94 #include "tree-cfg.h"
  95 #include "tree-ssa-loop-ivopts.h"
  96 #include "tree-ssa-loop-manip.h"
  97 #include "tree-ssa-loop-niter.h"
  98 #include "tree-ssa-loop.h"
  99 #include "explow.h"
 100 #include "expr.h"
 101 #include "tree-dfa.h"
 102 #include "tree-ssa.h"
 103 #include "cfgloop.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "tree-affine.h"
 107 #include "tree-ssa-propagate.h"
 108 #include "tree-ssa-address.h"
 109 #include "builtins.h"
 110 #include "tree-vectorizer.h"
 111
 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 113    cost of different addressing modes.  This should be moved to a TBD
 114    interface between the GIMPLE and RTL worlds.  */
 115
 116 /* The infinite cost.  */
 117 #define INFTY 10000000
 118
 119 /* Returns the expected number of loop iterations for LOOP.
 120    The average trip count is computed from profile data if it
 121    exists. */
 122
 123 static inline HOST_WIDE_INT
 124 avg_loop_niter (struct loop *loop)
 125 {
 126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 127   if (niter == -1)
 128     {
 129       niter = likely_max_stmt_executions_int (loop);
 130
 131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
 132         return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 133     }
 134
 135   return niter;
 136 }
 137
 138 struct iv_use;
 139
 140 /* Representation of the induction variable.  */
 141 struct iv
 142 {
 143   tree base;            /* Initial value of the iv.  */
 144   tree base_object;     /* A memory object to that the induction variable points.  */
 145   tree step;            /* Step of the iv (constant only).  */
 146   tree ssa_name;        /* The ssa name with the value.  */
 147   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 148   bool biv_p;           /* Is it a biv?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_ADDRESS,          /* Use in an address.  */
 170   USE_COMPARE           /* Use is a compare.  */
 171 };
 172
 173 /* Cost of a computation.  */
 174 struct comp_cost
 175 {
 176   comp_cost (): cost (0), complexity (0), scratch (0)
 177   {}
 178
 179   comp_cost (int cost, unsigned complexity, int scratch = 0)
 180     : cost (cost), complexity (complexity), scratch (scratch)
 181   {}
 182
 183   /* Returns true if COST is infinite.  */
 184   bool infinite_cost_p ();
 185
 186   /* Adds costs COST1 and COST2.  */
 187   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 188
 189   /* Adds COST to the comp_cost.  */
 190   comp_cost operator+= (comp_cost cost);
 191
 192   /* Adds constant C to this comp_cost.  */
 193   comp_cost operator+= (HOST_WIDE_INT c);
 194
 195   /* Subtracts constant C to this comp_cost.  */
 196   comp_cost operator-= (HOST_WIDE_INT c);
 197
 198   /* Divide the comp_cost by constant C.  */
 199   comp_cost operator/= (HOST_WIDE_INT c);
 200
 201   /* Multiply the comp_cost by constant C.  */
 202   comp_cost operator*= (HOST_WIDE_INT c);
 203
 204   /* Subtracts costs COST1 and COST2.  */
 205   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 206
 207   /* Subtracts COST from this comp_cost.  */
 208   comp_cost operator-= (comp_cost cost);
 209
 210   /* Returns true if COST1 is smaller than COST2.  */
 211   friend bool operator< (comp_cost cost1, comp_cost cost2);
 212
 213   /* Returns true if COST1 and COST2 are equal.  */
 214   friend bool operator== (comp_cost cost1, comp_cost cost2);
 215
 216   /* Returns true if COST1 is smaller or equal than COST2.  */
 217   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 218
 219   int cost;             /* The runtime cost.  */
 220   unsigned complexity;  /* The estimate of the complexity of the code for
 221                            the computation (in no concrete units --
 222                            complexity field should be larger for more
 223                            complex expressions and addressing modes).  */
 224   int scratch;          /* Scratch used during cost computation.  */
 225 };
 226
 227 static const comp_cost no_cost;
 228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 229
 230 bool
 231 comp_cost::infinite_cost_p ()
 232 {
 233   return cost == INFTY;
 234 }
 235
 236 comp_cost
 237 operator+ (comp_cost cost1, comp_cost cost2)
 238 {
 239   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 240     return infinite_cost;
 241
 242   cost1.cost += cost2.cost;
 243   cost1.complexity += cost2.complexity;
 244
 245   return cost1;
 246 }
 247
 248 comp_cost
 249 operator- (comp_cost cost1, comp_cost cost2)
 250 {
 251   if (cost1.infinite_cost_p ())
 252     return infinite_cost;
 253
 254   gcc_assert (!cost2.infinite_cost_p ());
 255
 256   cost1.cost -= cost2.cost;
 257   cost1.complexity -= cost2.complexity;
 258
 259   return cost1;
 260 }
 261
 262 comp_cost
 263 comp_cost::operator+= (comp_cost cost)
 264 {
 265   *this = *this + cost;
 266   return *this;
 267 }
 268
 269 comp_cost
 270 comp_cost::operator+= (HOST_WIDE_INT c)
 271 {
 272   if (infinite_cost_p ())
 273     return *this;
 274
 275   this->cost += c;
 276
 277   return *this;
 278 }
 279
 280 comp_cost
 281 comp_cost::operator-= (HOST_WIDE_INT c)
 282 {
 283   if (infinite_cost_p ())
 284     return *this;
 285
 286   this->cost -= c;
 287
 288   return *this;
 289 }
 290
 291 comp_cost
 292 comp_cost::operator/= (HOST_WIDE_INT c)
 293 {
 294   if (infinite_cost_p ())
 295     return *this;
 296
 297   this->cost /= c;
 298
 299   return *this;
 300 }
 301
 302 comp_cost
 303 comp_cost::operator*= (HOST_WIDE_INT c)
 304 {
 305   if (infinite_cost_p ())
 306     return *this;
 307
 308   this->cost *= c;
 309
 310   return *this;
 311 }
 312
 313 comp_cost
 314 comp_cost::operator-= (comp_cost cost)
 315 {
 316   *this = *this - cost;
 317   return *this;
 318 }
 319
 320 bool
 321 operator< (comp_cost cost1, comp_cost cost2)
 322 {
 323   if (cost1.cost == cost2.cost)
 324     return cost1.complexity < cost2.complexity;
 325
 326   return cost1.cost < cost2.cost;
 327 }
 328
 329 bool
 330 operator== (comp_cost cost1, comp_cost cost2)
 331 {
 332   return cost1.cost == cost2.cost
 333     && cost1.complexity == cost2.complexity;
 334 }
 335
 336 bool
 337 operator<= (comp_cost cost1, comp_cost cost2)
 338 {
 339   return cost1 < cost2 || cost1 == cost2;
 340 }
 341
 342 struct iv_inv_expr_ent;
 343
 344 /* The candidate - cost pair.  */
 345 struct cost_pair
 346 {
 347   struct iv_cand *cand; /* The candidate.  */
 348   comp_cost cost;       /* The cost.  */
 349   enum tree_code comp;  /* For iv elimination, the comparison.  */
 350   bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
 351                            preserved when representing iv_use with iv_cand.  */
 352   bitmap inv_exprs;     /* The list of newly created invariant expressions
 353                            when representing iv_use with iv_cand.  */
 354   tree value;           /* For final value elimination, the expression for
 355                            the final value of the iv.  For iv elimination,
 356                            the new bound to compare with.  */
 357 };
 358
 359 /* Use.  */
 360 struct iv_use
 361 {
 362   unsigned id;          /* The id of the use.  */
 363   unsigned group_id;    /* The group id the use belongs to.  */
 364   enum use_type type;   /* Type of the use.  */
 365   struct iv *iv;        /* The induction variable it is based on.  */
 366   gimple *stmt;         /* Statement in that it occurs.  */
 367   tree *op_p;           /* The place where it occurs.  */
 368
 369   tree addr_base;       /* Base address with const offset stripped.  */
 370   unsigned HOST_WIDE_INT addr_offset;
 371                         /* Const offset stripped from base address.  */
 372 };
 373
 374 /* Group of uses.  */
 375 struct iv_group
 376 {
 377   /* The id of the group.  */
 378   unsigned id;
 379   /* Uses of the group are of the same type.  */
 380   enum use_type type;
 381   /* The set of "related" IV candidates, plus the important ones.  */
 382   bitmap related_cands;
 383   /* Number of IV candidates in the cost_map.  */
 384   unsigned n_map_members;
 385   /* The costs wrto the iv candidates.  */
 386   struct cost_pair *cost_map;
 387   /* The selected candidate for the group.  */
 388   struct iv_cand *selected;
 389   /* Uses in the group.  */
 390   vec<struct iv_use *> vuses;
 391 };
 392
 393 /* The position where the iv is computed.  */
 394 enum iv_position
 395 {
 396   IP_NORMAL,            /* At the end, just before the exit condition.  */
 397   IP_END,               /* At the end of the latch block.  */
 398   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 399   IP_AFTER_USE,         /* Immediately after a specific use.  */
 400   IP_ORIGINAL           /* The original biv.  */
 401 };
 402
 403 /* The induction variable candidate.  */
 404 struct iv_cand
 405 {
 406   unsigned id;          /* The number of the candidate.  */
 407   bool important;       /* Whether this is an "important" candidate, i.e. such
 408                            that it should be considered by all uses.  */
 409   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 410   gimple *incremented_at;/* For original biv, the statement where it is
 411                            incremented.  */
 412   tree var_before;      /* The variable used for it before increment.  */
 413   tree var_after;       /* The variable used for it after increment.  */
 414   struct iv *iv;        /* The value of the candidate.  NULL for
 415                            "pseudocandidate" used to indicate the possibility
 416                            to replace the final value of an iv by direct
 417                            computation of the value.  */
 418   unsigned cost;        /* Cost of the candidate.  */
 419   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 420   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 421                               where it is incremented.  */
 422   bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
 423                            iv_cand.  */
 424   bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
 425                            hanlde it as a new invariant expression which will
 426                            be hoisted out of loop.  */
 427   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 428                            smaller type.  */
 429 };
 430
 431 /* Hashtable entry for common candidate derived from iv uses.  */
 432 struct iv_common_cand
 433 {
 434   tree base;
 435   tree step;
 436   /* IV uses from which this common candidate is derived.  */
 437   auto_vec<struct iv_use *> uses;
 438   hashval_t hash;
 439 };
 440
 441 /* Hashtable helpers.  */
 442
 443 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 444 {
 445   static inline hashval_t hash (const iv_common_cand *);
 446   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 447 };
 448
 449 /* Hash function for possible common candidates.  */
 450
 451 inline hashval_t
 452 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 453 {
 454   return ccand->hash;
 455 }
 456
 457 /* Hash table equality function for common candidates.  */
 458
 459 inline bool
 460 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 461                               const iv_common_cand *ccand2)
 462 {
 463   return (ccand1->hash == ccand2->hash
 464           && operand_equal_p (ccand1->base, ccand2->base, 0)
 465           && operand_equal_p (ccand1->step, ccand2->step, 0)
 466           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 467               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 468 }
 469
 470 /* Loop invariant expression hashtable entry.  */
 471
 472 struct iv_inv_expr_ent
 473 {
 474   /* Tree expression of the entry.  */
 475   tree expr;
 476   /* Unique indentifier.  */
 477   int id;
 478   /* Hash value.  */
 479   hashval_t hash;
 480 };
 481
 482 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 483
 484 static int
 485 sort_iv_inv_expr_ent (const void *a, const void *b)
 486 {
 487   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 488   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 489
 490   unsigned id1 = (*e1)->id;
 491   unsigned id2 = (*e2)->id;
 492
 493   if (id1 < id2)
 494     return -1;
 495   else if (id1 > id2)
 496     return 1;
 497   else
 498     return 0;
 499 }
 500
 501 /* Hashtable helpers.  */
 502
 503 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 504 {
 505   static inline hashval_t hash (const iv_inv_expr_ent *);
 506   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 507 };
 508
 509 /* Hash function for loop invariant expressions.  */
 510
 511 inline hashval_t
 512 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 513 {
 514   return expr->hash;
 515 }
 516
 517 /* Hash table equality function for expressions.  */
 518
 519 inline bool
 520 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 521                            const iv_inv_expr_ent *expr2)
 522 {
 523   return expr1->hash == expr2->hash
 524          && operand_equal_p (expr1->expr, expr2->expr, 0);
 525 }
 526
 527 struct ivopts_data
 528 {
 529   /* The currently optimized loop.  */
 530   struct loop *current_loop;
 531   source_location loop_loc;
 532
 533   /* Numbers of iterations for all exits of the current loop.  */
 534   hash_map<edge, tree_niter_desc *> *niters;
 535
 536   /* Number of registers used in it.  */
 537   unsigned regs_used;
 538
 539   /* The size of version_info array allocated.  */
 540   unsigned version_info_size;
 541
 542   /* The array of information for the ssa names.  */
 543   struct version_info *version_info;
 544
 545   /* The hashtable of loop invariant expressions created
 546      by ivopt.  */
 547   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 548
 549   /* The bitmap of indices in version_info whose value was changed.  */
 550   bitmap relevant;
 551
 552   /* The uses of induction variables.  */
 553   vec<iv_group *> vgroups;
 554
 555   /* The candidates.  */
 556   vec<iv_cand *> vcands;
 557
 558   /* A bitmap of important candidates.  */
 559   bitmap important_candidates;
 560
 561   /* Cache used by tree_to_aff_combination_expand.  */
 562   hash_map<tree, name_expansion *> *name_expansion_cache;
 563
 564   /* The hashtable of common candidates derived from iv uses.  */
 565   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 566
 567   /* The common candidates.  */
 568   vec<iv_common_cand *> iv_common_cands;
 569
 570   /* The maximum invariant variable id.  */
 571   unsigned max_inv_var_id;
 572
 573   /* The maximum invariant expression id.  */
 574   unsigned max_inv_expr_id;
 575
 576   /* Number of no_overflow BIVs which are not used in memory address.  */
 577   unsigned bivs_not_used_in_addr;
 578
 579   /* Obstack for iv structure.  */
 580   struct obstack iv_obstack;
 581
 582   /* Whether to consider just related and important candidates when replacing a
 583      use.  */
 584   bool consider_all_candidates;
 585
 586   /* Are we optimizing for speed?  */
 587   bool speed;
 588
 589   /* Whether the loop body includes any function calls.  */
 590   bool body_includes_call;
 591
 592   /* Whether the loop body can only be exited via single exit.  */
 593   bool loop_single_exit_p;
 594 };
 595
 596 /* An assignment of iv candidates to uses.  */
 597
 598 struct iv_ca
 599 {
 600   /* The number of uses covered by the assignment.  */
 601   unsigned upto;
 602
 603   /* Number of uses that cannot be expressed by the candidates in the set.  */
 604   unsigned bad_groups;
 605
 606   /* Candidate assigned to a use, together with the related costs.  */
 607   struct cost_pair **cand_for_group;
 608
 609   /* Number of times each candidate is used.  */
 610   unsigned *n_cand_uses;
 611
 612   /* The candidates used.  */
 613   bitmap cands;
 614
 615   /* The number of candidates in the set.  */
 616   unsigned n_cands;
 617
 618   /* The number of invariants needed, including both invariant variants and
 619      invariant expressions.  */
 620   unsigned n_invs;
 621
 622   /* Total cost of expressing uses.  */
 623   comp_cost cand_use_cost;
 624
 625   /* Total cost of candidates.  */
 626   unsigned cand_cost;
 627
 628   /* Number of times each invariant variable is used.  */
 629   unsigned *n_inv_var_uses;
 630
 631   /* Number of times each invariant expression is used.  */
 632   unsigned *n_inv_expr_uses;
 633
 634   /* Total cost of the assignment.  */
 635   comp_cost cost;
 636 };
 637
 638 /* Difference of two iv candidate assignments.  */
 639
 640 struct iv_ca_delta
 641 {
 642   /* Changed group.  */
 643   struct iv_group *group;
 644
 645   /* An old assignment (for rollback purposes).  */
 646   struct cost_pair *old_cp;
 647
 648   /* A new assignment.  */
 649   struct cost_pair *new_cp;
 650
 651   /* Next change in the list.  */
 652   struct iv_ca_delta *next;
 653 };
 654
 655 /* Bound on number of candidates below that all candidates are considered.  */
 656
 657 #define CONSIDER_ALL_CANDIDATES_BOUND \
 658   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 659
 660 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 661    optimizing such a loop would help, and it would take ages).  */
 662
 663 #define MAX_CONSIDERED_GROUPS \
 664   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 665
 666 /* If there are at most this number of ivs in the set, try removing unnecessary
 667    ivs from the set always.  */
 668
 669 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 670   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 671
 672 /* The list of trees for that the decl_rtl field must be reset is stored
 673    here.  */
 674
 675 static vec<tree> decl_rtl_to_reset;
 676
 677 static comp_cost force_expr_to_var_cost (tree, bool);
 678
 679 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 680
 681 edge
 682 single_dom_exit (struct loop *loop)
 683 {
 684   edge exit = single_exit (loop);
 685
 686   if (!exit)
 687     return NULL;
 688
 689   if (!just_once_each_iteration_p (loop, exit->src))
 690     return NULL;
 691
 692   return exit;
 693 }
 694
 695 /* Dumps information about the induction variable IV to FILE.  Don't dump
 696    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 697    preceding spaces indicated by INDENT_LEVEL.  */
 698
 699 void
 700 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 701 {
 702   const char *p;
 703   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 704
 705   if (indent_level > 4)
 706     indent_level = 4;
 707   p = spaces + 8 - (indent_level << 1);
 708
 709   fprintf (file, "%sIV struct:\n", p);
 710   if (iv->ssa_name && dump_name)
 711     {
 712       fprintf (file, "%s  SSA_NAME:\t", p);
 713       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 714       fprintf (file, "\n");
 715     }
 716
 717   fprintf (file, "%s  Type:\t", p);
 718   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 719   fprintf (file, "\n");
 720
 721   fprintf (file, "%s  Base:\t", p);
 722   print_generic_expr (file, iv->base, TDF_SLIM);
 723   fprintf (file, "\n");
 724
 725   fprintf (file, "%s  Step:\t", p);
 726   print_generic_expr (file, iv->step, TDF_SLIM);
 727   fprintf (file, "\n");
 728
 729   if (iv->base_object)
 730     {
 731       fprintf (file, "%s  Object:\t", p);
 732       print_generic_expr (file, iv->base_object, TDF_SLIM);
 733       fprintf (file, "\n");
 734     }
 735
 736   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 737
 738   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 739            p, iv->no_overflow ? "No-overflow" : "Overflow");
 740 }
 741
 742 /* Dumps information about the USE to FILE.  */
 743
 744 void
 745 dump_use (FILE *file, struct iv_use *use)
 746 {
 747   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 748   fprintf (file, "    At stmt:\t");
 749   print_gimple_stmt (file, use->stmt, 0, 0);
 750   fprintf (file, "    At pos:\t");
 751   if (use->op_p)
 752     print_generic_expr (file, *use->op_p, TDF_SLIM);
 753   fprintf (file, "\n");
 754   dump_iv (file, use->iv, false, 2);
 755 }
 756
 757 /* Dumps information about the uses to FILE.  */
 758
 759 void
 760 dump_groups (FILE *file, struct ivopts_data *data)
 761 {
 762   unsigned i, j;
 763   struct iv_group *group;
 764
 765   for (i = 0; i < data->vgroups.length (); i++)
 766     {
 767       group = data->vgroups[i];
 768       fprintf (file, "Group %d:\n", group->id);
 769       if (group->type == USE_NONLINEAR_EXPR)
 770         fprintf (file, "  Type:\tGENERIC\n");
 771       else if (group->type == USE_ADDRESS)
 772         fprintf (file, "  Type:\tADDRESS\n");
 773       else
 774         {
 775           gcc_assert (group->type == USE_COMPARE);
 776           fprintf (file, "  Type:\tCOMPARE\n");
 777         }
 778       for (j = 0; j < group->vuses.length (); j++)
 779         dump_use (file, group->vuses[j]);
 780     }
 781 }
 782
 783 /* Dumps information about induction variable candidate CAND to FILE.  */
 784
 785 void
 786 dump_cand (FILE *file, struct iv_cand *cand)
 787 {
 788   struct iv *iv = cand->iv;
 789
 790   fprintf (file, "Candidate %d:\n", cand->id);
 791   if (cand->inv_vars)
 792     {
 793       fprintf (file, "  Depend on inv.vars: ");
 794       dump_bitmap (file, cand->inv_vars);
 795     }
 796   if (cand->inv_exprs)
 797     {
 798       fprintf (file, "  Depend on inv.exprs: ");
 799       dump_bitmap (file, cand->inv_exprs);
 800     }
 801
 802   if (cand->var_before)
 803     {
 804       fprintf (file, "  Var befor: ");
 805       print_generic_expr (file, cand->var_before, TDF_SLIM);
 806       fprintf (file, "\n");
 807     }
 808   if (cand->var_after)
 809     {
 810       fprintf (file, "  Var after: ");
 811       print_generic_expr (file, cand->var_after, TDF_SLIM);
 812       fprintf (file, "\n");
 813     }
 814
 815   switch (cand->pos)
 816     {
 817     case IP_NORMAL:
 818       fprintf (file, "  Incr POS: before exit test\n");
 819       break;
 820
 821     case IP_BEFORE_USE:
 822       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 823       break;
 824
 825     case IP_AFTER_USE:
 826       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 827       break;
 828
 829     case IP_END:
 830       fprintf (file, "  Incr POS: at end\n");
 831       break;
 832
 833     case IP_ORIGINAL:
 834       fprintf (file, "  Incr POS: orig biv\n");
 835       break;
 836     }
 837
 838   dump_iv (file, iv, false, 1);
 839 }
 840
 841 /* Returns the info for ssa version VER.  */
 842
 843 static inline struct version_info *
 844 ver_info (struct ivopts_data *data, unsigned ver)
 845 {
 846   return data->version_info + ver;
 847 }
 848
 849 /* Returns the info for ssa name NAME.  */
 850
 851 static inline struct version_info *
 852 name_info (struct ivopts_data *data, tree name)
 853 {
 854   return ver_info (data, SSA_NAME_VERSION (name));
 855 }
 856
 857 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 858    emitted in LOOP.  */
 859
 860 static bool
 861 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 862 {
 863   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 864
 865   gcc_assert (bb);
 866
 867   if (sbb == loop->latch)
 868     return true;
 869
 870   if (sbb != bb)
 871     return false;
 872
 873   return stmt == last_stmt (bb);
 874 }
 875
 876 /* Returns true if STMT if after the place where the original induction
 877    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 878    if the positions are identical.  */
 879
 880 static bool
 881 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 882 {
 883   basic_block cand_bb = gimple_bb (cand->incremented_at);
 884   basic_block stmt_bb = gimple_bb (stmt);
 885
 886   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 887     return false;
 888
 889   if (stmt_bb != cand_bb)
 890     return true;
 891
 892   if (true_if_equal
 893       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 894     return true;
 895   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 896 }
 897
 898 /* Returns true if STMT if after the place where the induction variable
 899    CAND is incremented in LOOP.  */
 900
 901 static bool
 902 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 903 {
 904   switch (cand->pos)
 905     {
 906     case IP_END:
 907       return false;
 908
 909     case IP_NORMAL:
 910       return stmt_after_ip_normal_pos (loop, stmt);
 911
 912     case IP_ORIGINAL:
 913     case IP_AFTER_USE:
 914       return stmt_after_inc_pos (cand, stmt, false);
 915
 916     case IP_BEFORE_USE:
 917       return stmt_after_inc_pos (cand, stmt, true);
 918
 919     default:
 920       gcc_unreachable ();
 921     }
 922 }
 923
 924 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 925
 926 static bool
 927 abnormal_ssa_name_p (tree exp)
 928 {
 929   if (!exp)
 930     return false;
 931
 932   if (TREE_CODE (exp) != SSA_NAME)
 933     return false;
 934
 935   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 936 }
 937
 938 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 939    abnormal phi node.  Callback for for_each_index.  */
 940
 941 static bool
 942 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 943                                   void *data ATTRIBUTE_UNUSED)
 944 {
 945   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 946     {
 947       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 948         return false;
 949       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 950         return false;
 951     }
 952
 953   return !abnormal_ssa_name_p (*index);
 954 }
 955
 956 /* Returns true if EXPR contains a ssa name that occurs in an
 957    abnormal phi node.  */
 958
 959 bool
 960 contains_abnormal_ssa_name_p (tree expr)
 961 {
 962   enum tree_code code;
 963   enum tree_code_class codeclass;
 964
 965   if (!expr)
 966     return false;
 967
 968   code = TREE_CODE (expr);
 969   codeclass = TREE_CODE_CLASS (code);
 970
 971   if (code == SSA_NAME)
 972     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 973
 974   if (code == INTEGER_CST
 975       || is_gimple_min_invariant (expr))
 976     return false;
 977
 978   if (code == ADDR_EXPR)
 979     return !for_each_index (&TREE_OPERAND (expr, 0),
 980                             idx_contains_abnormal_ssa_name_p,
 981                             NULL);
 982
 983   if (code == COND_EXPR)
 984     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 985       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 986       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 987
 988   switch (codeclass)
 989     {
 990     case tcc_binary:
 991     case tcc_comparison:
 992       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 993         return true;
 994
 995       /* Fallthru.  */
 996     case tcc_unary:
 997       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 998         return true;
 999
1000       break;
1001
1002     default:
1003       gcc_unreachable ();
1004     }
1005
1006   return false;
1007 }
1008
1009 /*  Returns the structure describing number of iterations determined from
1010     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1011
1012 static struct tree_niter_desc *
1013 niter_for_exit (struct ivopts_data *data, edge exit)
1014 {
1015   struct tree_niter_desc *desc;
1016   tree_niter_desc **slot;
1017
1018   if (!data->niters)
1019     {
1020       data->niters = new hash_map<edge, tree_niter_desc *>;
1021       slot = NULL;
1022     }
1023   else
1024     slot = data->niters->get (exit);
1025
1026   if (!slot)
1027     {
1028       /* Try to determine number of iterations.  We cannot safely work with ssa
1029          names that appear in phi nodes on abnormal edges, so that we do not
1030          create overlapping life ranges for them (PR 27283).  */
1031       desc = XNEW (struct tree_niter_desc);
1032       if (!number_of_iterations_exit (data->current_loop,
1033                                       exit, desc, true)
1034           || contains_abnormal_ssa_name_p (desc->niter))
1035         {
1036           XDELETE (desc);
1037           desc = NULL;
1038         }
1039       data->niters->put (exit, desc);
1040     }
1041   else
1042     desc = *slot;
1043
1044   return desc;
1045 }
1046
1047 /* Returns the structure describing number of iterations determined from
1048    single dominating exit of DATA->current_loop, or NULL if something
1049    goes wrong.  */
1050
1051 static struct tree_niter_desc *
1052 niter_for_single_dom_exit (struct ivopts_data *data)
1053 {
1054   edge exit = single_dom_exit (data->current_loop);
1055
1056   if (!exit)
1057     return NULL;
1058
1059   return niter_for_exit (data, exit);
1060 }
1061
1062 /* Initializes data structures used by the iv optimization pass, stored
1063    in DATA.  */
1064
1065 static void
1066 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1067 {
1068   data->version_info_size = 2 * num_ssa_names;
1069   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1070   data->relevant = BITMAP_ALLOC (NULL);
1071   data->important_candidates = BITMAP_ALLOC (NULL);
1072   data->max_inv_var_id = 0;
1073   data->max_inv_expr_id = 0;
1074   data->niters = NULL;
1075   data->vgroups.create (20);
1076   data->vcands.create (20);
1077   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1078   data->name_expansion_cache = NULL;
1079   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1080   data->iv_common_cands.create (20);
1081   decl_rtl_to_reset.create (20);
1082   gcc_obstack_init (&data->iv_obstack);
1083 }
1084
1085 /* Returns a memory object to that EXPR points.  In case we are able to
1086    determine that it does not point to any such object, NULL is returned.  */
1087
1088 static tree
1089 determine_base_object (tree expr)
1090 {
1091   enum tree_code code = TREE_CODE (expr);
1092   tree base, obj;
1093
1094   /* If this is a pointer casted to any type, we need to determine
1095      the base object for the pointer; so handle conversions before
1096      throwing away non-pointer expressions.  */
1097   if (CONVERT_EXPR_P (expr))
1098     return determine_base_object (TREE_OPERAND (expr, 0));
1099
1100   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1101     return NULL_TREE;
1102
1103   switch (code)
1104     {
1105     case INTEGER_CST:
1106       return NULL_TREE;
1107
1108     case ADDR_EXPR:
1109       obj = TREE_OPERAND (expr, 0);
1110       base = get_base_address (obj);
1111
1112       if (!base)
1113         return expr;
1114
1115       if (TREE_CODE (base) == MEM_REF)
1116         return determine_base_object (TREE_OPERAND (base, 0));
1117
1118       return fold_convert (ptr_type_node,
1119                            build_fold_addr_expr (base));
1120
1121     case POINTER_PLUS_EXPR:
1122       return determine_base_object (TREE_OPERAND (expr, 0));
1123
1124     case PLUS_EXPR:
1125     case MINUS_EXPR:
1126       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1127       gcc_unreachable ();
1128
1129     default:
1130       return fold_convert (ptr_type_node, expr);
1131     }
1132 }
1133
1134 /* Return true if address expression with non-DECL_P operand appears
1135    in EXPR.  */
1136
1137 static bool
1138 contain_complex_addr_expr (tree expr)
1139 {
1140   bool res = false;
1141
1142   STRIP_NOPS (expr);
1143   switch (TREE_CODE (expr))
1144     {
1145     case POINTER_PLUS_EXPR:
1146     case PLUS_EXPR:
1147     case MINUS_EXPR:
1148       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1149       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1150       break;
1151
1152     case ADDR_EXPR:
1153       return (!DECL_P (TREE_OPERAND (expr, 0)));
1154
1155     default:
1156       return false;
1157     }
1158
1159   return res;
1160 }
1161
1162 /* Allocates an induction variable with given initial value BASE and step STEP
1163    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1164
1165 static struct iv *
1166 alloc_iv (struct ivopts_data *data, tree base, tree step,
1167           bool no_overflow = false)
1168 {
1169   tree expr = base;
1170   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1171                                               sizeof (struct iv));
1172   gcc_assert (step != NULL_TREE);
1173
1174   /* Lower address expression in base except ones with DECL_P as operand.
1175      By doing this:
1176        1) More accurate cost can be computed for address expressions;
1177        2) Duplicate candidates won't be created for bases in different
1178           forms, like &a[0] and &a.  */
1179   STRIP_NOPS (expr);
1180   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1181       || contain_complex_addr_expr (expr))
1182     {
1183       aff_tree comb;
1184       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1185       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1186     }
1187
1188   iv->base = base;
1189   iv->base_object = determine_base_object (base);
1190   iv->step = step;
1191   iv->biv_p = false;
1192   iv->nonlin_use = NULL;
1193   iv->ssa_name = NULL_TREE;
1194   if (!no_overflow
1195        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1196                               base, step))
1197     no_overflow = true;
1198   iv->no_overflow = no_overflow;
1199   iv->have_address_use = false;
1200
1201   return iv;
1202 }
1203
1204 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1205    doesn't overflow.  */
1206
1207 static void
1208 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1209         bool no_overflow)
1210 {
1211   struct version_info *info = name_info (data, iv);
1212
1213   gcc_assert (!info->iv);
1214
1215   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1216   info->iv = alloc_iv (data, base, step, no_overflow);
1217   info->iv->ssa_name = iv;
1218 }
1219
1220 /* Finds induction variable declaration for VAR.  */
1221
1222 static struct iv *
1223 get_iv (struct ivopts_data *data, tree var)
1224 {
1225   basic_block bb;
1226   tree type = TREE_TYPE (var);
1227
1228   if (!POINTER_TYPE_P (type)
1229       && !INTEGRAL_TYPE_P (type))
1230     return NULL;
1231
1232   if (!name_info (data, var)->iv)
1233     {
1234       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1235
1236       if (!bb
1237           || !flow_bb_inside_loop_p (data->current_loop, bb))
1238         set_iv (data, var, var, build_int_cst (type, 0), true);
1239     }
1240
1241   return name_info (data, var)->iv;
1242 }
1243
1244 /* Return the first non-invariant ssa var found in EXPR.  */
1245
1246 static tree
1247 extract_single_var_from_expr (tree expr)
1248 {
1249   int i, n;
1250   tree tmp;
1251   enum tree_code code;
1252
1253   if (!expr || is_gimple_min_invariant (expr))
1254     return NULL;
1255
1256   code = TREE_CODE (expr);
1257   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1258     {
1259       n = TREE_OPERAND_LENGTH (expr);
1260       for (i = 0; i < n; i++)
1261         {
1262           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1263
1264           if (tmp)
1265             return tmp;
1266         }
1267     }
1268   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1269 }
1270
1271 /* Finds basic ivs.  */
1272
1273 static bool
1274 find_bivs (struct ivopts_data *data)
1275 {
1276   gphi *phi;
1277   affine_iv iv;
1278   tree step, type, base, stop;
1279   bool found = false;
1280   struct loop *loop = data->current_loop;
1281   gphi_iterator psi;
1282
1283   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1284     {
1285       phi = psi.phi ();
1286
1287       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1288         continue;
1289
1290       if (virtual_operand_p (PHI_RESULT (phi)))
1291         continue;
1292
1293       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1294         continue;
1295
1296       if (integer_zerop (iv.step))
1297         continue;
1298
1299       step = iv.step;
1300       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1301       /* Stop expanding iv base at the first ssa var referred by iv step.
1302          Ideally we should stop at any ssa var, because that's expensive
1303          and unusual to happen, we just do it on the first one.
1304
1305          See PR64705 for the rationale.  */
1306       stop = extract_single_var_from_expr (step);
1307       base = expand_simple_operations (base, stop);
1308       if (contains_abnormal_ssa_name_p (base)
1309           || contains_abnormal_ssa_name_p (step))
1310         continue;
1311
1312       type = TREE_TYPE (PHI_RESULT (phi));
1313       base = fold_convert (type, base);
1314       if (step)
1315         {
1316           if (POINTER_TYPE_P (type))
1317             step = convert_to_ptrofftype (step);
1318           else
1319             step = fold_convert (type, step);
1320         }
1321
1322       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1323       found = true;
1324     }
1325
1326   return found;
1327 }
1328
1329 /* Marks basic ivs.  */
1330
1331 static void
1332 mark_bivs (struct ivopts_data *data)
1333 {
1334   gphi *phi;
1335   gimple *def;
1336   tree var;
1337   struct iv *iv, *incr_iv;
1338   struct loop *loop = data->current_loop;
1339   basic_block incr_bb;
1340   gphi_iterator psi;
1341
1342   data->bivs_not_used_in_addr = 0;
1343   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1344     {
1345       phi = psi.phi ();
1346
1347       iv = get_iv (data, PHI_RESULT (phi));
1348       if (!iv)
1349         continue;
1350
1351       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1352       def = SSA_NAME_DEF_STMT (var);
1353       /* Don't mark iv peeled from other one as biv.  */
1354       if (def
1355           && gimple_code (def) == GIMPLE_PHI
1356           && gimple_bb (def) == loop->header)
1357         continue;
1358
1359       incr_iv = get_iv (data, var);
1360       if (!incr_iv)
1361         continue;
1362
1363       /* If the increment is in the subloop, ignore it.  */
1364       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1365       if (incr_bb->loop_father != data->current_loop
1366           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1367         continue;
1368
1369       iv->biv_p = true;
1370       incr_iv->biv_p = true;
1371       if (iv->no_overflow)
1372         data->bivs_not_used_in_addr++;
1373       if (incr_iv->no_overflow)
1374         data->bivs_not_used_in_addr++;
1375     }
1376 }
1377
1378 /* Checks whether STMT defines a linear induction variable and stores its
1379    parameters to IV.  */
1380
1381 static bool
1382 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1383 {
1384   tree lhs, stop;
1385   struct loop *loop = data->current_loop;
1386
1387   iv->base = NULL_TREE;
1388   iv->step = NULL_TREE;
1389
1390   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1391     return false;
1392
1393   lhs = gimple_assign_lhs (stmt);
1394   if (TREE_CODE (lhs) != SSA_NAME)
1395     return false;
1396
1397   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1398     return false;
1399
1400   /* Stop expanding iv base at the first ssa var referred by iv step.
1401      Ideally we should stop at any ssa var, because that's expensive
1402      and unusual to happen, we just do it on the first one.
1403
1404      See PR64705 for the rationale.  */
1405   stop = extract_single_var_from_expr (iv->step);
1406   iv->base = expand_simple_operations (iv->base, stop);
1407   if (contains_abnormal_ssa_name_p (iv->base)
1408       || contains_abnormal_ssa_name_p (iv->step))
1409     return false;
1410
1411   /* If STMT could throw, then do not consider STMT as defining a GIV.
1412      While this will suppress optimizations, we can not safely delete this
1413      GIV and associated statements, even if it appears it is not used.  */
1414   if (stmt_could_throw_p (stmt))
1415     return false;
1416
1417   return true;
1418 }
1419
1420 /* Finds general ivs in statement STMT.  */
1421
1422 static void
1423 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1424 {
1425   affine_iv iv;
1426
1427   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1428     return;
1429
1430   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1431 }
1432
1433 /* Finds general ivs in basic block BB.  */
1434
1435 static void
1436 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1437 {
1438   gimple_stmt_iterator bsi;
1439
1440   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1441     find_givs_in_stmt (data, gsi_stmt (bsi));
1442 }
1443
1444 /* Finds general ivs.  */
1445
1446 static void
1447 find_givs (struct ivopts_data *data)
1448 {
1449   struct loop *loop = data->current_loop;
1450   basic_block *body = get_loop_body_in_dom_order (loop);
1451   unsigned i;
1452
1453   for (i = 0; i < loop->num_nodes; i++)
1454     find_givs_in_bb (data, body[i]);
1455   free (body);
1456 }
1457
1458 /* For each ssa name defined in LOOP determines whether it is an induction
1459    variable and if so, its initial value and step.  */
1460
1461 static bool
1462 find_induction_variables (struct ivopts_data *data)
1463 {
1464   unsigned i;
1465   bitmap_iterator bi;
1466
1467   if (!find_bivs (data))
1468     return false;
1469
1470   find_givs (data);
1471   mark_bivs (data);
1472
1473   if (dump_file && (dump_flags & TDF_DETAILS))
1474     {
1475       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1476
1477       if (niter)
1478         {
1479           fprintf (dump_file, "  number of iterations ");
1480           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1481           if (!integer_zerop (niter->may_be_zero))
1482             {
1483               fprintf (dump_file, "; zero if ");
1484               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1485             }
1486           fprintf (dump_file, "\n");
1487         };
1488
1489       fprintf (dump_file, "\n<Induction Vars>:\n");
1490       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1491         {
1492           struct version_info *info = ver_info (data, i);
1493           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1494             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1495         }
1496     }
1497
1498   return true;
1499 }
1500
1501 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1502    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1503    is the const offset stripped from IV base; for other types use, both
1504    are zero by default.  */
1505
1506 static struct iv_use *
1507 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1508             gimple *stmt, enum use_type type, tree addr_base,
1509             unsigned HOST_WIDE_INT addr_offset)
1510 {
1511   struct iv_use *use = XCNEW (struct iv_use);
1512
1513   use->id = group->vuses.length ();
1514   use->group_id = group->id;
1515   use->type = type;
1516   use->iv = iv;
1517   use->stmt = stmt;
1518   use->op_p = use_p;
1519   use->addr_base = addr_base;
1520   use->addr_offset = addr_offset;
1521
1522   group->vuses.safe_push (use);
1523   return use;
1524 }
1525
1526 /* Checks whether OP is a loop-level invariant and if so, records it.
1527    NONLINEAR_USE is true if the invariant is used in a way we do not
1528    handle specially.  */
1529
1530 static void
1531 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1532 {
1533   basic_block bb;
1534   struct version_info *info;
1535
1536   if (TREE_CODE (op) != SSA_NAME
1537       || virtual_operand_p (op))
1538     return;
1539
1540   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1541   if (bb
1542       && flow_bb_inside_loop_p (data->current_loop, bb))
1543     return;
1544
1545   info = name_info (data, op);
1546   info->name = op;
1547   info->has_nonlin_use |= nonlinear_use;
1548   if (!info->inv_id)
1549     info->inv_id = ++data->max_inv_var_id;
1550   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1551 }
1552
1553 static tree
1554 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1555
1556 /* Record a group of TYPE.  */
1557
1558 static struct iv_group *
1559 record_group (struct ivopts_data *data, enum use_type type)
1560 {
1561   struct iv_group *group = XCNEW (struct iv_group);
1562
1563   group->id = data->vgroups.length ();
1564   group->type = type;
1565   group->related_cands = BITMAP_ALLOC (NULL);
1566   group->vuses.create (1);
1567
1568   data->vgroups.safe_push (group);
1569   return group;
1570 }
1571
1572 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1573    New group will be created if there is no existing group for the use.  */
1574
1575 static struct iv_use *
1576 record_group_use (struct ivopts_data *data, tree *use_p,
1577                   struct iv *iv, gimple *stmt, enum use_type type)
1578 {
1579   tree addr_base = NULL;
1580   struct iv_group *group = NULL;
1581   unsigned HOST_WIDE_INT addr_offset = 0;
1582
1583   /* Record non address type use in a new group.  */
1584   if (type == USE_ADDRESS && iv->base_object)
1585     {
1586       unsigned int i;
1587
1588       addr_base = strip_offset (iv->base, &addr_offset);
1589       for (i = 0; i < data->vgroups.length (); i++)
1590         {
1591           struct iv_use *use;
1592
1593           group = data->vgroups[i];
1594           use = group->vuses[0];
1595           if (use->type != USE_ADDRESS || !use->iv->base_object)
1596             continue;
1597
1598           /* Check if it has the same stripped base and step.  */
1599           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1600               && operand_equal_p (iv->step, use->iv->step, 0)
1601               && operand_equal_p (addr_base, use->addr_base, 0))
1602             break;
1603         }
1604       if (i == data->vgroups.length ())
1605         group = NULL;
1606     }
1607
1608   if (!group)
1609     group = record_group (data, type);
1610
1611   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1612 }
1613
1614 /* Checks whether the use OP is interesting and if so, records it.  */
1615
1616 static struct iv_use *
1617 find_interesting_uses_op (struct ivopts_data *data, tree op)
1618 {
1619   struct iv *iv;
1620   gimple *stmt;
1621   struct iv_use *use;
1622
1623   if (TREE_CODE (op) != SSA_NAME)
1624     return NULL;
1625
1626   iv = get_iv (data, op);
1627   if (!iv)
1628     return NULL;
1629
1630   if (iv->nonlin_use)
1631     {
1632       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1633       return iv->nonlin_use;
1634     }
1635
1636   if (integer_zerop (iv->step))
1637     {
1638       record_invariant (data, op, true);
1639       return NULL;
1640     }
1641
1642   stmt = SSA_NAME_DEF_STMT (op);
1643   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1644
1645   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1646   iv->nonlin_use = use;
1647   return use;
1648 }
1649
1650 /* Indicate how compare type iv_use can be handled.  */
1651 enum comp_iv_rewrite
1652 {
1653   COMP_IV_NA,
1654   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1655   COMP_IV_EXPR,
1656   /* We may rewrite compare type iv_uses on both sides of comparison by
1657      expressing value of each iv_use.  */
1658   COMP_IV_EXPR_2,
1659   /* We may rewrite compare type iv_use by expressing value of the iv_use
1660      or by eliminating it with other iv_cand.  */
1661   COMP_IV_ELIM
1662 };
1663
1664 /* Given a condition in statement STMT, checks whether it is a compare
1665    of an induction variable and an invariant.  If this is the case,
1666    CONTROL_VAR is set to location of the iv, BOUND to the location of
1667    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1668    induction variable descriptions, and true is returned.  If this is not
1669    the case, CONTROL_VAR and BOUND are set to the arguments of the
1670    condition and false is returned.  */
1671
1672 static enum comp_iv_rewrite
1673 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1674                        tree **control_var, tree **bound,
1675                        struct iv **iv_var, struct iv **iv_bound)
1676 {
1677   /* The objects returned when COND has constant operands.  */
1678   static struct iv const_iv;
1679   static tree zero;
1680   tree *op0 = &zero, *op1 = &zero;
1681   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1682   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1683
1684   if (gimple_code (stmt) == GIMPLE_COND)
1685     {
1686       gcond *cond_stmt = as_a <gcond *> (stmt);
1687       op0 = gimple_cond_lhs_ptr (cond_stmt);
1688       op1 = gimple_cond_rhs_ptr (cond_stmt);
1689     }
1690   else
1691     {
1692       op0 = gimple_assign_rhs1_ptr (stmt);
1693       op1 = gimple_assign_rhs2_ptr (stmt);
1694     }
1695
1696   zero = integer_zero_node;
1697   const_iv.step = integer_zero_node;
1698
1699   if (TREE_CODE (*op0) == SSA_NAME)
1700     iv0 = get_iv (data, *op0);
1701   if (TREE_CODE (*op1) == SSA_NAME)
1702     iv1 = get_iv (data, *op1);
1703
1704   /* If both sides of comparison are IVs.  We can express ivs on both end.  */
1705   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1706     {
1707       rewrite_type = COMP_IV_EXPR_2;
1708       goto end;
1709     }
1710
1711   /* If none side of comparison is IV.  */
1712   if ((!iv0 || integer_zerop (iv0->step))
1713       && (!iv1 || integer_zerop (iv1->step)))
1714     goto end;
1715
1716   /* Control variable may be on the other side.  */
1717   if (!iv0 || integer_zerop (iv0->step))
1718     {
1719       std::swap (op0, op1);
1720       std::swap (iv0, iv1);
1721     }
1722   /* If one side is IV and the other side isn't loop invariant.  */
1723   if (!iv1)
1724     rewrite_type = COMP_IV_EXPR;
1725   /* If one side is IV and the other side is loop invariant.  */
1726   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1727     rewrite_type = COMP_IV_ELIM;
1728
1729 end:
1730   if (control_var)
1731     *control_var = op0;
1732   if (iv_var)
1733     *iv_var = iv0;
1734   if (bound)
1735     *bound = op1;
1736   if (iv_bound)
1737     *iv_bound = iv1;
1738
1739   return rewrite_type;
1740 }
1741
1742 /* Checks whether the condition in STMT is interesting and if so,
1743    records it.  */
1744
1745 static void
1746 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1747 {
1748   tree *var_p, *bound_p;
1749   struct iv *var_iv, *bound_iv;
1750   enum comp_iv_rewrite ret;
1751
1752   ret = extract_cond_operands (data, stmt,
1753                                &var_p, &bound_p, &var_iv, &bound_iv);
1754   if (ret == COMP_IV_NA)
1755     {
1756       find_interesting_uses_op (data, *var_p);
1757       find_interesting_uses_op (data, *bound_p);
1758       return;
1759     }
1760
1761   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE);
1762   /* Record compare type iv_use for iv on the other side of comparison.  */
1763   if (ret == COMP_IV_EXPR_2)
1764     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE);
1765 }
1766
1767 /* Returns the outermost loop EXPR is obviously invariant in
1768    relative to the loop LOOP, i.e. if all its operands are defined
1769    outside of the returned loop.  Returns NULL if EXPR is not
1770    even obviously invariant in LOOP.  */
1771
1772 struct loop *
1773 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1774 {
1775   basic_block def_bb;
1776   unsigned i, len;
1777
1778   if (is_gimple_min_invariant (expr))
1779     return current_loops->tree_root;
1780
1781   if (TREE_CODE (expr) == SSA_NAME)
1782     {
1783       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1784       if (def_bb)
1785         {
1786           if (flow_bb_inside_loop_p (loop, def_bb))
1787             return NULL;
1788           return superloop_at_depth (loop,
1789                                      loop_depth (def_bb->loop_father) + 1);
1790         }
1791
1792       return current_loops->tree_root;
1793     }
1794
1795   if (!EXPR_P (expr))
1796     return NULL;
1797
1798   unsigned maxdepth = 0;
1799   len = TREE_OPERAND_LENGTH (expr);
1800   for (i = 0; i < len; i++)
1801     {
1802       struct loop *ivloop;
1803       if (!TREE_OPERAND (expr, i))
1804         continue;
1805
1806       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1807       if (!ivloop)
1808         return NULL;
1809       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1810     }
1811
1812   return superloop_at_depth (loop, maxdepth);
1813 }
1814
1815 /* Returns true if expression EXPR is obviously invariant in LOOP,
1816    i.e. if all its operands are defined outside of the LOOP.  LOOP
1817    should not be the function body.  */
1818
1819 bool
1820 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1821 {
1822   basic_block def_bb;
1823   unsigned i, len;
1824
1825   gcc_assert (loop_depth (loop) > 0);
1826
1827   if (is_gimple_min_invariant (expr))
1828     return true;
1829
1830   if (TREE_CODE (expr) == SSA_NAME)
1831     {
1832       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1833       if (def_bb
1834           && flow_bb_inside_loop_p (loop, def_bb))
1835         return false;
1836
1837       return true;
1838     }
1839
1840   if (!EXPR_P (expr))
1841     return false;
1842
1843   len = TREE_OPERAND_LENGTH (expr);
1844   for (i = 0; i < len; i++)
1845     if (TREE_OPERAND (expr, i)
1846         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1847       return false;
1848
1849   return true;
1850 }
1851
1852 /* Given expression EXPR which computes inductive values with respect
1853    to loop recorded in DATA, this function returns biv from which EXPR
1854    is derived by tracing definition chains of ssa variables in EXPR.  */
1855
1856 static struct iv*
1857 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1858 {
1859   struct iv *iv;
1860   unsigned i, n;
1861   tree e2, e1;
1862   enum tree_code code;
1863   gimple *stmt;
1864
1865   if (expr == NULL_TREE)
1866     return NULL;
1867
1868   if (is_gimple_min_invariant (expr))
1869     return NULL;
1870
1871   code = TREE_CODE (expr);
1872   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1873     {
1874       n = TREE_OPERAND_LENGTH (expr);
1875       for (i = 0; i < n; i++)
1876         {
1877           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1878           if (iv)
1879             return iv;
1880         }
1881     }
1882
1883   /* Stop if it's not ssa name.  */
1884   if (code != SSA_NAME)
1885     return NULL;
1886
1887   iv = get_iv (data, expr);
1888   if (!iv || integer_zerop (iv->step))
1889     return NULL;
1890   else if (iv->biv_p)
1891     return iv;
1892
1893   stmt = SSA_NAME_DEF_STMT (expr);
1894   if (gphi *phi = dyn_cast <gphi *> (stmt))
1895     {
1896       ssa_op_iter iter;
1897       use_operand_p use_p;
1898       basic_block phi_bb = gimple_bb (phi);
1899
1900       /* Skip loop header PHI that doesn't define biv.  */
1901       if (phi_bb->loop_father == data->current_loop)
1902         return NULL;
1903
1904       if (virtual_operand_p (gimple_phi_result (phi)))
1905         return NULL;
1906
1907       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1908         {
1909           tree use = USE_FROM_PTR (use_p);
1910           iv = find_deriving_biv_for_expr (data, use);
1911           if (iv)
1912             return iv;
1913         }
1914       return NULL;
1915     }
1916   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1917     return NULL;
1918
1919   e1 = gimple_assign_rhs1 (stmt);
1920   code = gimple_assign_rhs_code (stmt);
1921   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1922     return find_deriving_biv_for_expr (data, e1);
1923
1924   switch (code)
1925     {
1926     case MULT_EXPR:
1927     case PLUS_EXPR:
1928     case MINUS_EXPR:
1929     case POINTER_PLUS_EXPR:
1930       /* Increments, decrements and multiplications by a constant
1931          are simple.  */
1932       e2 = gimple_assign_rhs2 (stmt);
1933       iv = find_deriving_biv_for_expr (data, e2);
1934       if (iv)
1935         return iv;
1936       gcc_fallthrough ();
1937
1938     CASE_CONVERT:
1939       /* Casts are simple.  */
1940       return find_deriving_biv_for_expr (data, e1);
1941
1942     default:
1943       break;
1944     }
1945
1946   return NULL;
1947 }
1948
1949 /* Record BIV, its predecessor and successor that they are used in
1950    address type uses.  */
1951
1952 static void
1953 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1954 {
1955   unsigned i;
1956   tree type, base_1, base_2;
1957   bitmap_iterator bi;
1958
1959   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1960       || biv->have_address_use || !biv->no_overflow)
1961     return;
1962
1963   type = TREE_TYPE (biv->base);
1964   if (!INTEGRAL_TYPE_P (type))
1965     return;
1966
1967   biv->have_address_use = true;
1968   data->bivs_not_used_in_addr--;
1969   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1970   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1971     {
1972       struct iv *iv = ver_info (data, i)->iv;
1973
1974       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1975           || iv->have_address_use || !iv->no_overflow)
1976         continue;
1977
1978       if (type != TREE_TYPE (iv->base)
1979           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1980         continue;
1981
1982       if (!operand_equal_p (biv->step, iv->step, 0))
1983         continue;
1984
1985       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1986       if (operand_equal_p (base_1, iv->base, 0)
1987           || operand_equal_p (base_2, biv->base, 0))
1988         {
1989           iv->have_address_use = true;
1990           data->bivs_not_used_in_addr--;
1991         }
1992     }
1993 }
1994
1995 /* Cumulates the steps of indices into DATA and replaces their values with the
1996    initial ones.  Returns false when the value of the index cannot be determined.
1997    Callback for for_each_index.  */
1998
1999 struct ifs_ivopts_data
2000 {
2001   struct ivopts_data *ivopts_data;
2002   gimple *stmt;
2003   tree step;
2004 };
2005
2006 static bool
2007 idx_find_step (tree base, tree *idx, void *data)
2008 {
2009   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2010   struct iv *iv;
2011   bool use_overflow_semantics = false;
2012   tree step, iv_base, iv_step, lbound, off;
2013   struct loop *loop = dta->ivopts_data->current_loop;
2014
2015   /* If base is a component ref, require that the offset of the reference
2016      be invariant.  */
2017   if (TREE_CODE (base) == COMPONENT_REF)
2018     {
2019       off = component_ref_field_offset (base);
2020       return expr_invariant_in_loop_p (loop, off);
2021     }
2022
2023   /* If base is array, first check whether we will be able to move the
2024      reference out of the loop (in order to take its address in strength
2025      reduction).  In order for this to work we need both lower bound
2026      and step to be loop invariants.  */
2027   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2028     {
2029       /* Moreover, for a range, the size needs to be invariant as well.  */
2030       if (TREE_CODE (base) == ARRAY_RANGE_REF
2031           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2032         return false;
2033
2034       step = array_ref_element_size (base);
2035       lbound = array_ref_low_bound (base);
2036
2037       if (!expr_invariant_in_loop_p (loop, step)
2038           || !expr_invariant_in_loop_p (loop, lbound))
2039         return false;
2040     }
2041
2042   if (TREE_CODE (*idx) != SSA_NAME)
2043     return true;
2044
2045   iv = get_iv (dta->ivopts_data, *idx);
2046   if (!iv)
2047     return false;
2048
2049   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2050           *&x[0], which is not folded and does not trigger the
2051           ARRAY_REF path below.  */
2052   *idx = iv->base;
2053
2054   if (integer_zerop (iv->step))
2055     return true;
2056
2057   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2058     {
2059       step = array_ref_element_size (base);
2060
2061       /* We only handle addresses whose step is an integer constant.  */
2062       if (TREE_CODE (step) != INTEGER_CST)
2063         return false;
2064     }
2065   else
2066     /* The step for pointer arithmetics already is 1 byte.  */
2067     step = size_one_node;
2068
2069   iv_base = iv->base;
2070   iv_step = iv->step;
2071   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2072     use_overflow_semantics = true;
2073
2074   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2075                             sizetype, &iv_base, &iv_step, dta->stmt,
2076                             use_overflow_semantics))
2077     {
2078       /* The index might wrap.  */
2079       return false;
2080     }
2081
2082   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2083   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2084
2085   if (dta->ivopts_data->bivs_not_used_in_addr)
2086     {
2087       if (!iv->biv_p)
2088         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2089
2090       record_biv_for_address_use (dta->ivopts_data, iv);
2091     }
2092   return true;
2093 }
2094
2095 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2096    object is passed to it in DATA.  */
2097
2098 static bool
2099 idx_record_use (tree base, tree *idx,
2100                 void *vdata)
2101 {
2102   struct ivopts_data *data = (struct ivopts_data *) vdata;
2103   find_interesting_uses_op (data, *idx);
2104   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2105     {
2106       find_interesting_uses_op (data, array_ref_element_size (base));
2107       find_interesting_uses_op (data, array_ref_low_bound (base));
2108     }
2109   return true;
2110 }
2111
2112 /* If we can prove that TOP = cst * BOT for some constant cst,
2113    store cst to MUL and return true.  Otherwise return false.
2114    The returned value is always sign-extended, regardless of the
2115    signedness of TOP and BOT.  */
2116
2117 static bool
2118 constant_multiple_of (tree top, tree bot, widest_int *mul)
2119 {
2120   tree mby;
2121   enum tree_code code;
2122   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2123   widest_int res, p0, p1;
2124
2125   STRIP_NOPS (top);
2126   STRIP_NOPS (bot);
2127
2128   if (operand_equal_p (top, bot, 0))
2129     {
2130       *mul = 1;
2131       return true;
2132     }
2133
2134   code = TREE_CODE (top);
2135   switch (code)
2136     {
2137     case MULT_EXPR:
2138       mby = TREE_OPERAND (top, 1);
2139       if (TREE_CODE (mby) != INTEGER_CST)
2140         return false;
2141
2142       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2143         return false;
2144
2145       *mul = wi::sext (res * wi::to_widest (mby), precision);
2146       return true;
2147
2148     case PLUS_EXPR:
2149     case MINUS_EXPR:
2150       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2151           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2152         return false;
2153
2154       if (code == MINUS_EXPR)
2155         p1 = -p1;
2156       *mul = wi::sext (p0 + p1, precision);
2157       return true;
2158
2159     case INTEGER_CST:
2160       if (TREE_CODE (bot) != INTEGER_CST)
2161         return false;
2162
2163       p0 = widest_int::from (top, SIGNED);
2164       p1 = widest_int::from (bot, SIGNED);
2165       if (p1 == 0)
2166         return false;
2167       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2168       return res == 0;
2169
2170     default:
2171       return false;
2172     }
2173 }
2174
2175 /* Return true if memory reference REF with step STEP may be unaligned.  */
2176
2177 static bool
2178 may_be_unaligned_p (tree ref, tree step)
2179 {
2180   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2181      thus they are not misaligned.  */
2182   if (TREE_CODE (ref) == TARGET_MEM_REF)
2183     return false;
2184
2185   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2186   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2187     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2188
2189   unsigned HOST_WIDE_INT bitpos;
2190   unsigned int ref_align;
2191   get_object_alignment_1 (ref, &ref_align, &bitpos);
2192   if (ref_align < align
2193       || (bitpos % align) != 0
2194       || (bitpos % BITS_PER_UNIT) != 0)
2195     return true;
2196
2197   unsigned int trailing_zeros = tree_ctz (step);
2198   if (trailing_zeros < HOST_BITS_PER_INT
2199       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2200     return true;
2201
2202   return false;
2203 }
2204
2205 /* Return true if EXPR may be non-addressable.   */
2206
2207 bool
2208 may_be_nonaddressable_p (tree expr)
2209 {
2210   switch (TREE_CODE (expr))
2211     {
2212     case TARGET_MEM_REF:
2213       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2214          target, thus they are always addressable.  */
2215       return false;
2216
2217     case MEM_REF:
2218       /* Likewise for MEM_REFs, modulo the storage order.  */
2219       return REF_REVERSE_STORAGE_ORDER (expr);
2220
2221     case BIT_FIELD_REF:
2222       if (REF_REVERSE_STORAGE_ORDER (expr))
2223         return true;
2224       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2225
2226     case COMPONENT_REF:
2227       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2228         return true;
2229       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2230              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2231
2232     case ARRAY_REF:
2233     case ARRAY_RANGE_REF:
2234       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2235         return true;
2236       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2237
2238     case VIEW_CONVERT_EXPR:
2239       /* This kind of view-conversions may wrap non-addressable objects
2240          and make them look addressable.  After some processing the
2241          non-addressability may be uncovered again, causing ADDR_EXPRs
2242          of inappropriate objects to be built.  */
2243       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2244           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2245         return true;
2246       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2247
2248     CASE_CONVERT:
2249       return true;
2250
2251     default:
2252       break;
2253     }
2254
2255   return false;
2256 }
2257
2258 /* Finds addresses in *OP_P inside STMT.  */
2259
2260 static void
2261 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2262                                tree *op_p)
2263 {
2264   tree base = *op_p, step = size_zero_node;
2265   struct iv *civ;
2266   struct ifs_ivopts_data ifs_ivopts_data;
2267
2268   /* Do not play with volatile memory references.  A bit too conservative,
2269      perhaps, but safe.  */
2270   if (gimple_has_volatile_ops (stmt))
2271     goto fail;
2272
2273   /* Ignore bitfields for now.  Not really something terribly complicated
2274      to handle.  TODO.  */
2275   if (TREE_CODE (base) == BIT_FIELD_REF)
2276     goto fail;
2277
2278   base = unshare_expr (base);
2279
2280   if (TREE_CODE (base) == TARGET_MEM_REF)
2281     {
2282       tree type = build_pointer_type (TREE_TYPE (base));
2283       tree astep;
2284
2285       if (TMR_BASE (base)
2286           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2287         {
2288           civ = get_iv (data, TMR_BASE (base));
2289           if (!civ)
2290             goto fail;
2291
2292           TMR_BASE (base) = civ->base;
2293           step = civ->step;
2294         }
2295       if (TMR_INDEX2 (base)
2296           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2297         {
2298           civ = get_iv (data, TMR_INDEX2 (base));
2299           if (!civ)
2300             goto fail;
2301
2302           TMR_INDEX2 (base) = civ->base;
2303           step = civ->step;
2304         }
2305       if (TMR_INDEX (base)
2306           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2307         {
2308           civ = get_iv (data, TMR_INDEX (base));
2309           if (!civ)
2310             goto fail;
2311
2312           TMR_INDEX (base) = civ->base;
2313           astep = civ->step;
2314
2315           if (astep)
2316             {
2317               if (TMR_STEP (base))
2318                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2319
2320               step = fold_build2 (PLUS_EXPR, type, step, astep);
2321             }
2322         }
2323
2324       if (integer_zerop (step))
2325         goto fail;
2326       base = tree_mem_ref_addr (type, base);
2327     }
2328   else
2329     {
2330       ifs_ivopts_data.ivopts_data = data;
2331       ifs_ivopts_data.stmt = stmt;
2332       ifs_ivopts_data.step = size_zero_node;
2333       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2334           || integer_zerop (ifs_ivopts_data.step))
2335         goto fail;
2336       step = ifs_ivopts_data.step;
2337
2338       /* Check that the base expression is addressable.  This needs
2339          to be done after substituting bases of IVs into it.  */
2340       if (may_be_nonaddressable_p (base))
2341         goto fail;
2342
2343       /* Moreover, on strict alignment platforms, check that it is
2344          sufficiently aligned.  */
2345       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2346         goto fail;
2347
2348       base = build_fold_addr_expr (base);
2349
2350       /* Substituting bases of IVs into the base expression might
2351          have caused folding opportunities.  */
2352       if (TREE_CODE (base) == ADDR_EXPR)
2353         {
2354           tree *ref = &TREE_OPERAND (base, 0);
2355           while (handled_component_p (*ref))
2356             ref = &TREE_OPERAND (*ref, 0);
2357           if (TREE_CODE (*ref) == MEM_REF)
2358             {
2359               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2360                                       TREE_OPERAND (*ref, 0),
2361                                       TREE_OPERAND (*ref, 1));
2362               if (tem)
2363                 *ref = tem;
2364             }
2365         }
2366     }
2367
2368   civ = alloc_iv (data, base, step);
2369   /* Fail if base object of this memory reference is unknown.  */
2370   if (civ->base_object == NULL_TREE)
2371     goto fail;
2372
2373   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2374   return;
2375
2376 fail:
2377   for_each_index (op_p, idx_record_use, data);
2378 }
2379
2380 /* Finds and records invariants used in STMT.  */
2381
2382 static void
2383 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2384 {
2385   ssa_op_iter iter;
2386   use_operand_p use_p;
2387   tree op;
2388
2389   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2390     {
2391       op = USE_FROM_PTR (use_p);
2392       record_invariant (data, op, false);
2393     }
2394 }
2395
2396 /* Finds interesting uses of induction variables in the statement STMT.  */
2397
2398 static void
2399 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2400 {
2401   struct iv *iv;
2402   tree op, *lhs, *rhs;
2403   ssa_op_iter iter;
2404   use_operand_p use_p;
2405   enum tree_code code;
2406
2407   find_invariants_stmt (data, stmt);
2408
2409   if (gimple_code (stmt) == GIMPLE_COND)
2410     {
2411       find_interesting_uses_cond (data, stmt);
2412       return;
2413     }
2414
2415   if (is_gimple_assign (stmt))
2416     {
2417       lhs = gimple_assign_lhs_ptr (stmt);
2418       rhs = gimple_assign_rhs1_ptr (stmt);
2419
2420       if (TREE_CODE (*lhs) == SSA_NAME)
2421         {
2422           /* If the statement defines an induction variable, the uses are not
2423              interesting by themselves.  */
2424
2425           iv = get_iv (data, *lhs);
2426
2427           if (iv && !integer_zerop (iv->step))
2428             return;
2429         }
2430
2431       code = gimple_assign_rhs_code (stmt);
2432       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2433           && (REFERENCE_CLASS_P (*rhs)
2434               || is_gimple_val (*rhs)))
2435         {
2436           if (REFERENCE_CLASS_P (*rhs))
2437             find_interesting_uses_address (data, stmt, rhs);
2438           else
2439             find_interesting_uses_op (data, *rhs);
2440
2441           if (REFERENCE_CLASS_P (*lhs))
2442             find_interesting_uses_address (data, stmt, lhs);
2443           return;
2444         }
2445       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2446         {
2447           find_interesting_uses_cond (data, stmt);
2448           return;
2449         }
2450
2451       /* TODO -- we should also handle address uses of type
2452
2453          memory = call (whatever);
2454
2455          and
2456
2457          call (memory).  */
2458     }
2459
2460   if (gimple_code (stmt) == GIMPLE_PHI
2461       && gimple_bb (stmt) == data->current_loop->header)
2462     {
2463       iv = get_iv (data, PHI_RESULT (stmt));
2464
2465       if (iv && !integer_zerop (iv->step))
2466         return;
2467     }
2468
2469   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2470     {
2471       op = USE_FROM_PTR (use_p);
2472
2473       if (TREE_CODE (op) != SSA_NAME)
2474         continue;
2475
2476       iv = get_iv (data, op);
2477       if (!iv)
2478         continue;
2479
2480       find_interesting_uses_op (data, op);
2481     }
2482 }
2483
2484 /* Finds interesting uses of induction variables outside of loops
2485    on loop exit edge EXIT.  */
2486
2487 static void
2488 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2489 {
2490   gphi *phi;
2491   gphi_iterator psi;
2492   tree def;
2493
2494   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2495     {
2496       phi = psi.phi ();
2497       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2498       if (!virtual_operand_p (def))
2499         find_interesting_uses_op (data, def);
2500     }
2501 }
2502
2503 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2504    mode for memory reference represented by USE.  */
2505
2506 static GTY (()) vec<rtx, va_gc> *addr_list;
2507
2508 static bool
2509 addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset)
2510 {
2511   rtx reg, addr;
2512   unsigned list_index;
2513   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2514   machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2515
2516   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2517   if (list_index >= vec_safe_length (addr_list))
2518     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2519
2520   addr = (*addr_list)[list_index];
2521   if (!addr)
2522     {
2523       addr_mode = targetm.addr_space.address_mode (as);
2524       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2525       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2526       (*addr_list)[list_index] = addr;
2527     }
2528   else
2529     addr_mode = GET_MODE (addr);
2530
2531   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2532   return (memory_address_addr_space_p (mem_mode, addr, as));
2533 }
2534
2535 /* Comparison function to sort group in ascending order of addr_offset.  */
2536
2537 static int
2538 group_compare_offset (const void *a, const void *b)
2539 {
2540   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2541   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2542
2543   if ((*u1)->addr_offset != (*u2)->addr_offset)
2544     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2545   else
2546     return 0;
2547 }
2548
2549 /* Check if small groups should be split.  Return true if no group
2550    contains more than two uses with distinct addr_offsets.  Return
2551    false otherwise.  We want to split such groups because:
2552
2553      1) Small groups don't have much benefit and may interfer with
2554         general candidate selection.
2555      2) Size for problem with only small groups is usually small and
2556         general algorithm can handle it well.
2557
2558    TODO -- Above claim may not hold when we want to merge memory
2559    accesses with conseuctive addresses.  */
2560
2561 static bool
2562 split_small_address_groups_p (struct ivopts_data *data)
2563 {
2564   unsigned int i, j, distinct = 1;
2565   struct iv_use *pre;
2566   struct iv_group *group;
2567
2568   for (i = 0; i < data->vgroups.length (); i++)
2569     {
2570       group = data->vgroups[i];
2571       if (group->vuses.length () == 1)
2572         continue;
2573
2574       gcc_assert (group->type == USE_ADDRESS);
2575       if (group->vuses.length () == 2)
2576         {
2577           if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2578             std::swap (group->vuses[0], group->vuses[1]);
2579         }
2580       else
2581         group->vuses.qsort (group_compare_offset);
2582
2583       if (distinct > 2)
2584         continue;
2585
2586       distinct = 1;
2587       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2588         {
2589           if (group->vuses[j]->addr_offset != pre->addr_offset)
2590             {
2591               pre = group->vuses[j];
2592               distinct++;
2593             }
2594
2595           if (distinct > 2)
2596             break;
2597         }
2598     }
2599
2600   return (distinct <= 2);
2601 }
2602
2603 /* For each group of address type uses, this function further groups
2604    these uses according to the maximum offset supported by target's
2605    [base + offset] addressing mode.  */
2606
2607 static void
2608 split_address_groups (struct ivopts_data *data)
2609 {
2610   unsigned int i, j;
2611   /* Always split group.  */
2612   bool split_p = split_small_address_groups_p (data);
2613
2614   for (i = 0; i < data->vgroups.length (); i++)
2615     {
2616       struct iv_group *new_group = NULL;
2617       struct iv_group *group = data->vgroups[i];
2618       struct iv_use *use = group->vuses[0];
2619
2620       use->id = 0;
2621       use->group_id = group->id;
2622       if (group->vuses.length () == 1)
2623         continue;
2624
2625       gcc_assert (group->type == USE_ADDRESS);
2626
2627       for (j = 1; j < group->vuses.length ();)
2628         {
2629           struct iv_use *next = group->vuses[j];
2630           HOST_WIDE_INT offset = next->addr_offset - use->addr_offset;
2631
2632           /* Split group if aksed to, or the offset against the first
2633              use can't fit in offset part of addressing mode.  IV uses
2634              having the same offset are still kept in one group.  */
2635           if (offset != 0 &&
2636               (split_p || !addr_offset_valid_p (use, offset)))
2637             {
2638               if (!new_group)
2639                 new_group = record_group (data, group->type);
2640               group->vuses.ordered_remove (j);
2641               new_group->vuses.safe_push (next);
2642               continue;
2643             }
2644
2645           next->id = j;
2646           next->group_id = group->id;
2647           j++;
2648         }
2649     }
2650 }
2651
2652 /* Finds uses of the induction variables that are interesting.  */
2653
2654 static void
2655 find_interesting_uses (struct ivopts_data *data)
2656 {
2657   basic_block bb;
2658   gimple_stmt_iterator bsi;
2659   basic_block *body = get_loop_body (data->current_loop);
2660   unsigned i;
2661   edge e;
2662
2663   for (i = 0; i < data->current_loop->num_nodes; i++)
2664     {
2665       edge_iterator ei;
2666       bb = body[i];
2667
2668       FOR_EACH_EDGE (e, ei, bb->succs)
2669         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2670             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2671           find_interesting_uses_outside (data, e);
2672
2673       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2674         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2675       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2676         if (!is_gimple_debug (gsi_stmt (bsi)))
2677           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2678     }
2679
2680   split_address_groups (data);
2681
2682   if (dump_file && (dump_flags & TDF_DETAILS))
2683     {
2684       bitmap_iterator bi;
2685
2686       fprintf (dump_file, "\n<Invariant Vars>:\n");
2687       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2688         {
2689           struct version_info *info = ver_info (data, i);
2690           if (info->inv_id)
2691             {
2692               fprintf (dump_file, "Inv %d:\t", info->inv_id);
2693               print_generic_expr (dump_file, info->name, TDF_SLIM);
2694               fprintf (dump_file, "%s\n",
2695                        info->has_nonlin_use ? "" : "\t(eliminable)");
2696             }
2697         }
2698
2699       fprintf (dump_file, "\n<IV Groups>:\n");
2700       dump_groups (dump_file, data);
2701       fprintf (dump_file, "\n");
2702     }
2703
2704   free (body);
2705 }
2706
2707 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2708    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2709    we are at the top-level of the processed address.  */
2710
2711 static tree
2712 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2713                 HOST_WIDE_INT *offset)
2714 {
2715   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2716   enum tree_code code;
2717   tree type, orig_type = TREE_TYPE (expr);
2718   HOST_WIDE_INT off0, off1, st;
2719   tree orig_expr = expr;
2720
2721   STRIP_NOPS (expr);
2722
2723   type = TREE_TYPE (expr);
2724   code = TREE_CODE (expr);
2725   *offset = 0;
2726
2727   switch (code)
2728     {
2729     case INTEGER_CST:
2730       if (!cst_and_fits_in_hwi (expr)
2731           || integer_zerop (expr))
2732         return orig_expr;
2733
2734       *offset = int_cst_value (expr);
2735       return build_int_cst (orig_type, 0);
2736
2737     case POINTER_PLUS_EXPR:
2738     case PLUS_EXPR:
2739     case MINUS_EXPR:
2740       op0 = TREE_OPERAND (expr, 0);
2741       op1 = TREE_OPERAND (expr, 1);
2742
2743       op0 = strip_offset_1 (op0, false, false, &off0);
2744       op1 = strip_offset_1 (op1, false, false, &off1);
2745
2746       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2747       if (op0 == TREE_OPERAND (expr, 0)
2748           && op1 == TREE_OPERAND (expr, 1))
2749         return orig_expr;
2750
2751       if (integer_zerop (op1))
2752         expr = op0;
2753       else if (integer_zerop (op0))
2754         {
2755           if (code == MINUS_EXPR)
2756             expr = fold_build1 (NEGATE_EXPR, type, op1);
2757           else
2758             expr = op1;
2759         }
2760       else
2761         expr = fold_build2 (code, type, op0, op1);
2762
2763       return fold_convert (orig_type, expr);
2764
2765     case MULT_EXPR:
2766       op1 = TREE_OPERAND (expr, 1);
2767       if (!cst_and_fits_in_hwi (op1))
2768         return orig_expr;
2769
2770       op0 = TREE_OPERAND (expr, 0);
2771       op0 = strip_offset_1 (op0, false, false, &off0);
2772       if (op0 == TREE_OPERAND (expr, 0))
2773         return orig_expr;
2774
2775       *offset = off0 * int_cst_value (op1);
2776       if (integer_zerop (op0))
2777         expr = op0;
2778       else
2779         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2780
2781       return fold_convert (orig_type, expr);
2782
2783     case ARRAY_REF:
2784     case ARRAY_RANGE_REF:
2785       if (!inside_addr)
2786         return orig_expr;
2787
2788       step = array_ref_element_size (expr);
2789       if (!cst_and_fits_in_hwi (step))
2790         break;
2791
2792       st = int_cst_value (step);
2793       op1 = TREE_OPERAND (expr, 1);
2794       op1 = strip_offset_1 (op1, false, false, &off1);
2795       *offset = off1 * st;
2796
2797       if (top_compref
2798           && integer_zerop (op1))
2799         {
2800           /* Strip the component reference completely.  */
2801           op0 = TREE_OPERAND (expr, 0);
2802           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2803           *offset += off0;
2804           return op0;
2805         }
2806       break;
2807
2808     case COMPONENT_REF:
2809       {
2810         tree field;
2811
2812         if (!inside_addr)
2813           return orig_expr;
2814
2815         tmp = component_ref_field_offset (expr);
2816         field = TREE_OPERAND (expr, 1);
2817         if (top_compref
2818             && cst_and_fits_in_hwi (tmp)
2819             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2820           {
2821             HOST_WIDE_INT boffset, abs_off;
2822
2823             /* Strip the component reference completely.  */
2824             op0 = TREE_OPERAND (expr, 0);
2825             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2826             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2827             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2828             if (boffset < 0)
2829               abs_off = -abs_off;
2830
2831             *offset = off0 + int_cst_value (tmp) + abs_off;
2832             return op0;
2833           }
2834       }
2835       break;
2836
2837     case ADDR_EXPR:
2838       op0 = TREE_OPERAND (expr, 0);
2839       op0 = strip_offset_1 (op0, true, true, &off0);
2840       *offset += off0;
2841
2842       if (op0 == TREE_OPERAND (expr, 0))
2843         return orig_expr;
2844
2845       expr = build_fold_addr_expr (op0);
2846       return fold_convert (orig_type, expr);
2847
2848     case MEM_REF:
2849       /* ???  Offset operand?  */
2850       inside_addr = false;
2851       break;
2852
2853     default:
2854       return orig_expr;
2855     }
2856
2857   /* Default handling of expressions for that we want to recurse into
2858      the first operand.  */
2859   op0 = TREE_OPERAND (expr, 0);
2860   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2861   *offset += off0;
2862
2863   if (op0 == TREE_OPERAND (expr, 0)
2864       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2865     return orig_expr;
2866
2867   expr = copy_node (expr);
2868   TREE_OPERAND (expr, 0) = op0;
2869   if (op1)
2870     TREE_OPERAND (expr, 1) = op1;
2871
2872   /* Inside address, we might strip the top level component references,
2873      thus changing type of the expression.  Handling of ADDR_EXPR
2874      will fix that.  */
2875   expr = fold_convert (orig_type, expr);
2876
2877   return expr;
2878 }
2879
2880 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2881
2882 static tree
2883 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2884 {
2885   HOST_WIDE_INT off;
2886   tree core = strip_offset_1 (expr, false, false, &off);
2887   *offset = off;
2888   return core;
2889 }
2890
2891 /* Returns variant of TYPE that can be used as base for different uses.
2892    We return unsigned type with the same precision, which avoids problems
2893    with overflows.  */
2894
2895 static tree
2896 generic_type_for (tree type)
2897 {
2898   if (POINTER_TYPE_P (type))
2899     return unsigned_type_for (type);
2900
2901   if (TYPE_UNSIGNED (type))
2902     return type;
2903
2904   return unsigned_type_for (type);
2905 }
2906
2907 /* Private data for walk_tree.  */
2908
2909 struct walk_tree_data
2910 {
2911   bitmap *inv_vars;
2912   struct ivopts_data *idata;
2913 };
2914
2915 /* Callback function for walk_tree, it records invariants and symbol
2916    reference in *EXPR_P.  DATA is the structure storing result info.  */
2917
2918 static tree
2919 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2920 {
2921   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2922   struct version_info *info;
2923
2924   if (TREE_CODE (*expr_p) != SSA_NAME)
2925     return NULL_TREE;
2926
2927   info = name_info (wdata->idata, *expr_p);
2928   if (!info->inv_id || info->has_nonlin_use)
2929     return NULL_TREE;
2930
2931   if (!*wdata->inv_vars)
2932     *wdata->inv_vars = BITMAP_ALLOC (NULL);
2933   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2934
2935   return NULL_TREE;
2936 }
2937
2938 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
2939    store it.  */
2940
2941 static inline void
2942 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2943 {
2944   struct walk_tree_data wdata;
2945
2946   if (!inv_vars)
2947     return;
2948
2949   wdata.idata = data;
2950   wdata.inv_vars = inv_vars;
2951   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2952 }
2953
2954 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
2955    will be recorded if it doesn't exist yet.  Given below two exprs:
2956      inv_expr + cst1, inv_expr + cst2
2957    It's hard to make decision whether constant part should be stripped
2958    or not.  We choose to not strip based on below facts:
2959      1) We need to count ADD cost for constant part if it's stripped,
2960         which is't always trivial where this functions is called.
2961      2) Stripping constant away may be conflict with following loop
2962         invariant hoisting pass.
2963      3) Not stripping constant away results in more invariant exprs,
2964         which usually leads to decision preferring lower reg pressure.  */
2965
2966 static iv_inv_expr_ent *
2967 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2968 {
2969   STRIP_NOPS (inv_expr);
2970
2971   if (TREE_CODE (inv_expr) == INTEGER_CST || TREE_CODE (inv_expr) == SSA_NAME)
2972     return NULL;
2973
2974   /* Don't strip constant part away as we used to.  */
2975
2976   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
2977   struct iv_inv_expr_ent ent;
2978   ent.expr = inv_expr;
2979   ent.hash = iterative_hash_expr (inv_expr, 0);
2980   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
2981
2982   if (!*slot)
2983     {
2984       *slot = XNEW (struct iv_inv_expr_ent);
2985       (*slot)->expr = inv_expr;
2986       (*slot)->hash = ent.hash;
2987       (*slot)->id = ++data->max_inv_expr_id;
2988     }
2989
2990   return *slot;
2991 }
2992
2993 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2994    position to POS.  If USE is not NULL, the candidate is set as related to
2995    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2996    replacement of the final value of the iv by a direct computation.  */
2997
2998 static struct iv_cand *
2999 add_candidate_1 (struct ivopts_data *data,
3000                  tree base, tree step, bool important, enum iv_position pos,
3001                  struct iv_use *use, gimple *incremented_at,
3002                  struct iv *orig_iv = NULL)
3003 {
3004   unsigned i;
3005   struct iv_cand *cand = NULL;
3006   tree type, orig_type;
3007
3008   gcc_assert (base && step);
3009
3010   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3011      live, but the ivopts code may replace a real pointer with one
3012      pointing before or after the memory block that is then adjusted
3013      into the memory block during the loop.  FIXME: It would likely be
3014      better to actually force the pointer live and still use ivopts;
3015      for example, it would be enough to write the pointer into memory
3016      and keep it there until after the loop.  */
3017   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3018     return NULL;
3019
3020   /* For non-original variables, make sure their values are computed in a type
3021      that does not invoke undefined behavior on overflows (since in general,
3022      we cannot prove that these induction variables are non-wrapping).  */
3023   if (pos != IP_ORIGINAL)
3024     {
3025       orig_type = TREE_TYPE (base);
3026       type = generic_type_for (orig_type);
3027       if (type != orig_type)
3028         {
3029           base = fold_convert (type, base);
3030           step = fold_convert (type, step);
3031         }
3032     }
3033
3034   for (i = 0; i < data->vcands.length (); i++)
3035     {
3036       cand = data->vcands[i];
3037
3038       if (cand->pos != pos)
3039         continue;
3040
3041       if (cand->incremented_at != incremented_at
3042           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3043               && cand->ainc_use != use))
3044         continue;
3045
3046       if (operand_equal_p (base, cand->iv->base, 0)
3047           && operand_equal_p (step, cand->iv->step, 0)
3048           && (TYPE_PRECISION (TREE_TYPE (base))
3049               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3050         break;
3051     }
3052
3053   if (i == data->vcands.length ())
3054     {
3055       cand = XCNEW (struct iv_cand);
3056       cand->id = i;
3057       cand->iv = alloc_iv (data, base, step);
3058       cand->pos = pos;
3059       if (pos != IP_ORIGINAL)
3060         {
3061           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3062           cand->var_after = cand->var_before;
3063         }
3064       cand->important = important;
3065       cand->incremented_at = incremented_at;
3066       data->vcands.safe_push (cand);
3067
3068       if (TREE_CODE (step) != INTEGER_CST)
3069         {
3070           find_inv_vars (data, &step, &cand->inv_vars);
3071
3072           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3073           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3074           if (inv_expr != NULL)
3075             {
3076               cand->inv_exprs = cand->inv_vars;
3077               cand->inv_vars = NULL;
3078               if (cand->inv_exprs)
3079                 bitmap_clear (cand->inv_exprs);
3080               else
3081                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3082
3083               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3084             }
3085         }
3086
3087       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3088         cand->ainc_use = use;
3089       else
3090         cand->ainc_use = NULL;
3091
3092       cand->orig_iv = orig_iv;
3093       if (dump_file && (dump_flags & TDF_DETAILS))
3094         dump_cand (dump_file, cand);
3095     }
3096
3097   cand->important |= important;
3098
3099   /* Relate candidate to the group for which it is added.  */
3100   if (use)
3101     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3102
3103   return cand;
3104 }
3105
3106 /* Returns true if incrementing the induction variable at the end of the LOOP
3107    is allowed.
3108
3109    The purpose is to avoid splitting latch edge with a biv increment, thus
3110    creating a jump, possibly confusing other optimization passes and leaving
3111    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3112    available (so we do not have a better alternative), or if the latch edge
3113    is already nonempty.  */
3114
3115 static bool
3116 allow_ip_end_pos_p (struct loop *loop)
3117 {
3118   if (!ip_normal_pos (loop))
3119     return true;
3120
3121   if (!empty_block_p (ip_end_pos (loop)))
3122     return true;
3123
3124   return false;
3125 }
3126
3127 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3128    Important field is set to IMPORTANT.  */
3129
3130 static void
3131 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3132                         bool important, struct iv_use *use)
3133 {
3134   basic_block use_bb = gimple_bb (use->stmt);
3135   machine_mode mem_mode;
3136   unsigned HOST_WIDE_INT cstepi;
3137
3138   /* If we insert the increment in any position other than the standard
3139      ones, we must ensure that it is incremented once per iteration.
3140      It must not be in an inner nested loop, or one side of an if
3141      statement.  */
3142   if (use_bb->loop_father != data->current_loop
3143       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3144       || stmt_could_throw_p (use->stmt)
3145       || !cst_and_fits_in_hwi (step))
3146     return;
3147
3148   cstepi = int_cst_value (step);
3149
3150   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3151   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3152         || USE_STORE_PRE_INCREMENT (mem_mode))
3153        && GET_MODE_SIZE (mem_mode) == cstepi)
3154       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3155            || USE_STORE_PRE_DECREMENT (mem_mode))
3156           && GET_MODE_SIZE (mem_mode) == -cstepi))
3157     {
3158       enum tree_code code = MINUS_EXPR;
3159       tree new_base;
3160       tree new_step = step;
3161
3162       if (POINTER_TYPE_P (TREE_TYPE (base)))
3163         {
3164           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3165           code = POINTER_PLUS_EXPR;
3166         }
3167       else
3168         new_step = fold_convert (TREE_TYPE (base), new_step);
3169       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3170       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3171                        use->stmt);
3172     }
3173   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3174         || USE_STORE_POST_INCREMENT (mem_mode))
3175        && GET_MODE_SIZE (mem_mode) == cstepi)
3176       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3177            || USE_STORE_POST_DECREMENT (mem_mode))
3178           && GET_MODE_SIZE (mem_mode) == -cstepi))
3179     {
3180       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3181                        use->stmt);
3182     }
3183 }
3184
3185 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3186    position to POS.  If USE is not NULL, the candidate is set as related to
3187    it.  The candidate computation is scheduled before exit condition and at
3188    the end of loop.  */
3189
3190 static void
3191 add_candidate (struct ivopts_data *data,
3192                tree base, tree step, bool important, struct iv_use *use,
3193                struct iv *orig_iv = NULL)
3194 {
3195   if (ip_normal_pos (data->current_loop))
3196     add_candidate_1 (data, base, step, important,
3197                      IP_NORMAL, use, NULL, orig_iv);
3198   if (ip_end_pos (data->current_loop)
3199       && allow_ip_end_pos_p (data->current_loop))
3200     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3201 }
3202
3203 /* Adds standard iv candidates.  */
3204
3205 static void
3206 add_standard_iv_candidates (struct ivopts_data *data)
3207 {
3208   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3209
3210   /* The same for a double-integer type if it is still fast enough.  */
3211   if (TYPE_PRECISION
3212         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3213       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3214     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3215                    build_int_cst (long_integer_type_node, 1), true, NULL);
3216
3217   /* The same for a double-integer type if it is still fast enough.  */
3218   if (TYPE_PRECISION
3219         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3220       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3221     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3222                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3223 }
3224
3225
3226 /* Adds candidates bases on the old induction variable IV.  */
3227
3228 static void
3229 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3230 {
3231   gimple *phi;
3232   tree def;
3233   struct iv_cand *cand;
3234
3235   /* Check if this biv is used in address type use.  */
3236   if (iv->no_overflow  && iv->have_address_use
3237       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3238       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3239     {
3240       tree base = fold_convert (sizetype, iv->base);
3241       tree step = fold_convert (sizetype, iv->step);
3242
3243       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3244       add_candidate (data, base, step, true, NULL, iv);
3245       /* Add iv cand of the original type only if it has nonlinear use.  */
3246       if (iv->nonlin_use)
3247         add_candidate (data, iv->base, iv->step, true, NULL);
3248     }
3249   else
3250     add_candidate (data, iv->base, iv->step, true, NULL);
3251
3252   /* The same, but with initial value zero.  */
3253   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3254     add_candidate (data, size_int (0), iv->step, true, NULL);
3255   else
3256     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3257                    iv->step, true, NULL);
3258
3259   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3260   if (gimple_code (phi) == GIMPLE_PHI)
3261     {
3262       /* Additionally record the possibility of leaving the original iv
3263          untouched.  */
3264       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3265       /* Don't add candidate if it's from another PHI node because
3266          it's an affine iv appearing in the form of PEELED_CHREC.  */
3267       phi = SSA_NAME_DEF_STMT (def);
3268       if (gimple_code (phi) != GIMPLE_PHI)
3269         {
3270           cand = add_candidate_1 (data,
3271                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3272                                   SSA_NAME_DEF_STMT (def));
3273           if (cand)
3274             {
3275               cand->var_before = iv->ssa_name;
3276               cand->var_after = def;
3277             }
3278         }
3279       else
3280         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3281     }
3282 }
3283
3284 /* Adds candidates based on the old induction variables.  */
3285
3286 static void
3287 add_iv_candidate_for_bivs (struct ivopts_data *data)
3288 {
3289   unsigned i;
3290   struct iv *iv;
3291   bitmap_iterator bi;
3292
3293   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3294     {
3295       iv = ver_info (data, i)->iv;
3296       if (iv && iv->biv_p && !integer_zerop (iv->step))
3297         add_iv_candidate_for_biv (data, iv);
3298     }
3299 }
3300
3301 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3302
3303 static void
3304 record_common_cand (struct ivopts_data *data, tree base,
3305                     tree step, struct iv_use *use)
3306 {
3307   struct iv_common_cand ent;
3308   struct iv_common_cand **slot;
3309
3310   ent.base = base;
3311   ent.step = step;
3312   ent.hash = iterative_hash_expr (base, 0);
3313   ent.hash = iterative_hash_expr (step, ent.hash);
3314
3315   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3316   if (*slot == NULL)
3317     {
3318       *slot = new iv_common_cand ();
3319       (*slot)->base = base;
3320       (*slot)->step = step;
3321       (*slot)->uses.create (8);
3322       (*slot)->hash = ent.hash;
3323       data->iv_common_cands.safe_push ((*slot));
3324     }
3325
3326   gcc_assert (use != NULL);
3327   (*slot)->uses.safe_push (use);
3328   return;
3329 }
3330
3331 /* Comparison function used to sort common candidates.  */
3332
3333 static int
3334 common_cand_cmp (const void *p1, const void *p2)
3335 {
3336   unsigned n1, n2;
3337   const struct iv_common_cand *const *const ccand1
3338     = (const struct iv_common_cand *const *)p1;
3339   const struct iv_common_cand *const *const ccand2
3340     = (const struct iv_common_cand *const *)p2;
3341
3342   n1 = (*ccand1)->uses.length ();
3343   n2 = (*ccand2)->uses.length ();
3344   return n2 - n1;
3345 }
3346
3347 /* Adds IV candidates based on common candidated recorded.  */
3348
3349 static void
3350 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3351 {
3352   unsigned i, j;
3353   struct iv_cand *cand_1, *cand_2;
3354
3355   data->iv_common_cands.qsort (common_cand_cmp);
3356   for (i = 0; i < data->iv_common_cands.length (); i++)
3357     {
3358       struct iv_common_cand *ptr = data->iv_common_cands[i];
3359
3360       /* Only add IV candidate if it's derived from multiple uses.  */
3361       if (ptr->uses.length () <= 1)
3362         break;
3363
3364       cand_1 = NULL;
3365       cand_2 = NULL;
3366       if (ip_normal_pos (data->current_loop))
3367         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3368                                   false, IP_NORMAL, NULL, NULL);
3369
3370       if (ip_end_pos (data->current_loop)
3371           && allow_ip_end_pos_p (data->current_loop))
3372         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3373                                   false, IP_END, NULL, NULL);
3374
3375       /* Bind deriving uses and the new candidates.  */
3376       for (j = 0; j < ptr->uses.length (); j++)
3377         {
3378           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3379           if (cand_1)
3380             bitmap_set_bit (group->related_cands, cand_1->id);
3381           if (cand_2)
3382             bitmap_set_bit (group->related_cands, cand_2->id);
3383         }
3384     }
3385
3386   /* Release data since it is useless from this point.  */
3387   data->iv_common_cand_tab->empty ();
3388   data->iv_common_cands.truncate (0);
3389 }
3390
3391 /* Adds candidates based on the value of USE's iv.  */
3392
3393 static void
3394 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3395 {
3396   unsigned HOST_WIDE_INT offset;
3397   tree base;
3398   tree basetype;
3399   struct iv *iv = use->iv;
3400
3401   add_candidate (data, iv->base, iv->step, false, use);
3402
3403   /* Record common candidate for use in case it can be shared by others.  */
3404   record_common_cand (data, iv->base, iv->step, use);
3405
3406   /* Record common candidate with initial value zero.  */
3407   basetype = TREE_TYPE (iv->base);
3408   if (POINTER_TYPE_P (basetype))
3409     basetype = sizetype;
3410   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3411
3412   /* Record common candidate with constant offset stripped in base.
3413      Like the use itself, we also add candidate directly for it.  */
3414   base = strip_offset (iv->base, &offset);
3415   if (offset || base != iv->base)
3416     {
3417       record_common_cand (data, base, iv->step, use);
3418       add_candidate (data, base, iv->step, false, use);
3419     }
3420
3421   /* Record common candidate with base_object removed in base.  */
3422   base = iv->base;
3423   STRIP_NOPS (base);
3424   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3425     {
3426       tree step = iv->step;
3427
3428       STRIP_NOPS (step);
3429       base = TREE_OPERAND (base, 1);
3430       step = fold_convert (sizetype, step);
3431       record_common_cand (data, base, step, use);
3432       /* Also record common candidate with offset stripped.  */
3433       base = strip_offset (base, &offset);
3434       if (offset)
3435         record_common_cand (data, base, step, use);
3436     }
3437
3438   /* At last, add auto-incremental candidates.  Make such variables
3439      important since other iv uses with same base object may be based
3440      on it.  */
3441   if (use != NULL && use->type == USE_ADDRESS)
3442     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3443 }
3444
3445 /* Adds candidates based on the uses.  */
3446
3447 static void
3448 add_iv_candidate_for_groups (struct ivopts_data *data)
3449 {
3450   unsigned i;
3451
3452   /* Only add candidate for the first use in group.  */
3453   for (i = 0; i < data->vgroups.length (); i++)
3454     {
3455       struct iv_group *group = data->vgroups[i];
3456
3457       gcc_assert (group->vuses[0] != NULL);
3458       add_iv_candidate_for_use (data, group->vuses[0]);
3459     }
3460   add_iv_candidate_derived_from_uses (data);
3461 }
3462
3463 /* Record important candidates and add them to related_cands bitmaps.  */
3464
3465 static void
3466 record_important_candidates (struct ivopts_data *data)
3467 {
3468   unsigned i;
3469   struct iv_group *group;
3470
3471   for (i = 0; i < data->vcands.length (); i++)
3472     {
3473       struct iv_cand *cand = data->vcands[i];
3474
3475       if (cand->important)
3476         bitmap_set_bit (data->important_candidates, i);
3477     }
3478
3479   data->consider_all_candidates = (data->vcands.length ()
3480                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3481
3482   /* Add important candidates to groups' related_cands bitmaps.  */
3483   for (i = 0; i < data->vgroups.length (); i++)
3484     {
3485       group = data->vgroups[i];
3486       bitmap_ior_into (group->related_cands, data->important_candidates);
3487     }
3488 }
3489
3490 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3491    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3492    we allocate a simple list to every use.  */
3493
3494 static void
3495 alloc_use_cost_map (struct ivopts_data *data)
3496 {
3497   unsigned i, size, s;
3498
3499   for (i = 0; i < data->vgroups.length (); i++)
3500     {
3501       struct iv_group *group = data->vgroups[i];
3502
3503       if (data->consider_all_candidates)
3504         size = data->vcands.length ();
3505       else
3506         {
3507           s = bitmap_count_bits (group->related_cands);
3508
3509           /* Round up to the power of two, so that moduling by it is fast.  */
3510           size = s ? (1 << ceil_log2 (s)) : 1;
3511         }
3512
3513       group->n_map_members = size;
3514       group->cost_map = XCNEWVEC (struct cost_pair, size);
3515     }
3516 }
3517
3518 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3519    on invariants INV_VARS and that the value used in expressing it is
3520    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3521
3522 static void
3523 set_group_iv_cost (struct ivopts_data *data,
3524                    struct iv_group *group, struct iv_cand *cand,
3525                    comp_cost cost, bitmap inv_vars, tree value,
3526                    enum tree_code comp, bitmap inv_exprs)
3527 {
3528   unsigned i, s;
3529
3530   if (cost.infinite_cost_p ())
3531     {
3532       BITMAP_FREE (inv_vars);
3533       BITMAP_FREE (inv_exprs);
3534       return;
3535     }
3536
3537   if (data->consider_all_candidates)
3538     {
3539       group->cost_map[cand->id].cand = cand;
3540       group->cost_map[cand->id].cost = cost;
3541       group->cost_map[cand->id].inv_vars = inv_vars;
3542       group->cost_map[cand->id].inv_exprs = inv_exprs;
3543       group->cost_map[cand->id].value = value;
3544       group->cost_map[cand->id].comp = comp;
3545       return;
3546     }
3547
3548   /* n_map_members is a power of two, so this computes modulo.  */
3549   s = cand->id & (group->n_map_members - 1);
3550   for (i = s; i < group->n_map_members; i++)
3551     if (!group->cost_map[i].cand)
3552       goto found;
3553   for (i = 0; i < s; i++)
3554     if (!group->cost_map[i].cand)
3555       goto found;
3556
3557   gcc_unreachable ();
3558
3559 found:
3560   group->cost_map[i].cand = cand;
3561   group->cost_map[i].cost = cost;
3562   group->cost_map[i].inv_vars = inv_vars;
3563   group->cost_map[i].inv_exprs = inv_exprs;
3564   group->cost_map[i].value = value;
3565   group->cost_map[i].comp = comp;
3566 }
3567
3568 /* Gets cost of (GROUP, CAND) pair.  */
3569
3570 static struct cost_pair *
3571 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3572                    struct iv_cand *cand)
3573 {
3574   unsigned i, s;
3575   struct cost_pair *ret;
3576
3577   if (!cand)
3578     return NULL;
3579
3580   if (data->consider_all_candidates)
3581     {
3582       ret = group->cost_map + cand->id;
3583       if (!ret->cand)
3584         return NULL;
3585
3586       return ret;
3587     }
3588
3589   /* n_map_members is a power of two, so this computes modulo.  */
3590   s = cand->id & (group->n_map_members - 1);
3591   for (i = s; i < group->n_map_members; i++)
3592     if (group->cost_map[i].cand == cand)
3593       return group->cost_map + i;
3594     else if (group->cost_map[i].cand == NULL)
3595       return NULL;
3596   for (i = 0; i < s; i++)
3597     if (group->cost_map[i].cand == cand)
3598       return group->cost_map + i;
3599     else if (group->cost_map[i].cand == NULL)
3600       return NULL;
3601
3602   return NULL;
3603 }
3604
3605 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3606 static rtx
3607 produce_memory_decl_rtl (tree obj, int *regno)
3608 {
3609   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3610   machine_mode address_mode = targetm.addr_space.address_mode (as);
3611   rtx x;
3612
3613   gcc_assert (obj);
3614   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3615     {
3616       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3617       x = gen_rtx_SYMBOL_REF (address_mode, name);
3618       SET_SYMBOL_REF_DECL (x, obj);
3619       x = gen_rtx_MEM (DECL_MODE (obj), x);
3620       set_mem_addr_space (x, as);
3621       targetm.encode_section_info (obj, x, true);
3622     }
3623   else
3624     {
3625       x = gen_raw_REG (address_mode, (*regno)++);
3626       x = gen_rtx_MEM (DECL_MODE (obj), x);
3627       set_mem_addr_space (x, as);
3628     }
3629
3630   return x;
3631 }
3632
3633 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3634    walk_tree.  DATA contains the actual fake register number.  */
3635
3636 static tree
3637 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3638 {
3639   tree obj = NULL_TREE;
3640   rtx x = NULL_RTX;
3641   int *regno = (int *) data;
3642
3643   switch (TREE_CODE (*expr_p))
3644     {
3645     case ADDR_EXPR:
3646       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3647            handled_component_p (*expr_p);
3648            expr_p = &TREE_OPERAND (*expr_p, 0))
3649         continue;
3650       obj = *expr_p;
3651       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3652         x = produce_memory_decl_rtl (obj, regno);
3653       break;
3654
3655     case SSA_NAME:
3656       *ws = 0;
3657       obj = SSA_NAME_VAR (*expr_p);
3658       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3659       if (!obj)
3660         return NULL_TREE;
3661       if (!DECL_RTL_SET_P (obj))
3662         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3663       break;
3664
3665     case VAR_DECL:
3666     case PARM_DECL:
3667     case RESULT_DECL:
3668       *ws = 0;
3669       obj = *expr_p;
3670
3671       if (DECL_RTL_SET_P (obj))
3672         break;
3673
3674       if (DECL_MODE (obj) == BLKmode)
3675         x = produce_memory_decl_rtl (obj, regno);
3676       else
3677         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3678
3679       break;
3680
3681     default:
3682       break;
3683     }
3684
3685   if (x)
3686     {
3687       decl_rtl_to_reset.safe_push (obj);
3688       SET_DECL_RTL (obj, x);
3689     }
3690
3691   return NULL_TREE;
3692 }
3693
3694 /* Determines cost of the computation of EXPR.  */
3695
3696 static unsigned
3697 computation_cost (tree expr, bool speed)
3698 {
3699   rtx_insn *seq;
3700   rtx rslt;
3701   tree type = TREE_TYPE (expr);
3702   unsigned cost;
3703   /* Avoid using hard regs in ways which may be unsupported.  */
3704   int regno = LAST_VIRTUAL_REGISTER + 1;
3705   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3706   enum node_frequency real_frequency = node->frequency;
3707
3708   node->frequency = NODE_FREQUENCY_NORMAL;
3709   crtl->maybe_hot_insn_p = speed;
3710   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3711   start_sequence ();
3712   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3713   seq = get_insns ();
3714   end_sequence ();
3715   default_rtl_profile ();
3716   node->frequency = real_frequency;
3717
3718   cost = seq_cost (seq, speed);
3719   if (MEM_P (rslt))
3720     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3721                           TYPE_ADDR_SPACE (type), speed);
3722   else if (!REG_P (rslt))
3723     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3724
3725   return cost;
3726 }
3727
3728 /* Returns variable containing the value of candidate CAND at statement AT.  */
3729
3730 static tree
3731 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3732 {
3733   if (stmt_after_increment (loop, cand, stmt))
3734     return cand->var_after;
3735   else
3736     return cand->var_before;
3737 }
3738
3739 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3740    same precision that is at least as wide as the precision of TYPE, stores
3741    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3742    type of A and B.  */
3743
3744 static tree
3745 determine_common_wider_type (tree *a, tree *b)
3746 {
3747   tree wider_type = NULL;
3748   tree suba, subb;
3749   tree atype = TREE_TYPE (*a);
3750
3751   if (CONVERT_EXPR_P (*a))
3752     {
3753       suba = TREE_OPERAND (*a, 0);
3754       wider_type = TREE_TYPE (suba);
3755       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3756         return atype;
3757     }
3758   else
3759     return atype;
3760
3761   if (CONVERT_EXPR_P (*b))
3762     {
3763       subb = TREE_OPERAND (*b, 0);
3764       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3765         return atype;
3766     }
3767   else
3768     return atype;
3769
3770   *a = suba;
3771   *b = subb;
3772   return wider_type;
3773 }
3774
3775 /* Determines the expression by that USE is expressed from induction variable
3776    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3777    decomposed form.  The invariant part is stored in AFF_INV; while variant
3778    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3779    non-null.  Returns false if USE cannot be expressed using CAND.  */
3780
3781 static bool
3782 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3783                        struct iv_cand *cand, struct aff_tree *aff_inv,
3784                        struct aff_tree *aff_var, widest_int *prat = NULL)
3785 {
3786   tree ubase = use->iv->base, ustep = use->iv->step;
3787   tree cbase = cand->iv->base, cstep = cand->iv->step;
3788   tree common_type, uutype, var, cstep_common;
3789   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3790   aff_tree aff_cbase;
3791   widest_int rat;
3792
3793   /* We must have a precision to express the values of use.  */
3794   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3795     return false;
3796
3797   var = var_at_stmt (loop, cand, at);
3798   uutype = unsigned_type_for (utype);
3799
3800   /* If the conversion is not noop, perform it.  */
3801   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3802     {
3803       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3804           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3805         {
3806           tree inner_base, inner_step, inner_type;
3807           inner_base = TREE_OPERAND (cbase, 0);
3808           if (CONVERT_EXPR_P (cstep))
3809             inner_step = TREE_OPERAND (cstep, 0);
3810           else
3811             inner_step = cstep;
3812
3813           inner_type = TREE_TYPE (inner_base);
3814           /* If candidate is added from a biv whose type is smaller than
3815              ctype, we know both candidate and the biv won't overflow.
3816              In this case, it's safe to skip the convertion in candidate.
3817              As an example, (unsigned short)((unsigned long)A) equals to
3818              (unsigned short)A, if A has a type no larger than short.  */
3819           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3820             {
3821               cbase = inner_base;
3822               cstep = inner_step;
3823             }
3824         }
3825       cbase = fold_convert (uutype, cbase);
3826       cstep = fold_convert (uutype, cstep);
3827       var = fold_convert (uutype, var);
3828     }
3829
3830   /* Ratio is 1 when computing the value of biv cand by itself.
3831      We can't rely on constant_multiple_of in this case because the
3832      use is created after the original biv is selected.  The call
3833      could fail because of inconsistent fold behavior.  See PR68021
3834      for more information.  */
3835   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3836     {
3837       gcc_assert (is_gimple_assign (use->stmt));
3838       gcc_assert (use->iv->ssa_name == cand->var_after);
3839       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3840       rat = 1;
3841     }
3842   else if (!constant_multiple_of (ustep, cstep, &rat))
3843     return false;
3844
3845   if (prat)
3846     *prat = rat;
3847
3848   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3849      type, we achieve better folding by computing their difference in this
3850      wider type, and cast the result to UUTYPE.  We do not need to worry about
3851      overflows, as all the arithmetics will in the end be performed in UUTYPE
3852      anyway.  */
3853   common_type = determine_common_wider_type (&ubase, &cbase);
3854
3855   /* use = ubase - ratio * cbase + ratio * var.  */
3856   tree_to_aff_combination (ubase, common_type, aff_inv);
3857   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3858   tree_to_aff_combination (var, uutype, aff_var);
3859
3860   /* We need to shift the value if we are after the increment.  */
3861   if (stmt_after_increment (loop, cand, at))
3862     {
3863       aff_tree cstep_aff;
3864
3865       if (common_type != uutype)
3866         cstep_common = fold_convert (common_type, cstep);
3867       else
3868         cstep_common = cstep;
3869
3870       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3871       aff_combination_add (&aff_cbase, &cstep_aff);
3872     }
3873
3874   aff_combination_scale (&aff_cbase, -rat);
3875   aff_combination_add (aff_inv, &aff_cbase);
3876   if (common_type != uutype)
3877     aff_combination_convert (aff_inv, uutype);
3878
3879   aff_combination_scale (aff_var, rat);
3880   return true;
3881 }
3882
3883 /* Determines the expression by that USE is expressed from induction variable
3884    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3885    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3886
3887 static bool
3888 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3889                      struct iv_cand *cand, struct aff_tree *aff)
3890 {
3891   aff_tree aff_var;
3892
3893   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3894     return false;
3895
3896   aff_combination_add (aff, &aff_var);
3897   return true;
3898 }
3899
3900 /* Return the type of USE.  */
3901
3902 static tree
3903 get_use_type (struct iv_use *use)
3904 {
3905   tree base_type = TREE_TYPE (use->iv->base);
3906   tree type;
3907
3908   if (use->type == USE_ADDRESS)
3909     {
3910       /* The base_type may be a void pointer.  Create a pointer type based on
3911          the mem_ref instead.  */
3912       type = build_pointer_type (TREE_TYPE (*use->op_p));
3913       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3914                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3915     }
3916   else
3917     type = base_type;
3918
3919   return type;
3920 }
3921
3922 /* Determines the expression by that USE is expressed from induction variable
3923    CAND at statement AT in LOOP.  The computation is unshared.  */
3924
3925 static tree
3926 get_computation_at (struct loop *loop, gimple *at,
3927                     struct iv_use *use, struct iv_cand *cand)
3928 {
3929   aff_tree aff;
3930   tree type = get_use_type (use);
3931
3932   if (!get_computation_aff (loop, at, use, cand, &aff))
3933     return NULL_TREE;
3934   unshare_aff_combination (&aff);
3935   return fold_convert (type, aff_combination_to_tree (&aff));
3936 }
3937
3938 /* Adjust the cost COST for being in loop setup rather than loop body.
3939    If we're optimizing for space, the loop setup overhead is constant;
3940    if we're optimizing for speed, amortize it over the per-iteration cost.
3941    If ROUND_UP_P is true, the result is round up rather than to zero when
3942    optimizing for speed.  */
3943 static unsigned
3944 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
3945                    bool round_up_p = false)
3946 {
3947   if (cost == INFTY)
3948     return cost;
3949   else if (optimize_loop_for_speed_p (data->current_loop))
3950     {
3951       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
3952       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
3953     }
3954   else
3955     return cost;
3956 }
3957
3958 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3959    EXPR operand holding the shift.  COST0 and COST1 are the costs for
3960    calculating the operands of EXPR.  Returns true if successful, and returns
3961    the cost in COST.  */
3962
3963 static bool
3964 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3965                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3966 {
3967   comp_cost res;
3968   tree op1 = TREE_OPERAND (expr, 1);
3969   tree cst = TREE_OPERAND (mult, 1);
3970   tree multop = TREE_OPERAND (mult, 0);
3971   int m = exact_log2 (int_cst_value (cst));
3972   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3973   int as_cost, sa_cost;
3974   bool mult_in_op1;
3975
3976   if (!(m >= 0 && m < maxm))
3977     return false;
3978
3979   STRIP_NOPS (op1);
3980   mult_in_op1 = operand_equal_p (op1, mult, 0);
3981
3982   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3983
3984   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3985      use that in preference to a shift insn followed by an add insn.  */
3986   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3987              ? shiftadd_cost (speed, mode, m)
3988              : (mult_in_op1
3989                 ? shiftsub1_cost (speed, mode, m)
3990                 : shiftsub0_cost (speed, mode, m)));
3991
3992   res = comp_cost (MIN (as_cost, sa_cost), 0);
3993   res += (mult_in_op1 ? cost0 : cost1);
3994
3995   STRIP_NOPS (multop);
3996   if (!is_gimple_val (multop))
3997     res += force_expr_to_var_cost (multop, speed);
3998
3999   *cost = res;
4000   return true;
4001 }
4002
4003 /* Estimates cost of forcing expression EXPR into a variable.  */
4004
4005 static comp_cost
4006 force_expr_to_var_cost (tree expr, bool speed)
4007 {
4008   static bool costs_initialized = false;
4009   static unsigned integer_cost [2];
4010   static unsigned symbol_cost [2];
4011   static unsigned address_cost [2];
4012   tree op0, op1;
4013   comp_cost cost0, cost1, cost;
4014   machine_mode mode;
4015
4016   if (!costs_initialized)
4017     {
4018       tree type = build_pointer_type (integer_type_node);
4019       tree var, addr;
4020       rtx x;
4021       int i;
4022
4023       var = create_tmp_var_raw (integer_type_node, "test_var");
4024       TREE_STATIC (var) = 1;
4025       x = produce_memory_decl_rtl (var, NULL);
4026       SET_DECL_RTL (var, x);
4027
4028       addr = build1 (ADDR_EXPR, type, var);
4029
4030
4031       for (i = 0; i < 2; i++)
4032         {
4033           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4034                                                              2000), i);
4035
4036           symbol_cost[i] = computation_cost (addr, i) + 1;
4037
4038           address_cost[i]
4039             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4040           if (dump_file && (dump_flags & TDF_DETAILS))
4041             {
4042               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4043               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4044               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4045               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4046               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4047               fprintf (dump_file, "\n");
4048             }
4049         }
4050
4051       costs_initialized = true;
4052     }
4053
4054   STRIP_NOPS (expr);
4055
4056   if (SSA_VAR_P (expr))
4057     return no_cost;
4058
4059   if (is_gimple_min_invariant (expr))
4060     {
4061       if (TREE_CODE (expr) == INTEGER_CST)
4062         return comp_cost (integer_cost [speed], 0);
4063
4064       if (TREE_CODE (expr) == ADDR_EXPR)
4065         {
4066           tree obj = TREE_OPERAND (expr, 0);
4067
4068           if (VAR_P (obj)
4069               || TREE_CODE (obj) == PARM_DECL
4070               || TREE_CODE (obj) == RESULT_DECL)
4071             return comp_cost (symbol_cost [speed], 0);
4072         }
4073
4074       return comp_cost (address_cost [speed], 0);
4075     }
4076
4077   switch (TREE_CODE (expr))
4078     {
4079     case POINTER_PLUS_EXPR:
4080     case PLUS_EXPR:
4081     case MINUS_EXPR:
4082     case MULT_EXPR:
4083     case TRUNC_DIV_EXPR:
4084     case BIT_AND_EXPR:
4085     case BIT_IOR_EXPR:
4086     case LSHIFT_EXPR:
4087     case RSHIFT_EXPR:
4088       op0 = TREE_OPERAND (expr, 0);
4089       op1 = TREE_OPERAND (expr, 1);
4090       STRIP_NOPS (op0);
4091       STRIP_NOPS (op1);
4092       break;
4093
4094     CASE_CONVERT:
4095     case NEGATE_EXPR:
4096     case BIT_NOT_EXPR:
4097       op0 = TREE_OPERAND (expr, 0);
4098       STRIP_NOPS (op0);
4099       op1 = NULL_TREE;
4100       break;
4101
4102     default:
4103       /* Just an arbitrary value, FIXME.  */
4104       return comp_cost (target_spill_cost[speed], 0);
4105     }
4106
4107   if (op0 == NULL_TREE
4108       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4109     cost0 = no_cost;
4110   else
4111     cost0 = force_expr_to_var_cost (op0, speed);
4112
4113   if (op1 == NULL_TREE
4114       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4115     cost1 = no_cost;
4116   else
4117     cost1 = force_expr_to_var_cost (op1, speed);
4118
4119   mode = TYPE_MODE (TREE_TYPE (expr));
4120   switch (TREE_CODE (expr))
4121     {
4122     case POINTER_PLUS_EXPR:
4123     case PLUS_EXPR:
4124     case MINUS_EXPR:
4125     case NEGATE_EXPR:
4126       cost = comp_cost (add_cost (speed, mode), 0);
4127       if (TREE_CODE (expr) != NEGATE_EXPR)
4128         {
4129           tree mult = NULL_TREE;
4130           comp_cost sa_cost;
4131           if (TREE_CODE (op1) == MULT_EXPR)
4132             mult = op1;
4133           else if (TREE_CODE (op0) == MULT_EXPR)
4134             mult = op0;
4135
4136           if (mult != NULL_TREE
4137               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4138               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4139                                     speed, &sa_cost))
4140             return sa_cost;
4141         }
4142       break;
4143
4144     CASE_CONVERT:
4145       {
4146         tree inner_mode, outer_mode;
4147         outer_mode = TREE_TYPE (expr);
4148         inner_mode = TREE_TYPE (op0);
4149         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4150                                        TYPE_MODE (inner_mode), speed), 0);
4151       }
4152       break;
4153
4154     case MULT_EXPR:
4155       if (cst_and_fits_in_hwi (op0))
4156         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4157                                              mode, speed), 0);
4158       else if (cst_and_fits_in_hwi (op1))
4159         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4160                                              mode, speed), 0);
4161       else
4162         return comp_cost (target_spill_cost [speed], 0);
4163       break;
4164
4165     case TRUNC_DIV_EXPR:
4166       /* Division by power of two is usually cheap, so we allow it.  Forbid
4167          anything else.  */
4168       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4169         cost = comp_cost (add_cost (speed, mode), 0);
4170       else
4171         cost = comp_cost (target_spill_cost[speed], 0);
4172       break;
4173
4174     case BIT_AND_EXPR:
4175     case BIT_IOR_EXPR:
4176     case BIT_NOT_EXPR:
4177     case LSHIFT_EXPR:
4178     case RSHIFT_EXPR:
4179       cost = comp_cost (add_cost (speed, mode), 0);
4180       break;
4181
4182     default:
4183       gcc_unreachable ();
4184     }
4185
4186   cost += cost0;
4187   cost += cost1;
4188   return cost;
4189 }
4190
4191 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4192    invariants the computation depends on.  */
4193
4194 static comp_cost
4195 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4196 {
4197   if (!expr)
4198     return no_cost;
4199
4200   find_inv_vars (data, &expr, inv_vars);
4201   return force_expr_to_var_cost (expr, data->speed);
4202 }
4203
4204 /* Returns cost of auto-modifying address expression in shape base + offset.
4205    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4206    address expression.  The address expression has ADDR_MODE in addr space
4207    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4208    speed or size.  */
4209
4210 enum ainc_type
4211 {
4212   AINC_PRE_INC,         /* Pre increment.  */
4213   AINC_PRE_DEC,         /* Pre decrement.  */
4214   AINC_POST_INC,        /* Post increment.  */
4215   AINC_POST_DEC,        /* Post decrement.  */
4216   AINC_NONE             /* Also the number of auto increment types.  */
4217 };
4218
4219 struct ainc_cost_data
4220 {
4221   unsigned costs[AINC_NONE];
4222 };
4223
4224 static comp_cost
4225 get_address_cost_ainc (HOST_WIDE_INT ainc_step, HOST_WIDE_INT ainc_offset,
4226                        machine_mode addr_mode, machine_mode mem_mode,
4227                        addr_space_t as, bool speed)
4228 {
4229   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4230       && !USE_STORE_PRE_DECREMENT (mem_mode)
4231       && !USE_LOAD_POST_DECREMENT (mem_mode)
4232       && !USE_STORE_POST_DECREMENT (mem_mode)
4233       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4234       && !USE_STORE_PRE_INCREMENT (mem_mode)
4235       && !USE_LOAD_POST_INCREMENT (mem_mode)
4236       && !USE_STORE_POST_INCREMENT (mem_mode))
4237     return infinite_cost;
4238
4239   static vec<ainc_cost_data *> ainc_cost_data_list;
4240   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4241   if (idx >= ainc_cost_data_list.length ())
4242     {
4243       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4244
4245       gcc_assert (nsize > idx);
4246       ainc_cost_data_list.safe_grow_cleared (nsize);
4247     }
4248
4249   ainc_cost_data *data = ainc_cost_data_list[idx];
4250   if (data == NULL)
4251     {
4252       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4253
4254       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4255       data->costs[AINC_PRE_DEC] = INFTY;
4256       data->costs[AINC_POST_DEC] = INFTY;
4257       data->costs[AINC_PRE_INC] = INFTY;
4258       data->costs[AINC_POST_INC] = INFTY;
4259       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4260           || USE_STORE_PRE_DECREMENT (mem_mode))
4261         {
4262           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4263
4264           if (memory_address_addr_space_p (mem_mode, addr, as))
4265             data->costs[AINC_PRE_DEC]
4266               = address_cost (addr, mem_mode, as, speed);
4267         }
4268       if (USE_LOAD_POST_DECREMENT (mem_mode)
4269           || USE_STORE_POST_DECREMENT (mem_mode))
4270         {
4271           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4272
4273           if (memory_address_addr_space_p (mem_mode, addr, as))
4274             data->costs[AINC_POST_DEC]
4275               = address_cost (addr, mem_mode, as, speed);
4276         }
4277       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4278           || USE_STORE_PRE_INCREMENT (mem_mode))
4279         {
4280           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4281
4282           if (memory_address_addr_space_p (mem_mode, addr, as))
4283             data->costs[AINC_PRE_INC]
4284               = address_cost (addr, mem_mode, as, speed);
4285         }
4286       if (USE_LOAD_POST_INCREMENT (mem_mode)
4287           || USE_STORE_POST_INCREMENT (mem_mode))
4288         {
4289           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4290
4291           if (memory_address_addr_space_p (mem_mode, addr, as))
4292             data->costs[AINC_POST_INC]
4293               = address_cost (addr, mem_mode, as, speed);
4294         }
4295       ainc_cost_data_list[idx] = data;
4296     }
4297
4298   HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
4299   if (ainc_offset == 0 && msize == ainc_step)
4300     return comp_cost (data->costs[AINC_POST_INC], 0);
4301   if (ainc_offset == 0 && msize == -ainc_step)
4302     return comp_cost (data->costs[AINC_POST_DEC], 0);
4303   if (ainc_offset == msize && msize == ainc_step)
4304     return comp_cost (data->costs[AINC_PRE_INC], 0);
4305   if (ainc_offset == -msize && msize == -ainc_step)
4306     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4307
4308   return infinite_cost;
4309 }
4310
4311 /* Return cost of computing USE's address expression by using CAND.
4312    AFF_INV and AFF_VAR represent invariant and variant parts of the
4313    address expression, respectively.  If AFF_INV is simple, store
4314    the loop invariant variables which are depended by it in INV_VARS;
4315    if AFF_INV is complicated, handle it as a new invariant expression
4316    and record it in INV_EXPR.  RATIO indicates multiple times between
4317    steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
4318    value to it indicating if this is an auto-increment address.  */
4319
4320 static comp_cost
4321 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4322                   struct iv_cand *cand, aff_tree *aff_inv,
4323                   aff_tree *aff_var, HOST_WIDE_INT ratio,
4324                   bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4325                   bool *can_autoinc, bool speed)
4326 {
4327   rtx addr;
4328   bool simple_inv = true;
4329   tree comp_inv = NULL_TREE, type = aff_var->type;
4330   comp_cost var_cost = no_cost, cost = no_cost;
4331   struct mem_address parts = {NULL_TREE, integer_one_node,
4332                               NULL_TREE, NULL_TREE, NULL_TREE};
4333   machine_mode addr_mode = TYPE_MODE (type);
4334   machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
4335   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4336
4337   if (!aff_combination_const_p (aff_inv))
4338     {
4339       parts.index = integer_one_node;
4340       /* Addressing mode "base + index".  */
4341       if (valid_mem_ref_p (mem_mode, as, &parts))
4342         {
4343           parts.step = wide_int_to_tree (type, ratio);
4344           /* Addressing mode "base + index << scale".  */
4345           if (ratio != 1 && !valid_mem_ref_p (mem_mode, as, &parts))
4346             parts.step = NULL_TREE;
4347
4348           if (aff_inv->offset != 0)
4349             {
4350               parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4351               /* Addressing mode "base + index [<< scale] + offset".  */
4352               if (!valid_mem_ref_p (mem_mode, as, &parts))
4353                 parts.offset = NULL_TREE;
4354               else
4355                 aff_inv->offset = 0;
4356             }
4357
4358           move_fixed_address_to_symbol (&parts, aff_inv);
4359           /* Base is fixed address and is moved to symbol part.  */
4360           if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4361             parts.base = NULL_TREE;
4362
4363           /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4364           if (parts.symbol != NULL_TREE
4365               && !valid_mem_ref_p (mem_mode, as, &parts))
4366             {
4367               aff_combination_add_elt (aff_inv, parts.symbol, 1);
4368               parts.symbol = NULL_TREE;
4369               /* Reset SIMPLE_INV since symbol address needs to be computed
4370                  outside of address expression in this case.  */
4371               simple_inv = false;
4372               /* Symbol part is moved back to base part, it can't be NULL.  */
4373               parts.base = integer_one_node;
4374             }
4375         }
4376       else
4377         parts.index = NULL_TREE;
4378     }
4379   else
4380     {
4381       if (can_autoinc && ratio == 1 && cst_and_fits_in_hwi (cand->iv->step))
4382         {
4383           HOST_WIDE_INT ainc_step = int_cst_value (cand->iv->step);
4384           HOST_WIDE_INT ainc_offset = (aff_inv->offset).to_shwi ();
4385
4386           if (stmt_after_increment (data->current_loop, cand, use->stmt))
4387             ainc_offset += ainc_step;
4388           cost = get_address_cost_ainc (ainc_step, ainc_offset,
4389                                         addr_mode, mem_mode, as, speed);
4390           if (!cost.infinite_cost_p ())
4391             {
4392               *can_autoinc = true;
4393               return cost;
4394             }
4395           cost = no_cost;
4396         }
4397       if (!aff_combination_zero_p (aff_inv))
4398         {
4399           parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4400           /* Addressing mode "base + offset".  */
4401           if (!valid_mem_ref_p (mem_mode, as, &parts))
4402             parts.offset = NULL_TREE;
4403           else
4404             aff_inv->offset = 0;
4405         }
4406     }
4407
4408   if (simple_inv)
4409     simple_inv = (aff_inv == NULL
4410                   || aff_combination_const_p (aff_inv)
4411                   || aff_combination_singleton_var_p (aff_inv));
4412   if (!aff_combination_zero_p (aff_inv))
4413     comp_inv = aff_combination_to_tree (aff_inv);
4414   if (comp_inv != NULL_TREE)
4415     cost = force_var_cost (data, comp_inv, inv_vars);
4416   if (ratio != 1 && parts.step == NULL_TREE)
4417     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4418   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4419     var_cost += add_cost (speed, addr_mode);
4420
4421   if (comp_inv && inv_expr && !simple_inv)
4422     {
4423       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4424       /* Clear depends on.  */
4425       if (*inv_expr != NULL && inv_vars && *inv_vars)
4426         bitmap_clear (*inv_vars);
4427
4428       /* Cost of small invariant expression adjusted against loop niters
4429          is usually zero, which makes it difficult to be differentiated
4430          from candidate based on loop invariant variables.  Secondly, the
4431          generated invariant expression may not be hoisted out of loop by
4432          following pass.  We penalize the cost by rounding up in order to
4433          neutralize such effects.  */
4434       cost.cost = adjust_setup_cost (data, cost.cost, true);
4435       cost.scratch = cost.cost;
4436     }
4437
4438   cost += var_cost;
4439   addr = addr_for_mem_ref (&parts, as, false);
4440   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4441   cost += address_cost (addr, mem_mode, as, speed);
4442
4443   if (parts.symbol != NULL_TREE)
4444     cost.complexity += 1;
4445   if (parts.step != NULL_TREE && !integer_onep (parts.step))
4446     cost.complexity += 1;
4447   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4448     cost.complexity += 1;
4449   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4450     cost.complexity += 1;
4451
4452   return cost;
4453 }
4454
4455 /* Scale (multiply) the computed COST (except scratch part that should be
4456    hoisted out a loop) by header->frequency / AT->frequency, which makes
4457    expected cost more accurate.  */
4458
4459 static comp_cost
4460 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4461 {
4462    int loop_freq = data->current_loop->header->frequency;
4463    int bb_freq = gimple_bb (at)->frequency;
4464    if (loop_freq != 0)
4465      {
4466        gcc_assert (cost.scratch <= cost.cost);
4467        int scaled_cost
4468          = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4469
4470        if (dump_file && (dump_flags & TDF_DETAILS))
4471          fprintf (dump_file, "Scaling cost based on bb prob "
4472                   "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4473                   1.0f * bb_freq / loop_freq, cost.cost,
4474                   cost.scratch, scaled_cost, bb_freq, loop_freq);
4475
4476        cost.cost = scaled_cost;
4477      }
4478
4479   return cost;
4480 }
4481
4482 /* Determines the cost of the computation by that USE is expressed
4483    from induction variable CAND.  If ADDRESS_P is true, we just need
4484    to create an address from it, otherwise we want to get it into
4485    register.  A set of invariants we depend on is stored in INV_VARS.
4486    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4487    addressing is likely.  If INV_EXPR is nonnull, record invariant
4488    expr entry in it.  */
4489
4490 static comp_cost
4491 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4492                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4493                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4494 {
4495   gimple *at = use->stmt;
4496   tree ubase = use->iv->base, cbase = cand->iv->base;
4497   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4498   tree comp_inv = NULL_TREE;
4499   HOST_WIDE_INT ratio, aratio;
4500   comp_cost cost;
4501   widest_int rat;
4502   aff_tree aff_inv, aff_var;
4503   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4504
4505   if (inv_vars)
4506     *inv_vars = NULL;
4507   if (can_autoinc)
4508     *can_autoinc = false;
4509   if (inv_expr)
4510     *inv_expr = NULL;
4511
4512   /* Check if we have enough precision to express the values of use.  */
4513   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4514     return infinite_cost;
4515
4516   if (address_p
4517       || (use->iv->base_object
4518           && cand->iv->base_object
4519           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4520           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4521     {
4522       /* Do not try to express address of an object with computation based
4523          on address of a different object.  This may cause problems in rtl
4524          level alias analysis (that does not expect this to be happening,
4525          as this is illegal in C), and would be unlikely to be useful
4526          anyway.  */
4527       if (use->iv->base_object
4528           && cand->iv->base_object
4529           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4530         return infinite_cost;
4531     }
4532
4533   if (!get_computation_aff_1 (data->current_loop, at, use,
4534                               cand, &aff_inv, &aff_var, &rat)
4535       || !wi::fits_shwi_p (rat))
4536     return infinite_cost;
4537
4538   ratio = rat.to_shwi ();
4539   if (address_p)
4540     {
4541       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4542                                inv_vars, inv_expr, can_autoinc, speed);
4543       return get_scaled_computation_cost_at (data, at, cost);
4544     }
4545
4546   bool simple_inv = (aff_combination_const_p (&aff_inv)
4547                      || aff_combination_singleton_var_p (&aff_inv));
4548   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4549   aff_combination_convert (&aff_inv, signed_type);
4550   if (!aff_combination_zero_p (&aff_inv))
4551     comp_inv = aff_combination_to_tree (&aff_inv);
4552
4553   cost = force_var_cost (data, comp_inv, inv_vars);
4554   if (comp_inv && inv_expr && !simple_inv)
4555     {
4556       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4557       /* Clear depends on.  */
4558       if (*inv_expr != NULL && inv_vars && *inv_vars)
4559         bitmap_clear (*inv_vars);
4560
4561       cost.cost = adjust_setup_cost (data, cost.cost);
4562       /* Record setup cost in scratch field.  */
4563       cost.scratch = cost.cost;
4564     }
4565   /* Cost of constant integer can be covered when adding invariant part to
4566      variant part.  */
4567   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4568     cost = no_cost;
4569
4570   /* Need type narrowing to represent use with cand.  */
4571   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4572     {
4573       machine_mode outer_mode = TYPE_MODE (utype);
4574       machine_mode inner_mode = TYPE_MODE (ctype);
4575       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4576     }
4577
4578   /* Turn a + i * (-c) into a - i * c.  */
4579   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4580     aratio = -ratio;
4581   else
4582     aratio = ratio;
4583
4584   if (ratio != 1)
4585     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4586
4587   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4588      instruction.  */
4589   /* Need to add up the invariant and variant parts.  */
4590   if (comp_inv && !integer_zerop (comp_inv))
4591     cost += add_cost (speed, TYPE_MODE (utype));
4592
4593   return get_scaled_computation_cost_at (data, at, cost);
4594 }
4595
4596 /* Determines cost of computing the use in GROUP with CAND in a generic
4597    expression.  */
4598
4599 static bool
4600 determine_group_iv_cost_generic (struct ivopts_data *data,
4601                                  struct iv_group *group, struct iv_cand *cand)
4602 {
4603   comp_cost cost;
4604   iv_inv_expr_ent *inv_expr = NULL;
4605   bitmap inv_vars = NULL, inv_exprs = NULL;
4606   struct iv_use *use = group->vuses[0];
4607
4608   /* The simple case first -- if we need to express value of the preserved
4609      original biv, the cost is 0.  This also prevents us from counting the
4610      cost of increment twice -- once at this use and once in the cost of
4611      the candidate.  */
4612   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4613     cost = no_cost;
4614   else
4615     cost = get_computation_cost (data, use, cand, false,
4616                                  &inv_vars, NULL, &inv_expr);
4617
4618   if (inv_expr)
4619     {
4620       inv_exprs = BITMAP_ALLOC (NULL);
4621       bitmap_set_bit (inv_exprs, inv_expr->id);
4622     }
4623   set_group_iv_cost (data, group, cand, cost, inv_vars,
4624                      NULL_TREE, ERROR_MARK, inv_exprs);
4625   return !cost.infinite_cost_p ();
4626 }
4627
4628 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4629
4630 static bool
4631 determine_group_iv_cost_address (struct ivopts_data *data,
4632                                  struct iv_group *group, struct iv_cand *cand)
4633 {
4634   unsigned i;
4635   bitmap inv_vars = NULL, inv_exprs = NULL;
4636   bool can_autoinc;
4637   iv_inv_expr_ent *inv_expr = NULL;
4638   struct iv_use *use = group->vuses[0];
4639   comp_cost sum_cost = no_cost, cost;
4640
4641   cost = get_computation_cost (data, use, cand, true,
4642                                &inv_vars, &can_autoinc, &inv_expr);
4643
4644   if (inv_expr)
4645     {
4646       inv_exprs = BITMAP_ALLOC (NULL);
4647       bitmap_set_bit (inv_exprs, inv_expr->id);
4648     }
4649   sum_cost = cost;
4650   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4651     {
4652       if (can_autoinc)
4653         sum_cost -= cand->cost_step;
4654       /* If we generated the candidate solely for exploiting autoincrement
4655          opportunities, and it turns out it can't be used, set the cost to
4656          infinity to make sure we ignore it.  */
4657       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4658         sum_cost = infinite_cost;
4659     }
4660
4661   /* Uses in a group can share setup code, so only add setup cost once.  */
4662   cost -= cost.scratch;
4663   /* Compute and add costs for rest uses of this group.  */
4664   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4665     {
4666       struct iv_use *next = group->vuses[i];
4667
4668       /* TODO: We could skip computing cost for sub iv_use when it has the
4669          same cost as the first iv_use, but the cost really depends on the
4670          offset and where the iv_use is.  */
4671         cost = get_computation_cost (data, next, cand, true,
4672                                      NULL, &can_autoinc, &inv_expr);
4673         if (inv_expr)
4674           {
4675             if (!inv_exprs)
4676               inv_exprs = BITMAP_ALLOC (NULL);
4677
4678             bitmap_set_bit (inv_exprs, inv_expr->id);
4679           }
4680       sum_cost += cost;
4681     }
4682   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4683                      NULL_TREE, ERROR_MARK, inv_exprs);
4684
4685   return !sum_cost.infinite_cost_p ();
4686 }
4687
4688 /* Computes value of candidate CAND at position AT in iteration NITER, and
4689    stores it to VAL.  */
4690
4691 static void
4692 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4693                aff_tree *val)
4694 {
4695   aff_tree step, delta, nit;
4696   struct iv *iv = cand->iv;
4697   tree type = TREE_TYPE (iv->base);
4698   tree steptype;
4699   if (POINTER_TYPE_P (type))
4700     steptype = sizetype;
4701   else
4702     steptype = unsigned_type_for (type);
4703
4704   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4705   aff_combination_convert (&step, steptype);
4706   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4707   aff_combination_convert (&nit, steptype);
4708   aff_combination_mult (&nit, &step, &delta);
4709   if (stmt_after_increment (loop, cand, at))
4710     aff_combination_add (&delta, &step);
4711
4712   tree_to_aff_combination (iv->base, type, val);
4713   if (!POINTER_TYPE_P (type))
4714     aff_combination_convert (val, steptype);
4715   aff_combination_add (val, &delta);
4716 }
4717
4718 /* Returns period of induction variable iv.  */
4719
4720 static tree
4721 iv_period (struct iv *iv)
4722 {
4723   tree step = iv->step, period, type;
4724   tree pow2div;
4725
4726   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4727
4728   type = unsigned_type_for (TREE_TYPE (step));
4729   /* Period of the iv is lcm (step, type_range)/step -1,
4730      i.e., N*type_range/step - 1. Since type range is power
4731      of two, N == (step >> num_of_ending_zeros_binary (step),
4732      so the final result is
4733
4734        (type_range >> num_of_ending_zeros_binary (step)) - 1
4735
4736   */
4737   pow2div = num_ending_zeros (step);
4738
4739   period = build_low_bits_mask (type,
4740                                 (TYPE_PRECISION (type)
4741                                  - tree_to_uhwi (pow2div)));
4742
4743   return period;
4744 }
4745
4746 /* Returns the comparison operator used when eliminating the iv USE.  */
4747
4748 static enum tree_code
4749 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4750 {
4751   struct loop *loop = data->current_loop;
4752   basic_block ex_bb;
4753   edge exit;
4754
4755   ex_bb = gimple_bb (use->stmt);
4756   exit = EDGE_SUCC (ex_bb, 0);
4757   if (flow_bb_inside_loop_p (loop, exit->dest))
4758     exit = EDGE_SUCC (ex_bb, 1);
4759
4760   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4761 }
4762
4763 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4764    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4765    calculation is performed in non-wrapping type.
4766
4767    TODO: More generally, we could test for the situation that
4768          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4769          This would require knowing the sign of OFFSET.  */
4770
4771 static bool
4772 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4773 {
4774   enum tree_code code;
4775   tree e1, e2;
4776   aff_tree aff_e1, aff_e2, aff_offset;
4777
4778   if (!nowrap_type_p (TREE_TYPE (base)))
4779     return false;
4780
4781   base = expand_simple_operations (base);
4782
4783   if (TREE_CODE (base) == SSA_NAME)
4784     {
4785       gimple *stmt = SSA_NAME_DEF_STMT (base);
4786
4787       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4788         return false;
4789
4790       code = gimple_assign_rhs_code (stmt);
4791       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4792         return false;
4793
4794       e1 = gimple_assign_rhs1 (stmt);
4795       e2 = gimple_assign_rhs2 (stmt);
4796     }
4797   else
4798     {
4799       code = TREE_CODE (base);
4800       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4801         return false;
4802       e1 = TREE_OPERAND (base, 0);
4803       e2 = TREE_OPERAND (base, 1);
4804     }
4805
4806   /* Use affine expansion as deeper inspection to prove the equality.  */
4807   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4808                                   &aff_e2, &data->name_expansion_cache);
4809   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4810                                   &aff_offset, &data->name_expansion_cache);
4811   aff_combination_scale (&aff_offset, -1);
4812   switch (code)
4813     {
4814     case PLUS_EXPR:
4815       aff_combination_add (&aff_e2, &aff_offset);
4816       if (aff_combination_zero_p (&aff_e2))
4817         return true;
4818
4819       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4820                                       &aff_e1, &data->name_expansion_cache);
4821       aff_combination_add (&aff_e1, &aff_offset);
4822       return aff_combination_zero_p (&aff_e1);
4823
4824     case POINTER_PLUS_EXPR:
4825       aff_combination_add (&aff_e2, &aff_offset);
4826       return aff_combination_zero_p (&aff_e2);
4827
4828     default:
4829       return false;
4830     }
4831 }
4832
4833 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4834    comparison with CAND.  NITER describes the number of iterations of
4835    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4836
4837    We aim to handle the following situation:
4838
4839    sometype *base, *p;
4840    int a, b, i;
4841
4842    i = a;
4843    p = p_0 = base + a;
4844
4845    do
4846      {
4847        bla (*p);
4848        p++;
4849        i++;
4850      }
4851    while (i < b);
4852
4853    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4854    We aim to optimize this to
4855
4856    p = p_0 = base + a;
4857    do
4858      {
4859        bla (*p);
4860        p++;
4861      }
4862    while (p < p_0 - a + b);
4863
4864    This preserves the correctness, since the pointer arithmetics does not
4865    overflow.  More precisely:
4866
4867    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4868       overflow in computing it or the values of p.
4869    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4870       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4871
4872 static bool
4873 iv_elimination_compare_lt (struct ivopts_data *data,
4874                            struct iv_cand *cand, enum tree_code *comp_p,
4875                            struct tree_niter_desc *niter)
4876 {
4877   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4878   struct aff_tree nit, tmpa, tmpb;
4879   enum tree_code comp;
4880   HOST_WIDE_INT step;
4881
4882   /* We need to know that the candidate induction variable does not overflow.
4883      While more complex analysis may be used to prove this, for now just
4884      check that the variable appears in the original program and that it
4885      is computed in a type that guarantees no overflows.  */
4886   cand_type = TREE_TYPE (cand->iv->base);
4887   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4888     return false;
4889
4890   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4891      the calculation of the BOUND could overflow, making the comparison
4892      invalid.  */
4893   if (!data->loop_single_exit_p)
4894     return false;
4895
4896   /* We need to be able to decide whether candidate is increasing or decreasing
4897      in order to choose the right comparison operator.  */
4898   if (!cst_and_fits_in_hwi (cand->iv->step))
4899     return false;
4900   step = int_cst_value (cand->iv->step);
4901
4902   /* Check that the number of iterations matches the expected pattern:
4903      a + 1 > b ? 0 : b - a - 1.  */
4904   mbz = niter->may_be_zero;
4905   if (TREE_CODE (mbz) == GT_EXPR)
4906     {
4907       /* Handle a + 1 > b.  */
4908       tree op0 = TREE_OPERAND (mbz, 0);
4909       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4910         {
4911           a = TREE_OPERAND (op0, 0);
4912           b = TREE_OPERAND (mbz, 1);
4913         }
4914       else
4915         return false;
4916     }
4917   else if (TREE_CODE (mbz) == LT_EXPR)
4918     {
4919       tree op1 = TREE_OPERAND (mbz, 1);
4920
4921       /* Handle b < a + 1.  */
4922       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4923         {
4924           a = TREE_OPERAND (op1, 0);
4925           b = TREE_OPERAND (mbz, 0);
4926         }
4927       else
4928         return false;
4929     }
4930   else
4931     return false;
4932
4933   /* Expected number of iterations is B - A - 1.  Check that it matches
4934      the actual number, i.e., that B - A - NITER = 1.  */
4935   tree_to_aff_combination (niter->niter, nit_type, &nit);
4936   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4937   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4938   aff_combination_scale (&nit, -1);
4939   aff_combination_scale (&tmpa, -1);
4940   aff_combination_add (&tmpb, &tmpa);
4941   aff_combination_add (&tmpb, &nit);
4942   if (tmpb.n != 0 || tmpb.offset != 1)
4943     return false;
4944
4945   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4946      overflow.  */
4947   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4948                         cand->iv->step,
4949                         fold_convert (TREE_TYPE (cand->iv->step), a));
4950   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4951     return false;
4952
4953   /* Determine the new comparison operator.  */
4954   comp = step < 0 ? GT_EXPR : LT_EXPR;
4955   if (*comp_p == NE_EXPR)
4956     *comp_p = comp;
4957   else if (*comp_p == EQ_EXPR)
4958     *comp_p = invert_tree_comparison (comp, false);
4959   else
4960     gcc_unreachable ();
4961
4962   return true;
4963 }
4964
4965 /* Check whether it is possible to express the condition in USE by comparison
4966    of candidate CAND.  If so, store the value compared with to BOUND, and the
4967    comparison operator to COMP.  */
4968
4969 static bool
4970 may_eliminate_iv (struct ivopts_data *data,
4971                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4972                   enum tree_code *comp)
4973 {
4974   basic_block ex_bb;
4975   edge exit;
4976   tree period;
4977   struct loop *loop = data->current_loop;
4978   aff_tree bnd;
4979   struct tree_niter_desc *desc = NULL;
4980
4981   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4982     return false;
4983
4984   /* For now works only for exits that dominate the loop latch.
4985      TODO: extend to other conditions inside loop body.  */
4986   ex_bb = gimple_bb (use->stmt);
4987   if (use->stmt != last_stmt (ex_bb)
4988       || gimple_code (use->stmt) != GIMPLE_COND
4989       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4990     return false;
4991
4992   exit = EDGE_SUCC (ex_bb, 0);
4993   if (flow_bb_inside_loop_p (loop, exit->dest))
4994     exit = EDGE_SUCC (ex_bb, 1);
4995   if (flow_bb_inside_loop_p (loop, exit->dest))
4996     return false;
4997
4998   desc = niter_for_exit (data, exit);
4999   if (!desc)
5000     return false;
5001
5002   /* Determine whether we can use the variable to test the exit condition.
5003      This is the case iff the period of the induction variable is greater
5004      than the number of iterations for which the exit condition is true.  */
5005   period = iv_period (cand->iv);
5006
5007   /* If the number of iterations is constant, compare against it directly.  */
5008   if (TREE_CODE (desc->niter) == INTEGER_CST)
5009     {
5010       /* See cand_value_at.  */
5011       if (stmt_after_increment (loop, cand, use->stmt))
5012         {
5013           if (!tree_int_cst_lt (desc->niter, period))
5014             return false;
5015         }
5016       else
5017         {
5018           if (tree_int_cst_lt (period, desc->niter))
5019             return false;
5020         }
5021     }
5022
5023   /* If not, and if this is the only possible exit of the loop, see whether
5024      we can get a conservative estimate on the number of iterations of the
5025      entire loop and compare against that instead.  */
5026   else
5027     {
5028       widest_int period_value, max_niter;
5029
5030       max_niter = desc->max;
5031       if (stmt_after_increment (loop, cand, use->stmt))
5032         max_niter += 1;
5033       period_value = wi::to_widest (period);
5034       if (wi::gtu_p (max_niter, period_value))
5035         {
5036           /* See if we can take advantage of inferred loop bound
5037              information.  */
5038           if (data->loop_single_exit_p)
5039             {
5040               if (!max_loop_iterations (loop, &max_niter))
5041                 return false;
5042               /* The loop bound is already adjusted by adding 1.  */
5043               if (wi::gtu_p (max_niter, period_value))
5044                 return false;
5045             }
5046           else
5047             return false;
5048         }
5049     }
5050
5051   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5052
5053   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5054                          aff_combination_to_tree (&bnd));
5055   *comp = iv_elimination_compare (data, use);
5056
5057   /* It is unlikely that computing the number of iterations using division
5058      would be more profitable than keeping the original induction variable.  */
5059   if (expression_expensive_p (*bound))
5060     return false;
5061
5062   /* Sometimes, it is possible to handle the situation that the number of
5063      iterations may be zero unless additional assumptions by using <
5064      instead of != in the exit condition.
5065
5066      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5067            base the exit condition on it.  However, that is often too
5068            expensive.  */
5069   if (!integer_zerop (desc->may_be_zero))
5070     return iv_elimination_compare_lt (data, cand, comp, desc);
5071
5072   return true;
5073 }
5074
5075  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5076     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5077
5078 static int
5079 parm_decl_cost (struct ivopts_data *data, tree bound)
5080 {
5081   tree sbound = bound;
5082   STRIP_NOPS (sbound);
5083
5084   if (TREE_CODE (sbound) == SSA_NAME
5085       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5086       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5087       && data->body_includes_call)
5088     return COSTS_N_INSNS (1);
5089
5090   return 0;
5091 }
5092
5093 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5094
5095 static bool
5096 determine_group_iv_cost_cond (struct ivopts_data *data,
5097                               struct iv_group *group, struct iv_cand *cand)
5098 {
5099   tree bound = NULL_TREE;
5100   struct iv *cmp_iv;
5101   bitmap inv_exprs = NULL;
5102   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5103   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5104   enum comp_iv_rewrite rewrite_type;
5105   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5106   tree *control_var, *bound_cst;
5107   enum tree_code comp = ERROR_MARK;
5108   struct iv_use *use = group->vuses[0];
5109
5110   /* Extract condition operands.  */
5111   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5112                                         &bound_cst, NULL, &cmp_iv);
5113   gcc_assert (rewrite_type != COMP_IV_NA);
5114
5115   /* Try iv elimination.  */
5116   if (rewrite_type == COMP_IV_ELIM
5117       && may_eliminate_iv (data, use, cand, &bound, &comp))
5118     {
5119       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5120       if (elim_cost.cost == 0)
5121         elim_cost.cost = parm_decl_cost (data, bound);
5122       else if (TREE_CODE (bound) == INTEGER_CST)
5123         elim_cost.cost = 0;
5124       /* If we replace a loop condition 'i < n' with 'p < base + n',
5125          inv_vars_elim will have 'base' and 'n' set, which implies that both
5126          'base' and 'n' will be live during the loop.    More likely,
5127          'base + n' will be loop invariant, resulting in only one live value
5128          during the loop.  So in that case we clear inv_vars_elim and set
5129          inv_expr_elim instead.  */
5130       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5131         {
5132           inv_expr_elim = get_loop_invariant_expr (data, bound);
5133           bitmap_clear (inv_vars_elim);
5134         }
5135       /* The bound is a loop invariant, so it will be only computed
5136          once.  */
5137       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5138     }
5139
5140   /* When the condition is a comparison of the candidate IV against
5141      zero, prefer this IV.
5142
5143      TODO: The constant that we're subtracting from the cost should
5144      be target-dependent.  This information should be added to the
5145      target costs for each backend.  */
5146   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5147       && integer_zerop (*bound_cst)
5148       && (operand_equal_p (*control_var, cand->var_after, 0)
5149           || operand_equal_p (*control_var, cand->var_before, 0)))
5150     elim_cost -= 1;
5151
5152   express_cost = get_computation_cost (data, use, cand, false,
5153                                        &inv_vars_express, NULL,
5154                                        &inv_expr_express);
5155   if (cmp_iv != NULL)
5156     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5157
5158   /* Count the cost of the original bound as well.  */
5159   bound_cost = force_var_cost (data, *bound_cst, NULL);
5160   if (bound_cost.cost == 0)
5161     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5162   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5163     bound_cost.cost = 0;
5164   express_cost += bound_cost;
5165
5166   /* Choose the better approach, preferring the eliminated IV. */
5167   if (elim_cost <= express_cost)
5168     {
5169       cost = elim_cost;
5170       inv_vars = inv_vars_elim;
5171       inv_vars_elim = NULL;
5172       inv_expr = inv_expr_elim;
5173     }
5174   else
5175     {
5176       cost = express_cost;
5177       inv_vars = inv_vars_express;
5178       inv_vars_express = NULL;
5179       bound = NULL_TREE;
5180       comp = ERROR_MARK;
5181       inv_expr = inv_expr_express;
5182     }
5183
5184   if (inv_expr)
5185     {
5186       inv_exprs = BITMAP_ALLOC (NULL);
5187       bitmap_set_bit (inv_exprs, inv_expr->id);
5188     }
5189   set_group_iv_cost (data, group, cand, cost,
5190                      inv_vars, bound, comp, inv_exprs);
5191
5192   if (inv_vars_elim)
5193     BITMAP_FREE (inv_vars_elim);
5194   if (inv_vars_express)
5195     BITMAP_FREE (inv_vars_express);
5196
5197   return !cost.infinite_cost_p ();
5198 }
5199
5200 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5201    if USE cannot be represented with CAND.  */
5202
5203 static bool
5204 determine_group_iv_cost (struct ivopts_data *data,
5205                          struct iv_group *group, struct iv_cand *cand)
5206 {
5207   switch (group->type)
5208     {
5209     case USE_NONLINEAR_EXPR:
5210       return determine_group_iv_cost_generic (data, group, cand);
5211
5212     case USE_ADDRESS:
5213       return determine_group_iv_cost_address (data, group, cand);
5214
5215     case USE_COMPARE:
5216       return determine_group_iv_cost_cond (data, group, cand);
5217
5218     default:
5219       gcc_unreachable ();
5220     }
5221 }
5222
5223 /* Return true if get_computation_cost indicates that autoincrement is
5224    a possibility for the pair of USE and CAND, false otherwise.  */
5225
5226 static bool
5227 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5228                            struct iv_cand *cand)
5229 {
5230   if (use->type != USE_ADDRESS)
5231     return false;
5232
5233   bool can_autoinc = false;
5234   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5235   return can_autoinc;
5236 }
5237
5238 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5239    use that allows autoincrement, and set their AINC_USE if possible.  */
5240
5241 static void
5242 set_autoinc_for_original_candidates (struct ivopts_data *data)
5243 {
5244   unsigned i, j;
5245
5246   for (i = 0; i < data->vcands.length (); i++)
5247     {
5248       struct iv_cand *cand = data->vcands[i];
5249       struct iv_use *closest_before = NULL;
5250       struct iv_use *closest_after = NULL;
5251       if (cand->pos != IP_ORIGINAL)
5252         continue;
5253
5254       for (j = 0; j < data->vgroups.length (); j++)
5255         {
5256           struct iv_group *group = data->vgroups[j];
5257           struct iv_use *use = group->vuses[0];
5258           unsigned uid = gimple_uid (use->stmt);
5259
5260           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5261             continue;
5262
5263           if (uid < gimple_uid (cand->incremented_at)
5264               && (closest_before == NULL
5265                   || uid > gimple_uid (closest_before->stmt)))
5266             closest_before = use;
5267
5268           if (uid > gimple_uid (cand->incremented_at)
5269               && (closest_after == NULL
5270                   || uid < gimple_uid (closest_after->stmt)))
5271             closest_after = use;
5272         }
5273
5274       if (closest_before != NULL
5275           && autoinc_possible_for_pair (data, closest_before, cand))
5276         cand->ainc_use = closest_before;
5277       else if (closest_after != NULL
5278                && autoinc_possible_for_pair (data, closest_after, cand))
5279         cand->ainc_use = closest_after;
5280     }
5281 }
5282
5283 /* Relate compare use with all candidates.  */
5284
5285 static void
5286 relate_compare_use_with_all_cands (struct ivopts_data *data)
5287 {
5288   unsigned i, max_id = data->vcands.length () - 1;
5289   for (i = 0; i < data->vgroups.length (); i++)
5290     {
5291       struct iv_group *group = data->vgroups[i];
5292
5293       if (group->type == USE_COMPARE)
5294         bitmap_set_range (group->related_cands, 0, max_id);
5295     }
5296 }
5297
5298 /* Finds the candidates for the induction variables.  */
5299
5300 static void
5301 find_iv_candidates (struct ivopts_data *data)
5302 {
5303   /* Add commonly used ivs.  */
5304   add_standard_iv_candidates (data);
5305
5306   /* Add old induction variables.  */
5307   add_iv_candidate_for_bivs (data);
5308
5309   /* Add induction variables derived from uses.  */
5310   add_iv_candidate_for_groups (data);
5311
5312   set_autoinc_for_original_candidates (data);
5313
5314   /* Record the important candidates.  */
5315   record_important_candidates (data);
5316
5317   /* Relate compare iv_use with all candidates.  */
5318   if (!data->consider_all_candidates)
5319     relate_compare_use_with_all_cands (data);
5320
5321   if (dump_file && (dump_flags & TDF_DETAILS))
5322     {
5323       unsigned i;
5324
5325       fprintf (dump_file, "\n<Important Candidates>:\t");
5326       for (i = 0; i < data->vcands.length (); i++)
5327         if (data->vcands[i]->important)
5328           fprintf (dump_file, " %d,", data->vcands[i]->id);
5329       fprintf (dump_file, "\n");
5330
5331       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5332       for (i = 0; i < data->vgroups.length (); i++)
5333         {
5334           struct iv_group *group = data->vgroups[i];
5335
5336           if (group->related_cands)
5337             {
5338               fprintf (dump_file, "  Group %d:\t", group->id);
5339               dump_bitmap (dump_file, group->related_cands);
5340             }
5341         }
5342       fprintf (dump_file, "\n");
5343     }
5344 }
5345
5346 /* Determines costs of computing use of iv with an iv candidate.  */
5347
5348 static void
5349 determine_group_iv_costs (struct ivopts_data *data)
5350 {
5351   unsigned i, j;
5352   struct iv_cand *cand;
5353   struct iv_group *group;
5354   bitmap to_clear = BITMAP_ALLOC (NULL);
5355
5356   alloc_use_cost_map (data);
5357
5358   for (i = 0; i < data->vgroups.length (); i++)
5359     {
5360       group = data->vgroups[i];
5361
5362       if (data->consider_all_candidates)
5363         {
5364           for (j = 0; j < data->vcands.length (); j++)
5365             {
5366               cand = data->vcands[j];
5367               determine_group_iv_cost (data, group, cand);
5368             }
5369         }
5370       else
5371         {
5372           bitmap_iterator bi;
5373
5374           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5375             {
5376               cand = data->vcands[j];
5377               if (!determine_group_iv_cost (data, group, cand))
5378                 bitmap_set_bit (to_clear, j);
5379             }
5380
5381           /* Remove the candidates for that the cost is infinite from
5382              the list of related candidates.  */
5383           bitmap_and_compl_into (group->related_cands, to_clear);
5384           bitmap_clear (to_clear);
5385         }
5386     }
5387
5388   BITMAP_FREE (to_clear);
5389
5390   if (dump_file && (dump_flags & TDF_DETAILS))
5391     {
5392       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5393       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5394
5395       for (hash_table<iv_inv_expr_hasher>::iterator it
5396            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5397            ++it)
5398         list.safe_push (*it);
5399
5400       list.qsort (sort_iv_inv_expr_ent);
5401
5402       for (i = 0; i < list.length (); ++i)
5403         {
5404           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5405           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5406           fprintf (dump_file, "\n");
5407         }
5408
5409       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5410
5411       for (i = 0; i < data->vgroups.length (); i++)
5412         {
5413           group = data->vgroups[i];
5414
5415           fprintf (dump_file, "Group %d:\n", i);
5416           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5417           for (j = 0; j < group->n_map_members; j++)
5418             {
5419               if (!group->cost_map[j].cand
5420                   || group->cost_map[j].cost.infinite_cost_p ())
5421                 continue;
5422
5423               fprintf (dump_file, "  %d\t%d\t%d\t",
5424                        group->cost_map[j].cand->id,
5425                        group->cost_map[j].cost.cost,
5426                        group->cost_map[j].cost.complexity);
5427               if (!group->cost_map[j].inv_exprs
5428                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
5429                 fprintf (dump_file, "NIL;\t");
5430               else
5431                 bitmap_print (dump_file,
5432                               group->cost_map[j].inv_exprs, "", ";\t");
5433               if (!group->cost_map[j].inv_vars
5434                   || bitmap_empty_p (group->cost_map[j].inv_vars))
5435                 fprintf (dump_file, "NIL;\n");
5436               else
5437                 bitmap_print (dump_file,
5438                               group->cost_map[j].inv_vars, "", "\n");
5439             }
5440
5441           fprintf (dump_file, "\n");
5442         }
5443       fprintf (dump_file, "\n");
5444     }
5445 }
5446
5447 /* Determines cost of the candidate CAND.  */
5448
5449 static void
5450 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5451 {
5452   comp_cost cost_base;
5453   unsigned cost, cost_step;
5454   tree base;
5455
5456   gcc_assert (cand->iv != NULL);
5457
5458   /* There are two costs associated with the candidate -- its increment
5459      and its initialization.  The second is almost negligible for any loop
5460      that rolls enough, so we take it just very little into account.  */
5461
5462   base = cand->iv->base;
5463   cost_base = force_var_cost (data, base, NULL);
5464   /* It will be exceptional that the iv register happens to be initialized with
5465      the proper value at no cost.  In general, there will at least be a regcopy
5466      or a const set.  */
5467   if (cost_base.cost == 0)
5468     cost_base.cost = COSTS_N_INSNS (1);
5469   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5470
5471   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5472
5473   /* Prefer the original ivs unless we may gain something by replacing it.
5474      The reason is to make debugging simpler; so this is not relevant for
5475      artificial ivs created by other optimization passes.  */
5476   if (cand->pos != IP_ORIGINAL
5477       || !SSA_NAME_VAR (cand->var_before)
5478       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5479     cost++;
5480
5481   /* Prefer not to insert statements into latch unless there are some
5482      already (so that we do not create unnecessary jumps).  */
5483   if (cand->pos == IP_END
5484       && empty_block_p (ip_end_pos (data->current_loop)))
5485     cost++;
5486
5487   cand->cost = cost;
5488   cand->cost_step = cost_step;
5489 }
5490
5491 /* Determines costs of computation of the candidates.  */
5492
5493 static void
5494 determine_iv_costs (struct ivopts_data *data)
5495 {
5496   unsigned i;
5497
5498   if (dump_file && (dump_flags & TDF_DETAILS))
5499     {
5500       fprintf (dump_file, "<Candidate Costs>:\n");
5501       fprintf (dump_file, "  cand\tcost\n");
5502     }
5503
5504   for (i = 0; i < data->vcands.length (); i++)
5505     {
5506       struct iv_cand *cand = data->vcands[i];
5507
5508       determine_iv_cost (data, cand);
5509
5510       if (dump_file && (dump_flags & TDF_DETAILS))
5511         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5512     }
5513
5514   if (dump_file && (dump_flags & TDF_DETAILS))
5515     fprintf (dump_file, "\n");
5516 }
5517
5518 /* Calculates cost for having N_REGS registers.  This number includes
5519    induction variables, invariant variables and invariant expressions.  */
5520
5521 static unsigned
5522 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned n_regs)
5523 {
5524   unsigned cost = estimate_reg_pressure_cost (n_regs,
5525                                               data->regs_used, data->speed,
5526                                               data->body_includes_call);
5527   /* Add n_regs to the cost, so that we prefer eliminating ivs if possible.  */
5528   return n_regs + cost;
5529 }
5530
5531 /* For each size of the induction variable set determine the penalty.  */
5532
5533 static void
5534 determine_set_costs (struct ivopts_data *data)
5535 {
5536   unsigned j, n;
5537   gphi *phi;
5538   gphi_iterator psi;
5539   tree op;
5540   struct loop *loop = data->current_loop;
5541   bitmap_iterator bi;
5542
5543   if (dump_file && (dump_flags & TDF_DETAILS))
5544     {
5545       fprintf (dump_file, "<Global Costs>:\n");
5546       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5547       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5548       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5549       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5550     }
5551
5552   n = 0;
5553   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5554     {
5555       phi = psi.phi ();
5556       op = PHI_RESULT (phi);
5557
5558       if (virtual_operand_p (op))
5559         continue;
5560
5561       if (get_iv (data, op))
5562         continue;
5563
5564       n++;
5565     }
5566
5567   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5568     {
5569       struct version_info *info = ver_info (data, j);
5570
5571       if (info->inv_id && info->has_nonlin_use)
5572         n++;
5573     }
5574
5575   data->regs_used = n;
5576   if (dump_file && (dump_flags & TDF_DETAILS))
5577     fprintf (dump_file, "  regs_used %d\n", n);
5578
5579   if (dump_file && (dump_flags & TDF_DETAILS))
5580     {
5581       fprintf (dump_file, "  cost for size:\n");
5582       fprintf (dump_file, "  ivs\tcost\n");
5583       for (j = 0; j <= 2 * target_avail_regs; j++)
5584         fprintf (dump_file, "  %d\t%d\n", j,
5585                  ivopts_global_cost_for_size (data, j));
5586       fprintf (dump_file, "\n");
5587     }
5588 }
5589
5590 /* Returns true if A is a cheaper cost pair than B.  */
5591
5592 static bool
5593 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5594 {
5595   if (!a)
5596     return false;
5597
5598   if (!b)
5599     return true;
5600
5601   if (a->cost < b->cost)
5602     return true;
5603
5604   if (b->cost < a->cost)
5605     return false;
5606
5607   /* In case the costs are the same, prefer the cheaper candidate.  */
5608   if (a->cand->cost < b->cand->cost)
5609     return true;
5610
5611   return false;
5612 }
5613
5614
5615 /* Returns candidate by that USE is expressed in IVS.  */
5616
5617 static struct cost_pair *
5618 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5619 {
5620   return ivs->cand_for_group[group->id];
5621 }
5622
5623 /* Computes the cost field of IVS structure.  */
5624
5625 static void
5626 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5627 {
5628   comp_cost cost = ivs->cand_use_cost;
5629
5630   cost += ivs->cand_cost;
5631   cost += ivopts_global_cost_for_size (data, ivs->n_invs + ivs->n_cands);
5632   ivs->cost = cost;
5633 }
5634
5635 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5636    and IVS.  */
5637
5638 static void
5639 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5640 {
5641   bitmap_iterator bi;
5642   unsigned iid;
5643
5644   if (!invs)
5645     return;
5646
5647   gcc_assert (n_inv_uses != NULL);
5648   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5649     {
5650       n_inv_uses[iid]--;
5651       if (n_inv_uses[iid] == 0)
5652         ivs->n_invs--;
5653     }
5654 }
5655
5656 /* Set USE not to be expressed by any candidate in IVS.  */
5657
5658 static void
5659 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5660                  struct iv_group *group)
5661 {
5662   unsigned gid = group->id, cid;
5663   struct cost_pair *cp;
5664
5665   cp = ivs->cand_for_group[gid];
5666   if (!cp)
5667     return;
5668   cid = cp->cand->id;
5669
5670   ivs->bad_groups++;
5671   ivs->cand_for_group[gid] = NULL;
5672   ivs->n_cand_uses[cid]--;
5673
5674   if (ivs->n_cand_uses[cid] == 0)
5675     {
5676       bitmap_clear_bit (ivs->cands, cid);
5677       ivs->n_cands--;
5678       ivs->cand_cost -= cp->cand->cost;
5679       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5680       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5681     }
5682
5683   ivs->cand_use_cost -= cp->cost;
5684   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5685   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5686   iv_ca_recount_cost (data, ivs);
5687 }
5688
5689 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5690    IVS.  */
5691
5692 static void
5693 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5694 {
5695   bitmap_iterator bi;
5696   unsigned iid;
5697
5698   if (!invs)
5699     return;
5700
5701   gcc_assert (n_inv_uses != NULL);
5702   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5703     {
5704       n_inv_uses[iid]++;
5705       if (n_inv_uses[iid] == 1)
5706         ivs->n_invs++;
5707     }
5708 }
5709
5710 /* Set cost pair for GROUP in set IVS to CP.  */
5711
5712 static void
5713 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5714               struct iv_group *group, struct cost_pair *cp)
5715 {
5716   unsigned gid = group->id, cid;
5717
5718   if (ivs->cand_for_group[gid] == cp)
5719     return;
5720
5721   if (ivs->cand_for_group[gid])
5722     iv_ca_set_no_cp (data, ivs, group);
5723
5724   if (cp)
5725     {
5726       cid = cp->cand->id;
5727
5728       ivs->bad_groups--;
5729       ivs->cand_for_group[gid] = cp;
5730       ivs->n_cand_uses[cid]++;
5731       if (ivs->n_cand_uses[cid] == 1)
5732         {
5733           bitmap_set_bit (ivs->cands, cid);
5734           ivs->n_cands++;
5735           ivs->cand_cost += cp->cand->cost;
5736           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5737           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5738         }
5739
5740       ivs->cand_use_cost += cp->cost;
5741       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5742       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5743       iv_ca_recount_cost (data, ivs);
5744     }
5745 }
5746
5747 /* Extend set IVS by expressing USE by some of the candidates in it
5748    if possible.  Consider all important candidates if candidates in
5749    set IVS don't give any result.  */
5750
5751 static void
5752 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5753                struct iv_group *group)
5754 {
5755   struct cost_pair *best_cp = NULL, *cp;
5756   bitmap_iterator bi;
5757   unsigned i;
5758   struct iv_cand *cand;
5759
5760   gcc_assert (ivs->upto >= group->id);
5761   ivs->upto++;
5762   ivs->bad_groups++;
5763
5764   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5765     {
5766       cand = data->vcands[i];
5767       cp = get_group_iv_cost (data, group, cand);
5768       if (cheaper_cost_pair (cp, best_cp))
5769         best_cp = cp;
5770     }
5771
5772   if (best_cp == NULL)
5773     {
5774       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5775         {
5776           cand = data->vcands[i];
5777           cp = get_group_iv_cost (data, group, cand);
5778           if (cheaper_cost_pair (cp, best_cp))
5779             best_cp = cp;
5780         }
5781     }
5782
5783   iv_ca_set_cp (data, ivs, group, best_cp);
5784 }
5785
5786 /* Get cost for assignment IVS.  */
5787
5788 static comp_cost
5789 iv_ca_cost (struct iv_ca *ivs)
5790 {
5791   /* This was a conditional expression but it triggered a bug in
5792      Sun C 5.5.  */
5793   if (ivs->bad_groups)
5794     return infinite_cost;
5795   else
5796     return ivs->cost;
5797 }
5798
5799 /* Returns true if applying NEW_CP to GROUP for IVS introduces more
5800    invariants than OLD_CP.  */
5801
5802 static bool
5803 iv_ca_more_deps (struct ivopts_data *data, struct iv_ca *ivs,
5804                  struct iv_group *group, struct cost_pair *old_cp,
5805                  struct cost_pair *new_cp)
5806 {
5807   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5808   unsigned old_n_invs = ivs->n_invs;
5809   iv_ca_set_cp (data, ivs, group, new_cp);
5810   unsigned new_n_invs = ivs->n_invs;
5811   iv_ca_set_cp (data, ivs, group, old_cp);
5812
5813   return (new_n_invs > old_n_invs);
5814 }
5815
5816 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5817    it before NEXT.  */
5818
5819 static struct iv_ca_delta *
5820 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5821                  struct cost_pair *new_cp, struct iv_ca_delta *next)
5822 {
5823   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5824
5825   change->group = group;
5826   change->old_cp = old_cp;
5827   change->new_cp = new_cp;
5828   change->next = next;
5829
5830   return change;
5831 }
5832
5833 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5834    are rewritten.  */
5835
5836 static struct iv_ca_delta *
5837 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5838 {
5839   struct iv_ca_delta *last;
5840
5841   if (!l2)
5842     return l1;
5843
5844   if (!l1)
5845     return l2;
5846
5847   for (last = l1; last->next; last = last->next)
5848     continue;
5849   last->next = l2;
5850
5851   return l1;
5852 }
5853
5854 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5855
5856 static struct iv_ca_delta *
5857 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5858 {
5859   struct iv_ca_delta *act, *next, *prev = NULL;
5860
5861   for (act = delta; act; act = next)
5862     {
5863       next = act->next;
5864       act->next = prev;
5865       prev = act;
5866
5867       std::swap (act->old_cp, act->new_cp);
5868     }
5869
5870   return prev;
5871 }
5872
5873 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5874    reverted instead.  */
5875
5876 static void
5877 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5878                     struct iv_ca_delta *delta, bool forward)
5879 {
5880   struct cost_pair *from, *to;
5881   struct iv_ca_delta *act;
5882
5883   if (!forward)
5884     delta = iv_ca_delta_reverse (delta);
5885
5886   for (act = delta; act; act = act->next)
5887     {
5888       from = act->old_cp;
5889       to = act->new_cp;
5890       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
5891       iv_ca_set_cp (data, ivs, act->group, to);
5892     }
5893
5894   if (!forward)
5895     iv_ca_delta_reverse (delta);
5896 }
5897
5898 /* Returns true if CAND is used in IVS.  */
5899
5900 static bool
5901 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5902 {
5903   return ivs->n_cand_uses[cand->id] > 0;
5904 }
5905
5906 /* Returns number of induction variable candidates in the set IVS.  */
5907
5908 static unsigned
5909 iv_ca_n_cands (struct iv_ca *ivs)
5910 {
5911   return ivs->n_cands;
5912 }
5913
5914 /* Free the list of changes DELTA.  */
5915
5916 static void
5917 iv_ca_delta_free (struct iv_ca_delta **delta)
5918 {
5919   struct iv_ca_delta *act, *next;
5920
5921   for (act = *delta; act; act = next)
5922     {
5923       next = act->next;
5924       free (act);
5925     }
5926
5927   *delta = NULL;
5928 }
5929
5930 /* Allocates new iv candidates assignment.  */
5931
5932 static struct iv_ca *
5933 iv_ca_new (struct ivopts_data *data)
5934 {
5935   struct iv_ca *nw = XNEW (struct iv_ca);
5936
5937   nw->upto = 0;
5938   nw->bad_groups = 0;
5939   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
5940                                  data->vgroups.length ());
5941   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
5942   nw->cands = BITMAP_ALLOC (NULL);
5943   nw->n_cands = 0;
5944   nw->n_invs = 0;
5945   nw->cand_use_cost = no_cost;
5946   nw->cand_cost = 0;
5947   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
5948   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
5949   nw->cost = no_cost;
5950
5951   return nw;
5952 }
5953
5954 /* Free memory occupied by the set IVS.  */
5955
5956 static void
5957 iv_ca_free (struct iv_ca **ivs)
5958 {
5959   free ((*ivs)->cand_for_group);
5960   free ((*ivs)->n_cand_uses);
5961   BITMAP_FREE ((*ivs)->cands);
5962   free ((*ivs)->n_inv_var_uses);
5963   free ((*ivs)->n_inv_expr_uses);
5964   free (*ivs);
5965   *ivs = NULL;
5966 }
5967
5968 /* Dumps IVS to FILE.  */
5969
5970 static void
5971 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5972 {
5973   unsigned i;
5974   comp_cost cost = iv_ca_cost (ivs);
5975
5976   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
5977            cost.complexity);
5978   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
5979            ivs->cand_cost, ivs->cand_use_cost.cost,
5980            ivs->cand_use_cost.complexity);
5981   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5982
5983   for (i = 0; i < ivs->upto; i++)
5984     {
5985       struct iv_group *group = data->vgroups[i];
5986       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
5987       if (cp)
5988         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
5989                  group->id, cp->cand->id, cp->cost.cost,
5990                  cp->cost.complexity);
5991       else
5992         fprintf (file, "   group:%d --> ??\n", group->id);
5993     }
5994
5995   const char *pref = "";
5996   fprintf (file, "  invariant variables: ");
5997   for (i = 1; i <= data->max_inv_var_id; i++)
5998     if (ivs->n_inv_var_uses[i])
5999       {
6000         fprintf (file, "%s%d", pref, i);
6001         pref = ", ";
6002       }
6003
6004   pref = "";
6005   fprintf (file, "\n  invariant expressions: ");
6006   for (i = 1; i <= data->max_inv_expr_id; i++)
6007     if (ivs->n_inv_expr_uses[i])
6008       {
6009         fprintf (file, "%s%d", pref, i);
6010         pref = ", ";
6011       }
6012
6013   fprintf (file, "\n\n");
6014 }
6015
6016 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6017    new set, and store differences in DELTA.  Number of induction variables
6018    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6019    the function will try to find a solution with mimimal iv candidates.  */
6020
6021 static comp_cost
6022 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6023               struct iv_cand *cand, struct iv_ca_delta **delta,
6024               unsigned *n_ivs, bool min_ncand)
6025 {
6026   unsigned i;
6027   comp_cost cost;
6028   struct iv_group *group;
6029   struct cost_pair *old_cp, *new_cp;
6030
6031   *delta = NULL;
6032   for (i = 0; i < ivs->upto; i++)
6033     {
6034       group = data->vgroups[i];
6035       old_cp = iv_ca_cand_for_group (ivs, group);
6036
6037       if (old_cp
6038           && old_cp->cand == cand)
6039         continue;
6040
6041       new_cp = get_group_iv_cost (data, group, cand);
6042       if (!new_cp)
6043         continue;
6044
6045       if (!min_ncand && iv_ca_more_deps (data, ivs, group, old_cp, new_cp))
6046         continue;
6047
6048       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6049         continue;
6050
6051       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6052     }
6053
6054   iv_ca_delta_commit (data, ivs, *delta, true);
6055   cost = iv_ca_cost (ivs);
6056   if (n_ivs)
6057     *n_ivs = iv_ca_n_cands (ivs);
6058   iv_ca_delta_commit (data, ivs, *delta, false);
6059
6060   return cost;
6061 }
6062
6063 /* Try narrowing set IVS by removing CAND.  Return the cost of
6064    the new set and store the differences in DELTA.  START is
6065    the candidate with which we start narrowing.  */
6066
6067 static comp_cost
6068 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6069               struct iv_cand *cand, struct iv_cand *start,
6070               struct iv_ca_delta **delta)
6071 {
6072   unsigned i, ci;
6073   struct iv_group *group;
6074   struct cost_pair *old_cp, *new_cp, *cp;
6075   bitmap_iterator bi;
6076   struct iv_cand *cnd;
6077   comp_cost cost, best_cost, acost;
6078
6079   *delta = NULL;
6080   for (i = 0; i < data->vgroups.length (); i++)
6081     {
6082       group = data->vgroups[i];
6083
6084       old_cp = iv_ca_cand_for_group (ivs, group);
6085       if (old_cp->cand != cand)
6086         continue;
6087
6088       best_cost = iv_ca_cost (ivs);
6089       /* Start narrowing with START.  */
6090       new_cp = get_group_iv_cost (data, group, start);
6091
6092       if (data->consider_all_candidates)
6093         {
6094           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6095             {
6096               if (ci == cand->id || (start && ci == start->id))
6097                 continue;
6098
6099               cnd = data->vcands[ci];
6100
6101               cp = get_group_iv_cost (data, group, cnd);
6102               if (!cp)
6103                 continue;
6104
6105               iv_ca_set_cp (data, ivs, group, cp);
6106               acost = iv_ca_cost (ivs);
6107
6108               if (acost < best_cost)
6109                 {
6110                   best_cost = acost;
6111                   new_cp = cp;
6112                 }
6113             }
6114         }
6115       else
6116         {
6117           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6118             {
6119               if (ci == cand->id || (start && ci == start->id))
6120                 continue;
6121
6122               cnd = data->vcands[ci];
6123
6124               cp = get_group_iv_cost (data, group, cnd);
6125               if (!cp)
6126                 continue;
6127
6128               iv_ca_set_cp (data, ivs, group, cp);
6129               acost = iv_ca_cost (ivs);
6130
6131               if (acost < best_cost)
6132                 {
6133                   best_cost = acost;
6134                   new_cp = cp;
6135                 }
6136             }
6137         }
6138       /* Restore to old cp for use.  */
6139       iv_ca_set_cp (data, ivs, group, old_cp);
6140
6141       if (!new_cp)
6142         {
6143           iv_ca_delta_free (delta);
6144           return infinite_cost;
6145         }
6146
6147       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6148     }
6149
6150   iv_ca_delta_commit (data, ivs, *delta, true);
6151   cost = iv_ca_cost (ivs);
6152   iv_ca_delta_commit (data, ivs, *delta, false);
6153
6154   return cost;
6155 }
6156
6157 /* Try optimizing the set of candidates IVS by removing candidates different
6158    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6159    differences in DELTA.  */
6160
6161 static comp_cost
6162 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6163              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6164 {
6165   bitmap_iterator bi;
6166   struct iv_ca_delta *act_delta, *best_delta;
6167   unsigned i;
6168   comp_cost best_cost, acost;
6169   struct iv_cand *cand;
6170
6171   best_delta = NULL;
6172   best_cost = iv_ca_cost (ivs);
6173
6174   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6175     {
6176       cand = data->vcands[i];
6177
6178       if (cand == except_cand)
6179         continue;
6180
6181       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6182
6183       if (acost < best_cost)
6184         {
6185           best_cost = acost;
6186           iv_ca_delta_free (&best_delta);
6187           best_delta = act_delta;
6188         }
6189       else
6190         iv_ca_delta_free (&act_delta);
6191     }
6192
6193   if (!best_delta)
6194     {
6195       *delta = NULL;
6196       return best_cost;
6197     }
6198
6199   /* Recurse to possibly remove other unnecessary ivs.  */
6200   iv_ca_delta_commit (data, ivs, best_delta, true);
6201   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6202   iv_ca_delta_commit (data, ivs, best_delta, false);
6203   *delta = iv_ca_delta_join (best_delta, *delta);
6204   return best_cost;
6205 }
6206
6207 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6208    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6209    the corresponding cost_pair, otherwise just return BEST_CP.  */
6210
6211 static struct cost_pair*
6212 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6213                         unsigned int cand_idx, struct iv_cand *old_cand,
6214                         struct cost_pair *best_cp)
6215 {
6216   struct iv_cand *cand;
6217   struct cost_pair *cp;
6218
6219   gcc_assert (old_cand != NULL && best_cp != NULL);
6220   if (cand_idx == old_cand->id)
6221     return best_cp;
6222
6223   cand = data->vcands[cand_idx];
6224   cp = get_group_iv_cost (data, group, cand);
6225   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6226     return cp;
6227
6228   return best_cp;
6229 }
6230
6231 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6232    which are used by more than one iv uses.  For each of those candidates,
6233    this function tries to represent iv uses under that candidate using
6234    other ones with lower local cost, then tries to prune the new set.
6235    If the new set has lower cost, It returns the new cost after recording
6236    candidate replacement in list DELTA.  */
6237
6238 static comp_cost
6239 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6240                struct iv_ca_delta **delta)
6241 {
6242   bitmap_iterator bi, bj;
6243   unsigned int i, j, k;
6244   struct iv_cand *cand;
6245   comp_cost orig_cost, acost;
6246   struct iv_ca_delta *act_delta, *tmp_delta;
6247   struct cost_pair *old_cp, *best_cp = NULL;
6248
6249   *delta = NULL;
6250   orig_cost = iv_ca_cost (ivs);
6251
6252   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6253     {
6254       if (ivs->n_cand_uses[i] == 1
6255           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6256         continue;
6257
6258       cand = data->vcands[i];
6259
6260       act_delta = NULL;
6261       /*  Represent uses under current candidate using other ones with
6262           lower local cost.  */
6263       for (j = 0; j < ivs->upto; j++)
6264         {
6265           struct iv_group *group = data->vgroups[j];
6266           old_cp = iv_ca_cand_for_group (ivs, group);
6267
6268           if (old_cp->cand != cand)
6269             continue;
6270
6271           best_cp = old_cp;
6272           if (data->consider_all_candidates)
6273             for (k = 0; k < data->vcands.length (); k++)
6274               best_cp = cheaper_cost_with_cand (data, group, k,
6275                                                 old_cp->cand, best_cp);
6276           else
6277             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6278               best_cp = cheaper_cost_with_cand (data, group, k,
6279                                                 old_cp->cand, best_cp);
6280
6281           if (best_cp == old_cp)
6282             continue;
6283
6284           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6285         }
6286       /* No need for further prune.  */
6287       if (!act_delta)
6288         continue;
6289
6290       /* Prune the new candidate set.  */
6291       iv_ca_delta_commit (data, ivs, act_delta, true);
6292       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6293       iv_ca_delta_commit (data, ivs, act_delta, false);
6294       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6295
6296       if (acost < orig_cost)
6297         {
6298           *delta = act_delta;
6299           return acost;
6300         }
6301       else
6302         iv_ca_delta_free (&act_delta);
6303     }
6304
6305   return orig_cost;
6306 }
6307
6308 /* Tries to extend the sets IVS in the best possible way in order to
6309    express the GROUP.  If ORIGINALP is true, prefer candidates from
6310    the original set of IVs, otherwise favor important candidates not
6311    based on any memory object.  */
6312
6313 static bool
6314 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6315                   struct iv_group *group, bool originalp)
6316 {
6317   comp_cost best_cost, act_cost;
6318   unsigned i;
6319   bitmap_iterator bi;
6320   struct iv_cand *cand;
6321   struct iv_ca_delta *best_delta = NULL, *act_delta;
6322   struct cost_pair *cp;
6323
6324   iv_ca_add_group (data, ivs, group);
6325   best_cost = iv_ca_cost (ivs);
6326   cp = iv_ca_cand_for_group (ivs, group);
6327   if (cp)
6328     {
6329       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6330       iv_ca_set_no_cp (data, ivs, group);
6331     }
6332
6333   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6334      first try important candidates not based on any memory object.  Only if
6335      this fails, try the specific ones.  Rationale -- in loops with many
6336      variables the best choice often is to use just one generic biv.  If we
6337      added here many ivs specific to the uses, the optimization algorithm later
6338      would be likely to get stuck in a local minimum, thus causing us to create
6339      too many ivs.  The approach from few ivs to more seems more likely to be
6340      successful -- starting from few ivs, replacing an expensive use by a
6341      specific iv should always be a win.  */
6342   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6343     {
6344       cand = data->vcands[i];
6345
6346       if (originalp && cand->pos !=IP_ORIGINAL)
6347         continue;
6348
6349       if (!originalp && cand->iv->base_object != NULL_TREE)
6350         continue;
6351
6352       if (iv_ca_cand_used_p (ivs, cand))
6353         continue;
6354
6355       cp = get_group_iv_cost (data, group, cand);
6356       if (!cp)
6357         continue;
6358
6359       iv_ca_set_cp (data, ivs, group, cp);
6360       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6361                                true);
6362       iv_ca_set_no_cp (data, ivs, group);
6363       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6364
6365       if (act_cost < best_cost)
6366         {
6367           best_cost = act_cost;
6368
6369           iv_ca_delta_free (&best_delta);
6370           best_delta = act_delta;
6371         }
6372       else
6373         iv_ca_delta_free (&act_delta);
6374     }
6375
6376   if (best_cost.infinite_cost_p ())
6377     {
6378       for (i = 0; i < group->n_map_members; i++)
6379         {
6380           cp = group->cost_map + i;
6381           cand = cp->cand;
6382           if (!cand)
6383             continue;
6384
6385           /* Already tried this.  */
6386           if (cand->important)
6387             {
6388               if (originalp && cand->pos == IP_ORIGINAL)
6389                 continue;
6390               if (!originalp && cand->iv->base_object == NULL_TREE)
6391                 continue;
6392             }
6393
6394           if (iv_ca_cand_used_p (ivs, cand))
6395             continue;
6396
6397           act_delta = NULL;
6398           iv_ca_set_cp (data, ivs, group, cp);
6399           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6400           iv_ca_set_no_cp (data, ivs, group);
6401           act_delta = iv_ca_delta_add (group,
6402                                        iv_ca_cand_for_group (ivs, group),
6403                                        cp, act_delta);
6404
6405           if (act_cost < best_cost)
6406             {
6407               best_cost = act_cost;
6408
6409               if (best_delta)
6410                 iv_ca_delta_free (&best_delta);
6411               best_delta = act_delta;
6412             }
6413           else
6414             iv_ca_delta_free (&act_delta);
6415         }
6416     }
6417
6418   iv_ca_delta_commit (data, ivs, best_delta, true);
6419   iv_ca_delta_free (&best_delta);
6420
6421   return !best_cost.infinite_cost_p ();
6422 }
6423
6424 /* Finds an initial assignment of candidates to uses.  */
6425
6426 static struct iv_ca *
6427 get_initial_solution (struct ivopts_data *data, bool originalp)
6428 {
6429   unsigned i;
6430   struct iv_ca *ivs = iv_ca_new (data);
6431
6432   for (i = 0; i < data->vgroups.length (); i++)
6433     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6434       {
6435         iv_ca_free (&ivs);
6436         return NULL;
6437       }
6438
6439   return ivs;
6440 }
6441
6442 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6443    points to a bool variable, this function tries to break local
6444    optimal fixed-point by replacing candidates in IVS if it's true.  */
6445
6446 static bool
6447 try_improve_iv_set (struct ivopts_data *data,
6448                     struct iv_ca *ivs, bool *try_replace_p)
6449 {
6450   unsigned i, n_ivs;
6451   comp_cost acost, best_cost = iv_ca_cost (ivs);
6452   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6453   struct iv_cand *cand;
6454
6455   /* Try extending the set of induction variables by one.  */
6456   for (i = 0; i < data->vcands.length (); i++)
6457     {
6458       cand = data->vcands[i];
6459
6460       if (iv_ca_cand_used_p (ivs, cand))
6461         continue;
6462
6463       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6464       if (!act_delta)
6465         continue;
6466
6467       /* If we successfully added the candidate and the set is small enough,
6468          try optimizing it by removing other candidates.  */
6469       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6470         {
6471           iv_ca_delta_commit (data, ivs, act_delta, true);
6472           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6473           iv_ca_delta_commit (data, ivs, act_delta, false);
6474           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6475         }
6476
6477       if (acost < best_cost)
6478         {
6479           best_cost = acost;
6480           iv_ca_delta_free (&best_delta);
6481           best_delta = act_delta;
6482         }
6483       else
6484         iv_ca_delta_free (&act_delta);
6485     }
6486
6487   if (!best_delta)
6488     {
6489       /* Try removing the candidates from the set instead.  */
6490       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6491
6492       if (!best_delta && *try_replace_p)
6493         {
6494           *try_replace_p = false;
6495           /* So far candidate selecting algorithm tends to choose fewer IVs
6496              so that it can handle cases in which loops have many variables
6497              but the best choice is often to use only one general biv.  One
6498              weakness is it can't handle opposite cases, in which different
6499              candidates should be chosen with respect to each use.  To solve
6500              the problem, we replace candidates in a manner described by the
6501              comments of iv_ca_replace, thus give general algorithm a chance
6502              to break local optimal fixed-point in these cases.  */
6503           best_cost = iv_ca_replace (data, ivs, &best_delta);
6504         }
6505
6506       if (!best_delta)
6507         return false;
6508     }
6509
6510   iv_ca_delta_commit (data, ivs, best_delta, true);
6511   gcc_assert (best_cost == iv_ca_cost (ivs));
6512   iv_ca_delta_free (&best_delta);
6513   return true;
6514 }
6515
6516 /* Attempts to find the optimal set of induction variables.  We do simple
6517    greedy heuristic -- we try to replace at most one candidate in the selected
6518    solution and remove the unused ivs while this improves the cost.  */
6519
6520 static struct iv_ca *
6521 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6522 {
6523   struct iv_ca *set;
6524   bool try_replace_p = true;
6525
6526   /* Get the initial solution.  */
6527   set = get_initial_solution (data, originalp);
6528   if (!set)
6529     {
6530       if (dump_file && (dump_flags & TDF_DETAILS))
6531         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6532       return NULL;
6533     }
6534
6535   if (dump_file && (dump_flags & TDF_DETAILS))
6536     {
6537       fprintf (dump_file, "Initial set of candidates:\n");
6538       iv_ca_dump (data, dump_file, set);
6539     }
6540
6541   while (try_improve_iv_set (data, set, &try_replace_p))
6542     {
6543       if (dump_file && (dump_flags & TDF_DETAILS))
6544         {
6545           fprintf (dump_file, "Improved to:\n");
6546           iv_ca_dump (data, dump_file, set);
6547         }
6548     }
6549
6550   return set;
6551 }
6552
6553 static struct iv_ca *
6554 find_optimal_iv_set (struct ivopts_data *data)
6555 {
6556   unsigned i;
6557   comp_cost cost, origcost;
6558   struct iv_ca *set, *origset;
6559
6560   /* Determine the cost based on a strategy that starts with original IVs,
6561      and try again using a strategy that prefers candidates not based
6562      on any IVs.  */
6563   origset = find_optimal_iv_set_1 (data, true);
6564   set = find_optimal_iv_set_1 (data, false);
6565
6566   if (!origset && !set)
6567     return NULL;
6568
6569   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6570   cost = set ? iv_ca_cost (set) : infinite_cost;
6571
6572   if (dump_file && (dump_flags & TDF_DETAILS))
6573     {
6574       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6575                origcost.cost, origcost.complexity);
6576       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6577                cost.cost, cost.complexity);
6578     }
6579
6580   /* Choose the one with the best cost.  */
6581   if (origcost <= cost)
6582     {
6583       if (set)
6584         iv_ca_free (&set);
6585       set = origset;
6586     }
6587   else if (origset)
6588     iv_ca_free (&origset);
6589
6590   for (i = 0; i < data->vgroups.length (); i++)
6591     {
6592       struct iv_group *group = data->vgroups[i];
6593       group->selected = iv_ca_cand_for_group (set, group)->cand;
6594     }
6595
6596   return set;
6597 }
6598
6599 /* Creates a new induction variable corresponding to CAND.  */
6600
6601 static void
6602 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6603 {
6604   gimple_stmt_iterator incr_pos;
6605   tree base;
6606   struct iv_use *use;
6607   struct iv_group *group;
6608   bool after = false;
6609
6610   gcc_assert (cand->iv != NULL);
6611
6612   switch (cand->pos)
6613     {
6614     case IP_NORMAL:
6615       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6616       break;
6617
6618     case IP_END:
6619       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6620       after = true;
6621       break;
6622
6623     case IP_AFTER_USE:
6624       after = true;
6625       /* fall through */
6626     case IP_BEFORE_USE:
6627       incr_pos = gsi_for_stmt (cand->incremented_at);
6628       break;
6629
6630     case IP_ORIGINAL:
6631       /* Mark that the iv is preserved.  */
6632       name_info (data, cand->var_before)->preserve_biv = true;
6633       name_info (data, cand->var_after)->preserve_biv = true;
6634
6635       /* Rewrite the increment so that it uses var_before directly.  */
6636       use = find_interesting_uses_op (data, cand->var_after);
6637       group = data->vgroups[use->group_id];
6638       group->selected = cand;
6639       return;
6640     }
6641
6642   gimple_add_tmp_var (cand->var_before);
6643
6644   base = unshare_expr (cand->iv->base);
6645
6646   create_iv (base, unshare_expr (cand->iv->step),
6647              cand->var_before, data->current_loop,
6648              &incr_pos, after, &cand->var_before, &cand->var_after);
6649 }
6650
6651 /* Creates new induction variables described in SET.  */
6652
6653 static void
6654 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6655 {
6656   unsigned i;
6657   struct iv_cand *cand;
6658   bitmap_iterator bi;
6659
6660   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6661     {
6662       cand = data->vcands[i];
6663       create_new_iv (data, cand);
6664     }
6665
6666   if (dump_file && (dump_flags & TDF_DETAILS))
6667     {
6668       fprintf (dump_file, "Selected IV set for loop %d",
6669                data->current_loop->num);
6670       if (data->loop_loc != UNKNOWN_LOCATION)
6671         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6672                  LOCATION_LINE (data->loop_loc));
6673       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6674                avg_loop_niter (data->current_loop));
6675       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6676       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6677         {
6678           cand = data->vcands[i];
6679           dump_cand (dump_file, cand);
6680         }
6681       fprintf (dump_file, "\n");
6682     }
6683 }
6684
6685 /* Rewrites USE (definition of iv used in a nonlinear expression)
6686    using candidate CAND.  */
6687
6688 static void
6689 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6690                             struct iv_use *use, struct iv_cand *cand)
6691 {
6692   gassign *ass;
6693   gimple_stmt_iterator bsi;
6694   tree comp, type = get_use_type (use), tgt;
6695
6696   /* An important special case -- if we are asked to express value of
6697      the original iv by itself, just exit; there is no need to
6698      introduce a new computation (that might also need casting the
6699      variable to unsigned and back).  */
6700   if (cand->pos == IP_ORIGINAL
6701       && cand->incremented_at == use->stmt)
6702     {
6703       tree op = NULL_TREE;
6704       enum tree_code stmt_code;
6705
6706       gcc_assert (is_gimple_assign (use->stmt));
6707       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6708
6709       /* Check whether we may leave the computation unchanged.
6710          This is the case only if it does not rely on other
6711          computations in the loop -- otherwise, the computation
6712          we rely upon may be removed in remove_unused_ivs,
6713          thus leading to ICE.  */
6714       stmt_code = gimple_assign_rhs_code (use->stmt);
6715       if (stmt_code == PLUS_EXPR
6716           || stmt_code == MINUS_EXPR
6717           || stmt_code == POINTER_PLUS_EXPR)
6718         {
6719           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6720             op = gimple_assign_rhs2 (use->stmt);
6721           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6722             op = gimple_assign_rhs1 (use->stmt);
6723         }
6724
6725       if (op != NULL_TREE)
6726         {
6727           if (expr_invariant_in_loop_p (data->current_loop, op))
6728             return;
6729           if (TREE_CODE (op) == SSA_NAME)
6730             {
6731               struct iv *iv = get_iv (data, op);
6732               if (iv != NULL && integer_zerop (iv->step))
6733                 return;
6734             }
6735         }
6736     }
6737
6738   switch (gimple_code (use->stmt))
6739     {
6740     case GIMPLE_PHI:
6741       tgt = PHI_RESULT (use->stmt);
6742
6743       /* If we should keep the biv, do not replace it.  */
6744       if (name_info (data, tgt)->preserve_biv)
6745         return;
6746
6747       bsi = gsi_after_labels (gimple_bb (use->stmt));
6748       break;
6749
6750     case GIMPLE_ASSIGN:
6751       tgt = gimple_assign_lhs (use->stmt);
6752       bsi = gsi_for_stmt (use->stmt);
6753       break;
6754
6755     default:
6756       gcc_unreachable ();
6757     }
6758
6759   aff_tree aff_inv, aff_var;
6760   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6761                               use, cand, &aff_inv, &aff_var))
6762     gcc_unreachable ();
6763
6764   unshare_aff_combination (&aff_inv);
6765   unshare_aff_combination (&aff_var);
6766   /* Prefer CSE opportunity than loop invariant by adding offset at last
6767      so that iv_uses have different offsets can be CSEed.  */
6768   widest_int offset = aff_inv.offset;
6769   aff_inv.offset = 0;
6770
6771   gimple_seq stmt_list = NULL, seq = NULL;
6772   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6773   tree comp_op2 = aff_combination_to_tree (&aff_var);
6774   gcc_assert (comp_op1 && comp_op2);
6775
6776   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6777   gimple_seq_add_seq (&stmt_list, seq);
6778   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6779   gimple_seq_add_seq (&stmt_list, seq);
6780
6781   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6782     std::swap (comp_op1, comp_op2);
6783
6784   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6785     {
6786       comp = fold_build_pointer_plus (comp_op1,
6787                                       fold_convert (sizetype, comp_op2));
6788       comp = fold_build_pointer_plus (comp,
6789                                       wide_int_to_tree (sizetype, offset));
6790     }
6791   else
6792     {
6793       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6794                           fold_convert (TREE_TYPE (comp_op1), comp_op2));
6795       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6796                           wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6797     }
6798
6799   comp = fold_convert (type, comp);
6800   if (!valid_gimple_rhs_p (comp)
6801       || (gimple_code (use->stmt) != GIMPLE_PHI
6802           /* We can't allow re-allocating the stmt as it might be pointed
6803              to still.  */
6804           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6805               >= gimple_num_ops (gsi_stmt (bsi)))))
6806     {
6807       comp = force_gimple_operand (comp, &seq, true, NULL);
6808       gimple_seq_add_seq (&stmt_list, seq);
6809       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6810         {
6811           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6812           /* As this isn't a plain copy we have to reset alignment
6813              information.  */
6814           if (SSA_NAME_PTR_INFO (comp))
6815             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6816         }
6817     }
6818
6819   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6820   if (gimple_code (use->stmt) == GIMPLE_PHI)
6821     {
6822       ass = gimple_build_assign (tgt, comp);
6823       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6824
6825       bsi = gsi_for_stmt (use->stmt);
6826       remove_phi_node (&bsi, false);
6827     }
6828   else
6829     {
6830       gimple_assign_set_rhs_from_tree (&bsi, comp);
6831       use->stmt = gsi_stmt (bsi);
6832     }
6833 }
6834
6835 /* Performs a peephole optimization to reorder the iv update statement with
6836    a mem ref to enable instruction combining in later phases. The mem ref uses
6837    the iv value before the update, so the reordering transformation requires
6838    adjustment of the offset. CAND is the selected IV_CAND.
6839
6840    Example:
6841
6842    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6843    iv2 = iv1 + 1;
6844
6845    if (t < val)      (1)
6846      goto L;
6847    goto Head;
6848
6849
6850    directly propagating t over to (1) will introduce overlapping live range
6851    thus increase register pressure. This peephole transform it into:
6852
6853
6854    iv2 = iv1 + 1;
6855    t = MEM_REF (base, iv2, 8, 8);
6856    if (t < val)
6857      goto L;
6858    goto Head;
6859 */
6860
6861 static void
6862 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6863 {
6864   tree var_after;
6865   gimple *iv_update, *stmt;
6866   basic_block bb;
6867   gimple_stmt_iterator gsi, gsi_iv;
6868
6869   if (cand->pos != IP_NORMAL)
6870     return;
6871
6872   var_after = cand->var_after;
6873   iv_update = SSA_NAME_DEF_STMT (var_after);
6874
6875   bb = gimple_bb (iv_update);
6876   gsi = gsi_last_nondebug_bb (bb);
6877   stmt = gsi_stmt (gsi);
6878
6879   /* Only handle conditional statement for now.  */
6880   if (gimple_code (stmt) != GIMPLE_COND)
6881     return;
6882
6883   gsi_prev_nondebug (&gsi);
6884   stmt = gsi_stmt (gsi);
6885   if (stmt != iv_update)
6886     return;
6887
6888   gsi_prev_nondebug (&gsi);
6889   if (gsi_end_p (gsi))
6890     return;
6891
6892   stmt = gsi_stmt (gsi);
6893   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6894     return;
6895
6896   if (stmt != use->stmt)
6897     return;
6898
6899   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6900     return;
6901
6902   if (dump_file && (dump_flags & TDF_DETAILS))
6903     {
6904       fprintf (dump_file, "Reordering \n");
6905       print_gimple_stmt (dump_file, iv_update, 0, 0);
6906       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6907       fprintf (dump_file, "\n");
6908     }
6909
6910   gsi = gsi_for_stmt (use->stmt);
6911   gsi_iv = gsi_for_stmt (iv_update);
6912   gsi_move_before (&gsi_iv, &gsi);
6913
6914   cand->pos = IP_BEFORE_USE;
6915   cand->incremented_at = use->stmt;
6916 }
6917
6918 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6919
6920 static void
6921 rewrite_use_address (struct ivopts_data *data,
6922                      struct iv_use *use, struct iv_cand *cand)
6923 {
6924   aff_tree aff;
6925   bool ok;
6926
6927   adjust_iv_update_pos (cand, use);
6928   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
6929   gcc_assert (ok);
6930   unshare_aff_combination (&aff);
6931
6932   /* To avoid undefined overflow problems, all IV candidates use unsigned
6933      integer types.  The drawback is that this makes it impossible for
6934      create_mem_ref to distinguish an IV that is based on a memory object
6935      from one that represents simply an offset.
6936
6937      To work around this problem, we pass a hint to create_mem_ref that
6938      indicates which variable (if any) in aff is an IV based on a memory
6939      object.  Note that we only consider the candidate.  If this is not
6940      based on an object, the base of the reference is in some subexpression
6941      of the use -- but these will use pointer types, so they are recognized
6942      by the create_mem_ref heuristics anyway.  */
6943   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
6944   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
6945   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6946   tree type = TREE_TYPE (*use->op_p);
6947   unsigned int align = get_object_alignment (*use->op_p);
6948   if (align != TYPE_ALIGN (type))
6949     type = build_aligned_type (type, align);
6950
6951   tree ref = create_mem_ref (&bsi, type, &aff,
6952                              reference_alias_ptr_type (*use->op_p),
6953                              iv, base_hint, data->speed);
6954
6955   copy_ref_info (ref, *use->op_p);
6956   *use->op_p = ref;
6957 }
6958
6959 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6960    candidate CAND.  */
6961
6962 static void
6963 rewrite_use_compare (struct ivopts_data *data,
6964                      struct iv_use *use, struct iv_cand *cand)
6965 {
6966   tree comp, op, bound;
6967   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6968   enum tree_code compare;
6969   struct iv_group *group = data->vgroups[use->group_id];
6970   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
6971
6972   bound = cp->value;
6973   if (bound)
6974     {
6975       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6976       tree var_type = TREE_TYPE (var);
6977       gimple_seq stmts;
6978
6979       if (dump_file && (dump_flags & TDF_DETAILS))
6980         {
6981           fprintf (dump_file, "Replacing exit test: ");
6982           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6983         }
6984       compare = cp->comp;
6985       bound = unshare_expr (fold_convert (var_type, bound));
6986       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6987       if (stmts)
6988         gsi_insert_seq_on_edge_immediate (
6989                 loop_preheader_edge (data->current_loop),
6990                 stmts);
6991
6992       gcond *cond_stmt = as_a <gcond *> (use->stmt);
6993       gimple_cond_set_lhs (cond_stmt, var);
6994       gimple_cond_set_code (cond_stmt, compare);
6995       gimple_cond_set_rhs (cond_stmt, op);
6996       return;
6997     }
6998
6999   /* The induction variable elimination failed; just express the original
7000      giv.  */
7001   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7002   gcc_assert (comp != NULL_TREE);
7003   gcc_assert (use->op_p != NULL);
7004   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7005                                          SSA_NAME_VAR (*use->op_p),
7006                                          true, GSI_SAME_STMT);
7007 }
7008
7009 /* Rewrite the groups using the selected induction variables.  */
7010
7011 static void
7012 rewrite_groups (struct ivopts_data *data)
7013 {
7014   unsigned i, j;
7015
7016   for (i = 0; i < data->vgroups.length (); i++)
7017     {
7018       struct iv_group *group = data->vgroups[i];
7019       struct iv_cand *cand = group->selected;
7020
7021       gcc_assert (cand);
7022
7023       if (group->type == USE_NONLINEAR_EXPR)
7024         {
7025           for (j = 0; j < group->vuses.length (); j++)
7026             {
7027               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7028               update_stmt (group->vuses[j]->stmt);
7029             }
7030         }
7031       else if (group->type == USE_ADDRESS)
7032         {
7033           for (j = 0; j < group->vuses.length (); j++)
7034             {
7035               rewrite_use_address (data, group->vuses[j], cand);
7036               update_stmt (group->vuses[j]->stmt);
7037             }
7038         }
7039       else
7040         {
7041           gcc_assert (group->type == USE_COMPARE);
7042
7043           for (j = 0; j < group->vuses.length (); j++)
7044             {
7045               rewrite_use_compare (data, group->vuses[j], cand);
7046               update_stmt (group->vuses[j]->stmt);
7047             }
7048         }
7049     }
7050 }
7051
7052 /* Removes the ivs that are not used after rewriting.  */
7053
7054 static void
7055 remove_unused_ivs (struct ivopts_data *data)
7056 {
7057   unsigned j;
7058   bitmap_iterator bi;
7059   bitmap toremove = BITMAP_ALLOC (NULL);
7060
7061   /* Figure out an order in which to release SSA DEFs so that we don't
7062      release something that we'd have to propagate into a debug stmt
7063      afterwards.  */
7064   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7065     {
7066       struct version_info *info;
7067
7068       info = ver_info (data, j);
7069       if (info->iv
7070           && !integer_zerop (info->iv->step)
7071           && !info->inv_id
7072           && !info->iv->nonlin_use
7073           && !info->preserve_biv)
7074         {
7075           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7076
7077           tree def = info->iv->ssa_name;
7078
7079           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7080             {
7081               imm_use_iterator imm_iter;
7082               use_operand_p use_p;
7083               gimple *stmt;
7084               int count = 0;
7085
7086               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7087                 {
7088                   if (!gimple_debug_bind_p (stmt))
7089                     continue;
7090
7091                   /* We just want to determine whether to do nothing
7092                      (count == 0), to substitute the computed
7093                      expression into a single use of the SSA DEF by
7094                      itself (count == 1), or to use a debug temp
7095                      because the SSA DEF is used multiple times or as
7096                      part of a larger expression (count > 1). */
7097                   count++;
7098                   if (gimple_debug_bind_get_value (stmt) != def)
7099                     count++;
7100
7101                   if (count > 1)
7102                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7103                 }
7104
7105               if (!count)
7106                 continue;
7107
7108               struct iv_use dummy_use;
7109               struct iv_cand *best_cand = NULL, *cand;
7110               unsigned i, best_pref = 0, cand_pref;
7111
7112               memset (&dummy_use, 0, sizeof (dummy_use));
7113               dummy_use.iv = info->iv;
7114               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7115                 {
7116                   cand = data->vgroups[i]->selected;
7117                   if (cand == best_cand)
7118                     continue;
7119                   cand_pref = operand_equal_p (cand->iv->step,
7120                                                info->iv->step, 0)
7121                     ? 4 : 0;
7122                   cand_pref
7123                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7124                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7125                     ? 2 : 0;
7126                   cand_pref
7127                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7128                     ? 1 : 0;
7129                   if (best_cand == NULL || best_pref < cand_pref)
7130                     {
7131                       best_cand = cand;
7132                       best_pref = cand_pref;
7133                     }
7134                 }
7135
7136               if (!best_cand)
7137                 continue;
7138
7139               tree comp = get_computation_at (data->current_loop,
7140                                               SSA_NAME_DEF_STMT (def),
7141                                               &dummy_use, best_cand);
7142               if (!comp)
7143                 continue;
7144
7145               if (count > 1)
7146                 {
7147                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7148                   DECL_ARTIFICIAL (vexpr) = 1;
7149                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7150                   if (SSA_NAME_VAR (def))
7151                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7152                   else
7153                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7154                   gdebug *def_temp
7155                     = gimple_build_debug_bind (vexpr, comp, NULL);
7156                   gimple_stmt_iterator gsi;
7157
7158                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7159                     gsi = gsi_after_labels (gimple_bb
7160                                             (SSA_NAME_DEF_STMT (def)));
7161                   else
7162                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7163
7164                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7165                   comp = vexpr;
7166                 }
7167
7168               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7169                 {
7170                   if (!gimple_debug_bind_p (stmt))
7171                     continue;
7172
7173                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7174                     SET_USE (use_p, comp);
7175
7176                   update_stmt (stmt);
7177                 }
7178             }
7179         }
7180     }
7181
7182   release_defs_bitset (toremove);
7183
7184   BITMAP_FREE (toremove);
7185 }
7186
7187 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7188    for hash_map::traverse.  */
7189
7190 bool
7191 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7192 {
7193   free (value);
7194   return true;
7195 }
7196
7197 /* Frees data allocated by the optimization of a single loop.  */
7198
7199 static void
7200 free_loop_data (struct ivopts_data *data)
7201 {
7202   unsigned i, j;
7203   bitmap_iterator bi;
7204   tree obj;
7205
7206   if (data->niters)
7207     {
7208       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7209       delete data->niters;
7210       data->niters = NULL;
7211     }
7212
7213   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7214     {
7215       struct version_info *info;
7216
7217       info = ver_info (data, i);
7218       info->iv = NULL;
7219       info->has_nonlin_use = false;
7220       info->preserve_biv = false;
7221       info->inv_id = 0;
7222     }
7223   bitmap_clear (data->relevant);
7224   bitmap_clear (data->important_candidates);
7225
7226   for (i = 0; i < data->vgroups.length (); i++)
7227     {
7228       struct iv_group *group = data->vgroups[i];
7229
7230       for (j = 0; j < group->vuses.length (); j++)
7231         free (group->vuses[j]);
7232       group->vuses.release ();
7233
7234       BITMAP_FREE (group->related_cands);
7235       for (j = 0; j < group->n_map_members; j++)
7236         {
7237           if (group->cost_map[j].inv_vars)
7238             BITMAP_FREE (group->cost_map[j].inv_vars);
7239           if (group->cost_map[j].inv_exprs)
7240             BITMAP_FREE (group->cost_map[j].inv_exprs);
7241         }
7242
7243       free (group->cost_map);
7244       free (group);
7245     }
7246   data->vgroups.truncate (0);
7247
7248   for (i = 0; i < data->vcands.length (); i++)
7249     {
7250       struct iv_cand *cand = data->vcands[i];
7251
7252       if (cand->inv_vars)
7253         BITMAP_FREE (cand->inv_vars);
7254       if (cand->inv_exprs)
7255         BITMAP_FREE (cand->inv_exprs);
7256       free (cand);
7257     }
7258   data->vcands.truncate (0);
7259
7260   if (data->version_info_size < num_ssa_names)
7261     {
7262       data->version_info_size = 2 * num_ssa_names;
7263       free (data->version_info);
7264       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7265     }
7266
7267   data->max_inv_var_id = 0;
7268   data->max_inv_expr_id = 0;
7269
7270   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7271     SET_DECL_RTL (obj, NULL_RTX);
7272
7273   decl_rtl_to_reset.truncate (0);
7274
7275   data->inv_expr_tab->empty ();
7276
7277   data->iv_common_cand_tab->empty ();
7278   data->iv_common_cands.truncate (0);
7279 }
7280
7281 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7282    loop tree.  */
7283
7284 static void
7285 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7286 {
7287   free_loop_data (data);
7288   free (data->version_info);
7289   BITMAP_FREE (data->relevant);
7290   BITMAP_FREE (data->important_candidates);
7291
7292   decl_rtl_to_reset.release ();
7293   data->vgroups.release ();
7294   data->vcands.release ();
7295   delete data->inv_expr_tab;
7296   data->inv_expr_tab = NULL;
7297   free_affine_expand_cache (&data->name_expansion_cache);
7298   delete data->iv_common_cand_tab;
7299   data->iv_common_cand_tab = NULL;
7300   data->iv_common_cands.release ();
7301   obstack_free (&data->iv_obstack, NULL);
7302 }
7303
7304 /* Returns true if the loop body BODY includes any function calls.  */
7305
7306 static bool
7307 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7308 {
7309   gimple_stmt_iterator gsi;
7310   unsigned i;
7311
7312   for (i = 0; i < num_nodes; i++)
7313     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7314       {
7315         gimple *stmt = gsi_stmt (gsi);
7316         if (is_gimple_call (stmt)
7317             && !gimple_call_internal_p (stmt)
7318             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7319           return true;
7320       }
7321   return false;
7322 }
7323
7324 /* Optimizes the LOOP.  Returns true if anything changed.  */
7325
7326 static bool
7327 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7328 {
7329   bool changed = false;
7330   struct iv_ca *iv_ca;
7331   edge exit = single_dom_exit (loop);
7332   basic_block *body;
7333
7334   gcc_assert (!data->niters);
7335   data->current_loop = loop;
7336   data->loop_loc = find_loop_location (loop);
7337   data->speed = optimize_loop_for_speed_p (loop);
7338
7339   if (dump_file && (dump_flags & TDF_DETAILS))
7340     {
7341       fprintf (dump_file, "Processing loop %d", loop->num);
7342       if (data->loop_loc != UNKNOWN_LOCATION)
7343         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7344                  LOCATION_LINE (data->loop_loc));
7345       fprintf (dump_file, "\n");
7346
7347       if (exit)
7348         {
7349           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7350                    exit->src->index, exit->dest->index);
7351           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7352           fprintf (dump_file, "\n");
7353         }
7354
7355       fprintf (dump_file, "\n");
7356     }
7357
7358   body = get_loop_body (loop);
7359   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7360   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7361   free (body);
7362
7363   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7364
7365   /* For each ssa name determines whether it behaves as an induction variable
7366      in some loop.  */
7367   if (!find_induction_variables (data))
7368     goto finish;
7369
7370   /* Finds interesting uses (item 1).  */
7371   find_interesting_uses (data);
7372   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7373     goto finish;
7374
7375   /* Finds candidates for the induction variables (item 2).  */
7376   find_iv_candidates (data);
7377
7378   /* Calculates the costs (item 3, part 1).  */
7379   determine_iv_costs (data);
7380   determine_group_iv_costs (data);
7381   determine_set_costs (data);
7382
7383   /* Find the optimal set of induction variables (item 3, part 2).  */
7384   iv_ca = find_optimal_iv_set (data);
7385   if (!iv_ca)
7386     goto finish;
7387   changed = true;
7388
7389   /* Create the new induction variables (item 4, part 1).  */
7390   create_new_ivs (data, iv_ca);
7391   iv_ca_free (&iv_ca);
7392
7393   /* Rewrite the uses (item 4, part 2).  */
7394   rewrite_groups (data);
7395
7396   /* Remove the ivs that are unused after rewriting.  */
7397   remove_unused_ivs (data);
7398
7399   /* We have changed the structure of induction variables; it might happen
7400      that definitions in the scev database refer to some of them that were
7401      eliminated.  */
7402   scev_reset ();
7403
7404 finish:
7405   free_loop_data (data);
7406
7407   return changed;
7408 }
7409
7410 /* Main entry point.  Optimizes induction variables in loops.  */
7411
7412 void
7413 tree_ssa_iv_optimize (void)
7414 {
7415   struct loop *loop;
7416   struct ivopts_data data;
7417
7418   tree_ssa_iv_optimize_init (&data);
7419
7420   /* Optimize the loops starting with the innermost ones.  */
7421   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7422     {
7423       if (dump_file && (dump_flags & TDF_DETAILS))
7424         flow_loop_dump (loop, dump_file, NULL, 1);
7425
7426       tree_ssa_iv_optimize_loop (&data, loop);
7427     }
7428
7429   tree_ssa_iv_optimize_finalize (&data);
7430 }
7431
7432 #include "gt-tree-ssa-loop-ivopts.h"