1 /* Induction variable optimizations.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does so in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated.
68
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed by the RTL doloop optimization,
71 and try to make the selected IV candidate set optimal. The process of doloop
72 support includes:
73
74 1) Analyze whether the current loop will be transformed into a doloop, find
75 and mark its compare type IV use as a doloop use (iv_group field doloop_p),
76 and set flag doloop_use_p of ivopts_data to notify subsequent processing on
77 doloop. See analyze_and_mark_doloop_use and its callees for the details.
78 The target hook predict_doloop_p can be used for target-specific checks.
79
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 set flag doloop_p of iv_cand, set its step cost to zero, and add no extra
82 cost as for a biv. For cost determination between the doloop IV cand and an
83 IV use, the target hooks doloop_cost_for_generic and doloop_cost_for_address
84 are provided to add extra costs for generic type and address type IV uses.
85 Zero cost is assigned to the pair of doloop IV cand and doloop IV
86 use, and bound zero is set for IV elimination.
87
88 3) With the cost setting in step 2), the current cost-model-based IV
89 selection algorithm proceeds as usual and picks the doloop dedicated IV if
90 profitable. */
91
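/* As an illustration (hypothetical source, not taken from any testcase;
   whether and how the transformation applies depends on the target cost
   model): given

       void
       clear (int *a, int n)
       {
         for (int i = 0; i < n; i++)
           a[i] = 0;
       }

   the pass may strength-reduce the address computation `a + i * sizeof (int)'
   and eliminate `i' in favour of a single pointer induction variable, so that
   the loop effectively becomes

       for (int *p = a; p != a + n; p++)
         *p = 0;  */
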
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "params.h"
129 #include "tree-affine.h"
130 #include "tree-ssa-propagate.h"
131 #include "tree-ssa-address.h"
132 #include "builtins.h"
133 #include "tree-vectorizer.h"
134
135 /* FIXME: Expressions are expanded to RTL in this pass to determine the
136 cost of different addressing modes. This should be moved to a TBD
137 interface between the GIMPLE and RTL worlds. */
138
139 /* The infinite cost. */
140 #define INFTY 1000000000
141
142 /* Returns the expected number of loop iterations for LOOP.
143 The average trip count is computed from profile data if it
144 exists. */
145
146 static inline HOST_WIDE_INT
147 avg_loop_niter (class loop *loop)
148 {
149 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
150 if (niter == -1)
151 {
152 niter = likely_max_stmt_executions_int (loop);
153
154 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
155 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
156 }
157
158 return niter;
159 }
160
161 struct iv_use;
162
163 /* Representation of the induction variable. */
164 struct iv
165 {
166 tree base; /* Initial value of the iv. */
167 tree base_object; /* The memory object that the induction variable points to. */
168 tree step; /* Step of the iv (constant only). */
169 tree ssa_name; /* The ssa name with the value. */
170 struct iv_use *nonlin_use; /* The USE_NONLINEAR_EXPR use of this iv, if any. */
171 bool biv_p; /* Is it a biv? */
172 bool no_overflow; /* True if the iv doesn't overflow. */
173 bool have_address_use;/* For biv, indicate if it's used in any address
174 type use. */
175 };
176
177 /* Per-ssa version information (induction variable descriptions, etc.). */
178 struct version_info
179 {
180 tree name; /* The ssa name. */
181 struct iv *iv; /* Induction variable description. */
182 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
183 an expression that is not an induction variable. */
184 bool preserve_biv; /* For the original biv, whether to preserve it. */
185 unsigned inv_id; /* Id of an invariant. */
186 };
187
188 /* Types of uses. */
189 enum use_type
190 {
191 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
192 USE_REF_ADDRESS, /* Use is an address for an explicit memory
193 reference. */
194 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
195 cases where the expansion of the function
196 will turn the argument into a normal address. */
197 USE_COMPARE /* Use is a compare. */
198 };
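
/* Illustrative examples of the use types above (hypothetical code, for
   exposition only):

       for (int i = 0; i < n; i++)    // "i < n" is a USE_COMPARE of i.
         {
           a[i] = 0;                  // The address &a[i] is a USE_REF_ADDRESS.
           s += i * i;                // "i * i" is a USE_NONLINEAR_EXPR.
         }

   USE_PTR_ADDRESS typically arises for pointer arguments of calls such as
   internal functions whose expansion turns the argument into a normal
   address.  */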
199
200 /* Cost of a computation. */
201 class comp_cost
202 {
203 public:
204 comp_cost (): cost (0), complexity (0), scratch (0)
205 {}
206
207 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
208 : cost (cost), complexity (complexity), scratch (scratch)
209 {}
210
211 /* Returns true if COST is infinite. */
212 bool infinite_cost_p ();
213
214 /* Adds costs COST1 and COST2. */
215 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
216
217 /* Adds COST to the comp_cost. */
218 comp_cost operator+= (comp_cost cost);
219
220 /* Adds constant C to this comp_cost. */
221 comp_cost operator+= (HOST_WIDE_INT c);
222
223 /* Subtracts constant C from this comp_cost. */
224 comp_cost operator-= (HOST_WIDE_INT c);
225
226 /* Divide the comp_cost by constant C. */
227 comp_cost operator/= (HOST_WIDE_INT c);
228
229 /* Multiply the comp_cost by constant C. */
230 comp_cost operator*= (HOST_WIDE_INT c);
231
232 /* Subtracts costs COST1 and COST2. */
233 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
234
235 /* Subtracts COST from this comp_cost. */
236 comp_cost operator-= (comp_cost cost);
237
238 /* Returns true if COST1 is smaller than COST2. */
239 friend bool operator< (comp_cost cost1, comp_cost cost2);
240
241 /* Returns true if COST1 and COST2 are equal. */
242 friend bool operator== (comp_cost cost1, comp_cost cost2);
243
244 /* Returns true if COST1 is smaller than or equal to COST2. */
245 friend bool operator<= (comp_cost cost1, comp_cost cost2);
246
247 int64_t cost; /* The runtime cost. */
248 unsigned complexity; /* The estimate of the complexity of the code for
249 the computation (in no concrete units --
250 complexity field should be larger for more
251 complex expressions and addressing modes). */
252 int64_t scratch; /* Scratch used during cost computation. */
253 };
254
255 static const comp_cost no_cost;
256 static const comp_cost infinite_cost (INFTY, 0, INFTY);
257
258 bool
259 comp_cost::infinite_cost_p ()
260 {
261 return cost == INFTY;
262 }
263
264 comp_cost
265 operator+ (comp_cost cost1, comp_cost cost2)
266 {
267 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
268 return infinite_cost;
269
270 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
271 cost1.cost += cost2.cost;
272 cost1.complexity += cost2.complexity;
273
274 return cost1;
275 }
276
277 comp_cost
278 operator- (comp_cost cost1, comp_cost cost2)
279 {
280 if (cost1.infinite_cost_p ())
281 return infinite_cost;
282
283 gcc_assert (!cost2.infinite_cost_p ());
284 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
285
286 cost1.cost -= cost2.cost;
287 cost1.complexity -= cost2.complexity;
288
289 return cost1;
290 }
291
292 comp_cost
293 comp_cost::operator+= (comp_cost cost)
294 {
295 *this = *this + cost;
296 return *this;
297 }
298
299 comp_cost
300 comp_cost::operator+= (HOST_WIDE_INT c)
301 {
302 if (c >= INFTY)
303 this->cost = INFTY;
304
305 if (infinite_cost_p ())
306 return *this;
307
308 gcc_assert (this->cost + c < infinite_cost.cost);
309 this->cost += c;
310
311 return *this;
312 }
313
314 comp_cost
315 comp_cost::operator-= (HOST_WIDE_INT c)
316 {
317 if (infinite_cost_p ())
318 return *this;
319
320 gcc_assert (this->cost - c < infinite_cost.cost);
321 this->cost -= c;
322
323 return *this;
324 }
325
326 comp_cost
327 comp_cost::operator/= (HOST_WIDE_INT c)
328 {
329 gcc_assert (c != 0);
330 if (infinite_cost_p ())
331 return *this;
332
333 this->cost /= c;
334
335 return *this;
336 }
337
338 comp_cost
339 comp_cost::operator*= (HOST_WIDE_INT c)
340 {
341 if (infinite_cost_p ())
342 return *this;
343
344 gcc_assert (this->cost * c < infinite_cost.cost);
345 this->cost *= c;
346
347 return *this;
348 }
349
350 comp_cost
351 comp_cost::operator-= (comp_cost cost)
352 {
353 *this = *this - cost;
354 return *this;
355 }
356
357 bool
358 operator< (comp_cost cost1, comp_cost cost2)
359 {
360 if (cost1.cost == cost2.cost)
361 return cost1.complexity < cost2.complexity;
362
363 return cost1.cost < cost2.cost;
364 }
365
366 bool
367 operator== (comp_cost cost1, comp_cost cost2)
368 {
369 return cost1.cost == cost2.cost
370 && cost1.complexity == cost2.complexity;
371 }
372
373 bool
374 operator<= (comp_cost cost1, comp_cost cost2)
375 {
376 return cost1 < cost2 || cost1 == cost2;
377 }
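
/* For example (illustrative values only): comp_cost (4, 1) < comp_cost (4, 2)
   because equal runtime costs are ordered by complexity, while
   comp_cost (3, 5) < comp_cost (4, 0) because a smaller runtime cost wins
   regardless of complexity.  */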
378
379 struct iv_inv_expr_ent;
380
381 /* The candidate - cost pair. */
382 class cost_pair
383 {
384 public:
385 struct iv_cand *cand; /* The candidate. */
386 comp_cost cost; /* The cost. */
387 enum tree_code comp; /* For iv elimination, the comparison. */
388 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
389 preserved when representing iv_use with iv_cand. */
390 bitmap inv_exprs; /* The list of newly created invariant expressions
391 when representing iv_use with iv_cand. */
392 tree value; /* For final value elimination, the expression for
393 the final value of the iv. For iv elimination,
394 the new bound to compare with. */
395 };
396
397 /* Use. */
398 struct iv_use
399 {
400 unsigned id; /* The id of the use. */
401 unsigned group_id; /* The group id the use belongs to. */
402 enum use_type type; /* Type of the use. */
403 tree mem_type; /* The memory type to use when testing whether an
404 address is legitimate, and what the address's
405 cost is. */
406 struct iv *iv; /* The induction variable it is based on. */
407 gimple *stmt; /* Statement in that it occurs. */
408 tree *op_p; /* The place where it occurs. */
409
410 tree addr_base; /* Base address with const offset stripped. */
411 poly_uint64_pod addr_offset;
412 /* Const offset stripped from base address. */
413 };
414
415 /* Group of uses. */
416 struct iv_group
417 {
418 /* The id of the group. */
419 unsigned id;
420 /* Uses of the group are of the same type. */
421 enum use_type type;
422 /* The set of "related" IV candidates, plus the important ones. */
423 bitmap related_cands;
424 /* Number of IV candidates in the cost_map. */
425 unsigned n_map_members;
426 /* The costs w.r.t. the iv candidates. */
427 class cost_pair *cost_map;
428 /* The selected candidate for the group. */
429 struct iv_cand *selected;
430 /* To indicate this is a doloop use group. */
431 bool doloop_p;
432 /* Uses in the group. */
433 vec<struct iv_use *> vuses;
434 };
435
436 /* The position where the iv is computed. */
437 enum iv_position
438 {
439 IP_NORMAL, /* At the end, just before the exit condition. */
440 IP_END, /* At the end of the latch block. */
441 IP_BEFORE_USE, /* Immediately before a specific use. */
442 IP_AFTER_USE, /* Immediately after a specific use. */
443 IP_ORIGINAL /* The original biv. */
444 };
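
/* Rough illustration of the increment positions (GIMPLE-like sketch of a
   loop body, for exposition only):

       i_1 = PHI <0(preheader), i_2(latch)>
       ...                              # IP_BEFORE_USE / IP_AFTER_USE place
       ...                              # the increment around a given use.
       i_2 = i_1 + 1;                   # IP_ORIGINAL reuses the original
                                        # biv increment.
       if (i_2 < n_3) ...               # IP_NORMAL places the increment just
                                        # before this exit test; IP_END places
                                        # it at the end of the latch block.  */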
445
446 /* The induction variable candidate. */
447 struct iv_cand
448 {
449 unsigned id; /* The number of the candidate. */
450 bool important; /* Whether this is an "important" candidate, i.e. such
451 that it should be considered by all uses. */
452 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
453 gimple *incremented_at;/* For original biv, the statement where it is
454 incremented. */
455 tree var_before; /* The variable used for it before increment. */
456 tree var_after; /* The variable used for it after increment. */
457 struct iv *iv; /* The value of the candidate. NULL for
458 "pseudocandidate" used to indicate the possibility
459 to replace the final value of an iv by direct
460 computation of the value. */
461 unsigned cost; /* Cost of the candidate. */
462 unsigned cost_step; /* Cost of the candidate's increment operation. */
463 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
464 where it is incremented. */
465 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
466 iv_cand. */
467 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
468 handle it as a new invariant expression which will
469 be hoisted out of loop. */
470 struct iv *orig_iv; /* The original iv if this cand is added from biv with
471 smaller type. */
472 bool doloop_p; /* Whether this is a doloop candidate. */
473 };
474
475 /* Hashtable entry for common candidate derived from iv uses. */
476 class iv_common_cand
477 {
478 public:
479 tree base;
480 tree step;
481 /* IV uses from which this common candidate is derived. */
482 auto_vec<struct iv_use *> uses;
483 hashval_t hash;
484 };
485
486 /* Hashtable helpers. */
487
488 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
489 {
490 static inline hashval_t hash (const iv_common_cand *);
491 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
492 };
493
494 /* Hash function for possible common candidates. */
495
496 inline hashval_t
497 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
498 {
499 return ccand->hash;
500 }
501
502 /* Hash table equality function for common candidates. */
503
504 inline bool
505 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
506 const iv_common_cand *ccand2)
507 {
508 return (ccand1->hash == ccand2->hash
509 && operand_equal_p (ccand1->base, ccand2->base, 0)
510 && operand_equal_p (ccand1->step, ccand2->step, 0)
511 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
512 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
513 }
514
515 /* Loop invariant expression hashtable entry. */
516
517 struct iv_inv_expr_ent
518 {
519 /* Tree expression of the entry. */
520 tree expr;
521 /* Unique identifier. */
522 int id;
523 /* Hash value. */
524 hashval_t hash;
525 };
526
527 /* Sort iv_inv_expr_ent pair A and B by id field. */
528
529 static int
530 sort_iv_inv_expr_ent (const void *a, const void *b)
531 {
532 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
533 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
534
535 unsigned id1 = (*e1)->id;
536 unsigned id2 = (*e2)->id;
537
538 if (id1 < id2)
539 return -1;
540 else if (id1 > id2)
541 return 1;
542 else
543 return 0;
544 }
545
546 /* Hashtable helpers. */
547
548 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
549 {
550 static inline hashval_t hash (const iv_inv_expr_ent *);
551 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
552 };
553
554 /* Return true if uses of type TYPE represent some form of address. */
555
556 inline bool
557 address_p (use_type type)
558 {
559 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
560 }
561
562 /* Hash function for loop invariant expressions. */
563
564 inline hashval_t
565 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
566 {
567 return expr->hash;
568 }
569
570 /* Hash table equality function for expressions. */
571
572 inline bool
573 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
574 const iv_inv_expr_ent *expr2)
575 {
576 return expr1->hash == expr2->hash
577 && operand_equal_p (expr1->expr, expr2->expr, 0);
578 }
579
580 struct ivopts_data
581 {
582 /* The currently optimized loop. */
583 class loop *current_loop;
584 location_t loop_loc;
585
586 /* Numbers of iterations for all exits of the current loop. */
587 hash_map<edge, tree_niter_desc *> *niters;
588
589 /* Number of registers used in it. */
590 unsigned regs_used;
591
592 /* The size of version_info array allocated. */
593 unsigned version_info_size;
594
595 /* The array of information for the ssa names. */
596 struct version_info *version_info;
597
598 /* The hashtable of loop invariant expressions created
599 by ivopt. */
600 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
601
602 /* The bitmap of indices in version_info whose value was changed. */
603 bitmap relevant;
604
605 /* The uses of induction variables. */
606 vec<iv_group *> vgroups;
607
608 /* The candidates. */
609 vec<iv_cand *> vcands;
610
611 /* A bitmap of important candidates. */
612 bitmap important_candidates;
613
614 /* Cache used by tree_to_aff_combination_expand. */
615 hash_map<tree, name_expansion *> *name_expansion_cache;
616
617 /* The hashtable of common candidates derived from iv uses. */
618 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
619
620 /* The common candidates. */
621 vec<iv_common_cand *> iv_common_cands;
622
623 /* Hash map recording base object information of tree exp. */
624 hash_map<tree, tree> *base_object_map;
625
626 /* The maximum invariant variable id. */
627 unsigned max_inv_var_id;
628
629 /* The maximum invariant expression id. */
630 unsigned max_inv_expr_id;
631
632 /* Number of no_overflow BIVs which are not used in memory address. */
633 unsigned bivs_not_used_in_addr;
634
635 /* Obstack for iv structure. */
636 struct obstack iv_obstack;
637
638 /* Whether to consider just related and important candidates when replacing a
639 use. */
640 bool consider_all_candidates;
641
642 /* Are we optimizing for speed? */
643 bool speed;
644
645 /* Whether the loop body includes any function calls. */
646 bool body_includes_call;
647
648 /* Whether the loop body can only be exited via single exit. */
649 bool loop_single_exit_p;
650
651 /* Whether the loop has doloop comparison use. */
652 bool doloop_use_p;
653 };
654
655 /* An assignment of iv candidates to uses. */
656
657 class iv_ca
658 {
659 public:
660 /* The number of uses covered by the assignment. */
661 unsigned upto;
662
663 /* Number of uses that cannot be expressed by the candidates in the set. */
664 unsigned bad_groups;
665
666 /* Candidate assigned to a use, together with the related costs. */
667 class cost_pair **cand_for_group;
668
669 /* Number of times each candidate is used. */
670 unsigned *n_cand_uses;
671
672 /* The candidates used. */
673 bitmap cands;
674
675 /* The number of candidates in the set. */
676 unsigned n_cands;
677
678 /* The number of invariants needed, including both invariant variants and
679 invariant expressions. */
680 unsigned n_invs;
681
682 /* Total cost of expressing uses. */
683 comp_cost cand_use_cost;
684
685 /* Total cost of candidates. */
686 int64_t cand_cost;
687
688 /* Number of times each invariant variable is used. */
689 unsigned *n_inv_var_uses;
690
691 /* Number of times each invariant expression is used. */
692 unsigned *n_inv_expr_uses;
693
694 /* Total cost of the assignment. */
695 comp_cost cost;
696 };
697
698 /* Difference of two iv candidate assignments. */
699
700 struct iv_ca_delta
701 {
702 /* Changed group. */
703 struct iv_group *group;
704
705 /* An old assignment (for rollback purposes). */
706 class cost_pair *old_cp;
707
708 /* A new assignment. */
709 class cost_pair *new_cp;
710
711 /* Next change in the list. */
712 struct iv_ca_delta *next;
713 };
714
715 /* Bound on number of candidates below which all candidates are considered. */
716
717 #define CONSIDER_ALL_CANDIDATES_BOUND \
718 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
719
720 /* If there are more iv occurrences, we just give up (it is quite unlikely that
721 optimizing such a loop would help, and it would take ages). */
722
723 #define MAX_CONSIDERED_GROUPS \
724 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
725
726 /* If there are at most this number of ivs in the set, always try removing
727 unnecessary ivs from the set. */
728
729 #define ALWAYS_PRUNE_CAND_SET_BOUND \
730 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
731
732 /* The list of trees for which the decl_rtl field must be reset is stored
733 here. */
734
735 static vec<tree> decl_rtl_to_reset;
736
737 static comp_cost force_expr_to_var_cost (tree, bool);
738
739 /* The single loop exit if it dominates the latch, NULL otherwise. */
740
741 edge
742 single_dom_exit (class loop *loop)
743 {
744 edge exit = single_exit (loop);
745
746 if (!exit)
747 return NULL;
748
749 if (!just_once_each_iteration_p (loop, exit->src))
750 return NULL;
751
752 return exit;
753 }
754
755 /* Dumps information about the induction variable IV to FILE. Don't dump the
756 variable's name if DUMP_NAME is false. The information is dumped with
757 preceding spaces indicated by INDENT_LEVEL. */
758
759 void
760 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
761 {
762 const char *p;
763 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
764
765 if (indent_level > 4)
766 indent_level = 4;
767 p = spaces + 8 - (indent_level << 1);
768
769 fprintf (file, "%sIV struct:\n", p);
770 if (iv->ssa_name && dump_name)
771 {
772 fprintf (file, "%s SSA_NAME:\t", p);
773 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
774 fprintf (file, "\n");
775 }
776
777 fprintf (file, "%s Type:\t", p);
778 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
779 fprintf (file, "\n");
780
781 fprintf (file, "%s Base:\t", p);
782 print_generic_expr (file, iv->base, TDF_SLIM);
783 fprintf (file, "\n");
784
785 fprintf (file, "%s Step:\t", p);
786 print_generic_expr (file, iv->step, TDF_SLIM);
787 fprintf (file, "\n");
788
789 if (iv->base_object)
790 {
791 fprintf (file, "%s Object:\t", p);
792 print_generic_expr (file, iv->base_object, TDF_SLIM);
793 fprintf (file, "\n");
794 }
795
796 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
797
798 fprintf (file, "%s Overflowness w.r.t. loop niter:\t%s\n",
799 p, iv->no_overflow ? "No-overflow" : "Overflow");
800 }
801
802 /* Dumps information about the USE to FILE. */
803
804 void
805 dump_use (FILE *file, struct iv_use *use)
806 {
807 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
808 fprintf (file, " At stmt:\t");
809 print_gimple_stmt (file, use->stmt, 0);
810 fprintf (file, " At pos:\t");
811 if (use->op_p)
812 print_generic_expr (file, *use->op_p, TDF_SLIM);
813 fprintf (file, "\n");
814 dump_iv (file, use->iv, false, 2);
815 }
816
817 /* Dumps information about the uses to FILE. */
818
819 void
820 dump_groups (FILE *file, struct ivopts_data *data)
821 {
822 unsigned i, j;
823 struct iv_group *group;
824
825 for (i = 0; i < data->vgroups.length (); i++)
826 {
827 group = data->vgroups[i];
828 fprintf (file, "Group %d:\n", group->id);
829 if (group->type == USE_NONLINEAR_EXPR)
830 fprintf (file, " Type:\tGENERIC\n");
831 else if (group->type == USE_REF_ADDRESS)
832 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
833 else if (group->type == USE_PTR_ADDRESS)
834 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
835 else
836 {
837 gcc_assert (group->type == USE_COMPARE);
838 fprintf (file, " Type:\tCOMPARE\n");
839 }
840 for (j = 0; j < group->vuses.length (); j++)
841 dump_use (file, group->vuses[j]);
842 }
843 }
844
845 /* Dumps information about induction variable candidate CAND to FILE. */
846
847 void
848 dump_cand (FILE *file, struct iv_cand *cand)
849 {
850 struct iv *iv = cand->iv;
851
852 fprintf (file, "Candidate %d:\n", cand->id);
853 if (cand->inv_vars)
854 {
855 fprintf (file, " Depend on inv.vars: ");
856 dump_bitmap (file, cand->inv_vars);
857 }
858 if (cand->inv_exprs)
859 {
860 fprintf (file, " Depend on inv.exprs: ");
861 dump_bitmap (file, cand->inv_exprs);
862 }
863
864 if (cand->var_before)
865 {
866 fprintf (file, " Var before: ");
867 print_generic_expr (file, cand->var_before, TDF_SLIM);
868 fprintf (file, "\n");
869 }
870 if (cand->var_after)
871 {
872 fprintf (file, " Var after: ");
873 print_generic_expr (file, cand->var_after, TDF_SLIM);
874 fprintf (file, "\n");
875 }
876
877 switch (cand->pos)
878 {
879 case IP_NORMAL:
880 fprintf (file, " Incr POS: before exit test\n");
881 break;
882
883 case IP_BEFORE_USE:
884 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
885 break;
886
887 case IP_AFTER_USE:
888 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
889 break;
890
891 case IP_END:
892 fprintf (file, " Incr POS: at end\n");
893 break;
894
895 case IP_ORIGINAL:
896 fprintf (file, " Incr POS: orig biv\n");
897 break;
898 }
899
900 dump_iv (file, iv, false, 1);
901 }
902
903 /* Returns the info for ssa version VER. */
904
905 static inline struct version_info *
906 ver_info (struct ivopts_data *data, unsigned ver)
907 {
908 return data->version_info + ver;
909 }
910
911 /* Returns the info for ssa name NAME. */
912
913 static inline struct version_info *
914 name_info (struct ivopts_data *data, tree name)
915 {
916 return ver_info (data, SSA_NAME_VERSION (name));
917 }
918
919 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
920 emitted in LOOP. */
921
922 static bool
923 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
924 {
925 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
926
927 gcc_assert (bb);
928
929 if (sbb == loop->latch)
930 return true;
931
932 if (sbb != bb)
933 return false;
934
935 return stmt == last_stmt (bb);
936 }
937
938 /* Returns true if STMT is after the place where the original induction
939 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
940 if the positions are identical. */
941
942 static bool
943 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
944 {
945 basic_block cand_bb = gimple_bb (cand->incremented_at);
946 basic_block stmt_bb = gimple_bb (stmt);
947
948 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
949 return false;
950
951 if (stmt_bb != cand_bb)
952 return true;
953
954 if (true_if_equal
955 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
956 return true;
957 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
958 }
959
960 /* Returns true if STMT is after the place where the induction variable
961 CAND is incremented in LOOP. */
962
963 static bool
964 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
965 {
966 switch (cand->pos)
967 {
968 case IP_END:
969 return false;
970
971 case IP_NORMAL:
972 return stmt_after_ip_normal_pos (loop, stmt);
973
974 case IP_ORIGINAL:
975 case IP_AFTER_USE:
976 return stmt_after_inc_pos (cand, stmt, false);
977
978 case IP_BEFORE_USE:
979 return stmt_after_inc_pos (cand, stmt, true);
980
981 default:
982 gcc_unreachable ();
983 }
984 }
985
986 /* walk_tree callback for contains_abnormal_ssa_name_p. */
987
988 static tree
989 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
990 {
991 if (TREE_CODE (*tp) == SSA_NAME
992 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
993 return *tp;
994
995 if (!EXPR_P (*tp))
996 *walk_subtrees = 0;
997
998 return NULL_TREE;
999 }
1000
1001 /* Returns true if EXPR contains a ssa name that occurs in an
1002 abnormal phi node. */
1003
1004 bool
1005 contains_abnormal_ssa_name_p (tree expr)
1006 {
1007 return walk_tree_without_duplicates
1008 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1009 }
1010
1011 /* Returns the structure describing number of iterations determined from
1012 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1013
1014 static class tree_niter_desc *
1015 niter_for_exit (struct ivopts_data *data, edge exit)
1016 {
1017 class tree_niter_desc *desc;
1018 tree_niter_desc **slot;
1019
1020 if (!data->niters)
1021 {
1022 data->niters = new hash_map<edge, tree_niter_desc *>;
1023 slot = NULL;
1024 }
1025 else
1026 slot = data->niters->get (exit);
1027
1028 if (!slot)
1029 {
1030 /* Try to determine number of iterations. We cannot safely work with ssa
1031 names that appear in phi nodes on abnormal edges, so that we do not
1032 create overlapping life ranges for them (PR 27283). */
1033 desc = XNEW (class tree_niter_desc);
1034 if (!number_of_iterations_exit (data->current_loop,
1035 exit, desc, true)
1036 || contains_abnormal_ssa_name_p (desc->niter))
1037 {
1038 XDELETE (desc);
1039 desc = NULL;
1040 }
1041 data->niters->put (exit, desc);
1042 }
1043 else
1044 desc = *slot;
1045
1046 return desc;
1047 }
1048
1049 /* Returns the structure describing number of iterations determined from
1050 single dominating exit of DATA->current_loop, or NULL if something
1051 goes wrong. */
1052
1053 static class tree_niter_desc *
1054 niter_for_single_dom_exit (struct ivopts_data *data)
1055 {
1056 edge exit = single_dom_exit (data->current_loop);
1057
1058 if (!exit)
1059 return NULL;
1060
1061 return niter_for_exit (data, exit);
1062 }
1063
1064 /* Initializes data structures used by the iv optimization pass, stored
1065 in DATA. */
1066
1067 static void
1068 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1069 {
1070 data->version_info_size = 2 * num_ssa_names;
1071 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1072 data->relevant = BITMAP_ALLOC (NULL);
1073 data->important_candidates = BITMAP_ALLOC (NULL);
1074 data->max_inv_var_id = 0;
1075 data->max_inv_expr_id = 0;
1076 data->niters = NULL;
1077 data->vgroups.create (20);
1078 data->vcands.create (20);
1079 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1080 data->name_expansion_cache = NULL;
1081 data->base_object_map = NULL;
1082 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1083 data->iv_common_cands.create (20);
1084 decl_rtl_to_reset.create (20);
1085 gcc_obstack_init (&data->iv_obstack);
1086 }
1087
1088 /* walk_tree callback for determine_base_object. */
1089
1090 static tree
1091 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1092 {
1093 tree_code code = TREE_CODE (*tp);
1094 tree obj = NULL_TREE;
1095 if (code == ADDR_EXPR)
1096 {
1097 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1098 if (!base)
1099 obj = *tp;
1100 else if (TREE_CODE (base) != MEM_REF)
1101 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1102 }
1103 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1104 obj = fold_convert (ptr_type_node, *tp);
1105
1106 if (!obj)
1107 {
1108 if (!EXPR_P (*tp))
1109 *walk_subtrees = 0;
1110
1111 return NULL_TREE;
1112 }
1113 /* Record special node for multiple base objects and stop. */
1114 if (*static_cast<tree *> (wdata))
1115 {
1116 *static_cast<tree *> (wdata) = integer_zero_node;
1117 return integer_zero_node;
1118 }
1119 /* Record the base object and continue looking. */
1120 *static_cast<tree *> (wdata) = obj;
1121 return NULL_TREE;
1122 }
1123
1124 /* Returns the memory object that EXPR points to, with caching. Returns NULL
1125 if we are able to determine that it does not point to any such object;
1126 specially returns integer_zero_node if EXPR contains multiple base objects. */
1127
1128 static tree
1129 determine_base_object (struct ivopts_data *data, tree expr)
1130 {
1131 tree *slot, obj = NULL_TREE;
1132 if (data->base_object_map)
1133 {
1134 if ((slot = data->base_object_map->get(expr)) != NULL)
1135 return *slot;
1136 }
1137 else
1138 data->base_object_map = new hash_map<tree, tree>;
1139
1140 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1141 data->base_object_map->put (expr, obj);
1142 return obj;
1143 }
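
/* For instance (hypothetical trees): for `&a[i_1]' where `a' is a declaration
   the base object is `&a' converted to ptr_type_node; for `p_2 + 4' where
   `p_2' is a pointer SSA name it is `(void *) p_2'; when two different base
   objects are found the special value integer_zero_node is returned; and for
   an expression with no base object at all (e.g. a plain integer iv) the
   result is NULL_TREE.  */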
1144
1145 /* Return true if address expression with non-DECL_P operand appears
1146 in EXPR. */
1147
1148 static bool
1149 contain_complex_addr_expr (tree expr)
1150 {
1151 bool res = false;
1152
1153 STRIP_NOPS (expr);
1154 switch (TREE_CODE (expr))
1155 {
1156 case POINTER_PLUS_EXPR:
1157 case PLUS_EXPR:
1158 case MINUS_EXPR:
1159 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1160 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1161 break;
1162
1163 case ADDR_EXPR:
1164 return (!DECL_P (TREE_OPERAND (expr, 0)));
1165
1166 default:
1167 return false;
1168 }
1169
1170 return res;
1171 }
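
/* E.g. `&a' with `a' a declaration is not considered complex, while `&a[4]'
   or `&s.f' is, because the ADDR_EXPR operand is not a bare declaration;
   sums and differences of such subexpressions are searched recursively.  */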
1172
1173 /* Allocates an induction variable with given initial value BASE and step STEP
1174 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1175
1176 static struct iv *
1177 alloc_iv (struct ivopts_data *data, tree base, tree step,
1178 bool no_overflow = false)
1179 {
1180 tree expr = base;
1181 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1182 sizeof (struct iv));
1183 gcc_assert (step != NULL_TREE);
1184
1185 /* Lower address expression in base except ones with DECL_P as operand.
1186 By doing this:
1187 1) More accurate cost can be computed for address expressions;
1188 2) Duplicate candidates won't be created for bases in different
1189 forms, like &a[0] and &a. */
1190 STRIP_NOPS (expr);
1191 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1192 || contain_complex_addr_expr (expr))
1193 {
1194 aff_tree comb;
1195 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1196 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1197 }
1198
1199 iv->base = base;
1200 iv->base_object = determine_base_object (data, base);
1201 iv->step = step;
1202 iv->biv_p = false;
1203 iv->nonlin_use = NULL;
1204 iv->ssa_name = NULL_TREE;
1205 if (!no_overflow
1206 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1207 base, step))
1208 no_overflow = true;
1209 iv->no_overflow = no_overflow;
1210 iv->have_address_use = false;
1211
1212 return iv;
1213 }
1214
1215 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1216 doesn't overflow. */
1217
1218 static void
1219 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1220 bool no_overflow)
1221 {
1222 struct version_info *info = name_info (data, iv);
1223
1224 gcc_assert (!info->iv);
1225
1226 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1227 info->iv = alloc_iv (data, base, step, no_overflow);
1228 info->iv->ssa_name = iv;
1229 }
1230
1231 /* Finds induction variable declaration for VAR. */
1232
1233 static struct iv *
1234 get_iv (struct ivopts_data *data, tree var)
1235 {
1236 basic_block bb;
1237 tree type = TREE_TYPE (var);
1238
1239 if (!POINTER_TYPE_P (type)
1240 && !INTEGRAL_TYPE_P (type))
1241 return NULL;
1242
1243 if (!name_info (data, var)->iv)
1244 {
1245 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1246
1247 if (!bb
1248 || !flow_bb_inside_loop_p (data->current_loop, bb))
1249 set_iv (data, var, var, build_int_cst (type, 0), true);
1250 }
1251
1252 return name_info (data, var)->iv;
1253 }
1254
1255 /* Return the first non-invariant ssa var found in EXPR. */
1256
1257 static tree
1258 extract_single_var_from_expr (tree expr)
1259 {
1260 int i, n;
1261 tree tmp;
1262 enum tree_code code;
1263
1264 if (!expr || is_gimple_min_invariant (expr))
1265 return NULL;
1266
1267 code = TREE_CODE (expr);
1268 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1269 {
1270 n = TREE_OPERAND_LENGTH (expr);
1271 for (i = 0; i < n; i++)
1272 {
1273 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1274
1275 if (tmp)
1276 return tmp;
1277 }
1278 }
1279 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1280 }
1281
1282 /* Finds basic ivs. */
1283
1284 static bool
1285 find_bivs (struct ivopts_data *data)
1286 {
1287 gphi *phi;
1288 affine_iv iv;
1289 tree step, type, base, stop;
1290 bool found = false;
1291 class loop *loop = data->current_loop;
1292 gphi_iterator psi;
1293
1294 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1295 {
1296 phi = psi.phi ();
1297
1298 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1299 continue;
1300
1301 if (virtual_operand_p (PHI_RESULT (phi)))
1302 continue;
1303
1304 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1305 continue;
1306
1307 if (integer_zerop (iv.step))
1308 continue;
1309
1310 step = iv.step;
1311 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1312 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1313 Ideally we should stop at any such ssa var, but since that is expensive
1314 and unusual to happen, we just do it for the first one.
1315
1316 See PR64705 for the rationale. */
1317 stop = extract_single_var_from_expr (step);
1318 base = expand_simple_operations (base, stop);
1319 if (contains_abnormal_ssa_name_p (base)
1320 || contains_abnormal_ssa_name_p (step))
1321 continue;
1322
1323 type = TREE_TYPE (PHI_RESULT (phi));
1324 base = fold_convert (type, base);
1325 if (step)
1326 {
1327 if (POINTER_TYPE_P (type))
1328 step = convert_to_ptrofftype (step);
1329 else
1330 step = fold_convert (type, step);
1331 }
1332
1333 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1334 found = true;
1335 }
1336
1337 return found;
1338 }
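
/* For example (hypothetical loop): in

       for (i_1 = 0; i_1 < n_2; i_1++)
         ...

   the loop-header PHI for `i' gives a biv with base 0 and step 1, which
   find_bivs records via set_iv; derived induction variables such as an
   address computation `a_3 + i_1 * 4' are picked up later by find_givs.  */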
1339
1340 /* Marks basic ivs. */
1341
1342 static void
1343 mark_bivs (struct ivopts_data *data)
1344 {
1345 gphi *phi;
1346 gimple *def;
1347 tree var;
1348 struct iv *iv, *incr_iv;
1349 class loop *loop = data->current_loop;
1350 basic_block incr_bb;
1351 gphi_iterator psi;
1352
1353 data->bivs_not_used_in_addr = 0;
1354 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1355 {
1356 phi = psi.phi ();
1357
1358 iv = get_iv (data, PHI_RESULT (phi));
1359 if (!iv)
1360 continue;
1361
1362 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1363 def = SSA_NAME_DEF_STMT (var);
1364 /* Don't mark iv peeled from other one as biv. */
1365 if (def
1366 && gimple_code (def) == GIMPLE_PHI
1367 && gimple_bb (def) == loop->header)
1368 continue;
1369
1370 incr_iv = get_iv (data, var);
1371 if (!incr_iv)
1372 continue;
1373
1374 /* If the increment is in the subloop, ignore it. */
1375 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1376 if (incr_bb->loop_father != data->current_loop
1377 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1378 continue;
1379
1380 iv->biv_p = true;
1381 incr_iv->biv_p = true;
1382 if (iv->no_overflow)
1383 data->bivs_not_used_in_addr++;
1384 if (incr_iv->no_overflow)
1385 data->bivs_not_used_in_addr++;
1386 }
1387 }
1388
1389 /* Checks whether STMT defines a linear induction variable and stores its
1390 parameters to IV. */
1391
1392 static bool
1393 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1394 {
1395 tree lhs, stop;
1396 class loop *loop = data->current_loop;
1397
1398 iv->base = NULL_TREE;
1399 iv->step = NULL_TREE;
1400
1401 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1402 return false;
1403
1404 lhs = gimple_assign_lhs (stmt);
1405 if (TREE_CODE (lhs) != SSA_NAME)
1406 return false;
1407
1408 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1409 return false;
1410
1411 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1412 Ideally we should stop at any such ssa var, but since that is expensive
1413 and unusual to happen, we just do it for the first one.
1414
1415 See PR64705 for the rationale. */
1416 stop = extract_single_var_from_expr (iv->step);
1417 iv->base = expand_simple_operations (iv->base, stop);
1418 if (contains_abnormal_ssa_name_p (iv->base)
1419 || contains_abnormal_ssa_name_p (iv->step))
1420 return false;
1421
1422 /* If STMT could throw, then do not consider STMT as defining a GIV.
1423 While this will suppress optimizations, we cannot safely delete this
1424 GIV and associated statements, even if it appears it is not used. */
1425 if (stmt_could_throw_p (cfun, stmt))
1426 return false;
1427
1428 return true;
1429 }
1430
1431 /* Finds general ivs in statement STMT. */
1432
1433 static void
1434 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1435 {
1436 affine_iv iv;
1437
1438 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1439 return;
1440
1441 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1442 }
1443
1444 /* Finds general ivs in basic block BB. */
1445
1446 static void
1447 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1448 {
1449 gimple_stmt_iterator bsi;
1450
1451 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1452 find_givs_in_stmt (data, gsi_stmt (bsi));
1453 }
1454
1455 /* Finds general ivs. */
1456
1457 static void
1458 find_givs (struct ivopts_data *data)
1459 {
1460 class loop *loop = data->current_loop;
1461 basic_block *body = get_loop_body_in_dom_order (loop);
1462 unsigned i;
1463
1464 for (i = 0; i < loop->num_nodes; i++)
1465 find_givs_in_bb (data, body[i]);
1466 free (body);
1467 }
1468
1469 /* For each ssa name defined in LOOP determines whether it is an induction
1470 variable and if so, its initial value and step. */
1471
1472 static bool
1473 find_induction_variables (struct ivopts_data *data)
1474 {
1475 unsigned i;
1476 bitmap_iterator bi;
1477
1478 if (!find_bivs (data))
1479 return false;
1480
1481 find_givs (data);
1482 mark_bivs (data);
1483
1484 if (dump_file && (dump_flags & TDF_DETAILS))
1485 {
1486 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1487
1488 if (niter)
1489 {
1490 fprintf (dump_file, " number of iterations ");
1491 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1492 if (!integer_zerop (niter->may_be_zero))
1493 {
1494 fprintf (dump_file, "; zero if ");
1495 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1496 }
1497 fprintf (dump_file, "\n");
1498 };
1499
1500 fprintf (dump_file, "\n<Induction Vars>:\n");
1501 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1502 {
1503 struct version_info *info = ver_info (data, i);
1504 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1505 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1506 }
1507 }
1508
1509 return true;
1510 }
1511
1512 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1513 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1514 is the const offset stripped from IV base and MEM_TYPE is the type
1515 of the memory being addressed. For uses of other types, ADDR_BASE
1516 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1517
1518 static struct iv_use *
1519 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1520 gimple *stmt, enum use_type type, tree mem_type,
1521 tree addr_base, poly_uint64 addr_offset)
1522 {
1523 struct iv_use *use = XCNEW (struct iv_use);
1524
1525 use->id = group->vuses.length ();
1526 use->group_id = group->id;
1527 use->type = type;
1528 use->mem_type = mem_type;
1529 use->iv = iv;
1530 use->stmt = stmt;
1531 use->op_p = use_p;
1532 use->addr_base = addr_base;
1533 use->addr_offset = addr_offset;
1534
1535 group->vuses.safe_push (use);
1536 return use;
1537 }
1538
1539 /* Checks whether OP is a loop-level invariant and if so, records it.
1540 NONLINEAR_USE is true if the invariant is used in a way we do not
1541 handle specially. */
1542
1543 static void
1544 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1545 {
1546 basic_block bb;
1547 struct version_info *info;
1548
1549 if (TREE_CODE (op) != SSA_NAME
1550 || virtual_operand_p (op))
1551 return;
1552
1553 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1554 if (bb
1555 && flow_bb_inside_loop_p (data->current_loop, bb))
1556 return;
1557
1558 info = name_info (data, op);
1559 info->name = op;
1560 info->has_nonlin_use |= nonlinear_use;
1561 if (!info->inv_id)
1562 info->inv_id = ++data->max_inv_var_id;
1563 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1564 }
1565
1566 /* Record a group of TYPE. */
1567
1568 static struct iv_group *
1569 record_group (struct ivopts_data *data, enum use_type type)
1570 {
1571 struct iv_group *group = XCNEW (struct iv_group);
1572
1573 group->id = data->vgroups.length ();
1574 group->type = type;
1575 group->related_cands = BITMAP_ALLOC (NULL);
1576 group->vuses.create (1);
1577 group->doloop_p = false;
1578
1579 data->vgroups.safe_push (group);
1580 return group;
1581 }
1582
1583 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1584 New group will be created if there is no existing group for the use.
1585 MEM_TYPE is the type of memory being addressed, or NULL if this
1586 isn't an address reference. */
1587
1588 static struct iv_use *
1589 record_group_use (struct ivopts_data *data, tree *use_p,
1590 struct iv *iv, gimple *stmt, enum use_type type,
1591 tree mem_type)
1592 {
1593 tree addr_base = NULL;
1594 struct iv_group *group = NULL;
1595 poly_uint64 addr_offset = 0;
1596
1597 /* Record a non-address type use in a new group; for an address type use, reuse a matching existing group if possible. */
1598 if (address_p (type))
1599 {
1600 unsigned int i;
1601
1602 addr_base = strip_offset (iv->base, &addr_offset);
1603 for (i = 0; i < data->vgroups.length (); i++)
1604 {
1605 struct iv_use *use;
1606
1607 group = data->vgroups[i];
1608 use = group->vuses[0];
1609 if (!address_p (use->type))
1610 continue;
1611
1612 /* Check if it has the same stripped base and step. */
1613 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1614 && operand_equal_p (iv->step, use->iv->step, 0)
1615 && operand_equal_p (addr_base, use->addr_base, 0))
1616 break;
1617 }
1618 if (i == data->vgroups.length ())
1619 group = NULL;
1620 }
1621
1622 if (!group)
1623 group = record_group (data, type);
1624
1625 return record_use (group, use_p, iv, stmt, type, mem_type,
1626 addr_base, addr_offset);
1627 }
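
/* For example (hypothetical accesses): the address uses for `a[i]' and
   `a[i + 1]' have the same base object, the same step and the same stripped
   base, and differ only in addr_offset, so they are recorded as two uses of
   one address group; a use of `i' in a comparison goes to a separate group
   of its own.  */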
1628
1629 /* Checks whether the use OP is interesting and if so, records it. */
1630
1631 static struct iv_use *
1632 find_interesting_uses_op (struct ivopts_data *data, tree op)
1633 {
1634 struct iv *iv;
1635 gimple *stmt;
1636 struct iv_use *use;
1637
1638 if (TREE_CODE (op) != SSA_NAME)
1639 return NULL;
1640
1641 iv = get_iv (data, op);
1642 if (!iv)
1643 return NULL;
1644
1645 if (iv->nonlin_use)
1646 {
1647 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1648 return iv->nonlin_use;
1649 }
1650
1651 if (integer_zerop (iv->step))
1652 {
1653 record_invariant (data, op, true);
1654 return NULL;
1655 }
1656
1657 stmt = SSA_NAME_DEF_STMT (op);
1658 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1659
1660 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1661 iv->nonlin_use = use;
1662 return use;
1663 }
1664
1665 /* Indicate how compare type iv_use can be handled. */
1666 enum comp_iv_rewrite
1667 {
1668 COMP_IV_NA,
1669 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1670 COMP_IV_EXPR,
1671 /* We may rewrite compare type iv_uses on both sides of comparison by
1672 expressing value of each iv_use. */
1673 COMP_IV_EXPR_2,
1674 /* We may rewrite compare type iv_use by expressing value of the iv_use
1675 or by eliminating it with other iv_cand. */
1676 COMP_IV_ELIM
1677 };
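
/* Examples (hypothetical conditions, for exposition only): with `i_1' an IV
   and `n_2' a loop invariant, `i_1 < n_2' is COMP_IV_ELIM (it may either be
   rewritten or eliminated using another candidate); `i_1 < j_3' with both
   operands IVs is COMP_IV_EXPR_2; `i_1 < x_4' where `x_4' is defined inside
   the loop but is not an IV is COMP_IV_EXPR; and a comparison of two loop
   invariants is COMP_IV_NA.  */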
1678
1679 /* Given a condition in statement STMT, checks whether it is a compare
1680 of an induction variable and an invariant. If this is the case,
1681 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1682 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1683 induction variable descriptions, and the way the comparison can be
1684 handled is returned. If this is not the case, CONTROL_VAR and BOUND are
1685 set to the arguments of the condition and COMP_IV_NA is returned. */
1686
1687 static enum comp_iv_rewrite
1688 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1689 tree **control_var, tree **bound,
1690 struct iv **iv_var, struct iv **iv_bound)
1691 {
1692 /* The objects returned when COND has constant operands. */
1693 static struct iv const_iv;
1694 static tree zero;
1695 tree *op0 = &zero, *op1 = &zero;
1696 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1697 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1698
1699 if (gimple_code (stmt) == GIMPLE_COND)
1700 {
1701 gcond *cond_stmt = as_a <gcond *> (stmt);
1702 op0 = gimple_cond_lhs_ptr (cond_stmt);
1703 op1 = gimple_cond_rhs_ptr (cond_stmt);
1704 }
1705 else
1706 {
1707 op0 = gimple_assign_rhs1_ptr (stmt);
1708 op1 = gimple_assign_rhs2_ptr (stmt);
1709 }
1710
1711 zero = integer_zero_node;
1712 const_iv.step = integer_zero_node;
1713
1714 if (TREE_CODE (*op0) == SSA_NAME)
1715 iv0 = get_iv (data, *op0);
1716 if (TREE_CODE (*op1) == SSA_NAME)
1717 iv1 = get_iv (data, *op1);
1718
1719 /* If both sides of the comparison are IVs, we can express ivs on both ends. */
1720 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1721 {
1722 rewrite_type = COMP_IV_EXPR_2;
1723 goto end;
1724 }
1725
1726 /* If neither side of the comparison is an IV. */
1727 if ((!iv0 || integer_zerop (iv0->step))
1728 && (!iv1 || integer_zerop (iv1->step)))
1729 goto end;
1730
1731 /* Control variable may be on the other side. */
1732 if (!iv0 || integer_zerop (iv0->step))
1733 {
1734 std::swap (op0, op1);
1735 std::swap (iv0, iv1);
1736 }
1737 /* If one side is IV and the other side isn't loop invariant. */
1738 if (!iv1)
1739 rewrite_type = COMP_IV_EXPR;
1740 /* If one side is IV and the other side is loop invariant. */
1741 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1742 rewrite_type = COMP_IV_ELIM;
1743
1744 end:
1745 if (control_var)
1746 *control_var = op0;
1747 if (iv_var)
1748 *iv_var = iv0;
1749 if (bound)
1750 *bound = op1;
1751 if (iv_bound)
1752 *iv_bound = iv1;
1753
1754 return rewrite_type;
1755 }
1756
1757 /* Checks whether the condition in STMT is interesting and if so,
1758 records it. */
1759
1760 static void
1761 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1762 {
1763 tree *var_p, *bound_p;
1764 struct iv *var_iv, *bound_iv;
1765 enum comp_iv_rewrite ret;
1766
1767 ret = extract_cond_operands (data, stmt,
1768 &var_p, &bound_p, &var_iv, &bound_iv);
1769 if (ret == COMP_IV_NA)
1770 {
1771 find_interesting_uses_op (data, *var_p);
1772 find_interesting_uses_op (data, *bound_p);
1773 return;
1774 }
1775
1776 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1777 /* Record compare type iv_use for iv on the other side of comparison. */
1778 if (ret == COMP_IV_EXPR_2)
1779 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1780 }
1781
1782 /* Returns the outermost loop EXPR is obviously invariant in
1783 relative to the loop LOOP, i.e. if all its operands are defined
1784 outside of the returned loop. Returns NULL if EXPR is not
1785 even obviously invariant in LOOP. */
1786
1787 class loop *
1788 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1789 {
1790 basic_block def_bb;
1791 unsigned i, len;
1792
1793 if (is_gimple_min_invariant (expr))
1794 return current_loops->tree_root;
1795
1796 if (TREE_CODE (expr) == SSA_NAME)
1797 {
1798 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1799 if (def_bb)
1800 {
1801 if (flow_bb_inside_loop_p (loop, def_bb))
1802 return NULL;
1803 return superloop_at_depth (loop,
1804 loop_depth (def_bb->loop_father) + 1);
1805 }
1806
1807 return current_loops->tree_root;
1808 }
1809
1810 if (!EXPR_P (expr))
1811 return NULL;
1812
1813 unsigned maxdepth = 0;
1814 len = TREE_OPERAND_LENGTH (expr);
1815 for (i = 0; i < len; i++)
1816 {
1817 class loop *ivloop;
1818 if (!TREE_OPERAND (expr, i))
1819 continue;
1820
1821 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1822 if (!ivloop)
1823 return NULL;
1824 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1825 }
1826
1827 return superloop_at_depth (loop, maxdepth);
1828 }
1829
1830 /* Returns true if expression EXPR is obviously invariant in LOOP,
1831 i.e. if all its operands are defined outside of the LOOP. LOOP
1832 should not be the function body. */
1833
1834 bool
1835 expr_invariant_in_loop_p (class loop *loop, tree expr)
1836 {
1837 basic_block def_bb;
1838 unsigned i, len;
1839
1840 gcc_assert (loop_depth (loop) > 0);
1841
1842 if (is_gimple_min_invariant (expr))
1843 return true;
1844
1845 if (TREE_CODE (expr) == SSA_NAME)
1846 {
1847 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1848 if (def_bb
1849 && flow_bb_inside_loop_p (loop, def_bb))
1850 return false;
1851
1852 return true;
1853 }
1854
1855 if (!EXPR_P (expr))
1856 return false;
1857
1858 len = TREE_OPERAND_LENGTH (expr);
1859 for (i = 0; i < len; i++)
1860 if (TREE_OPERAND (expr, i)
1861 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1862 return false;
1863
1864 return true;
1865 }
1866
1867 /* Given expression EXPR which computes inductive values with respect
1868 to loop recorded in DATA, this function returns biv from which EXPR
1869 is derived by tracing definition chains of ssa variables in EXPR. */
1870
1871 static struct iv*
1872 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1873 {
1874 struct iv *iv;
1875 unsigned i, n;
1876 tree e2, e1;
1877 enum tree_code code;
1878 gimple *stmt;
1879
1880 if (expr == NULL_TREE)
1881 return NULL;
1882
1883 if (is_gimple_min_invariant (expr))
1884 return NULL;
1885
1886 code = TREE_CODE (expr);
1887 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1888 {
1889 n = TREE_OPERAND_LENGTH (expr);
1890 for (i = 0; i < n; i++)
1891 {
1892 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1893 if (iv)
1894 return iv;
1895 }
1896 }
1897
1898 /* Stop if it's not ssa name. */
1899 if (code != SSA_NAME)
1900 return NULL;
1901
1902 iv = get_iv (data, expr);
1903 if (!iv || integer_zerop (iv->step))
1904 return NULL;
1905 else if (iv->biv_p)
1906 return iv;
1907
1908 stmt = SSA_NAME_DEF_STMT (expr);
1909 if (gphi *phi = dyn_cast <gphi *> (stmt))
1910 {
1911 ssa_op_iter iter;
1912 use_operand_p use_p;
1913 basic_block phi_bb = gimple_bb (phi);
1914
1915 /* Skip loop header PHI that doesn't define biv. */
1916 if (phi_bb->loop_father == data->current_loop)
1917 return NULL;
1918
1919 if (virtual_operand_p (gimple_phi_result (phi)))
1920 return NULL;
1921
1922 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1923 {
1924 tree use = USE_FROM_PTR (use_p);
1925 iv = find_deriving_biv_for_expr (data, use);
1926 if (iv)
1927 return iv;
1928 }
1929 return NULL;
1930 }
1931 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1932 return NULL;
1933
1934 e1 = gimple_assign_rhs1 (stmt);
1935 code = gimple_assign_rhs_code (stmt);
1936 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1937 return find_deriving_biv_for_expr (data, e1);
1938
1939 switch (code)
1940 {
1941 case MULT_EXPR:
1942 case PLUS_EXPR:
1943 case MINUS_EXPR:
1944 case POINTER_PLUS_EXPR:
1945 /* Increments, decrements and multiplications by a constant
1946 are simple. */
1947 e2 = gimple_assign_rhs2 (stmt);
1948 iv = find_deriving_biv_for_expr (data, e2);
1949 if (iv)
1950 return iv;
1951 gcc_fallthrough ();
1952
1953 CASE_CONVERT:
1954 /* Casts are simple. */
1955 return find_deriving_biv_for_expr (data, e1);
1956
1957 default:
1958 break;
1959 }
1960
1961 return NULL;
1962 }
1963
1964 /* Record BIV, together with its predecessor and successor bivs, as being
1965 used in address type uses. */
1966
1967 static void
1968 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1969 {
1970 unsigned i;
1971 tree type, base_1, base_2;
1972 bitmap_iterator bi;
1973
1974 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1975 || biv->have_address_use || !biv->no_overflow)
1976 return;
1977
1978 type = TREE_TYPE (biv->base);
1979 if (!INTEGRAL_TYPE_P (type))
1980 return;
1981
1982 biv->have_address_use = true;
1983 data->bivs_not_used_in_addr--;
1984 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1985 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1986 {
1987 struct iv *iv = ver_info (data, i)->iv;
1988
1989 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1990 || iv->have_address_use || !iv->no_overflow)
1991 continue;
1992
1993 if (type != TREE_TYPE (iv->base)
1994 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1995 continue;
1996
1997 if (!operand_equal_p (biv->step, iv->step, 0))
1998 continue;
1999
2000 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2001 if (operand_equal_p (base_1, iv->base, 0)
2002 || operand_equal_p (base_2, biv->base, 0))
2003 {
2004 iv->have_address_use = true;
2005 data->bivs_not_used_in_addr--;
2006 }
2007 }
2008 }
2009
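/* Editorial illustration (not part of the original source).  The sibling
   marking in record_biv_for_address_use triggers for bivs that are one step
   apart: if biv i has base 0 and step 4, and another biv j has base 4 and
   step 4, then base_1 = 0 + 4 equals j's base, so when i gains an address
   use, j is marked as having one too; the base_2 test handles the symmetric
   case where j precedes i by one step.  Both bivs must be integral,
   non-wrapping and have identical steps.  */
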
2010 /* Accumulates the steps of indices into DATA and replaces their values with the
2011 initial ones. Returns false when the value of the index cannot be determined.
2012 Callback for for_each_index. */
2013
2014 struct ifs_ivopts_data
2015 {
2016 struct ivopts_data *ivopts_data;
2017 gimple *stmt;
2018 tree step;
2019 };
2020
2021 static bool
2022 idx_find_step (tree base, tree *idx, void *data)
2023 {
2024 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2025 struct iv *iv;
2026 bool use_overflow_semantics = false;
2027 tree step, iv_base, iv_step, lbound, off;
2028 class loop *loop = dta->ivopts_data->current_loop;
2029
2030 /* If base is a component ref, require that the offset of the reference
2031 be invariant. */
2032 if (TREE_CODE (base) == COMPONENT_REF)
2033 {
2034 off = component_ref_field_offset (base);
2035 return expr_invariant_in_loop_p (loop, off);
2036 }
2037
2038 /* If base is array, first check whether we will be able to move the
2039 reference out of the loop (in order to take its address in strength
2040 reduction). In order for this to work we need both lower bound
2041 and step to be loop invariants. */
2042 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2043 {
2044 /* Moreover, for a range, the size needs to be invariant as well. */
2045 if (TREE_CODE (base) == ARRAY_RANGE_REF
2046 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2047 return false;
2048
2049 step = array_ref_element_size (base);
2050 lbound = array_ref_low_bound (base);
2051
2052 if (!expr_invariant_in_loop_p (loop, step)
2053 || !expr_invariant_in_loop_p (loop, lbound))
2054 return false;
2055 }
2056
2057 if (TREE_CODE (*idx) != SSA_NAME)
2058 return true;
2059
2060 iv = get_iv (dta->ivopts_data, *idx);
2061 if (!iv)
2062 return false;
2063
2064 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2065 *&x[0], which is not folded and does not trigger the
2066 ARRAY_REF path below. */
2067 *idx = iv->base;
2068
2069 if (integer_zerop (iv->step))
2070 return true;
2071
2072 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2073 {
2074 step = array_ref_element_size (base);
2075
2076 /* We only handle addresses whose step is an integer constant. */
2077 if (TREE_CODE (step) != INTEGER_CST)
2078 return false;
2079 }
2080 else
2081 /* The step for pointer arithmetics already is 1 byte. */
2082 step = size_one_node;
2083
2084 iv_base = iv->base;
2085 iv_step = iv->step;
2086 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2087 use_overflow_semantics = true;
2088
2089 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2090 sizetype, &iv_base, &iv_step, dta->stmt,
2091 use_overflow_semantics))
2092 {
2093 /* The index might wrap. */
2094 return false;
2095 }
2096
2097 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2098 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2099
2100 if (dta->ivopts_data->bivs_not_used_in_addr)
2101 {
2102 if (!iv->biv_p)
2103 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2104
2105 record_biv_for_address_use (dta->ivopts_data, iv);
2106 }
2107 return true;
2108 }
2109
2110 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2111 object is passed to it in DATA. */
2112
2113 static bool
2114 idx_record_use (tree base, tree *idx,
2115 void *vdata)
2116 {
2117 struct ivopts_data *data = (struct ivopts_data *) vdata;
2118 find_interesting_uses_op (data, *idx);
2119 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2120 {
2121 find_interesting_uses_op (data, array_ref_element_size (base));
2122 find_interesting_uses_op (data, array_ref_low_bound (base));
2123 }
2124 return true;
2125 }
2126
2127 /* If we can prove that TOP = cst * BOT for some constant cst,
2128 store cst to MUL and return true. Otherwise return false.
2129 The returned value is always sign-extended, regardless of the
2130 signedness of TOP and BOT. */
2131
2132 static bool
2133 constant_multiple_of (tree top, tree bot, widest_int *mul)
2134 {
2135 tree mby;
2136 enum tree_code code;
2137 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2138 widest_int res, p0, p1;
2139
2140 STRIP_NOPS (top);
2141 STRIP_NOPS (bot);
2142
2143 if (operand_equal_p (top, bot, 0))
2144 {
2145 *mul = 1;
2146 return true;
2147 }
2148
2149 code = TREE_CODE (top);
2150 switch (code)
2151 {
2152 case MULT_EXPR:
2153 mby = TREE_OPERAND (top, 1);
2154 if (TREE_CODE (mby) != INTEGER_CST)
2155 return false;
2156
2157 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2158 return false;
2159
2160 *mul = wi::sext (res * wi::to_widest (mby), precision);
2161 return true;
2162
2163 case PLUS_EXPR:
2164 case MINUS_EXPR:
2165 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2166 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2167 return false;
2168
2169 if (code == MINUS_EXPR)
2170 p1 = -p1;
2171 *mul = wi::sext (p0 + p1, precision);
2172 return true;
2173
2174 case INTEGER_CST:
2175 if (TREE_CODE (bot) != INTEGER_CST)
2176 return false;
2177
2178 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2179 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2180 if (p1 == 0)
2181 return false;
2182 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2183 return res == 0;
2184
2185 default:
2186 if (POLY_INT_CST_P (top)
2187 && POLY_INT_CST_P (bot)
2188 && constant_multiple_p (wi::to_poly_widest (top),
2189 wi::to_poly_widest (bot), mul))
2190 return true;
2191
2192 return false;
2193 }
2194 }
2195
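/* Editorial illustration (not part of the original source).  A few worked
   examples of constant_multiple_of, assuming i_1 and n_2 are SSA names:

     TOP = 12,            BOT = 4    ->  true,  *MUL = 3  (INTEGER_CST case)
     TOP = i_1 * 4 - i_1, BOT = i_1  ->  true,  *MUL = 3  (4 from the
                                          MULT_EXPR case, minus 1 for the
                                          second operand of the MINUS_EXPR)
     TOP = 7,             BOT = 2    ->  false  (nonzero remainder)
     TOP = i_1 * n_2,     BOT = n_2  ->  false  (the multiplier is not an
                                          INTEGER_CST)

   The result is sign-extended to the precision of TOP's type.  */
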
2196 /* Return true if memory reference REF with step STEP may be unaligned. */
2197
2198 static bool
2199 may_be_unaligned_p (tree ref, tree step)
2200 {
2201 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2202 thus they are not misaligned. */
2203 if (TREE_CODE (ref) == TARGET_MEM_REF)
2204 return false;
2205
2206 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2207 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2208 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2209
2210 unsigned HOST_WIDE_INT bitpos;
2211 unsigned int ref_align;
2212 get_object_alignment_1 (ref, &ref_align, &bitpos);
2213 if (ref_align < align
2214 || (bitpos % align) != 0
2215 || (bitpos % BITS_PER_UNIT) != 0)
2216 return true;
2217
2218 unsigned int trailing_zeros = tree_ctz (step);
2219 if (trailing_zeros < HOST_BITS_PER_INT
2220 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2221 return true;
2222
2223 return false;
2224 }
2225
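/* Editorial illustration (not part of the original source).  Example for the
   trailing-zeros test in may_be_unaligned_p: suppose REF is a 4-byte access
   whose type requires 32-bit alignment and whose initial address is known to
   be sufficiently aligned, but STEP is 2 (bytes).  tree_ctz (STEP) is 1, so
   (1 << 1) * BITS_PER_UNIT = 16 < 32 and the function returns true: after an
   odd number of iterations the access would be only 2-byte aligned.  With
   STEP = 4 the test does not fire.  */
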
2226 /* Return true if EXPR may be non-addressable. */
2227
2228 bool
2229 may_be_nonaddressable_p (tree expr)
2230 {
2231 switch (TREE_CODE (expr))
2232 {
2233 case VAR_DECL:
2234 /* Check if it's a register variable. */
2235 return DECL_HARD_REGISTER (expr);
2236
2237 case TARGET_MEM_REF:
2238 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2239 target, thus they are always addressable. */
2240 return false;
2241
2242 case MEM_REF:
2243 /* Likewise for MEM_REFs, modulo the storage order. */
2244 return REF_REVERSE_STORAGE_ORDER (expr);
2245
2246 case BIT_FIELD_REF:
2247 if (REF_REVERSE_STORAGE_ORDER (expr))
2248 return true;
2249 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2250
2251 case COMPONENT_REF:
2252 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2253 return true;
2254 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2255 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2256
2257 case ARRAY_REF:
2258 case ARRAY_RANGE_REF:
2259 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2260 return true;
2261 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2262
2263 case VIEW_CONVERT_EXPR:
2264 /* This kind of view-conversions may wrap non-addressable objects
2265 and make them look addressable. After some processing the
2266 non-addressability may be uncovered again, causing ADDR_EXPRs
2267 of inappropriate objects to be built. */
2268 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2269 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2270 return true;
2271 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2272
2273 CASE_CONVERT:
2274 return true;
2275
2276 default:
2277 break;
2278 }
2279
2280 return false;
2281 }
2282
2283 /* Finds addresses in *OP_P inside STMT. */
2284
2285 static void
2286 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2287 tree *op_p)
2288 {
2289 tree base = *op_p, step = size_zero_node;
2290 struct iv *civ;
2291 struct ifs_ivopts_data ifs_ivopts_data;
2292
2293 /* Do not play with volatile memory references. A bit too conservative,
2294 perhaps, but safe. */
2295 if (gimple_has_volatile_ops (stmt))
2296 goto fail;
2297
2298 /* Ignore bitfields for now. Not really something terribly complicated
2299 to handle. TODO. */
2300 if (TREE_CODE (base) == BIT_FIELD_REF)
2301 goto fail;
2302
2303 base = unshare_expr (base);
2304
2305 if (TREE_CODE (base) == TARGET_MEM_REF)
2306 {
2307 tree type = build_pointer_type (TREE_TYPE (base));
2308 tree astep;
2309
2310 if (TMR_BASE (base)
2311 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2312 {
2313 civ = get_iv (data, TMR_BASE (base));
2314 if (!civ)
2315 goto fail;
2316
2317 TMR_BASE (base) = civ->base;
2318 step = civ->step;
2319 }
2320 if (TMR_INDEX2 (base)
2321 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2322 {
2323 civ = get_iv (data, TMR_INDEX2 (base));
2324 if (!civ)
2325 goto fail;
2326
2327 TMR_INDEX2 (base) = civ->base;
2328 step = civ->step;
2329 }
2330 if (TMR_INDEX (base)
2331 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2332 {
2333 civ = get_iv (data, TMR_INDEX (base));
2334 if (!civ)
2335 goto fail;
2336
2337 TMR_INDEX (base) = civ->base;
2338 astep = civ->step;
2339
2340 if (astep)
2341 {
2342 if (TMR_STEP (base))
2343 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2344
2345 step = fold_build2 (PLUS_EXPR, type, step, astep);
2346 }
2347 }
2348
2349 if (integer_zerop (step))
2350 goto fail;
2351 base = tree_mem_ref_addr (type, base);
2352 }
2353 else
2354 {
2355 ifs_ivopts_data.ivopts_data = data;
2356 ifs_ivopts_data.stmt = stmt;
2357 ifs_ivopts_data.step = size_zero_node;
2358 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2359 || integer_zerop (ifs_ivopts_data.step))
2360 goto fail;
2361 step = ifs_ivopts_data.step;
2362
2363 /* Check that the base expression is addressable. This needs
2364 to be done after substituting bases of IVs into it. */
2365 if (may_be_nonaddressable_p (base))
2366 goto fail;
2367
2368 /* Moreover, on strict alignment platforms, check that it is
2369 sufficiently aligned. */
2370 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2371 goto fail;
2372
2373 base = build_fold_addr_expr (base);
2374
2375 /* Substituting bases of IVs into the base expression might
2376 have caused folding opportunities. */
2377 if (TREE_CODE (base) == ADDR_EXPR)
2378 {
2379 tree *ref = &TREE_OPERAND (base, 0);
2380 while (handled_component_p (*ref))
2381 ref = &TREE_OPERAND (*ref, 0);
2382 if (TREE_CODE (*ref) == MEM_REF)
2383 {
2384 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2385 TREE_OPERAND (*ref, 0),
2386 TREE_OPERAND (*ref, 1));
2387 if (tem)
2388 *ref = tem;
2389 }
2390 }
2391 }
2392
2393 civ = alloc_iv (data, base, step);
2394 /* Fail if base object of this memory reference is unknown. */
2395 if (civ->base_object == NULL_TREE)
2396 goto fail;
2397
2398 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2399 return;
2400
2401 fail:
2402 for_each_index (op_p, idx_record_use, data);
2403 }
2404
2405 /* Finds and records invariants used in STMT. */
2406
2407 static void
2408 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2409 {
2410 ssa_op_iter iter;
2411 use_operand_p use_p;
2412 tree op;
2413
2414 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2415 {
2416 op = USE_FROM_PTR (use_p);
2417 record_invariant (data, op, false);
2418 }
2419 }
2420
2421 /* CALL calls an internal function. If operand *OP_P will become an
2422 address when the call is expanded, return the type of the memory
2423 being addressed, otherwise return null. */
2424
2425 static tree
2426 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2427 {
2428 switch (gimple_call_internal_fn (call))
2429 {
2430 case IFN_MASK_LOAD:
2431 case IFN_MASK_LOAD_LANES:
2432 if (op_p == gimple_call_arg_ptr (call, 0))
2433 return TREE_TYPE (gimple_call_lhs (call));
2434 return NULL_TREE;
2435
2436 case IFN_MASK_STORE:
2437 case IFN_MASK_STORE_LANES:
2438 if (op_p == gimple_call_arg_ptr (call, 0))
2439 return TREE_TYPE (gimple_call_arg (call, 3));
2440 return NULL_TREE;
2441
2442 default:
2443 return NULL_TREE;
2444 }
2445 }
2446
2447 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2448 Return true if the operand will become an address when STMT
2449 is expanded and record the associated address use if so. */
2450
2451 static bool
2452 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2453 struct iv *iv)
2454 {
2455 /* Fail if base object of this memory reference is unknown. */
2456 if (iv->base_object == NULL_TREE)
2457 return false;
2458
2459 tree mem_type = NULL_TREE;
2460 if (gcall *call = dyn_cast <gcall *> (stmt))
2461 if (gimple_call_internal_p (call))
2462 mem_type = get_mem_type_for_internal_fn (call, op_p);
2463 if (mem_type)
2464 {
2465 iv = alloc_iv (data, iv->base, iv->step);
2466 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2467 return true;
2468 }
2469 return false;
2470 }
2471
2472 /* Finds interesting uses of induction variables in the statement STMT. */
2473
2474 static void
2475 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2476 {
2477 struct iv *iv;
2478 tree op, *lhs, *rhs;
2479 ssa_op_iter iter;
2480 use_operand_p use_p;
2481 enum tree_code code;
2482
2483 find_invariants_stmt (data, stmt);
2484
2485 if (gimple_code (stmt) == GIMPLE_COND)
2486 {
2487 find_interesting_uses_cond (data, stmt);
2488 return;
2489 }
2490
2491 if (is_gimple_assign (stmt))
2492 {
2493 lhs = gimple_assign_lhs_ptr (stmt);
2494 rhs = gimple_assign_rhs1_ptr (stmt);
2495
2496 if (TREE_CODE (*lhs) == SSA_NAME)
2497 {
2498 /* If the statement defines an induction variable, the uses are not
2499 interesting by themselves. */
2500
2501 iv = get_iv (data, *lhs);
2502
2503 if (iv && !integer_zerop (iv->step))
2504 return;
2505 }
2506
2507 code = gimple_assign_rhs_code (stmt);
2508 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2509 && (REFERENCE_CLASS_P (*rhs)
2510 || is_gimple_val (*rhs)))
2511 {
2512 if (REFERENCE_CLASS_P (*rhs))
2513 find_interesting_uses_address (data, stmt, rhs);
2514 else
2515 find_interesting_uses_op (data, *rhs);
2516
2517 if (REFERENCE_CLASS_P (*lhs))
2518 find_interesting_uses_address (data, stmt, lhs);
2519 return;
2520 }
2521 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2522 {
2523 find_interesting_uses_cond (data, stmt);
2524 return;
2525 }
2526
2527 /* TODO -- we should also handle address uses of type
2528
2529 memory = call (whatever);
2530
2531 and
2532
2533 call (memory). */
2534 }
2535
2536 if (gimple_code (stmt) == GIMPLE_PHI
2537 && gimple_bb (stmt) == data->current_loop->header)
2538 {
2539 iv = get_iv (data, PHI_RESULT (stmt));
2540
2541 if (iv && !integer_zerop (iv->step))
2542 return;
2543 }
2544
2545 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2546 {
2547 op = USE_FROM_PTR (use_p);
2548
2549 if (TREE_CODE (op) != SSA_NAME)
2550 continue;
2551
2552 iv = get_iv (data, op);
2553 if (!iv)
2554 continue;
2555
2556 if (!find_address_like_use (data, stmt, use_p->use, iv))
2557 find_interesting_uses_op (data, op);
2558 }
2559 }
2560
2561 /* Finds interesting uses of induction variables outside of loops
2562 on loop exit edge EXIT. */
2563
2564 static void
2565 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2566 {
2567 gphi *phi;
2568 gphi_iterator psi;
2569 tree def;
2570
2571 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2572 {
2573 phi = psi.phi ();
2574 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2575 if (!virtual_operand_p (def))
2576 find_interesting_uses_op (data, def);
2577 }
2578 }
2579
2580 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2581 mode for memory reference represented by USE. */
2582
2583 static GTY (()) vec<rtx, va_gc> *addr_list;
2584
2585 static bool
2586 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2587 {
2588 rtx reg, addr;
2589 unsigned list_index;
2590 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2591 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2592
2593 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2594 if (list_index >= vec_safe_length (addr_list))
2595 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2596
2597 addr = (*addr_list)[list_index];
2598 if (!addr)
2599 {
2600 addr_mode = targetm.addr_space.address_mode (as);
2601 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2602 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2603 (*addr_list)[list_index] = addr;
2604 }
2605 else
2606 addr_mode = GET_MODE (addr);
2607
2608 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2609 return (memory_address_addr_space_p (mem_mode, addr, as));
2610 }
2611
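/* Editorial note (not part of the original source).  addr_offset_valid_p
   keeps one scratch (plus (reg) (const_int ...)) address per (address space,
   memory mode) pair in ADDR_LIST, patches the constant with OFFSET and asks
   memory_address_addr_space_p whether the target accepts it.  For example, on
   a hypothetical target whose [base + offset] addressing only allows signed
   16-bit displacements, an OFFSET of 32760 would be accepted while 40000
   would not; the exact range is entirely target-dependent.  */
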
2612 /* Comparison function to sort group in ascending order of addr_offset. */
2613
2614 static int
2615 group_compare_offset (const void *a, const void *b)
2616 {
2617 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2618 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2619
2620 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2621 }
2622
2623 /* Check if small groups should be split. Return true if no group
2624 contains more than two uses with distinct addr_offsets. Return
2625 false otherwise. We want to split such groups because:
2626
2627 1) Small groups don't have much benefit and may interfere with
2628 general candidate selection.
2629 2) The problem size with only small groups is usually small and the
2630 general algorithm can handle it well.
2631
2632 TODO -- The above claim may not hold when we want to merge memory
2633 accesses with consecutive addresses. */
2634
2635 static bool
2636 split_small_address_groups_p (struct ivopts_data *data)
2637 {
2638 unsigned int i, j, distinct = 1;
2639 struct iv_use *pre;
2640 struct iv_group *group;
2641
2642 for (i = 0; i < data->vgroups.length (); i++)
2643 {
2644 group = data->vgroups[i];
2645 if (group->vuses.length () == 1)
2646 continue;
2647
2648 gcc_assert (address_p (group->type));
2649 if (group->vuses.length () == 2)
2650 {
2651 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2652 group->vuses[1]->addr_offset) > 0)
2653 std::swap (group->vuses[0], group->vuses[1]);
2654 }
2655 else
2656 group->vuses.qsort (group_compare_offset);
2657
2658 if (distinct > 2)
2659 continue;
2660
2661 distinct = 1;
2662 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2663 {
2664 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2665 {
2666 pre = group->vuses[j];
2667 distinct++;
2668 }
2669
2670 if (distinct > 2)
2671 break;
2672 }
2673 }
2674
2675 return (distinct <= 2);
2676 }
2677
2678 /* For each group of address type uses, this function further groups
2679 these uses according to the maximum offset supported by target's
2680 [base + offset] addressing mode. */
2681
2682 static void
2683 split_address_groups (struct ivopts_data *data)
2684 {
2685 unsigned int i, j;
2686 /* Whether all groups should be split unconditionally. */
2687 bool split_p = split_small_address_groups_p (data);
2688
2689 for (i = 0; i < data->vgroups.length (); i++)
2690 {
2691 struct iv_group *new_group = NULL;
2692 struct iv_group *group = data->vgroups[i];
2693 struct iv_use *use = group->vuses[0];
2694
2695 use->id = 0;
2696 use->group_id = group->id;
2697 if (group->vuses.length () == 1)
2698 continue;
2699
2700 gcc_assert (address_p (use->type));
2701
2702 for (j = 1; j < group->vuses.length ();)
2703 {
2704 struct iv_use *next = group->vuses[j];
2705 poly_int64 offset = next->addr_offset - use->addr_offset;
2706
2707 /* Split the group if asked to, or if the offset against the first
2708 use can't fit in the offset part of the addressing mode. IV uses
2709 having the same offset are still kept in one group. */
2710 if (maybe_ne (offset, 0)
2711 && (split_p || !addr_offset_valid_p (use, offset)))
2712 {
2713 if (!new_group)
2714 new_group = record_group (data, group->type);
2715 group->vuses.ordered_remove (j);
2716 new_group->vuses.safe_push (next);
2717 continue;
2718 }
2719
2720 next->id = j;
2721 next->group_id = group->id;
2722 j++;
2723 }
2724 }
2725 }
2726
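/* Editorial illustration (not part of the original source).  Suppose a group
   has uses with addr_offsets {0, 8, 1048576} and split_p is false.  The use
   at offset 0 is the reference; the use at offset 8 stays in the group if the
   target accepts a displacement of 8, while the use at offset 1048576 is
   moved to a new group on a hypothetical target whose displacements are
   limited to +-32KB.  If split_small_address_groups_p returned true (every
   group has at most two distinct offsets), all uses with distinct offsets are
   split apart regardless of the addressing mode.  */
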
2727 /* Finds uses of the induction variables that are interesting. */
2728
2729 static void
2730 find_interesting_uses (struct ivopts_data *data)
2731 {
2732 basic_block bb;
2733 gimple_stmt_iterator bsi;
2734 basic_block *body = get_loop_body (data->current_loop);
2735 unsigned i;
2736 edge e;
2737
2738 for (i = 0; i < data->current_loop->num_nodes; i++)
2739 {
2740 edge_iterator ei;
2741 bb = body[i];
2742
2743 FOR_EACH_EDGE (e, ei, bb->succs)
2744 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2745 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2746 find_interesting_uses_outside (data, e);
2747
2748 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2749 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2750 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2751 if (!is_gimple_debug (gsi_stmt (bsi)))
2752 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2753 }
2754 free (body);
2755
2756 split_address_groups (data);
2757
2758 if (dump_file && (dump_flags & TDF_DETAILS))
2759 {
2760 fprintf (dump_file, "\n<IV Groups>:\n");
2761 dump_groups (dump_file, data);
2762 fprintf (dump_file, "\n");
2763 }
2764 }
2765
2766 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2767 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2768 we are at the top-level of the processed address. */
2769
2770 static tree
2771 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2772 poly_int64 *offset)
2773 {
2774 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2775 enum tree_code code;
2776 tree type, orig_type = TREE_TYPE (expr);
2777 poly_int64 off0, off1;
2778 HOST_WIDE_INT st;
2779 tree orig_expr = expr;
2780
2781 STRIP_NOPS (expr);
2782
2783 type = TREE_TYPE (expr);
2784 code = TREE_CODE (expr);
2785 *offset = 0;
2786
2787 switch (code)
2788 {
2789 case POINTER_PLUS_EXPR:
2790 case PLUS_EXPR:
2791 case MINUS_EXPR:
2792 op0 = TREE_OPERAND (expr, 0);
2793 op1 = TREE_OPERAND (expr, 1);
2794
2795 op0 = strip_offset_1 (op0, false, false, &off0);
2796 op1 = strip_offset_1 (op1, false, false, &off1);
2797
2798 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2799 if (op0 == TREE_OPERAND (expr, 0)
2800 && op1 == TREE_OPERAND (expr, 1))
2801 return orig_expr;
2802
2803 if (integer_zerop (op1))
2804 expr = op0;
2805 else if (integer_zerop (op0))
2806 {
2807 if (code == MINUS_EXPR)
2808 expr = fold_build1 (NEGATE_EXPR, type, op1);
2809 else
2810 expr = op1;
2811 }
2812 else
2813 expr = fold_build2 (code, type, op0, op1);
2814
2815 return fold_convert (orig_type, expr);
2816
2817 case MULT_EXPR:
2818 op1 = TREE_OPERAND (expr, 1);
2819 if (!cst_and_fits_in_hwi (op1))
2820 return orig_expr;
2821
2822 op0 = TREE_OPERAND (expr, 0);
2823 op0 = strip_offset_1 (op0, false, false, &off0);
2824 if (op0 == TREE_OPERAND (expr, 0))
2825 return orig_expr;
2826
2827 *offset = off0 * int_cst_value (op1);
2828 if (integer_zerop (op0))
2829 expr = op0;
2830 else
2831 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2832
2833 return fold_convert (orig_type, expr);
2834
2835 case ARRAY_REF:
2836 case ARRAY_RANGE_REF:
2837 if (!inside_addr)
2838 return orig_expr;
2839
2840 step = array_ref_element_size (expr);
2841 if (!cst_and_fits_in_hwi (step))
2842 break;
2843
2844 st = int_cst_value (step);
2845 op1 = TREE_OPERAND (expr, 1);
2846 op1 = strip_offset_1 (op1, false, false, &off1);
2847 *offset = off1 * st;
2848
2849 if (top_compref
2850 && integer_zerop (op1))
2851 {
2852 /* Strip the component reference completely. */
2853 op0 = TREE_OPERAND (expr, 0);
2854 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2855 *offset += off0;
2856 return op0;
2857 }
2858 break;
2859
2860 case COMPONENT_REF:
2861 {
2862 tree field;
2863
2864 if (!inside_addr)
2865 return orig_expr;
2866
2867 tmp = component_ref_field_offset (expr);
2868 field = TREE_OPERAND (expr, 1);
2869 if (top_compref
2870 && cst_and_fits_in_hwi (tmp)
2871 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2872 {
2873 HOST_WIDE_INT boffset, abs_off;
2874
2875 /* Strip the component reference completely. */
2876 op0 = TREE_OPERAND (expr, 0);
2877 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2878 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2879 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2880 if (boffset < 0)
2881 abs_off = -abs_off;
2882
2883 *offset = off0 + int_cst_value (tmp) + abs_off;
2884 return op0;
2885 }
2886 }
2887 break;
2888
2889 case ADDR_EXPR:
2890 op0 = TREE_OPERAND (expr, 0);
2891 op0 = strip_offset_1 (op0, true, true, &off0);
2892 *offset += off0;
2893
2894 if (op0 == TREE_OPERAND (expr, 0))
2895 return orig_expr;
2896
2897 expr = build_fold_addr_expr (op0);
2898 return fold_convert (orig_type, expr);
2899
2900 case MEM_REF:
2901 /* ??? Offset operand? */
2902 inside_addr = false;
2903 break;
2904
2905 default:
2906 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2907 return build_int_cst (orig_type, 0);
2908 return orig_expr;
2909 }
2910
2911 /* Default handling of expressions for which we want to recurse into
2912 the first operand. */
2913 op0 = TREE_OPERAND (expr, 0);
2914 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2915 *offset += off0;
2916
2917 if (op0 == TREE_OPERAND (expr, 0)
2918 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2919 return orig_expr;
2920
2921 expr = copy_node (expr);
2922 TREE_OPERAND (expr, 0) = op0;
2923 if (op1)
2924 TREE_OPERAND (expr, 1) = op1;
2925
2926 /* Inside address, we might strip the top level component references,
2927 thus changing type of the expression. Handling of ADDR_EXPR
2928 will fix that. */
2929 expr = fold_convert (orig_type, expr);
2930
2931 return expr;
2932 }
2933
2934 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2935
2936 tree
2937 strip_offset (tree expr, poly_uint64_pod *offset)
2938 {
2939 poly_int64 off;
2940 tree core = strip_offset_1 (expr, false, false, &off);
2941 *offset = off;
2942 return core;
2943 }
2944
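/* Editorial illustration (not part of the original source).  Two examples of
   strip_offset, assuming p_1 and i_2 are SSA names and a is an array of
   4-byte ints:

     EXPR = p_1 + 16     ->  core p_1,      *OFFSET = 16
     EXPR = &a[i_2 + 3]  ->  core &a[i_2],  *OFFSET = 12 (3 * element size)

   The second case goes through the ADDR_EXPR and ARRAY_REF cases of
   strip_offset_1, which scale the stripped index offset by
   array_ref_element_size.  */
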
2945 /* Returns variant of TYPE that can be used as base for different uses.
2946 We return unsigned type with the same precision, which avoids problems
2947 with overflows. */
2948
2949 static tree
2950 generic_type_for (tree type)
2951 {
2952 if (POINTER_TYPE_P (type))
2953 return unsigned_type_for (type);
2954
2955 if (TYPE_UNSIGNED (type))
2956 return type;
2957
2958 return unsigned_type_for (type);
2959 }
2960
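/* Editorial illustration (not part of the original source).  generic_type_for
   maps a candidate's type to an unsigned type of the same precision so that
   the new ivs are computed in wrap-around arithmetic: e.g. "int" becomes
   "unsigned int", and a pointer type becomes an unsigned integer type of
   pointer precision (via unsigned_type_for).  Types that are already
   unsigned are returned unchanged.  */
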
2961 /* Private data for walk_tree. */
2962
2963 struct walk_tree_data
2964 {
2965 bitmap *inv_vars;
2966 struct ivopts_data *idata;
2967 };
2968
2969 /* Callback function for walk_tree; it records invariants and symbol
2970 references in *EXPR_P. DATA is the structure storing result info. */
2971
2972 static tree
2973 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2974 {
2975 tree op = *expr_p;
2976 struct version_info *info;
2977 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2978
2979 if (TREE_CODE (op) != SSA_NAME)
2980 return NULL_TREE;
2981
2982 info = name_info (wdata->idata, op);
2983 /* Because we expand simple operations when finding IVs, a loop invariant
2984 variable that isn't referred to by the original loop could be used now.
2985 Record such invariant variables here. */
2986 if (!info->iv)
2987 {
2988 struct ivopts_data *idata = wdata->idata;
2989 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2990
2991 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2992 {
2993 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2994 record_invariant (idata, op, false);
2995 }
2996 }
2997 if (!info->inv_id || info->has_nonlin_use)
2998 return NULL_TREE;
2999
3000 if (!*wdata->inv_vars)
3001 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3002 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3003
3004 return NULL_TREE;
3005 }
3006
3007 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3008 store them. */
3009
3010 static inline void
3011 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3012 {
3013 struct walk_tree_data wdata;
3014
3015 if (!inv_vars)
3016 return;
3017
3018 wdata.idata = data;
3019 wdata.inv_vars = inv_vars;
3020 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3021 }
3022
3023 /* Get the entry from the invariant expr hash table for INV_EXPR. A new
3024 entry will be recorded if one doesn't exist yet. Given the two exprs:
3025 inv_expr + cst1, inv_expr + cst2
3026 it's hard to decide whether the constant part should be stripped or
3027 not. We choose not to strip it, for the following reasons:
3028 1) We would need to count the ADD cost for the constant part if it's
3029 stripped, which isn't always trivial where this function is called.
3030 2) Stripping the constant away may conflict with the subsequent loop
3031 invariant hoisting pass.
3032 3) Not stripping the constant away results in more invariant exprs,
3033 which usually leads to decisions preferring lower reg pressure. */
3034
3035 static iv_inv_expr_ent *
3036 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3037 {
3038 STRIP_NOPS (inv_expr);
3039
3040 if (poly_int_tree_p (inv_expr)
3041 || TREE_CODE (inv_expr) == SSA_NAME)
3042 return NULL;
3043
3044 /* Don't strip constant part away as we used to. */
3045
3046 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3047 struct iv_inv_expr_ent ent;
3048 ent.expr = inv_expr;
3049 ent.hash = iterative_hash_expr (inv_expr, 0);
3050 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3051
3052 if (!*slot)
3053 {
3054 *slot = XNEW (struct iv_inv_expr_ent);
3055 (*slot)->expr = inv_expr;
3056 (*slot)->hash = ent.hash;
3057 (*slot)->id = ++data->max_inv_expr_id;
3058 }
3059
3060 return *slot;
3061 }
3062
3063 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3064 position to POS. If USE is not NULL, the candidate is set as related to
3065 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3066 replacement of the final value of the iv by a direct computation. */
3067
3068 static struct iv_cand *
3069 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3070 enum iv_position pos, struct iv_use *use,
3071 gimple *incremented_at, struct iv *orig_iv = NULL,
3072 bool doloop = false)
3073 {
3074 unsigned i;
3075 struct iv_cand *cand = NULL;
3076 tree type, orig_type;
3077
3078 gcc_assert (base && step);
3079
3080 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3081 live, but the ivopts code may replace a real pointer with one
3082 pointing before or after the memory block that is then adjusted
3083 into the memory block during the loop. FIXME: It would likely be
3084 better to actually force the pointer live and still use ivopts;
3085 for example, it would be enough to write the pointer into memory
3086 and keep it there until after the loop. */
3087 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3088 return NULL;
3089
3090 /* For non-original variables, make sure their values are computed in a type
3091 that does not invoke undefined behavior on overflows (since in general,
3092 we cannot prove that these induction variables are non-wrapping). */
3093 if (pos != IP_ORIGINAL)
3094 {
3095 orig_type = TREE_TYPE (base);
3096 type = generic_type_for (orig_type);
3097 if (type != orig_type)
3098 {
3099 base = fold_convert (type, base);
3100 step = fold_convert (type, step);
3101 }
3102 }
3103
3104 for (i = 0; i < data->vcands.length (); i++)
3105 {
3106 cand = data->vcands[i];
3107
3108 if (cand->pos != pos)
3109 continue;
3110
3111 if (cand->incremented_at != incremented_at
3112 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3113 && cand->ainc_use != use))
3114 continue;
3115
3116 if (operand_equal_p (base, cand->iv->base, 0)
3117 && operand_equal_p (step, cand->iv->step, 0)
3118 && (TYPE_PRECISION (TREE_TYPE (base))
3119 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3120 break;
3121 }
3122
3123 if (i == data->vcands.length ())
3124 {
3125 cand = XCNEW (struct iv_cand);
3126 cand->id = i;
3127 cand->iv = alloc_iv (data, base, step);
3128 cand->pos = pos;
3129 if (pos != IP_ORIGINAL)
3130 {
3131 if (doloop)
3132 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3133 else
3134 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3135 cand->var_after = cand->var_before;
3136 }
3137 cand->important = important;
3138 cand->incremented_at = incremented_at;
3139 cand->doloop_p = doloop;
3140 data->vcands.safe_push (cand);
3141
3142 if (!poly_int_tree_p (step))
3143 {
3144 find_inv_vars (data, &step, &cand->inv_vars);
3145
3146 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3147 /* Share bitmap between inv_vars and inv_exprs for cand. */
3148 if (inv_expr != NULL)
3149 {
3150 cand->inv_exprs = cand->inv_vars;
3151 cand->inv_vars = NULL;
3152 if (cand->inv_exprs)
3153 bitmap_clear (cand->inv_exprs);
3154 else
3155 cand->inv_exprs = BITMAP_ALLOC (NULL);
3156
3157 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3158 }
3159 }
3160
3161 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3162 cand->ainc_use = use;
3163 else
3164 cand->ainc_use = NULL;
3165
3166 cand->orig_iv = orig_iv;
3167 if (dump_file && (dump_flags & TDF_DETAILS))
3168 dump_cand (dump_file, cand);
3169 }
3170
3171 cand->important |= important;
3172 cand->doloop_p |= doloop;
3173
3174 /* Relate candidate to the group for which it is added. */
3175 if (use)
3176 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3177
3178 return cand;
3179 }
3180
3181 /* Returns true if incrementing the induction variable at the end of the LOOP
3182 is allowed.
3183
3184 The purpose is to avoid splitting the latch edge with a biv increment, thus
3185 creating a jump, possibly confusing other optimization passes and leaving
3186 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3187 available (so we do not have a better alternative), or if the latch edge
3188 is already nonempty. */
3189
3190 static bool
3191 allow_ip_end_pos_p (class loop *loop)
3192 {
3193 if (!ip_normal_pos (loop))
3194 return true;
3195
3196 if (!empty_block_p (ip_end_pos (loop)))
3197 return true;
3198
3199 return false;
3200 }
3201
3202 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3203 Important field is set to IMPORTANT. */
3204
3205 static void
3206 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3207 bool important, struct iv_use *use)
3208 {
3209 basic_block use_bb = gimple_bb (use->stmt);
3210 machine_mode mem_mode;
3211 unsigned HOST_WIDE_INT cstepi;
3212
3213 /* If we insert the increment in any position other than the standard
3214 ones, we must ensure that it is incremented once per iteration.
3215 It must not be in an inner nested loop, or one side of an if
3216 statement. */
3217 if (use_bb->loop_father != data->current_loop
3218 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3219 || stmt_can_throw_internal (cfun, use->stmt)
3220 || !cst_and_fits_in_hwi (step))
3221 return;
3222
3223 cstepi = int_cst_value (step);
3224
3225 mem_mode = TYPE_MODE (use->mem_type);
3226 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3227 || USE_STORE_PRE_INCREMENT (mem_mode))
3228 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3229 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3230 || USE_STORE_PRE_DECREMENT (mem_mode))
3231 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3232 {
3233 enum tree_code code = MINUS_EXPR;
3234 tree new_base;
3235 tree new_step = step;
3236
3237 if (POINTER_TYPE_P (TREE_TYPE (base)))
3238 {
3239 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3240 code = POINTER_PLUS_EXPR;
3241 }
3242 else
3243 new_step = fold_convert (TREE_TYPE (base), new_step);
3244 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3245 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3246 use->stmt);
3247 }
3248 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3249 || USE_STORE_POST_INCREMENT (mem_mode))
3250 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3251 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3252 || USE_STORE_POST_DECREMENT (mem_mode))
3253 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3254 {
3255 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3256 use->stmt);
3257 }
3258 }
3259
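/* Editorial illustration (not part of the original source).  On a
   hypothetical target that supports post-increment loads for MEM_MODE, an
   address use *p_1 of a 4-byte memory type whose iv has base p_1 and step 4
   (so the step equals GET_MODE_SIZE (mem_mode)) gets an IP_AFTER_USE
   candidate with the same base and step, allowing the increment to be folded
   into the memory access.  The pre-increment/decrement cases instead add an
   IP_BEFORE_USE candidate whose base is adjusted by one step, so that the
   value seen after the early increment matches the use.  */
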
3260 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3261 position to POS. If USE is not NULL, the candidate is set as related to
3262 it. The candidate computation is scheduled before the exit condition and at
3263 the end of the loop. */
3264
3265 static void
3266 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3267 struct iv_use *use, struct iv *orig_iv = NULL,
3268 bool doloop = false)
3269 {
3270 if (ip_normal_pos (data->current_loop))
3271 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3272 doloop);
3273 /* Exclude doloop candidate here since it requires decrement then comparison
3274 and jump, the IP_END position doesn't match. */
3275 if (!doloop && ip_end_pos (data->current_loop)
3276 && allow_ip_end_pos_p (data->current_loop))
3277 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3278 }
3279
3280 /* Adds standard iv candidates. */
3281
3282 static void
3283 add_standard_iv_candidates (struct ivopts_data *data)
3284 {
3285 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3286
3287 /* The same for a double-integer type if it is still fast enough. */
3288 if (TYPE_PRECISION
3289 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3290 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3291 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3292 build_int_cst (long_integer_type_node, 1), true, NULL);
3293
3294 /* The same for a double-integer type if it is still fast enough. */
3295 if (TYPE_PRECISION
3296 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3297 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3298 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3299 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3300 }
3301
3302
3303 /* Adds candidates based on the old induction variable IV. */
3304
3305 static void
3306 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3307 {
3308 gimple *phi;
3309 tree def;
3310 struct iv_cand *cand;
3311
3312 /* Check if this biv is used in address type use. */
3313 if (iv->no_overflow && iv->have_address_use
3314 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3315 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3316 {
3317 tree base = fold_convert (sizetype, iv->base);
3318 tree step = fold_convert (sizetype, iv->step);
3319
3320 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3321 add_candidate (data, base, step, true, NULL, iv);
3322 /* Add iv cand of the original type only if it has nonlinear use. */
3323 if (iv->nonlin_use)
3324 add_candidate (data, iv->base, iv->step, true, NULL);
3325 }
3326 else
3327 add_candidate (data, iv->base, iv->step, true, NULL);
3328
3329 /* The same, but with initial value zero. */
3330 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3331 add_candidate (data, size_int (0), iv->step, true, NULL);
3332 else
3333 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3334 iv->step, true, NULL);
3335
3336 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3337 if (gimple_code (phi) == GIMPLE_PHI)
3338 {
3339 /* Additionally record the possibility of leaving the original iv
3340 untouched. */
3341 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3342 /* Don't add candidate if it's from another PHI node because
3343 it's an affine iv appearing in the form of PEELED_CHREC. */
3344 phi = SSA_NAME_DEF_STMT (def);
3345 if (gimple_code (phi) != GIMPLE_PHI)
3346 {
3347 cand = add_candidate_1 (data,
3348 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3349 SSA_NAME_DEF_STMT (def));
3350 if (cand)
3351 {
3352 cand->var_before = iv->ssa_name;
3353 cand->var_after = def;
3354 }
3355 }
3356 else
3357 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3358 }
3359 }
3360
3361 /* Adds candidates based on the old induction variables. */
3362
3363 static void
3364 add_iv_candidate_for_bivs (struct ivopts_data *data)
3365 {
3366 unsigned i;
3367 struct iv *iv;
3368 bitmap_iterator bi;
3369
3370 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3371 {
3372 iv = ver_info (data, i)->iv;
3373 if (iv && iv->biv_p && !integer_zerop (iv->step))
3374 add_iv_candidate_for_biv (data, iv);
3375 }
3376 }
3377
3378 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3379
3380 static void
3381 record_common_cand (struct ivopts_data *data, tree base,
3382 tree step, struct iv_use *use)
3383 {
3384 class iv_common_cand ent;
3385 class iv_common_cand **slot;
3386
3387 ent.base = base;
3388 ent.step = step;
3389 ent.hash = iterative_hash_expr (base, 0);
3390 ent.hash = iterative_hash_expr (step, ent.hash);
3391
3392 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3393 if (*slot == NULL)
3394 {
3395 *slot = new iv_common_cand ();
3396 (*slot)->base = base;
3397 (*slot)->step = step;
3398 (*slot)->uses.create (8);
3399 (*slot)->hash = ent.hash;
3400 data->iv_common_cands.safe_push ((*slot));
3401 }
3402
3403 gcc_assert (use != NULL);
3404 (*slot)->uses.safe_push (use);
3405 return;
3406 }
3407
3408 /* Comparison function used to sort common candidates. */
3409
3410 static int
3411 common_cand_cmp (const void *p1, const void *p2)
3412 {
3413 unsigned n1, n2;
3414 const class iv_common_cand *const *const ccand1
3415 = (const class iv_common_cand *const *)p1;
3416 const class iv_common_cand *const *const ccand2
3417 = (const class iv_common_cand *const *)p2;
3418
3419 n1 = (*ccand1)->uses.length ();
3420 n2 = (*ccand2)->uses.length ();
3421 return n2 - n1;
3422 }
3423
3424 /* Adds IV candidates based on the common candidates recorded. */
3425
3426 static void
3427 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3428 {
3429 unsigned i, j;
3430 struct iv_cand *cand_1, *cand_2;
3431
3432 data->iv_common_cands.qsort (common_cand_cmp);
3433 for (i = 0; i < data->iv_common_cands.length (); i++)
3434 {
3435 class iv_common_cand *ptr = data->iv_common_cands[i];
3436
3437 /* Only add IV candidate if it's derived from multiple uses. */
3438 if (ptr->uses.length () <= 1)
3439 break;
3440
3441 cand_1 = NULL;
3442 cand_2 = NULL;
3443 if (ip_normal_pos (data->current_loop))
3444 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3445 false, IP_NORMAL, NULL, NULL);
3446
3447 if (ip_end_pos (data->current_loop)
3448 && allow_ip_end_pos_p (data->current_loop))
3449 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3450 false, IP_END, NULL, NULL);
3451
3452 /* Bind deriving uses and the new candidates. */
3453 for (j = 0; j < ptr->uses.length (); j++)
3454 {
3455 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3456 if (cand_1)
3457 bitmap_set_bit (group->related_cands, cand_1->id);
3458 if (cand_2)
3459 bitmap_set_bit (group->related_cands, cand_2->id);
3460 }
3461 }
3462
3463 /* Release data since it is useless from this point. */
3464 data->iv_common_cand_tab->empty ();
3465 data->iv_common_cands.truncate (0);
3466 }
3467
3468 /* Adds candidates based on the value of USE's iv. */
3469
3470 static void
3471 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3472 {
3473 poly_uint64 offset;
3474 tree base;
3475 tree basetype;
3476 struct iv *iv = use->iv;
3477
3478 add_candidate (data, iv->base, iv->step, false, use);
3479
3480 /* Record common candidate for use in case it can be shared by others. */
3481 record_common_cand (data, iv->base, iv->step, use);
3482
3483 /* Record common candidate with initial value zero. */
3484 basetype = TREE_TYPE (iv->base);
3485 if (POINTER_TYPE_P (basetype))
3486 basetype = sizetype;
3487 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3488
3489 /* Compare the cost of an address with an unscaled index with the cost of
3490 an address with a scaled index and add candidate if useful. */
3491 poly_int64 step;
3492 if (use != NULL
3493 && poly_int_tree_p (iv->step, &step)
3494 && address_p (use->type))
3495 {
3496 poly_int64 new_step;
3497 unsigned int fact = preferred_mem_scale_factor
3498 (use->iv->base,
3499 TYPE_MODE (use->mem_type),
3500 optimize_loop_for_speed_p (data->current_loop));
3501
3502 if (fact != 1
3503 && multiple_p (step, fact, &new_step))
3504 add_candidate (data, size_int (0),
3505 wide_int_to_tree (sizetype, new_step),
3506 true, NULL);
3507 }
3508
3509 /* Record common candidate with constant offset stripped in base.
3510 Like the use itself, we also add candidate directly for it. */
3511 base = strip_offset (iv->base, &offset);
3512 if (maybe_ne (offset, 0U) || base != iv->base)
3513 {
3514 record_common_cand (data, base, iv->step, use);
3515 add_candidate (data, base, iv->step, false, use);
3516 }
3517
3518 /* Record common candidate with base_object removed in base. */
3519 base = iv->base;
3520 STRIP_NOPS (base);
3521 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3522 {
3523 tree step = iv->step;
3524
3525 STRIP_NOPS (step);
3526 base = TREE_OPERAND (base, 1);
3527 step = fold_convert (sizetype, step);
3528 record_common_cand (data, base, step, use);
3529 /* Also record common candidate with offset stripped. */
3530 base = strip_offset (base, &offset);
3531 if (maybe_ne (offset, 0U))
3532 record_common_cand (data, base, step, use);
3533 }
3534
3535 /* Finally, add auto-increment candidates. Make such variables
3536 important since other iv uses with the same base object may be based
3537 on them. */
3538 if (use != NULL && address_p (use->type))
3539 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3540 }
3541
3542 /* Adds candidates based on the uses. */
3543
3544 static void
3545 add_iv_candidate_for_groups (struct ivopts_data *data)
3546 {
3547 unsigned i;
3548
3549 /* Only add candidate for the first use in group. */
3550 for (i = 0; i < data->vgroups.length (); i++)
3551 {
3552 struct iv_group *group = data->vgroups[i];
3553
3554 gcc_assert (group->vuses[0] != NULL);
3555 add_iv_candidate_for_use (data, group->vuses[0]);
3556 }
3557 add_iv_candidate_derived_from_uses (data);
3558 }
3559
3560 /* Record important candidates and add them to related_cands bitmaps. */
3561
3562 static void
3563 record_important_candidates (struct ivopts_data *data)
3564 {
3565 unsigned i;
3566 struct iv_group *group;
3567
3568 for (i = 0; i < data->vcands.length (); i++)
3569 {
3570 struct iv_cand *cand = data->vcands[i];
3571
3572 if (cand->important)
3573 bitmap_set_bit (data->important_candidates, i);
3574 }
3575
3576 data->consider_all_candidates = (data->vcands.length ()
3577 <= CONSIDER_ALL_CANDIDATES_BOUND);
3578
3579 /* Add important candidates to groups' related_cands bitmaps. */
3580 for (i = 0; i < data->vgroups.length (); i++)
3581 {
3582 group = data->vgroups[i];
3583 bitmap_ior_into (group->related_cands, data->important_candidates);
3584 }
3585 }
3586
3587 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3588 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3589 we allocate a simple list to every use. */
3590
3591 static void
3592 alloc_use_cost_map (struct ivopts_data *data)
3593 {
3594 unsigned i, size, s;
3595
3596 for (i = 0; i < data->vgroups.length (); i++)
3597 {
3598 struct iv_group *group = data->vgroups[i];
3599
3600 if (data->consider_all_candidates)
3601 size = data->vcands.length ();
3602 else
3603 {
3604 s = bitmap_count_bits (group->related_cands);
3605
3606 /* Round up to a power of two, so that computing the modulus by it is fast. */
3607 size = s ? (1 << ceil_log2 (s)) : 1;
3608 }
3609
3610 group->n_map_members = size;
3611 group->cost_map = XCNEWVEC (class cost_pair, size);
3612 }
3613 }
3614
3615 /* Sets cost of (GROUP, CAND) pair to COST and records that it depends
3616 on invariants INV_VARS and that the value used in expressing it is
3617 VALUE, and in case of iv elimination the comparison operator is COMP. */
3618
3619 static void
3620 set_group_iv_cost (struct ivopts_data *data,
3621 struct iv_group *group, struct iv_cand *cand,
3622 comp_cost cost, bitmap inv_vars, tree value,
3623 enum tree_code comp, bitmap inv_exprs)
3624 {
3625 unsigned i, s;
3626
3627 if (cost.infinite_cost_p ())
3628 {
3629 BITMAP_FREE (inv_vars);
3630 BITMAP_FREE (inv_exprs);
3631 return;
3632 }
3633
3634 if (data->consider_all_candidates)
3635 {
3636 group->cost_map[cand->id].cand = cand;
3637 group->cost_map[cand->id].cost = cost;
3638 group->cost_map[cand->id].inv_vars = inv_vars;
3639 group->cost_map[cand->id].inv_exprs = inv_exprs;
3640 group->cost_map[cand->id].value = value;
3641 group->cost_map[cand->id].comp = comp;
3642 return;
3643 }
3644
3645 /* n_map_members is a power of two, so this computes modulo. */
3646 s = cand->id & (group->n_map_members - 1);
3647 for (i = s; i < group->n_map_members; i++)
3648 if (!group->cost_map[i].cand)
3649 goto found;
3650 for (i = 0; i < s; i++)
3651 if (!group->cost_map[i].cand)
3652 goto found;
3653
3654 gcc_unreachable ();
3655
3656 found:
3657 group->cost_map[i].cand = cand;
3658 group->cost_map[i].cost = cost;
3659 group->cost_map[i].inv_vars = inv_vars;
3660 group->cost_map[i].inv_exprs = inv_exprs;
3661 group->cost_map[i].value = value;
3662 group->cost_map[i].comp = comp;
3663 }
3664
3665 /* Gets cost of (GROUP, CAND) pair. */
3666
3667 static class cost_pair *
3668 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3669 struct iv_cand *cand)
3670 {
3671 unsigned i, s;
3672 class cost_pair *ret;
3673
3674 if (!cand)
3675 return NULL;
3676
3677 if (data->consider_all_candidates)
3678 {
3679 ret = group->cost_map + cand->id;
3680 if (!ret->cand)
3681 return NULL;
3682
3683 return ret;
3684 }
3685
3686 /* n_map_members is a power of two, so this computes modulo. */
3687 s = cand->id & (group->n_map_members - 1);
3688 for (i = s; i < group->n_map_members; i++)
3689 if (group->cost_map[i].cand == cand)
3690 return group->cost_map + i;
3691 else if (group->cost_map[i].cand == NULL)
3692 return NULL;
3693 for (i = 0; i < s; i++)
3694 if (group->cost_map[i].cand == cand)
3695 return group->cost_map + i;
3696 else if (group->cost_map[i].cand == NULL)
3697 return NULL;
3698
3699 return NULL;
3700 }
3701
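/* Editorial illustration (not part of the original source).  When
   consider_all_candidates is false, the cost map behaves as a small
   open-addressing hash table whose size is a power of two (see
   alloc_use_cost_map).  For instance, with n_map_members = 8 and a candidate
   id of 13, the probe starts at slot 13 & 7 = 5 and walks forward (wrapping
   to 0) until it finds the candidate or an empty slot; set_group_iv_cost and
   get_group_iv_cost use the same probe sequence, so lookups stop early at the
   first empty slot.  */
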
3702 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3703 static rtx
3704 produce_memory_decl_rtl (tree obj, int *regno)
3705 {
3706 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3707 machine_mode address_mode = targetm.addr_space.address_mode (as);
3708 rtx x;
3709
3710 gcc_assert (obj);
3711 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3712 {
3713 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3714 x = gen_rtx_SYMBOL_REF (address_mode, name);
3715 SET_SYMBOL_REF_DECL (x, obj);
3716 x = gen_rtx_MEM (DECL_MODE (obj), x);
3717 set_mem_addr_space (x, as);
3718 targetm.encode_section_info (obj, x, true);
3719 }
3720 else
3721 {
3722 x = gen_raw_REG (address_mode, (*regno)++);
3723 x = gen_rtx_MEM (DECL_MODE (obj), x);
3724 set_mem_addr_space (x, as);
3725 }
3726
3727 return x;
3728 }
3729
3730 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3731 walk_tree. DATA contains the actual fake register number. */
3732
3733 static tree
3734 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3735 {
3736 tree obj = NULL_TREE;
3737 rtx x = NULL_RTX;
3738 int *regno = (int *) data;
3739
3740 switch (TREE_CODE (*expr_p))
3741 {
3742 case ADDR_EXPR:
3743 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3744 handled_component_p (*expr_p);
3745 expr_p = &TREE_OPERAND (*expr_p, 0))
3746 continue;
3747 obj = *expr_p;
3748 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3749 x = produce_memory_decl_rtl (obj, regno);
3750 break;
3751
3752 case SSA_NAME:
3753 *ws = 0;
3754 obj = SSA_NAME_VAR (*expr_p);
3755 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3756 if (!obj)
3757 return NULL_TREE;
3758 if (!DECL_RTL_SET_P (obj))
3759 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3760 break;
3761
3762 case VAR_DECL:
3763 case PARM_DECL:
3764 case RESULT_DECL:
3765 *ws = 0;
3766 obj = *expr_p;
3767
3768 if (DECL_RTL_SET_P (obj))
3769 break;
3770
3771 if (DECL_MODE (obj) == BLKmode)
3772 x = produce_memory_decl_rtl (obj, regno);
3773 else
3774 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3775
3776 break;
3777
3778 default:
3779 break;
3780 }
3781
3782 if (x)
3783 {
3784 decl_rtl_to_reset.safe_push (obj);
3785 SET_DECL_RTL (obj, x);
3786 }
3787
3788 return NULL_TREE;
3789 }
3790
3791 /* Predict whether the given loop will be transformed in the RTL
3792 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3793 This is only for target independent checks, see targetm.predict_doloop_p
3794 for the target dependent ones.
3795
3796 Note that according to some initial investigation, some checks like the
3797 costly niter check and invalid stmt scanning don't gain much in general
3798 cases, so keep this as simple as possible first.
3799
3800 Some RTL-specific checks seem unable to be checked in gimple; if any new
3801 checks or easy checks _are_ missing here, please add them. */
3802
3803 static bool
3804 generic_predict_doloop_p (struct ivopts_data *data)
3805 {
3806 class loop *loop = data->current_loop;
3807
3808 /* Call target hook for target dependent checks. */
3809 if (!targetm.predict_doloop_p (loop))
3810 {
3811 if (dump_file && (dump_flags & TDF_DETAILS))
3812 fprintf (dump_file, "Predict doloop failure due to"
3813 " target specific checks.\n");
3814 return false;
3815 }
3816
3817 /* Similar to doloop_optimize, check the iteration description to know
3818 whether it's suitable or not. Keep it as simple as possible; feel free
3819 to extend it if you find that any multiple-exit cases matter. */
3820 edge exit = single_dom_exit (loop);
3821 class tree_niter_desc *niter_desc;
3822 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3823 {
3824 if (dump_file && (dump_flags & TDF_DETAILS))
3825 fprintf (dump_file, "Predict doloop failure due to"
3826 " unexpected niters.\n");
3827 return false;
3828 }
3829
3830   /* Similar to doloop_optimize, check whether the iteration count is too
3831      small to be profitable.  */
3832 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3833 if (est_niter == -1)
3834 est_niter = get_likely_max_loop_iterations_int (loop);
3835 if (est_niter >= 0 && est_niter < 3)
3836 {
3837 if (dump_file && (dump_flags & TDF_DETAILS))
3838 fprintf (dump_file,
3839 "Predict doloop failure due to"
3840 " too few iterations (%u).\n",
3841 (unsigned int) est_niter);
3842 return false;
3843 }
3844
3845 return true;
3846 }
3847
3848 /* Determines cost of the computation of EXPR. */
3849
3850 static unsigned
3851 computation_cost (tree expr, bool speed)
3852 {
3853 rtx_insn *seq;
3854 rtx rslt;
3855 tree type = TREE_TYPE (expr);
3856 unsigned cost;
3857 /* Avoid using hard regs in ways which may be unsupported. */
3858 int regno = LAST_VIRTUAL_REGISTER + 1;
3859 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3860 enum node_frequency real_frequency = node->frequency;
3861
3862 node->frequency = NODE_FREQUENCY_NORMAL;
3863 crtl->maybe_hot_insn_p = speed;
3864 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3865 start_sequence ();
3866 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3867 seq = get_insns ();
3868 end_sequence ();
3869 default_rtl_profile ();
3870 node->frequency = real_frequency;
3871
3872 cost = seq_cost (seq, speed);
3873 if (MEM_P (rslt))
3874 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3875 TYPE_ADDR_SPACE (type), speed);
3876 else if (!REG_P (rslt))
3877 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3878
3879 return cost;
3880 }
3881
3882 /* Returns variable containing the value of candidate CAND at statement AT. */
3883
3884 static tree
3885 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3886 {
3887 if (stmt_after_increment (loop, cand, stmt))
3888 return cand->var_after;
3889 else
3890 return cand->var_before;
3891 }
3892
3893 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3894 same precision that is at least as wide as the precision of TYPE, stores
3895 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3896 type of A and B. */
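/* An illustration of the case handled above (hypothetical types, assuming
   unsigned long is wider than unsigned int): if *A is (unsigned int) al and
   *B is (unsigned int) bl, with al and bl of type unsigned long, both
   conversions are stripped, al and bl are stored back through A and B, and
   the unsigned long type is returned so that the caller can fold the
   difference in the wider type.  */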
3897
3898 static tree
3899 determine_common_wider_type (tree *a, tree *b)
3900 {
3901 tree wider_type = NULL;
3902 tree suba, subb;
3903 tree atype = TREE_TYPE (*a);
3904
3905 if (CONVERT_EXPR_P (*a))
3906 {
3907 suba = TREE_OPERAND (*a, 0);
3908 wider_type = TREE_TYPE (suba);
3909 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3910 return atype;
3911 }
3912 else
3913 return atype;
3914
3915 if (CONVERT_EXPR_P (*b))
3916 {
3917 subb = TREE_OPERAND (*b, 0);
3918 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3919 return atype;
3920 }
3921 else
3922 return atype;
3923
3924 *a = suba;
3925 *b = subb;
3926 return wider_type;
3927 }
3928
3929 /* Determines the expression by which USE is expressed from induction variable
3930    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3931    decomposed form: the invariant part is stored in AFF_INV and the variant
3932    part in AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if it
3933    is non-null.  Returns false if USE cannot be expressed using CAND.  */
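/* A small worked example with hypothetical values: for USE with {base 4,
   step 8} and CAND with {base 0, step 2}, the ratio is 8 / 2 = 4, so the
   use is expressed as 4 - 4 * 0 + 4 * var; AFF_INV then holds the invariant
   part 4 and AFF_VAR holds 4 * var, matching the
   "use = ubase - ratio * cbase + ratio * var" decomposition used below.  */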
3934
3935 static bool
3936 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3937 struct iv_cand *cand, class aff_tree *aff_inv,
3938 class aff_tree *aff_var, widest_int *prat = NULL)
3939 {
3940 tree ubase = use->iv->base, ustep = use->iv->step;
3941 tree cbase = cand->iv->base, cstep = cand->iv->step;
3942 tree common_type, uutype, var, cstep_common;
3943 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3944 aff_tree aff_cbase;
3945 widest_int rat;
3946
3947   /* We must have enough precision to express the values of the use.  */
3948 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3949 return false;
3950
3951 var = var_at_stmt (loop, cand, at);
3952 uutype = unsigned_type_for (utype);
3953
3954 /* If the conversion is not noop, perform it. */
3955 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3956 {
3957 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3958 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3959 {
3960 tree inner_base, inner_step, inner_type;
3961 inner_base = TREE_OPERAND (cbase, 0);
3962 if (CONVERT_EXPR_P (cstep))
3963 inner_step = TREE_OPERAND (cstep, 0);
3964 else
3965 inner_step = cstep;
3966
3967 inner_type = TREE_TYPE (inner_base);
3968	  /* If the candidate is added from a biv whose type is smaller than
3969	     ctype, we know that neither the candidate nor the biv overflows.
3970	     In this case, it's safe to skip the conversion in the candidate.
3971	     As an example, (unsigned short)((unsigned long)A) equals
3972	     (unsigned short)A, if A has a type no wider than short.  */
3973 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3974 {
3975 cbase = inner_base;
3976 cstep = inner_step;
3977 }
3978 }
3979 cbase = fold_convert (uutype, cbase);
3980 cstep = fold_convert (uutype, cstep);
3981 var = fold_convert (uutype, var);
3982 }
3983
3984 /* Ratio is 1 when computing the value of biv cand by itself.
3985 We can't rely on constant_multiple_of in this case because the
3986 use is created after the original biv is selected. The call
3987 could fail because of inconsistent fold behavior. See PR68021
3988 for more information. */
3989 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3990 {
3991 gcc_assert (is_gimple_assign (use->stmt));
3992 gcc_assert (use->iv->ssa_name == cand->var_after);
3993 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3994 rat = 1;
3995 }
3996 else if (!constant_multiple_of (ustep, cstep, &rat))
3997 return false;
3998
3999 if (prat)
4000 *prat = rat;
4001
4002 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4003 type, we achieve better folding by computing their difference in this
4004 wider type, and cast the result to UUTYPE. We do not need to worry about
4005 overflows, as all the arithmetics will in the end be performed in UUTYPE
4006 anyway. */
4007 common_type = determine_common_wider_type (&ubase, &cbase);
4008
4009 /* use = ubase - ratio * cbase + ratio * var. */
4010 tree_to_aff_combination (ubase, common_type, aff_inv);
4011 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4012 tree_to_aff_combination (var, uutype, aff_var);
4013
4014 /* We need to shift the value if we are after the increment. */
4015 if (stmt_after_increment (loop, cand, at))
4016 {
4017 aff_tree cstep_aff;
4018
4019 if (common_type != uutype)
4020 cstep_common = fold_convert (common_type, cstep);
4021 else
4022 cstep_common = cstep;
4023
4024 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4025 aff_combination_add (&aff_cbase, &cstep_aff);
4026 }
4027
4028 aff_combination_scale (&aff_cbase, -rat);
4029 aff_combination_add (aff_inv, &aff_cbase);
4030 if (common_type != uutype)
4031 aff_combination_convert (aff_inv, uutype);
4032
4033 aff_combination_scale (aff_var, rat);
4034 return true;
4035 }
4036
4037 /* Determines the expression by which USE is expressed from induction variable
4038    CAND at statement AT in LOOP.  The expression is stored in a decomposed
4039    form in AFF.  Returns false if USE cannot be expressed using CAND.  */
4040
4041 static bool
4042 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4043 struct iv_cand *cand, class aff_tree *aff)
4044 {
4045 aff_tree aff_var;
4046
4047 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4048 return false;
4049
4050 aff_combination_add (aff, &aff_var);
4051 return true;
4052 }
4053
4054 /* Return the type of USE. */
4055
4056 static tree
4057 get_use_type (struct iv_use *use)
4058 {
4059 tree base_type = TREE_TYPE (use->iv->base);
4060 tree type;
4061
4062 if (use->type == USE_REF_ADDRESS)
4063 {
4064 /* The base_type may be a void pointer. Create a pointer type based on
4065 the mem_ref instead. */
4066 type = build_pointer_type (TREE_TYPE (*use->op_p));
4067 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4068 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4069 }
4070 else
4071 type = base_type;
4072
4073 return type;
4074 }
4075
4076 /* Determines the expression by which USE is expressed from induction variable
4077    CAND at statement AT in LOOP.  The computation is unshared.  */
4078
4079 static tree
4080 get_computation_at (class loop *loop, gimple *at,
4081 struct iv_use *use, struct iv_cand *cand)
4082 {
4083 aff_tree aff;
4084 tree type = get_use_type (use);
4085
4086 if (!get_computation_aff (loop, at, use, cand, &aff))
4087 return NULL_TREE;
4088 unshare_aff_combination (&aff);
4089 return fold_convert (type, aff_combination_to_tree (&aff));
4090 }
4091
4092 /* Adjust the cost COST for being in loop setup rather than loop body.
4093    If we're optimizing for space, the loop setup overhead is constant;
4094    if we're optimizing for speed, amortize it over the expected number of
4095    iterations.  If ROUND_UP_P is true, the result is rounded up rather than
4096    truncated toward zero when optimizing for speed.  */
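/* For instance, with hypothetical numbers, a setup cost of 10 in a loop
   averaging 4 iterations is accounted as 10 / 4 = 2 when optimizing for
   speed (3 when ROUND_UP_P), while it stays 10 when optimizing for size.  */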
4097 static int64_t
4098 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4099 bool round_up_p = false)
4100 {
4101 if (cost == INFTY)
4102 return cost;
4103 else if (optimize_loop_for_speed_p (data->current_loop))
4104 {
4105 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4106 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4107 }
4108 else
4109 return cost;
4110 }
4111
4112 /* Calculate the speed or size cost (according to SPEED) of shiftadd EXPR in
4113    MODE.  MULT is the operand of EXPR holding the multiplication that can be
4114    done as a shift.  COST0 and COST1 are the costs of calculating the operands
4115    of EXPR.  Returns true if successful, and returns the cost in COST.  */
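/* For example, a PLUS_EXPR such as a + b * 4 can be implemented either as a
   shift (b << 2) followed by an add, or as a single shift-and-add where the
   target provides such an instruction; the returned cost is the cheaper of
   the two plus the cost of the remaining operand.  */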
4116
4117 static bool
4118 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4119 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4120 {
4121 comp_cost res;
4122 tree op1 = TREE_OPERAND (expr, 1);
4123 tree cst = TREE_OPERAND (mult, 1);
4124 tree multop = TREE_OPERAND (mult, 0);
4125 int m = exact_log2 (int_cst_value (cst));
4126 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4127 int as_cost, sa_cost;
4128 bool mult_in_op1;
4129
4130 if (!(m >= 0 && m < maxm))
4131 return false;
4132
4133 STRIP_NOPS (op1);
4134 mult_in_op1 = operand_equal_p (op1, mult, 0);
4135
4136 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4137
4138 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4139 use that in preference to a shift insn followed by an add insn. */
4140 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4141 ? shiftadd_cost (speed, mode, m)
4142 : (mult_in_op1
4143 ? shiftsub1_cost (speed, mode, m)
4144 : shiftsub0_cost (speed, mode, m)));
4145
4146 res = comp_cost (MIN (as_cost, sa_cost), 0);
4147 res += (mult_in_op1 ? cost0 : cost1);
4148
4149 STRIP_NOPS (multop);
4150 if (!is_gimple_val (multop))
4151 res += force_expr_to_var_cost (multop, speed);
4152
4153 *cost = res;
4154 return true;
4155 }
4156
4157 /* Estimates cost of forcing expression EXPR into a variable. */
4158
4159 static comp_cost
4160 force_expr_to_var_cost (tree expr, bool speed)
4161 {
4162 static bool costs_initialized = false;
4163 static unsigned integer_cost [2];
4164 static unsigned symbol_cost [2];
4165 static unsigned address_cost [2];
4166 tree op0, op1;
4167 comp_cost cost0, cost1, cost;
4168 machine_mode mode;
4169 scalar_int_mode int_mode;
4170
4171 if (!costs_initialized)
4172 {
4173 tree type = build_pointer_type (integer_type_node);
4174 tree var, addr;
4175 rtx x;
4176 int i;
4177
4178 var = create_tmp_var_raw (integer_type_node, "test_var");
4179 TREE_STATIC (var) = 1;
4180 x = produce_memory_decl_rtl (var, NULL);
4181 SET_DECL_RTL (var, x);
4182
4183 addr = build1 (ADDR_EXPR, type, var);
4184
4185
4186 for (i = 0; i < 2; i++)
4187 {
4188 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4189 2000), i);
4190
4191 symbol_cost[i] = computation_cost (addr, i) + 1;
4192
4193 address_cost[i]
4194 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4195 if (dump_file && (dump_flags & TDF_DETAILS))
4196 {
4197 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4198 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4199 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4200 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4201 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4202 fprintf (dump_file, "\n");
4203 }
4204 }
4205
4206 costs_initialized = true;
4207 }
4208
4209 STRIP_NOPS (expr);
4210
4211 if (SSA_VAR_P (expr))
4212 return no_cost;
4213
4214 if (is_gimple_min_invariant (expr))
4215 {
4216 if (poly_int_tree_p (expr))
4217 return comp_cost (integer_cost [speed], 0);
4218
4219 if (TREE_CODE (expr) == ADDR_EXPR)
4220 {
4221 tree obj = TREE_OPERAND (expr, 0);
4222
4223 if (VAR_P (obj)
4224 || TREE_CODE (obj) == PARM_DECL
4225 || TREE_CODE (obj) == RESULT_DECL)
4226 return comp_cost (symbol_cost [speed], 0);
4227 }
4228
4229 return comp_cost (address_cost [speed], 0);
4230 }
4231
4232 switch (TREE_CODE (expr))
4233 {
4234 case POINTER_PLUS_EXPR:
4235 case PLUS_EXPR:
4236 case MINUS_EXPR:
4237 case MULT_EXPR:
4238 case TRUNC_DIV_EXPR:
4239 case BIT_AND_EXPR:
4240 case BIT_IOR_EXPR:
4241 case LSHIFT_EXPR:
4242 case RSHIFT_EXPR:
4243 op0 = TREE_OPERAND (expr, 0);
4244 op1 = TREE_OPERAND (expr, 1);
4245 STRIP_NOPS (op0);
4246 STRIP_NOPS (op1);
4247 break;
4248
4249 CASE_CONVERT:
4250 case NEGATE_EXPR:
4251 case BIT_NOT_EXPR:
4252 op0 = TREE_OPERAND (expr, 0);
4253 STRIP_NOPS (op0);
4254 op1 = NULL_TREE;
4255 break;
4256     /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4257        introduce a COND_EXPR for the IV base, so we need to support better
4258        cost estimation for this COND_EXPR and for tcc_comparison codes.  */
4259 case COND_EXPR:
4260 op0 = TREE_OPERAND (expr, 1);
4261 STRIP_NOPS (op0);
4262 op1 = TREE_OPERAND (expr, 2);
4263 STRIP_NOPS (op1);
4264 break;
4265 case LT_EXPR:
4266 case LE_EXPR:
4267 case GT_EXPR:
4268 case GE_EXPR:
4269 case EQ_EXPR:
4270 case NE_EXPR:
4271 case UNORDERED_EXPR:
4272 case ORDERED_EXPR:
4273 case UNLT_EXPR:
4274 case UNLE_EXPR:
4275 case UNGT_EXPR:
4276 case UNGE_EXPR:
4277 case UNEQ_EXPR:
4278 case LTGT_EXPR:
4279 case MAX_EXPR:
4280 case MIN_EXPR:
4281 op0 = TREE_OPERAND (expr, 0);
4282 STRIP_NOPS (op0);
4283 op1 = TREE_OPERAND (expr, 1);
4284 STRIP_NOPS (op1);
4285 break;
4286
4287 default:
4288 /* Just an arbitrary value, FIXME. */
4289 return comp_cost (target_spill_cost[speed], 0);
4290 }
4291
4292 if (op0 == NULL_TREE
4293 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4294 cost0 = no_cost;
4295 else
4296 cost0 = force_expr_to_var_cost (op0, speed);
4297
4298 if (op1 == NULL_TREE
4299 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4300 cost1 = no_cost;
4301 else
4302 cost1 = force_expr_to_var_cost (op1, speed);
4303
4304 mode = TYPE_MODE (TREE_TYPE (expr));
4305 switch (TREE_CODE (expr))
4306 {
4307 case POINTER_PLUS_EXPR:
4308 case PLUS_EXPR:
4309 case MINUS_EXPR:
4310 case NEGATE_EXPR:
4311 cost = comp_cost (add_cost (speed, mode), 0);
4312 if (TREE_CODE (expr) != NEGATE_EXPR)
4313 {
4314 tree mult = NULL_TREE;
4315 comp_cost sa_cost;
4316 if (TREE_CODE (op1) == MULT_EXPR)
4317 mult = op1;
4318 else if (TREE_CODE (op0) == MULT_EXPR)
4319 mult = op0;
4320
4321 if (mult != NULL_TREE
4322 && is_a <scalar_int_mode> (mode, &int_mode)
4323 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4324 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4325 speed, &sa_cost))
4326 return sa_cost;
4327 }
4328 break;
4329
4330 CASE_CONVERT:
4331 {
4332 tree inner_mode, outer_mode;
4333 outer_mode = TREE_TYPE (expr);
4334 inner_mode = TREE_TYPE (op0);
4335 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4336 TYPE_MODE (inner_mode), speed), 0);
4337 }
4338 break;
4339
4340 case MULT_EXPR:
4341 if (cst_and_fits_in_hwi (op0))
4342 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4343 mode, speed), 0);
4344 else if (cst_and_fits_in_hwi (op1))
4345 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4346 mode, speed), 0);
4347 else
4348 return comp_cost (target_spill_cost [speed], 0);
4349 break;
4350
4351 case TRUNC_DIV_EXPR:
4352 /* Division by power of two is usually cheap, so we allow it. Forbid
4353 anything else. */
4354 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4355 cost = comp_cost (add_cost (speed, mode), 0);
4356 else
4357 cost = comp_cost (target_spill_cost[speed], 0);
4358 break;
4359
4360 case BIT_AND_EXPR:
4361 case BIT_IOR_EXPR:
4362 case BIT_NOT_EXPR:
4363 case LSHIFT_EXPR:
4364 case RSHIFT_EXPR:
4365 cost = comp_cost (add_cost (speed, mode), 0);
4366 break;
4367 case COND_EXPR:
4368 op0 = TREE_OPERAND (expr, 0);
4369 STRIP_NOPS (op0);
4370 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4371 || CONSTANT_CLASS_P (op0))
4372 cost = no_cost;
4373 else
4374 cost = force_expr_to_var_cost (op0, speed);
4375 break;
4376 case LT_EXPR:
4377 case LE_EXPR:
4378 case GT_EXPR:
4379 case GE_EXPR:
4380 case EQ_EXPR:
4381 case NE_EXPR:
4382 case UNORDERED_EXPR:
4383 case ORDERED_EXPR:
4384 case UNLT_EXPR:
4385 case UNLE_EXPR:
4386 case UNGT_EXPR:
4387 case UNGE_EXPR:
4388 case UNEQ_EXPR:
4389 case LTGT_EXPR:
4390 case MAX_EXPR:
4391 case MIN_EXPR:
4392       /* Simply use the add cost for now; FIXME if there is a more accurate
4393          way to evaluate the cost.  */
4394 cost = comp_cost (add_cost (speed, mode), 0);
4395 break;
4396
4397 default:
4398 gcc_unreachable ();
4399 }
4400
4401 cost += cost0;
4402 cost += cost1;
4403 return cost;
4404 }
4405
4406 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4407 invariants the computation depends on. */
4408
4409 static comp_cost
4410 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4411 {
4412 if (!expr)
4413 return no_cost;
4414
4415 find_inv_vars (data, &expr, inv_vars);
4416 return force_expr_to_var_cost (expr, data->speed);
4417 }
4418
4419 /* Returns the cost of an auto-modifying address expression of the shape
4420    base + offset.  AINC_STEP is the step size of the address IV.  AINC_OFFSET
4421    is the offset of the address expression.  The address expression has
4422    ADDR_MODE in address space AS.  The memory access has MEM_MODE.  SPEED
4423    says whether we are optimizing for speed or size.  */
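/* For example, on a target with post-increment addressing, an access whose
   remaining offset is zero and whose AINC_STEP equals the access size maps
   to the *p++ form (AINC_POST_INC below); the other offset/step pairings
   map to the pre-increment and decrement variants analogously.  */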
4424
4425 enum ainc_type
4426 {
4427 AINC_PRE_INC, /* Pre increment. */
4428 AINC_PRE_DEC, /* Pre decrement. */
4429 AINC_POST_INC, /* Post increment. */
4430 AINC_POST_DEC, /* Post decrement. */
4431 AINC_NONE /* Also the number of auto increment types. */
4432 };
4433
4434 struct ainc_cost_data
4435 {
4436 int64_t costs[AINC_NONE];
4437 };
4438
4439 static comp_cost
4440 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4441 machine_mode addr_mode, machine_mode mem_mode,
4442 addr_space_t as, bool speed)
4443 {
4444 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4445 && !USE_STORE_PRE_DECREMENT (mem_mode)
4446 && !USE_LOAD_POST_DECREMENT (mem_mode)
4447 && !USE_STORE_POST_DECREMENT (mem_mode)
4448 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4449 && !USE_STORE_PRE_INCREMENT (mem_mode)
4450 && !USE_LOAD_POST_INCREMENT (mem_mode)
4451 && !USE_STORE_POST_INCREMENT (mem_mode))
4452 return infinite_cost;
4453
4454 static vec<ainc_cost_data *> ainc_cost_data_list;
4455 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4456 if (idx >= ainc_cost_data_list.length ())
4457 {
4458       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4459
4460 gcc_assert (nsize > idx);
4461 ainc_cost_data_list.safe_grow_cleared (nsize);
4462 }
4463
4464 ainc_cost_data *data = ainc_cost_data_list[idx];
4465 if (data == NULL)
4466 {
4467 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4468
4469 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4470 data->costs[AINC_PRE_DEC] = INFTY;
4471 data->costs[AINC_POST_DEC] = INFTY;
4472 data->costs[AINC_PRE_INC] = INFTY;
4473 data->costs[AINC_POST_INC] = INFTY;
4474 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4475 || USE_STORE_PRE_DECREMENT (mem_mode))
4476 {
4477 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4478
4479 if (memory_address_addr_space_p (mem_mode, addr, as))
4480 data->costs[AINC_PRE_DEC]
4481 = address_cost (addr, mem_mode, as, speed);
4482 }
4483 if (USE_LOAD_POST_DECREMENT (mem_mode)
4484 || USE_STORE_POST_DECREMENT (mem_mode))
4485 {
4486 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4487
4488 if (memory_address_addr_space_p (mem_mode, addr, as))
4489 data->costs[AINC_POST_DEC]
4490 = address_cost (addr, mem_mode, as, speed);
4491 }
4492 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4493 || USE_STORE_PRE_INCREMENT (mem_mode))
4494 {
4495 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4496
4497 if (memory_address_addr_space_p (mem_mode, addr, as))
4498 data->costs[AINC_PRE_INC]
4499 = address_cost (addr, mem_mode, as, speed);
4500 }
4501 if (USE_LOAD_POST_INCREMENT (mem_mode)
4502 || USE_STORE_POST_INCREMENT (mem_mode))
4503 {
4504 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4505
4506 if (memory_address_addr_space_p (mem_mode, addr, as))
4507 data->costs[AINC_POST_INC]
4508 = address_cost (addr, mem_mode, as, speed);
4509 }
4510 ainc_cost_data_list[idx] = data;
4511 }
4512
4513 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4514 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4515 return comp_cost (data->costs[AINC_POST_INC], 0);
4516 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4517 return comp_cost (data->costs[AINC_POST_DEC], 0);
4518 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4519 return comp_cost (data->costs[AINC_PRE_INC], 0);
4520 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4521 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4522
4523 return infinite_cost;
4524 }
4525
4526 /* Return the cost of computing USE's address expression by using CAND.
4527    AFF_INV and AFF_VAR represent the invariant and variant parts of the
4528    address expression, respectively.  If AFF_INV is simple, store the loop
4529    invariant variables it depends on in INV_VARS; if AFF_INV is complicated,
4530    handle it as a new invariant expression and record it in INV_EXPR.  RATIO
4531    is the ratio between the steps of USE and CAND.  If CAN_AUTOINC is
4532    non-NULL, store in it a boolean value indicating whether this is an
4533    auto-increment address.  */
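/* As a rough illustration with a hypothetical use: for an access like a[i]
   rewritten in terms of a candidate stepping by 1 with RATIO 4, the code
   below first tries the "base + index" form, then "base + index << 2", and
   then folds in the offset and symbol; whatever the target's addressing
   modes reject is computed outside the address and charged as extra cost.  */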
4534
4535 static comp_cost
4536 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4537 struct iv_cand *cand, aff_tree *aff_inv,
4538 aff_tree *aff_var, HOST_WIDE_INT ratio,
4539 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4540 bool *can_autoinc, bool speed)
4541 {
4542 rtx addr;
4543 bool simple_inv = true;
4544 tree comp_inv = NULL_TREE, type = aff_var->type;
4545 comp_cost var_cost = no_cost, cost = no_cost;
4546 struct mem_address parts = {NULL_TREE, integer_one_node,
4547 NULL_TREE, NULL_TREE, NULL_TREE};
4548 machine_mode addr_mode = TYPE_MODE (type);
4549 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4550 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4551 /* Only true if ratio != 1. */
4552 bool ok_with_ratio_p = false;
4553 bool ok_without_ratio_p = false;
4554
4555 if (!aff_combination_const_p (aff_inv))
4556 {
4557 parts.index = integer_one_node;
4558 /* Addressing mode "base + index". */
4559 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4560 if (ratio != 1)
4561 {
4562 parts.step = wide_int_to_tree (type, ratio);
4563 /* Addressing mode "base + index << scale". */
4564 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4565 if (!ok_with_ratio_p)
4566 parts.step = NULL_TREE;
4567 }
4568 if (ok_with_ratio_p || ok_without_ratio_p)
4569 {
4570 if (maybe_ne (aff_inv->offset, 0))
4571 {
4572 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4573 /* Addressing mode "base + index [<< scale] + offset". */
4574 if (!valid_mem_ref_p (mem_mode, as, &parts))
4575 parts.offset = NULL_TREE;
4576 else
4577 aff_inv->offset = 0;
4578 }
4579
4580 move_fixed_address_to_symbol (&parts, aff_inv);
4581 /* Base is fixed address and is moved to symbol part. */
4582 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4583 parts.base = NULL_TREE;
4584
4585 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4586 if (parts.symbol != NULL_TREE
4587 && !valid_mem_ref_p (mem_mode, as, &parts))
4588 {
4589 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4590 parts.symbol = NULL_TREE;
4591 /* Reset SIMPLE_INV since symbol address needs to be computed
4592 outside of address expression in this case. */
4593 simple_inv = false;
4594 /* Symbol part is moved back to base part, it can't be NULL. */
4595 parts.base = integer_one_node;
4596 }
4597 }
4598 else
4599 parts.index = NULL_TREE;
4600 }
4601 else
4602 {
4603 poly_int64 ainc_step;
4604 if (can_autoinc
4605 && ratio == 1
4606 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4607 {
4608 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4609
4610 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4611 ainc_offset += ainc_step;
4612 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4613 addr_mode, mem_mode, as, speed);
4614 if (!cost.infinite_cost_p ())
4615 {
4616 *can_autoinc = true;
4617 return cost;
4618 }
4619 cost = no_cost;
4620 }
4621 if (!aff_combination_zero_p (aff_inv))
4622 {
4623 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4624 /* Addressing mode "base + offset". */
4625 if (!valid_mem_ref_p (mem_mode, as, &parts))
4626 parts.offset = NULL_TREE;
4627 else
4628 aff_inv->offset = 0;
4629 }
4630 }
4631
4632 if (simple_inv)
4633 simple_inv = (aff_inv == NULL
4634 || aff_combination_const_p (aff_inv)
4635 || aff_combination_singleton_var_p (aff_inv));
4636 if (!aff_combination_zero_p (aff_inv))
4637 comp_inv = aff_combination_to_tree (aff_inv);
4638 if (comp_inv != NULL_TREE)
4639 cost = force_var_cost (data, comp_inv, inv_vars);
4640 if (ratio != 1 && parts.step == NULL_TREE)
4641 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4642 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4643 var_cost += add_cost (speed, addr_mode);
4644
4645 if (comp_inv && inv_expr && !simple_inv)
4646 {
4647 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4648 /* Clear depends on. */
4649 if (*inv_expr != NULL && inv_vars && *inv_vars)
4650 bitmap_clear (*inv_vars);
4651
4652       /* The cost of a small invariant expression adjusted against the loop
4653          niters is usually zero, which makes it hard to differentiate from a
4654          candidate based on loop invariant variables.  Moreover, the generated
4655          invariant expression may not be hoisted out of the loop by a
4656          following pass.  We penalize the cost by rounding up in order to
4657          neutralize such effects.  */
4658 cost.cost = adjust_setup_cost (data, cost.cost, true);
4659 cost.scratch = cost.cost;
4660 }
4661
4662 cost += var_cost;
4663 addr = addr_for_mem_ref (&parts, as, false);
4664 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4665 cost += address_cost (addr, mem_mode, as, speed);
4666
4667 if (parts.symbol != NULL_TREE)
4668 cost.complexity += 1;
4669 /* Don't increase the complexity of adding a scaled index if it's
4670 the only kind of index that the target allows. */
4671 if (parts.step != NULL_TREE && ok_without_ratio_p)
4672 cost.complexity += 1;
4673 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4674 cost.complexity += 1;
4675 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4676 cost.complexity += 1;
4677
4678 return cost;
4679 }
4680
4681 /* Scale (multiply) the computed COST (except for the scratch part, which
4682    should be hoisted out of the loop) by header->frequency / AT->frequency,
4683    which makes the expected cost more accurate.  */
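/* E.g. with hypothetical numbers: for cost 10, scratch 2 and a precomputed
   scale factor of 4 stored in the block's aux field, the scaled cost is
   2 + (10 - 2) * 4 = 34; the scratch part is charged only once.  */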
4684
4685 static comp_cost
4686 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4687 {
4688 if (data->speed
4689 && data->current_loop->header->count.to_frequency (cfun) > 0)
4690 {
4691 basic_block bb = gimple_bb (at);
4692 gcc_assert (cost.scratch <= cost.cost);
4693 int scale_factor = (int)(intptr_t) bb->aux;
4694 if (scale_factor == 1)
4695 return cost;
4696
4697 int64_t scaled_cost
4698 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4699
4700 if (dump_file && (dump_flags & TDF_DETAILS))
4701 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4702 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4703 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4704
4705 cost.cost = scaled_cost;
4706 }
4707
4708 return cost;
4709 }
4710
4711 /* Determines the cost of the computation by which USE is expressed
4712    from induction variable CAND.  If ADDRESS_P is true, we just need
4713    to create an address from it, otherwise we want to get it into a
4714    register.  A set of invariants we depend on is stored in INV_VARS.
4715    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4716    addressing is likely.  If INV_EXPR is nonnull, record the invariant
4717    expr entry in it.  */
4718
4719 static comp_cost
4720 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4721 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4722 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4723 {
4724 gimple *at = use->stmt;
4725 tree ubase = use->iv->base, cbase = cand->iv->base;
4726 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4727 tree comp_inv = NULL_TREE;
4728 HOST_WIDE_INT ratio, aratio;
4729 comp_cost cost;
4730 widest_int rat;
4731 aff_tree aff_inv, aff_var;
4732 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4733
4734 if (inv_vars)
4735 *inv_vars = NULL;
4736 if (can_autoinc)
4737 *can_autoinc = false;
4738 if (inv_expr)
4739 *inv_expr = NULL;
4740
4741 /* Check if we have enough precision to express the values of use. */
4742 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4743 return infinite_cost;
4744
4745 if (address_p
4746 || (use->iv->base_object
4747 && cand->iv->base_object
4748 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4749 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4750 {
4751 /* Do not try to express address of an object with computation based
4752 on address of a different object. This may cause problems in rtl
4753 level alias analysis (that does not expect this to be happening,
4754 as this is illegal in C), and would be unlikely to be useful
4755 anyway. */
4756 if (use->iv->base_object
4757 && cand->iv->base_object
4758 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4759 return infinite_cost;
4760 }
4761
4762 if (!get_computation_aff_1 (data->current_loop, at, use,
4763 cand, &aff_inv, &aff_var, &rat)
4764 || !wi::fits_shwi_p (rat))
4765 return infinite_cost;
4766
4767 ratio = rat.to_shwi ();
4768 if (address_p)
4769 {
4770 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4771 inv_vars, inv_expr, can_autoinc, speed);
4772 cost = get_scaled_computation_cost_at (data, at, cost);
4773 /* For doloop IV cand, add on the extra cost. */
4774 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4775 return cost;
4776 }
4777
4778 bool simple_inv = (aff_combination_const_p (&aff_inv)
4779 || aff_combination_singleton_var_p (&aff_inv));
4780 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4781 aff_combination_convert (&aff_inv, signed_type);
4782 if (!aff_combination_zero_p (&aff_inv))
4783 comp_inv = aff_combination_to_tree (&aff_inv);
4784
4785 cost = force_var_cost (data, comp_inv, inv_vars);
4786 if (comp_inv && inv_expr && !simple_inv)
4787 {
4788 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4789 /* Clear depends on. */
4790 if (*inv_expr != NULL && inv_vars && *inv_vars)
4791 bitmap_clear (*inv_vars);
4792
4793 cost.cost = adjust_setup_cost (data, cost.cost);
4794 /* Record setup cost in scratch field. */
4795 cost.scratch = cost.cost;
4796 }
4797   /* The cost of a constant integer can be covered when adding the invariant
4798      part to the variant part.  */
4799 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4800 cost = no_cost;
4801
4802 /* Need type narrowing to represent use with cand. */
4803 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4804 {
4805 machine_mode outer_mode = TYPE_MODE (utype);
4806 machine_mode inner_mode = TYPE_MODE (ctype);
4807 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4808 }
4809
4810 /* Turn a + i * (-c) into a - i * c. */
4811 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4812 aratio = -ratio;
4813 else
4814 aratio = ratio;
4815
4816 if (ratio != 1)
4817 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4818
4819 /* TODO: We may also need to check if we can compute a + i * 4 in one
4820 instruction. */
4821 /* Need to add up the invariant and variant parts. */
4822 if (comp_inv && !integer_zerop (comp_inv))
4823 cost += add_cost (speed, TYPE_MODE (utype));
4824
4825 cost = get_scaled_computation_cost_at (data, at, cost);
4826
4827 /* For doloop IV cand, add on the extra cost. */
4828 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4829 cost += targetm.doloop_cost_for_generic;
4830
4831 return cost;
4832 }
4833
4834 /* Determines cost of computing the use in GROUP with CAND in a generic
4835 expression. */
4836
4837 static bool
4838 determine_group_iv_cost_generic (struct ivopts_data *data,
4839 struct iv_group *group, struct iv_cand *cand)
4840 {
4841 comp_cost cost;
4842 iv_inv_expr_ent *inv_expr = NULL;
4843 bitmap inv_vars = NULL, inv_exprs = NULL;
4844 struct iv_use *use = group->vuses[0];
4845
4846   /* The simple case first -- if we need to express the value of the preserved
4847      original biv, the cost is 0.  This also prevents us from counting the
4848      cost of the increment twice -- once at this use and once in the cost of
4849      the candidate.  */
4850 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4851 cost = no_cost;
4852 else
4853 cost = get_computation_cost (data, use, cand, false,
4854 &inv_vars, NULL, &inv_expr);
4855
4856 if (inv_expr)
4857 {
4858 inv_exprs = BITMAP_ALLOC (NULL);
4859 bitmap_set_bit (inv_exprs, inv_expr->id);
4860 }
4861 set_group_iv_cost (data, group, cand, cost, inv_vars,
4862 NULL_TREE, ERROR_MARK, inv_exprs);
4863 return !cost.infinite_cost_p ();
4864 }
4865
4866 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4867
4868 static bool
4869 determine_group_iv_cost_address (struct ivopts_data *data,
4870 struct iv_group *group, struct iv_cand *cand)
4871 {
4872 unsigned i;
4873 bitmap inv_vars = NULL, inv_exprs = NULL;
4874 bool can_autoinc;
4875 iv_inv_expr_ent *inv_expr = NULL;
4876 struct iv_use *use = group->vuses[0];
4877 comp_cost sum_cost = no_cost, cost;
4878
4879 cost = get_computation_cost (data, use, cand, true,
4880 &inv_vars, &can_autoinc, &inv_expr);
4881
4882 if (inv_expr)
4883 {
4884 inv_exprs = BITMAP_ALLOC (NULL);
4885 bitmap_set_bit (inv_exprs, inv_expr->id);
4886 }
4887 sum_cost = cost;
4888 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4889 {
4890 if (can_autoinc)
4891 sum_cost -= cand->cost_step;
4892 /* If we generated the candidate solely for exploiting autoincrement
4893 opportunities, and it turns out it can't be used, set the cost to
4894 infinity to make sure we ignore it. */
4895 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4896 sum_cost = infinite_cost;
4897 }
4898
4899 /* Uses in a group can share setup code, so only add setup cost once. */
4900 cost -= cost.scratch;
4901   /* Compute and add costs for the rest of the uses in this group.  */
4902 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4903 {
4904 struct iv_use *next = group->vuses[i];
4905
4906 /* TODO: We could skip computing cost for sub iv_use when it has the
4907 same cost as the first iv_use, but the cost really depends on the
4908 offset and where the iv_use is. */
4909 cost = get_computation_cost (data, next, cand, true,
4910 NULL, &can_autoinc, &inv_expr);
4911 if (inv_expr)
4912 {
4913 if (!inv_exprs)
4914 inv_exprs = BITMAP_ALLOC (NULL);
4915
4916 bitmap_set_bit (inv_exprs, inv_expr->id);
4917 }
4918 sum_cost += cost;
4919 }
4920 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4921 NULL_TREE, ERROR_MARK, inv_exprs);
4922
4923 return !sum_cost.infinite_cost_p ();
4924 }
4925
4926 /* Computes value of candidate CAND at position AT in iteration NITER, and
4927 stores it to VAL. */
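/* For example, a candidate with base 16 and step 4 has value
   16 + 4 * NITER at a position before the increment, and
   16 + 4 * (NITER + 1) at a position after it.  */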
4928
4929 static void
4930 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4931 aff_tree *val)
4932 {
4933 aff_tree step, delta, nit;
4934 struct iv *iv = cand->iv;
4935 tree type = TREE_TYPE (iv->base);
4936 tree steptype;
4937 if (POINTER_TYPE_P (type))
4938 steptype = sizetype;
4939 else
4940 steptype = unsigned_type_for (type);
4941
4942 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4943 aff_combination_convert (&step, steptype);
4944 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4945 aff_combination_convert (&nit, steptype);
4946 aff_combination_mult (&nit, &step, &delta);
4947 if (stmt_after_increment (loop, cand, at))
4948 aff_combination_add (&delta, &step);
4949
4950 tree_to_aff_combination (iv->base, type, val);
4951 if (!POINTER_TYPE_P (type))
4952 aff_combination_convert (val, steptype);
4953 aff_combination_add (val, &delta);
4954 }
4955
4956 /* Returns period of induction variable iv. */
4957
4958 static tree
4959 iv_period (struct iv *iv)
4960 {
4961 tree step = iv->step, period, type;
4962 tree pow2div;
4963
4964 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4965
4966 type = unsigned_type_for (TREE_TYPE (step));
4967   /* The period of the iv is lcm (step, type_range) / step - 1,
4968      i.e., N * type_range / step - 1.  Since type_range is a power
4969      of two, N == step >> num_of_ending_zeros_binary (step),
4970      so the final result is
4971
4972        (type_range >> num_of_ending_zeros_binary (step)) - 1.
4973
4974   */
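  /* For instance, with a 32-bit unsigned type and step 4,
     num_of_ending_zeros_binary (step) is 2, so the period is
     (2^32 >> 2) - 1 = 0x3fffffff.  */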
4975 pow2div = num_ending_zeros (step);
4976
4977 period = build_low_bits_mask (type,
4978 (TYPE_PRECISION (type)
4979 - tree_to_uhwi (pow2div)));
4980
4981 return period;
4982 }
4983
4984 /* Returns the comparison operator used when eliminating the iv USE. */
4985
4986 static enum tree_code
4987 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4988 {
4989 class loop *loop = data->current_loop;
4990 basic_block ex_bb;
4991 edge exit;
4992
4993 ex_bb = gimple_bb (use->stmt);
4994 exit = EDGE_SUCC (ex_bb, 0);
4995 if (flow_bb_inside_loop_p (loop, exit->dest))
4996 exit = EDGE_SUCC (ex_bb, 1);
4997
4998 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4999 }
5000
5001 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5002 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5003    calculation is performed in a non-wrapping type.
5004
5005 TODO: More generally, we could test for the situation that
5006 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5007 This would require knowing the sign of OFFSET. */
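/* For example, if BASE is defined as p_0 = q + a in a non-wrapping pointer
   type and OFFSET's affine expansion equals a, then BASE - OFFSET is simply
   q, so the subtraction cannot overflow.  */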
5008
5009 static bool
5010 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5011 {
5012 enum tree_code code;
5013 tree e1, e2;
5014 aff_tree aff_e1, aff_e2, aff_offset;
5015
5016 if (!nowrap_type_p (TREE_TYPE (base)))
5017 return false;
5018
5019 base = expand_simple_operations (base);
5020
5021 if (TREE_CODE (base) == SSA_NAME)
5022 {
5023 gimple *stmt = SSA_NAME_DEF_STMT (base);
5024
5025 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5026 return false;
5027
5028 code = gimple_assign_rhs_code (stmt);
5029 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5030 return false;
5031
5032 e1 = gimple_assign_rhs1 (stmt);
5033 e2 = gimple_assign_rhs2 (stmt);
5034 }
5035 else
5036 {
5037 code = TREE_CODE (base);
5038 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5039 return false;
5040 e1 = TREE_OPERAND (base, 0);
5041 e2 = TREE_OPERAND (base, 1);
5042 }
5043
5044 /* Use affine expansion as deeper inspection to prove the equality. */
5045 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5046 &aff_e2, &data->name_expansion_cache);
5047 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5048 &aff_offset, &data->name_expansion_cache);
5049 aff_combination_scale (&aff_offset, -1);
5050 switch (code)
5051 {
5052 case PLUS_EXPR:
5053 aff_combination_add (&aff_e2, &aff_offset);
5054 if (aff_combination_zero_p (&aff_e2))
5055 return true;
5056
5057 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5058 &aff_e1, &data->name_expansion_cache);
5059 aff_combination_add (&aff_e1, &aff_offset);
5060 return aff_combination_zero_p (&aff_e1);
5061
5062 case POINTER_PLUS_EXPR:
5063 aff_combination_add (&aff_e2, &aff_offset);
5064 return aff_combination_zero_p (&aff_e2);
5065
5066 default:
5067 return false;
5068 }
5069 }
5070
5071 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5072 comparison with CAND. NITER describes the number of iterations of
5073 the loops. If successful, the comparison in COMP_P is altered accordingly.
5074
5075 We aim to handle the following situation:
5076
5077 sometype *base, *p;
5078 int a, b, i;
5079
5080 i = a;
5081 p = p_0 = base + a;
5082
5083 do
5084 {
5085 bla (*p);
5086 p++;
5087 i++;
5088 }
5089 while (i < b);
5090
5091 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5092 We aim to optimize this to
5093
5094 p = p_0 = base + a;
5095 do
5096 {
5097 bla (*p);
5098 p++;
5099 }
5100 while (p < p_0 - a + b);
5101
5102    This preserves correctness, since the pointer arithmetic does not
5103    overflow.  More precisely:
5104
5105 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5106 overflow in computing it or the values of p.
5107 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5108 overflow. To prove this, we use the fact that p_0 = base + a. */
5109
5110 static bool
5111 iv_elimination_compare_lt (struct ivopts_data *data,
5112 struct iv_cand *cand, enum tree_code *comp_p,
5113 class tree_niter_desc *niter)
5114 {
5115 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5116 class aff_tree nit, tmpa, tmpb;
5117 enum tree_code comp;
5118 HOST_WIDE_INT step;
5119
5120 /* We need to know that the candidate induction variable does not overflow.
5121 While more complex analysis may be used to prove this, for now just
5122 check that the variable appears in the original program and that it
5123 is computed in a type that guarantees no overflows. */
5124 cand_type = TREE_TYPE (cand->iv->base);
5125 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5126 return false;
5127
5128 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5129 the calculation of the BOUND could overflow, making the comparison
5130 invalid. */
5131 if (!data->loop_single_exit_p)
5132 return false;
5133
5134 /* We need to be able to decide whether candidate is increasing or decreasing
5135 in order to choose the right comparison operator. */
5136 if (!cst_and_fits_in_hwi (cand->iv->step))
5137 return false;
5138 step = int_cst_value (cand->iv->step);
5139
5140 /* Check that the number of iterations matches the expected pattern:
5141 a + 1 > b ? 0 : b - a - 1. */
5142 mbz = niter->may_be_zero;
5143 if (TREE_CODE (mbz) == GT_EXPR)
5144 {
5145 /* Handle a + 1 > b. */
5146 tree op0 = TREE_OPERAND (mbz, 0);
5147 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5148 {
5149 a = TREE_OPERAND (op0, 0);
5150 b = TREE_OPERAND (mbz, 1);
5151 }
5152 else
5153 return false;
5154 }
5155 else if (TREE_CODE (mbz) == LT_EXPR)
5156 {
5157 tree op1 = TREE_OPERAND (mbz, 1);
5158
5159 /* Handle b < a + 1. */
5160 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5161 {
5162 a = TREE_OPERAND (op1, 0);
5163 b = TREE_OPERAND (mbz, 0);
5164 }
5165 else
5166 return false;
5167 }
5168 else
5169 return false;
5170
5171 /* Expected number of iterations is B - A - 1. Check that it matches
5172 the actual number, i.e., that B - A - NITER = 1. */
5173 tree_to_aff_combination (niter->niter, nit_type, &nit);
5174 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5175 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5176 aff_combination_scale (&nit, -1);
5177 aff_combination_scale (&tmpa, -1);
5178 aff_combination_add (&tmpb, &tmpa);
5179 aff_combination_add (&tmpb, &nit);
5180 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5181 return false;
5182
5183 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5184 overflow. */
5185 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5186 cand->iv->step,
5187 fold_convert (TREE_TYPE (cand->iv->step), a));
5188 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5189 return false;
5190
5191 /* Determine the new comparison operator. */
5192 comp = step < 0 ? GT_EXPR : LT_EXPR;
5193 if (*comp_p == NE_EXPR)
5194 *comp_p = comp;
5195 else if (*comp_p == EQ_EXPR)
5196 *comp_p = invert_tree_comparison (comp, false);
5197 else
5198 gcc_unreachable ();
5199
5200 return true;
5201 }
5202
5203 /* Check whether it is possible to express the condition in USE by comparison
5204 of candidate CAND. If so, store the value compared with to BOUND, and the
5205 comparison operator to COMP. */
5206
5207 static bool
5208 may_eliminate_iv (struct ivopts_data *data,
5209 struct iv_use *use, struct iv_cand *cand, tree *bound,
5210 enum tree_code *comp)
5211 {
5212 basic_block ex_bb;
5213 edge exit;
5214 tree period;
5215 class loop *loop = data->current_loop;
5216 aff_tree bnd;
5217 class tree_niter_desc *desc = NULL;
5218
5219 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5220 return false;
5221
5222   /* For now this works only for exits that dominate the loop latch.
5223      TODO: extend to other conditions inside the loop body.  */
5224 ex_bb = gimple_bb (use->stmt);
5225 if (use->stmt != last_stmt (ex_bb)
5226 || gimple_code (use->stmt) != GIMPLE_COND
5227 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5228 return false;
5229
5230 exit = EDGE_SUCC (ex_bb, 0);
5231 if (flow_bb_inside_loop_p (loop, exit->dest))
5232 exit = EDGE_SUCC (ex_bb, 1);
5233 if (flow_bb_inside_loop_p (loop, exit->dest))
5234 return false;
5235
5236 desc = niter_for_exit (data, exit);
5237 if (!desc)
5238 return false;
5239
5240 /* Determine whether we can use the variable to test the exit condition.
5241 This is the case iff the period of the induction variable is greater
5242 than the number of iterations for which the exit condition is true. */
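  /* For instance, an 8-bit unsigned candidate with step 1 has period 255,
     so it can only replace an exit test that is known to be reached within
     at most 255 iterations; otherwise the candidate would wrap before
     reaching the bound.  */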
5243 period = iv_period (cand->iv);
5244
5245 /* If the number of iterations is constant, compare against it directly. */
5246 if (TREE_CODE (desc->niter) == INTEGER_CST)
5247 {
5248 /* See cand_value_at. */
5249 if (stmt_after_increment (loop, cand, use->stmt))
5250 {
5251 if (!tree_int_cst_lt (desc->niter, period))
5252 return false;
5253 }
5254 else
5255 {
5256 if (tree_int_cst_lt (period, desc->niter))
5257 return false;
5258 }
5259 }
5260
5261 /* If not, and if this is the only possible exit of the loop, see whether
5262 we can get a conservative estimate on the number of iterations of the
5263 entire loop and compare against that instead. */
5264 else
5265 {
5266 widest_int period_value, max_niter;
5267
5268 max_niter = desc->max;
5269 if (stmt_after_increment (loop, cand, use->stmt))
5270 max_niter += 1;
5271 period_value = wi::to_widest (period);
5272 if (wi::gtu_p (max_niter, period_value))
5273 {
5274 /* See if we can take advantage of inferred loop bound
5275 information. */
5276 if (data->loop_single_exit_p)
5277 {
5278 if (!max_loop_iterations (loop, &max_niter))
5279 return false;
5280 /* The loop bound is already adjusted by adding 1. */
5281 if (wi::gtu_p (max_niter, period_value))
5282 return false;
5283 }
5284 else
5285 return false;
5286 }
5287 }
5288
5289   /* For a doloop IV cand, the bound would be zero.  It's safe whether
5290      may_be_zero is set or not.  */
5291 if (cand->doloop_p)
5292 {
5293 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5294 *comp = iv_elimination_compare (data, use);
5295 return true;
5296 }
5297
5298 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5299
5300 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5301 aff_combination_to_tree (&bnd));
5302 *comp = iv_elimination_compare (data, use);
5303
5304 /* It is unlikely that computing the number of iterations using division
5305 would be more profitable than keeping the original induction variable. */
5306 if (expression_expensive_p (*bound))
5307 return false;
5308
5309   /* Sometimes it is possible to handle the situation that the number of
5310      iterations may be zero (unless additional assumptions hold) by using <
5311      instead of != in the exit condition.
5312
5313 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5314 base the exit condition on it. However, that is often too
5315 expensive. */
5316 if (!integer_zerop (desc->may_be_zero))
5317 return iv_elimination_compare_lt (data, cand, comp, desc);
5318
5319 return true;
5320 }
5321
5322 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5323 be copied, if it is used in the loop body and DATA->body_includes_call. */
5324
5325 static int
5326 parm_decl_cost (struct ivopts_data *data, tree bound)
5327 {
5328 tree sbound = bound;
5329 STRIP_NOPS (sbound);
5330
5331 if (TREE_CODE (sbound) == SSA_NAME
5332 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5333 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5334 && data->body_includes_call)
5335 return COSTS_N_INSNS (1);
5336
5337 return 0;
5338 }
5339
5340 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5341
5342 static bool
5343 determine_group_iv_cost_cond (struct ivopts_data *data,
5344 struct iv_group *group, struct iv_cand *cand)
5345 {
5346 tree bound = NULL_TREE;
5347 struct iv *cmp_iv;
5348 bitmap inv_exprs = NULL;
5349 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5350 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5351 enum comp_iv_rewrite rewrite_type;
5352 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5353 tree *control_var, *bound_cst;
5354 enum tree_code comp = ERROR_MARK;
5355 struct iv_use *use = group->vuses[0];
5356
5357 /* Extract condition operands. */
5358 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5359 &bound_cst, NULL, &cmp_iv);
5360 gcc_assert (rewrite_type != COMP_IV_NA);
5361
5362 /* Try iv elimination. */
5363 if (rewrite_type == COMP_IV_ELIM
5364 && may_eliminate_iv (data, use, cand, &bound, &comp))
5365 {
5366 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5367 if (elim_cost.cost == 0)
5368 elim_cost.cost = parm_decl_cost (data, bound);
5369 else if (TREE_CODE (bound) == INTEGER_CST)
5370 elim_cost.cost = 0;
5371 /* If we replace a loop condition 'i < n' with 'p < base + n',
5372 inv_vars_elim will have 'base' and 'n' set, which implies that both
5373 'base' and 'n' will be live during the loop. More likely,
5374 'base + n' will be loop invariant, resulting in only one live value
5375 during the loop. So in that case we clear inv_vars_elim and set
5376 inv_expr_elim instead. */
5377 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5378 {
5379 inv_expr_elim = get_loop_invariant_expr (data, bound);
5380 bitmap_clear (inv_vars_elim);
5381 }
5382 /* The bound is a loop invariant, so it will be only computed
5383 once. */
5384 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5385 }
5386
5387 /* When the condition is a comparison of the candidate IV against
5388 zero, prefer this IV.
5389
5390 TODO: The constant that we're subtracting from the cost should
5391 be target-dependent. This information should be added to the
5392 target costs for each backend. */
5393 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5394 && integer_zerop (*bound_cst)
5395 && (operand_equal_p (*control_var, cand->var_after, 0)
5396 || operand_equal_p (*control_var, cand->var_before, 0)))
5397 elim_cost -= 1;
5398
5399 express_cost = get_computation_cost (data, use, cand, false,
5400 &inv_vars_express, NULL,
5401 &inv_expr_express);
5402 if (cmp_iv != NULL)
5403 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5404
5405 /* Count the cost of the original bound as well. */
5406 bound_cost = force_var_cost (data, *bound_cst, NULL);
5407 if (bound_cost.cost == 0)
5408 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5409 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5410 bound_cost.cost = 0;
5411 express_cost += bound_cost;
5412
5413 /* Choose the better approach, preferring the eliminated IV. */
5414 if (elim_cost <= express_cost)
5415 {
5416 cost = elim_cost;
5417 inv_vars = inv_vars_elim;
5418 inv_vars_elim = NULL;
5419 inv_expr = inv_expr_elim;
5420 /* For doloop candidate/use pair, adjust to zero cost. */
5421 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5422 cost = no_cost;
5423 }
5424 else
5425 {
5426 cost = express_cost;
5427 inv_vars = inv_vars_express;
5428 inv_vars_express = NULL;
5429 bound = NULL_TREE;
5430 comp = ERROR_MARK;
5431 inv_expr = inv_expr_express;
5432 }
5433
5434 if (inv_expr)
5435 {
5436 inv_exprs = BITMAP_ALLOC (NULL);
5437 bitmap_set_bit (inv_exprs, inv_expr->id);
5438 }
5439 set_group_iv_cost (data, group, cand, cost,
5440 inv_vars, bound, comp, inv_exprs);
5441
5442 if (inv_vars_elim)
5443 BITMAP_FREE (inv_vars_elim);
5444 if (inv_vars_express)
5445 BITMAP_FREE (inv_vars_express);
5446
5447 return !cost.infinite_cost_p ();
5448 }
5449
5450 /* Determines cost of computing uses in GROUP with CAND. Returns false
5451 if USE cannot be represented with CAND. */
5452
5453 static bool
5454 determine_group_iv_cost (struct ivopts_data *data,
5455 struct iv_group *group, struct iv_cand *cand)
5456 {
5457 switch (group->type)
5458 {
5459 case USE_NONLINEAR_EXPR:
5460 return determine_group_iv_cost_generic (data, group, cand);
5461
5462 case USE_REF_ADDRESS:
5463 case USE_PTR_ADDRESS:
5464 return determine_group_iv_cost_address (data, group, cand);
5465
5466 case USE_COMPARE:
5467 return determine_group_iv_cost_cond (data, group, cand);
5468
5469 default:
5470 gcc_unreachable ();
5471 }
5472 }
5473
5474 /* Return true if get_computation_cost indicates that autoincrement is
5475 a possibility for the pair of USE and CAND, false otherwise. */
5476
5477 static bool
5478 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5479 struct iv_cand *cand)
5480 {
5481 if (!address_p (use->type))
5482 return false;
5483
5484 bool can_autoinc = false;
5485 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5486 return can_autoinc;
5487 }
5488
5489 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5490 use that allows autoincrement, and set their AINC_USE if possible. */
5491
5492 static void
5493 set_autoinc_for_original_candidates (struct ivopts_data *data)
5494 {
5495 unsigned i, j;
5496
5497 for (i = 0; i < data->vcands.length (); i++)
5498 {
5499 struct iv_cand *cand = data->vcands[i];
5500 struct iv_use *closest_before = NULL;
5501 struct iv_use *closest_after = NULL;
5502 if (cand->pos != IP_ORIGINAL)
5503 continue;
5504
5505 for (j = 0; j < data->vgroups.length (); j++)
5506 {
5507 struct iv_group *group = data->vgroups[j];
5508 struct iv_use *use = group->vuses[0];
5509 unsigned uid = gimple_uid (use->stmt);
5510
5511 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5512 continue;
5513
5514 if (uid < gimple_uid (cand->incremented_at)
5515 && (closest_before == NULL
5516 || uid > gimple_uid (closest_before->stmt)))
5517 closest_before = use;
5518
5519 if (uid > gimple_uid (cand->incremented_at)
5520 && (closest_after == NULL
5521 || uid < gimple_uid (closest_after->stmt)))
5522 closest_after = use;
5523 }
5524
5525 if (closest_before != NULL
5526 && autoinc_possible_for_pair (data, closest_before, cand))
5527 cand->ainc_use = closest_before;
5528 else if (closest_after != NULL
5529 && autoinc_possible_for_pair (data, closest_after, cand))
5530 cand->ainc_use = closest_after;
5531 }
5532 }
5533
5534 /* Relate compare use with all candidates. */
5535
5536 static void
5537 relate_compare_use_with_all_cands (struct ivopts_data *data)
5538 {
5539 unsigned i, count = data->vcands.length ();
5540 for (i = 0; i < data->vgroups.length (); i++)
5541 {
5542 struct iv_group *group = data->vgroups[i];
5543
5544 if (group->type == USE_COMPARE)
5545 bitmap_set_range (group->related_cands, 0, count);
5546 }
5547 }
5548
5549 /* Add one doloop dedicated IV candidate:
5550 - Base is (may_be_zero ? 1 : (niter + 1)).
5551 - Step is -1. */
5552
5553 static void
5554 add_iv_candidate_for_doloop (struct ivopts_data *data)
5555 {
5556 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5557 gcc_assert (niter_desc && niter_desc->assumptions);
5558
5559 tree niter = niter_desc->niter;
5560 tree ntype = TREE_TYPE (niter);
5561 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5562
5563 tree may_be_zero = niter_desc->may_be_zero;
5564 if (may_be_zero && integer_zerop (may_be_zero))
5565 may_be_zero = NULL_TREE;
5566 if (may_be_zero)
5567 {
5568 if (COMPARISON_CLASS_P (may_be_zero))
5569 {
5570 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5571 build_int_cst (ntype, 0),
5572 rewrite_to_non_trapping_overflow (niter));
5573 }
5574 /* Don't try to obtain the iteration count expression when may_be_zero is
5575 integer_nonzerop (the iteration count is then actually one) or anything else.  */
5576 else
5577 return;
5578 }
5579
5580 tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5581 build_int_cst (ntype, 1));
5582 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5583 }
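
/* For illustration only: in a simple counted loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i];

   with n known to be positive, NITER is n - 1, so the candidate added above
   has base n (= niter + 1) and step -1; it reaches zero exactly when the
   loop exits, which matches what a hardware count-register decrement
   expects.  */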
5584
5585 /* Finds the candidates for the induction variables. */
5586
5587 static void
5588 find_iv_candidates (struct ivopts_data *data)
5589 {
5590 /* Add commonly used ivs. */
5591 add_standard_iv_candidates (data);
5592
5593 /* Add doloop dedicated ivs. */
5594 if (data->doloop_use_p)
5595 add_iv_candidate_for_doloop (data);
5596
5597 /* Add old induction variables. */
5598 add_iv_candidate_for_bivs (data);
5599
5600 /* Add induction variables derived from uses. */
5601 add_iv_candidate_for_groups (data);
5602
5603 set_autoinc_for_original_candidates (data);
5604
5605 /* Record the important candidates. */
5606 record_important_candidates (data);
5607
5608 /* Relate compare iv_use with all candidates. */
5609 if (!data->consider_all_candidates)
5610 relate_compare_use_with_all_cands (data);
5611
5612 if (dump_file && (dump_flags & TDF_DETAILS))
5613 {
5614 unsigned i;
5615
5616 fprintf (dump_file, "\n<Important Candidates>:\t");
5617 for (i = 0; i < data->vcands.length (); i++)
5618 if (data->vcands[i]->important)
5619 fprintf (dump_file, " %d,", data->vcands[i]->id);
5620 fprintf (dump_file, "\n");
5621
5622 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5623 for (i = 0; i < data->vgroups.length (); i++)
5624 {
5625 struct iv_group *group = data->vgroups[i];
5626
5627 if (group->related_cands)
5628 {
5629 fprintf (dump_file, " Group %d:\t", group->id);
5630 dump_bitmap (dump_file, group->related_cands);
5631 }
5632 }
5633 fprintf (dump_file, "\n");
5634 }
5635 }
5636
5637 /* Determines costs of computing use of iv with an iv candidate. */
5638
5639 static void
5640 determine_group_iv_costs (struct ivopts_data *data)
5641 {
5642 unsigned i, j;
5643 struct iv_cand *cand;
5644 struct iv_group *group;
5645 bitmap to_clear = BITMAP_ALLOC (NULL);
5646
5647 alloc_use_cost_map (data);
5648
5649 for (i = 0; i < data->vgroups.length (); i++)
5650 {
5651 group = data->vgroups[i];
5652
5653 if (data->consider_all_candidates)
5654 {
5655 for (j = 0; j < data->vcands.length (); j++)
5656 {
5657 cand = data->vcands[j];
5658 determine_group_iv_cost (data, group, cand);
5659 }
5660 }
5661 else
5662 {
5663 bitmap_iterator bi;
5664
5665 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5666 {
5667 cand = data->vcands[j];
5668 if (!determine_group_iv_cost (data, group, cand))
5669 bitmap_set_bit (to_clear, j);
5670 }
5671
5672 /* Remove the candidates for which the cost is infinite from
5673 the list of related candidates.  */
5674 bitmap_and_compl_into (group->related_cands, to_clear);
5675 bitmap_clear (to_clear);
5676 }
5677 }
5678
5679 BITMAP_FREE (to_clear);
5680
5681 if (dump_file && (dump_flags & TDF_DETAILS))
5682 {
5683 bitmap_iterator bi;
5684
5685 /* Dump invariant variables. */
5686 fprintf (dump_file, "\n<Invariant Vars>:\n");
5687 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5688 {
5689 struct version_info *info = ver_info (data, i);
5690 if (info->inv_id)
5691 {
5692 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5693 print_generic_expr (dump_file, info->name, TDF_SLIM);
5694 fprintf (dump_file, "%s\n",
5695 info->has_nonlin_use ? "" : "\t(eliminable)");
5696 }
5697 }
5698
5699 /* Dump invariant expressions. */
5700 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5701 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5702
5703 for (hash_table<iv_inv_expr_hasher>::iterator it
5704 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5705 ++it)
5706 list.safe_push (*it);
5707
5708 list.qsort (sort_iv_inv_expr_ent);
5709
5710 for (i = 0; i < list.length (); ++i)
5711 {
5712 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5713 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5714 fprintf (dump_file, "\n");
5715 }
5716
5717 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5718
5719 for (i = 0; i < data->vgroups.length (); i++)
5720 {
5721 group = data->vgroups[i];
5722
5723 fprintf (dump_file, "Group %d:\n", i);
5724 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5725 for (j = 0; j < group->n_map_members; j++)
5726 {
5727 if (!group->cost_map[j].cand
5728 || group->cost_map[j].cost.infinite_cost_p ())
5729 continue;
5730
5731 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5732 group->cost_map[j].cand->id,
5733 group->cost_map[j].cost.cost,
5734 group->cost_map[j].cost.complexity);
5735 if (!group->cost_map[j].inv_exprs
5736 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5737 fprintf (dump_file, "NIL;\t");
5738 else
5739 bitmap_print (dump_file,
5740 group->cost_map[j].inv_exprs, "", ";\t");
5741 if (!group->cost_map[j].inv_vars
5742 || bitmap_empty_p (group->cost_map[j].inv_vars))
5743 fprintf (dump_file, "NIL;\n");
5744 else
5745 bitmap_print (dump_file,
5746 group->cost_map[j].inv_vars, "", "\n");
5747 }
5748
5749 fprintf (dump_file, "\n");
5750 }
5751 fprintf (dump_file, "\n");
5752 }
5753 }
5754
5755 /* Determines cost of the candidate CAND. */
5756
5757 static void
5758 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5759 {
5760 comp_cost cost_base;
5761 int64_t cost, cost_step;
5762 tree base;
5763
5764 gcc_assert (cand->iv != NULL);
5765
5766 /* There are two costs associated with the candidate -- its increment
5767 and its initialization. The second is almost negligible for any loop
5768 that rolls enough, so we take it into account only very slightly.  */
5769
5770 base = cand->iv->base;
5771 cost_base = force_var_cost (data, base, NULL);
5772 /* It will be exceptional that the iv register happens to be initialized with
5773 the proper value at no cost. In general, there will at least be a regcopy
5774 or a const set. */
5775 if (cost_base.cost == 0)
5776 cost_base.cost = COSTS_N_INSNS (1);
5777 /* Doloop decrement should be considered as zero cost. */
5778 if (cand->doloop_p)
5779 cost_step = 0;
5780 else
5781 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5782 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5783
5784 /* Prefer the original ivs unless we may gain something by replacing them.
5785 The reason is to make debugging simpler; so this is not relevant for
5786 artificial ivs created by other optimization passes. */
5787 if ((cand->pos != IP_ORIGINAL
5788 || !SSA_NAME_VAR (cand->var_before)
5789 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5790 /* Prefer doloop as well. */
5791 && !cand->doloop_p)
5792 cost++;
5793
5794 /* Prefer not to insert statements into latch unless there are some
5795 already (so that we do not create unnecessary jumps). */
5796 if (cand->pos == IP_END
5797 && empty_block_p (ip_end_pos (data->current_loop)))
5798 cost++;
5799
5800 cand->cost = cost;
5801 cand->cost_step = cost_step;
5802 }
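
/* Rough illustration (the numbers are hypothetical and target dependent):
   with add_cost == 4 and force_var_cost of the base == COSTS_N_INSNS (1),
   a non-doloop candidate in a loop expected to roll many times gets
   cost_step == 4, an almost negligible setup contribution after
   adjust_setup_cost, and possibly the +1 preferences above, so its cost is
   dominated by the per-iteration increment.  */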
5803
5804 /* Determines costs of computation of the candidates. */
5805
5806 static void
5807 determine_iv_costs (struct ivopts_data *data)
5808 {
5809 unsigned i;
5810
5811 if (dump_file && (dump_flags & TDF_DETAILS))
5812 {
5813 fprintf (dump_file, "<Candidate Costs>:\n");
5814 fprintf (dump_file, " cand\tcost\n");
5815 }
5816
5817 for (i = 0; i < data->vcands.length (); i++)
5818 {
5819 struct iv_cand *cand = data->vcands[i];
5820
5821 determine_iv_cost (data, cand);
5822
5823 if (dump_file && (dump_flags & TDF_DETAILS))
5824 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5825 }
5826
5827 if (dump_file && (dump_flags & TDF_DETAILS))
5828 fprintf (dump_file, "\n");
5829 }
5830
5831 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5832 induction variables. Note N_INVS includes both invariant variables and
5833 invariant expressions. */
5834
5835 static unsigned
5836 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5837 unsigned n_cands)
5838 {
5839 unsigned cost;
5840 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5841 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5842 bool speed = data->speed;
5843
5844 /* If there is a call in the loop body, the call-clobbered registers
5845 are not available for loop invariants. */
5846 if (data->body_includes_call)
5847 available_regs = available_regs - target_clobbered_regs;
5848
5849 /* If we have enough registers. */
5850 if (regs_needed + target_res_regs < available_regs)
5851 cost = n_new;
5852 /* If close to running out of registers, try to preserve them. */
5853 else if (regs_needed <= available_regs)
5854 cost = target_reg_cost [speed] * regs_needed;
5855 /* If the overall register need exceeds the available registers but the
5856 candidates alone still fit, we penalize the extra registers using target_spill_cost. */
5857 else if (n_cands <= available_regs)
5858 cost = target_reg_cost [speed] * available_regs
5859 + target_spill_cost [speed] * (regs_needed - available_regs);
5860 /* If even the number of candidates exceeds the available registers, we
5861 penalize the extra candidate registers using target_spill_cost * 2,
5862 because it is more expensive to spill an induction variable than an invariant. */
5863 else
5864 cost = target_reg_cost [speed] * available_regs
5865 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5866 + target_spill_cost [speed] * (regs_needed - n_cands);
5867
5868 /* Finally, add the number of candidates, so that we prefer eliminating
5869 induction variables if possible. */
5870 return cost + n_cands;
5871 }
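
/* Worked example with hypothetical target numbers: target_avail_regs == 8,
   target_res_regs == 3, n_old == 2, target_reg_cost == 2 and
   target_spill_cost == 8 (no call in the body):
     - n_invs == 1, n_cands == 2: regs_needed == 5 and 5 + 3 is not < 8, so
       the "preserve" branch gives cost == 2 * 5 == 10, returned as 10 + 2;
     - n_invs == 5, n_cands == 4: regs_needed == 11 > 8 while n_cands <= 8,
       so cost == 2 * 8 + 8 * (11 - 8) == 40, returned as 40 + 4.  */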
5872
5873 /* For each size of the induction variable set determine the penalty. */
5874
5875 static void
5876 determine_set_costs (struct ivopts_data *data)
5877 {
5878 unsigned j, n;
5879 gphi *phi;
5880 gphi_iterator psi;
5881 tree op;
5882 class loop *loop = data->current_loop;
5883 bitmap_iterator bi;
5884
5885 if (dump_file && (dump_flags & TDF_DETAILS))
5886 {
5887 fprintf (dump_file, "<Global Costs>:\n");
5888 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5889 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5890 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5891 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5892 }
5893
5894 n = 0;
5895 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5896 {
5897 phi = psi.phi ();
5898 op = PHI_RESULT (phi);
5899
5900 if (virtual_operand_p (op))
5901 continue;
5902
5903 if (get_iv (data, op))
5904 continue;
5905
5906 if (!POINTER_TYPE_P (TREE_TYPE (op))
5907 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5908 continue;
5909
5910 n++;
5911 }
5912
5913 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5914 {
5915 struct version_info *info = ver_info (data, j);
5916
5917 if (info->inv_id && info->has_nonlin_use)
5918 n++;
5919 }
5920
5921 data->regs_used = n;
5922 if (dump_file && (dump_flags & TDF_DETAILS))
5923 fprintf (dump_file, " regs_used %d\n", n);
5924
5925 if (dump_file && (dump_flags & TDF_DETAILS))
5926 {
5927 fprintf (dump_file, " cost for size:\n");
5928 fprintf (dump_file, " ivs\tcost\n");
5929 for (j = 0; j <= 2 * target_avail_regs; j++)
5930 fprintf (dump_file, " %d\t%d\n", j,
5931 ivopts_estimate_reg_pressure (data, 0, j));
5932 fprintf (dump_file, "\n");
5933 }
5934 }
5935
5936 /* Returns true if A is a cheaper cost pair than B. */
5937
5938 static bool
5939 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
5940 {
5941 if (!a)
5942 return false;
5943
5944 if (!b)
5945 return true;
5946
5947 if (a->cost < b->cost)
5948 return true;
5949
5950 if (b->cost < a->cost)
5951 return false;
5952
5953 /* In case the costs are the same, prefer the cheaper candidate. */
5954 if (a->cand->cost < b->cand->cost)
5955 return true;
5956
5957 return false;
5958 }
5959
5960 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5961 for more expensive, equal and cheaper respectively. */
5962
5963 static int
5964 compare_cost_pair (class cost_pair *a, class cost_pair *b)
5965 {
5966 if (cheaper_cost_pair (a, b))
5967 return -1;
5968 if (cheaper_cost_pair (b, a))
5969 return 1;
5970
5971 return 0;
5972 }
5973
5974 /* Returns the cost pair of the candidate by which GROUP is expressed in IVS. */
5975
5976 static class cost_pair *
5977 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
5978 {
5979 return ivs->cand_for_group[group->id];
5980 }
5981
5982 /* Computes the cost field of IVS structure. */
5983
5984 static void
5985 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
5986 {
5987 comp_cost cost = ivs->cand_use_cost;
5988
5989 cost += ivs->cand_cost;
5990 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5991 ivs->cost = cost;
5992 }
5993
5994 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5995 and IVS. */
5996
5997 static void
5998 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5999 {
6000 bitmap_iterator bi;
6001 unsigned iid;
6002
6003 if (!invs)
6004 return;
6005
6006 gcc_assert (n_inv_uses != NULL);
6007 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6008 {
6009 n_inv_uses[iid]--;
6010 if (n_inv_uses[iid] == 0)
6011 ivs->n_invs--;
6012 }
6013 }
6014
6015 /* Set USE not to be expressed by any candidate in IVS. */
6016
6017 static void
6018 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6019 struct iv_group *group)
6020 {
6021 unsigned gid = group->id, cid;
6022 class cost_pair *cp;
6023
6024 cp = ivs->cand_for_group[gid];
6025 if (!cp)
6026 return;
6027 cid = cp->cand->id;
6028
6029 ivs->bad_groups++;
6030 ivs->cand_for_group[gid] = NULL;
6031 ivs->n_cand_uses[cid]--;
6032
6033 if (ivs->n_cand_uses[cid] == 0)
6034 {
6035 bitmap_clear_bit (ivs->cands, cid);
6036 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6037 ivs->n_cands--;
6038 ivs->cand_cost -= cp->cand->cost;
6039 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6040 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6041 }
6042
6043 ivs->cand_use_cost -= cp->cost;
6044 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6045 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6046 iv_ca_recount_cost (data, ivs);
6047 }
6048
6049 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6050 IVS. */
6051
6052 static void
6053 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6054 {
6055 bitmap_iterator bi;
6056 unsigned iid;
6057
6058 if (!invs)
6059 return;
6060
6061 gcc_assert (n_inv_uses != NULL);
6062 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6063 {
6064 n_inv_uses[iid]++;
6065 if (n_inv_uses[iid] == 1)
6066 ivs->n_invs++;
6067 }
6068 }
6069
6070 /* Set cost pair for GROUP in set IVS to CP. */
6071
6072 static void
6073 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6074 struct iv_group *group, class cost_pair *cp)
6075 {
6076 unsigned gid = group->id, cid;
6077
6078 if (ivs->cand_for_group[gid] == cp)
6079 return;
6080
6081 if (ivs->cand_for_group[gid])
6082 iv_ca_set_no_cp (data, ivs, group);
6083
6084 if (cp)
6085 {
6086 cid = cp->cand->id;
6087
6088 ivs->bad_groups--;
6089 ivs->cand_for_group[gid] = cp;
6090 ivs->n_cand_uses[cid]++;
6091 if (ivs->n_cand_uses[cid] == 1)
6092 {
6093 bitmap_set_bit (ivs->cands, cid);
6094 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6095 ivs->n_cands++;
6096 ivs->cand_cost += cp->cand->cost;
6097 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6098 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6099 }
6100
6101 ivs->cand_use_cost += cp->cost;
6102 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6103 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6104 iv_ca_recount_cost (data, ivs);
6105 }
6106 }
6107
6108 /* Extend set IVS by expressing USE by some of the candidates in it
6109 if possible. Consider all important candidates if candidates in
6110 set IVS don't give any result. */
6111
6112 static void
6113 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6114 struct iv_group *group)
6115 {
6116 class cost_pair *best_cp = NULL, *cp;
6117 bitmap_iterator bi;
6118 unsigned i;
6119 struct iv_cand *cand;
6120
6121 gcc_assert (ivs->upto >= group->id);
6122 ivs->upto++;
6123 ivs->bad_groups++;
6124
6125 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6126 {
6127 cand = data->vcands[i];
6128 cp = get_group_iv_cost (data, group, cand);
6129 if (cheaper_cost_pair (cp, best_cp))
6130 best_cp = cp;
6131 }
6132
6133 if (best_cp == NULL)
6134 {
6135 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6136 {
6137 cand = data->vcands[i];
6138 cp = get_group_iv_cost (data, group, cand);
6139 if (cheaper_cost_pair (cp, best_cp))
6140 best_cp = cp;
6141 }
6142 }
6143
6144 iv_ca_set_cp (data, ivs, group, best_cp);
6145 }
6146
6147 /* Get cost for assignment IVS. */
6148
6149 static comp_cost
6150 iv_ca_cost (class iv_ca *ivs)
6151 {
6152 /* This was a conditional expression but it triggered a bug in
6153 Sun C 5.5. */
6154 if (ivs->bad_groups)
6155 return infinite_cost;
6156 else
6157 return ivs->cost;
6158 }
6159
6160 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6161 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6162 respectively. */
6163
6164 static int
6165 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6166 struct iv_group *group, class cost_pair *old_cp,
6167 class cost_pair *new_cp)
6168 {
6169 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6170 unsigned old_n_invs = ivs->n_invs;
6171 iv_ca_set_cp (data, ivs, group, new_cp);
6172 unsigned new_n_invs = ivs->n_invs;
6173 iv_ca_set_cp (data, ivs, group, old_cp);
6174
6175 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6176 }
6177
6178 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6179 it before NEXT. */
6180
6181 static struct iv_ca_delta *
6182 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6183 class cost_pair *new_cp, struct iv_ca_delta *next)
6184 {
6185 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6186
6187 change->group = group;
6188 change->old_cp = old_cp;
6189 change->new_cp = new_cp;
6190 change->next = next;
6191
6192 return change;
6193 }
6194
6195 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6196 are rewritten. */
6197
6198 static struct iv_ca_delta *
6199 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6200 {
6201 struct iv_ca_delta *last;
6202
6203 if (!l2)
6204 return l1;
6205
6206 if (!l1)
6207 return l2;
6208
6209 for (last = l1; last->next; last = last->next)
6210 continue;
6211 last->next = l2;
6212
6213 return l1;
6214 }
6215
6216 /* Reverse the list of changes DELTA, forming the inverse to it. */
6217
6218 static struct iv_ca_delta *
6219 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6220 {
6221 struct iv_ca_delta *act, *next, *prev = NULL;
6222
6223 for (act = delta; act; act = next)
6224 {
6225 next = act->next;
6226 act->next = prev;
6227 prev = act;
6228
6229 std::swap (act->old_cp, act->new_cp);
6230 }
6231
6232 return prev;
6233 }
6234
6235 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6236 reverted instead. */
6237
6238 static void
6239 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6240 struct iv_ca_delta *delta, bool forward)
6241 {
6242 class cost_pair *from, *to;
6243 struct iv_ca_delta *act;
6244
6245 if (!forward)
6246 delta = iv_ca_delta_reverse (delta);
6247
6248 for (act = delta; act; act = act->next)
6249 {
6250 from = act->old_cp;
6251 to = act->new_cp;
6252 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6253 iv_ca_set_cp (data, ivs, act->group, to);
6254 }
6255
6256 if (!forward)
6257 iv_ca_delta_reverse (delta);
6258 }
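
/* A sketch (for orientation only) of how the delta machinery above is used
   by the callers below, e.g. iv_ca_extend and iv_ca_narrow:

     delta = iv_ca_delta_add (group, old_cp, new_cp, delta);
     iv_ca_delta_commit (data, ivs, delta, true);     apply tentatively
     cost = iv_ca_cost (ivs);                         evaluate the new set
     iv_ca_delta_commit (data, ivs, delta, false);    revert

   the delta is then either freed, or committed for real once the caller has
   decided that the change pays off.  */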
6259
6260 /* Returns true if CAND is used in IVS. */
6261
6262 static bool
6263 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6264 {
6265 return ivs->n_cand_uses[cand->id] > 0;
6266 }
6267
6268 /* Returns number of induction variable candidates in the set IVS. */
6269
6270 static unsigned
6271 iv_ca_n_cands (class iv_ca *ivs)
6272 {
6273 return ivs->n_cands;
6274 }
6275
6276 /* Free the list of changes DELTA. */
6277
6278 static void
6279 iv_ca_delta_free (struct iv_ca_delta **delta)
6280 {
6281 struct iv_ca_delta *act, *next;
6282
6283 for (act = *delta; act; act = next)
6284 {
6285 next = act->next;
6286 free (act);
6287 }
6288
6289 *delta = NULL;
6290 }
6291
6292 /* Allocates a new iv candidate assignment. */
6293
6294 static class iv_ca *
6295 iv_ca_new (struct ivopts_data *data)
6296 {
6297 class iv_ca *nw = XNEW (class iv_ca);
6298
6299 nw->upto = 0;
6300 nw->bad_groups = 0;
6301 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6302 data->vgroups.length ());
6303 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6304 nw->cands = BITMAP_ALLOC (NULL);
6305 nw->n_cands = 0;
6306 nw->n_invs = 0;
6307 nw->cand_use_cost = no_cost;
6308 nw->cand_cost = 0;
6309 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6310 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6311 nw->cost = no_cost;
6312
6313 return nw;
6314 }
6315
6316 /* Free memory occupied by the set IVS. */
6317
6318 static void
6319 iv_ca_free (class iv_ca **ivs)
6320 {
6321 free ((*ivs)->cand_for_group);
6322 free ((*ivs)->n_cand_uses);
6323 BITMAP_FREE ((*ivs)->cands);
6324 free ((*ivs)->n_inv_var_uses);
6325 free ((*ivs)->n_inv_expr_uses);
6326 free (*ivs);
6327 *ivs = NULL;
6328 }
6329
6330 /* Dumps IVS to FILE. */
6331
6332 static void
6333 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6334 {
6335 unsigned i;
6336 comp_cost cost = iv_ca_cost (ivs);
6337
6338 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6339 cost.complexity);
6340 fprintf (file, " reg_cost: %d\n",
6341 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6342 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6343 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6344 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6345 bitmap_print (file, ivs->cands, " candidates: ","\n");
6346
6347 for (i = 0; i < ivs->upto; i++)
6348 {
6349 struct iv_group *group = data->vgroups[i];
6350 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6351 if (cp)
6352 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6353 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6354 cp->cost.cost, cp->cost.complexity);
6355 else
6356 fprintf (file, " group:%d --> ??\n", group->id);
6357 }
6358
6359 const char *pref = "";
6360 fprintf (file, " invariant variables: ");
6361 for (i = 1; i <= data->max_inv_var_id; i++)
6362 if (ivs->n_inv_var_uses[i])
6363 {
6364 fprintf (file, "%s%d", pref, i);
6365 pref = ", ";
6366 }
6367
6368 pref = "";
6369 fprintf (file, "\n invariant expressions: ");
6370 for (i = 1; i <= data->max_inv_expr_id; i++)
6371 if (ivs->n_inv_expr_uses[i])
6372 {
6373 fprintf (file, "%s%d", pref, i);
6374 pref = ", ";
6375 }
6376
6377 fprintf (file, "\n\n");
6378 }
6379
6380 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6381 new set, and store differences in DELTA. Number of induction variables
6382 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6383 the function will try to find a solution with a minimal number of iv candidates.  */
6384
6385 static comp_cost
6386 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6387 struct iv_cand *cand, struct iv_ca_delta **delta,
6388 unsigned *n_ivs, bool min_ncand)
6389 {
6390 unsigned i;
6391 comp_cost cost;
6392 struct iv_group *group;
6393 class cost_pair *old_cp, *new_cp;
6394
6395 *delta = NULL;
6396 for (i = 0; i < ivs->upto; i++)
6397 {
6398 group = data->vgroups[i];
6399 old_cp = iv_ca_cand_for_group (ivs, group);
6400
6401 if (old_cp
6402 && old_cp->cand == cand)
6403 continue;
6404
6405 new_cp = get_group_iv_cost (data, group, cand);
6406 if (!new_cp)
6407 continue;
6408
6409 if (!min_ncand)
6410 {
6411 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6412 /* Skip if new_cp depends on more invariants. */
6413 if (cmp_invs > 0)
6414 continue;
6415
6416 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6417 /* Skip if new_cp is not cheaper. */
6418 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6419 continue;
6420 }
6421
6422 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6423 }
6424
6425 iv_ca_delta_commit (data, ivs, *delta, true);
6426 cost = iv_ca_cost (ivs);
6427 if (n_ivs)
6428 *n_ivs = iv_ca_n_cands (ivs);
6429 iv_ca_delta_commit (data, ivs, *delta, false);
6430
6431 return cost;
6432 }
6433
6434 /* Try narrowing set IVS by removing CAND. Return the cost of
6435 the new set and store the differences in DELTA. START is
6436 the candidate with which we start narrowing. */
6437
6438 static comp_cost
6439 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6440 struct iv_cand *cand, struct iv_cand *start,
6441 struct iv_ca_delta **delta)
6442 {
6443 unsigned i, ci;
6444 struct iv_group *group;
6445 class cost_pair *old_cp, *new_cp, *cp;
6446 bitmap_iterator bi;
6447 struct iv_cand *cnd;
6448 comp_cost cost, best_cost, acost;
6449
6450 *delta = NULL;
6451 for (i = 0; i < data->vgroups.length (); i++)
6452 {
6453 group = data->vgroups[i];
6454
6455 old_cp = iv_ca_cand_for_group (ivs, group);
6456 if (old_cp->cand != cand)
6457 continue;
6458
6459 best_cost = iv_ca_cost (ivs);
6460 /* Start narrowing with START. */
6461 new_cp = get_group_iv_cost (data, group, start);
6462
6463 if (data->consider_all_candidates)
6464 {
6465 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6466 {
6467 if (ci == cand->id || (start && ci == start->id))
6468 continue;
6469
6470 cnd = data->vcands[ci];
6471
6472 cp = get_group_iv_cost (data, group, cnd);
6473 if (!cp)
6474 continue;
6475
6476 iv_ca_set_cp (data, ivs, group, cp);
6477 acost = iv_ca_cost (ivs);
6478
6479 if (acost < best_cost)
6480 {
6481 best_cost = acost;
6482 new_cp = cp;
6483 }
6484 }
6485 }
6486 else
6487 {
6488 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6489 {
6490 if (ci == cand->id || (start && ci == start->id))
6491 continue;
6492
6493 cnd = data->vcands[ci];
6494
6495 cp = get_group_iv_cost (data, group, cnd);
6496 if (!cp)
6497 continue;
6498
6499 iv_ca_set_cp (data, ivs, group, cp);
6500 acost = iv_ca_cost (ivs);
6501
6502 if (acost < best_cost)
6503 {
6504 best_cost = acost;
6505 new_cp = cp;
6506 }
6507 }
6508 }
6509 /* Restore to old cp for use. */
6510 iv_ca_set_cp (data, ivs, group, old_cp);
6511
6512 if (!new_cp)
6513 {
6514 iv_ca_delta_free (delta);
6515 return infinite_cost;
6516 }
6517
6518 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6519 }
6520
6521 iv_ca_delta_commit (data, ivs, *delta, true);
6522 cost = iv_ca_cost (ivs);
6523 iv_ca_delta_commit (data, ivs, *delta, false);
6524
6525 return cost;
6526 }
6527
6528 /* Try optimizing the set of candidates IVS by removing candidates other
6529 than EXCEPT_CAND from it. Return cost of the new set, and store
6530 differences in DELTA. */
6531
6532 static comp_cost
6533 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6534 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6535 {
6536 bitmap_iterator bi;
6537 struct iv_ca_delta *act_delta, *best_delta;
6538 unsigned i;
6539 comp_cost best_cost, acost;
6540 struct iv_cand *cand;
6541
6542 best_delta = NULL;
6543 best_cost = iv_ca_cost (ivs);
6544
6545 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6546 {
6547 cand = data->vcands[i];
6548
6549 if (cand == except_cand)
6550 continue;
6551
6552 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6553
6554 if (acost < best_cost)
6555 {
6556 best_cost = acost;
6557 iv_ca_delta_free (&best_delta);
6558 best_delta = act_delta;
6559 }
6560 else
6561 iv_ca_delta_free (&act_delta);
6562 }
6563
6564 if (!best_delta)
6565 {
6566 *delta = NULL;
6567 return best_cost;
6568 }
6569
6570 /* Recurse to possibly remove other unnecessary ivs. */
6571 iv_ca_delta_commit (data, ivs, best_delta, true);
6572 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6573 iv_ca_delta_commit (data, ivs, best_delta, false);
6574 *delta = iv_ca_delta_join (best_delta, *delta);
6575 return best_cost;
6576 }
6577
6578 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6579 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6580 its cost_pair, otherwise just return BEST_CP. */
6581
6582 static class cost_pair*
6583 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6584 unsigned int cand_idx, struct iv_cand *old_cand,
6585 class cost_pair *best_cp)
6586 {
6587 struct iv_cand *cand;
6588 class cost_pair *cp;
6589
6590 gcc_assert (old_cand != NULL && best_cp != NULL);
6591 if (cand_idx == old_cand->id)
6592 return best_cp;
6593
6594 cand = data->vcands[cand_idx];
6595 cp = get_group_iv_cost (data, group, cand);
6596 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6597 return cp;
6598
6599 return best_cp;
6600 }
6601
6602 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6603 which are used by more than one iv use.  For each of those candidates,
6604 this function tries to represent the iv uses under that candidate using
6605 other ones with lower local cost, then tries to prune the new set.
6606 If the new set has lower cost, it returns the new cost after recording
6607 candidate replacement in list DELTA. */
6608
6609 static comp_cost
6610 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6611 struct iv_ca_delta **delta)
6612 {
6613 bitmap_iterator bi, bj;
6614 unsigned int i, j, k;
6615 struct iv_cand *cand;
6616 comp_cost orig_cost, acost;
6617 struct iv_ca_delta *act_delta, *tmp_delta;
6618 class cost_pair *old_cp, *best_cp = NULL;
6619
6620 *delta = NULL;
6621 orig_cost = iv_ca_cost (ivs);
6622
6623 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6624 {
6625 if (ivs->n_cand_uses[i] == 1
6626 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6627 continue;
6628
6629 cand = data->vcands[i];
6630
6631 act_delta = NULL;
6632 /* Represent uses under current candidate using other ones with
6633 lower local cost. */
6634 for (j = 0; j < ivs->upto; j++)
6635 {
6636 struct iv_group *group = data->vgroups[j];
6637 old_cp = iv_ca_cand_for_group (ivs, group);
6638
6639 if (old_cp->cand != cand)
6640 continue;
6641
6642 best_cp = old_cp;
6643 if (data->consider_all_candidates)
6644 for (k = 0; k < data->vcands.length (); k++)
6645 best_cp = cheaper_cost_with_cand (data, group, k,
6646 old_cp->cand, best_cp);
6647 else
6648 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6649 best_cp = cheaper_cost_with_cand (data, group, k,
6650 old_cp->cand, best_cp);
6651
6652 if (best_cp == old_cp)
6653 continue;
6654
6655 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6656 }
6657 /* No need for further prune. */
6658 if (!act_delta)
6659 continue;
6660
6661 /* Prune the new candidate set. */
6662 iv_ca_delta_commit (data, ivs, act_delta, true);
6663 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6664 iv_ca_delta_commit (data, ivs, act_delta, false);
6665 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6666
6667 if (acost < orig_cost)
6668 {
6669 *delta = act_delta;
6670 return acost;
6671 }
6672 else
6673 iv_ca_delta_free (&act_delta);
6674 }
6675
6676 return orig_cost;
6677 }
6678
6679 /* Tries to extend the set IVS in the best possible way in order to
6680 express GROUP. If ORIGINALP is true, prefer candidates from
6681 the original set of IVs, otherwise favor important candidates not
6682 based on any memory object. */
6683
6684 static bool
6685 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6686 struct iv_group *group, bool originalp)
6687 {
6688 comp_cost best_cost, act_cost;
6689 unsigned i;
6690 bitmap_iterator bi;
6691 struct iv_cand *cand;
6692 struct iv_ca_delta *best_delta = NULL, *act_delta;
6693 class cost_pair *cp;
6694
6695 iv_ca_add_group (data, ivs, group);
6696 best_cost = iv_ca_cost (ivs);
6697 cp = iv_ca_cand_for_group (ivs, group);
6698 if (cp)
6699 {
6700 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6701 iv_ca_set_no_cp (data, ivs, group);
6702 }
6703
6704 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6705 first try important candidates not based on any memory object. Only if
6706 this fails, try the specific ones. Rationale -- in loops with many
6707 variables the best choice often is to use just one generic biv. If we
6708 added here many ivs specific to the uses, the optimization algorithm later
6709 would be likely to get stuck in a local minimum, thus causing us to create
6710 too many ivs. The approach from few ivs to more seems more likely to be
6711 successful -- starting from few ivs, replacing an expensive use by a
6712 specific iv should always be a win. */
6713 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6714 {
6715 cand = data->vcands[i];
6716
6717 if (originalp && cand->pos != IP_ORIGINAL)
6718 continue;
6719
6720 if (!originalp && cand->iv->base_object != NULL_TREE)
6721 continue;
6722
6723 if (iv_ca_cand_used_p (ivs, cand))
6724 continue;
6725
6726 cp = get_group_iv_cost (data, group, cand);
6727 if (!cp)
6728 continue;
6729
6730 iv_ca_set_cp (data, ivs, group, cp);
6731 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6732 true);
6733 iv_ca_set_no_cp (data, ivs, group);
6734 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6735
6736 if (act_cost < best_cost)
6737 {
6738 best_cost = act_cost;
6739
6740 iv_ca_delta_free (&best_delta);
6741 best_delta = act_delta;
6742 }
6743 else
6744 iv_ca_delta_free (&act_delta);
6745 }
6746
6747 if (best_cost.infinite_cost_p ())
6748 {
6749 for (i = 0; i < group->n_map_members; i++)
6750 {
6751 cp = group->cost_map + i;
6752 cand = cp->cand;
6753 if (!cand)
6754 continue;
6755
6756 /* Already tried this. */
6757 if (cand->important)
6758 {
6759 if (originalp && cand->pos == IP_ORIGINAL)
6760 continue;
6761 if (!originalp && cand->iv->base_object == NULL_TREE)
6762 continue;
6763 }
6764
6765 if (iv_ca_cand_used_p (ivs, cand))
6766 continue;
6767
6768 act_delta = NULL;
6769 iv_ca_set_cp (data, ivs, group, cp);
6770 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6771 iv_ca_set_no_cp (data, ivs, group);
6772 act_delta = iv_ca_delta_add (group,
6773 iv_ca_cand_for_group (ivs, group),
6774 cp, act_delta);
6775
6776 if (act_cost < best_cost)
6777 {
6778 best_cost = act_cost;
6779
6780 if (best_delta)
6781 iv_ca_delta_free (&best_delta);
6782 best_delta = act_delta;
6783 }
6784 else
6785 iv_ca_delta_free (&act_delta);
6786 }
6787 }
6788
6789 iv_ca_delta_commit (data, ivs, best_delta, true);
6790 iv_ca_delta_free (&best_delta);
6791
6792 return !best_cost.infinite_cost_p ();
6793 }
6794
6795 /* Finds an initial assignment of candidates to uses. */
6796
6797 static class iv_ca *
6798 get_initial_solution (struct ivopts_data *data, bool originalp)
6799 {
6800 unsigned i;
6801 class iv_ca *ivs = iv_ca_new (data);
6802
6803 for (i = 0; i < data->vgroups.length (); i++)
6804 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6805 {
6806 iv_ca_free (&ivs);
6807 return NULL;
6808 }
6809
6810 return ivs;
6811 }
6812
6813 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6814 points to a bool variable; if it is true, this function tries to break
6815 the local optimal fixed-point by replacing candidates in IVS. */
6816
6817 static bool
6818 try_improve_iv_set (struct ivopts_data *data,
6819 class iv_ca *ivs, bool *try_replace_p)
6820 {
6821 unsigned i, n_ivs;
6822 comp_cost acost, best_cost = iv_ca_cost (ivs);
6823 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6824 struct iv_cand *cand;
6825
6826 /* Try extending the set of induction variables by one. */
6827 for (i = 0; i < data->vcands.length (); i++)
6828 {
6829 cand = data->vcands[i];
6830
6831 if (iv_ca_cand_used_p (ivs, cand))
6832 continue;
6833
6834 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6835 if (!act_delta)
6836 continue;
6837
6838 /* If we successfully added the candidate and the set is small enough,
6839 try optimizing it by removing other candidates. */
6840 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6841 {
6842 iv_ca_delta_commit (data, ivs, act_delta, true);
6843 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6844 iv_ca_delta_commit (data, ivs, act_delta, false);
6845 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6846 }
6847
6848 if (acost < best_cost)
6849 {
6850 best_cost = acost;
6851 iv_ca_delta_free (&best_delta);
6852 best_delta = act_delta;
6853 }
6854 else
6855 iv_ca_delta_free (&act_delta);
6856 }
6857
6858 if (!best_delta)
6859 {
6860 /* Try removing the candidates from the set instead. */
6861 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6862
6863 if (!best_delta && *try_replace_p)
6864 {
6865 *try_replace_p = false;
6866 /* So far the candidate selection algorithm tends to choose fewer IVs
6867 so that it can handle cases in which loops have many variables
6868 but the best choice is often to use only one general biv. One
6869 weakness is that it can't handle the opposite cases, in which different
6870 candidates should be chosen with respect to each use. To solve
6871 the problem, we replace candidates in the manner described in the
6872 comments of iv_ca_replace, thus giving the general algorithm a chance
6873 to break the local optimal fixed-point in these cases. */
6874 best_cost = iv_ca_replace (data, ivs, &best_delta);
6875 }
6876
6877 if (!best_delta)
6878 return false;
6879 }
6880
6881 iv_ca_delta_commit (data, ivs, best_delta, true);
6882 iv_ca_delta_free (&best_delta);
6883 return best_cost == iv_ca_cost (ivs);
6884 }
6885
6886 /* Attempts to find the optimal set of induction variables. We use a simple
6887 greedy heuristic -- we try to replace at most one candidate in the selected
6888 solution and remove the unused ivs while this improves the cost. */
6889
6890 static class iv_ca *
6891 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6892 {
6893 class iv_ca *set;
6894 bool try_replace_p = true;
6895
6896 /* Get the initial solution. */
6897 set = get_initial_solution (data, originalp);
6898 if (!set)
6899 {
6900 if (dump_file && (dump_flags & TDF_DETAILS))
6901 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6902 return NULL;
6903 }
6904
6905 if (dump_file && (dump_flags & TDF_DETAILS))
6906 {
6907 fprintf (dump_file, "Initial set of candidates:\n");
6908 iv_ca_dump (data, dump_file, set);
6909 }
6910
6911 while (try_improve_iv_set (data, set, &try_replace_p))
6912 {
6913 if (dump_file && (dump_flags & TDF_DETAILS))
6914 {
6915 fprintf (dump_file, "Improved to:\n");
6916 iv_ca_dump (data, dump_file, set);
6917 }
6918 }
6919
6920 /* If the set has infinite_cost, it can't be optimal. */
6921 if (iv_ca_cost (set).infinite_cost_p ())
6922 {
6923 if (dump_file && (dump_flags & TDF_DETAILS))
6924 fprintf (dump_file,
6925 "Overflow to infinite cost in try_improve_iv_set.\n");
6926 iv_ca_free (&set);
6927 }
6928 return set;
6929 }
6930
6931 static class iv_ca *
6932 find_optimal_iv_set (struct ivopts_data *data)
6933 {
6934 unsigned i;
6935 comp_cost cost, origcost;
6936 class iv_ca *set, *origset;
6937
6938 /* Determine the cost based on a strategy that starts with original IVs,
6939 and try again using a strategy that prefers candidates not based
6940 on any IVs. */
6941 origset = find_optimal_iv_set_1 (data, true);
6942 set = find_optimal_iv_set_1 (data, false);
6943
6944 if (!origset && !set)
6945 return NULL;
6946
6947 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6948 cost = set ? iv_ca_cost (set) : infinite_cost;
6949
6950 if (dump_file && (dump_flags & TDF_DETAILS))
6951 {
6952 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
6953 origcost.cost, origcost.complexity);
6954 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
6955 cost.cost, cost.complexity);
6956 }
6957
6958 /* Choose the one with the best cost. */
6959 if (origcost <= cost)
6960 {
6961 if (set)
6962 iv_ca_free (&set);
6963 set = origset;
6964 }
6965 else if (origset)
6966 iv_ca_free (&origset);
6967
6968 for (i = 0; i < data->vgroups.length (); i++)
6969 {
6970 struct iv_group *group = data->vgroups[i];
6971 group->selected = iv_ca_cand_for_group (set, group)->cand;
6972 }
6973
6974 return set;
6975 }
6976
6977 /* Creates a new induction variable corresponding to CAND. */
6978
6979 static void
6980 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6981 {
6982 gimple_stmt_iterator incr_pos;
6983 tree base;
6984 struct iv_use *use;
6985 struct iv_group *group;
6986 bool after = false;
6987
6988 gcc_assert (cand->iv != NULL);
6989
6990 switch (cand->pos)
6991 {
6992 case IP_NORMAL:
6993 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6994 break;
6995
6996 case IP_END:
6997 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6998 after = true;
6999 break;
7000
7001 case IP_AFTER_USE:
7002 after = true;
7003 /* fall through */
7004 case IP_BEFORE_USE:
7005 incr_pos = gsi_for_stmt (cand->incremented_at);
7006 break;
7007
7008 case IP_ORIGINAL:
7009 /* Mark that the iv is preserved. */
7010 name_info (data, cand->var_before)->preserve_biv = true;
7011 name_info (data, cand->var_after)->preserve_biv = true;
7012
7013 /* Rewrite the increment so that it uses var_before directly. */
7014 use = find_interesting_uses_op (data, cand->var_after);
7015 group = data->vgroups[use->group_id];
7016 group->selected = cand;
7017 return;
7018 }
7019
7020 gimple_add_tmp_var (cand->var_before);
7021
7022 base = unshare_expr (cand->iv->base);
7023
7024 create_iv (base, unshare_expr (cand->iv->step),
7025 cand->var_before, data->current_loop,
7026 &incr_pos, after, &cand->var_before, &cand->var_after);
7027 }
7028
7029 /* Creates new induction variables described in SET. */
7030
7031 static void
7032 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7033 {
7034 unsigned i;
7035 struct iv_cand *cand;
7036 bitmap_iterator bi;
7037
7038 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7039 {
7040 cand = data->vcands[i];
7041 create_new_iv (data, cand);
7042 }
7043
7044 if (dump_file && (dump_flags & TDF_DETAILS))
7045 {
7046 fprintf (dump_file, "Selected IV set for loop %d",
7047 data->current_loop->num);
7048 if (data->loop_loc != UNKNOWN_LOCATION)
7049 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7050 LOCATION_LINE (data->loop_loc));
7051 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7052 avg_loop_niter (data->current_loop));
7053 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7054 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7055 {
7056 cand = data->vcands[i];
7057 dump_cand (dump_file, cand);
7058 }
7059 fprintf (dump_file, "\n");
7060 }
7061 }
7062
7063 /* Rewrites USE (definition of iv used in a nonlinear expression)
7064 using candidate CAND. */
7065
7066 static void
7067 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7068 struct iv_use *use, struct iv_cand *cand)
7069 {
7070 gassign *ass;
7071 gimple_stmt_iterator bsi;
7072 tree comp, type = get_use_type (use), tgt;
7073
7074 /* An important special case -- if we are asked to express value of
7075 the original iv by itself, just exit; there is no need to
7076 introduce a new computation (that might also need casting the
7077 variable to unsigned and back). */
7078 if (cand->pos == IP_ORIGINAL
7079 && cand->incremented_at == use->stmt)
7080 {
7081 tree op = NULL_TREE;
7082 enum tree_code stmt_code;
7083
7084 gcc_assert (is_gimple_assign (use->stmt));
7085 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7086
7087 /* Check whether we may leave the computation unchanged.
7088 This is the case only if it does not rely on other
7089 computations in the loop -- otherwise, the computation
7090 we rely upon may be removed in remove_unused_ivs,
7091 thus leading to ICE. */
7092 stmt_code = gimple_assign_rhs_code (use->stmt);
7093 if (stmt_code == PLUS_EXPR
7094 || stmt_code == MINUS_EXPR
7095 || stmt_code == POINTER_PLUS_EXPR)
7096 {
7097 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7098 op = gimple_assign_rhs2 (use->stmt);
7099 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7100 op = gimple_assign_rhs1 (use->stmt);
7101 }
7102
7103 if (op != NULL_TREE)
7104 {
7105 if (expr_invariant_in_loop_p (data->current_loop, op))
7106 return;
7107 if (TREE_CODE (op) == SSA_NAME)
7108 {
7109 struct iv *iv = get_iv (data, op);
7110 if (iv != NULL && integer_zerop (iv->step))
7111 return;
7112 }
7113 }
7114 }
7115
7116 switch (gimple_code (use->stmt))
7117 {
7118 case GIMPLE_PHI:
7119 tgt = PHI_RESULT (use->stmt);
7120
7121 /* If we should keep the biv, do not replace it. */
7122 if (name_info (data, tgt)->preserve_biv)
7123 return;
7124
7125 bsi = gsi_after_labels (gimple_bb (use->stmt));
7126 break;
7127
7128 case GIMPLE_ASSIGN:
7129 tgt = gimple_assign_lhs (use->stmt);
7130 bsi = gsi_for_stmt (use->stmt);
7131 break;
7132
7133 default:
7134 gcc_unreachable ();
7135 }
7136
7137 aff_tree aff_inv, aff_var;
7138 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7139 use, cand, &aff_inv, &aff_var))
7140 gcc_unreachable ();
7141
7142 unshare_aff_combination (&aff_inv);
7143 unshare_aff_combination (&aff_var);
7144 /* Prefer CSE opportunities over hoisting the loop invariant by adding the
7145 offset last, so that iv_uses that differ only in offset can be CSEd. */
7146 poly_widest_int offset = aff_inv.offset;
7147 aff_inv.offset = 0;
7148
7149 gimple_seq stmt_list = NULL, seq = NULL;
7150 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7151 tree comp_op2 = aff_combination_to_tree (&aff_var);
7152 gcc_assert (comp_op1 && comp_op2);
7153
7154 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7155 gimple_seq_add_seq (&stmt_list, seq);
7156 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7157 gimple_seq_add_seq (&stmt_list, seq);
7158
7159 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7160 std::swap (comp_op1, comp_op2);
7161
7162 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7163 {
7164 comp = fold_build_pointer_plus (comp_op1,
7165 fold_convert (sizetype, comp_op2));
7166 comp = fold_build_pointer_plus (comp,
7167 wide_int_to_tree (sizetype, offset));
7168 }
7169 else
7170 {
7171 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7172 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7173 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7174 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7175 }
7176
7177 comp = fold_convert (type, comp);
7178 if (!valid_gimple_rhs_p (comp)
7179 || (gimple_code (use->stmt) != GIMPLE_PHI
7180 /* We can't allow re-allocating the stmt as it might be pointed
7181 to still. */
7182 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7183 >= gimple_num_ops (gsi_stmt (bsi)))))
7184 {
7185 comp = force_gimple_operand (comp, &seq, true, NULL);
7186 gimple_seq_add_seq (&stmt_list, seq);
7187 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7188 {
7189 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7190 /* As this isn't a plain copy we have to reset alignment
7191 information. */
7192 if (SSA_NAME_PTR_INFO (comp))
7193 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7194 }
7195 }
7196
7197 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7198 if (gimple_code (use->stmt) == GIMPLE_PHI)
7199 {
7200 ass = gimple_build_assign (tgt, comp);
7201 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7202
7203 bsi = gsi_for_stmt (use->stmt);
7204 remove_phi_node (&bsi, false);
7205 }
7206 else
7207 {
7208 gimple_assign_set_rhs_from_tree (&bsi, comp);
7209 use->stmt = gsi_stmt (bsi);
7210 }
7211 }
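
/* For example (illustrative only): two nonlinear uses whose affine forms
   differ only in the constant offset, say base + i and base + i + 4, are
   rewritten above as (base + i) + 0 and (base + i) + 4 because the offset
   is split out and added last, so the shared (base + i) computation can be
   CSEd by later passes.  */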
7212
7213 /* Performs a peephole optimization to reorder the iv update statement with
7214 a mem ref to enable instruction combining in later phases. The mem ref uses
7215 the iv value before the update, so the reordering transformation requires
7216 adjustment of the offset. CAND is the selected IV_CAND.
7217
7218 Example:
7219
7220 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7221 iv2 = iv1 + 1;
7222
7223 if (t < val) (1)
7224 goto L;
7225 goto Head;
7226
7227
7228 Directly propagating t over to (1) would introduce an overlapping live range
7229 and thus increase register pressure. This peephole transforms it into:
7230
7231
7232 iv2 = iv1 + 1;
7233 t = MEM_REF (base, iv2, 8, 8);
7234 if (t < val)
7235 goto L;
7236 goto Head;
7237 */
7238
7239 static void
7240 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7241 {
7242 tree var_after;
7243 gimple *iv_update, *stmt;
7244 basic_block bb;
7245 gimple_stmt_iterator gsi, gsi_iv;
7246
7247 if (cand->pos != IP_NORMAL)
7248 return;
7249
7250 var_after = cand->var_after;
7251 iv_update = SSA_NAME_DEF_STMT (var_after);
7252
7253 bb = gimple_bb (iv_update);
7254 gsi = gsi_last_nondebug_bb (bb);
7255 stmt = gsi_stmt (gsi);
7256
7257 /* Only handle conditional statements for now. */
7258 if (gimple_code (stmt) != GIMPLE_COND)
7259 return;
7260
7261 gsi_prev_nondebug (&gsi);
7262 stmt = gsi_stmt (gsi);
7263 if (stmt != iv_update)
7264 return;
7265
7266 gsi_prev_nondebug (&gsi);
7267 if (gsi_end_p (gsi))
7268 return;
7269
7270 stmt = gsi_stmt (gsi);
7271 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7272 return;
7273
7274 if (stmt != use->stmt)
7275 return;
7276
7277 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7278 return;
7279
7280 if (dump_file && (dump_flags & TDF_DETAILS))
7281 {
7282 fprintf (dump_file, "Reordering \n");
7283 print_gimple_stmt (dump_file, iv_update, 0);
7284 print_gimple_stmt (dump_file, use->stmt, 0);
7285 fprintf (dump_file, "\n");
7286 }
7287
7288 gsi = gsi_for_stmt (use->stmt);
7289 gsi_iv = gsi_for_stmt (iv_update);
7290 gsi_move_before (&gsi_iv, &gsi);
7291
7292 cand->pos = IP_BEFORE_USE;
7293 cand->incremented_at = use->stmt;
7294 }
7295
7296 /* Return the alias pointer type that should be used for a MEM_REF
7297 associated with USE, which has type USE_PTR_ADDRESS. */
7298
7299 static tree
7300 get_alias_ptr_type_for_ptr_address (iv_use *use)
7301 {
7302 gcall *call = as_a <gcall *> (use->stmt);
7303 switch (gimple_call_internal_fn (call))
7304 {
7305 case IFN_MASK_LOAD:
7306 case IFN_MASK_STORE:
7307 case IFN_MASK_LOAD_LANES:
7308 case IFN_MASK_STORE_LANES:
7309 /* The second argument contains the correct alias type. */
7310 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7311 return TREE_TYPE (gimple_call_arg (call, 1));
7312
7313 default:
7314 gcc_unreachable ();
7315 }
7316 }
7317
7318
7319 /* Rewrites USE (address that is an iv) using candidate CAND. */
7320
7321 static void
7322 rewrite_use_address (struct ivopts_data *data,
7323 struct iv_use *use, struct iv_cand *cand)
7324 {
7325 aff_tree aff;
7326 bool ok;
7327
7328 adjust_iv_update_pos (cand, use);
7329 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7330 gcc_assert (ok);
7331 unshare_aff_combination (&aff);
7332
7333 /* To avoid undefined overflow problems, all IV candidates use unsigned
7334 integer types. The drawback is that this makes it impossible for
7335 create_mem_ref to distinguish an IV that is based on a memory object
7336 from one that represents simply an offset.
7337
7338 To work around this problem, we pass a hint to create_mem_ref that
7339 indicates which variable (if any) in aff is an IV based on a memory
7340 object. Note that we only consider the candidate. If this is not
7341 based on an object, the base of the reference is in some subexpression
7342 of the use -- but these will use pointer types, so they are recognized
7343 by the create_mem_ref heuristics anyway. */
7344 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7345 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7346 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7347 tree type = use->mem_type;
7348 tree alias_ptr_type;
7349 if (use->type == USE_PTR_ADDRESS)
7350 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7351 else
7352 {
7353 gcc_assert (type == TREE_TYPE (*use->op_p));
7354 unsigned int align = get_object_alignment (*use->op_p);
7355 if (align != TYPE_ALIGN (type))
7356 type = build_aligned_type (type, align);
7357 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7358 }
7359 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7360 iv, base_hint, data->speed);
7361
7362 if (use->type == USE_PTR_ADDRESS)
7363 {
7364 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7365 ref = fold_convert (get_use_type (use), ref);
7366 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7367 true, GSI_SAME_STMT);
7368 }
7369 else
7370 copy_ref_info (ref, *use->op_p);
7371
7372 *use->op_p = ref;
7373 }
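
/* For instance (illustrative only): for an access a[i] rewritten with a
   candidate based on the memory object 'a', the candidate value lives in an
   unsigned integer type, so without the BASE_HINT computed above
   create_mem_ref could not tell that it represents a pointer into 'a'
   rather than a plain offset; the hint lets it build the MEM_REF directly
   on the candidate.  */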
7374
7375 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7376 candidate CAND. */
7377
7378 static void
7379 rewrite_use_compare (struct ivopts_data *data,
7380 struct iv_use *use, struct iv_cand *cand)
7381 {
7382 tree comp, op, bound;
7383 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7384 enum tree_code compare;
7385 struct iv_group *group = data->vgroups[use->group_id];
7386 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7387
7388 bound = cp->value;
7389 if (bound)
7390 {
7391 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7392 tree var_type = TREE_TYPE (var);
7393 gimple_seq stmts;
7394
7395 if (dump_file && (dump_flags & TDF_DETAILS))
7396 {
7397 fprintf (dump_file, "Replacing exit test: ");
7398 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7399 }
7400 compare = cp->comp;
7401 bound = unshare_expr (fold_convert (var_type, bound));
7402 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7403 if (stmts)
7404 gsi_insert_seq_on_edge_immediate (
7405 loop_preheader_edge (data->current_loop),
7406 stmts);
7407
7408 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7409 gimple_cond_set_lhs (cond_stmt, var);
7410 gimple_cond_set_code (cond_stmt, compare);
7411 gimple_cond_set_rhs (cond_stmt, op);
7412 return;
7413 }
7414
7415 /* The induction variable elimination failed; just express the original
7416 giv. */
7417 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7418 gcc_assert (comp != NULL_TREE);
7419 gcc_assert (use->op_p != NULL);
7420 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7421 SSA_NAME_VAR (*use->op_p),
7422 true, GSI_SAME_STMT);
7423 }
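
/* Example (illustrative): when may_eliminate_iv determined earlier that the
   original exit test, say 'i < n', can be expressed through the selected
   candidate, the cost pair's VALUE holds the precomputed bound and COMP the
   comparison code, so the code above rewrites the GIMPLE_COND to compare
   the candidate against that bound, materializing it once on the preheader
   edge.  */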
7424
7425 /* Rewrite the groups using the selected induction variables. */
7426
7427 static void
7428 rewrite_groups (struct ivopts_data *data)
7429 {
7430 unsigned i, j;
7431
7432 for (i = 0; i < data->vgroups.length (); i++)
7433 {
7434 struct iv_group *group = data->vgroups[i];
7435 struct iv_cand *cand = group->selected;
7436
7437 gcc_assert (cand);
7438
7439 if (group->type == USE_NONLINEAR_EXPR)
7440 {
7441 for (j = 0; j < group->vuses.length (); j++)
7442 {
7443 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7444 update_stmt (group->vuses[j]->stmt);
7445 }
7446 }
7447 else if (address_p (group->type))
7448 {
7449 for (j = 0; j < group->vuses.length (); j++)
7450 {
7451 rewrite_use_address (data, group->vuses[j], cand);
7452 update_stmt (group->vuses[j]->stmt);
7453 }
7454 }
7455 else
7456 {
7457 gcc_assert (group->type == USE_COMPARE);
7458
7459 for (j = 0; j < group->vuses.length (); j++)
7460 {
7461 rewrite_use_compare (data, group->vuses[j], cand);
7462 update_stmt (group->vuses[j]->stmt);
7463 }
7464 }
7465 }
7466 }
7467
7468 /* Removes the ivs that are not used after rewriting; the SSA names to release are recorded in TOREMOVE. */
7469
7470 static void
7471 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7472 {
7473 unsigned j;
7474 bitmap_iterator bi;
7475
7476 /* Figure out an order in which to release SSA DEFs so that we don't
7477 release something that we'd have to propagate into a debug stmt
7478 afterwards. */
7479 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7480 {
7481 struct version_info *info;
7482
7483 info = ver_info (data, j);
7484 if (info->iv
7485 && !integer_zerop (info->iv->step)
7486 && !info->inv_id
7487 && !info->iv->nonlin_use
7488 && !info->preserve_biv)
7489 {
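/* This iv is no longer needed: it is not preserved as a biv, not used as an
   invariant, and has no remaining nonlinear use, so queue its definition
   for removal. */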
7490 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7491
7492 tree def = info->iv->ssa_name;
7493
7494 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7495 {
7496 imm_use_iterator imm_iter;
7497 use_operand_p use_p;
7498 gimple *stmt;
7499 int count = 0;
7500
7501 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7502 {
7503 if (!gimple_debug_bind_p (stmt))
7504 continue;
7505
7506 /* We just want to determine whether to do nothing
7507 (count == 0), to substitute the computed
7508 expression into a single use of the SSA DEF by
7509 itself (count == 1), or to use a debug temp
7510 because the SSA DEF is used multiple times or as
7511 part of a larger expression (count > 1). */
7512 count++;
7513 if (gimple_debug_bind_get_value (stmt) != def)
7514 count++;
7515
7516 if (count > 1)
7517 BREAK_FROM_IMM_USE_STMT (imm_iter);
7518 }
7519
7520 if (!count)
7521 continue;
7522
7523 struct iv_use dummy_use;
7524 struct iv_cand *best_cand = NULL, *cand;
7525 unsigned i, best_pref = 0, cand_pref;
7526
7527 memset (&dummy_use, 0, sizeof (dummy_use));
7528 dummy_use.iv = info->iv;
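/* Choose the most suitable selected candidate to express the removed IV in
   debug stmts: prefer a matching step (weight 4), then a matching base mode
   (weight 2), then a constant base (weight 1); only the first 64 groups are
   scanned. */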
7529 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7530 {
7531 cand = data->vgroups[i]->selected;
7532 if (cand == best_cand)
7533 continue;
7534 cand_pref = operand_equal_p (cand->iv->step,
7535 info->iv->step, 0)
7536 ? 4 : 0;
7537 cand_pref
7538 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7539 == TYPE_MODE (TREE_TYPE (info->iv->base))
7540 ? 2 : 0;
7541 cand_pref
7542 += TREE_CODE (cand->iv->base) == INTEGER_CST
7543 ? 1 : 0;
7544 if (best_cand == NULL || best_pref < cand_pref)
7545 {
7546 best_cand = cand;
7547 best_pref = cand_pref;
7548 }
7549 }
7550
7551 if (!best_cand)
7552 continue;
7553
7554 tree comp = get_computation_at (data->current_loop,
7555 SSA_NAME_DEF_STMT (def),
7556 &dummy_use, best_cand);
7557 if (!comp)
7558 continue;
7559
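/* If the name is used by several debug binds, or only as part of a larger
   expression, bind the replacement expression to a debug temporary first so
   it is materialized only once. */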
7560 if (count > 1)
7561 {
7562 tree vexpr = make_node (DEBUG_EXPR_DECL);
7563 DECL_ARTIFICIAL (vexpr) = 1;
7564 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7565 if (SSA_NAME_VAR (def))
7566 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7567 else
7568 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7569 gdebug *def_temp
7570 = gimple_build_debug_bind (vexpr, comp, NULL);
7571 gimple_stmt_iterator gsi;
7572
7573 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7574 gsi = gsi_after_labels (gimple_bb
7575 (SSA_NAME_DEF_STMT (def)));
7576 else
7577 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7578
7579 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7580 comp = vexpr;
7581 }
7582
7583 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7584 {
7585 if (!gimple_debug_bind_p (stmt))
7586 continue;
7587
7588 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7589 SET_USE (use_p, comp);
7590
7591 update_stmt (stmt);
7592 }
7593 }
7594 }
7595 }
7596 }
7597
7598 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7599 for hash_map::traverse. */
7600
7601 bool
7602 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7603 {
7604 free (value);
7605 return true;
7606 }
7607
7608 /* Frees data allocated by the optimization of a single loop. */
7609
7610 static void
7611 free_loop_data (struct ivopts_data *data)
7612 {
7613 unsigned i, j;
7614 bitmap_iterator bi;
7615 tree obj;
7616
7617 if (data->niters)
7618 {
7619 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7620 delete data->niters;
7621 data->niters = NULL;
7622 }
7623
7624 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7625 {
7626 struct version_info *info;
7627
7628 info = ver_info (data, i);
7629 info->iv = NULL;
7630 info->has_nonlin_use = false;
7631 info->preserve_biv = false;
7632 info->inv_id = 0;
7633 }
7634 bitmap_clear (data->relevant);
7635 bitmap_clear (data->important_candidates);
7636
7637 for (i = 0; i < data->vgroups.length (); i++)
7638 {
7639 struct iv_group *group = data->vgroups[i];
7640
7641 for (j = 0; j < group->vuses.length (); j++)
7642 free (group->vuses[j]);
7643 group->vuses.release ();
7644
7645 BITMAP_FREE (group->related_cands);
7646 for (j = 0; j < group->n_map_members; j++)
7647 {
7648 if (group->cost_map[j].inv_vars)
7649 BITMAP_FREE (group->cost_map[j].inv_vars);
7650 if (group->cost_map[j].inv_exprs)
7651 BITMAP_FREE (group->cost_map[j].inv_exprs);
7652 }
7653
7654 free (group->cost_map);
7655 free (group);
7656 }
7657 data->vgroups.truncate (0);
7658
7659 for (i = 0; i < data->vcands.length (); i++)
7660 {
7661 struct iv_cand *cand = data->vcands[i];
7662
7663 if (cand->inv_vars)
7664 BITMAP_FREE (cand->inv_vars);
7665 if (cand->inv_exprs)
7666 BITMAP_FREE (cand->inv_exprs);
7667 free (cand);
7668 }
7669 data->vcands.truncate (0);
7670
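/* Rewriting may have created new SSA names; if so, reallocate version_info
   (zero-initialized) with room to spare for the next loop. */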
7671 if (data->version_info_size < num_ssa_names)
7672 {
7673 data->version_info_size = 2 * num_ssa_names;
7674 free (data->version_info);
7675 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7676 }
7677
7678 data->max_inv_var_id = 0;
7679 data->max_inv_expr_id = 0;
7680
7681 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7682 SET_DECL_RTL (obj, NULL_RTX);
7683
7684 decl_rtl_to_reset.truncate (0);
7685
7686 data->inv_expr_tab->empty ();
7687
7688 data->iv_common_cand_tab->empty ();
7689 data->iv_common_cands.truncate (0);
7690 }
7691
7692 /* Finalizes data structures used by the iv optimization pass. */
7694
7695 static void
7696 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7697 {
7698 free_loop_data (data);
7699 free (data->version_info);
7700 BITMAP_FREE (data->relevant);
7701 BITMAP_FREE (data->important_candidates);
7702
7703 decl_rtl_to_reset.release ();
7704 data->vgroups.release ();
7705 data->vcands.release ();
7706 delete data->inv_expr_tab;
7707 data->inv_expr_tab = NULL;
7708 free_affine_expand_cache (&data->name_expansion_cache);
7709 if (data->base_object_map)
7710 delete data->base_object_map;
7711 delete data->iv_common_cand_tab;
7712 data->iv_common_cand_tab = NULL;
7713 data->iv_common_cands.release ();
7714 obstack_free (&data->iv_obstack, NULL);
7715 }
7716
7717 /* Returns true if the loop body BODY of NUM_NODES blocks includes any call that is not internal or an inexpensive builtin. */
7718
7719 static bool
7720 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7721 {
7722 gimple_stmt_iterator gsi;
7723 unsigned i;
7724
7725 for (i = 0; i < num_nodes; i++)
7726 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7727 {
7728 gimple *stmt = gsi_stmt (gsi);
7729 if (is_gimple_call (stmt)
7730 && !gimple_call_internal_p (stmt)
7731 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7732 return true;
7733 }
7734 return false;
7735 }
7736
7737 /* Determine the cost scaling factor for basic blocks in the loop. */
7738 #define COST_SCALING_FACTOR_BOUND (20)
7739
7740 static void
7741 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7742 {
7743 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7744 if (!data->speed || lfreq <= 0)
7745 return;
7746
7747 int max_freq = lfreq;
7748 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7749 {
7750 body[i]->aux = (void *)(intptr_t) 1;
7751 if (max_freq < body[i]->count.to_frequency (cfun))
7752 max_freq = body[i]->count.to_frequency (cfun);
7753 }
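/* E.g. (illustrative): with a header frequency of 100, a block with
   frequency 300 gets a factor of 3, so costs attributed to it weigh three
   times as much; the factor is capped by COST_SCALING_FACTOR_BOUND for very
   hot blocks. */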
7754 if (max_freq > lfreq)
7755 {
7756 int divisor, factor;
7757 /* If the hottest block is more than COST_SCALING_FACTOR_BOUND times hotter
7758 than the header, scale against MAX_FREQ instead so the factor is capped and scaled costs cannot overflow. */
7759 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7760 {
7761 divisor = max_freq;
7762 factor = COST_SCALING_FACTOR_BOUND;
7763 }
7764 else
7765 {
7766 divisor = lfreq;
7767 factor = 1;
7768 }
7769 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7770 {
7771 int bfreq = body[i]->count.to_frequency (cfun);
7772 if (bfreq <= lfreq)
7773 continue;
7774
7775 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7776 }
7777 }
7778 }
7779
7780 /* Find the doloop comparison iv use and set its doloop_p flag if found. */
7781
7782 static bool
7783 find_doloop_use (struct ivopts_data *data)
7784 {
7785 struct loop *loop = data->current_loop;
7786
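/* The doloop use is the single compare-type use whose GIMPLE_COND directly
   controls an empty loop latch; that is the test a later RTL doloop
   transformation is expected to replace with a counter-based branch. */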
7787 for (unsigned i = 0; i < data->vgroups.length (); i++)
7788 {
7789 struct iv_group *group = data->vgroups[i];
7790 if (group->type == USE_COMPARE)
7791 {
7792 gcc_assert (group->vuses.length () == 1);
7793 struct iv_use *use = group->vuses[0];
7794 gimple *stmt = use->stmt;
7795 if (gimple_code (stmt) == GIMPLE_COND)
7796 {
7797 basic_block bb = gimple_bb (stmt);
7798 edge true_edge, false_edge;
7799 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
7800 /* This comparison controls the loop latch. For now, require the latch
7801 to be empty. */
7802 if ((loop->latch == true_edge->dest
7803 || loop->latch == false_edge->dest)
7804 && empty_block_p (loop->latch))
7805 {
7806 group->doloop_p = true;
7807 if (dump_file && (dump_flags & TDF_DETAILS))
7808 {
7809 fprintf (dump_file, "Doloop cmp iv use: ");
7810 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
7811 }
7812 return true;
7813 }
7814 }
7815 }
7816 }
7817
7818 return false;
7819 }
7820
7821 /* For targets that support doloop, predict whether the later RTL doloop
7822 transformation will be applied to this loop; if so, detect the doloop use
7823 and set the doloop_use_p flag. */
7824
7825 void
7826 analyze_and_mark_doloop_use (struct ivopts_data *data)
7827 {
7828 data->doloop_use_p = false;
7829
7830 if (!flag_branch_on_count_reg)
7831 return;
7832
7833 if (!generic_predict_doloop_p (data))
7834 return;
7835
7836 if (find_doloop_use (data))
7837 {
7838 data->doloop_use_p = true;
7839 if (dump_file && (dump_flags & TDF_DETAILS))
7840 {
7841 struct loop *loop = data->current_loop;
7842 fprintf (dump_file,
7843 "Predict loop %d can perform"
7844 " doloop optimization later.\n",
7845 loop->num);
7846 flow_loop_dump (loop, dump_file, NULL, 1);
7847 }
7848 }
7849 }
7850
7851 /* Optimizes the LOOP, recording removable IV defs in TOREMOVE. Returns true if anything changed. */
7852
7853 static bool
7854 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
7855 bitmap toremove)
7856 {
7857 bool changed = false;
7858 class iv_ca *iv_ca;
7859 edge exit = single_dom_exit (loop);
7860 basic_block *body;
7861
7862 gcc_assert (!data->niters);
7863 data->current_loop = loop;
7864 data->loop_loc = find_loop_location (loop).get_location_t ();
7865 data->speed = optimize_loop_for_speed_p (loop);
7866
7867 if (dump_file && (dump_flags & TDF_DETAILS))
7868 {
7869 fprintf (dump_file, "Processing loop %d", loop->num);
7870 if (data->loop_loc != UNKNOWN_LOCATION)
7871 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7872 LOCATION_LINE (data->loop_loc));
7873 fprintf (dump_file, "\n");
7874
7875 if (exit)
7876 {
7877 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7878 exit->src->index, exit->dest->index);
7879 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7880 fprintf (dump_file, "\n");
7881 }
7882
7883 fprintf (dump_file, "\n");
7884 }
7885
7886 body = get_loop_body (loop);
7887 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7888 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7889
7890 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7891
7892 /* For each ssa name determines whether it behaves as an induction variable
7893 in some loop. */
7894 if (!find_induction_variables (data))
7895 goto finish;
7896
7897 /* Finds interesting uses (item 1). */
7898 find_interesting_uses (data);
7899 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7900 goto finish;
7901
7902 /* Determine cost scaling factor for basic blocks in loop. */
7903 determine_scaling_factor (data, body);
7904
7905 /* Analyze doloop possibility and mark the doloop use if predicted. */
7906 analyze_and_mark_doloop_use (data);
7907
7908 /* Finds candidates for the induction variables (item 2). */
7909 find_iv_candidates (data);
7910
7911 /* Calculates the costs (item 3, part 1). */
7912 determine_iv_costs (data);
7913 determine_group_iv_costs (data);
7914 determine_set_costs (data);
7915
7916 /* Find the optimal set of induction variables (item 3, part 2). */
7917 iv_ca = find_optimal_iv_set (data);
7918 /* Cleanup basic block aux field. */
7919 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7920 body[i]->aux = NULL;
7921 if (!iv_ca)
7922 goto finish;
7923 changed = true;
7924
7925 /* Create the new induction variables (item 4, part 1). */
7926 create_new_ivs (data, iv_ca);
7927 iv_ca_free (&iv_ca);
7928
7929 /* Rewrite the uses (item 4, part 2). */
7930 rewrite_groups (data);
7931
7932 /* Remove the ivs that are unused after rewriting. */
7933 remove_unused_ivs (data, toremove);
7934
7935 finish:
7936 free (body);
7937 free_loop_data (data);
7938
7939 return changed;
7940 }
7941
7942 /* Main entry point. Optimizes induction variables in loops. */
7943
7944 void
7945 tree_ssa_iv_optimize (void)
7946 {
7947 class loop *loop;
7948 struct ivopts_data data;
7949 auto_bitmap toremove;
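/* TOREMOVE accumulates the SSA names of IVs eliminated in all loops; they
   are released in one batch below, after debug uses have been rewritten. */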
7950
7951 tree_ssa_iv_optimize_init (&data);
7952
7953 /* Optimize the loops starting with the innermost ones. */
7954 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7955 {
7956 if (dump_file && (dump_flags & TDF_DETAILS))
7957 flow_loop_dump (loop, dump_file, NULL, 1);
7958
7959 tree_ssa_iv_optimize_loop (&data, loop, toremove);
7960 }
7961
7962 /* Remove eliminated IV defs. */
7963 release_defs_bitset (toremove);
7964
7965 /* We have changed the structure of induction variables; it might happen
7966 that definitions in the scev database refer to some of them that were
7967 eliminated. */
7968 scev_reset_htab ();
7969 /* Likewise niter and control-IV information. */
7970 free_numbers_of_iterations_estimates (cfun);
7971
7972 tree_ssa_iv_optimize_finalize (&data);
7973 }
7974
7975 #include "gt-tree-ssa-loop-ivopts.h"