gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in groups.
  34       Generally, address type uses are grouped together if their iv bases
  35       differ only by a constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.
  68
  69    For the targets supporting low-overhead loops, IVOPTs has to take care of
  70    the loops which will probably be transformed in RTL doloop optimization,
  71    to try to make selected IV candidate set optimal.  The process of doloop
  72    support includes:
  73
  74    1) Analyze whether the current loop will be transformed to doloop, find and
  75       mark its compare type IV use as doloop use (iv_group field doloop_p), and
  76       set flag doloop_use_p of ivopts_data to notify subsequent processing of
  77       doloop.  See analyze_and_mark_doloop_use and its callees for the details.
  78       The target hook predict_doloop_p can be used for target specific checks.
  79
  80    2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
  81       set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
  82       like biv.  For cost determination between doloop IV cand and IV use, the
  83       target hooks doloop_cost_for_generic and doloop_cost_for_address are
  84       provided to add on extra costs for generic type and address type IV use.
  85       Zero cost is assigned to the pair between doloop IV cand and doloop IV
  86       use, and bound zero is set for IV elimination.
  87
  88    3) With the cost setting in step 2), the current cost model based IV
  89       selection algorithm will proceed as usual, picking the doloop dedicated
  90       IV if profitable.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "rtl.h"
  97 #include "tree.h"
  98 #include "gimple.h"
  99 #include "cfghooks.h"
 100 #include "tree-pass.h"
 101 #include "memmodel.h"
 102 #include "tm_p.h"
 103 #include "ssa.h"
 104 #include "expmed.h"
 105 #include "insn-config.h"
 106 #include "emit-rtl.h"
 107 #include "recog.h"
 108 #include "cgraph.h"
 109 #include "gimple-pretty-print.h"
 110 #include "alias.h"
 111 #include "fold-const.h"
 112 #include "stor-layout.h"
 113 #include "tree-eh.h"
 114 #include "gimplify.h"
 115 #include "gimple-iterator.h"
 116 #include "gimplify-me.h"
 117 #include "tree-cfg.h"
 118 #include "tree-ssa-loop-ivopts.h"
 119 #include "tree-ssa-loop-manip.h"
 120 #include "tree-ssa-loop-niter.h"
 121 #include "tree-ssa-loop.h"
 122 #include "explow.h"
 123 #include "expr.h"
 124 #include "tree-dfa.h"
 125 #include "tree-ssa.h"
 126 #include "cfgloop.h"
 127 #include "tree-scalar-evolution.h"
 128 #include "tree-affine.h"
 129 #include "tree-ssa-propagate.h"
 130 #include "tree-ssa-address.h"
 131 #include "builtins.h"
 132 #include "tree-vectorizer.h"
 133 #include "dbgcnt.h"
 134
 135 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 136    cost of different addressing modes.  This should be moved to a TBD
 137    interface between the GIMPLE and RTL worlds.  */
 138
 139 /* The cost value that stands for infinity (see comp_cost::infinite_cost_p).  */
 140 #define INFTY 1000000000
 141
 142 /* Return the number of iterations LOOP is expected to execute.  The
 143    estimate from estimated_stmt_executions_int is preferred; otherwise the
 144    likely maximum is used, capped by param_avg_loop_niter.  */
 145
 146 static inline HOST_WIDE_INT
 147 avg_loop_niter (class loop *loop)
 148 {
 149   HOST_WIDE_INT estimate = estimated_stmt_executions_int (loop);
 150   if (estimate != -1)
 151     return estimate;
 152
 153   /* No estimate is available; fall back to the likely upper bound, but
 154      never exceed the parameter-supplied default.  */
 155   HOST_WIDE_INT bound = likely_max_stmt_executions_int (loop);
 156   if (bound != -1 && bound <= param_avg_loop_niter)
 157     return bound;
 158   return param_avg_loop_niter;
 159 }
 160
 161 struct iv_use;
 162
 163 /* Representation of an induction variable.  */
 164 struct iv
 165 {
 166   tree base;            /* Initial value of the iv.  */
 167   tree base_object;     /* The memory object that the induction variable points to.  */
 168   tree step;            /* Step of the iv (constant only).  */
 169   tree ssa_name;        /* The ssa name with the value.  */
 170   struct iv_use *nonlin_use;    /* The nonlinear use of this iv, if any (assumed from the field name; TODO confirm).  */
 171   bool biv_p;           /* True if this is a biv.  */
 172   bool no_overflow;     /* True if the iv doesn't overflow.  */
 173   bool have_address_use;/* For a biv, indicates whether it is used in any
 174                            address type use.  */
 175 };
 176
 177 /* Per-ssa version information (induction variable descriptions, etc.).  */
 178 struct version_info
 179 {
 180   tree name;            /* The ssa name this entry describes.  */
 181   struct iv *iv;        /* Description of the induction variable.  */
 182   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 183                            an expression that is not an induction variable.  */
 184   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 185   unsigned inv_id;      /* Id of the invariant.  */
 186 };
 187
 188 /* Types of iv uses; the two *_ADDRESS kinds are what address_p accepts.  */
 189 enum use_type
 190 {
 191   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 192   USE_REF_ADDRESS,      /* Use is an address for an explicit memory
 193                            reference.  */
 194   USE_PTR_ADDRESS,      /* Use is a pointer argument to a function in
 195                            cases where the expansion of the function
 196                            will turn the argument into a normal address.  */
 197   USE_COMPARE           /* Use is a comparison.  */
 198 };
 199
 200 /* Cost of a computation: a runtime cost with complexity as tie-breaker.  */
 201 class comp_cost
 202 {
 203 public:
 204   comp_cost (): cost (0), complexity (0), scratch (0)
 205   {}
 206
 207   comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
 208     : cost (cost), complexity (complexity), scratch (scratch)
 209   {}
 210
 211   /* Returns true if this cost is infinite, i.e. its cost field is INFTY.  */
 212   bool infinite_cost_p ();
 213
 214   /* Adds costs COST1 and COST2; infinite if either operand is infinite.  */
 215   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 216
 217   /* Adds COST to this comp_cost.  */
 218   comp_cost operator+= (comp_cost cost);
 219
 220   /* Adds constant C to this comp_cost.  */
 221   comp_cost operator+= (HOST_WIDE_INT c);
 222
 223   /* Subtracts constant C from this comp_cost.  */
 224   comp_cost operator-= (HOST_WIDE_INT c);
 225
 226   /* Divides this comp_cost by constant C, which must be nonzero.  */
 227   comp_cost operator/= (HOST_WIDE_INT c);
 228
 229   /* Multiplies this comp_cost by constant C.  */
 230   comp_cost operator*= (HOST_WIDE_INT c);
 231
 232   /* Subtracts COST2 from COST1.  */
 233   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 234
 235   /* Subtracts COST from this comp_cost.  */
 236   comp_cost operator-= (comp_cost cost);
 237
 238   /* Returns true if COST1 is smaller than COST2.  */
 239   friend bool operator< (comp_cost cost1, comp_cost cost2);
 240
 241   /* Returns true if COST1 and COST2 are equal.  */
 242   friend bool operator== (comp_cost cost1, comp_cost cost2);
 243
 244   /* Returns true if COST1 is smaller than or equal to COST2.  */
 245   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 246
 247   int64_t cost;         /* The runtime cost.  */
 248   unsigned complexity;  /* The estimate of the complexity of the code for
 249                            the computation (in no concrete units --
 250                            complexity field should be larger for more
 251                            complex expressions and addressing modes).  */
 252   int64_t scratch;      /* Scratch used during cost computation.  */
 253 };
 254
 255 static const comp_cost no_cost;  /* All fields zero.  */
 256 static const comp_cost infinite_cost (INFTY, 0, INFTY);  /* Cost INFTY.  */
 257
 258 bool
 259 comp_cost::infinite_cost_p ()
 260 {
 261   return this->cost == INFTY;  /* Only the cost field marks infinity.  */
 262 }
 263
 264 comp_cost
 265 operator+ (comp_cost cost1, comp_cost cost2)
 266 {
 267   /* Infinity absorbs everything.  */
 268   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 269     return infinite_cost;
 270
 271   int64_t sum = cost1.cost + cost2.cost;
 272   gcc_assert (sum < infinite_cost.cost);
 273
 274   return comp_cost (sum, cost1.complexity + cost2.complexity, cost1.scratch);
 275 }
 276
 277 comp_cost
 278 operator- (comp_cost cost1, comp_cost cost2)
 279 {
 280   /* An infinite minuend stays infinite.  */
 281   if (cost1.infinite_cost_p ())
 282     return infinite_cost;
 283
 284   /* An infinite subtrahend is not allowed for a finite minuend.  */
 285   gcc_assert (!cost2.infinite_cost_p ());
 286
 287   int64_t diff = cost1.cost - cost2.cost;
 288   gcc_assert (diff < infinite_cost.cost);
 289   return comp_cost (diff, cost1.complexity - cost2.complexity, cost1.scratch);
 290 }
 291
 292 comp_cost
 293 comp_cost::operator+= (comp_cost cost)
 294 {
 295   /* Delegate to the binary operator so infinity is handled in one place.  */
 296   return *this = *this + cost;
 297 }
 298
 299 comp_cost
 300 comp_cost::operator+= (HOST_WIDE_INT c)
 301 {
 302   /* Adding an infinite amount makes this cost infinite.  */
 303   if (c >= INFTY)
 304     cost = INFTY;
 305
 306   if (!infinite_cost_p ())
 307     {
 308       gcc_assert (cost + c < infinite_cost.cost);
 309       cost += c;
 310     }
 311   return *this;
 312 }
 313
 314 comp_cost
 315 comp_cost::operator-= (HOST_WIDE_INT c)
 316 {
 317   /* An infinite cost is left untouched.  */
 318   if (!infinite_cost_p ())
 319     {
 320       gcc_assert (cost - c < infinite_cost.cost);
 321       cost -= c;
 322     }
 323   return *this;
 324 }
 325
 326 comp_cost
 327 comp_cost::operator/= (HOST_WIDE_INT c)
 328 {
 329   /* The divisor is checked even when the cost is infinite.  */
 330   gcc_assert (c != 0);
 331
 332   /* An infinite cost is left untouched.  */
 333   if (!infinite_cost_p ())
 334     cost /= c;
 335   return *this;
 336 }
 337
 338 comp_cost
 339 comp_cost::operator*= (HOST_WIDE_INT c)
 340 {
 341   /* Scaling never changes an infinite cost.  */
 342   if (!infinite_cost_p ())
 343     {
 344       gcc_assert (cost * c < infinite_cost.cost);
 345       cost *= c;
 346     }
 347   return *this;
 348 }
 349
 350 comp_cost
 351 comp_cost::operator-= (comp_cost cost)
 352 {
 353   /* Delegate to the binary operator, which performs all the checks.  */
 354   return *this = *this - cost;
 355 }
 356
 357 bool
 358 operator< (comp_cost cost1, comp_cost cost2)
 359 {
 360   /* Order by runtime cost first; complexity only breaks ties.  */
 361   if (cost1.cost != cost2.cost)
 362     return cost1.cost < cost2.cost;
 363   return cost1.complexity < cost2.complexity;
 364 }
 365
 366 bool
 367 operator== (comp_cost cost1, comp_cost cost2)
 368 {
 369   /* The scratch field takes no part in the comparison.  */
 370   return !(cost1.cost != cost2.cost || cost1.complexity != cost2.complexity);
 371 }
 372
 373 bool
 374 operator<= (comp_cost cost1, comp_cost cost2)
 375 {
 376   return cost1 == cost2 || cost1 < cost2;  /* Built from == and <.  */
 377 }
 378
 379 struct iv_inv_expr_ent;
 380
 381 /* A candidate together with the cost of using it (candidate - cost pair).  */
 382 class cost_pair
 383 {
 384 public:
 385   struct iv_cand *cand; /* The candidate.  */
 386   comp_cost cost;       /* The cost.  */
 387   enum tree_code comp;  /* For iv elimination, the comparison.  */
 388   bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
 389                            preserved when representing iv_use with iv_cand.  */
 390   bitmap inv_exprs;     /* The list of newly created invariant expressions
 391                            when representing iv_use with iv_cand.  */
 392   tree value;           /* For final value elimination, the expression for
 393                            the final value of the iv.  For iv elimination,
 394                            the new bound to compare with.  */
 395 };
 396
 397 /* A single use of an induction variable.  */
 398 struct iv_use
 399 {
 400   unsigned id;          /* The id of the use.  */
 401   unsigned group_id;    /* The id of the group the use belongs to.  */
 402   enum use_type type;   /* Type of the use.  */
 403   tree mem_type;        /* The memory type to use when testing whether an
 404                            address is legitimate, and what the address's
 405                            cost is.  */
 406   struct iv *iv;        /* The induction variable it is based on.  */
 407   gimple *stmt;         /* Statement in which it occurs.  */
 408   tree *op_p;           /* The place where it occurs; may be NULL (dump_use checks).  */
 409
 410   tree addr_base;       /* Base address with const offset stripped.  */
 411   poly_uint64_pod addr_offset;
 412                         /* Const offset stripped from base address.  */
 413 };
 414
 415 /* Group of uses.  */
 416 struct iv_group
 417 {
 418   /* The id of the group.  */
 419   unsigned id;
 420   /* The type shared by all uses of the group.  */
 421   enum use_type type;
 422   /* The set of "related" IV candidates, plus the important ones.  */
 423   bitmap related_cands;
 424   /* Number of IV candidates in the cost_map.  */
 425   unsigned n_map_members;
 426   /* The costs with respect to the iv candidates.  */
 427   class cost_pair *cost_map;
 428   /* The selected candidate for the group.  */
 429   struct iv_cand *selected;
 430   /* True if this is a doloop use group.  */
 431   bool doloop_p;
 432   /* Uses in the group.  */
 433   vec<struct iv_use *> vuses;
 434 };
 435
 436 /* The position where the iv is computed (i.e. where it is incremented).  */
 437 enum iv_position
 438 {
 439   IP_NORMAL,            /* At the end, just before the exit condition.  */
 440   IP_END,               /* At the end of the latch block.  */
 441   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 442   IP_AFTER_USE,         /* Immediately after a specific use.  */
 443   IP_ORIGINAL           /* Where the original biv is incremented.  */
 444 };
 445
 446 /* The induction variable candidate.  */
 447 struct iv_cand
 448 {
 449   unsigned id;          /* The number of the candidate.  */
 450   bool important;       /* Whether this is an "important" candidate, i.e. such
 451                            that it should be considered by all uses.  */
 452   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 453   gimple *incremented_at;/* For original biv, the statement where it is
 454                            incremented.  */
 455   tree var_before;      /* The variable used for it before increment.  */
 456   tree var_after;       /* The variable used for it after increment.  */
 457   struct iv *iv;        /* The value of the candidate.  NULL for
 458                            "pseudocandidate" used to indicate the possibility
 459                            to replace the final value of an iv by direct
 460                            computation of the value.  */
 461   unsigned cost;        /* Cost of the candidate.  */
 462   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 463   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the use
 464                               at which it is incremented.  */
 465   bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
 466                            iv_cand.  */
 467   bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
 468                            handle it as a new invariant expression which will
 469                            be hoisted out of loop.  */
 470   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 471                            smaller type.  */
 472   bool doloop_p;        /* Whether this is a doloop candidate.  */
 473 };
 474
 475 /* Hashtable entry for a common candidate derived from iv uses.  */
 476 class iv_common_cand
 477 {
 478 public:
 479   tree base;  /* Base of the candidate; compared by the hasher's equal.  */
 480   tree step;  /* Step of the candidate; compared by the hasher's equal.  */
 481   /* IV uses from which this common candidate is derived.  */
 482   auto_vec<struct iv_use *> uses;
 483   hashval_t hash;  /* Hash value returned by iv_common_cand_hasher::hash.  */
 484 };
 485
 486 /* Hashtable helpers for common candidates.  */
 487
 488 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 489 {
 490   static inline hashval_t hash (const iv_common_cand *);
 491   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 492 };
 493
 494 /* Return the hash value stored in the common candidate CCAND.  */
 495
 496 inline hashval_t
 497 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 498 {
 499   return (*ccand).hash;
 500 }
 501
 502 /* Equality for common candidates: hash, base, step and base precision.  */
 503
 504 inline bool
 505 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 506                               const iv_common_cand *ccand2)
 507 {
 508   bool same_value = (ccand1->hash == ccand2->hash
 509                      && operand_equal_p (ccand1->base, ccand2->base, 0)
 510                      && operand_equal_p (ccand1->step, ccand2->step, 0));
 511   return same_value && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 512                         == TYPE_PRECISION (TREE_TYPE (ccand2->base)));
 513 }
 514
 515 /* Loop invariant expression hashtable entry.  */
 516
 517 struct iv_inv_expr_ent
 518 {
 519   /* Tree expression of the entry.  */
 520   tree expr;
 521   /* Unique identifier.  */
 522   int id;
 523   /* Hash value.  */
 524   hashval_t hash;
 525 };
 526
 527 /* Comparator ordering the iv_inv_expr_ent pointed to via A and B by id.  */
 528
 529 static int
 530 sort_iv_inv_expr_ent (const void *a, const void *b)
 531 {
 532   const iv_inv_expr_ent *lhs = *(const iv_inv_expr_ent * const *) a;
 533   const iv_inv_expr_ent *rhs = *(const iv_inv_expr_ent * const *) b;
 534
 535   /* The ids are compared as unsigned values.  */
 536   unsigned lhs_id = lhs->id;
 537   unsigned rhs_id = rhs->id;
 538
 539   if (lhs_id == rhs_id)
 540     return 0;
 541
 542   /* Negative when LHS sorts first, positive otherwise.  */
 543   return lhs_id < rhs_id ? -1 : 1;
 544 }
 545
 546 /* Hashtable helpers for loop invariant expressions.  */
 547
 548 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 549 {
 550   static inline hashval_t hash (const iv_inv_expr_ent *);
 551   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 552 };
 553
 554 /* Return true if TYPE is one of the two address use types.  */
 555
 556 inline bool
 557 address_p (use_type type)
 558 {
 559   return type == USE_PTR_ADDRESS || type == USE_REF_ADDRESS;
 560 }
 561
 562 /* Return the hash value stored in the invariant expression entry EXPR.  */
 563
 564 inline hashval_t
 565 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 566 {
 567   return (*expr).hash;
 568 }
 569
 570 /* Entries are equal when their hashes match and their trees are equal.  */
 571
 572 inline bool
 573 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 574                            const iv_inv_expr_ent *expr2)
 575 {
 576   return (expr1->hash != expr2->hash
 577           ? false : operand_equal_p (expr1->expr, expr2->expr, 0));
 578 }
 579
 580 struct ivopts_data
 581 {
 582   /* The currently optimized loop.  */
 583   class loop *current_loop;
 584   location_t loop_loc;
 585
 586   /* Numbers of iterations for all exits of the current loop.  */
 587   hash_map<edge, tree_niter_desc *> *niters;
 588
 589   /* Number of registers used in it.  */
 590   unsigned regs_used;
 591
 592   /* The allocated size of the version_info array.  */
 593   unsigned version_info_size;
 594
 595   /* The array of information for the ssa names, indexed by ssa version.  */
 596   struct version_info *version_info;
 597
 598   /* The hashtable of loop invariant expressions created
 599      by ivopt.  */
 600   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 601
 602   /* The bitmap of indices in version_info whose value was changed.  */
 603   bitmap relevant;
 604
 605   /* The groups of uses of induction variables.  */
 606   vec<iv_group *> vgroups;
 607
 608   /* The candidates.  */
 609   vec<iv_cand *> vcands;
 610
 611   /* A bitmap of important candidates.  */
 612   bitmap important_candidates;
 613
 614   /* Cache used by tree_to_aff_combination_expand.  */
 615   hash_map<tree, name_expansion *> *name_expansion_cache;
 616
 617   /* The hashtable of common candidates derived from iv uses.  */
 618   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 619
 620   /* The common candidates.  */
 621   vec<iv_common_cand *> iv_common_cands;
 622
 623   /* Hash map recording base object information of tree exp.  */
 624   hash_map<tree, tree> *base_object_map;
 625
 626   /* The maximum invariant variable id.  */
 627   unsigned max_inv_var_id;
 628
 629   /* The maximum invariant expression id.  */
 630   unsigned max_inv_expr_id;
 631
 632   /* Number of no_overflow BIVs which are not used in memory address.  */
 633   unsigned bivs_not_used_in_addr;
 634
 635   /* Obstack for iv structure.  */
 636   struct obstack iv_obstack;
 637
 638   /* Whether all candidates are considered when replacing a use, instead of
 639      just the related and important ones (NOTE(review): confirm polarity).  */
 640   bool consider_all_candidates;
 641
 642   /* Are we optimizing for speed?  */
 643   bool speed;
 644
 645   /* Whether the loop body includes any function calls.  */
 646   bool body_includes_call;
 647
 648   /* Whether the loop body can only be exited via single exit.  */
 649   bool loop_single_exit_p;
 650
 651   /* Whether the loop has doloop comparison use.  */
 652   bool doloop_use_p;
 653 };
 654
 655 /* An assignment of iv candidates to uses.  */
 656
 657 class iv_ca
 658 {
 659 public:
 660   /* The number of uses covered by the assignment.  */
 661   unsigned upto;
 662
 663   /* Number of uses that cannot be expressed by the candidates in the set.  */
 664   unsigned bad_groups;
 665
 666   /* Candidate assigned to a use, together with the related costs.  */
 667   class cost_pair **cand_for_group;
 668
 669   /* Number of times each candidate is used.  */
 670   unsigned *n_cand_uses;
 671
 672   /* The candidates used.  */
 673   bitmap cands;
 674
 675   /* The number of candidates in the set.  */
 676   unsigned n_cands;
 677
 678   /* The number of invariants needed, including both invariant variables and
 679      invariant expressions.  */
 680   unsigned n_invs;
 681
 682   /* Total cost of expressing uses.  */
 683   comp_cost cand_use_cost;
 684
 685   /* Total cost of candidates.  */
 686   int64_t cand_cost;
 687
 688   /* Number of times each invariant variable is used.  */
 689   unsigned *n_inv_var_uses;
 690
 691   /* Number of times each invariant expression is used.  */
 692   unsigned *n_inv_expr_uses;
 693
 694   /* Total cost of the assignment.  */
 695   comp_cost cost;
 696 };
 697
 698 /* Difference of two iv candidate assignments, as a linked list of changes.  */
 699
 700 struct iv_ca_delta
 701 {
 702   /* The group whose assignment changed.  */
 703   struct iv_group *group;
 704
 705   /* An old assignment (for rollback purposes).  */
 706   class cost_pair *old_cp;
 707
 708   /* A new assignment.  */
 709   class cost_pair *new_cp;
 710
 711   /* Next change in the list.  */
 712   struct iv_ca_delta *next;
 713 };
 714
 715 /* Bound on the number of candidates below which all candidates are considered.  */
 716
 717 #define CONSIDER_ALL_CANDIDATES_BOUND \
 718   ((unsigned) param_iv_consider_all_candidates_bound)
 719
 720 /* If there are more iv occurrences than this, we just give up (it is quite
 721    unlikely that optimizing such a loop would help, and it would take ages).  */
 722
 723 #define MAX_CONSIDERED_GROUPS \
 724   ((unsigned) param_iv_max_considered_uses)
 725
 726 /* If there are at most this number of ivs in the set, always try removing
 727    unnecessary ivs from the set.  */
 728
 729 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 730   ((unsigned) param_iv_always_prune_cand_set_bound)
 731
 732 /* The list of trees whose decl_rtl field must be reset is stored
 733    here.  */
 734
 735 static vec<tree> decl_rtl_to_reset;
 736
 737 static comp_cost force_expr_to_var_cost (tree, bool);
 738
 739 /* Return the single exit of LOOP if it dominates the latch, NULL otherwise.  */
 740
 741 edge
 742 single_dom_exit (class loop *loop)
 743 {
 744   edge the_exit = single_exit (loop);
 745
 746   /* Either there is no unique exit at all, or the block it leaves from
 747      fails the just_once_each_iteration_p check.  */
 748   if (the_exit == NULL
 749       || !just_once_each_iteration_p (loop, the_exit->src))
 750     return NULL;
 751
 752   return the_exit;
 753 }
 754
 755 /* Print a description of induction variable IV to FILE.  The SSA name is
 756    printed only when DUMP_NAME is true and IV has one.  Every line is
 757    indented by two spaces per INDENT_LEVEL, with at most four levels.  */
 758
 759 void
 760 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 761 {
 762   /* Eight blanks; the indentation used is a suffix of this string.  */
 763   const char blanks[9] = "        ";
 764   unsigned level = indent_level > 4 ? 4 : indent_level;
 765   const char *pad = &blanks[8 - 2 * level];
 766
 767   fprintf (file, "%sIV struct:\n", pad);
 768   if (dump_name && iv->ssa_name)
 769     {
 770       fprintf (file, "%s  SSA_NAME:\t", pad);
 771       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 772       fprintf (file, "\n");
 773     }
 774
 775   fprintf (file, "%s  Type:\t", pad);
 776   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 777   fprintf (file, "\n");
 778
 779   fprintf (file, "%s  Base:\t", pad);
 780   print_generic_expr (file, iv->base, TDF_SLIM);
 781   fprintf (file, "\n");
 782
 783   fprintf (file, "%s  Step:\t", pad);
 784   print_generic_expr (file, iv->step, TDF_SLIM);
 785   fprintf (file, "\n");
 786
 787   if (iv->base_object)
 788     {
 789       fprintf (file, "%s  Object:\t", pad);
 790       print_generic_expr (file, iv->base_object, TDF_SLIM);
 791       fprintf (file, "\n");
 792     }
 793
 794   char biv_mark = iv->biv_p ? 'Y' : 'N';
 795   const char *overflow_text = iv->no_overflow ? "No-overflow" : "Overflow";
 796
 797   fprintf (file, "%s  Biv:\t%c\n", pad, biv_mark);
 798   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 799            pad, overflow_text);
 800 }
 801
 802 /* Dump USE to FILE; the group and use ids are unsigned, hence %u.  */
 803
 804 void
 805 dump_use (FILE *file, struct iv_use *use)
 806 {
 807   fprintf (file, "  Use %u.%u:\n", use->group_id, use->id);
 808   fprintf (file, "    At stmt:\t");
 809   print_gimple_stmt (file, use->stmt, 0);
 810   fprintf (file, "    At pos:\t");
 811   if (use->op_p)  /* The operand position is printed only when present.  */
 812     print_generic_expr (file, *use->op_p, TDF_SLIM);
 813   fprintf (file, "\n");
 814   dump_iv (file, use->iv, false, 2);
 815 }
 816
 817 /* Dump every use group recorded in DATA, and the uses in it, to FILE.  */
 818
 819 void
 820 dump_groups (FILE *file, struct ivopts_data *data)
 821 {
 822   for (unsigned i = 0; i < data->vgroups.length (); i++)
 823     {
 824       struct iv_group *group = data->vgroups[i];
 825
 826       /* The group id is unsigned, so print it with %u.  */
 827       fprintf (file, "Group %u:\n", group->id);
 828       if (group->type == USE_NONLINEAR_EXPR)
 829         fprintf (file, "  Type:\tGENERIC\n");
 830       else if (group->type == USE_REF_ADDRESS)
 831         fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
 832       else if (group->type == USE_PTR_ADDRESS)
 833         fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
 834       else
 835         {
 836           /* USE_COMPARE is the only remaining use type.  */
 837           gcc_assert (group->type == USE_COMPARE);
 838           fprintf (file, "  Type:\tCOMPARE\n");
 839         }
 840       for (unsigned j = 0; j < group->vuses.length (); j++)
 841         dump_use (file, group->vuses[j]);
 842     }
 843 }
 844
 845 /* Dump induction variable candidate CAND to FILE.  */
 846
 847 void
 848 dump_cand (FILE *file, struct iv_cand *cand)
 849 {
 850   struct iv *iv = cand->iv;
 851
 852   /* The candidate and use ids are unsigned, hence %u.  */
 853   fprintf (file, "Candidate %u:\n", cand->id);
 854   if (cand->inv_vars)
 855     {
 856       fprintf (file, "  Depend on inv.vars: ");
 857       dump_bitmap (file, cand->inv_vars);
 858     }
 859   if (cand->inv_exprs)
 860     {
 861       fprintf (file, "  Depend on inv.exprs: ");
 862       dump_bitmap (file, cand->inv_exprs);
 863     }
 864
 865   if (cand->var_before)
 866     {
 867       fprintf (file, "  Var befor: ");
 868       print_generic_expr (file, cand->var_before, TDF_SLIM);
 869       fprintf (file, "\n");
 870     }
 871   if (cand->var_after)
 872     {
 873       fprintf (file, "  Var after: ");
 874       print_generic_expr (file, cand->var_after, TDF_SLIM);
 875       fprintf (file, "\n");
 876     }
 877
 878   switch (cand->pos)
 879     {
 880     case IP_NORMAL:
 881       fprintf (file, "  Incr POS: before exit test\n");
 882       break;
 883     case IP_BEFORE_USE:
 884       fprintf (file, "  Incr POS: before use %u\n", cand->ainc_use->id);
 885       break;
 886     case IP_AFTER_USE:
 887       fprintf (file, "  Incr POS: after use %u\n", cand->ainc_use->id);
 888       break;
 889     case IP_END:
 890       fprintf (file, "  Incr POS: at end\n");
 891       break;
 892     case IP_ORIGINAL:
 893       fprintf (file, "  Incr POS: orig biv\n");
 894       break;
 895     }
 896
 897   /* CAND->iv is documented as possibly NULL (a pseudocandidate); dump_iv
 898      dereferences its argument unconditionally, so skip it in that case.  */
 899   if (iv)
 900     dump_iv (file, iv, false, 1);
 901 }
 902
 903 /* Return the version_info entry of DATA for ssa version VER.  */
 904
 905 static inline struct version_info *
 906 ver_info (struct ivopts_data *data, unsigned ver)
 907 {
 908   return &data->version_info[ver];
 909 }
 910
 911 /* Return the version_info entry of DATA for the ssa name NAME.  */
 912
 913 static inline struct version_info *
 914 name_info (struct ivopts_data *data, tree name)
 915 {
 916   return data->version_info + SSA_NAME_VERSION (name);
 917 }
 918
 919 /* Return true if STMT comes after the point in LOOP at which IP_NORMAL
 920    ivs are emitted.  */
 921
 922 static bool
 923 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
 924 {
 925   basic_block normal_bb = ip_normal_pos (loop);
 926   basic_block stmt_bb = gimple_bb (stmt);
 927
 928   gcc_assert (normal_bb);
 929
 930   /* Anything in the latch qualifies.  */
 931   if (stmt_bb == loop->latch)
 932     return true;
 933
 934   /* Otherwise only the last statement of the IP_NORMAL block does.  */
 935   return stmt_bb == normal_bb && stmt == last_stmt (normal_bb);
 936 }
 937
 938 /* Returns true if STMT if after the place where the original induction
 939    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 940    if the positions are identical.  */
 941
 942 static bool
 943 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 944 {
 945   basic_block cand_bb = gimple_bb (cand->incremented_at);
 946   basic_block stmt_bb = gimple_bb (stmt);
 947
 948   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 949     return false;
 950
 951   if (stmt_bb != cand_bb)
 952     return true;
 953
 954   if (true_if_equal
 955       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 956     return true;
 957   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 958 }
 959
 960 /* Returns true if STMT if after the place where the induction variable
 961    CAND is incremented in LOOP.  */
 962
 963 static bool
 964 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
 965 {
 966   switch (cand->pos)
 967     {
 968     case IP_END:
 969       return false;
 970
 971     case IP_NORMAL:
 972       return stmt_after_ip_normal_pos (loop, stmt);
 973
 974     case IP_ORIGINAL:
 975     case IP_AFTER_USE:
 976       return stmt_after_inc_pos (cand, stmt, false);
 977
 978     case IP_BEFORE_USE:
 979       return stmt_after_inc_pos (cand, stmt, true);
 980
 981     default:
 982       gcc_unreachable ();
 983     }
 984 }
 985
 986 /* walk_tree callback for contains_abnormal_ssa_name_p.  */
 987
 988 static tree
 989 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
 990 {
 991   if (TREE_CODE (*tp) == SSA_NAME
 992       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
 993     return *tp;
 994
 995   if (!EXPR_P (*tp))
 996     *walk_subtrees = 0;
 997
 998   return NULL_TREE;
 999 }
1000
1001 /* Returns true if EXPR contains a ssa name that occurs in an
1002    abnormal phi node.  */
1003
1004 bool
1005 contains_abnormal_ssa_name_p (tree expr)
1006 {
1007   return walk_tree_without_duplicates
1008            (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1009 }
1010
1011 /*  Returns the structure describing number of iterations determined from
1012     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1013
1014 static class tree_niter_desc *
1015 niter_for_exit (struct ivopts_data *data, edge exit)
1016 {
1017   class tree_niter_desc *desc;
1018   tree_niter_desc **slot;
1019
1020   if (!data->niters)
1021     {
1022       data->niters = new hash_map<edge, tree_niter_desc *>;
1023       slot = NULL;
1024     }
1025   else
1026     slot = data->niters->get (exit);
1027
1028   if (!slot)
1029     {
1030       /* Try to determine number of iterations.  We cannot safely work with ssa
1031          names that appear in phi nodes on abnormal edges, so that we do not
1032          create overlapping life ranges for them (PR 27283).  */
1033       desc = XNEW (class tree_niter_desc);
1034       if (!number_of_iterations_exit (data->current_loop,
1035                                       exit, desc, true)
1036           || contains_abnormal_ssa_name_p (desc->niter))
1037         {
1038           XDELETE (desc);
1039           desc = NULL;
1040         }
1041       data->niters->put (exit, desc);
1042     }
1043   else
1044     desc = *slot;
1045
1046   return desc;
1047 }
1048
1049 /* Returns the structure describing number of iterations determined from
1050    single dominating exit of DATA->current_loop, or NULL if something
1051    goes wrong.  */
1052
1053 static class tree_niter_desc *
1054 niter_for_single_dom_exit (struct ivopts_data *data)
1055 {
1056   edge exit = single_dom_exit (data->current_loop);
1057
1058   if (!exit)
1059     return NULL;
1060
1061   return niter_for_exit (data, exit);
1062 }
1063
1064 /* Initializes data structures used by the iv optimization pass, stored
1065    in DATA.  */
1066
1067 static void
1068 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1069 {
1070   data->version_info_size = 2 * num_ssa_names;
1071   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1072   data->relevant = BITMAP_ALLOC (NULL);
1073   data->important_candidates = BITMAP_ALLOC (NULL);
1074   data->max_inv_var_id = 0;
1075   data->max_inv_expr_id = 0;
1076   data->niters = NULL;
1077   data->vgroups.create (20);
1078   data->vcands.create (20);
1079   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1080   data->name_expansion_cache = NULL;
1081   data->base_object_map = NULL;
1082   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1083   data->iv_common_cands.create (20);
1084   decl_rtl_to_reset.create (20);
1085   gcc_obstack_init (&data->iv_obstack);
1086 }
1087
1088 /* walk_tree callback for determine_base_object.  */
1089
1090 static tree
1091 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1092 {
1093   tree_code code = TREE_CODE (*tp);
1094   tree obj = NULL_TREE;
1095   if (code == ADDR_EXPR)
1096     {
1097       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1098       if (!base)
1099         obj = *tp;
1100       else if (TREE_CODE (base) != MEM_REF)
1101         obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1102     }
1103   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1104         obj = fold_convert (ptr_type_node, *tp);
1105
1106   if (!obj)
1107     {
1108       if (!EXPR_P (*tp))
1109         *walk_subtrees = 0;
1110
1111       return NULL_TREE;
1112     }
1113   /* Record special node for multiple base objects and stop.  */
1114   if (*static_cast<tree *> (wdata))
1115     {
1116       *static_cast<tree *> (wdata) = integer_zero_node;
1117       return integer_zero_node;
1118     }
1119   /* Record the base object and continue looking.  */
1120   *static_cast<tree *> (wdata) = obj;
1121   return NULL_TREE;
1122 }
1123
1124 /* Returns a memory object to that EXPR points with caching.  Return NULL if we
1125    are able to determine that it does not point to any such object; specially
1126    return integer_zero_node if EXPR contains multiple base objects.  */
1127
1128 static tree
1129 determine_base_object (struct ivopts_data *data, tree expr)
1130 {
1131   tree *slot, obj = NULL_TREE;
1132   if (data->base_object_map)
1133     {
1134       if ((slot = data->base_object_map->get(expr)) != NULL)
1135         return *slot;
1136     }
1137   else
1138     data->base_object_map = new hash_map<tree, tree>;
1139
1140   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1141   data->base_object_map->put (expr, obj);
1142   return obj;
1143 }
1144
1145 /* Return true if address expression with non-DECL_P operand appears
1146    in EXPR.  */
1147
1148 static bool
1149 contain_complex_addr_expr (tree expr)
1150 {
1151   bool res = false;
1152
1153   STRIP_NOPS (expr);
1154   switch (TREE_CODE (expr))
1155     {
1156     case POINTER_PLUS_EXPR:
1157     case PLUS_EXPR:
1158     case MINUS_EXPR:
1159       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1160       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1161       break;
1162
1163     case ADDR_EXPR:
1164       return (!DECL_P (TREE_OPERAND (expr, 0)));
1165
1166     default:
1167       return false;
1168     }
1169
1170   return res;
1171 }
1172
1173 /* Allocates an induction variable with given initial value BASE and step STEP
1174    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1175
1176 static struct iv *
1177 alloc_iv (struct ivopts_data *data, tree base, tree step,
1178           bool no_overflow = false)
1179 {
1180   tree expr = base;
1181   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1182                                               sizeof (struct iv));
1183   gcc_assert (step != NULL_TREE);
1184
1185   /* Lower address expression in base except ones with DECL_P as operand.
1186      By doing this:
1187        1) More accurate cost can be computed for address expressions;
1188        2) Duplicate candidates won't be created for bases in different
1189           forms, like &a[0] and &a.  */
1190   STRIP_NOPS (expr);
1191   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1192       || contain_complex_addr_expr (expr))
1193     {
1194       aff_tree comb;
1195       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1196       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1197     }
1198
1199   iv->base = base;
1200   iv->base_object = determine_base_object (data, base);
1201   iv->step = step;
1202   iv->biv_p = false;
1203   iv->nonlin_use = NULL;
1204   iv->ssa_name = NULL_TREE;
1205   if (!no_overflow
1206        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1207                               base, step))
1208     no_overflow = true;
1209   iv->no_overflow = no_overflow;
1210   iv->have_address_use = false;
1211
1212   return iv;
1213 }
1214
1215 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1216    doesn't overflow.  */
1217
1218 static void
1219 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1220         bool no_overflow)
1221 {
1222   struct version_info *info = name_info (data, iv);
1223
1224   gcc_assert (!info->iv);
1225
1226   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1227   info->iv = alloc_iv (data, base, step, no_overflow);
1228   info->iv->ssa_name = iv;
1229 }
1230
1231 /* Finds induction variable declaration for VAR.  */
1232
1233 static struct iv *
1234 get_iv (struct ivopts_data *data, tree var)
1235 {
1236   basic_block bb;
1237   tree type = TREE_TYPE (var);
1238
1239   if (!POINTER_TYPE_P (type)
1240       && !INTEGRAL_TYPE_P (type))
1241     return NULL;
1242
1243   if (!name_info (data, var)->iv)
1244     {
1245       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1246
1247       if (!bb
1248           || !flow_bb_inside_loop_p (data->current_loop, bb))
1249         set_iv (data, var, var, build_int_cst (type, 0), true);
1250     }
1251
1252   return name_info (data, var)->iv;
1253 }
1254
1255 /* Return the first non-invariant ssa var found in EXPR.  */
1256
1257 static tree
1258 extract_single_var_from_expr (tree expr)
1259 {
1260   int i, n;
1261   tree tmp;
1262   enum tree_code code;
1263
1264   if (!expr || is_gimple_min_invariant (expr))
1265     return NULL;
1266
1267   code = TREE_CODE (expr);
1268   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1269     {
1270       n = TREE_OPERAND_LENGTH (expr);
1271       for (i = 0; i < n; i++)
1272         {
1273           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1274
1275           if (tmp)
1276             return tmp;
1277         }
1278     }
1279   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1280 }
1281
1282 /* Finds basic ivs.  */
1283
1284 static bool
1285 find_bivs (struct ivopts_data *data)
1286 {
1287   gphi *phi;
1288   affine_iv iv;
1289   tree step, type, base, stop;
1290   bool found = false;
1291   class loop *loop = data->current_loop;
1292   gphi_iterator psi;
1293
1294   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1295     {
1296       phi = psi.phi ();
1297
1298       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1299         continue;
1300
1301       if (virtual_operand_p (PHI_RESULT (phi)))
1302         continue;
1303
1304       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1305         continue;
1306
1307       if (integer_zerop (iv.step))
1308         continue;
1309
1310       step = iv.step;
1311       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1312       /* Stop expanding iv base at the first ssa var referred by iv step.
1313          Ideally we should stop at any ssa var, because that's expensive
1314          and unusual to happen, we just do it on the first one.
1315
1316          See PR64705 for the rationale.  */
1317       stop = extract_single_var_from_expr (step);
1318       base = expand_simple_operations (base, stop);
1319       if (contains_abnormal_ssa_name_p (base)
1320           || contains_abnormal_ssa_name_p (step))
1321         continue;
1322
1323       type = TREE_TYPE (PHI_RESULT (phi));
1324       base = fold_convert (type, base);
1325       if (step)
1326         {
1327           if (POINTER_TYPE_P (type))
1328             step = convert_to_ptrofftype (step);
1329           else
1330             step = fold_convert (type, step);
1331         }
1332
1333       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1334       found = true;
1335     }
1336
1337   return found;
1338 }
1339
1340 /* Marks basic ivs.  */
1341
1342 static void
1343 mark_bivs (struct ivopts_data *data)
1344 {
1345   gphi *phi;
1346   gimple *def;
1347   tree var;
1348   struct iv *iv, *incr_iv;
1349   class loop *loop = data->current_loop;
1350   basic_block incr_bb;
1351   gphi_iterator psi;
1352
1353   data->bivs_not_used_in_addr = 0;
1354   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1355     {
1356       phi = psi.phi ();
1357
1358       iv = get_iv (data, PHI_RESULT (phi));
1359       if (!iv)
1360         continue;
1361
1362       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1363       def = SSA_NAME_DEF_STMT (var);
1364       /* Don't mark iv peeled from other one as biv.  */
1365       if (def
1366           && gimple_code (def) == GIMPLE_PHI
1367           && gimple_bb (def) == loop->header)
1368         continue;
1369
1370       incr_iv = get_iv (data, var);
1371       if (!incr_iv)
1372         continue;
1373
1374       /* If the increment is in the subloop, ignore it.  */
1375       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1376       if (incr_bb->loop_father != data->current_loop
1377           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1378         continue;
1379
1380       iv->biv_p = true;
1381       incr_iv->biv_p = true;
1382       if (iv->no_overflow)
1383         data->bivs_not_used_in_addr++;
1384       if (incr_iv->no_overflow)
1385         data->bivs_not_used_in_addr++;
1386     }
1387 }
1388
1389 /* Checks whether STMT defines a linear induction variable and stores its
1390    parameters to IV.  */
1391
1392 static bool
1393 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1394 {
1395   tree lhs, stop;
1396   class loop *loop = data->current_loop;
1397
1398   iv->base = NULL_TREE;
1399   iv->step = NULL_TREE;
1400
1401   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1402     return false;
1403
1404   lhs = gimple_assign_lhs (stmt);
1405   if (TREE_CODE (lhs) != SSA_NAME)
1406     return false;
1407
1408   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1409     return false;
1410
1411   /* Stop expanding iv base at the first ssa var referred by iv step.
1412      Ideally we should stop at any ssa var, because that's expensive
1413      and unusual to happen, we just do it on the first one.
1414
1415      See PR64705 for the rationale.  */
1416   stop = extract_single_var_from_expr (iv->step);
1417   iv->base = expand_simple_operations (iv->base, stop);
1418   if (contains_abnormal_ssa_name_p (iv->base)
1419       || contains_abnormal_ssa_name_p (iv->step))
1420     return false;
1421
1422   /* If STMT could throw, then do not consider STMT as defining a GIV.
1423      While this will suppress optimizations, we cannot safely delete this
1424      GIV and associated statements, even if it appears it is not used.  */
1425   if (stmt_could_throw_p (cfun, stmt))
1426     return false;
1427
1428   return true;
1429 }
1430
1431 /* Finds general ivs in statement STMT.  */
1432
1433 static void
1434 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1435 {
1436   affine_iv iv;
1437
1438   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1439     return;
1440
1441   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1442 }
1443
1444 /* Finds general ivs in basic block BB.  */
1445
1446 static void
1447 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1448 {
1449   gimple_stmt_iterator bsi;
1450
1451   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1452     find_givs_in_stmt (data, gsi_stmt (bsi));
1453 }
1454
1455 /* Finds general ivs.  */
1456
1457 static void
1458 find_givs (struct ivopts_data *data)
1459 {
1460   class loop *loop = data->current_loop;
1461   basic_block *body = get_loop_body_in_dom_order (loop);
1462   unsigned i;
1463
1464   for (i = 0; i < loop->num_nodes; i++)
1465     find_givs_in_bb (data, body[i]);
1466   free (body);
1467 }
1468
1469 /* For each ssa name defined in LOOP determines whether it is an induction
1470    variable and if so, its initial value and step.  */
1471
1472 static bool
1473 find_induction_variables (struct ivopts_data *data)
1474 {
1475   unsigned i;
1476   bitmap_iterator bi;
1477
1478   if (!find_bivs (data))
1479     return false;
1480
1481   find_givs (data);
1482   mark_bivs (data);
1483
1484   if (dump_file && (dump_flags & TDF_DETAILS))
1485     {
1486       class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1487
1488       if (niter)
1489         {
1490           fprintf (dump_file, "  number of iterations ");
1491           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1492           if (!integer_zerop (niter->may_be_zero))
1493             {
1494               fprintf (dump_file, "; zero if ");
1495               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1496             }
1497           fprintf (dump_file, "\n");
1498         };
1499
1500       fprintf (dump_file, "\n<Induction Vars>:\n");
1501       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1502         {
1503           struct version_info *info = ver_info (data, i);
1504           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1505             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1506         }
1507     }
1508
1509   return true;
1510 }
1511
1512 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1513    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1514    is the const offset stripped from IV base and MEM_TYPE is the type
1515    of the memory being addressed.  For uses of other types, ADDR_BASE
1516    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1517
1518 static struct iv_use *
1519 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1520             gimple *stmt, enum use_type type, tree mem_type,
1521             tree addr_base, poly_uint64 addr_offset)
1522 {
1523   struct iv_use *use = XCNEW (struct iv_use);
1524
1525   use->id = group->vuses.length ();
1526   use->group_id = group->id;
1527   use->type = type;
1528   use->mem_type = mem_type;
1529   use->iv = iv;
1530   use->stmt = stmt;
1531   use->op_p = use_p;
1532   use->addr_base = addr_base;
1533   use->addr_offset = addr_offset;
1534
1535   group->vuses.safe_push (use);
1536   return use;
1537 }
1538
1539 /* Checks whether OP is a loop-level invariant and if so, records it.
1540    NONLINEAR_USE is true if the invariant is used in a way we do not
1541    handle specially.  */
1542
1543 static void
1544 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1545 {
1546   basic_block bb;
1547   struct version_info *info;
1548
1549   if (TREE_CODE (op) != SSA_NAME
1550       || virtual_operand_p (op))
1551     return;
1552
1553   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1554   if (bb
1555       && flow_bb_inside_loop_p (data->current_loop, bb))
1556     return;
1557
1558   info = name_info (data, op);
1559   info->name = op;
1560   info->has_nonlin_use |= nonlinear_use;
1561   if (!info->inv_id)
1562     info->inv_id = ++data->max_inv_var_id;
1563   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1564 }
1565
1566 /* Record a group of TYPE.  */
1567
1568 static struct iv_group *
1569 record_group (struct ivopts_data *data, enum use_type type)
1570 {
1571   struct iv_group *group = XCNEW (struct iv_group);
1572
1573   group->id = data->vgroups.length ();
1574   group->type = type;
1575   group->related_cands = BITMAP_ALLOC (NULL);
1576   group->vuses.create (1);
1577   group->doloop_p = false;
1578
1579   data->vgroups.safe_push (group);
1580   return group;
1581 }
1582
1583 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1584    New group will be created if there is no existing group for the use.
1585    MEM_TYPE is the type of memory being addressed, or NULL if this
1586    isn't an address reference.  */
1587
1588 static struct iv_use *
1589 record_group_use (struct ivopts_data *data, tree *use_p,
1590                   struct iv *iv, gimple *stmt, enum use_type type,
1591                   tree mem_type)
1592 {
1593   tree addr_base = NULL;
1594   struct iv_group *group = NULL;
1595   poly_uint64 addr_offset = 0;
1596
1597   /* Record non address type use in a new group.  */
1598   if (address_p (type))
1599     {
1600       unsigned int i;
1601
1602       addr_base = strip_offset (iv->base, &addr_offset);
1603       for (i = 0; i < data->vgroups.length (); i++)
1604         {
1605           struct iv_use *use;
1606
1607           group = data->vgroups[i];
1608           use = group->vuses[0];
1609           if (!address_p (use->type))
1610             continue;
1611
1612           /* Check if it has the same stripped base and step.  */
1613           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1614               && operand_equal_p (iv->step, use->iv->step, 0)
1615               && operand_equal_p (addr_base, use->addr_base, 0))
1616             break;
1617         }
1618       if (i == data->vgroups.length ())
1619         group = NULL;
1620     }
1621
1622   if (!group)
1623     group = record_group (data, type);
1624
1625   return record_use (group, use_p, iv, stmt, type, mem_type,
1626                      addr_base, addr_offset);
1627 }
1628
1629 /* Checks whether the use OP is interesting and if so, records it.  */
1630
1631 static struct iv_use *
1632 find_interesting_uses_op (struct ivopts_data *data, tree op)
1633 {
1634   struct iv *iv;
1635   gimple *stmt;
1636   struct iv_use *use;
1637
1638   if (TREE_CODE (op) != SSA_NAME)
1639     return NULL;
1640
1641   iv = get_iv (data, op);
1642   if (!iv)
1643     return NULL;
1644
1645   if (iv->nonlin_use)
1646     {
1647       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1648       return iv->nonlin_use;
1649     }
1650
1651   if (integer_zerop (iv->step))
1652     {
1653       record_invariant (data, op, true);
1654       return NULL;
1655     }
1656
1657   stmt = SSA_NAME_DEF_STMT (op);
1658   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1659
1660   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1661   iv->nonlin_use = use;
1662   return use;
1663 }
1664
/* Indicate how a compare type iv_use can be handled.  This is the result
   of extract_cond_operands.  */
enum comp_iv_rewrite
{
  /* The comparison cannot be rewritten as a compare type iv_use, e.g.
     because neither operand is an iv with nonzero step.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1678
/* Given a condition in statement STMT, classify how the comparison can be
   rewritten and return the classification:

     COMP_IV_EXPR_2  both operands are ivs with nonzero step;
     COMP_IV_ELIM    one operand is an iv with nonzero step and the other
                     one has zero step (a loop invariant);
     COMP_IV_EXPR    one operand is an iv with nonzero step and no iv
                     description is available for the other one;
     COMP_IV_NA      none of the above.

   Each of CONTROL_VAR, BOUND, IV_VAR and IV_BOUND may be NULL; those that
   are not are set as follows.  For COMP_IV_EXPR and COMP_IV_ELIM,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the other operand, and IV_VAR and IV_BOUND to the corresponding
   induction variable descriptions, the operands being swapped if the iv
   is the second one.  For COMP_IV_EXPR_2 and COMP_IV_NA the operands are
   reported in their original order.  */

static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  They have
     static storage and are re-initialized on every call below.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  /* Take the operands from the GIMPLE_COND, or from the first two rhs
     operands of any other statement.
     NOTE(review): the else branch does not check that STMT is an
     assignment; presumably callers only pass conditions or comparison
     assignments -- confirm.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  /* An operand that is not an ssa name keeps the zero-step CONST_IV; for
     an ssa name get_iv may return NULL.  */
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of comparison are IVs.  We can express ivs on both end.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If none side of comparison is IV.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* Control variable may be on the other side.  After the swap, IV0 is the
     side with nonzero step.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

end:
  /* Report the operands through whichever output pointers were given.  */
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}
1756
1757 /* Checks whether the condition in STMT is interesting and if so,
1758    records it.  */
1759
1760 static void
1761 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1762 {
1763   tree *var_p, *bound_p;
1764   struct iv *var_iv, *bound_iv;
1765   enum comp_iv_rewrite ret;
1766
1767   ret = extract_cond_operands (data, stmt,
1768                                &var_p, &bound_p, &var_iv, &bound_iv);
1769   if (ret == COMP_IV_NA)
1770     {
1771       find_interesting_uses_op (data, *var_p);
1772       find_interesting_uses_op (data, *bound_p);
1773       return;
1774     }
1775
1776   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1777   /* Record compare type iv_use for iv on the other side of comparison.  */
1778   if (ret == COMP_IV_EXPR_2)
1779     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1780 }
1781
1782 /* Returns the outermost loop EXPR is obviously invariant in
1783    relative to the loop LOOP, i.e. if all its operands are defined
1784    outside of the returned loop.  Returns NULL if EXPR is not
1785    even obviously invariant in LOOP.  */
1786
1787 class loop *
1788 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1789 {
1790   basic_block def_bb;
1791   unsigned i, len;
1792
1793   if (is_gimple_min_invariant (expr))
1794     return current_loops->tree_root;
1795
1796   if (TREE_CODE (expr) == SSA_NAME)
1797     {
1798       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1799       if (def_bb)
1800         {
1801           if (flow_bb_inside_loop_p (loop, def_bb))
1802             return NULL;
1803           return superloop_at_depth (loop,
1804                                      loop_depth (def_bb->loop_father) + 1);
1805         }
1806
1807       return current_loops->tree_root;
1808     }
1809
1810   if (!EXPR_P (expr))
1811     return NULL;
1812
1813   unsigned maxdepth = 0;
1814   len = TREE_OPERAND_LENGTH (expr);
1815   for (i = 0; i < len; i++)
1816     {
1817       class loop *ivloop;
1818       if (!TREE_OPERAND (expr, i))
1819         continue;
1820
1821       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1822       if (!ivloop)
1823         return NULL;
1824       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1825     }
1826
1827   return superloop_at_depth (loop, maxdepth);
1828 }
1829
1830 /* Returns true if expression EXPR is obviously invariant in LOOP,
1831    i.e. if all its operands are defined outside of the LOOP.  LOOP
1832    should not be the function body.  */
1833
1834 bool
1835 expr_invariant_in_loop_p (class loop *loop, tree expr)
1836 {
1837   basic_block def_bb;
1838   unsigned i, len;
1839
1840   gcc_assert (loop_depth (loop) > 0);
1841
1842   if (is_gimple_min_invariant (expr))
1843     return true;
1844
1845   if (TREE_CODE (expr) == SSA_NAME)
1846     {
1847       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1848       if (def_bb
1849           && flow_bb_inside_loop_p (loop, def_bb))
1850         return false;
1851
1852       return true;
1853     }
1854
1855   if (!EXPR_P (expr))
1856     return false;
1857
1858   len = TREE_OPERAND_LENGTH (expr);
1859   for (i = 0; i < len; i++)
1860     if (TREE_OPERAND (expr, i)
1861         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1862       return false;
1863
1864   return true;
1865 }
1866
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.
   Returns NULL if EXPR is NULL_TREE or invariant, or if no such biv is
   found.  When several operands could lead to a biv, the first one found
   is returned.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  /* For an expression tree, search the operands in order.  */
  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
          if (iv)
            return iv;
        }
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  /* An ssa name that is itself a biv ends the search; one without an iv
     description or with zero step cannot be derived from a biv.  */
  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  /* Otherwise trace through the statement defining the name.  */
  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  Note the test
         rejects any PHI whose block belongs directly to the current
         loop.  */
      if (phi_bb->loop_father == data->current_loop)
        return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
        return NULL;

      /* Search the PHI arguments in order.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
        {
          tree use = USE_FROM_PTR (use_p);
          iv = find_deriving_biv_for_expr (data, use);
          if (iv)
            return iv;
        }
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  /* A single-operand right-hand side is searched as an expression.  */
  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
         are simple.  The second operand is searched first; if it yields
         nothing, fall through to search the first operand.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
        return iv;
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
1963
1964 /* Record BIV, its predecessor and successor that they are used in
1965    address type uses.  */
1966
1967 static void
1968 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1969 {
1970   unsigned i;
1971   tree type, base_1, base_2;
1972   bitmap_iterator bi;
1973
1974   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1975       || biv->have_address_use || !biv->no_overflow)
1976     return;
1977
1978   type = TREE_TYPE (biv->base);
1979   if (!INTEGRAL_TYPE_P (type))
1980     return;
1981
1982   biv->have_address_use = true;
1983   data->bivs_not_used_in_addr--;
1984   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1985   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1986     {
1987       struct iv *iv = ver_info (data, i)->iv;
1988
1989       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1990           || iv->have_address_use || !iv->no_overflow)
1991         continue;
1992
1993       if (type != TREE_TYPE (iv->base)
1994           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1995         continue;
1996
1997       if (!operand_equal_p (biv->step, iv->step, 0))
1998         continue;
1999
2000       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2001       if (operand_equal_p (base_1, iv->base, 0)
2002           || operand_equal_p (base_2, biv->base, 0))
2003         {
2004           iv->have_address_use = true;
2005           data->bivs_not_used_in_addr--;
2006         }
2007     }
2008 }
2009
2010 /* Cumulates the steps of indices into DATA and replaces their values with the
2011    initial ones.  Returns false when the value of the index cannot be determined.
2012    Callback for for_each_index.  */
2013
2014 struct ifs_ivopts_data
2015 {
2016   struct ivopts_data *ivopts_data;
2017   gimple *stmt;
2018   tree step;
2019 };
2020
2021 static bool
2022 idx_find_step (tree base, tree *idx, void *data)
2023 {
2024   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2025   struct iv *iv;
2026   bool use_overflow_semantics = false;
2027   tree step, iv_base, iv_step, lbound, off;
2028   class loop *loop = dta->ivopts_data->current_loop;
2029
2030   /* If base is a component ref, require that the offset of the reference
2031      be invariant.  */
2032   if (TREE_CODE (base) == COMPONENT_REF)
2033     {
2034       off = component_ref_field_offset (base);
2035       return expr_invariant_in_loop_p (loop, off);
2036     }
2037
2038   /* If base is array, first check whether we will be able to move the
2039      reference out of the loop (in order to take its address in strength
2040      reduction).  In order for this to work we need both lower bound
2041      and step to be loop invariants.  */
2042   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2043     {
2044       /* Moreover, for a range, the size needs to be invariant as well.  */
2045       if (TREE_CODE (base) == ARRAY_RANGE_REF
2046           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2047         return false;
2048
2049       step = array_ref_element_size (base);
2050       lbound = array_ref_low_bound (base);
2051
2052       if (!expr_invariant_in_loop_p (loop, step)
2053           || !expr_invariant_in_loop_p (loop, lbound))
2054         return false;
2055     }
2056
2057   if (TREE_CODE (*idx) != SSA_NAME)
2058     return true;
2059
2060   iv = get_iv (dta->ivopts_data, *idx);
2061   if (!iv)
2062     return false;
2063
2064   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2065           *&x[0], which is not folded and does not trigger the
2066           ARRAY_REF path below.  */
2067   *idx = iv->base;
2068
2069   if (integer_zerop (iv->step))
2070     return true;
2071
2072   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2073     {
2074       step = array_ref_element_size (base);
2075
2076       /* We only handle addresses whose step is an integer constant.  */
2077       if (TREE_CODE (step) != INTEGER_CST)
2078         return false;
2079     }
2080   else
2081     /* The step for pointer arithmetics already is 1 byte.  */
2082     step = size_one_node;
2083
2084   iv_base = iv->base;
2085   iv_step = iv->step;
2086   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2087     use_overflow_semantics = true;
2088
2089   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2090                             sizetype, &iv_base, &iv_step, dta->stmt,
2091                             use_overflow_semantics))
2092     {
2093       /* The index might wrap.  */
2094       return false;
2095     }
2096
2097   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2098   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2099
2100   if (dta->ivopts_data->bivs_not_used_in_addr)
2101     {
2102       if (!iv->biv_p)
2103         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2104
2105       record_biv_for_address_use (dta->ivopts_data, iv);
2106     }
2107   return true;
2108 }
2109
2110 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2111    object is passed to it in DATA.  */
2112
2113 static bool
2114 idx_record_use (tree base, tree *idx,
2115                 void *vdata)
2116 {
2117   struct ivopts_data *data = (struct ivopts_data *) vdata;
2118   find_interesting_uses_op (data, *idx);
2119   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2120     {
2121       find_interesting_uses_op (data, array_ref_element_size (base));
2122       find_interesting_uses_op (data, array_ref_low_bound (base));
2123     }
2124   return true;
2125 }
2126
2127 /* If we can prove that TOP = cst * BOT for some constant cst,
2128    store cst to MUL and return true.  Otherwise return false.
2129    The returned value is always sign-extended, regardless of the
2130    signedness of TOP and BOT.  */
2131
2132 static bool
2133 constant_multiple_of (tree top, tree bot, widest_int *mul)
2134 {
2135   tree mby;
2136   enum tree_code code;
2137   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2138   widest_int res, p0, p1;
2139
2140   STRIP_NOPS (top);
2141   STRIP_NOPS (bot);
2142
2143   if (operand_equal_p (top, bot, 0))
2144     {
2145       *mul = 1;
2146       return true;
2147     }
2148
2149   code = TREE_CODE (top);
2150   switch (code)
2151     {
2152     case MULT_EXPR:
2153       mby = TREE_OPERAND (top, 1);
2154       if (TREE_CODE (mby) != INTEGER_CST)
2155         return false;
2156
2157       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2158         return false;
2159
2160       *mul = wi::sext (res * wi::to_widest (mby), precision);
2161       return true;
2162
2163     case PLUS_EXPR:
2164     case MINUS_EXPR:
2165       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2166           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2167         return false;
2168
2169       if (code == MINUS_EXPR)
2170         p1 = -p1;
2171       *mul = wi::sext (p0 + p1, precision);
2172       return true;
2173
2174     case INTEGER_CST:
2175       if (TREE_CODE (bot) != INTEGER_CST)
2176         return false;
2177
2178       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2179       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2180       if (p1 == 0)
2181         return false;
2182       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2183       return res == 0;
2184
2185     default:
2186       if (POLY_INT_CST_P (top)
2187           && POLY_INT_CST_P (bot)
2188           && constant_multiple_p (wi::to_poly_widest (top),
2189                                   wi::to_poly_widest (bot), mul))
2190         return true;
2191
2192       return false;
2193     }
2194 }
2195
2196 /* Return true if memory reference REF with step STEP may be unaligned.  */
2197
2198 static bool
2199 may_be_unaligned_p (tree ref, tree step)
2200 {
2201   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2202      thus they are not misaligned.  */
2203   if (TREE_CODE (ref) == TARGET_MEM_REF)
2204     return false;
2205
2206   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2207   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2208     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2209
2210   unsigned HOST_WIDE_INT bitpos;
2211   unsigned int ref_align;
2212   get_object_alignment_1 (ref, &ref_align, &bitpos);
2213   if (ref_align < align
2214       || (bitpos % align) != 0
2215       || (bitpos % BITS_PER_UNIT) != 0)
2216     return true;
2217
2218   unsigned int trailing_zeros = tree_ctz (step);
2219   if (trailing_zeros < HOST_BITS_PER_INT
2220       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2221     return true;
2222
2223   return false;
2224 }
2225
2226 /* Return true if EXPR may be non-addressable.   */
2227
2228 bool
2229 may_be_nonaddressable_p (tree expr)
2230 {
2231   switch (TREE_CODE (expr))
2232     {
2233     case VAR_DECL:
2234       /* Check if it's a register variable.  */
2235       return DECL_HARD_REGISTER (expr);
2236
2237     case TARGET_MEM_REF:
2238       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2239          target, thus they are always addressable.  */
2240       return false;
2241
2242     case MEM_REF:
2243       /* Likewise for MEM_REFs, modulo the storage order.  */
2244       return REF_REVERSE_STORAGE_ORDER (expr);
2245
2246     case BIT_FIELD_REF:
2247       if (REF_REVERSE_STORAGE_ORDER (expr))
2248         return true;
2249       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2250
2251     case COMPONENT_REF:
2252       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2253         return true;
2254       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2255              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2256
2257     case ARRAY_REF:
2258     case ARRAY_RANGE_REF:
2259       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2260         return true;
2261       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2262
2263     case VIEW_CONVERT_EXPR:
2264       /* This kind of view-conversions may wrap non-addressable objects
2265          and make them look addressable.  After some processing the
2266          non-addressability may be uncovered again, causing ADDR_EXPRs
2267          of inappropriate objects to be built.  */
2268       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2269           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2270         return true;
2271       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2272
2273     CASE_CONVERT:
2274       return true;
2275
2276     default:
2277       break;
2278     }
2279
2280   return false;
2281 }
2282
2283 /* Finds addresses in *OP_P inside STMT.  */
2284
2285 static void
2286 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2287                                tree *op_p)
2288 {
2289   tree base = *op_p, step = size_zero_node;
2290   struct iv *civ;
2291   struct ifs_ivopts_data ifs_ivopts_data;
2292
2293   /* Do not play with volatile memory references.  A bit too conservative,
2294      perhaps, but safe.  */
2295   if (gimple_has_volatile_ops (stmt))
2296     goto fail;
2297
2298   /* Ignore bitfields for now.  Not really something terribly complicated
2299      to handle.  TODO.  */
2300   if (TREE_CODE (base) == BIT_FIELD_REF)
2301     goto fail;
2302
2303   base = unshare_expr (base);
2304
2305   if (TREE_CODE (base) == TARGET_MEM_REF)
2306     {
2307       tree type = build_pointer_type (TREE_TYPE (base));
2308       tree astep;
2309
2310       if (TMR_BASE (base)
2311           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2312         {
2313           civ = get_iv (data, TMR_BASE (base));
2314           if (!civ)
2315             goto fail;
2316
2317           TMR_BASE (base) = civ->base;
2318           step = civ->step;
2319         }
2320       if (TMR_INDEX2 (base)
2321           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2322         {
2323           civ = get_iv (data, TMR_INDEX2 (base));
2324           if (!civ)
2325             goto fail;
2326
2327           TMR_INDEX2 (base) = civ->base;
2328           step = civ->step;
2329         }
2330       if (TMR_INDEX (base)
2331           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2332         {
2333           civ = get_iv (data, TMR_INDEX (base));
2334           if (!civ)
2335             goto fail;
2336
2337           TMR_INDEX (base) = civ->base;
2338           astep = civ->step;
2339
2340           if (astep)
2341             {
2342               if (TMR_STEP (base))
2343                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2344
2345               step = fold_build2 (PLUS_EXPR, type, step, astep);
2346             }
2347         }
2348
2349       if (integer_zerop (step))
2350         goto fail;
2351       base = tree_mem_ref_addr (type, base);
2352     }
2353   else
2354     {
2355       ifs_ivopts_data.ivopts_data = data;
2356       ifs_ivopts_data.stmt = stmt;
2357       ifs_ivopts_data.step = size_zero_node;
2358       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2359           || integer_zerop (ifs_ivopts_data.step))
2360         goto fail;
2361       step = ifs_ivopts_data.step;
2362
2363       /* Check that the base expression is addressable.  This needs
2364          to be done after substituting bases of IVs into it.  */
2365       if (may_be_nonaddressable_p (base))
2366         goto fail;
2367
2368       /* Moreover, on strict alignment platforms, check that it is
2369          sufficiently aligned.  */
2370       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2371         goto fail;
2372
2373       base = build_fold_addr_expr (base);
2374
2375       /* Substituting bases of IVs into the base expression might
2376          have caused folding opportunities.  */
2377       if (TREE_CODE (base) == ADDR_EXPR)
2378         {
2379           tree *ref = &TREE_OPERAND (base, 0);
2380           while (handled_component_p (*ref))
2381             ref = &TREE_OPERAND (*ref, 0);
2382           if (TREE_CODE (*ref) == MEM_REF)
2383             {
2384               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2385                                       TREE_OPERAND (*ref, 0),
2386                                       TREE_OPERAND (*ref, 1));
2387               if (tem)
2388                 *ref = tem;
2389             }
2390         }
2391     }
2392
2393   civ = alloc_iv (data, base, step);
2394   /* Fail if base object of this memory reference is unknown.  */
2395   if (civ->base_object == NULL_TREE)
2396     goto fail;
2397
2398   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2399   return;
2400
2401 fail:
2402   for_each_index (op_p, idx_record_use, data);
2403 }
2404
2405 /* Finds and records invariants used in STMT.  */
2406
2407 static void
2408 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2409 {
2410   ssa_op_iter iter;
2411   use_operand_p use_p;
2412   tree op;
2413
2414   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2415     {
2416       op = USE_FROM_PTR (use_p);
2417       record_invariant (data, op, false);
2418     }
2419 }
2420
2421 /* CALL calls an internal function.  If operand *OP_P will become an
2422    address when the call is expanded, return the type of the memory
2423    being addressed, otherwise return null.  */
2424
2425 static tree
2426 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2427 {
2428   switch (gimple_call_internal_fn (call))
2429     {
2430     case IFN_MASK_LOAD:
2431     case IFN_MASK_LOAD_LANES:
2432       if (op_p == gimple_call_arg_ptr (call, 0))
2433         return TREE_TYPE (gimple_call_lhs (call));
2434       return NULL_TREE;
2435
2436     case IFN_MASK_STORE:
2437     case IFN_MASK_STORE_LANES:
2438       if (op_p == gimple_call_arg_ptr (call, 0))
2439         return TREE_TYPE (gimple_call_arg (call, 3));
2440       return NULL_TREE;
2441
2442     default:
2443       return NULL_TREE;
2444     }
2445 }
2446
2447 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2448    Return true if the operand will become an address when STMT
2449    is expanded and record the associated address use if so.  */
2450
2451 static bool
2452 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2453                        struct iv *iv)
2454 {
2455   /* Fail if base object of this memory reference is unknown.  */
2456   if (iv->base_object == NULL_TREE)
2457     return false;
2458
2459   tree mem_type = NULL_TREE;
2460   if (gcall *call = dyn_cast <gcall *> (stmt))
2461     if (gimple_call_internal_p (call))
2462       mem_type = get_mem_type_for_internal_fn (call, op_p);
2463   if (mem_type)
2464     {
2465       iv = alloc_iv (data, iv->base, iv->step);
2466       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2467       return true;
2468     }
2469   return false;
2470 }
2471
2472 /* Finds interesting uses of induction variables in the statement STMT.  */
2473
2474 static void
2475 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2476 {
2477   struct iv *iv;
2478   tree op, *lhs, *rhs;
2479   ssa_op_iter iter;
2480   use_operand_p use_p;
2481   enum tree_code code;
2482
2483   find_invariants_stmt (data, stmt);
2484
2485   if (gimple_code (stmt) == GIMPLE_COND)
2486     {
2487       find_interesting_uses_cond (data, stmt);
2488       return;
2489     }
2490
2491   if (is_gimple_assign (stmt))
2492     {
2493       lhs = gimple_assign_lhs_ptr (stmt);
2494       rhs = gimple_assign_rhs1_ptr (stmt);
2495
2496       if (TREE_CODE (*lhs) == SSA_NAME)
2497         {
2498           /* If the statement defines an induction variable, the uses are not
2499              interesting by themselves.  */
2500
2501           iv = get_iv (data, *lhs);
2502
2503           if (iv && !integer_zerop (iv->step))
2504             return;
2505         }
2506
2507       code = gimple_assign_rhs_code (stmt);
2508       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2509           && (REFERENCE_CLASS_P (*rhs)
2510               || is_gimple_val (*rhs)))
2511         {
2512           if (REFERENCE_CLASS_P (*rhs))
2513             find_interesting_uses_address (data, stmt, rhs);
2514           else
2515             find_interesting_uses_op (data, *rhs);
2516
2517           if (REFERENCE_CLASS_P (*lhs))
2518             find_interesting_uses_address (data, stmt, lhs);
2519           return;
2520         }
2521       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2522         {
2523           find_interesting_uses_cond (data, stmt);
2524           return;
2525         }
2526
2527       /* TODO -- we should also handle address uses of type
2528
2529          memory = call (whatever);
2530
2531          and
2532
2533          call (memory).  */
2534     }
2535
2536   if (gimple_code (stmt) == GIMPLE_PHI
2537       && gimple_bb (stmt) == data->current_loop->header)
2538     {
2539       iv = get_iv (data, PHI_RESULT (stmt));
2540
2541       if (iv && !integer_zerop (iv->step))
2542         return;
2543     }
2544
2545   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2546     {
2547       op = USE_FROM_PTR (use_p);
2548
2549       if (TREE_CODE (op) != SSA_NAME)
2550         continue;
2551
2552       iv = get_iv (data, op);
2553       if (!iv)
2554         continue;
2555
2556       if (!find_address_like_use (data, stmt, use_p->use, iv))
2557         find_interesting_uses_op (data, op);
2558     }
2559 }
2560
2561 /* Finds interesting uses of induction variables outside of loops
2562    on loop exit edge EXIT.  */
2563
2564 static void
2565 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2566 {
2567   gphi *phi;
2568   gphi_iterator psi;
2569   tree def;
2570
2571   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2572     {
2573       phi = psi.phi ();
2574       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2575       if (!virtual_operand_p (def))
2576         find_interesting_uses_op (data, def);
2577     }
2578 }
2579
2580 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2581    mode for memory reference represented by USE.  */
2582
2583 static GTY (()) vec<rtx, va_gc> *addr_list;
2584
2585 static bool
2586 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2587 {
2588   rtx reg, addr;
2589   unsigned list_index;
2590   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2591   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2592
2593   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2594   if (list_index >= vec_safe_length (addr_list))
2595     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2596
2597   addr = (*addr_list)[list_index];
2598   if (!addr)
2599     {
2600       addr_mode = targetm.addr_space.address_mode (as);
2601       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2602       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2603       (*addr_list)[list_index] = addr;
2604     }
2605   else
2606     addr_mode = GET_MODE (addr);
2607
2608   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2609   return (memory_address_addr_space_p (mem_mode, addr, as));
2610 }
2611
2612 /* Comparison function to sort group in ascending order of addr_offset.  */
2613
2614 static int
2615 group_compare_offset (const void *a, const void *b)
2616 {
2617   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2618   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2619
2620   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2621 }
2622
2623 /* Check if small groups should be split.  Return true if no group
2624    contains more than two uses with distinct addr_offsets.  Return
2625    false otherwise.  We want to split such groups because:
2626
2627      1) Small groups don't have much benefit and may interfer with
2628         general candidate selection.
2629      2) Size for problem with only small groups is usually small and
2630         general algorithm can handle it well.
2631
2632    TODO -- Above claim may not hold when we want to merge memory
2633    accesses with conseuctive addresses.  */
2634
2635 static bool
2636 split_small_address_groups_p (struct ivopts_data *data)
2637 {
2638   unsigned int i, j, distinct = 1;
2639   struct iv_use *pre;
2640   struct iv_group *group;
2641
2642   for (i = 0; i < data->vgroups.length (); i++)
2643     {
2644       group = data->vgroups[i];
2645       if (group->vuses.length () == 1)
2646         continue;
2647
2648       gcc_assert (address_p (group->type));
2649       if (group->vuses.length () == 2)
2650         {
2651           if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2652                                       group->vuses[1]->addr_offset) > 0)
2653             std::swap (group->vuses[0], group->vuses[1]);
2654         }
2655       else
2656         group->vuses.qsort (group_compare_offset);
2657
2658       if (distinct > 2)
2659         continue;
2660
2661       distinct = 1;
2662       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2663         {
2664           if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2665             {
2666               pre = group->vuses[j];
2667               distinct++;
2668             }
2669
2670           if (distinct > 2)
2671             break;
2672         }
2673     }
2674
2675   return (distinct <= 2);
2676 }
2677
2678 /* For each group of address type uses, this function further groups
2679    these uses according to the maximum offset supported by target's
2680    [base + offset] addressing mode.  */
2681
2682 static void
2683 split_address_groups (struct ivopts_data *data)
2684 {
2685   unsigned int i, j;
2686   /* Always split group.  */
2687   bool split_p = split_small_address_groups_p (data);
2688
2689   for (i = 0; i < data->vgroups.length (); i++)
2690     {
2691       struct iv_group *new_group = NULL;
2692       struct iv_group *group = data->vgroups[i];
2693       struct iv_use *use = group->vuses[0];
2694
2695       use->id = 0;
2696       use->group_id = group->id;
2697       if (group->vuses.length () == 1)
2698         continue;
2699
2700       gcc_assert (address_p (use->type));
2701
2702       for (j = 1; j < group->vuses.length ();)
2703         {
2704           struct iv_use *next = group->vuses[j];
2705           poly_int64 offset = next->addr_offset - use->addr_offset;
2706
2707           /* Split group if aksed to, or the offset against the first
2708              use can't fit in offset part of addressing mode.  IV uses
2709              having the same offset are still kept in one group.  */
2710           if (maybe_ne (offset, 0)
2711               && (split_p || !addr_offset_valid_p (use, offset)))
2712             {
2713               if (!new_group)
2714                 new_group = record_group (data, group->type);
2715               group->vuses.ordered_remove (j);
2716               new_group->vuses.safe_push (next);
2717               continue;
2718             }
2719
2720           next->id = j;
2721           next->group_id = group->id;
2722           j++;
2723         }
2724     }
2725 }
2726
2727 /* Finds uses of the induction variables that are interesting.  */
2728
2729 static void
2730 find_interesting_uses (struct ivopts_data *data)
2731 {
2732   basic_block bb;
2733   gimple_stmt_iterator bsi;
2734   basic_block *body = get_loop_body (data->current_loop);
2735   unsigned i;
2736   edge e;
2737
2738   for (i = 0; i < data->current_loop->num_nodes; i++)
2739     {
2740       edge_iterator ei;
2741       bb = body[i];
2742
2743       FOR_EACH_EDGE (e, ei, bb->succs)
2744         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2745             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2746           find_interesting_uses_outside (data, e);
2747
2748       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2749         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2750       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2751         if (!is_gimple_debug (gsi_stmt (bsi)))
2752           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2753     }
2754   free (body);
2755
2756   split_address_groups (data);
2757
2758   if (dump_file && (dump_flags & TDF_DETAILS))
2759     {
2760       fprintf (dump_file, "\n<IV Groups>:\n");
2761       dump_groups (dump_file, data);
2762       fprintf (dump_file, "\n");
2763     }
2764 }
2765
2766 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2767    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2768    we are at the top-level of the processed address.  */
2769
2770 static tree
2771 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2772                 poly_int64 *offset)
2773 {
2774   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2775   enum tree_code code;
2776   tree type, orig_type = TREE_TYPE (expr);
2777   poly_int64 off0, off1;
2778   HOST_WIDE_INT st;
2779   tree orig_expr = expr;
2780
2781   STRIP_NOPS (expr);
2782
2783   type = TREE_TYPE (expr);
2784   code = TREE_CODE (expr);
2785   *offset = 0;
2786
2787   switch (code)
2788     {
2789     case POINTER_PLUS_EXPR:
2790     case PLUS_EXPR:
2791     case MINUS_EXPR:
2792       op0 = TREE_OPERAND (expr, 0);
2793       op1 = TREE_OPERAND (expr, 1);
2794
2795       op0 = strip_offset_1 (op0, false, false, &off0);
2796       op1 = strip_offset_1 (op1, false, false, &off1);
2797
2798       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2799       if (op0 == TREE_OPERAND (expr, 0)
2800           && op1 == TREE_OPERAND (expr, 1))
2801         return orig_expr;
2802
2803       if (integer_zerop (op1))
2804         expr = op0;
2805       else if (integer_zerop (op0))
2806         {
2807           if (code == MINUS_EXPR)
2808             expr = fold_build1 (NEGATE_EXPR, type, op1);
2809           else
2810             expr = op1;
2811         }
2812       else
2813         expr = fold_build2 (code, type, op0, op1);
2814
2815       return fold_convert (orig_type, expr);
2816
2817     case MULT_EXPR:
2818       op1 = TREE_OPERAND (expr, 1);
2819       if (!cst_and_fits_in_hwi (op1))
2820         return orig_expr;
2821
2822       op0 = TREE_OPERAND (expr, 0);
2823       op0 = strip_offset_1 (op0, false, false, &off0);
2824       if (op0 == TREE_OPERAND (expr, 0))
2825         return orig_expr;
2826
2827       *offset = off0 * int_cst_value (op1);
2828       if (integer_zerop (op0))
2829         expr = op0;
2830       else
2831         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2832
2833       return fold_convert (orig_type, expr);
2834
2835     case ARRAY_REF:
2836     case ARRAY_RANGE_REF:
2837       if (!inside_addr)
2838         return orig_expr;
2839
2840       step = array_ref_element_size (expr);
2841       if (!cst_and_fits_in_hwi (step))
2842         break;
2843
2844       st = int_cst_value (step);
2845       op1 = TREE_OPERAND (expr, 1);
2846       op1 = strip_offset_1 (op1, false, false, &off1);
2847       *offset = off1 * st;
2848
2849       if (top_compref
2850           && integer_zerop (op1))
2851         {
2852           /* Strip the component reference completely.  */
2853           op0 = TREE_OPERAND (expr, 0);
2854           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2855           *offset += off0;
2856           return op0;
2857         }
2858       break;
2859
2860     case COMPONENT_REF:
2861       {
2862         tree field;
2863
2864         if (!inside_addr)
2865           return orig_expr;
2866
2867         tmp = component_ref_field_offset (expr);
2868         field = TREE_OPERAND (expr, 1);
2869         if (top_compref
2870             && cst_and_fits_in_hwi (tmp)
2871             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2872           {
2873             HOST_WIDE_INT boffset, abs_off;
2874
2875             /* Strip the component reference completely.  */
2876             op0 = TREE_OPERAND (expr, 0);
2877             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2878             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2879             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2880             if (boffset < 0)
2881               abs_off = -abs_off;
2882
2883             *offset = off0 + int_cst_value (tmp) + abs_off;
2884             return op0;
2885           }
2886       }
2887       break;
2888
2889     case ADDR_EXPR:
2890       op0 = TREE_OPERAND (expr, 0);
2891       op0 = strip_offset_1 (op0, true, true, &off0);
2892       *offset += off0;
2893
2894       if (op0 == TREE_OPERAND (expr, 0))
2895         return orig_expr;
2896
2897       expr = build_fold_addr_expr (op0);
2898       return fold_convert (orig_type, expr);
2899
2900     case MEM_REF:
2901       /* ???  Offset operand?  */
2902       inside_addr = false;
2903       break;
2904
2905     default:
2906       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2907         return build_int_cst (orig_type, 0);
2908       return orig_expr;
2909     }
2910
2911   /* Default handling of expressions for that we want to recurse into
2912      the first operand.  */
2913   op0 = TREE_OPERAND (expr, 0);
2914   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2915   *offset += off0;
2916
2917   if (op0 == TREE_OPERAND (expr, 0)
2918       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2919     return orig_expr;
2920
2921   expr = copy_node (expr);
2922   TREE_OPERAND (expr, 0) = op0;
2923   if (op1)
2924     TREE_OPERAND (expr, 1) = op1;
2925
2926   /* Inside address, we might strip the top level component references,
2927      thus changing type of the expression.  Handling of ADDR_EXPR
2928      will fix that.  */
2929   expr = fold_convert (orig_type, expr);
2930
2931   return expr;
2932 }
2933
2934 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2935
2936 tree
2937 strip_offset (tree expr, poly_uint64_pod *offset)
2938 {
2939   poly_int64 off;
2940   tree core = strip_offset_1 (expr, false, false, &off);
2941   *offset = off;
2942   return core;
2943 }
2944
2945 /* Returns variant of TYPE that can be used as base for different uses.
2946    We return unsigned type with the same precision, which avoids problems
2947    with overflows.  */
2948
2949 static tree
2950 generic_type_for (tree type)
2951 {
2952   if (POINTER_TYPE_P (type))
2953     return unsigned_type_for (type);
2954
2955   if (TYPE_UNSIGNED (type))
2956     return type;
2957
2958   return unsigned_type_for (type);
2959 }
2960
/* Private data for walk_tree, handed to find_inv_vars_cb through its DATA
   argument.  */

struct walk_tree_data
{
  /* Address of the bitmap collecting the ids of the invariants found.  The
     callback allocates the bitmap on demand when it is still NULL.  */
  bitmap *inv_vars;
  /* The ivopts pass data used to look up information about SSA names.  */
  struct ivopts_data *idata;
};
2968
2969 /* Callback function for walk_tree, it records invariants and symbol
2970    reference in *EXPR_P.  DATA is the structure storing result info.  */
2971
2972 static tree
2973 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2974 {
2975   tree op = *expr_p;
2976   struct version_info *info;
2977   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2978
2979   if (TREE_CODE (op) != SSA_NAME)
2980     return NULL_TREE;
2981
2982   info = name_info (wdata->idata, op);
2983   /* Because we expand simple operations when finding IVs, loop invariant
2984      variable that isn't referred by the original loop could be used now.
2985      Record such invariant variables here.  */
2986   if (!info->iv)
2987     {
2988       struct ivopts_data *idata = wdata->idata;
2989       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2990
2991       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2992         {
2993           set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2994           record_invariant (idata, op, false);
2995         }
2996     }
2997   if (!info->inv_id || info->has_nonlin_use)
2998     return NULL_TREE;
2999
3000   if (!*wdata->inv_vars)
3001     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3002   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3003
3004   return NULL_TREE;
3005 }
3006
3007 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
3008    store it.  */
3009
3010 static inline void
3011 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3012 {
3013   struct walk_tree_data wdata;
3014
3015   if (!inv_vars)
3016     return;
3017
3018   wdata.idata = data;
3019   wdata.inv_vars = inv_vars;
3020   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3021 }
3022
3023 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3024    will be recorded if it doesn't exist yet.  Given below two exprs:
3025      inv_expr + cst1, inv_expr + cst2
3026    It's hard to make decision whether constant part should be stripped
3027    or not.  We choose to not strip based on below facts:
3028      1) We need to count ADD cost for constant part if it's stripped,
3029         which isn't always trivial where this functions is called.
3030      2) Stripping constant away may be conflict with following loop
3031         invariant hoisting pass.
3032      3) Not stripping constant away results in more invariant exprs,
3033         which usually leads to decision preferring lower reg pressure.  */
3034
3035 static iv_inv_expr_ent *
3036 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3037 {
3038   STRIP_NOPS (inv_expr);
3039
3040   if (poly_int_tree_p (inv_expr)
3041       || TREE_CODE (inv_expr) == SSA_NAME)
3042     return NULL;
3043
3044   /* Don't strip constant part away as we used to.  */
3045
3046   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3047   struct iv_inv_expr_ent ent;
3048   ent.expr = inv_expr;
3049   ent.hash = iterative_hash_expr (inv_expr, 0);
3050   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3051
3052   if (!*slot)
3053     {
3054       *slot = XNEW (struct iv_inv_expr_ent);
3055       (*slot)->expr = inv_expr;
3056       (*slot)->hash = ent.hash;
3057       (*slot)->id = ++data->max_inv_expr_id;
3058     }
3059
3060   return *slot;
3061 }
3062
3063 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3064    position to POS.  If USE is not NULL, the candidate is set as related to
3065    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3066    replacement of the final value of the iv by a direct computation.  */
3067
3068 static struct iv_cand *
3069 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3070                  enum iv_position pos, struct iv_use *use,
3071                  gimple *incremented_at, struct iv *orig_iv = NULL,
3072                  bool doloop = false)
3073 {
3074   unsigned i;
3075   struct iv_cand *cand = NULL;
3076   tree type, orig_type;
3077
3078   gcc_assert (base && step);
3079
3080   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3081      live, but the ivopts code may replace a real pointer with one
3082      pointing before or after the memory block that is then adjusted
3083      into the memory block during the loop.  FIXME: It would likely be
3084      better to actually force the pointer live and still use ivopts;
3085      for example, it would be enough to write the pointer into memory
3086      and keep it there until after the loop.  */
3087   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3088     return NULL;
3089
3090   /* For non-original variables, make sure their values are computed in a type
3091      that does not invoke undefined behavior on overflows (since in general,
3092      we cannot prove that these induction variables are non-wrapping).  */
3093   if (pos != IP_ORIGINAL)
3094     {
3095       orig_type = TREE_TYPE (base);
3096       type = generic_type_for (orig_type);
3097       if (type != orig_type)
3098         {
3099           base = fold_convert (type, base);
3100           step = fold_convert (type, step);
3101         }
3102     }
3103
3104   for (i = 0; i < data->vcands.length (); i++)
3105     {
3106       cand = data->vcands[i];
3107
3108       if (cand->pos != pos)
3109         continue;
3110
3111       if (cand->incremented_at != incremented_at
3112           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3113               && cand->ainc_use != use))
3114         continue;
3115
3116       if (operand_equal_p (base, cand->iv->base, 0)
3117           && operand_equal_p (step, cand->iv->step, 0)
3118           && (TYPE_PRECISION (TREE_TYPE (base))
3119               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3120         break;
3121     }
3122
3123   if (i == data->vcands.length ())
3124     {
3125       cand = XCNEW (struct iv_cand);
3126       cand->id = i;
3127       cand->iv = alloc_iv (data, base, step);
3128       cand->pos = pos;
3129       if (pos != IP_ORIGINAL)
3130         {
3131           if (doloop)
3132             cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3133           else
3134             cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3135           cand->var_after = cand->var_before;
3136         }
3137       cand->important = important;
3138       cand->incremented_at = incremented_at;
3139       cand->doloop_p = doloop;
3140       data->vcands.safe_push (cand);
3141
3142       if (!poly_int_tree_p (step))
3143         {
3144           find_inv_vars (data, &step, &cand->inv_vars);
3145
3146           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3147           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3148           if (inv_expr != NULL)
3149             {
3150               cand->inv_exprs = cand->inv_vars;
3151               cand->inv_vars = NULL;
3152               if (cand->inv_exprs)
3153                 bitmap_clear (cand->inv_exprs);
3154               else
3155                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3156
3157               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3158             }
3159         }
3160
3161       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3162         cand->ainc_use = use;
3163       else
3164         cand->ainc_use = NULL;
3165
3166       cand->orig_iv = orig_iv;
3167       if (dump_file && (dump_flags & TDF_DETAILS))
3168         dump_cand (dump_file, cand);
3169     }
3170
3171   cand->important |= important;
3172   cand->doloop_p |= doloop;
3173
3174   /* Relate candidate to the group for which it is added.  */
3175   if (use)
3176     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3177
3178   return cand;
3179 }
3180
3181 /* Returns true if incrementing the induction variable at the end of the LOOP
3182    is allowed.
3183
3184    The purpose is to avoid splitting latch edge with a biv increment, thus
3185    creating a jump, possibly confusing other optimization passes and leaving
3186    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3187    available (so we do not have a better alternative), or if the latch edge
3188    is already nonempty.  */
3189
3190 static bool
3191 allow_ip_end_pos_p (class loop *loop)
3192 {
3193   if (!ip_normal_pos (loop))
3194     return true;
3195
3196   if (!empty_block_p (ip_end_pos (loop)))
3197     return true;
3198
3199   return false;
3200 }
3201
3202 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3203    Important field is set to IMPORTANT.  */
3204
3205 static void
3206 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3207                         bool important, struct iv_use *use)
3208 {
3209   basic_block use_bb = gimple_bb (use->stmt);
3210   machine_mode mem_mode;
3211   unsigned HOST_WIDE_INT cstepi;
3212
3213   /* If we insert the increment in any position other than the standard
3214      ones, we must ensure that it is incremented once per iteration.
3215      It must not be in an inner nested loop, or one side of an if
3216      statement.  */
3217   if (use_bb->loop_father != data->current_loop
3218       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3219       || stmt_can_throw_internal (cfun, use->stmt)
3220       || !cst_and_fits_in_hwi (step))
3221     return;
3222
3223   cstepi = int_cst_value (step);
3224
3225   mem_mode = TYPE_MODE (use->mem_type);
3226   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3227         || USE_STORE_PRE_INCREMENT (mem_mode))
3228        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3229       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3230            || USE_STORE_PRE_DECREMENT (mem_mode))
3231           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3232     {
3233       enum tree_code code = MINUS_EXPR;
3234       tree new_base;
3235       tree new_step = step;
3236
3237       if (POINTER_TYPE_P (TREE_TYPE (base)))
3238         {
3239           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3240           code = POINTER_PLUS_EXPR;
3241         }
3242       else
3243         new_step = fold_convert (TREE_TYPE (base), new_step);
3244       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3245       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3246                        use->stmt);
3247     }
3248   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3249         || USE_STORE_POST_INCREMENT (mem_mode))
3250        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3251       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3252            || USE_STORE_POST_DECREMENT (mem_mode))
3253           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3254     {
3255       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3256                        use->stmt);
3257     }
3258 }
3259
3260 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3261    position to POS.  If USE is not NULL, the candidate is set as related to
3262    it.  The candidate computation is scheduled before exit condition and at
3263    the end of loop.  */
3264
3265 static void
3266 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3267                struct iv_use *use, struct iv *orig_iv = NULL,
3268                bool doloop = false)
3269 {
3270   if (ip_normal_pos (data->current_loop))
3271     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3272                      doloop);
3273   /* Exclude doloop candidate here since it requires decrement then comparison
3274      and jump, the IP_END position doesn't match.  */
3275   if (!doloop && ip_end_pos (data->current_loop)
3276       && allow_ip_end_pos_p (data->current_loop))
3277     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3278 }
3279
3280 /* Adds standard iv candidates.  */
3281
3282 static void
3283 add_standard_iv_candidates (struct ivopts_data *data)
3284 {
3285   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3286
3287   /* The same for a double-integer type if it is still fast enough.  */
3288   if (TYPE_PRECISION
3289         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3290       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3291     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3292                    build_int_cst (long_integer_type_node, 1), true, NULL);
3293
3294   /* The same for a double-integer type if it is still fast enough.  */
3295   if (TYPE_PRECISION
3296         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3297       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3298     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3299                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3300 }
3301
3302
3303 /* Adds candidates bases on the old induction variable IV.  */
3304
3305 static void
3306 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3307 {
3308   gimple *phi;
3309   tree def;
3310   struct iv_cand *cand;
3311
3312   /* Check if this biv is used in address type use.  */
3313   if (iv->no_overflow  && iv->have_address_use
3314       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3315       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3316     {
3317       tree base = fold_convert (sizetype, iv->base);
3318       tree step = fold_convert (sizetype, iv->step);
3319
3320       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3321       add_candidate (data, base, step, true, NULL, iv);
3322       /* Add iv cand of the original type only if it has nonlinear use.  */
3323       if (iv->nonlin_use)
3324         add_candidate (data, iv->base, iv->step, true, NULL);
3325     }
3326   else
3327     add_candidate (data, iv->base, iv->step, true, NULL);
3328
3329   /* The same, but with initial value zero.  */
3330   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3331     add_candidate (data, size_int (0), iv->step, true, NULL);
3332   else
3333     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3334                    iv->step, true, NULL);
3335
3336   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3337   if (gimple_code (phi) == GIMPLE_PHI)
3338     {
3339       /* Additionally record the possibility of leaving the original iv
3340          untouched.  */
3341       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3342       /* Don't add candidate if it's from another PHI node because
3343          it's an affine iv appearing in the form of PEELED_CHREC.  */
3344       phi = SSA_NAME_DEF_STMT (def);
3345       if (gimple_code (phi) != GIMPLE_PHI)
3346         {
3347           cand = add_candidate_1 (data,
3348                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3349                                   SSA_NAME_DEF_STMT (def));
3350           if (cand)
3351             {
3352               cand->var_before = iv->ssa_name;
3353               cand->var_after = def;
3354             }
3355         }
3356       else
3357         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3358     }
3359 }
3360
3361 /* Adds candidates based on the old induction variables.  */
3362
3363 static void
3364 add_iv_candidate_for_bivs (struct ivopts_data *data)
3365 {
3366   unsigned i;
3367   struct iv *iv;
3368   bitmap_iterator bi;
3369
3370   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3371     {
3372       iv = ver_info (data, i)->iv;
3373       if (iv && iv->biv_p && !integer_zerop (iv->step))
3374         add_iv_candidate_for_biv (data, iv);
3375     }
3376 }
3377
3378 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3379
3380 static void
3381 record_common_cand (struct ivopts_data *data, tree base,
3382                     tree step, struct iv_use *use)
3383 {
3384   class iv_common_cand ent;
3385   class iv_common_cand **slot;
3386
3387   ent.base = base;
3388   ent.step = step;
3389   ent.hash = iterative_hash_expr (base, 0);
3390   ent.hash = iterative_hash_expr (step, ent.hash);
3391
3392   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3393   if (*slot == NULL)
3394     {
3395       *slot = new iv_common_cand ();
3396       (*slot)->base = base;
3397       (*slot)->step = step;
3398       (*slot)->uses.create (8);
3399       (*slot)->hash = ent.hash;
3400       data->iv_common_cands.safe_push ((*slot));
3401     }
3402
3403   gcc_assert (use != NULL);
3404   (*slot)->uses.safe_push (use);
3405   return;
3406 }
3407
3408 /* Comparison function used to sort common candidates.  */
3409
3410 static int
3411 common_cand_cmp (const void *p1, const void *p2)
3412 {
3413   unsigned n1, n2;
3414   const class iv_common_cand *const *const ccand1
3415     = (const class iv_common_cand *const *)p1;
3416   const class iv_common_cand *const *const ccand2
3417     = (const class iv_common_cand *const *)p2;
3418
3419   n1 = (*ccand1)->uses.length ();
3420   n2 = (*ccand2)->uses.length ();
3421   return n2 - n1;
3422 }
3423
3424 /* Adds IV candidates based on common candidated recorded.  */
3425
3426 static void
3427 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3428 {
3429   unsigned i, j;
3430   struct iv_cand *cand_1, *cand_2;
3431
3432   data->iv_common_cands.qsort (common_cand_cmp);
3433   for (i = 0; i < data->iv_common_cands.length (); i++)
3434     {
3435       class iv_common_cand *ptr = data->iv_common_cands[i];
3436
3437       /* Only add IV candidate if it's derived from multiple uses.  */
3438       if (ptr->uses.length () <= 1)
3439         break;
3440
3441       cand_1 = NULL;
3442       cand_2 = NULL;
3443       if (ip_normal_pos (data->current_loop))
3444         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3445                                   false, IP_NORMAL, NULL, NULL);
3446
3447       if (ip_end_pos (data->current_loop)
3448           && allow_ip_end_pos_p (data->current_loop))
3449         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3450                                   false, IP_END, NULL, NULL);
3451
3452       /* Bind deriving uses and the new candidates.  */
3453       for (j = 0; j < ptr->uses.length (); j++)
3454         {
3455           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3456           if (cand_1)
3457             bitmap_set_bit (group->related_cands, cand_1->id);
3458           if (cand_2)
3459             bitmap_set_bit (group->related_cands, cand_2->id);
3460         }
3461     }
3462
3463   /* Release data since it is useless from this point.  */
3464   data->iv_common_cand_tab->empty ();
3465   data->iv_common_cands.truncate (0);
3466 }
3467
3468 /* Adds candidates based on the value of USE's iv.  */
3469
3470 static void
3471 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3472 {
3473   poly_uint64 offset;
3474   tree base;
3475   tree basetype;
3476   struct iv *iv = use->iv;
3477
3478   add_candidate (data, iv->base, iv->step, false, use);
3479
3480   /* Record common candidate for use in case it can be shared by others.  */
3481   record_common_cand (data, iv->base, iv->step, use);
3482
3483   /* Record common candidate with initial value zero.  */
3484   basetype = TREE_TYPE (iv->base);
3485   if (POINTER_TYPE_P (basetype))
3486     basetype = sizetype;
3487   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3488
3489   /* Compare the cost of an address with an unscaled index with the cost of
3490     an address with a scaled index and add candidate if useful.  */
3491   poly_int64 step;
3492   if (use != NULL
3493       && poly_int_tree_p (iv->step, &step)
3494       && address_p (use->type))
3495     {
3496       poly_int64 new_step;
3497       unsigned int fact = preferred_mem_scale_factor
3498         (use->iv->base,
3499          TYPE_MODE (use->mem_type),
3500          optimize_loop_for_speed_p (data->current_loop));
3501
3502       if (fact != 1
3503           && multiple_p (step, fact, &new_step))
3504         add_candidate (data, size_int (0),
3505                        wide_int_to_tree (sizetype, new_step),
3506                        true, NULL);
3507     }
3508
3509   /* Record common candidate with constant offset stripped in base.
3510      Like the use itself, we also add candidate directly for it.  */
3511   base = strip_offset (iv->base, &offset);
3512   if (maybe_ne (offset, 0U) || base != iv->base)
3513     {
3514       record_common_cand (data, base, iv->step, use);
3515       add_candidate (data, base, iv->step, false, use);
3516     }
3517
3518   /* Record common candidate with base_object removed in base.  */
3519   base = iv->base;
3520   STRIP_NOPS (base);
3521   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3522     {
3523       tree step = iv->step;
3524
3525       STRIP_NOPS (step);
3526       base = TREE_OPERAND (base, 1);
3527       step = fold_convert (sizetype, step);
3528       record_common_cand (data, base, step, use);
3529       /* Also record common candidate with offset stripped.  */
3530       base = strip_offset (base, &offset);
3531       if (maybe_ne (offset, 0U))
3532         record_common_cand (data, base, step, use);
3533     }
3534
3535   /* At last, add auto-incremental candidates.  Make such variables
3536      important since other iv uses with same base object may be based
3537      on it.  */
3538   if (use != NULL && address_p (use->type))
3539     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3540 }
3541
3542 /* Adds candidates based on the uses.  */
3543
3544 static void
3545 add_iv_candidate_for_groups (struct ivopts_data *data)
3546 {
3547   unsigned i;
3548
3549   /* Only add candidate for the first use in group.  */
3550   for (i = 0; i < data->vgroups.length (); i++)
3551     {
3552       struct iv_group *group = data->vgroups[i];
3553
3554       gcc_assert (group->vuses[0] != NULL);
3555       add_iv_candidate_for_use (data, group->vuses[0]);
3556     }
3557   add_iv_candidate_derived_from_uses (data);
3558 }
3559
3560 /* Record important candidates and add them to related_cands bitmaps.  */
3561
3562 static void
3563 record_important_candidates (struct ivopts_data *data)
3564 {
3565   unsigned i;
3566   struct iv_group *group;
3567
3568   for (i = 0; i < data->vcands.length (); i++)
3569     {
3570       struct iv_cand *cand = data->vcands[i];
3571
3572       if (cand->important)
3573         bitmap_set_bit (data->important_candidates, i);
3574     }
3575
3576   data->consider_all_candidates = (data->vcands.length ()
3577                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3578
3579   /* Add important candidates to groups' related_cands bitmaps.  */
3580   for (i = 0; i < data->vgroups.length (); i++)
3581     {
3582       group = data->vgroups[i];
3583       bitmap_ior_into (group->related_cands, data->important_candidates);
3584     }
3585 }
3586
3587 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3588    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3589    we allocate a simple list to every use.  */
3590
3591 static void
3592 alloc_use_cost_map (struct ivopts_data *data)
3593 {
3594   unsigned i, size, s;
3595
3596   for (i = 0; i < data->vgroups.length (); i++)
3597     {
3598       struct iv_group *group = data->vgroups[i];
3599
3600       if (data->consider_all_candidates)
3601         size = data->vcands.length ();
3602       else
3603         {
3604           s = bitmap_count_bits (group->related_cands);
3605
3606           /* Round up to the power of two, so that moduling by it is fast.  */
3607           size = s ? (1 << ceil_log2 (s)) : 1;
3608         }
3609
3610       group->n_map_members = size;
3611       group->cost_map = XCNEWVEC (class cost_pair, size);
3612     }
3613 }
3614
3615 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3616    on invariants INV_VARS and that the value used in expressing it is
3617    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3618
3619 static void
3620 set_group_iv_cost (struct ivopts_data *data,
3621                    struct iv_group *group, struct iv_cand *cand,
3622                    comp_cost cost, bitmap inv_vars, tree value,
3623                    enum tree_code comp, bitmap inv_exprs)
3624 {
3625   unsigned i, s;
3626
3627   if (cost.infinite_cost_p ())
3628     {
3629       BITMAP_FREE (inv_vars);
3630       BITMAP_FREE (inv_exprs);
3631       return;
3632     }
3633
3634   if (data->consider_all_candidates)
3635     {
3636       group->cost_map[cand->id].cand = cand;
3637       group->cost_map[cand->id].cost = cost;
3638       group->cost_map[cand->id].inv_vars = inv_vars;
3639       group->cost_map[cand->id].inv_exprs = inv_exprs;
3640       group->cost_map[cand->id].value = value;
3641       group->cost_map[cand->id].comp = comp;
3642       return;
3643     }
3644
3645   /* n_map_members is a power of two, so this computes modulo.  */
3646   s = cand->id & (group->n_map_members - 1);
3647   for (i = s; i < group->n_map_members; i++)
3648     if (!group->cost_map[i].cand)
3649       goto found;
3650   for (i = 0; i < s; i++)
3651     if (!group->cost_map[i].cand)
3652       goto found;
3653
3654   gcc_unreachable ();
3655
3656 found:
3657   group->cost_map[i].cand = cand;
3658   group->cost_map[i].cost = cost;
3659   group->cost_map[i].inv_vars = inv_vars;
3660   group->cost_map[i].inv_exprs = inv_exprs;
3661   group->cost_map[i].value = value;
3662   group->cost_map[i].comp = comp;
3663 }
3664
3665 /* Gets cost of (GROUP, CAND) pair.  */
3666
3667 static class cost_pair *
3668 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3669                    struct iv_cand *cand)
3670 {
3671   unsigned i, s;
3672   class cost_pair *ret;
3673
3674   if (!cand)
3675     return NULL;
3676
3677   if (data->consider_all_candidates)
3678     {
3679       ret = group->cost_map + cand->id;
3680       if (!ret->cand)
3681         return NULL;
3682
3683       return ret;
3684     }
3685
3686   /* n_map_members is a power of two, so this computes modulo.  */
3687   s = cand->id & (group->n_map_members - 1);
3688   for (i = s; i < group->n_map_members; i++)
3689     if (group->cost_map[i].cand == cand)
3690       return group->cost_map + i;
3691     else if (group->cost_map[i].cand == NULL)
3692       return NULL;
3693   for (i = 0; i < s; i++)
3694     if (group->cost_map[i].cand == cand)
3695       return group->cost_map + i;
3696     else if (group->cost_map[i].cand == NULL)
3697       return NULL;
3698
3699   return NULL;
3700 }
3701
3702 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3703 static rtx
3704 produce_memory_decl_rtl (tree obj, int *regno)
3705 {
3706   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3707   machine_mode address_mode = targetm.addr_space.address_mode (as);
3708   rtx x;
3709
3710   gcc_assert (obj);
3711   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3712     {
3713       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3714       x = gen_rtx_SYMBOL_REF (address_mode, name);
3715       SET_SYMBOL_REF_DECL (x, obj);
3716       x = gen_rtx_MEM (DECL_MODE (obj), x);
3717       set_mem_addr_space (x, as);
3718       targetm.encode_section_info (obj, x, true);
3719     }
3720   else
3721     {
3722       x = gen_raw_REG (address_mode, (*regno)++);
3723       x = gen_rtx_MEM (DECL_MODE (obj), x);
3724       set_mem_addr_space (x, as);
3725     }
3726
3727   return x;
3728 }
3729
3730 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3731    walk_tree.  DATA contains the actual fake register number.  */
3732
3733 static tree
3734 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3735 {
3736   tree obj = NULL_TREE;
3737   rtx x = NULL_RTX;
3738   int *regno = (int *) data;
3739
3740   switch (TREE_CODE (*expr_p))
3741     {
3742     case ADDR_EXPR:
3743       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3744            handled_component_p (*expr_p);
3745            expr_p = &TREE_OPERAND (*expr_p, 0))
3746         continue;
3747       obj = *expr_p;
3748       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3749         x = produce_memory_decl_rtl (obj, regno);
3750       break;
3751
3752     case SSA_NAME:
3753       *ws = 0;
3754       obj = SSA_NAME_VAR (*expr_p);
3755       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3756       if (!obj)
3757         return NULL_TREE;
3758       if (!DECL_RTL_SET_P (obj))
3759         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3760       break;
3761
3762     case VAR_DECL:
3763     case PARM_DECL:
3764     case RESULT_DECL:
3765       *ws = 0;
3766       obj = *expr_p;
3767
3768       if (DECL_RTL_SET_P (obj))
3769         break;
3770
3771       if (DECL_MODE (obj) == BLKmode)
3772         x = produce_memory_decl_rtl (obj, regno);
3773       else
3774         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3775
3776       break;
3777
3778     default:
3779       break;
3780     }
3781
3782   if (x)
3783     {
3784       decl_rtl_to_reset.safe_push (obj);
3785       SET_DECL_RTL (obj, x);
3786     }
3787
3788   return NULL_TREE;
3789 }
3790
3791 /* Predict whether the given loop will be transformed in the RTL
3792    doloop_optimize pass.  Attempt to duplicate some doloop_optimize checks.
3793    This is only for target independent checks, see targetm.predict_doloop_p
3794    for the target dependent ones.
3795
3796    Note that according to some initial investigation, some checks like costly
3797    niter check and invalid stmt scanning don't have much gains among general
3798    cases, so keep this as simple as possible first.
3799
3800    Some RTL specific checks seems unable to be checked in gimple, if any new
3801    checks or easy checks _are_ missing here, please add them.  */
3802
3803 static bool
3804 generic_predict_doloop_p (struct ivopts_data *data)
3805 {
3806   class loop *loop = data->current_loop;
3807
3808   /* Call target hook for target dependent checks.  */
3809   if (!targetm.predict_doloop_p (loop))
3810     {
3811       if (dump_file && (dump_flags & TDF_DETAILS))
3812         fprintf (dump_file, "Predict doloop failure due to"
3813                             " target specific checks.\n");
3814       return false;
3815     }
3816
3817   /* Similar to doloop_optimize, check iteration description to know it's
3818      suitable or not.  Keep it as simple as possible, feel free to extend it
3819      if you find any multiple exits cases matter.  */
3820   edge exit = single_dom_exit (loop);
3821   class tree_niter_desc *niter_desc;
3822   if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3823     {
3824       if (dump_file && (dump_flags & TDF_DETAILS))
3825         fprintf (dump_file, "Predict doloop failure due to"
3826                             " unexpected niters.\n");
3827       return false;
3828     }
3829
3830   /* Similar to doloop_optimize, check whether iteration count too small
3831      and not profitable.  */
3832   HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3833   if (est_niter == -1)
3834     est_niter = get_likely_max_loop_iterations_int (loop);
3835   if (est_niter >= 0 && est_niter < 3)
3836     {
3837       if (dump_file && (dump_flags & TDF_DETAILS))
3838         fprintf (dump_file,
3839                  "Predict doloop failure due to"
3840                  " too few iterations (%u).\n",
3841                  (unsigned int) est_niter);
3842       return false;
3843     }
3844
3845   return true;
3846 }
3847
3848 /* Determines cost of the computation of EXPR.  */
3849
3850 static unsigned
3851 computation_cost (tree expr, bool speed)
3852 {
3853   rtx_insn *seq;
3854   rtx rslt;
3855   tree type = TREE_TYPE (expr);
3856   unsigned cost;
3857   /* Avoid using hard regs in ways which may be unsupported.  */
3858   int regno = LAST_VIRTUAL_REGISTER + 1;
3859   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3860   enum node_frequency real_frequency = node->frequency;
3861
3862   node->frequency = NODE_FREQUENCY_NORMAL;
3863   crtl->maybe_hot_insn_p = speed;
3864   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3865   start_sequence ();
3866   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3867   seq = get_insns ();
3868   end_sequence ();
3869   default_rtl_profile ();
3870   node->frequency = real_frequency;
3871
3872   cost = seq_cost (seq, speed);
3873   if (MEM_P (rslt))
3874     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3875                           TYPE_ADDR_SPACE (type), speed);
3876   else if (!REG_P (rslt))
3877     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3878
3879   return cost;
3880 }
3881
3882 /* Returns variable containing the value of candidate CAND at statement AT.  */
3883
3884 static tree
3885 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3886 {
3887   if (stmt_after_increment (loop, cand, stmt))
3888     return cand->var_after;
3889   else
3890     return cand->var_before;
3891 }
3892
3893 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3894    same precision that is at least as wide as the precision of TYPE, stores
3895    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3896    type of A and B.  */
3897
3898 static tree
3899 determine_common_wider_type (tree *a, tree *b)
3900 {
3901   tree wider_type = NULL;
3902   tree suba, subb;
3903   tree atype = TREE_TYPE (*a);
3904
3905   if (CONVERT_EXPR_P (*a))
3906     {
3907       suba = TREE_OPERAND (*a, 0);
3908       wider_type = TREE_TYPE (suba);
3909       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3910         return atype;
3911     }
3912   else
3913     return atype;
3914
3915   if (CONVERT_EXPR_P (*b))
3916     {
3917       subb = TREE_OPERAND (*b, 0);
3918       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3919         return atype;
3920     }
3921   else
3922     return atype;
3923
3924   *a = suba;
3925   *b = subb;
3926   return wider_type;
3927 }
3928
3929 /* Determines the expression by that USE is expressed from induction variable
3930    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3931    decomposed form.  The invariant part is stored in AFF_INV; while variant
3932    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3933    non-null.  Returns false if USE cannot be expressed using CAND.  */
3934
3935 static bool
3936 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3937                        struct iv_cand *cand, class aff_tree *aff_inv,
3938                        class aff_tree *aff_var, widest_int *prat = NULL)
3939 {
3940   tree ubase = use->iv->base, ustep = use->iv->step;
3941   tree cbase = cand->iv->base, cstep = cand->iv->step;
3942   tree common_type, uutype, var, cstep_common;
3943   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3944   aff_tree aff_cbase;
3945   widest_int rat;
3946
3947   /* We must have a precision to express the values of use.  */
3948   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3949     return false;
3950
3951   var = var_at_stmt (loop, cand, at);
3952   uutype = unsigned_type_for (utype);
3953
3954   /* If the conversion is not noop, perform it.  */
3955   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3956     {
3957       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3958           && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3959         {
3960           tree inner_base, inner_step, inner_type;
3961           inner_base = TREE_OPERAND (cbase, 0);
3962           if (CONVERT_EXPR_P (cstep))
3963             inner_step = TREE_OPERAND (cstep, 0);
3964           else
3965             inner_step = cstep;
3966
3967           inner_type = TREE_TYPE (inner_base);
3968           /* If candidate is added from a biv whose type is smaller than
3969              ctype, we know both candidate and the biv won't overflow.
3970              In this case, it's safe to skip the convertion in candidate.
3971              As an example, (unsigned short)((unsigned long)A) equals to
3972              (unsigned short)A, if A has a type no larger than short.  */
3973           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3974             {
3975               cbase = inner_base;
3976               cstep = inner_step;
3977             }
3978         }
3979       cbase = fold_convert (uutype, cbase);
3980       cstep = fold_convert (uutype, cstep);
3981       var = fold_convert (uutype, var);
3982     }
3983
3984   /* Ratio is 1 when computing the value of biv cand by itself.
3985      We can't rely on constant_multiple_of in this case because the
3986      use is created after the original biv is selected.  The call
3987      could fail because of inconsistent fold behavior.  See PR68021
3988      for more information.  */
3989   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3990     {
3991       gcc_assert (is_gimple_assign (use->stmt));
3992       gcc_assert (use->iv->ssa_name == cand->var_after);
3993       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3994       rat = 1;
3995     }
3996   else if (!constant_multiple_of (ustep, cstep, &rat))
3997     return false;
3998
3999   if (prat)
4000     *prat = rat;
4001
4002   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4003      type, we achieve better folding by computing their difference in this
4004      wider type, and cast the result to UUTYPE.  We do not need to worry about
4005      overflows, as all the arithmetics will in the end be performed in UUTYPE
4006      anyway.  */
4007   common_type = determine_common_wider_type (&ubase, &cbase);
4008
4009   /* use = ubase - ratio * cbase + ratio * var.  */
4010   tree_to_aff_combination (ubase, common_type, aff_inv);
4011   tree_to_aff_combination (cbase, common_type, &aff_cbase);
4012   tree_to_aff_combination (var, uutype, aff_var);
4013
4014   /* We need to shift the value if we are after the increment.  */
4015   if (stmt_after_increment (loop, cand, at))
4016     {
4017       aff_tree cstep_aff;
4018
4019       if (common_type != uutype)
4020         cstep_common = fold_convert (common_type, cstep);
4021       else
4022         cstep_common = cstep;
4023
4024       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4025       aff_combination_add (&aff_cbase, &cstep_aff);
4026     }
4027
4028   aff_combination_scale (&aff_cbase, -rat);
4029   aff_combination_add (aff_inv, &aff_cbase);
4030   if (common_type != uutype)
4031     aff_combination_convert (aff_inv, uutype);
4032
4033   aff_combination_scale (aff_var, rat);
4034   return true;
4035 }
4036
4037 /* Determines the expression by that USE is expressed from induction variable
4038    CAND at statement AT in LOOP.  The expression is stored in a decomposed
4039    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
4040
4041 static bool
4042 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4043                      struct iv_cand *cand, class aff_tree *aff)
4044 {
4045   aff_tree aff_var;
4046
4047   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4048     return false;
4049
4050   aff_combination_add (aff, &aff_var);
4051   return true;
4052 }
4053
4054 /* Return the type of USE.  */
4055
4056 static tree
4057 get_use_type (struct iv_use *use)
4058 {
4059   tree base_type = TREE_TYPE (use->iv->base);
4060   tree type;
4061
4062   if (use->type == USE_REF_ADDRESS)
4063     {
4064       /* The base_type may be a void pointer.  Create a pointer type based on
4065          the mem_ref instead.  */
4066       type = build_pointer_type (TREE_TYPE (*use->op_p));
4067       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4068                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4069     }
4070   else
4071     type = base_type;
4072
4073   return type;
4074 }
4075
4076 /* Determines the expression by that USE is expressed from induction variable
4077    CAND at statement AT in LOOP.  The computation is unshared.  */
4078
4079 static tree
4080 get_computation_at (class loop *loop, gimple *at,
4081                     struct iv_use *use, struct iv_cand *cand)
4082 {
4083   aff_tree aff;
4084   tree type = get_use_type (use);
4085
4086   if (!get_computation_aff (loop, at, use, cand, &aff))
4087     return NULL_TREE;
4088   unshare_aff_combination (&aff);
4089   return fold_convert (type, aff_combination_to_tree (&aff));
4090 }
4091
4092 /* Like get_computation_at, but try harder, even if the computation
4093    is more expensive.  Intended for debug stmts.  */
4094
4095 static tree
4096 get_debug_computation_at (class loop *loop, gimple *at,
4097                           struct iv_use *use, struct iv_cand *cand)
4098 {
4099   if (tree ret = get_computation_at (loop, at, use, cand))
4100     return ret;
4101
4102   tree ubase = use->iv->base, ustep = use->iv->step;
4103   tree cbase = cand->iv->base, cstep = cand->iv->step;
4104   tree var;
4105   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4106   widest_int rat;
4107
4108   /* We must have a precision to express the values of use.  */
4109   if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4110     return NULL_TREE;
4111
4112   /* Try to handle the case that get_computation_at doesn't,
4113      try to express
4114      use = ubase + (var - cbase) / ratio.  */
4115   if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4116                              &rat))
4117     return NULL_TREE;
4118
4119   bool neg_p = false;
4120   if (wi::neg_p (rat))
4121     {
4122       if (TYPE_UNSIGNED (ctype))
4123         return NULL_TREE;
4124       neg_p = true;
4125       rat = wi::neg (rat);
4126     }
4127
4128   /* If both IVs can wrap around and CAND doesn't have a power of two step,
4129      it is unsafe.  Consider uint16_t CAND with step 9, when wrapping around,
4130      the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4131      uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4132      ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.  */
4133   if (!use->iv->no_overflow
4134       && !cand->iv->no_overflow
4135       && !integer_pow2p (cstep))
4136     return NULL_TREE;
4137
4138   int bits = wi::exact_log2 (rat);
4139   if (bits == -1)
4140     bits = wi::floor_log2 (rat) + 1;
4141   if (!cand->iv->no_overflow
4142       && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4143     return NULL_TREE;
4144
4145   var = var_at_stmt (loop, cand, at);
4146
4147   if (POINTER_TYPE_P (ctype))
4148     {
4149       ctype = unsigned_type_for (ctype);
4150       cbase = fold_convert (ctype, cbase);
4151       cstep = fold_convert (ctype, cstep);
4152       var = fold_convert (ctype, var);
4153     }
4154
4155   if (stmt_after_increment (loop, cand, at))
4156     var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4157                        unshare_expr (cstep));
4158
4159   var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4160   var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4161                      wide_int_to_tree (TREE_TYPE (var), rat));
4162   if (POINTER_TYPE_P (utype))
4163     {
4164       var = fold_convert (sizetype, var);
4165       if (neg_p)
4166         var = fold_build1 (NEGATE_EXPR, sizetype, var);
4167       var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4168     }
4169   else
4170     {
4171       var = fold_convert (utype, var);
4172       var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4173                          ubase, var);
4174     }
4175   return var;
4176 }
4177
4178 /* Adjust the cost COST for being in loop setup rather than loop body.
4179    If we're optimizing for space, the loop setup overhead is constant;
4180    if we're optimizing for speed, amortize it over the per-iteration cost.
4181    If ROUND_UP_P is true, the result is round up rather than to zero when
4182    optimizing for speed.  */
4183 static int64_t
4184 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4185                    bool round_up_p = false)
4186 {
4187   if (cost == INFTY)
4188     return cost;
4189   else if (optimize_loop_for_speed_p (data->current_loop))
4190     {
4191       int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4192       return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4193     }
4194   else
4195     return cost;
4196 }
4197
4198 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4199    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4200    calculating the operands of EXPR.  Returns true if successful, and returns
4201    the cost in COST.  */
4202
4203 static bool
4204 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4205                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4206 {
4207   comp_cost res;
4208   tree op1 = TREE_OPERAND (expr, 1);
4209   tree cst = TREE_OPERAND (mult, 1);
4210   tree multop = TREE_OPERAND (mult, 0);
4211   int m = exact_log2 (int_cst_value (cst));
4212   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4213   int as_cost, sa_cost;
4214   bool mult_in_op1;
4215
4216   if (!(m >= 0 && m < maxm))
4217     return false;
4218
4219   STRIP_NOPS (op1);
4220   mult_in_op1 = operand_equal_p (op1, mult, 0);
4221
4222   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4223
4224   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4225      use that in preference to a shift insn followed by an add insn.  */
4226   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4227              ? shiftadd_cost (speed, mode, m)
4228              : (mult_in_op1
4229                 ? shiftsub1_cost (speed, mode, m)
4230                 : shiftsub0_cost (speed, mode, m)));
4231
4232   res = comp_cost (MIN (as_cost, sa_cost), 0);
4233   res += (mult_in_op1 ? cost0 : cost1);
4234
4235   STRIP_NOPS (multop);
4236   if (!is_gimple_val (multop))
4237     res += force_expr_to_var_cost (multop, speed);
4238
4239   *cost = res;
4240   return true;
4241 }
4242
4243 /* Estimates cost of forcing expression EXPR into a variable.  */
4244
4245 static comp_cost
4246 force_expr_to_var_cost (tree expr, bool speed)
4247 {
4248   static bool costs_initialized = false;
4249   static unsigned integer_cost [2];
4250   static unsigned symbol_cost [2];
4251   static unsigned address_cost [2];
4252   tree op0, op1;
4253   comp_cost cost0, cost1, cost;
4254   machine_mode mode;
4255   scalar_int_mode int_mode;
4256
4257   if (!costs_initialized)
4258     {
4259       tree type = build_pointer_type (integer_type_node);
4260       tree var, addr;
4261       rtx x;
4262       int i;
4263
4264       var = create_tmp_var_raw (integer_type_node, "test_var");
4265       TREE_STATIC (var) = 1;
4266       x = produce_memory_decl_rtl (var, NULL);
4267       SET_DECL_RTL (var, x);
4268
4269       addr = build1 (ADDR_EXPR, type, var);
4270
4271
4272       for (i = 0; i < 2; i++)
4273         {
4274           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4275                                                              2000), i);
4276
4277           symbol_cost[i] = computation_cost (addr, i) + 1;
4278
4279           address_cost[i]
4280             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4281           if (dump_file && (dump_flags & TDF_DETAILS))
4282             {
4283               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4284               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4285               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4286               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4287               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4288               fprintf (dump_file, "\n");
4289             }
4290         }
4291
4292       costs_initialized = true;
4293     }
4294
4295   STRIP_NOPS (expr);
4296
4297   if (SSA_VAR_P (expr))
4298     return no_cost;
4299
4300   if (is_gimple_min_invariant (expr))
4301     {
4302       if (poly_int_tree_p (expr))
4303         return comp_cost (integer_cost [speed], 0);
4304
4305       if (TREE_CODE (expr) == ADDR_EXPR)
4306         {
4307           tree obj = TREE_OPERAND (expr, 0);
4308
4309           if (VAR_P (obj)
4310               || TREE_CODE (obj) == PARM_DECL
4311               || TREE_CODE (obj) == RESULT_DECL)
4312             return comp_cost (symbol_cost [speed], 0);
4313         }
4314
4315       return comp_cost (address_cost [speed], 0);
4316     }
4317
4318   switch (TREE_CODE (expr))
4319     {
4320     case POINTER_PLUS_EXPR:
4321     case PLUS_EXPR:
4322     case MINUS_EXPR:
4323     case MULT_EXPR:
4324     case TRUNC_DIV_EXPR:
4325     case BIT_AND_EXPR:
4326     case BIT_IOR_EXPR:
4327     case LSHIFT_EXPR:
4328     case RSHIFT_EXPR:
4329       op0 = TREE_OPERAND (expr, 0);
4330       op1 = TREE_OPERAND (expr, 1);
4331       STRIP_NOPS (op0);
4332       STRIP_NOPS (op1);
4333       break;
4334
4335     CASE_CONVERT:
4336     case NEGATE_EXPR:
4337     case BIT_NOT_EXPR:
4338       op0 = TREE_OPERAND (expr, 0);
4339       STRIP_NOPS (op0);
4340       op1 = NULL_TREE;
4341       break;
4342     /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we
4343        introduce COND_EXPR for IV base, need to support better cost estimation
4344        for this COND_EXPR and tcc_comparison.  */
4345     case COND_EXPR:
4346       op0 = TREE_OPERAND (expr, 1);
4347       STRIP_NOPS (op0);
4348       op1 = TREE_OPERAND (expr, 2);
4349       STRIP_NOPS (op1);
4350       break;
4351     case LT_EXPR:
4352     case LE_EXPR:
4353     case GT_EXPR:
4354     case GE_EXPR:
4355     case EQ_EXPR:
4356     case NE_EXPR:
4357     case UNORDERED_EXPR:
4358     case ORDERED_EXPR:
4359     case UNLT_EXPR:
4360     case UNLE_EXPR:
4361     case UNGT_EXPR:
4362     case UNGE_EXPR:
4363     case UNEQ_EXPR:
4364     case LTGT_EXPR:
4365     case MAX_EXPR:
4366     case MIN_EXPR:
4367       op0 = TREE_OPERAND (expr, 0);
4368       STRIP_NOPS (op0);
4369       op1 = TREE_OPERAND (expr, 1);
4370       STRIP_NOPS (op1);
4371       break;
4372
4373     default:
4374       /* Just an arbitrary value, FIXME.  */
4375       return comp_cost (target_spill_cost[speed], 0);
4376     }
4377
4378   if (op0 == NULL_TREE
4379       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4380     cost0 = no_cost;
4381   else
4382     cost0 = force_expr_to_var_cost (op0, speed);
4383
4384   if (op1 == NULL_TREE
4385       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4386     cost1 = no_cost;
4387   else
4388     cost1 = force_expr_to_var_cost (op1, speed);
4389
4390   mode = TYPE_MODE (TREE_TYPE (expr));
4391   switch (TREE_CODE (expr))
4392     {
4393     case POINTER_PLUS_EXPR:
4394     case PLUS_EXPR:
4395     case MINUS_EXPR:
4396     case NEGATE_EXPR:
4397       cost = comp_cost (add_cost (speed, mode), 0);
4398       if (TREE_CODE (expr) != NEGATE_EXPR)
4399         {
4400           tree mult = NULL_TREE;
4401           comp_cost sa_cost;
4402           if (TREE_CODE (op1) == MULT_EXPR)
4403             mult = op1;
4404           else if (TREE_CODE (op0) == MULT_EXPR)
4405             mult = op0;
4406
4407           if (mult != NULL_TREE
4408               && is_a <scalar_int_mode> (mode, &int_mode)
4409               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4410               && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4411                                     speed, &sa_cost))
4412             return sa_cost;
4413         }
4414       break;
4415
4416     CASE_CONVERT:
4417       {
4418         tree inner_mode, outer_mode;
4419         outer_mode = TREE_TYPE (expr);
4420         inner_mode = TREE_TYPE (op0);
4421         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4422                                        TYPE_MODE (inner_mode), speed), 0);
4423       }
4424       break;
4425
4426     case MULT_EXPR:
4427       if (cst_and_fits_in_hwi (op0))
4428         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4429                                              mode, speed), 0);
4430       else if (cst_and_fits_in_hwi (op1))
4431         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4432                                              mode, speed), 0);
4433       else
4434         return comp_cost (target_spill_cost [speed], 0);
4435       break;
4436
4437     case TRUNC_DIV_EXPR:
4438       /* Division by power of two is usually cheap, so we allow it.  Forbid
4439          anything else.  */
4440       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4441         cost = comp_cost (add_cost (speed, mode), 0);
4442       else
4443         cost = comp_cost (target_spill_cost[speed], 0);
4444       break;
4445
4446     case BIT_AND_EXPR:
4447     case BIT_IOR_EXPR:
4448     case BIT_NOT_EXPR:
4449     case LSHIFT_EXPR:
4450     case RSHIFT_EXPR:
4451       cost = comp_cost (add_cost (speed, mode), 0);
4452       break;
4453     case COND_EXPR:
4454       op0 = TREE_OPERAND (expr, 0);
4455       STRIP_NOPS (op0);
4456       if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4457           || CONSTANT_CLASS_P (op0))
4458         cost = no_cost;
4459       else
4460         cost = force_expr_to_var_cost (op0, speed);
4461       break;
4462     case LT_EXPR:
4463     case LE_EXPR:
4464     case GT_EXPR:
4465     case GE_EXPR:
4466     case EQ_EXPR:
4467     case NE_EXPR:
4468     case UNORDERED_EXPR:
4469     case ORDERED_EXPR:
4470     case UNLT_EXPR:
4471     case UNLE_EXPR:
4472     case UNGT_EXPR:
4473     case UNGE_EXPR:
4474     case UNEQ_EXPR:
4475     case LTGT_EXPR:
4476     case MAX_EXPR:
4477     case MIN_EXPR:
4478       /* Simply use add cost for now, FIXME if there is some more accurate cost
4479          evaluation way.  */
4480       cost = comp_cost (add_cost (speed, mode), 0);
4481       break;
4482
4483     default:
4484       gcc_unreachable ();
4485     }
4486
4487   cost += cost0;
4488   cost += cost1;
4489   return cost;
4490 }
4491
4492 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4493    invariants the computation depends on.  */
4494
4495 static comp_cost
4496 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4497 {
4498   if (!expr)
4499     return no_cost;
4500
4501   find_inv_vars (data, &expr, inv_vars);
4502   return force_expr_to_var_cost (expr, data->speed);
4503 }
4504
4505 /* Returns cost of auto-modifying address expression in shape base + offset.
4506    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4507    address expression.  The address expression has ADDR_MODE in addr space
4508    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4509    speed or size.  */
4510
4511 enum ainc_type
4512 {
4513   AINC_PRE_INC,         /* Pre increment.  */
4514   AINC_PRE_DEC,         /* Pre decrement.  */
4515   AINC_POST_INC,        /* Post increment.  */
4516   AINC_POST_DEC,        /* Post decrement.  */
4517   AINC_NONE             /* Also the number of auto increment types.  */
4518 };
4519
4520 struct ainc_cost_data
4521 {
4522   int64_t costs[AINC_NONE];
4523 };
4524
4525 static comp_cost
4526 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4527                        machine_mode addr_mode, machine_mode mem_mode,
4528                        addr_space_t as, bool speed)
4529 {
4530   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4531       && !USE_STORE_PRE_DECREMENT (mem_mode)
4532       && !USE_LOAD_POST_DECREMENT (mem_mode)
4533       && !USE_STORE_POST_DECREMENT (mem_mode)
4534       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4535       && !USE_STORE_PRE_INCREMENT (mem_mode)
4536       && !USE_LOAD_POST_INCREMENT (mem_mode)
4537       && !USE_STORE_POST_INCREMENT (mem_mode))
4538     return infinite_cost;
4539
4540   static vec<ainc_cost_data *> ainc_cost_data_list;
4541   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4542   if (idx >= ainc_cost_data_list.length ())
4543     {
4544       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4545
4546       gcc_assert (nsize > idx);
4547       ainc_cost_data_list.safe_grow_cleared (nsize);
4548     }
4549
4550   ainc_cost_data *data = ainc_cost_data_list[idx];
4551   if (data == NULL)
4552     {
4553       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4554
4555       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4556       data->costs[AINC_PRE_DEC] = INFTY;
4557       data->costs[AINC_POST_DEC] = INFTY;
4558       data->costs[AINC_PRE_INC] = INFTY;
4559       data->costs[AINC_POST_INC] = INFTY;
4560       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4561           || USE_STORE_PRE_DECREMENT (mem_mode))
4562         {
4563           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4564
4565           if (memory_address_addr_space_p (mem_mode, addr, as))
4566             data->costs[AINC_PRE_DEC]
4567               = address_cost (addr, mem_mode, as, speed);
4568         }
4569       if (USE_LOAD_POST_DECREMENT (mem_mode)
4570           || USE_STORE_POST_DECREMENT (mem_mode))
4571         {
4572           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4573
4574           if (memory_address_addr_space_p (mem_mode, addr, as))
4575             data->costs[AINC_POST_DEC]
4576               = address_cost (addr, mem_mode, as, speed);
4577         }
4578       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4579           || USE_STORE_PRE_INCREMENT (mem_mode))
4580         {
4581           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4582
4583           if (memory_address_addr_space_p (mem_mode, addr, as))
4584             data->costs[AINC_PRE_INC]
4585               = address_cost (addr, mem_mode, as, speed);
4586         }
4587       if (USE_LOAD_POST_INCREMENT (mem_mode)
4588           || USE_STORE_POST_INCREMENT (mem_mode))
4589         {
4590           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4591
4592           if (memory_address_addr_space_p (mem_mode, addr, as))
4593             data->costs[AINC_POST_INC]
4594               = address_cost (addr, mem_mode, as, speed);
4595         }
4596       ainc_cost_data_list[idx] = data;
4597     }
4598
4599   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4600   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4601     return comp_cost (data->costs[AINC_POST_INC], 0);
4602   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4603     return comp_cost (data->costs[AINC_POST_DEC], 0);
4604   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4605     return comp_cost (data->costs[AINC_PRE_INC], 0);
4606   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4607     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4608
4609   return infinite_cost;
4610 }
4611
/* Return cost of computing USE's address expression by using CAND.
   AFF_INV and AFF_VAR represent invariant and variant parts of the
   address expression, respectively.  If AFF_INV is simple, store
   the loop invariant variables it depends on in INV_VARS;
   if AFF_INV is complicated, handle it as a new invariant expression
   and record it in INV_EXPR.  RATIO is the constant multiple relating
   the steps of USE and CAND.  If CAN_AUTOINC is non-NULL, store a boolean
   value to it indicating whether this is an auto-increment address.  */
4620
4621 static comp_cost
4622 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4623                   struct iv_cand *cand, aff_tree *aff_inv,
4624                   aff_tree *aff_var, HOST_WIDE_INT ratio,
4625                   bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4626                   bool *can_autoinc, bool speed)
4627 {
4628   rtx addr;
4629   bool simple_inv = true;
4630   tree comp_inv = NULL_TREE, type = aff_var->type;
4631   comp_cost var_cost = no_cost, cost = no_cost;
4632   struct mem_address parts = {NULL_TREE, integer_one_node,
4633                               NULL_TREE, NULL_TREE, NULL_TREE};
4634   machine_mode addr_mode = TYPE_MODE (type);
4635   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4636   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4637   /* Only true if ratio != 1.  */
4638   bool ok_with_ratio_p = false;
4639   bool ok_without_ratio_p = false;
4640
4641   if (!aff_combination_const_p (aff_inv))
4642     {
4643       parts.index = integer_one_node;
4644       /* Addressing mode "base + index".  */
4645       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4646       if (ratio != 1)
4647         {
4648           parts.step = wide_int_to_tree (type, ratio);
4649           /* Addressing mode "base + index << scale".  */
4650           ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4651           if (!ok_with_ratio_p)
4652             parts.step = NULL_TREE;
4653         }
4654       if (ok_with_ratio_p || ok_without_ratio_p)
4655         {
4656           if (maybe_ne (aff_inv->offset, 0))
4657             {
4658               parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4659               /* Addressing mode "base + index [<< scale] + offset".  */
4660               if (!valid_mem_ref_p (mem_mode, as, &parts))
4661                 parts.offset = NULL_TREE;
4662               else
4663                 aff_inv->offset = 0;
4664             }
4665
4666           move_fixed_address_to_symbol (&parts, aff_inv);
4667           /* Base is fixed address and is moved to symbol part.  */
4668           if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4669             parts.base = NULL_TREE;
4670
4671           /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4672           if (parts.symbol != NULL_TREE
4673               && !valid_mem_ref_p (mem_mode, as, &parts))
4674             {
4675               aff_combination_add_elt (aff_inv, parts.symbol, 1);
4676               parts.symbol = NULL_TREE;
4677               /* Reset SIMPLE_INV since symbol address needs to be computed
4678                  outside of address expression in this case.  */
4679               simple_inv = false;
4680               /* Symbol part is moved back to base part, it can't be NULL.  */
4681               parts.base = integer_one_node;
4682             }
4683         }
4684       else
4685         parts.index = NULL_TREE;
4686     }
4687   else
4688     {
4689       poly_int64 ainc_step;
4690       if (can_autoinc
4691           && ratio == 1
4692           && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4693         {
4694           poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4695
4696           if (stmt_after_increment (data->current_loop, cand, use->stmt))
4697             ainc_offset += ainc_step;
4698           cost = get_address_cost_ainc (ainc_step, ainc_offset,
4699                                         addr_mode, mem_mode, as, speed);
4700           if (!cost.infinite_cost_p ())
4701             {
4702               *can_autoinc = true;
4703               return cost;
4704             }
4705           cost = no_cost;
4706         }
4707       if (!aff_combination_zero_p (aff_inv))
4708         {
4709           parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4710           /* Addressing mode "base + offset".  */
4711           if (!valid_mem_ref_p (mem_mode, as, &parts))
4712             parts.offset = NULL_TREE;
4713           else
4714             aff_inv->offset = 0;
4715         }
4716     }
4717
4718   if (simple_inv)
4719     simple_inv = (aff_inv == NULL
4720                   || aff_combination_const_p (aff_inv)
4721                   || aff_combination_singleton_var_p (aff_inv));
4722   if (!aff_combination_zero_p (aff_inv))
4723     comp_inv = aff_combination_to_tree (aff_inv);
4724   if (comp_inv != NULL_TREE)
4725     cost = force_var_cost (data, comp_inv, inv_vars);
4726   if (ratio != 1 && parts.step == NULL_TREE)
4727     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4728   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4729     var_cost += add_cost (speed, addr_mode);
4730
4731   if (comp_inv && inv_expr && !simple_inv)
4732     {
4733       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4734       /* Clear depends on.  */
4735       if (*inv_expr != NULL && inv_vars && *inv_vars)
4736         bitmap_clear (*inv_vars);
4737
4738       /* Cost of small invariant expression adjusted against loop niters
4739          is usually zero, which makes it difficult to be differentiated
4740          from candidate based on loop invariant variables.  Secondly, the
4741          generated invariant expression may not be hoisted out of loop by
4742          following pass.  We penalize the cost by rounding up in order to
4743          neutralize such effects.  */
4744       cost.cost = adjust_setup_cost (data, cost.cost, true);
4745       cost.scratch = cost.cost;
4746     }
4747
4748   cost += var_cost;
4749   addr = addr_for_mem_ref (&parts, as, false);
4750   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4751   cost += address_cost (addr, mem_mode, as, speed);
4752
4753   if (parts.symbol != NULL_TREE)
4754     cost.complexity += 1;
4755   /* Don't increase the complexity of adding a scaled index if it's
4756      the only kind of index that the target allows.  */
4757   if (parts.step != NULL_TREE && ok_without_ratio_p)
4758     cost.complexity += 1;
4759   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4760     cost.complexity += 1;
4761   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4762     cost.complexity += 1;
4763
4764   return cost;
4765 }
4766
4767 /* Scale (multiply) the computed COST (except scratch part that should be
4768    hoisted out a loop) by header->frequency / AT->frequency, which makes
4769    expected cost more accurate.  */
4770
4771 static comp_cost
4772 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4773 {
4774   if (data->speed
4775       && data->current_loop->header->count.to_frequency (cfun) > 0)
4776     {
4777       basic_block bb = gimple_bb (at);
4778       gcc_assert (cost.scratch <= cost.cost);
4779       int scale_factor = (int)(intptr_t) bb->aux;
4780       if (scale_factor == 1)
4781         return cost;
4782
4783       int64_t scaled_cost
4784         = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4785
4786       if (dump_file && (dump_flags & TDF_DETAILS))
4787         fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4788                  "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4789                  1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4790
4791       cost.cost = scaled_cost;
4792     }
4793
4794   return cost;
4795 }
4796
4797 /* Determines the cost of the computation by that USE is expressed
4798    from induction variable CAND.  If ADDRESS_P is true, we just need
4799    to create an address from it, otherwise we want to get it into
4800    register.  A set of invariants we depend on is stored in INV_VARS.
4801    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4802    addressing is likely.  If INV_EXPR is nonnull, record invariant
4803    expr entry in it.  */
4804
4805 static comp_cost
4806 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4807                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4808                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4809 {
4810   gimple *at = use->stmt;
4811   tree ubase = use->iv->base, cbase = cand->iv->base;
4812   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4813   tree comp_inv = NULL_TREE;
4814   HOST_WIDE_INT ratio, aratio;
4815   comp_cost cost;
4816   widest_int rat;
4817   aff_tree aff_inv, aff_var;
4818   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4819
4820   if (inv_vars)
4821     *inv_vars = NULL;
4822   if (can_autoinc)
4823     *can_autoinc = false;
4824   if (inv_expr)
4825     *inv_expr = NULL;
4826
4827   /* Check if we have enough precision to express the values of use.  */
4828   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4829     return infinite_cost;
4830
4831   if (address_p
4832       || (use->iv->base_object
4833           && cand->iv->base_object
4834           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4835           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4836     {
4837       /* Do not try to express address of an object with computation based
4838          on address of a different object.  This may cause problems in rtl
4839          level alias analysis (that does not expect this to be happening,
4840          as this is illegal in C), and would be unlikely to be useful
4841          anyway.  */
4842       if (use->iv->base_object
4843           && cand->iv->base_object
4844           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4845         return infinite_cost;
4846     }
4847
4848   if (!get_computation_aff_1 (data->current_loop, at, use,
4849                               cand, &aff_inv, &aff_var, &rat)
4850       || !wi::fits_shwi_p (rat))
4851     return infinite_cost;
4852
4853   ratio = rat.to_shwi ();
4854   if (address_p)
4855     {
4856       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4857                                inv_vars, inv_expr, can_autoinc, speed);
4858       cost = get_scaled_computation_cost_at (data, at, cost);
4859       /* For doloop IV cand, add on the extra cost.  */
4860       cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4861       return cost;
4862     }
4863
4864   bool simple_inv = (aff_combination_const_p (&aff_inv)
4865                      || aff_combination_singleton_var_p (&aff_inv));
4866   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4867   aff_combination_convert (&aff_inv, signed_type);
4868   if (!aff_combination_zero_p (&aff_inv))
4869     comp_inv = aff_combination_to_tree (&aff_inv);
4870
4871   cost = force_var_cost (data, comp_inv, inv_vars);
4872   if (comp_inv && inv_expr && !simple_inv)
4873     {
4874       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4875       /* Clear depends on.  */
4876       if (*inv_expr != NULL && inv_vars && *inv_vars)
4877         bitmap_clear (*inv_vars);
4878
4879       cost.cost = adjust_setup_cost (data, cost.cost);
4880       /* Record setup cost in scratch field.  */
4881       cost.scratch = cost.cost;
4882     }
4883   /* Cost of constant integer can be covered when adding invariant part to
4884      variant part.  */
4885   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4886     cost = no_cost;
4887
4888   /* Need type narrowing to represent use with cand.  */
4889   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4890     {
4891       machine_mode outer_mode = TYPE_MODE (utype);
4892       machine_mode inner_mode = TYPE_MODE (ctype);
4893       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4894     }
4895
4896   /* Turn a + i * (-c) into a - i * c.  */
4897   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4898     aratio = -ratio;
4899   else
4900     aratio = ratio;
4901
4902   if (ratio != 1)
4903     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4904
4905   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4906      instruction.  */
4907   /* Need to add up the invariant and variant parts.  */
4908   if (comp_inv && !integer_zerop (comp_inv))
4909     cost += add_cost (speed, TYPE_MODE (utype));
4910
4911   cost = get_scaled_computation_cost_at (data, at, cost);
4912
4913   /* For doloop IV cand, add on the extra cost.  */
4914   if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4915     cost += targetm.doloop_cost_for_generic;
4916
4917   return cost;
4918 }
4919
4920 /* Determines cost of computing the use in GROUP with CAND in a generic
4921    expression.  */
4922
4923 static bool
4924 determine_group_iv_cost_generic (struct ivopts_data *data,
4925                                  struct iv_group *group, struct iv_cand *cand)
4926 {
4927   comp_cost cost;
4928   iv_inv_expr_ent *inv_expr = NULL;
4929   bitmap inv_vars = NULL, inv_exprs = NULL;
4930   struct iv_use *use = group->vuses[0];
4931
4932   /* The simple case first -- if we need to express value of the preserved
4933      original biv, the cost is 0.  This also prevents us from counting the
4934      cost of increment twice -- once at this use and once in the cost of
4935      the candidate.  */
4936   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4937     cost = no_cost;
4938   else
4939     cost = get_computation_cost (data, use, cand, false,
4940                                  &inv_vars, NULL, &inv_expr);
4941
4942   if (inv_expr)
4943     {
4944       inv_exprs = BITMAP_ALLOC (NULL);
4945       bitmap_set_bit (inv_exprs, inv_expr->id);
4946     }
4947   set_group_iv_cost (data, group, cand, cost, inv_vars,
4948                      NULL_TREE, ERROR_MARK, inv_exprs);
4949   return !cost.infinite_cost_p ();
4950 }
4951
4952 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4953
4954 static bool
4955 determine_group_iv_cost_address (struct ivopts_data *data,
4956                                  struct iv_group *group, struct iv_cand *cand)
4957 {
4958   unsigned i;
4959   bitmap inv_vars = NULL, inv_exprs = NULL;
4960   bool can_autoinc;
4961   iv_inv_expr_ent *inv_expr = NULL;
4962   struct iv_use *use = group->vuses[0];
4963   comp_cost sum_cost = no_cost, cost;
4964
4965   cost = get_computation_cost (data, use, cand, true,
4966                                &inv_vars, &can_autoinc, &inv_expr);
4967
4968   if (inv_expr)
4969     {
4970       inv_exprs = BITMAP_ALLOC (NULL);
4971       bitmap_set_bit (inv_exprs, inv_expr->id);
4972     }
4973   sum_cost = cost;
4974   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4975     {
4976       if (can_autoinc)
4977         sum_cost -= cand->cost_step;
4978       /* If we generated the candidate solely for exploiting autoincrement
4979          opportunities, and it turns out it can't be used, set the cost to
4980          infinity to make sure we ignore it.  */
4981       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4982         sum_cost = infinite_cost;
4983     }
4984
4985   /* Uses in a group can share setup code, so only add setup cost once.  */
4986   cost -= cost.scratch;
4987   /* Compute and add costs for rest uses of this group.  */
4988   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4989     {
4990       struct iv_use *next = group->vuses[i];
4991
4992       /* TODO: We could skip computing cost for sub iv_use when it has the
4993          same cost as the first iv_use, but the cost really depends on the
4994          offset and where the iv_use is.  */
4995         cost = get_computation_cost (data, next, cand, true,
4996                                      NULL, &can_autoinc, &inv_expr);
4997         if (inv_expr)
4998           {
4999             if (!inv_exprs)
5000               inv_exprs = BITMAP_ALLOC (NULL);
5001
5002             bitmap_set_bit (inv_exprs, inv_expr->id);
5003           }
5004       sum_cost += cost;
5005     }
5006   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5007                      NULL_TREE, ERROR_MARK, inv_exprs);
5008
5009   return !sum_cost.infinite_cost_p ();
5010 }
5011
5012 /* Computes value of candidate CAND at position AT in iteration NITER, and
5013    stores it to VAL.  */
5014
5015 static void
5016 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5017                aff_tree *val)
5018 {
5019   aff_tree step, delta, nit;
5020   struct iv *iv = cand->iv;
5021   tree type = TREE_TYPE (iv->base);
5022   tree steptype;
5023   if (POINTER_TYPE_P (type))
5024     steptype = sizetype;
5025   else
5026     steptype = unsigned_type_for (type);
5027
5028   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5029   aff_combination_convert (&step, steptype);
5030   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5031   aff_combination_convert (&nit, steptype);
5032   aff_combination_mult (&nit, &step, &delta);
5033   if (stmt_after_increment (loop, cand, at))
5034     aff_combination_add (&delta, &step);
5035
5036   tree_to_aff_combination (iv->base, type, val);
5037   if (!POINTER_TYPE_P (type))
5038     aff_combination_convert (val, steptype);
5039   aff_combination_add (val, &delta);
5040 }
5041
5042 /* Returns period of induction variable iv.  */
5043
5044 static tree
5045 iv_period (struct iv *iv)
5046 {
5047   tree step = iv->step, period, type;
5048   tree pow2div;
5049
5050   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5051
5052   type = unsigned_type_for (TREE_TYPE (step));
5053   /* Period of the iv is lcm (step, type_range)/step -1,
5054      i.e., N*type_range/step - 1. Since type range is power
5055      of two, N == (step >> num_of_ending_zeros_binary (step),
5056      so the final result is
5057
5058        (type_range >> num_of_ending_zeros_binary (step)) - 1
5059
5060   */
5061   pow2div = num_ending_zeros (step);
5062
5063   period = build_low_bits_mask (type,
5064                                 (TYPE_PRECISION (type)
5065                                  - tree_to_uhwi (pow2div)));
5066
5067   return period;
5068 }
5069
5070 /* Returns the comparison operator used when eliminating the iv USE.  */
5071
5072 static enum tree_code
5073 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5074 {
5075   class loop *loop = data->current_loop;
5076   basic_block ex_bb;
5077   edge exit;
5078
5079   ex_bb = gimple_bb (use->stmt);
5080   exit = EDGE_SUCC (ex_bb, 0);
5081   if (flow_bb_inside_loop_p (loop, exit->dest))
5082     exit = EDGE_SUCC (ex_bb, 1);
5083
5084   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5085 }
5086
5087 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5088    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5089    calculation is performed in non-wrapping type.
5090
5091    TODO: More generally, we could test for the situation that
5092          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5093          This would require knowing the sign of OFFSET.  */
5094
5095 static bool
5096 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5097 {
5098   enum tree_code code;
5099   tree e1, e2;
5100   aff_tree aff_e1, aff_e2, aff_offset;
5101
5102   if (!nowrap_type_p (TREE_TYPE (base)))
5103     return false;
5104
5105   base = expand_simple_operations (base);
5106
5107   if (TREE_CODE (base) == SSA_NAME)
5108     {
5109       gimple *stmt = SSA_NAME_DEF_STMT (base);
5110
5111       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5112         return false;
5113
5114       code = gimple_assign_rhs_code (stmt);
5115       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5116         return false;
5117
5118       e1 = gimple_assign_rhs1 (stmt);
5119       e2 = gimple_assign_rhs2 (stmt);
5120     }
5121   else
5122     {
5123       code = TREE_CODE (base);
5124       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5125         return false;
5126       e1 = TREE_OPERAND (base, 0);
5127       e2 = TREE_OPERAND (base, 1);
5128     }
5129
5130   /* Use affine expansion as deeper inspection to prove the equality.  */
5131   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5132                                   &aff_e2, &data->name_expansion_cache);
5133   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5134                                   &aff_offset, &data->name_expansion_cache);
5135   aff_combination_scale (&aff_offset, -1);
5136   switch (code)
5137     {
5138     case PLUS_EXPR:
5139       aff_combination_add (&aff_e2, &aff_offset);
5140       if (aff_combination_zero_p (&aff_e2))
5141         return true;
5142
5143       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5144                                       &aff_e1, &data->name_expansion_cache);
5145       aff_combination_add (&aff_e1, &aff_offset);
5146       return aff_combination_zero_p (&aff_e1);
5147
5148     case POINTER_PLUS_EXPR:
5149       aff_combination_add (&aff_e2, &aff_offset);
5150       return aff_combination_zero_p (&aff_e2);
5151
5152     default:
5153       return false;
5154     }
5155 }
5156
5157 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5158    comparison with CAND.  NITER describes the number of iterations of
5159    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5160
5161    We aim to handle the following situation:
5162
5163    sometype *base, *p;
5164    int a, b, i;
5165
5166    i = a;
5167    p = p_0 = base + a;
5168
5169    do
5170      {
5171        bla (*p);
5172        p++;
5173        i++;
5174      }
5175    while (i < b);
5176
5177    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5178    We aim to optimize this to
5179
5180    p = p_0 = base + a;
5181    do
5182      {
5183        bla (*p);
5184        p++;
5185      }
5186    while (p < p_0 - a + b);
5187
5188    This preserves the correctness, since the pointer arithmetics does not
5189    overflow.  More precisely:
5190
5191    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5192       overflow in computing it or the values of p.
5193    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5194       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5195
5196 static bool
5197 iv_elimination_compare_lt (struct ivopts_data *data,
5198                            struct iv_cand *cand, enum tree_code *comp_p,
5199                            class tree_niter_desc *niter)
5200 {
5201   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5202   class aff_tree nit, tmpa, tmpb;
5203   enum tree_code comp;
5204   HOST_WIDE_INT step;
5205
5206   /* We need to know that the candidate induction variable does not overflow.
5207      While more complex analysis may be used to prove this, for now just
5208      check that the variable appears in the original program and that it
5209      is computed in a type that guarantees no overflows.  */
5210   cand_type = TREE_TYPE (cand->iv->base);
5211   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5212     return false;
5213
5214   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5215      the calculation of the BOUND could overflow, making the comparison
5216      invalid.  */
5217   if (!data->loop_single_exit_p)
5218     return false;
5219
5220   /* We need to be able to decide whether candidate is increasing or decreasing
5221      in order to choose the right comparison operator.  */
5222   if (!cst_and_fits_in_hwi (cand->iv->step))
5223     return false;
5224   step = int_cst_value (cand->iv->step);
5225
5226   /* Check that the number of iterations matches the expected pattern:
5227      a + 1 > b ? 0 : b - a - 1.  */
5228   mbz = niter->may_be_zero;
5229   if (TREE_CODE (mbz) == GT_EXPR)
5230     {
5231       /* Handle a + 1 > b.  */
5232       tree op0 = TREE_OPERAND (mbz, 0);
5233       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5234         {
5235           a = TREE_OPERAND (op0, 0);
5236           b = TREE_OPERAND (mbz, 1);
5237         }
5238       else
5239         return false;
5240     }
5241   else if (TREE_CODE (mbz) == LT_EXPR)
5242     {
5243       tree op1 = TREE_OPERAND (mbz, 1);
5244
5245       /* Handle b < a + 1.  */
5246       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5247         {
5248           a = TREE_OPERAND (op1, 0);
5249           b = TREE_OPERAND (mbz, 0);
5250         }
5251       else
5252         return false;
5253     }
5254   else
5255     return false;
5256
5257   /* Expected number of iterations is B - A - 1.  Check that it matches
5258      the actual number, i.e., that B - A - NITER = 1.  */
5259   tree_to_aff_combination (niter->niter, nit_type, &nit);
5260   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5261   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5262   aff_combination_scale (&nit, -1);
5263   aff_combination_scale (&tmpa, -1);
5264   aff_combination_add (&tmpb, &tmpa);
5265   aff_combination_add (&tmpb, &nit);
5266   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5267     return false;
5268
5269   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5270      overflow.  */
5271   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5272                         cand->iv->step,
5273                         fold_convert (TREE_TYPE (cand->iv->step), a));
5274   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5275     return false;
5276
5277   /* Determine the new comparison operator.  */
5278   comp = step < 0 ? GT_EXPR : LT_EXPR;
5279   if (*comp_p == NE_EXPR)
5280     *comp_p = comp;
5281   else if (*comp_p == EQ_EXPR)
5282     *comp_p = invert_tree_comparison (comp, false);
5283   else
5284     gcc_unreachable ();
5285
5286   return true;
5287 }
5288
5289 /* Check whether it is possible to express the condition in USE by comparison
5290    of candidate CAND.  If so, store the value compared with to BOUND, and the
5291    comparison operator to COMP.  */
5292
5293 static bool
5294 may_eliminate_iv (struct ivopts_data *data,
5295                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5296                   enum tree_code *comp)
5297 {
5298   basic_block ex_bb;
5299   edge exit;
5300   tree period;
5301   class loop *loop = data->current_loop;
5302   aff_tree bnd;
5303   class tree_niter_desc *desc = NULL;
5304
5305   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5306     return false;
5307
5308   /* For now works only for exits that dominate the loop latch.
5309      TODO: extend to other conditions inside loop body.  */
5310   ex_bb = gimple_bb (use->stmt);
5311   if (use->stmt != last_stmt (ex_bb)
5312       || gimple_code (use->stmt) != GIMPLE_COND
5313       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5314     return false;
5315
5316   exit = EDGE_SUCC (ex_bb, 0);
5317   if (flow_bb_inside_loop_p (loop, exit->dest))
5318     exit = EDGE_SUCC (ex_bb, 1);
5319   if (flow_bb_inside_loop_p (loop, exit->dest))
5320     return false;
5321
5322   desc = niter_for_exit (data, exit);
5323   if (!desc)
5324     return false;
5325
5326   /* Determine whether we can use the variable to test the exit condition.
5327      This is the case iff the period of the induction variable is greater
5328      than the number of iterations for which the exit condition is true.  */
5329   period = iv_period (cand->iv);
5330
5331   /* If the number of iterations is constant, compare against it directly.  */
5332   if (TREE_CODE (desc->niter) == INTEGER_CST)
5333     {
5334       /* See cand_value_at.  */
5335       if (stmt_after_increment (loop, cand, use->stmt))
5336         {
5337           if (!tree_int_cst_lt (desc->niter, period))
5338             return false;
5339         }
5340       else
5341         {
5342           if (tree_int_cst_lt (period, desc->niter))
5343             return false;
5344         }
5345     }
5346
5347   /* If not, and if this is the only possible exit of the loop, see whether
5348      we can get a conservative estimate on the number of iterations of the
5349      entire loop and compare against that instead.  */
5350   else
5351     {
5352       widest_int period_value, max_niter;
5353
5354       max_niter = desc->max;
5355       if (stmt_after_increment (loop, cand, use->stmt))
5356         max_niter += 1;
5357       period_value = wi::to_widest (period);
5358       if (wi::gtu_p (max_niter, period_value))
5359         {
5360           /* See if we can take advantage of inferred loop bound
5361              information.  */
5362           if (data->loop_single_exit_p)
5363             {
5364               if (!max_loop_iterations (loop, &max_niter))
5365                 return false;
5366               /* The loop bound is already adjusted by adding 1.  */
5367               if (wi::gtu_p (max_niter, period_value))
5368                 return false;
5369             }
5370           else
5371             return false;
5372         }
5373     }
5374
5375   /* For doloop IV cand, the bound would be zero.  It's safe whether
5376      may_be_zero set or not.  */
5377   if (cand->doloop_p)
5378     {
5379       *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5380       *comp = iv_elimination_compare (data, use);
5381       return true;
5382     }
5383
5384   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5385
5386   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5387                          aff_combination_to_tree (&bnd));
5388   *comp = iv_elimination_compare (data, use);
5389
5390   /* It is unlikely that computing the number of iterations using division
5391      would be more profitable than keeping the original induction variable.  */
5392   if (expression_expensive_p (*bound))
5393     return false;
5394
5395   /* Sometimes, it is possible to handle the situation that the number of
5396      iterations may be zero unless additional assumptions by using <
5397      instead of != in the exit condition.
5398
5399      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5400            base the exit condition on it.  However, that is often too
5401            expensive.  */
5402   if (!integer_zerop (desc->may_be_zero))
5403     return iv_elimination_compare_lt (data, cand, comp, desc);
5404
5405   return true;
5406 }
5407
5408  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5409     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5410
5411 static int
5412 parm_decl_cost (struct ivopts_data *data, tree bound)
5413 {
5414   tree sbound = bound;
5415   STRIP_NOPS (sbound);
5416
5417   if (TREE_CODE (sbound) == SSA_NAME
5418       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5419       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5420       && data->body_includes_call)
5421     return COSTS_N_INSNS (1);
5422
5423   return 0;
5424 }
5425
5426 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5427
5428 static bool
5429 determine_group_iv_cost_cond (struct ivopts_data *data,
5430                               struct iv_group *group, struct iv_cand *cand)
5431 {
5432   tree bound = NULL_TREE;
5433   struct iv *cmp_iv;
5434   bitmap inv_exprs = NULL;
5435   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5436   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5437   enum comp_iv_rewrite rewrite_type;
5438   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5439   tree *control_var, *bound_cst;
5440   enum tree_code comp = ERROR_MARK;
5441   struct iv_use *use = group->vuses[0];
5442
5443   /* Extract condition operands.  */
5444   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5445                                         &bound_cst, NULL, &cmp_iv);
5446   gcc_assert (rewrite_type != COMP_IV_NA);
5447
5448   /* Try iv elimination.  */
5449   if (rewrite_type == COMP_IV_ELIM
5450       && may_eliminate_iv (data, use, cand, &bound, &comp))
5451     {
5452       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5453       if (elim_cost.cost == 0)
5454         elim_cost.cost = parm_decl_cost (data, bound);
5455       else if (TREE_CODE (bound) == INTEGER_CST)
5456         elim_cost.cost = 0;
5457       /* If we replace a loop condition 'i < n' with 'p < base + n',
5458          inv_vars_elim will have 'base' and 'n' set, which implies that both
5459          'base' and 'n' will be live during the loop.    More likely,
5460          'base + n' will be loop invariant, resulting in only one live value
5461          during the loop.  So in that case we clear inv_vars_elim and set
5462          inv_expr_elim instead.  */
5463       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5464         {
5465           inv_expr_elim = get_loop_invariant_expr (data, bound);
5466           bitmap_clear (inv_vars_elim);
5467         }
5468       /* The bound is a loop invariant, so it will be only computed
5469          once.  */
5470       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5471     }
5472
5473   /* When the condition is a comparison of the candidate IV against
5474      zero, prefer this IV.
5475
5476      TODO: The constant that we're subtracting from the cost should
5477      be target-dependent.  This information should be added to the
5478      target costs for each backend.  */
5479   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5480       && integer_zerop (*bound_cst)
5481       && (operand_equal_p (*control_var, cand->var_after, 0)
5482           || operand_equal_p (*control_var, cand->var_before, 0)))
5483     elim_cost -= 1;
5484
5485   express_cost = get_computation_cost (data, use, cand, false,
5486                                        &inv_vars_express, NULL,
5487                                        &inv_expr_express);
5488   if (cmp_iv != NULL)
5489     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5490
5491   /* Count the cost of the original bound as well.  */
5492   bound_cost = force_var_cost (data, *bound_cst, NULL);
5493   if (bound_cost.cost == 0)
5494     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5495   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5496     bound_cost.cost = 0;
5497   express_cost += bound_cost;
5498
5499   /* Choose the better approach, preferring the eliminated IV. */
5500   if (elim_cost <= express_cost)
5501     {
5502       cost = elim_cost;
5503       inv_vars = inv_vars_elim;
5504       inv_vars_elim = NULL;
5505       inv_expr = inv_expr_elim;
5506       /* For doloop candidate/use pair, adjust to zero cost.  */
5507       if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5508         cost = no_cost;
5509     }
5510   else
5511     {
5512       cost = express_cost;
5513       inv_vars = inv_vars_express;
5514       inv_vars_express = NULL;
5515       bound = NULL_TREE;
5516       comp = ERROR_MARK;
5517       inv_expr = inv_expr_express;
5518     }
5519
5520   if (inv_expr)
5521     {
5522       inv_exprs = BITMAP_ALLOC (NULL);
5523       bitmap_set_bit (inv_exprs, inv_expr->id);
5524     }
5525   set_group_iv_cost (data, group, cand, cost,
5526                      inv_vars, bound, comp, inv_exprs);
5527
5528   if (inv_vars_elim)
5529     BITMAP_FREE (inv_vars_elim);
5530   if (inv_vars_express)
5531     BITMAP_FREE (inv_vars_express);
5532
5533   return !cost.infinite_cost_p ();
5534 }
5535
5536 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5537    if USE cannot be represented with CAND.  */
5538
5539 static bool
5540 determine_group_iv_cost (struct ivopts_data *data,
5541                          struct iv_group *group, struct iv_cand *cand)
5542 {
5543   switch (group->type)
5544     {
5545     case USE_NONLINEAR_EXPR:
5546       return determine_group_iv_cost_generic (data, group, cand);
5547
5548     case USE_REF_ADDRESS:
5549     case USE_PTR_ADDRESS:
5550       return determine_group_iv_cost_address (data, group, cand);
5551
5552     case USE_COMPARE:
5553       return determine_group_iv_cost_cond (data, group, cand);
5554
5555     default:
5556       gcc_unreachable ();
5557     }
5558 }
5559
5560 /* Return true if get_computation_cost indicates that autoincrement is
5561    a possibility for the pair of USE and CAND, false otherwise.  */
5562
5563 static bool
5564 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5565                            struct iv_cand *cand)
5566 {
5567   if (!address_p (use->type))
5568     return false;
5569
5570   bool can_autoinc = false;
5571   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5572   return can_autoinc;
5573 }
5574
5575 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5576    use that allows autoincrement, and set their AINC_USE if possible.  */
5577
5578 static void
5579 set_autoinc_for_original_candidates (struct ivopts_data *data)
5580 {
5581   unsigned i, j;
5582
5583   for (i = 0; i < data->vcands.length (); i++)
5584     {
5585       struct iv_cand *cand = data->vcands[i];
5586       struct iv_use *closest_before = NULL;
5587       struct iv_use *closest_after = NULL;
5588       if (cand->pos != IP_ORIGINAL)
5589         continue;
5590
5591       for (j = 0; j < data->vgroups.length (); j++)
5592         {
5593           struct iv_group *group = data->vgroups[j];
5594           struct iv_use *use = group->vuses[0];
5595           unsigned uid = gimple_uid (use->stmt);
5596
5597           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5598             continue;
5599
5600           if (uid < gimple_uid (cand->incremented_at)
5601               && (closest_before == NULL
5602                   || uid > gimple_uid (closest_before->stmt)))
5603             closest_before = use;
5604
5605           if (uid > gimple_uid (cand->incremented_at)
5606               && (closest_after == NULL
5607                   || uid < gimple_uid (closest_after->stmt)))
5608             closest_after = use;
5609         }
5610
5611       if (closest_before != NULL
5612           && autoinc_possible_for_pair (data, closest_before, cand))
5613         cand->ainc_use = closest_before;
5614       else if (closest_after != NULL
5615                && autoinc_possible_for_pair (data, closest_after, cand))
5616         cand->ainc_use = closest_after;
5617     }
5618 }
5619
5620 /* Relate compare use with all candidates.  */
5621
5622 static void
5623 relate_compare_use_with_all_cands (struct ivopts_data *data)
5624 {
5625   unsigned i, count = data->vcands.length ();
5626   for (i = 0; i < data->vgroups.length (); i++)
5627     {
5628       struct iv_group *group = data->vgroups[i];
5629
5630       if (group->type == USE_COMPARE)
5631         bitmap_set_range (group->related_cands, 0, count);
5632     }
5633 }
5634
5635 /* Add one doloop dedicated IV candidate:
5636      - Base is (may_be_zero ? 1 : (niter + 1)).
5637      - Step is -1.  */
5638
5639 static void
5640 add_iv_candidate_for_doloop (struct ivopts_data *data)
5641 {
5642   tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5643   gcc_assert (niter_desc && niter_desc->assumptions);
5644
5645   tree niter = niter_desc->niter;
5646   tree ntype = TREE_TYPE (niter);
5647   gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5648
5649   tree may_be_zero = niter_desc->may_be_zero;
5650   if (may_be_zero && integer_zerop (may_be_zero))
5651     may_be_zero = NULL_TREE;
5652   if (may_be_zero)
5653     {
5654       if (COMPARISON_CLASS_P (may_be_zero))
5655         {
5656           niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5657                                build_int_cst (ntype, 0),
5658                                rewrite_to_non_trapping_overflow (niter));
5659         }
5660       /* Don't try to obtain the iteration count expression when may_be_zero is
5661          integer_nonzerop (actually iteration count is one) or else.  */
5662       else
5663         return;
5664     }
5665
5666   tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5667                            build_int_cst (ntype, 1));
5668   add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5669 }
5670
5671 /* Finds the candidates for the induction variables.  */
5672
5673 static void
5674 find_iv_candidates (struct ivopts_data *data)
5675 {
5676   /* Add commonly used ivs.  */
5677   add_standard_iv_candidates (data);
5678
5679   /* Add doloop dedicated ivs.  */
5680   if (data->doloop_use_p)
5681     add_iv_candidate_for_doloop (data);
5682
5683   /* Add old induction variables.  */
5684   add_iv_candidate_for_bivs (data);
5685
5686   /* Add induction variables derived from uses.  */
5687   add_iv_candidate_for_groups (data);
5688
5689   set_autoinc_for_original_candidates (data);
5690
5691   /* Record the important candidates.  */
5692   record_important_candidates (data);
5693
5694   /* Relate compare iv_use with all candidates.  */
5695   if (!data->consider_all_candidates)
5696     relate_compare_use_with_all_cands (data);
5697
5698   if (dump_file && (dump_flags & TDF_DETAILS))
5699     {
5700       unsigned i;
5701
5702       fprintf (dump_file, "\n<Important Candidates>:\t");
5703       for (i = 0; i < data->vcands.length (); i++)
5704         if (data->vcands[i]->important)
5705           fprintf (dump_file, " %d,", data->vcands[i]->id);
5706       fprintf (dump_file, "\n");
5707
5708       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5709       for (i = 0; i < data->vgroups.length (); i++)
5710         {
5711           struct iv_group *group = data->vgroups[i];
5712
5713           if (group->related_cands)
5714             {
5715               fprintf (dump_file, "  Group %d:\t", group->id);
5716               dump_bitmap (dump_file, group->related_cands);
5717             }
5718         }
5719       fprintf (dump_file, "\n");
5720     }
5721 }
5722
5723 /* Determines costs of computing use of iv with an iv candidate.  */
5724
5725 static void
5726 determine_group_iv_costs (struct ivopts_data *data)
5727 {
5728   unsigned i, j;
5729   struct iv_cand *cand;
5730   struct iv_group *group;
5731   bitmap to_clear = BITMAP_ALLOC (NULL);
5732
5733   alloc_use_cost_map (data);
5734
5735   for (i = 0; i < data->vgroups.length (); i++)
5736     {
5737       group = data->vgroups[i];
5738
5739       if (data->consider_all_candidates)
5740         {
5741           for (j = 0; j < data->vcands.length (); j++)
5742             {
5743               cand = data->vcands[j];
5744               determine_group_iv_cost (data, group, cand);
5745             }
5746         }
5747       else
5748         {
5749           bitmap_iterator bi;
5750
5751           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5752             {
5753               cand = data->vcands[j];
5754               if (!determine_group_iv_cost (data, group, cand))
5755                 bitmap_set_bit (to_clear, j);
5756             }
5757
5758           /* Remove the candidates for that the cost is infinite from
5759              the list of related candidates.  */
5760           bitmap_and_compl_into (group->related_cands, to_clear);
5761           bitmap_clear (to_clear);
5762         }
5763     }
5764
5765   BITMAP_FREE (to_clear);
5766
5767   if (dump_file && (dump_flags & TDF_DETAILS))
5768     {
5769       bitmap_iterator bi;
5770
5771       /* Dump invariant variables.  */
5772       fprintf (dump_file, "\n<Invariant Vars>:\n");
5773       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5774         {
5775           struct version_info *info = ver_info (data, i);
5776           if (info->inv_id)
5777             {
5778               fprintf (dump_file, "Inv %d:\t", info->inv_id);
5779               print_generic_expr (dump_file, info->name, TDF_SLIM);
5780               fprintf (dump_file, "%s\n",
5781                        info->has_nonlin_use ? "" : "\t(eliminable)");
5782             }
5783         }
5784
5785       /* Dump invariant expressions.  */
5786       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5787       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5788
5789       for (hash_table<iv_inv_expr_hasher>::iterator it
5790            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5791            ++it)
5792         list.safe_push (*it);
5793
5794       list.qsort (sort_iv_inv_expr_ent);
5795
5796       for (i = 0; i < list.length (); ++i)
5797         {
5798           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5799           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5800           fprintf (dump_file, "\n");
5801         }
5802
5803       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5804
5805       for (i = 0; i < data->vgroups.length (); i++)
5806         {
5807           group = data->vgroups[i];
5808
5809           fprintf (dump_file, "Group %d:\n", i);
5810           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5811           for (j = 0; j < group->n_map_members; j++)
5812             {
5813               if (!group->cost_map[j].cand
5814                   || group->cost_map[j].cost.infinite_cost_p ())
5815                 continue;
5816
5817               fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
5818                        group->cost_map[j].cand->id,
5819                        group->cost_map[j].cost.cost,
5820                        group->cost_map[j].cost.complexity);
5821               if (!group->cost_map[j].inv_exprs
5822                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
5823                 fprintf (dump_file, "NIL;\t");
5824               else
5825                 bitmap_print (dump_file,
5826                               group->cost_map[j].inv_exprs, "", ";\t");
5827               if (!group->cost_map[j].inv_vars
5828                   || bitmap_empty_p (group->cost_map[j].inv_vars))
5829                 fprintf (dump_file, "NIL;\n");
5830               else
5831                 bitmap_print (dump_file,
5832                               group->cost_map[j].inv_vars, "", "\n");
5833             }
5834
5835           fprintf (dump_file, "\n");
5836         }
5837       fprintf (dump_file, "\n");
5838     }
5839 }
5840
5841 /* Determines cost of the candidate CAND.  */
5842
5843 static void
5844 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5845 {
5846   comp_cost cost_base;
5847   int64_t cost, cost_step;
5848   tree base;
5849
5850   gcc_assert (cand->iv != NULL);
5851
5852   /* There are two costs associated with the candidate -- its increment
5853      and its initialization.  The second is almost negligible for any loop
5854      that rolls enough, so we take it just very little into account.  */
5855
5856   base = cand->iv->base;
5857   cost_base = force_var_cost (data, base, NULL);
5858   /* It will be exceptional that the iv register happens to be initialized with
5859      the proper value at no cost.  In general, there will at least be a regcopy
5860      or a const set.  */
5861   if (cost_base.cost == 0)
5862     cost_base.cost = COSTS_N_INSNS (1);
5863   /* Doloop decrement should be considered as zero cost.  */
5864   if (cand->doloop_p)
5865     cost_step = 0;
5866   else
5867     cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5868   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5869
5870   /* Prefer the original ivs unless we may gain something by replacing it.
5871      The reason is to make debugging simpler; so this is not relevant for
5872      artificial ivs created by other optimization passes.  */
5873   if ((cand->pos != IP_ORIGINAL
5874        || !SSA_NAME_VAR (cand->var_before)
5875        || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5876       /* Prefer doloop as well.  */
5877       && !cand->doloop_p)
5878     cost++;
5879
5880   /* Prefer not to insert statements into latch unless there are some
5881      already (so that we do not create unnecessary jumps).  */
5882   if (cand->pos == IP_END
5883       && empty_block_p (ip_end_pos (data->current_loop)))
5884     cost++;
5885
5886   cand->cost = cost;
5887   cand->cost_step = cost_step;
5888 }
5889
5890 /* Determines costs of computation of the candidates.  */
5891
5892 static void
5893 determine_iv_costs (struct ivopts_data *data)
5894 {
5895   unsigned i;
5896
5897   if (dump_file && (dump_flags & TDF_DETAILS))
5898     {
5899       fprintf (dump_file, "<Candidate Costs>:\n");
5900       fprintf (dump_file, "  cand\tcost\n");
5901     }
5902
5903   for (i = 0; i < data->vcands.length (); i++)
5904     {
5905       struct iv_cand *cand = data->vcands[i];
5906
5907       determine_iv_cost (data, cand);
5908
5909       if (dump_file && (dump_flags & TDF_DETAILS))
5910         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5911     }
5912
5913   if (dump_file && (dump_flags & TDF_DETAILS))
5914     fprintf (dump_file, "\n");
5915 }
5916
5917 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5918    induction variables.  Note N_INVS includes both invariant variables and
5919    invariant expressions.  */
5920
5921 static unsigned
5922 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5923                               unsigned n_cands)
5924 {
5925   unsigned cost;
5926   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5927   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5928   bool speed = data->speed;
5929
5930   /* If there is a call in the loop body, the call-clobbered registers
5931      are not available for loop invariants.  */
5932   if (data->body_includes_call)
5933     available_regs = available_regs - target_clobbered_regs;
5934
5935   /* If we have enough registers.  */
5936   if (regs_needed + target_res_regs < available_regs)
5937     cost = n_new;
5938   /* If close to running out of registers, try to preserve them.  */
5939   else if (regs_needed <= available_regs)
5940     cost = target_reg_cost [speed] * regs_needed;
5941   /* If we run out of available registers but the number of candidates
5942      does not, we penalize extra registers using target_spill_cost.  */
5943   else if (n_cands <= available_regs)
5944     cost = target_reg_cost [speed] * available_regs
5945            + target_spill_cost [speed] * (regs_needed - available_regs);
5946   /* If the number of candidates runs out available registers, we penalize
5947      extra candidate registers using target_spill_cost * 2.  Because it is
5948      more expensive to spill induction variable than invariant.  */
5949   else
5950     cost = target_reg_cost [speed] * available_regs
5951            + target_spill_cost [speed] * (n_cands - available_regs) * 2
5952            + target_spill_cost [speed] * (regs_needed - n_cands);
5953
5954   /* Finally, add the number of candidates, so that we prefer eliminating
5955      induction variables if possible.  */
5956   return cost + n_cands;
5957 }
5958
5959 /* For each size of the induction variable set determine the penalty.  */
5960
5961 static void
5962 determine_set_costs (struct ivopts_data *data)
5963 {
5964   unsigned j, n;
5965   gphi *phi;
5966   gphi_iterator psi;
5967   tree op;
5968   class loop *loop = data->current_loop;
5969   bitmap_iterator bi;
5970
5971   if (dump_file && (dump_flags & TDF_DETAILS))
5972     {
5973       fprintf (dump_file, "<Global Costs>:\n");
5974       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5975       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5976       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5977       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5978     }
5979
5980   n = 0;
5981   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5982     {
5983       phi = psi.phi ();
5984       op = PHI_RESULT (phi);
5985
5986       if (virtual_operand_p (op))
5987         continue;
5988
5989       if (get_iv (data, op))
5990         continue;
5991
5992       if (!POINTER_TYPE_P (TREE_TYPE (op))
5993           && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5994         continue;
5995
5996       n++;
5997     }
5998
5999   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6000     {
6001       struct version_info *info = ver_info (data, j);
6002
6003       if (info->inv_id && info->has_nonlin_use)
6004         n++;
6005     }
6006
6007   data->regs_used = n;
6008   if (dump_file && (dump_flags & TDF_DETAILS))
6009     fprintf (dump_file, "  regs_used %d\n", n);
6010
6011   if (dump_file && (dump_flags & TDF_DETAILS))
6012     {
6013       fprintf (dump_file, "  cost for size:\n");
6014       fprintf (dump_file, "  ivs\tcost\n");
6015       for (j = 0; j <= 2 * target_avail_regs; j++)
6016         fprintf (dump_file, "  %d\t%d\n", j,
6017                  ivopts_estimate_reg_pressure (data, 0, j));
6018       fprintf (dump_file, "\n");
6019     }
6020 }
6021
6022 /* Returns true if A is a cheaper cost pair than B.  */
6023
6024 static bool
6025 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6026 {
6027   if (!a)
6028     return false;
6029
6030   if (!b)
6031     return true;
6032
6033   if (a->cost < b->cost)
6034     return true;
6035
6036   if (b->cost < a->cost)
6037     return false;
6038
6039   /* In case the costs are the same, prefer the cheaper candidate.  */
6040   if (a->cand->cost < b->cand->cost)
6041     return true;
6042
6043   return false;
6044 }
6045
6046 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
6047    for more expensive, equal and cheaper respectively.  */
6048
6049 static int
6050 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6051 {
6052   if (cheaper_cost_pair (a, b))
6053     return -1;
6054   if (cheaper_cost_pair (b, a))
6055     return 1;
6056
6057   return 0;
6058 }
6059
6060 /* Returns candidate by that USE is expressed in IVS.  */
6061
6062 static class cost_pair *
6063 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6064 {
6065   return ivs->cand_for_group[group->id];
6066 }
6067
6068 /* Computes the cost field of IVS structure.  */
6069
6070 static void
6071 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6072 {
6073   comp_cost cost = ivs->cand_use_cost;
6074
6075   cost += ivs->cand_cost;
6076   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6077   ivs->cost = cost;
6078 }
6079
6080 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6081    and IVS.  */
6082
6083 static void
6084 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6085 {
6086   bitmap_iterator bi;
6087   unsigned iid;
6088
6089   if (!invs)
6090     return;
6091
6092   gcc_assert (n_inv_uses != NULL);
6093   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6094     {
6095       n_inv_uses[iid]--;
6096       if (n_inv_uses[iid] == 0)
6097         ivs->n_invs--;
6098     }
6099 }
6100
6101 /* Set USE not to be expressed by any candidate in IVS.  */
6102
6103 static void
6104 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6105                  struct iv_group *group)
6106 {
6107   unsigned gid = group->id, cid;
6108   class cost_pair *cp;
6109
6110   cp = ivs->cand_for_group[gid];
6111   if (!cp)
6112     return;
6113   cid = cp->cand->id;
6114
6115   ivs->bad_groups++;
6116   ivs->cand_for_group[gid] = NULL;
6117   ivs->n_cand_uses[cid]--;
6118
6119   if (ivs->n_cand_uses[cid] == 0)
6120     {
6121       bitmap_clear_bit (ivs->cands, cid);
6122       if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6123         ivs->n_cands--;
6124       ivs->cand_cost -= cp->cand->cost;
6125       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6126       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6127     }
6128
6129   ivs->cand_use_cost -= cp->cost;
6130   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6131   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6132   iv_ca_recount_cost (data, ivs);
6133 }
6134
6135 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6136    IVS.  */
6137
6138 static void
6139 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6140 {
6141   bitmap_iterator bi;
6142   unsigned iid;
6143
6144   if (!invs)
6145     return;
6146
6147   gcc_assert (n_inv_uses != NULL);
6148   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6149     {
6150       n_inv_uses[iid]++;
6151       if (n_inv_uses[iid] == 1)
6152         ivs->n_invs++;
6153     }
6154 }
6155
6156 /* Set cost pair for GROUP in set IVS to CP.  */
6157
6158 static void
6159 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6160               struct iv_group *group, class cost_pair *cp)
6161 {
6162   unsigned gid = group->id, cid;
6163
6164   if (ivs->cand_for_group[gid] == cp)
6165     return;
6166
6167   if (ivs->cand_for_group[gid])
6168     iv_ca_set_no_cp (data, ivs, group);
6169
6170   if (cp)
6171     {
6172       cid = cp->cand->id;
6173
6174       ivs->bad_groups--;
6175       ivs->cand_for_group[gid] = cp;
6176       ivs->n_cand_uses[cid]++;
6177       if (ivs->n_cand_uses[cid] == 1)
6178         {
6179           bitmap_set_bit (ivs->cands, cid);
6180           if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6181             ivs->n_cands++;
6182           ivs->cand_cost += cp->cand->cost;
6183           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6184           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6185         }
6186
6187       ivs->cand_use_cost += cp->cost;
6188       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6189       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6190       iv_ca_recount_cost (data, ivs);
6191     }
6192 }
6193
6194 /* Extend set IVS by expressing USE by some of the candidates in it
6195    if possible.  Consider all important candidates if candidates in
6196    set IVS don't give any result.  */
6197
6198 static void
6199 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6200                struct iv_group *group)
6201 {
6202   class cost_pair *best_cp = NULL, *cp;
6203   bitmap_iterator bi;
6204   unsigned i;
6205   struct iv_cand *cand;
6206
6207   gcc_assert (ivs->upto >= group->id);
6208   ivs->upto++;
6209   ivs->bad_groups++;
6210
6211   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6212     {
6213       cand = data->vcands[i];
6214       cp = get_group_iv_cost (data, group, cand);
6215       if (cheaper_cost_pair (cp, best_cp))
6216         best_cp = cp;
6217     }
6218
6219   if (best_cp == NULL)
6220     {
6221       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6222         {
6223           cand = data->vcands[i];
6224           cp = get_group_iv_cost (data, group, cand);
6225           if (cheaper_cost_pair (cp, best_cp))
6226             best_cp = cp;
6227         }
6228     }
6229
6230   iv_ca_set_cp (data, ivs, group, best_cp);
6231 }
6232
6233 /* Get cost for assignment IVS.  */
6234
6235 static comp_cost
6236 iv_ca_cost (class iv_ca *ivs)
6237 {
6238   /* This was a conditional expression but it triggered a bug in
6239      Sun C 5.5.  */
6240   if (ivs->bad_groups)
6241     return infinite_cost;
6242   else
6243     return ivs->cost;
6244 }
6245
6246 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6247    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
6248    respectively.  */
6249
6250 static int
6251 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6252                     struct iv_group *group, class cost_pair *old_cp,
6253                     class cost_pair *new_cp)
6254 {
6255   gcc_assert (old_cp && new_cp && old_cp != new_cp);
6256   unsigned old_n_invs = ivs->n_invs;
6257   iv_ca_set_cp (data, ivs, group, new_cp);
6258   unsigned new_n_invs = ivs->n_invs;
6259   iv_ca_set_cp (data, ivs, group, old_cp);
6260
6261   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6262 }
6263
6264 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6265    it before NEXT.  */
6266
6267 static struct iv_ca_delta *
6268 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6269                  class cost_pair *new_cp, struct iv_ca_delta *next)
6270 {
6271   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6272
6273   change->group = group;
6274   change->old_cp = old_cp;
6275   change->new_cp = new_cp;
6276   change->next = next;
6277
6278   return change;
6279 }
6280
6281 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6282    are rewritten.  */
6283
6284 static struct iv_ca_delta *
6285 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6286 {
6287   struct iv_ca_delta *last;
6288
6289   if (!l2)
6290     return l1;
6291
6292   if (!l1)
6293     return l2;
6294
6295   for (last = l1; last->next; last = last->next)
6296     continue;
6297   last->next = l2;
6298
6299   return l1;
6300 }
6301
6302 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6303
6304 static struct iv_ca_delta *
6305 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6306 {
6307   struct iv_ca_delta *act, *next, *prev = NULL;
6308
6309   for (act = delta; act; act = next)
6310     {
6311       next = act->next;
6312       act->next = prev;
6313       prev = act;
6314
6315       std::swap (act->old_cp, act->new_cp);
6316     }
6317
6318   return prev;
6319 }
6320
6321 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6322    reverted instead.  */
6323
6324 static void
6325 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6326                     struct iv_ca_delta *delta, bool forward)
6327 {
6328   class cost_pair *from, *to;
6329   struct iv_ca_delta *act;
6330
6331   if (!forward)
6332     delta = iv_ca_delta_reverse (delta);
6333
6334   for (act = delta; act; act = act->next)
6335     {
6336       from = act->old_cp;
6337       to = act->new_cp;
6338       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6339       iv_ca_set_cp (data, ivs, act->group, to);
6340     }
6341
6342   if (!forward)
6343     iv_ca_delta_reverse (delta);
6344 }
6345
6346 /* Returns true if CAND is used in IVS.  */
6347
6348 static bool
6349 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6350 {
6351   return ivs->n_cand_uses[cand->id] > 0;
6352 }
6353
6354 /* Returns number of induction variable candidates in the set IVS.  */
6355
6356 static unsigned
6357 iv_ca_n_cands (class iv_ca *ivs)
6358 {
6359   return ivs->n_cands;
6360 }
6361
6362 /* Free the list of changes DELTA.  */
6363
6364 static void
6365 iv_ca_delta_free (struct iv_ca_delta **delta)
6366 {
6367   struct iv_ca_delta *act, *next;
6368
6369   for (act = *delta; act; act = next)
6370     {
6371       next = act->next;
6372       free (act);
6373     }
6374
6375   *delta = NULL;
6376 }
6377
6378 /* Allocates new iv candidates assignment.  */
6379
6380 static class iv_ca *
6381 iv_ca_new (struct ivopts_data *data)
6382 {
6383   class iv_ca *nw = XNEW (class iv_ca);
6384
6385   nw->upto = 0;
6386   nw->bad_groups = 0;
6387   nw->cand_for_group = XCNEWVEC (class cost_pair *,
6388                                  data->vgroups.length ());
6389   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6390   nw->cands = BITMAP_ALLOC (NULL);
6391   nw->n_cands = 0;
6392   nw->n_invs = 0;
6393   nw->cand_use_cost = no_cost;
6394   nw->cand_cost = 0;
6395   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6396   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6397   nw->cost = no_cost;
6398
6399   return nw;
6400 }
6401
6402 /* Free memory occupied by the set IVS.  */
6403
6404 static void
6405 iv_ca_free (class iv_ca **ivs)
6406 {
6407   free ((*ivs)->cand_for_group);
6408   free ((*ivs)->n_cand_uses);
6409   BITMAP_FREE ((*ivs)->cands);
6410   free ((*ivs)->n_inv_var_uses);
6411   free ((*ivs)->n_inv_expr_uses);
6412   free (*ivs);
6413   *ivs = NULL;
6414 }
6415
6416 /* Dumps IVS to FILE.  */
6417
6418 static void
6419 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6420 {
6421   unsigned i;
6422   comp_cost cost = iv_ca_cost (ivs);
6423
6424   fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
6425            cost.complexity);
6426   fprintf (file, "  reg_cost: %d\n",
6427            ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6428   fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
6429            "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6430            ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6431   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6432
6433   for (i = 0; i < ivs->upto; i++)
6434     {
6435       struct iv_group *group = data->vgroups[i];
6436       class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6437       if (cp)
6438         fprintf (file, "   group:%d --> iv_cand:%d, cost=("
6439                  "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6440                  cp->cost.cost, cp->cost.complexity);
6441       else
6442         fprintf (file, "   group:%d --> ??\n", group->id);
6443     }
6444
6445   const char *pref = "";
6446   fprintf (file, "  invariant variables: ");
6447   for (i = 1; i <= data->max_inv_var_id; i++)
6448     if (ivs->n_inv_var_uses[i])
6449       {
6450         fprintf (file, "%s%d", pref, i);
6451         pref = ", ";
6452       }
6453
6454   pref = "";
6455   fprintf (file, "\n  invariant expressions: ");
6456   for (i = 1; i <= data->max_inv_expr_id; i++)
6457     if (ivs->n_inv_expr_uses[i])
6458       {
6459         fprintf (file, "%s%d", pref, i);
6460         pref = ", ";
6461       }
6462
6463   fprintf (file, "\n\n");
6464 }
6465
6466 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6467    new set, and store differences in DELTA.  Number of induction variables
6468    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6469    the function will try to find a solution with mimimal iv candidates.  */
6470
6471 static comp_cost
6472 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6473               struct iv_cand *cand, struct iv_ca_delta **delta,
6474               unsigned *n_ivs, bool min_ncand)
6475 {
6476   unsigned i;
6477   comp_cost cost;
6478   struct iv_group *group;
6479   class cost_pair *old_cp, *new_cp;
6480
6481   *delta = NULL;
6482   for (i = 0; i < ivs->upto; i++)
6483     {
6484       group = data->vgroups[i];
6485       old_cp = iv_ca_cand_for_group (ivs, group);
6486
6487       if (old_cp
6488           && old_cp->cand == cand)
6489         continue;
6490
6491       new_cp = get_group_iv_cost (data, group, cand);
6492       if (!new_cp)
6493         continue;
6494
6495       if (!min_ncand)
6496         {
6497           int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6498           /* Skip if new_cp depends on more invariants.  */
6499           if (cmp_invs > 0)
6500             continue;
6501
6502           int cmp_cost = compare_cost_pair (new_cp, old_cp);
6503           /* Skip if new_cp is not cheaper.  */
6504           if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6505             continue;
6506         }
6507
6508       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6509     }
6510
6511   iv_ca_delta_commit (data, ivs, *delta, true);
6512   cost = iv_ca_cost (ivs);
6513   if (n_ivs)
6514     *n_ivs = iv_ca_n_cands (ivs);
6515   iv_ca_delta_commit (data, ivs, *delta, false);
6516
6517   return cost;
6518 }
6519
6520 /* Try narrowing set IVS by removing CAND.  Return the cost of
6521    the new set and store the differences in DELTA.  START is
6522    the candidate with which we start narrowing.  */
6523
6524 static comp_cost
6525 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6526               struct iv_cand *cand, struct iv_cand *start,
6527               struct iv_ca_delta **delta)
6528 {
6529   unsigned i, ci;
6530   struct iv_group *group;
6531   class cost_pair *old_cp, *new_cp, *cp;
6532   bitmap_iterator bi;
6533   struct iv_cand *cnd;
6534   comp_cost cost, best_cost, acost;
6535
6536   *delta = NULL;
6537   for (i = 0; i < data->vgroups.length (); i++)
6538     {
6539       group = data->vgroups[i];
6540
6541       old_cp = iv_ca_cand_for_group (ivs, group);
6542       if (old_cp->cand != cand)
6543         continue;
6544
6545       best_cost = iv_ca_cost (ivs);
6546       /* Start narrowing with START.  */
6547       new_cp = get_group_iv_cost (data, group, start);
6548
6549       if (data->consider_all_candidates)
6550         {
6551           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6552             {
6553               if (ci == cand->id || (start && ci == start->id))
6554                 continue;
6555
6556               cnd = data->vcands[ci];
6557
6558               cp = get_group_iv_cost (data, group, cnd);
6559               if (!cp)
6560                 continue;
6561
6562               iv_ca_set_cp (data, ivs, group, cp);
6563               acost = iv_ca_cost (ivs);
6564
6565               if (acost < best_cost)
6566                 {
6567                   best_cost = acost;
6568                   new_cp = cp;
6569                 }
6570             }
6571         }
6572       else
6573         {
6574           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6575             {
6576               if (ci == cand->id || (start && ci == start->id))
6577                 continue;
6578
6579               cnd = data->vcands[ci];
6580
6581               cp = get_group_iv_cost (data, group, cnd);
6582               if (!cp)
6583                 continue;
6584
6585               iv_ca_set_cp (data, ivs, group, cp);
6586               acost = iv_ca_cost (ivs);
6587
6588               if (acost < best_cost)
6589                 {
6590                   best_cost = acost;
6591                   new_cp = cp;
6592                 }
6593             }
6594         }
6595       /* Restore to old cp for use.  */
6596       iv_ca_set_cp (data, ivs, group, old_cp);
6597
6598       if (!new_cp)
6599         {
6600           iv_ca_delta_free (delta);
6601           return infinite_cost;
6602         }
6603
6604       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6605     }
6606
6607   iv_ca_delta_commit (data, ivs, *delta, true);
6608   cost = iv_ca_cost (ivs);
6609   iv_ca_delta_commit (data, ivs, *delta, false);
6610
6611   return cost;
6612 }
6613
6614 /* Try optimizing the set of candidates IVS by removing candidates different
6615    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6616    differences in DELTA.  */
6617
6618 static comp_cost
6619 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6620              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6621 {
6622   bitmap_iterator bi;
6623   struct iv_ca_delta *act_delta, *best_delta;
6624   unsigned i;
6625   comp_cost best_cost, acost;
6626   struct iv_cand *cand;
6627
6628   best_delta = NULL;
6629   best_cost = iv_ca_cost (ivs);
6630
6631   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6632     {
6633       cand = data->vcands[i];
6634
6635       if (cand == except_cand)
6636         continue;
6637
6638       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6639
6640       if (acost < best_cost)
6641         {
6642           best_cost = acost;
6643           iv_ca_delta_free (&best_delta);
6644           best_delta = act_delta;
6645         }
6646       else
6647         iv_ca_delta_free (&act_delta);
6648     }
6649
6650   if (!best_delta)
6651     {
6652       *delta = NULL;
6653       return best_cost;
6654     }
6655
6656   /* Recurse to possibly remove other unnecessary ivs.  */
6657   iv_ca_delta_commit (data, ivs, best_delta, true);
6658   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6659   iv_ca_delta_commit (data, ivs, best_delta, false);
6660   *delta = iv_ca_delta_join (best_delta, *delta);
6661   return best_cost;
6662 }
6663
6664 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6665    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6666    the corresponding cost_pair, otherwise just return BEST_CP.  */
6667
6668 static class cost_pair*
6669 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6670                         unsigned int cand_idx, struct iv_cand *old_cand,
6671                         class cost_pair *best_cp)
6672 {
6673   struct iv_cand *cand;
6674   class cost_pair *cp;
6675
6676   gcc_assert (old_cand != NULL && best_cp != NULL);
6677   if (cand_idx == old_cand->id)
6678     return best_cp;
6679
6680   cand = data->vcands[cand_idx];
6681   cp = get_group_iv_cost (data, group, cand);
6682   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6683     return cp;
6684
6685   return best_cp;
6686 }
6687
6688 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6689    which are used by more than one iv uses.  For each of those candidates,
6690    this function tries to represent iv uses under that candidate using
6691    other ones with lower local cost, then tries to prune the new set.
6692    If the new set has lower cost, It returns the new cost after recording
6693    candidate replacement in list DELTA.  */
6694
6695 static comp_cost
6696 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6697                struct iv_ca_delta **delta)
6698 {
6699   bitmap_iterator bi, bj;
6700   unsigned int i, j, k;
6701   struct iv_cand *cand;
6702   comp_cost orig_cost, acost;
6703   struct iv_ca_delta *act_delta, *tmp_delta;
6704   class cost_pair *old_cp, *best_cp = NULL;
6705
6706   *delta = NULL;
6707   orig_cost = iv_ca_cost (ivs);
6708
6709   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6710     {
6711       if (ivs->n_cand_uses[i] == 1
6712           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6713         continue;
6714
6715       cand = data->vcands[i];
6716
6717       act_delta = NULL;
6718       /*  Represent uses under current candidate using other ones with
6719           lower local cost.  */
6720       for (j = 0; j < ivs->upto; j++)
6721         {
6722           struct iv_group *group = data->vgroups[j];
6723           old_cp = iv_ca_cand_for_group (ivs, group);
6724
6725           if (old_cp->cand != cand)
6726             continue;
6727
6728           best_cp = old_cp;
6729           if (data->consider_all_candidates)
6730             for (k = 0; k < data->vcands.length (); k++)
6731               best_cp = cheaper_cost_with_cand (data, group, k,
6732                                                 old_cp->cand, best_cp);
6733           else
6734             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6735               best_cp = cheaper_cost_with_cand (data, group, k,
6736                                                 old_cp->cand, best_cp);
6737
6738           if (best_cp == old_cp)
6739             continue;
6740
6741           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6742         }
6743       /* No need for further prune.  */
6744       if (!act_delta)
6745         continue;
6746
6747       /* Prune the new candidate set.  */
6748       iv_ca_delta_commit (data, ivs, act_delta, true);
6749       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6750       iv_ca_delta_commit (data, ivs, act_delta, false);
6751       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6752
6753       if (acost < orig_cost)
6754         {
6755           *delta = act_delta;
6756           return acost;
6757         }
6758       else
6759         iv_ca_delta_free (&act_delta);
6760     }
6761
6762   return orig_cost;
6763 }
6764
6765 /* Tries to extend the sets IVS in the best possible way in order to
6766    express the GROUP.  If ORIGINALP is true, prefer candidates from
6767    the original set of IVs, otherwise favor important candidates not
6768    based on any memory object.  */
6769
6770 static bool
6771 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6772                   struct iv_group *group, bool originalp)
6773 {
6774   comp_cost best_cost, act_cost;
6775   unsigned i;
6776   bitmap_iterator bi;
6777   struct iv_cand *cand;
6778   struct iv_ca_delta *best_delta = NULL, *act_delta;
6779   class cost_pair *cp;
6780
6781   iv_ca_add_group (data, ivs, group);
6782   best_cost = iv_ca_cost (ivs);
6783   cp = iv_ca_cand_for_group (ivs, group);
6784   if (cp)
6785     {
6786       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6787       iv_ca_set_no_cp (data, ivs, group);
6788     }
6789
6790   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6791      first try important candidates not based on any memory object.  Only if
6792      this fails, try the specific ones.  Rationale -- in loops with many
6793      variables the best choice often is to use just one generic biv.  If we
6794      added here many ivs specific to the uses, the optimization algorithm later
6795      would be likely to get stuck in a local minimum, thus causing us to create
6796      too many ivs.  The approach from few ivs to more seems more likely to be
6797      successful -- starting from few ivs, replacing an expensive use by a
6798      specific iv should always be a win.  */
6799   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6800     {
6801       cand = data->vcands[i];
6802
6803       if (originalp && cand->pos !=IP_ORIGINAL)
6804         continue;
6805
6806       if (!originalp && cand->iv->base_object != NULL_TREE)
6807         continue;
6808
6809       if (iv_ca_cand_used_p (ivs, cand))
6810         continue;
6811
6812       cp = get_group_iv_cost (data, group, cand);
6813       if (!cp)
6814         continue;
6815
6816       iv_ca_set_cp (data, ivs, group, cp);
6817       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6818                                true);
6819       iv_ca_set_no_cp (data, ivs, group);
6820       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6821
6822       if (act_cost < best_cost)
6823         {
6824           best_cost = act_cost;
6825
6826           iv_ca_delta_free (&best_delta);
6827           best_delta = act_delta;
6828         }
6829       else
6830         iv_ca_delta_free (&act_delta);
6831     }
6832
6833   if (best_cost.infinite_cost_p ())
6834     {
6835       for (i = 0; i < group->n_map_members; i++)
6836         {
6837           cp = group->cost_map + i;
6838           cand = cp->cand;
6839           if (!cand)
6840             continue;
6841
6842           /* Already tried this.  */
6843           if (cand->important)
6844             {
6845               if (originalp && cand->pos == IP_ORIGINAL)
6846                 continue;
6847               if (!originalp && cand->iv->base_object == NULL_TREE)
6848                 continue;
6849             }
6850
6851           if (iv_ca_cand_used_p (ivs, cand))
6852             continue;
6853
6854           act_delta = NULL;
6855           iv_ca_set_cp (data, ivs, group, cp);
6856           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6857           iv_ca_set_no_cp (data, ivs, group);
6858           act_delta = iv_ca_delta_add (group,
6859                                        iv_ca_cand_for_group (ivs, group),
6860                                        cp, act_delta);
6861
6862           if (act_cost < best_cost)
6863             {
6864               best_cost = act_cost;
6865
6866               if (best_delta)
6867                 iv_ca_delta_free (&best_delta);
6868               best_delta = act_delta;
6869             }
6870           else
6871             iv_ca_delta_free (&act_delta);
6872         }
6873     }
6874
6875   iv_ca_delta_commit (data, ivs, best_delta, true);
6876   iv_ca_delta_free (&best_delta);
6877
6878   return !best_cost.infinite_cost_p ();
6879 }
6880
6881 /* Finds an initial assignment of candidates to uses.  */
6882
6883 static class iv_ca *
6884 get_initial_solution (struct ivopts_data *data, bool originalp)
6885 {
6886   unsigned i;
6887   class iv_ca *ivs = iv_ca_new (data);
6888
6889   for (i = 0; i < data->vgroups.length (); i++)
6890     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6891       {
6892         iv_ca_free (&ivs);
6893         return NULL;
6894       }
6895
6896   return ivs;
6897 }
6898
6899 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6900    points to a bool variable, this function tries to break local
6901    optimal fixed-point by replacing candidates in IVS if it's true.  */
6902
6903 static bool
6904 try_improve_iv_set (struct ivopts_data *data,
6905                     class iv_ca *ivs, bool *try_replace_p)
6906 {
6907   unsigned i, n_ivs;
6908   comp_cost acost, best_cost = iv_ca_cost (ivs);
6909   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6910   struct iv_cand *cand;
6911
6912   /* Try extending the set of induction variables by one.  */
6913   for (i = 0; i < data->vcands.length (); i++)
6914     {
6915       cand = data->vcands[i];
6916
6917       if (iv_ca_cand_used_p (ivs, cand))
6918         continue;
6919
6920       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6921       if (!act_delta)
6922         continue;
6923
6924       /* If we successfully added the candidate and the set is small enough,
6925          try optimizing it by removing other candidates.  */
6926       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6927         {
6928           iv_ca_delta_commit (data, ivs, act_delta, true);
6929           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6930           iv_ca_delta_commit (data, ivs, act_delta, false);
6931           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6932         }
6933
6934       if (acost < best_cost)
6935         {
6936           best_cost = acost;
6937           iv_ca_delta_free (&best_delta);
6938           best_delta = act_delta;
6939         }
6940       else
6941         iv_ca_delta_free (&act_delta);
6942     }
6943
6944   if (!best_delta)
6945     {
6946       /* Try removing the candidates from the set instead.  */
6947       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6948
6949       if (!best_delta && *try_replace_p)
6950         {
6951           *try_replace_p = false;
6952           /* So far candidate selecting algorithm tends to choose fewer IVs
6953              so that it can handle cases in which loops have many variables
6954              but the best choice is often to use only one general biv.  One
6955              weakness is it can't handle opposite cases, in which different
6956              candidates should be chosen with respect to each use.  To solve
6957              the problem, we replace candidates in a manner described by the
6958              comments of iv_ca_replace, thus give general algorithm a chance
6959              to break local optimal fixed-point in these cases.  */
6960           best_cost = iv_ca_replace (data, ivs, &best_delta);
6961         }
6962
6963       if (!best_delta)
6964         return false;
6965     }
6966
6967   iv_ca_delta_commit (data, ivs, best_delta, true);
6968   iv_ca_delta_free (&best_delta);
6969   return best_cost == iv_ca_cost (ivs);
6970 }
6971
6972 /* Attempts to find the optimal set of induction variables.  We do simple
6973    greedy heuristic -- we try to replace at most one candidate in the selected
6974    solution and remove the unused ivs while this improves the cost.  */
6975
6976 static class iv_ca *
6977 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6978 {
6979   class iv_ca *set;
6980   bool try_replace_p = true;
6981
6982   /* Get the initial solution.  */
6983   set = get_initial_solution (data, originalp);
6984   if (!set)
6985     {
6986       if (dump_file && (dump_flags & TDF_DETAILS))
6987         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6988       return NULL;
6989     }
6990
6991   if (dump_file && (dump_flags & TDF_DETAILS))
6992     {
6993       fprintf (dump_file, "Initial set of candidates:\n");
6994       iv_ca_dump (data, dump_file, set);
6995     }
6996
6997   while (try_improve_iv_set (data, set, &try_replace_p))
6998     {
6999       if (dump_file && (dump_flags & TDF_DETAILS))
7000         {
7001           fprintf (dump_file, "Improved to:\n");
7002           iv_ca_dump (data, dump_file, set);
7003         }
7004     }
7005
7006   /* If the set has infinite_cost, it can't be optimal.  */
7007   if (iv_ca_cost (set).infinite_cost_p ())
7008     {
7009       if (dump_file && (dump_flags & TDF_DETAILS))
7010         fprintf (dump_file,
7011                  "Overflow to infinite cost in try_improve_iv_set.\n");
7012       iv_ca_free (&set);
7013     }
7014   return set;
7015 }
7016
7017 static class iv_ca *
7018 find_optimal_iv_set (struct ivopts_data *data)
7019 {
7020   unsigned i;
7021   comp_cost cost, origcost;
7022   class iv_ca *set, *origset;
7023
7024   /* Determine the cost based on a strategy that starts with original IVs,
7025      and try again using a strategy that prefers candidates not based
7026      on any IVs.  */
7027   origset = find_optimal_iv_set_1 (data, true);
7028   set = find_optimal_iv_set_1 (data, false);
7029
7030   if (!origset && !set)
7031     return NULL;
7032
7033   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7034   cost = set ? iv_ca_cost (set) : infinite_cost;
7035
7036   if (dump_file && (dump_flags & TDF_DETAILS))
7037     {
7038       fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7039                origcost.cost, origcost.complexity);
7040       fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7041                cost.cost, cost.complexity);
7042     }
7043
7044   /* Choose the one with the best cost.  */
7045   if (origcost <= cost)
7046     {
7047       if (set)
7048         iv_ca_free (&set);
7049       set = origset;
7050     }
7051   else if (origset)
7052     iv_ca_free (&origset);
7053
7054   for (i = 0; i < data->vgroups.length (); i++)
7055     {
7056       struct iv_group *group = data->vgroups[i];
7057       group->selected = iv_ca_cand_for_group (set, group)->cand;
7058     }
7059
7060   return set;
7061 }
7062
7063 /* Creates a new induction variable corresponding to CAND.  */
7064
7065 static void
7066 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7067 {
7068   gimple_stmt_iterator incr_pos;
7069   tree base;
7070   struct iv_use *use;
7071   struct iv_group *group;
7072   bool after = false;
7073
7074   gcc_assert (cand->iv != NULL);
7075
7076   switch (cand->pos)
7077     {
7078     case IP_NORMAL:
7079       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7080       break;
7081
7082     case IP_END:
7083       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7084       after = true;
7085       break;
7086
7087     case IP_AFTER_USE:
7088       after = true;
7089       /* fall through */
7090     case IP_BEFORE_USE:
7091       incr_pos = gsi_for_stmt (cand->incremented_at);
7092       break;
7093
7094     case IP_ORIGINAL:
7095       /* Mark that the iv is preserved.  */
7096       name_info (data, cand->var_before)->preserve_biv = true;
7097       name_info (data, cand->var_after)->preserve_biv = true;
7098
7099       /* Rewrite the increment so that it uses var_before directly.  */
7100       use = find_interesting_uses_op (data, cand->var_after);
7101       group = data->vgroups[use->group_id];
7102       group->selected = cand;
7103       return;
7104     }
7105
7106   gimple_add_tmp_var (cand->var_before);
7107
7108   base = unshare_expr (cand->iv->base);
7109
7110   create_iv (base, unshare_expr (cand->iv->step),
7111              cand->var_before, data->current_loop,
7112              &incr_pos, after, &cand->var_before, &cand->var_after);
7113 }
7114
7115 /* Creates new induction variables described in SET.  */
7116
7117 static void
7118 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7119 {
7120   unsigned i;
7121   struct iv_cand *cand;
7122   bitmap_iterator bi;
7123
7124   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7125     {
7126       cand = data->vcands[i];
7127       create_new_iv (data, cand);
7128     }
7129
7130   if (dump_file && (dump_flags & TDF_DETAILS))
7131     {
7132       fprintf (dump_file, "Selected IV set for loop %d",
7133                data->current_loop->num);
7134       if (data->loop_loc != UNKNOWN_LOCATION)
7135         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7136                  LOCATION_LINE (data->loop_loc));
7137       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7138                avg_loop_niter (data->current_loop));
7139       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7140       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7141         {
7142           cand = data->vcands[i];
7143           dump_cand (dump_file, cand);
7144         }
7145       fprintf (dump_file, "\n");
7146     }
7147 }
7148
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  USE->stmt must be a PHI node or an assignment.
   The value is rebuilt from the two affine combinations that
   get_computation_aff_1 produces for USE and CAND, plus a constant offset
   that is added last.  A PHI node is replaced by an assignment of the new
   value; an assignment gets its right-hand side replaced.  Nothing is done
   when CAND is the original iv incremented by USE->stmt itself and that
   increment can stay as it is, or when USE->stmt is a PHI whose result is
   marked as a preserved biv.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  gassign *ass;
  gimple_stmt_iterator bsi;
  tree comp, type = get_use_type (use), tgt;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree op = NULL_TREE;
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          /* OP becomes the operand other than var_before, i.e. the amount
             by which the iv is changed.  */
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
        }

      if (op != NULL_TREE)
        {
          /* The statement may stay if OP is invariant in the loop, or is
             an SSA name recorded as an iv with zero step.  */
          if (expr_invariant_in_loop_p (data->current_loop, op))
            return;
          if (TREE_CODE (op) == SSA_NAME)
            {
              struct iv *iv = get_iv (data, op);
              if (iv != NULL && integer_zerop (iv->step))
                return;
            }
        }
    }

  /* Determine the value TGT defined by the use and the position BSI at
     which the replacement statements are inserted.  */
  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  /* Express the use in terms of CAND; failure here is not expected.  */
  aff_tree aff_inv, aff_var;
  if (!get_computation_aff_1 (data->current_loop, use->stmt,
                              use, cand, &aff_inv, &aff_var))
    gcc_unreachable ();

  unshare_aff_combination (&aff_inv);
  unshare_aff_combination (&aff_var);
  /* Prefer CSE opportunity than loop invariant by adding offset at last
     so that iv_uses have different offsets can be CSEed.  */
  poly_widest_int offset = aff_inv.offset;
  aff_inv.offset = 0;

  /* STMT_LIST collects every statement needed to compute the operands;
     SEQ receives the statements of each individual gimplification.  */
  gimple_seq stmt_list = NULL, seq = NULL;
  tree comp_op1 = aff_combination_to_tree (&aff_inv);
  tree comp_op2 = aff_combination_to_tree (&aff_var);
  gcc_assert (comp_op1 && comp_op2);

  comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);
  comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);

  /* If either operand is a pointer, make it the first one so that the
     pointer-plus form below can be used.  */
  if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
    std::swap (comp_op1, comp_op2);

  /* Build COMP_OP1 + COMP_OP2 + OFFSET, with the offset added last.  */
  if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
    {
      comp = fold_build_pointer_plus (comp_op1,
                                      fold_convert (sizetype, comp_op2));
      comp = fold_build_pointer_plus (comp,
                                      wide_int_to_tree (sizetype, offset));
    }
  else
    {
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
                          fold_convert (TREE_TYPE (comp_op1), comp_op2));
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
                          wide_int_to_tree (TREE_TYPE (comp_op1), offset));
    }

  /* Convert to the type of the use.  The result is gimplified into a
     separate operand when it is not a valid gimple right-hand side, or
     when storing it directly into the existing assignment would require
     a statement with more operands.  */
  comp = fold_convert (type, comp);
  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand (comp, &seq, true, NULL);
      gimple_seq_add_seq (&stmt_list, seq);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          /* Carry TGT's pointer information over to the new name.  */
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  /* Emit the supporting statements, then install the new value.  */
  gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      /* Replace the PHI node by an assignment to its result.  */
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      /* Replace the right-hand side and re-read the statement from the
         iterator into USE->stmt.  */
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
7298
7299 /* Performs a peephole optimization to reorder the iv update statement with
7300    a mem ref to enable instruction combining in later phases. The mem ref uses
7301    the iv value before the update, so the reordering transformation requires
7302    adjustment of the offset. CAND is the selected IV_CAND.
7303
7304    Example:
7305
7306    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7307    iv2 = iv1 + 1;
7308
7309    if (t < val)      (1)
7310      goto L;
7311    goto Head;
7312
7313
7314    directly propagating t over to (1) will introduce overlapping live range
7315    thus increase register pressure. This peephole transform it into:
7316
7317
7318    iv2 = iv1 + 1;
7319    t = MEM_REF (base, iv2, 8, 8);
7320    if (t < val)
7321      goto L;
7322    goto Head;
7323 */
7324
7325 static void
7326 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7327 {
7328   tree var_after;
7329   gimple *iv_update, *stmt;
7330   basic_block bb;
7331   gimple_stmt_iterator gsi, gsi_iv;
7332
7333   if (cand->pos != IP_NORMAL)
7334     return;
7335
7336   var_after = cand->var_after;
7337   iv_update = SSA_NAME_DEF_STMT (var_after);
7338
7339   bb = gimple_bb (iv_update);
7340   gsi = gsi_last_nondebug_bb (bb);
7341   stmt = gsi_stmt (gsi);
7342
7343   /* Only handle conditional statement for now.  */
7344   if (gimple_code (stmt) != GIMPLE_COND)
7345     return;
7346
7347   gsi_prev_nondebug (&gsi);
7348   stmt = gsi_stmt (gsi);
7349   if (stmt != iv_update)
7350     return;
7351
7352   gsi_prev_nondebug (&gsi);
7353   if (gsi_end_p (gsi))
7354     return;
7355
7356   stmt = gsi_stmt (gsi);
7357   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7358     return;
7359
7360   if (stmt != use->stmt)
7361     return;
7362
7363   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7364     return;
7365
7366   if (dump_file && (dump_flags & TDF_DETAILS))
7367     {
7368       fprintf (dump_file, "Reordering \n");
7369       print_gimple_stmt (dump_file, iv_update, 0);
7370       print_gimple_stmt (dump_file, use->stmt, 0);
7371       fprintf (dump_file, "\n");
7372     }
7373
7374   gsi = gsi_for_stmt (use->stmt);
7375   gsi_iv = gsi_for_stmt (iv_update);
7376   gsi_move_before (&gsi_iv, &gsi);
7377
7378   cand->pos = IP_BEFORE_USE;
7379   cand->incremented_at = use->stmt;
7380 }
7381
7382 /* Return the alias pointer type that should be used for a MEM_REF
7383    associated with USE, which has type USE_PTR_ADDRESS.  */
7384
7385 static tree
7386 get_alias_ptr_type_for_ptr_address (iv_use *use)
7387 {
7388   gcall *call = as_a <gcall *> (use->stmt);
7389   switch (gimple_call_internal_fn (call))
7390     {
7391     case IFN_MASK_LOAD:
7392     case IFN_MASK_STORE:
7393     case IFN_MASK_LOAD_LANES:
7394     case IFN_MASK_STORE_LANES:
7395       /* The second argument contains the correct alias type.  */
7396       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7397       return TREE_TYPE (gimple_call_arg (call, 1));
7398
7399     default:
7400       gcc_unreachable ();
7401     }
7402 }
7403
7404
7405 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7406
7407 static void
7408 rewrite_use_address (struct ivopts_data *data,
7409                      struct iv_use *use, struct iv_cand *cand)
7410 {
7411   aff_tree aff;
7412   bool ok;
7413
7414   adjust_iv_update_pos (cand, use);
7415   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7416   gcc_assert (ok);
7417   unshare_aff_combination (&aff);
7418
7419   /* To avoid undefined overflow problems, all IV candidates use unsigned
7420      integer types.  The drawback is that this makes it impossible for
7421      create_mem_ref to distinguish an IV that is based on a memory object
7422      from one that represents simply an offset.
7423
7424      To work around this problem, we pass a hint to create_mem_ref that
7425      indicates which variable (if any) in aff is an IV based on a memory
7426      object.  Note that we only consider the candidate.  If this is not
7427      based on an object, the base of the reference is in some subexpression
7428      of the use -- but these will use pointer types, so they are recognized
7429      by the create_mem_ref heuristics anyway.  */
7430   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7431   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7432   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7433   tree type = use->mem_type;
7434   tree alias_ptr_type;
7435   if (use->type == USE_PTR_ADDRESS)
7436     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7437   else
7438     {
7439       gcc_assert (type == TREE_TYPE (*use->op_p));
7440       unsigned int align = get_object_alignment (*use->op_p);
7441       if (align != TYPE_ALIGN (type))
7442         type = build_aligned_type (type, align);
7443       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7444     }
7445   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7446                              iv, base_hint, data->speed);
7447
7448   if (use->type == USE_PTR_ADDRESS)
7449     {
7450       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7451       ref = fold_convert (get_use_type (use), ref);
7452       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7453                                       true, GSI_SAME_STMT);
7454     }
7455   else
7456     copy_ref_info (ref, *use->op_p);
7457
7458   *use->op_p = ref;
7459 }
7460
7461 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7462    candidate CAND.  */
7463
7464 static void
7465 rewrite_use_compare (struct ivopts_data *data,
7466                      struct iv_use *use, struct iv_cand *cand)
7467 {
7468   tree comp, op, bound;
7469   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7470   enum tree_code compare;
7471   struct iv_group *group = data->vgroups[use->group_id];
7472   class cost_pair *cp = get_group_iv_cost (data, group, cand);
7473
7474   bound = cp->value;
7475   if (bound)
7476     {
7477       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7478       tree var_type = TREE_TYPE (var);
7479       gimple_seq stmts;
7480
7481       if (dump_file && (dump_flags & TDF_DETAILS))
7482         {
7483           fprintf (dump_file, "Replacing exit test: ");
7484           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7485         }
7486       compare = cp->comp;
7487       bound = unshare_expr (fold_convert (var_type, bound));
7488       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7489       if (stmts)
7490         gsi_insert_seq_on_edge_immediate (
7491                 loop_preheader_edge (data->current_loop),
7492                 stmts);
7493
7494       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7495       gimple_cond_set_lhs (cond_stmt, var);
7496       gimple_cond_set_code (cond_stmt, compare);
7497       gimple_cond_set_rhs (cond_stmt, op);
7498       return;
7499     }
7500
7501   /* The induction variable elimination failed; just express the original
7502      giv.  */
7503   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7504   gcc_assert (comp != NULL_TREE);
7505   gcc_assert (use->op_p != NULL);
7506   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7507                                          SSA_NAME_VAR (*use->op_p),
7508                                          true, GSI_SAME_STMT);
7509 }
7510
7511 /* Rewrite the groups using the selected induction variables.  */
7512
7513 static void
7514 rewrite_groups (struct ivopts_data *data)
7515 {
7516   unsigned i, j;
7517
7518   for (i = 0; i < data->vgroups.length (); i++)
7519     {
7520       struct iv_group *group = data->vgroups[i];
7521       struct iv_cand *cand = group->selected;
7522
7523       gcc_assert (cand);
7524
7525       if (group->type == USE_NONLINEAR_EXPR)
7526         {
7527           for (j = 0; j < group->vuses.length (); j++)
7528             {
7529               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7530               update_stmt (group->vuses[j]->stmt);
7531             }
7532         }
7533       else if (address_p (group->type))
7534         {
7535           for (j = 0; j < group->vuses.length (); j++)
7536             {
7537               rewrite_use_address (data, group->vuses[j], cand);
7538               update_stmt (group->vuses[j]->stmt);
7539             }
7540         }
7541       else
7542         {
7543           gcc_assert (group->type == USE_COMPARE);
7544
7545           for (j = 0; j < group->vuses.length (); j++)
7546             {
7547               rewrite_use_compare (data, group->vuses[j], cand);
7548               update_stmt (group->vuses[j]->stmt);
7549             }
7550         }
7551     }
7552 }
7553
7554 /* Removes the ivs that are not used after rewriting.  */
7555
7556 static void
7557 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7558 {
7559   unsigned j;
7560   bitmap_iterator bi;
7561
7562   /* Figure out an order in which to release SSA DEFs so that we don't
7563      release something that we'd have to propagate into a debug stmt
7564      afterwards.  */
7565   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7566     {
7567       struct version_info *info;
7568
7569       info = ver_info (data, j);
7570       if (info->iv
7571           && !integer_zerop (info->iv->step)
7572           && !info->inv_id
7573           && !info->iv->nonlin_use
7574           && !info->preserve_biv)
7575         {
7576           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7577
7578           tree def = info->iv->ssa_name;
7579
7580           if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7581             {
7582               imm_use_iterator imm_iter;
7583               use_operand_p use_p;
7584               gimple *stmt;
7585               int count = 0;
7586
7587               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7588                 {
7589                   if (!gimple_debug_bind_p (stmt))
7590                     continue;
7591
7592                   /* We just want to determine whether to do nothing
7593                      (count == 0), to substitute the computed
7594                      expression into a single use of the SSA DEF by
7595                      itself (count == 1), or to use a debug temp
7596                      because the SSA DEF is used multiple times or as
7597                      part of a larger expression (count > 1). */
7598                   count++;
7599                   if (gimple_debug_bind_get_value (stmt) != def)
7600                     count++;
7601
7602                   if (count > 1)
7603                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7604                 }
7605
7606               if (!count)
7607                 continue;
7608
7609               struct iv_use dummy_use;
7610               struct iv_cand *best_cand = NULL, *cand;
7611               unsigned i, best_pref = 0, cand_pref;
7612               tree comp = NULL_TREE;
7613
7614               memset (&dummy_use, 0, sizeof (dummy_use));
7615               dummy_use.iv = info->iv;
7616               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7617                 {
7618                   cand = data->vgroups[i]->selected;
7619                   if (cand == best_cand)
7620                     continue;
7621                   cand_pref = operand_equal_p (cand->iv->step,
7622                                                info->iv->step, 0)
7623                     ? 4 : 0;
7624                   cand_pref
7625                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7626                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7627                     ? 2 : 0;
7628                   cand_pref
7629                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7630                     ? 1 : 0;
7631                   if (best_cand == NULL || best_pref < cand_pref)
7632                     {
7633                       tree this_comp
7634                         = get_debug_computation_at (data->current_loop,
7635                                                     SSA_NAME_DEF_STMT (def),
7636                                                     &dummy_use, cand);
7637                       if (this_comp)
7638                         {
7639                           best_cand = cand;
7640                           best_pref = cand_pref;
7641                           comp = this_comp;
7642                         }
7643                     }
7644                 }
7645
7646               if (!best_cand)
7647                 continue;
7648
7649               comp = unshare_expr (comp);
7650               if (count > 1)
7651                 {
7652                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7653                   DECL_ARTIFICIAL (vexpr) = 1;
7654                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7655                   if (SSA_NAME_VAR (def))
7656                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7657                   else
7658                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7659                   gdebug *def_temp
7660                     = gimple_build_debug_bind (vexpr, comp, NULL);
7661                   gimple_stmt_iterator gsi;
7662
7663                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7664                     gsi = gsi_after_labels (gimple_bb
7665                                             (SSA_NAME_DEF_STMT (def)));
7666                   else
7667                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7668
7669                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7670                   comp = vexpr;
7671                 }
7672
7673               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7674                 {
7675                   if (!gimple_debug_bind_p (stmt))
7676                     continue;
7677
7678                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7679                     SET_USE (use_p, comp);
7680
7681                   update_stmt (stmt);
7682                 }
7683             }
7684         }
7685     }
7686 }
7687
7688 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7689    for hash_map::traverse.  */
7690
7691 bool
7692 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7693 {
7694   free (value);
7695   return true;
7696 }
7697
7698 /* Frees data allocated by the optimization of a single loop.  */
7699
7700 static void
7701 free_loop_data (struct ivopts_data *data)
7702 {
7703   unsigned i, j;
7704   bitmap_iterator bi;
7705   tree obj;
7706
7707   if (data->niters)
7708     {
7709       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7710       delete data->niters;
7711       data->niters = NULL;
7712     }
7713
7714   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7715     {
7716       struct version_info *info;
7717
7718       info = ver_info (data, i);
7719       info->iv = NULL;
7720       info->has_nonlin_use = false;
7721       info->preserve_biv = false;
7722       info->inv_id = 0;
7723     }
7724   bitmap_clear (data->relevant);
7725   bitmap_clear (data->important_candidates);
7726
7727   for (i = 0; i < data->vgroups.length (); i++)
7728     {
7729       struct iv_group *group = data->vgroups[i];
7730
7731       for (j = 0; j < group->vuses.length (); j++)
7732         free (group->vuses[j]);
7733       group->vuses.release ();
7734
7735       BITMAP_FREE (group->related_cands);
7736       for (j = 0; j < group->n_map_members; j++)
7737         {
7738           if (group->cost_map[j].inv_vars)
7739             BITMAP_FREE (group->cost_map[j].inv_vars);
7740           if (group->cost_map[j].inv_exprs)
7741             BITMAP_FREE (group->cost_map[j].inv_exprs);
7742         }
7743
7744       free (group->cost_map);
7745       free (group);
7746     }
7747   data->vgroups.truncate (0);
7748
7749   for (i = 0; i < data->vcands.length (); i++)
7750     {
7751       struct iv_cand *cand = data->vcands[i];
7752
7753       if (cand->inv_vars)
7754         BITMAP_FREE (cand->inv_vars);
7755       if (cand->inv_exprs)
7756         BITMAP_FREE (cand->inv_exprs);
7757       free (cand);
7758     }
7759   data->vcands.truncate (0);
7760
7761   if (data->version_info_size < num_ssa_names)
7762     {
7763       data->version_info_size = 2 * num_ssa_names;
7764       free (data->version_info);
7765       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7766     }
7767
7768   data->max_inv_var_id = 0;
7769   data->max_inv_expr_id = 0;
7770
7771   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7772     SET_DECL_RTL (obj, NULL_RTX);
7773
7774   decl_rtl_to_reset.truncate (0);
7775
7776   data->inv_expr_tab->empty ();
7777
7778   data->iv_common_cand_tab->empty ();
7779   data->iv_common_cands.truncate (0);
7780 }
7781
7782 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7783    loop tree.  */
7784
7785 static void
7786 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7787 {
7788   free_loop_data (data);
7789   free (data->version_info);
7790   BITMAP_FREE (data->relevant);
7791   BITMAP_FREE (data->important_candidates);
7792
7793   decl_rtl_to_reset.release ();
7794   data->vgroups.release ();
7795   data->vcands.release ();
7796   delete data->inv_expr_tab;
7797   data->inv_expr_tab = NULL;
7798   free_affine_expand_cache (&data->name_expansion_cache);
7799   if (data->base_object_map)
7800     delete data->base_object_map;
7801   delete data->iv_common_cand_tab;
7802   data->iv_common_cand_tab = NULL;
7803   data->iv_common_cands.release ();
7804   obstack_free (&data->iv_obstack, NULL);
7805 }
7806
7807 /* Returns true if the loop body BODY includes any function calls.  */
7808
7809 static bool
7810 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7811 {
7812   gimple_stmt_iterator gsi;
7813   unsigned i;
7814
7815   for (i = 0; i < num_nodes; i++)
7816     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7817       {
7818         gimple *stmt = gsi_stmt (gsi);
7819         if (is_gimple_call (stmt)
7820             && !gimple_call_internal_p (stmt)
7821             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7822           return true;
7823       }
7824   return false;
7825 }
7826
7827 /* Determine cost scaling factor for basic blocks in loop.  */
7828 #define COST_SCALING_FACTOR_BOUND (20)
7829
7830 static void
7831 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7832 {
7833   int lfreq = data->current_loop->header->count.to_frequency (cfun);
7834   if (!data->speed || lfreq <= 0)
7835     return;
7836
7837   int max_freq = lfreq;
7838   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7839     {
7840       body[i]->aux = (void *)(intptr_t) 1;
7841       if (max_freq < body[i]->count.to_frequency (cfun))
7842         max_freq = body[i]->count.to_frequency (cfun);
7843     }
7844   if (max_freq > lfreq)
7845     {
7846       int divisor, factor;
7847       /* Check if scaling factor itself needs to be scaled by the bound.  This
7848          is to avoid overflow when scaling cost according to profile info.  */
7849       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7850         {
7851           divisor = max_freq;
7852           factor = COST_SCALING_FACTOR_BOUND;
7853         }
7854       else
7855         {
7856           divisor = lfreq;
7857           factor = 1;
7858         }
7859       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7860         {
7861           int bfreq = body[i]->count.to_frequency (cfun);
7862           if (bfreq <= lfreq)
7863             continue;
7864
7865           body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7866         }
7867     }
7868 }
7869
7870 /* Find doloop comparison use and set its doloop_p on if found.  */
7871
7872 static bool
7873 find_doloop_use (struct ivopts_data *data)
7874 {
7875   struct loop *loop = data->current_loop;
7876
7877   for (unsigned i = 0; i < data->vgroups.length (); i++)
7878     {
7879       struct iv_group *group = data->vgroups[i];
7880       if (group->type == USE_COMPARE)
7881         {
7882           gcc_assert (group->vuses.length () == 1);
7883           struct iv_use *use = group->vuses[0];
7884           gimple *stmt = use->stmt;
7885           if (gimple_code (stmt) == GIMPLE_COND)
7886             {
7887               basic_block bb = gimple_bb (stmt);
7888               edge true_edge, false_edge;
7889               extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
7890               /* This comparison is used for loop latch.  Require latch is empty
7891                  for now.  */
7892               if ((loop->latch == true_edge->dest
7893                    || loop->latch == false_edge->dest)
7894                   && empty_block_p (loop->latch))
7895                 {
7896                   group->doloop_p = true;
7897                   if (dump_file && (dump_flags & TDF_DETAILS))
7898                     {
7899                       fprintf (dump_file, "Doloop cmp iv use: ");
7900                       print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
7901                     }
7902                   return true;
7903                 }
7904             }
7905         }
7906     }
7907
7908   return false;
7909 }
7910
7911 /* For the targets which support doloop, to predict whether later RTL doloop
7912    transformation will perform on this loop, further detect the doloop use and
7913    mark the flag doloop_use_p if predicted.  */
7914
7915 void
7916 analyze_and_mark_doloop_use (struct ivopts_data *data)
7917 {
7918   data->doloop_use_p = false;
7919
7920   if (!flag_branch_on_count_reg)
7921     return;
7922
7923   if (!generic_predict_doloop_p (data))
7924     return;
7925
7926   if (find_doloop_use (data))
7927     {
7928       data->doloop_use_p = true;
7929       if (dump_file && (dump_flags & TDF_DETAILS))
7930         {
7931           struct loop *loop = data->current_loop;
7932           fprintf (dump_file,
7933                    "Predict loop %d can perform"
7934                    " doloop optimization later.\n",
7935                    loop->num);
7936           flow_loop_dump (loop, dump_file, NULL, 1);
7937         }
7938     }
7939 }
7940
7941 /* Optimizes the LOOP.  Returns true if anything changed.  */
7942
7943 static bool
7944 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
7945                            bitmap toremove)
7946 {
7947   bool changed = false;
7948   class iv_ca *iv_ca;
7949   edge exit = single_dom_exit (loop);
7950   basic_block *body;
7951
7952   gcc_assert (!data->niters);
7953   data->current_loop = loop;
7954   data->loop_loc = find_loop_location (loop).get_location_t ();
7955   data->speed = optimize_loop_for_speed_p (loop);
7956
7957   if (dump_file && (dump_flags & TDF_DETAILS))
7958     {
7959       fprintf (dump_file, "Processing loop %d", loop->num);
7960       if (data->loop_loc != UNKNOWN_LOCATION)
7961         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7962                  LOCATION_LINE (data->loop_loc));
7963       fprintf (dump_file, "\n");
7964
7965       if (exit)
7966         {
7967           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7968                    exit->src->index, exit->dest->index);
7969           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7970           fprintf (dump_file, "\n");
7971         }
7972
7973       fprintf (dump_file, "\n");
7974     }
7975
7976   body = get_loop_body (loop);
7977   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7978   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7979
7980   data->loop_single_exit_p
7981     = exit != NULL && loop_only_exit_p (loop, body, exit);
7982
7983   /* For each ssa name determines whether it behaves as an induction variable
7984      in some loop.  */
7985   if (!find_induction_variables (data))
7986     goto finish;
7987
7988   /* Finds interesting uses (item 1).  */
7989   find_interesting_uses (data);
7990   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7991     goto finish;
7992
7993   /* Determine cost scaling factor for basic blocks in loop.  */
7994   determine_scaling_factor (data, body);
7995
7996   /* Analyze doloop possibility and mark the doloop use if predicted.  */
7997   analyze_and_mark_doloop_use (data);
7998
7999   /* Finds candidates for the induction variables (item 2).  */
8000   find_iv_candidates (data);
8001
8002   /* Calculates the costs (item 3, part 1).  */
8003   determine_iv_costs (data);
8004   determine_group_iv_costs (data);
8005   determine_set_costs (data);
8006
8007   /* Find the optimal set of induction variables (item 3, part 2).  */
8008   iv_ca = find_optimal_iv_set (data);
8009   /* Cleanup basic block aux field.  */
8010   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8011     body[i]->aux = NULL;
8012   if (!iv_ca)
8013     goto finish;
8014   changed = true;
8015
8016   /* Create the new induction variables (item 4, part 1).  */
8017   create_new_ivs (data, iv_ca);
8018   iv_ca_free (&iv_ca);
8019
8020   /* Rewrite the uses (item 4, part 2).  */
8021   rewrite_groups (data);
8022
8023   /* Remove the ivs that are unused after rewriting.  */
8024   remove_unused_ivs (data, toremove);
8025
8026 finish:
8027   free (body);
8028   free_loop_data (data);
8029
8030   return changed;
8031 }
8032
8033 /* Main entry point.  Optimizes induction variables in loops.  */
8034
8035 void
8036 tree_ssa_iv_optimize (void)
8037 {
8038   class loop *loop;
8039   struct ivopts_data data;
8040   auto_bitmap toremove;
8041
8042   tree_ssa_iv_optimize_init (&data);
8043
8044   /* Optimize the loops starting with the innermost ones.  */
8045   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8046     {
8047       if (!dbg_cnt (ivopts_loop))
8048         continue;
8049
8050       if (dump_file && (dump_flags & TDF_DETAILS))
8051         flow_loop_dump (loop, dump_file, NULL, 1);
8052
8053       tree_ssa_iv_optimize_loop (&data, loop, toremove);
8054     }
8055
8056   /* Remove eliminated IV defs.  */
8057   release_defs_bitset (toremove);
8058
8059   /* We have changed the structure of induction variables; it might happen
8060      that definitions in the scev database refer to some of them that were
8061      eliminated.  */
8062   scev_reset_htab ();
8063   /* Likewise niter and control-IV information.  */
8064   free_numbers_of_iterations_estimates (cfun);
8065
8066   tree_ssa_iv_optimize_finalize (&data);
8067 }
8068
8069 #include "gt-tree-ssa-loop-ivopts.h"