1 /* Induction variable optimizations.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset (e.g. accesses to a[i] and a[i + 1]).
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated.
68
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed by the RTL doloop optimization,
71 to try to make the selected IV candidate set optimal. The process of doloop
72 support includes:
73
74 1) Analyze whether the current loop will be transformed into a doloop or not;
75 if so, find and mark its compare type IV use as a doloop use (iv_group field
76 doloop_p), and set flag doloop_use_p of ivopts_data to notify subsequent
77 processing of the doloop. See analyze_and_mark_doloop_use and its callees
78 for the details. The target hook predict_doloop_p can be used for target specific checks.
79
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 with flag doloop_p of iv_cand set; its step cost is set to zero and it gets
82 no extra cost, like a biv. For cost determination between the doloop IV
83 cand and an IV use, the target hooks doloop_cost_for_generic and
84 doloop_cost_for_address are provided to add extra costs for generic type and
85 address type IV uses. Zero cost is assigned to the pair of a doloop IV cand
86 and a doloop IV use, and bound zero is set for IV elimination.
87
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm will proceed as usual, picking up the doloop dedicated
90 IV if it is profitable. */
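
/* As a rough illustration (a hypothetical example, not taken from this
   file), consider a loop like:

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   The interesting uses here are the addresses of a[i] and b[i] and the
   compare i < n.  A possible candidate selection rewrites the loop roughly
   as:

     for (p = a, q = b; p != a + n; p++, q++)
       *p = *q + 1;

   i.e. the multiplications implied by the array indexing are strength
   reduced to pointer increments, and the counter i can be eliminated once
   the exit test is expressed in terms of the new candidate.  */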
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134
135 /* For lang_hooks.types.type_for_mode. */
136 #include "langhooks.h"
137
138 /* FIXME: Expressions are expanded to RTL in this pass to determine the
139 cost of different addressing modes. This should be moved to a TBD
140 interface between the GIMPLE and RTL worlds. */
141
142 /* The infinite cost. */
143 #define INFTY 1000000000
144
145 /* Returns the expected number of loop iterations for LOOP.
146 The average trip count is computed from profile data if it
147 exists. */
148
149 static inline HOST_WIDE_INT
150 avg_loop_niter (class loop *loop)
151 {
152 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
153 if (niter == -1)
154 {
155 niter = likely_max_stmt_executions_int (loop);
156
157 if (niter == -1 || niter > param_avg_loop_niter)
158 return param_avg_loop_niter;
159 }
160
161 return niter;
162 }
163
164 struct iv_use;
165
166 /* Representation of the induction variable. */
167 struct iv
168 {
169 tree base; /* Initial value of the iv. */
170 tree base_object; /* A memory object to which the induction variable points. */
171 tree step; /* Step of the iv (constant only). */
172 tree ssa_name; /* The ssa name with the value. */
173 struct iv_use *nonlin_use; /* The nonlinear use this iv appears in, if any. */
174 bool biv_p; /* Is it a biv? */
175 bool no_overflow; /* True if the iv doesn't overflow. */
176 bool have_address_use;/* For biv, indicate if it's used in any address
177 type use. */
178 };
179
180 /* Per-ssa version information (induction variable descriptions, etc.). */
181 struct version_info
182 {
183 tree name; /* The ssa name. */
184 struct iv *iv; /* Induction variable description. */
185 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
186 an expression that is not an induction variable. */
187 bool preserve_biv; /* For the original biv, whether to preserve it. */
188 unsigned inv_id; /* Id of an invariant. */
189 };
190
191 /* Types of uses. */
192 enum use_type
193 {
194 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
195 USE_REF_ADDRESS, /* Use is an address for an explicit memory
196 reference. */
197 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
198 cases where the expansion of the function
199 will turn the argument into a normal address. */
200 USE_COMPARE /* Use is a compare. */
201 };
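
/* For illustration (hypothetical examples, not exhaustive): in a statement
   like *p = x the address p is a USE_REF_ADDRESS; a pointer passed to a
   function whose expansion turns the argument into a plain address is a
   USE_PTR_ADDRESS; an exit test such as i < n is a USE_COMPARE; any other
   computation involving an iv, say x = i * 3, is a USE_NONLINEAR_EXPR.  */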
202
203 /* Cost of a computation. */
204 class comp_cost
205 {
206 public:
207 comp_cost (): cost (0), complexity (0), scratch (0)
208 {}
209
210 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
211 : cost (cost), complexity (complexity), scratch (scratch)
212 {}
213
214 /* Returns true if COST is infinite. */
215 bool infinite_cost_p ();
216
217 /* Adds costs COST1 and COST2. */
218 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
219
220 /* Adds COST to the comp_cost. */
221 comp_cost operator+= (comp_cost cost);
222
223 /* Adds constant C to this comp_cost. */
224 comp_cost operator+= (HOST_WIDE_INT c);
225
226 /* Subtracts constant C from this comp_cost. */
227 comp_cost operator-= (HOST_WIDE_INT c);
228
229 /* Divide the comp_cost by constant C. */
230 comp_cost operator/= (HOST_WIDE_INT c);
231
232 /* Multiply the comp_cost by constant C. */
233 comp_cost operator*= (HOST_WIDE_INT c);
234
235 /* Subtracts cost COST2 from COST1. */
236 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
237
238 /* Subtracts COST from this comp_cost. */
239 comp_cost operator-= (comp_cost cost);
240
241 /* Returns true if COST1 is smaller than COST2. */
242 friend bool operator< (comp_cost cost1, comp_cost cost2);
243
244 /* Returns true if COST1 and COST2 are equal. */
245 friend bool operator== (comp_cost cost1, comp_cost cost2);
246
247 /* Returns true if COST1 is smaller than or equal to COST2. */
248 friend bool operator<= (comp_cost cost1, comp_cost cost2);
249
250 int64_t cost; /* The runtime cost. */
251 unsigned complexity; /* The estimate of the complexity of the code for
252 the computation (in no concrete units --
253 complexity field should be larger for more
254 complex expressions and addressing modes). */
255 int64_t scratch; /* Scratch used during cost computation. */
256 };
257
258 static const comp_cost no_cost;
259 static const comp_cost infinite_cost (INFTY, 0, INFTY);
260
261 bool
262 comp_cost::infinite_cost_p ()
263 {
264 return cost == INFTY;
265 }
266
267 comp_cost
268 operator+ (comp_cost cost1, comp_cost cost2)
269 {
270 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
271 return infinite_cost;
272
273 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
274 cost1.cost += cost2.cost;
275 cost1.complexity += cost2.complexity;
276
277 return cost1;
278 }
279
280 comp_cost
281 operator- (comp_cost cost1, comp_cost cost2)
282 {
283 if (cost1.infinite_cost_p ())
284 return infinite_cost;
285
286 gcc_assert (!cost2.infinite_cost_p ());
287 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
288
289 cost1.cost -= cost2.cost;
290 cost1.complexity -= cost2.complexity;
291
292 return cost1;
293 }
294
295 comp_cost
296 comp_cost::operator+= (comp_cost cost)
297 {
298 *this = *this + cost;
299 return *this;
300 }
301
302 comp_cost
303 comp_cost::operator+= (HOST_WIDE_INT c)
304 {
305 if (c >= INFTY)
306 this->cost = INFTY;
307
308 if (infinite_cost_p ())
309 return *this;
310
311 gcc_assert (this->cost + c < infinite_cost.cost);
312 this->cost += c;
313
314 return *this;
315 }
316
317 comp_cost
318 comp_cost::operator-= (HOST_WIDE_INT c)
319 {
320 if (infinite_cost_p ())
321 return *this;
322
323 gcc_assert (this->cost - c < infinite_cost.cost);
324 this->cost -= c;
325
326 return *this;
327 }
328
329 comp_cost
330 comp_cost::operator/= (HOST_WIDE_INT c)
331 {
332 gcc_assert (c != 0);
333 if (infinite_cost_p ())
334 return *this;
335
336 this->cost /= c;
337
338 return *this;
339 }
340
341 comp_cost
342 comp_cost::operator*= (HOST_WIDE_INT c)
343 {
344 if (infinite_cost_p ())
345 return *this;
346
347 gcc_assert (this->cost * c < infinite_cost.cost);
348 this->cost *= c;
349
350 return *this;
351 }
352
353 comp_cost
354 comp_cost::operator-= (comp_cost cost)
355 {
356 *this = *this - cost;
357 return *this;
358 }
359
360 bool
361 operator< (comp_cost cost1, comp_cost cost2)
362 {
363 if (cost1.cost == cost2.cost)
364 return cost1.complexity < cost2.complexity;
365
366 return cost1.cost < cost2.cost;
367 }
368
369 bool
370 operator== (comp_cost cost1, comp_cost cost2)
371 {
372 return cost1.cost == cost2.cost
373 && cost1.complexity == cost2.complexity;
374 }
375
376 bool
377 operator<= (comp_cost cost1, comp_cost cost2)
378 {
379 return cost1 < cost2 || cost1 == cost2;
380 }
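
/* Illustrative comparisons (made-up values): a cost of {4, 2} compares
   smaller than {5, 0} because the runtime cost is compared first, while
   {4, 1} compares smaller than {4, 2} because complexity only breaks
   ties.  */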
381
382 struct iv_inv_expr_ent;
383
384 /* The candidate - cost pair. */
385 class cost_pair
386 {
387 public:
388 struct iv_cand *cand; /* The candidate. */
389 comp_cost cost; /* The cost. */
390 enum tree_code comp; /* For iv elimination, the comparison. */
391 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
392 preserved when representing iv_use with iv_cand. */
393 bitmap inv_exprs; /* The list of newly created invariant expressions
394 when representing iv_use with iv_cand. */
395 tree value; /* For final value elimination, the expression for
396 the final value of the iv. For iv elimination,
397 the new bound to compare with. */
398 };
399
400 /* Use. */
401 struct iv_use
402 {
403 unsigned id; /* The id of the use. */
404 unsigned group_id; /* The group id the use belongs to. */
405 enum use_type type; /* Type of the use. */
406 tree mem_type; /* The memory type to use when testing whether an
407 address is legitimate, and what the address's
408 cost is. */
409 struct iv *iv; /* The induction variable it is based on. */
410 gimple *stmt; /* Statement in which it occurs. */
411 tree *op_p; /* The place where it occurs. */
412
413 tree addr_base; /* Base address with const offset stripped. */
414 poly_uint64_pod addr_offset;
415 /* Const offset stripped from base address. */
416 };
417
418 /* Group of uses. */
419 struct iv_group
420 {
421 /* The id of the group. */
422 unsigned id;
423 /* Uses of the group are of the same type. */
424 enum use_type type;
425 /* The set of "related" IV candidates, plus the important ones. */
426 bitmap related_cands;
427 /* Number of IV candidates in the cost_map. */
428 unsigned n_map_members;
429 /* The costs wrto the iv candidates. */
430 class cost_pair *cost_map;
431 /* The selected candidate for the group. */
432 struct iv_cand *selected;
433 /* To indicate this is a doloop use group. */
434 bool doloop_p;
435 /* Uses in the group. */
436 vec<struct iv_use *> vuses;
437 };
438
439 /* The position where the iv is computed. */
440 enum iv_position
441 {
442 IP_NORMAL, /* At the end, just before the exit condition. */
443 IP_END, /* At the end of the latch block. */
444 IP_BEFORE_USE, /* Immediately before a specific use. */
445 IP_AFTER_USE, /* Immediately after a specific use. */
446 IP_ORIGINAL /* The original biv. */
447 };
448
449 /* The induction variable candidate. */
450 struct iv_cand
451 {
452 unsigned id; /* The number of the candidate. */
453 bool important; /* Whether this is an "important" candidate, i.e. such
454 that it should be considered by all uses. */
455 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
456 gimple *incremented_at;/* For original biv, the statement where it is
457 incremented. */
458 tree var_before; /* The variable used for it before increment. */
459 tree var_after; /* The variable used for it after increment. */
460 struct iv *iv; /* The value of the candidate. NULL for
461 "pseudocandidate" used to indicate the possibility
462 to replace the final value of an iv by direct
463 computation of the value. */
464 unsigned cost; /* Cost of the candidate. */
465 unsigned cost_step; /* Cost of the candidate's increment operation. */
466 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
467 where it is incremented. */
468 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
469 iv_cand. */
470 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
471 handle it as a new invariant expression which will
472 be hoisted out of the loop. */
473 struct iv *orig_iv; /* The original iv if this cand is added from biv with
474 smaller type. */
475 bool doloop_p; /* Whether this is a doloop candidate. */
476 };
477
478 /* Hashtable entry for common candidate derived from iv uses. */
479 class iv_common_cand
480 {
481 public:
482 tree base;
483 tree step;
484 /* IV uses from which this common candidate is derived. */
485 auto_vec<struct iv_use *> uses;
486 hashval_t hash;
487 };
488
489 /* Hashtable helpers. */
490
491 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
492 {
493 static inline hashval_t hash (const iv_common_cand *);
494 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
495 };
496
497 /* Hash function for possible common candidates. */
498
499 inline hashval_t
500 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
501 {
502 return ccand->hash;
503 }
504
505 /* Hash table equality function for common candidates. */
506
507 inline bool
508 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
509 const iv_common_cand *ccand2)
510 {
511 return (ccand1->hash == ccand2->hash
512 && operand_equal_p (ccand1->base, ccand2->base, 0)
513 && operand_equal_p (ccand1->step, ccand2->step, 0)
514 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
515 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
516 }
517
518 /* Loop invariant expression hashtable entry. */
519
520 struct iv_inv_expr_ent
521 {
522 /* Tree expression of the entry. */
523 tree expr;
524 /* Unique identifier. */
525 int id;
526 /* Hash value. */
527 hashval_t hash;
528 };
529
530 /* Sort iv_inv_expr_ent pair A and B by id field. */
531
532 static int
533 sort_iv_inv_expr_ent (const void *a, const void *b)
534 {
535 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
536 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
537
538 unsigned id1 = (*e1)->id;
539 unsigned id2 = (*e2)->id;
540
541 if (id1 < id2)
542 return -1;
543 else if (id1 > id2)
544 return 1;
545 else
546 return 0;
547 }
548
549 /* Hashtable helpers. */
550
551 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
552 {
553 static inline hashval_t hash (const iv_inv_expr_ent *);
554 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
555 };
556
557 /* Return true if uses of type TYPE represent some form of address. */
558
559 inline bool
560 address_p (use_type type)
561 {
562 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
563 }
564
565 /* Hash function for loop invariant expressions. */
566
567 inline hashval_t
568 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
569 {
570 return expr->hash;
571 }
572
573 /* Hash table equality function for expressions. */
574
575 inline bool
576 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
577 const iv_inv_expr_ent *expr2)
578 {
579 return expr1->hash == expr2->hash
580 && operand_equal_p (expr1->expr, expr2->expr, 0);
581 }
582
583 struct ivopts_data
584 {
585 /* The currently optimized loop. */
586 class loop *current_loop;
587 location_t loop_loc;
588
589 /* Numbers of iterations for all exits of the current loop. */
590 hash_map<edge, tree_niter_desc *> *niters;
591
592 /* Number of registers used in it. */
593 unsigned regs_used;
594
595 /* The size of version_info array allocated. */
596 unsigned version_info_size;
597
598 /* The array of information for the ssa names. */
599 struct version_info *version_info;
600
601 /* The hashtable of loop invariant expressions created
602 by ivopt. */
603 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
604
605 /* The bitmap of indices in version_info whose value was changed. */
606 bitmap relevant;
607
608 /* The uses of induction variables. */
609 vec<iv_group *> vgroups;
610
611 /* The candidates. */
612 vec<iv_cand *> vcands;
613
614 /* A bitmap of important candidates. */
615 bitmap important_candidates;
616
617 /* Cache used by tree_to_aff_combination_expand. */
618 hash_map<tree, name_expansion *> *name_expansion_cache;
619
620 /* The hashtable of common candidates derived from iv uses. */
621 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
622
623 /* The common candidates. */
624 vec<iv_common_cand *> iv_common_cands;
625
626 /* Hash map recording base object information of tree exp. */
627 hash_map<tree, tree> *base_object_map;
628
629 /* The maximum invariant variable id. */
630 unsigned max_inv_var_id;
631
632 /* The maximum invariant expression id. */
633 unsigned max_inv_expr_id;
634
635 /* Number of no_overflow BIVs which are not used in memory address. */
636 unsigned bivs_not_used_in_addr;
637
638 /* Obstack for iv structure. */
639 struct obstack iv_obstack;
640
641 /* Whether to consider just related and important candidates when replacing a
642 use. */
643 bool consider_all_candidates;
644
645 /* Are we optimizing for speed? */
646 bool speed;
647
648 /* Whether the loop body includes any function calls. */
649 bool body_includes_call;
650
651 /* Whether the loop body can only be exited via single exit. */
652 bool loop_single_exit_p;
653
654 /* Whether the loop has doloop comparison use. */
655 bool doloop_use_p;
656 };
657
658 /* An assignment of iv candidates to uses. */
659
660 class iv_ca
661 {
662 public:
663 /* The number of uses covered by the assignment. */
664 unsigned upto;
665
666 /* Number of uses that cannot be expressed by the candidates in the set. */
667 unsigned bad_groups;
668
669 /* Candidate assigned to a use, together with the related costs. */
670 class cost_pair **cand_for_group;
671
672 /* Number of times each candidate is used. */
673 unsigned *n_cand_uses;
674
675 /* The candidates used. */
676 bitmap cands;
677
678 /* The number of candidates in the set. */
679 unsigned n_cands;
680
681 /* The number of invariants needed, including both invariant variables and
682 invariant expressions. */
683 unsigned n_invs;
684
685 /* Total cost of expressing uses. */
686 comp_cost cand_use_cost;
687
688 /* Total cost of candidates. */
689 int64_t cand_cost;
690
691 /* Number of times each invariant variable is used. */
692 unsigned *n_inv_var_uses;
693
694 /* Number of times each invariant expression is used. */
695 unsigned *n_inv_expr_uses;
696
697 /* Total cost of the assignment. */
698 comp_cost cost;
699 };
700
701 /* Difference of two iv candidate assignments. */
702
703 struct iv_ca_delta
704 {
705 /* Changed group. */
706 struct iv_group *group;
707
708 /* An old assignment (for rollback purposes). */
709 class cost_pair *old_cp;
710
711 /* A new assignment. */
712 class cost_pair *new_cp;
713
714 /* Next change in the list. */
715 struct iv_ca_delta *next;
716 };
717
718 /* Bound on the number of candidates below which all candidates are considered. */
719
720 #define CONSIDER_ALL_CANDIDATES_BOUND \
721 ((unsigned) param_iv_consider_all_candidates_bound)
722
723 /* If there are more iv occurrences, we just give up (it is quite unlikely that
724 optimizing such a loop would help, and it would take ages). */
725
726 #define MAX_CONSIDERED_GROUPS \
727 ((unsigned) param_iv_max_considered_uses)
728
729 /* If there are at most this number of ivs in the set, always try removing
730 unnecessary ivs from the set. */
731
732 #define ALWAYS_PRUNE_CAND_SET_BOUND \
733 ((unsigned) param_iv_always_prune_cand_set_bound)
734
735 /* The list of trees for which the decl_rtl field must be reset is stored
736 here. */
737
738 static vec<tree> decl_rtl_to_reset;
739
740 static comp_cost force_expr_to_var_cost (tree, bool);
741
742 /* The single loop exit if it dominates the latch, NULL otherwise. */
743
744 edge
745 single_dom_exit (class loop *loop)
746 {
747 edge exit = single_exit (loop);
748
749 if (!exit)
750 return NULL;
751
752 if (!just_once_each_iteration_p (loop, exit->src))
753 return NULL;
754
755 return exit;
756 }
757
758 /* Dumps information about the induction variable IV to FILE. Don't dump
759 variable's name if DUMP_NAME is FALSE. The information is dumped with
760 preceding spaces indicated by INDENT_LEVEL. */
761
762 void
763 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
764 {
765 const char *p;
766 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
767
768 if (indent_level > 4)
769 indent_level = 4;
770 p = spaces + 8 - (indent_level << 1);
771
772 fprintf (file, "%sIV struct:\n", p);
773 if (iv->ssa_name && dump_name)
774 {
775 fprintf (file, "%s SSA_NAME:\t", p);
776 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
777 fprintf (file, "\n");
778 }
779
780 fprintf (file, "%s Type:\t", p);
781 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
782 fprintf (file, "\n");
783
784 fprintf (file, "%s Base:\t", p);
785 print_generic_expr (file, iv->base, TDF_SLIM);
786 fprintf (file, "\n");
787
788 fprintf (file, "%s Step:\t", p);
789 print_generic_expr (file, iv->step, TDF_SLIM);
790 fprintf (file, "\n");
791
792 if (iv->base_object)
793 {
794 fprintf (file, "%s Object:\t", p);
795 print_generic_expr (file, iv->base_object, TDF_SLIM);
796 fprintf (file, "\n");
797 }
798
799 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
800
801 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
802 p, iv->no_overflow ? "No-overflow" : "Overflow");
803 }
804
805 /* Dumps information about the USE to FILE. */
806
807 void
808 dump_use (FILE *file, struct iv_use *use)
809 {
810 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
811 fprintf (file, " At stmt:\t");
812 print_gimple_stmt (file, use->stmt, 0);
813 fprintf (file, " At pos:\t");
814 if (use->op_p)
815 print_generic_expr (file, *use->op_p, TDF_SLIM);
816 fprintf (file, "\n");
817 dump_iv (file, use->iv, false, 2);
818 }
819
820 /* Dumps information about the uses to FILE. */
821
822 void
823 dump_groups (FILE *file, struct ivopts_data *data)
824 {
825 unsigned i, j;
826 struct iv_group *group;
827
828 for (i = 0; i < data->vgroups.length (); i++)
829 {
830 group = data->vgroups[i];
831 fprintf (file, "Group %d:\n", group->id);
832 if (group->type == USE_NONLINEAR_EXPR)
833 fprintf (file, " Type:\tGENERIC\n");
834 else if (group->type == USE_REF_ADDRESS)
835 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
836 else if (group->type == USE_PTR_ADDRESS)
837 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
838 else
839 {
840 gcc_assert (group->type == USE_COMPARE);
841 fprintf (file, " Type:\tCOMPARE\n");
842 }
843 for (j = 0; j < group->vuses.length (); j++)
844 dump_use (file, group->vuses[j]);
845 }
846 }
847
848 /* Dumps information about induction variable candidate CAND to FILE. */
849
850 void
851 dump_cand (FILE *file, struct iv_cand *cand)
852 {
853 struct iv *iv = cand->iv;
854
855 fprintf (file, "Candidate %d:\n", cand->id);
856 if (cand->inv_vars)
857 {
858 fprintf (file, " Depend on inv.vars: ");
859 dump_bitmap (file, cand->inv_vars);
860 }
861 if (cand->inv_exprs)
862 {
863 fprintf (file, " Depend on inv.exprs: ");
864 dump_bitmap (file, cand->inv_exprs);
865 }
866
867 if (cand->var_before)
868 {
869 fprintf (file, " Var befor: ");
870 print_generic_expr (file, cand->var_before, TDF_SLIM);
871 fprintf (file, "\n");
872 }
873 if (cand->var_after)
874 {
875 fprintf (file, " Var after: ");
876 print_generic_expr (file, cand->var_after, TDF_SLIM);
877 fprintf (file, "\n");
878 }
879
880 switch (cand->pos)
881 {
882 case IP_NORMAL:
883 fprintf (file, " Incr POS: before exit test\n");
884 break;
885
886 case IP_BEFORE_USE:
887 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
888 break;
889
890 case IP_AFTER_USE:
891 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
892 break;
893
894 case IP_END:
895 fprintf (file, " Incr POS: at end\n");
896 break;
897
898 case IP_ORIGINAL:
899 fprintf (file, " Incr POS: orig biv\n");
900 break;
901 }
902
903 dump_iv (file, iv, false, 1);
904 }
905
906 /* Returns the info for ssa version VER. */
907
908 static inline struct version_info *
909 ver_info (struct ivopts_data *data, unsigned ver)
910 {
911 return data->version_info + ver;
912 }
913
914 /* Returns the info for ssa name NAME. */
915
916 static inline struct version_info *
917 name_info (struct ivopts_data *data, tree name)
918 {
919 return ver_info (data, SSA_NAME_VERSION (name));
920 }
921
922 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
923 emitted in LOOP. */
924
925 static bool
926 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
927 {
928 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
929
930 gcc_assert (bb);
931
932 if (sbb == loop->latch)
933 return true;
934
935 if (sbb != bb)
936 return false;
937
938 return stmt == last_stmt (bb);
939 }
940
941 /* Returns true if STMT is after the place where the original induction
942 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
943 if the positions are identical. */
944
945 static bool
946 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
947 {
948 basic_block cand_bb = gimple_bb (cand->incremented_at);
949 basic_block stmt_bb = gimple_bb (stmt);
950
951 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
952 return false;
953
954 if (stmt_bb != cand_bb)
955 return true;
956
957 if (true_if_equal
958 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
959 return true;
960 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
961 }
962
963 /* Returns true if STMT is after the place where the induction variable
964 CAND is incremented in LOOP. */
965
966 static bool
967 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
968 {
969 switch (cand->pos)
970 {
971 case IP_END:
972 return false;
973
974 case IP_NORMAL:
975 return stmt_after_ip_normal_pos (loop, stmt);
976
977 case IP_ORIGINAL:
978 case IP_AFTER_USE:
979 return stmt_after_inc_pos (cand, stmt, false);
980
981 case IP_BEFORE_USE:
982 return stmt_after_inc_pos (cand, stmt, true);
983
984 default:
985 gcc_unreachable ();
986 }
987 }
988
989 /* walk_tree callback for contains_abnormal_ssa_name_p. */
990
991 static tree
992 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
993 {
994 if (TREE_CODE (*tp) == SSA_NAME
995 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
996 return *tp;
997
998 if (!EXPR_P (*tp))
999 *walk_subtrees = 0;
1000
1001 return NULL_TREE;
1002 }
1003
1004 /* Returns true if EXPR contains a ssa name that occurs in an
1005 abnormal phi node. */
1006
1007 bool
1008 contains_abnormal_ssa_name_p (tree expr)
1009 {
1010 return walk_tree_without_duplicates
1011 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1012 }
1013
1014 /* Returns the structure describing number of iterations determined from
1015 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1016
1017 static class tree_niter_desc *
1018 niter_for_exit (struct ivopts_data *data, edge exit)
1019 {
1020 class tree_niter_desc *desc;
1021 tree_niter_desc **slot;
1022
1023 if (!data->niters)
1024 {
1025 data->niters = new hash_map<edge, tree_niter_desc *>;
1026 slot = NULL;
1027 }
1028 else
1029 slot = data->niters->get (exit);
1030
1031 if (!slot)
1032 {
1033 /* Try to determine number of iterations. We cannot safely work with ssa
1034 names that appear in phi nodes on abnormal edges, so that we do not
1035 create overlapping life ranges for them (PR 27283). */
1036 desc = XNEW (class tree_niter_desc);
1037 if (!number_of_iterations_exit (data->current_loop,
1038 exit, desc, true)
1039 || contains_abnormal_ssa_name_p (desc->niter))
1040 {
1041 XDELETE (desc);
1042 desc = NULL;
1043 }
1044 data->niters->put (exit, desc);
1045 }
1046 else
1047 desc = *slot;
1048
1049 return desc;
1050 }
1051
1052 /* Returns the structure describing number of iterations determined from
1053 single dominating exit of DATA->current_loop, or NULL if something
1054 goes wrong. */
1055
1056 static class tree_niter_desc *
1057 niter_for_single_dom_exit (struct ivopts_data *data)
1058 {
1059 edge exit = single_dom_exit (data->current_loop);
1060
1061 if (!exit)
1062 return NULL;
1063
1064 return niter_for_exit (data, exit);
1065 }
1066
1067 /* Initializes data structures used by the iv optimization pass, stored
1068 in DATA. */
1069
1070 static void
1071 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1072 {
1073 data->version_info_size = 2 * num_ssa_names;
1074 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1075 data->relevant = BITMAP_ALLOC (NULL);
1076 data->important_candidates = BITMAP_ALLOC (NULL);
1077 data->max_inv_var_id = 0;
1078 data->max_inv_expr_id = 0;
1079 data->niters = NULL;
1080 data->vgroups.create (20);
1081 data->vcands.create (20);
1082 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1083 data->name_expansion_cache = NULL;
1084 data->base_object_map = NULL;
1085 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1086 data->iv_common_cands.create (20);
1087 decl_rtl_to_reset.create (20);
1088 gcc_obstack_init (&data->iv_obstack);
1089 }
1090
1091 /* walk_tree callback for determine_base_object. */
1092
1093 static tree
1094 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1095 {
1096 tree_code code = TREE_CODE (*tp);
1097 tree obj = NULL_TREE;
1098 if (code == ADDR_EXPR)
1099 {
1100 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1101 if (!base)
1102 obj = *tp;
1103 else if (TREE_CODE (base) != MEM_REF)
1104 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1105 }
1106 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1107 obj = fold_convert (ptr_type_node, *tp);
1108
1109 if (!obj)
1110 {
1111 if (!EXPR_P (*tp))
1112 *walk_subtrees = 0;
1113
1114 return NULL_TREE;
1115 }
1116 /* Record special node for multiple base objects and stop. */
1117 if (*static_cast<tree *> (wdata))
1118 {
1119 *static_cast<tree *> (wdata) = integer_zero_node;
1120 return integer_zero_node;
1121 }
1122 /* Record the base object and continue looking. */
1123 *static_cast<tree *> (wdata) = obj;
1124 return NULL_TREE;
1125 }
1126
1127 /* Returns a memory object to which EXPR points, with caching. Returns NULL
1128 if we are able to determine that it does not point to any such object;
1129 specially, returns integer_zero_node if EXPR contains multiple base objects. */
1130
1131 static tree
1132 determine_base_object (struct ivopts_data *data, tree expr)
1133 {
1134 tree *slot, obj = NULL_TREE;
1135 if (data->base_object_map)
1136 {
1137 if ((slot = data->base_object_map->get(expr)) != NULL)
1138 return *slot;
1139 }
1140 else
1141 data->base_object_map = new hash_map<tree, tree>;
1142
1143 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1144 data->base_object_map->put (expr, obj);
1145 return obj;
1146 }
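
/* For example (purely illustrative): for EXPR == &a[i] the recorded base
   object is &a, and for a pointer-based address like p + 4 * i it is p;
   if two distinct base objects such as p and q both appear in EXPR,
   integer_zero_node is recorded instead.  */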
1147
1148 /* Return true if an address expression with a non-DECL_P operand appears
1149 in EXPR. */
1150
1151 static bool
1152 contain_complex_addr_expr (tree expr)
1153 {
1154 bool res = false;
1155
1156 STRIP_NOPS (expr);
1157 switch (TREE_CODE (expr))
1158 {
1159 case POINTER_PLUS_EXPR:
1160 case PLUS_EXPR:
1161 case MINUS_EXPR:
1162 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1163 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1164 break;
1165
1166 case ADDR_EXPR:
1167 return (!DECL_P (TREE_OPERAND (expr, 0)));
1168
1169 default:
1170 return false;
1171 }
1172
1173 return res;
1174 }
1175
1176 /* Allocates an induction variable with given initial value BASE and step STEP
1177 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1178
1179 static struct iv *
1180 alloc_iv (struct ivopts_data *data, tree base, tree step,
1181 bool no_overflow = false)
1182 {
1183 tree expr = base;
1184 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1185 sizeof (struct iv));
1186 gcc_assert (step != NULL_TREE);
1187
1188 /* Lower address expression in base except ones with DECL_P as operand.
1189 By doing this:
1190 1) More accurate cost can be computed for address expressions;
1191 2) Duplicate candidates won't be created for bases in different
1192 forms, like &a[0] and &a. */
1193 STRIP_NOPS (expr);
1194 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1195 || contain_complex_addr_expr (expr))
1196 {
1197 aff_tree comb;
1198 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1199 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1200 }
1201
1202 iv->base = base;
1203 iv->base_object = determine_base_object (data, base);
1204 iv->step = step;
1205 iv->biv_p = false;
1206 iv->nonlin_use = NULL;
1207 iv->ssa_name = NULL_TREE;
1208 if (!no_overflow
1209 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1210 base, step))
1211 no_overflow = true;
1212 iv->no_overflow = no_overflow;
1213 iv->have_address_use = false;
1214
1215 return iv;
1216 }
1217
1218 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1219 doesn't overflow. */
1220
1221 static void
1222 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1223 bool no_overflow)
1224 {
1225 struct version_info *info = name_info (data, iv);
1226
1227 gcc_assert (!info->iv);
1228
1229 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1230 info->iv = alloc_iv (data, base, step, no_overflow);
1231 info->iv->ssa_name = iv;
1232 }
1233
1234 /* Finds induction variable declaration for VAR. */
1235
1236 static struct iv *
1237 get_iv (struct ivopts_data *data, tree var)
1238 {
1239 basic_block bb;
1240 tree type = TREE_TYPE (var);
1241
1242 if (!POINTER_TYPE_P (type)
1243 && !INTEGRAL_TYPE_P (type))
1244 return NULL;
1245
1246 if (!name_info (data, var)->iv)
1247 {
1248 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1249
1250 if (!bb
1251 || !flow_bb_inside_loop_p (data->current_loop, bb))
1252 {
1253 if (POINTER_TYPE_P (type))
1254 type = sizetype;
1255 set_iv (data, var, var, build_int_cst (type, 0), true);
1256 }
1257 }
1258
1259 return name_info (data, var)->iv;
1260 }
1261
1262 /* Return the first non-invariant ssa var found in EXPR. */
1263
1264 static tree
1265 extract_single_var_from_expr (tree expr)
1266 {
1267 int i, n;
1268 tree tmp;
1269 enum tree_code code;
1270
1271 if (!expr || is_gimple_min_invariant (expr))
1272 return NULL;
1273
1274 code = TREE_CODE (expr);
1275 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1276 {
1277 n = TREE_OPERAND_LENGTH (expr);
1278 for (i = 0; i < n; i++)
1279 {
1280 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1281
1282 if (tmp)
1283 return tmp;
1284 }
1285 }
1286 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1287 }
1288
1289 /* Finds basic ivs. */
1290
1291 static bool
1292 find_bivs (struct ivopts_data *data)
1293 {
1294 gphi *phi;
1295 affine_iv iv;
1296 tree step, type, base, stop;
1297 bool found = false;
1298 class loop *loop = data->current_loop;
1299 gphi_iterator psi;
1300
1301 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1302 {
1303 phi = psi.phi ();
1304
1305 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1306 continue;
1307
1308 if (virtual_operand_p (PHI_RESULT (phi)))
1309 continue;
1310
1311 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1312 continue;
1313
1314 if (integer_zerop (iv.step))
1315 continue;
1316
1317 step = iv.step;
1318 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1319 /* Stop expanding the iv base at the first ssa var referred to by the iv
1320 step. Ideally we would stop at any such ssa var, but since that is
1321 expensive and unusual to happen, we just do it for the first one.
1322
1323 See PR64705 for the rationale. */
1324 stop = extract_single_var_from_expr (step);
1325 base = expand_simple_operations (base, stop);
1326 if (contains_abnormal_ssa_name_p (base)
1327 || contains_abnormal_ssa_name_p (step))
1328 continue;
1329
1330 type = TREE_TYPE (PHI_RESULT (phi));
1331 base = fold_convert (type, base);
1332 if (step)
1333 {
1334 if (POINTER_TYPE_P (type))
1335 step = convert_to_ptrofftype (step);
1336 else
1337 step = fold_convert (type, step);
1338 }
1339
1340 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1341 found = true;
1342 }
1343
1344 return found;
1345 }
1346
1347 /* Marks basic ivs. */
1348
1349 static void
1350 mark_bivs (struct ivopts_data *data)
1351 {
1352 gphi *phi;
1353 gimple *def;
1354 tree var;
1355 struct iv *iv, *incr_iv;
1356 class loop *loop = data->current_loop;
1357 basic_block incr_bb;
1358 gphi_iterator psi;
1359
1360 data->bivs_not_used_in_addr = 0;
1361 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1362 {
1363 phi = psi.phi ();
1364
1365 iv = get_iv (data, PHI_RESULT (phi));
1366 if (!iv)
1367 continue;
1368
1369 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1370 def = SSA_NAME_DEF_STMT (var);
1371 /* Don't mark an iv peeled from another one as a biv. */
1372 if (def
1373 && gimple_code (def) == GIMPLE_PHI
1374 && gimple_bb (def) == loop->header)
1375 continue;
1376
1377 incr_iv = get_iv (data, var);
1378 if (!incr_iv)
1379 continue;
1380
1381 /* If the increment is in the subloop, ignore it. */
1382 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1383 if (incr_bb->loop_father != data->current_loop
1384 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1385 continue;
1386
1387 iv->biv_p = true;
1388 incr_iv->biv_p = true;
1389 if (iv->no_overflow)
1390 data->bivs_not_used_in_addr++;
1391 if (incr_iv->no_overflow)
1392 data->bivs_not_used_in_addr++;
1393 }
1394 }
1395
1396 /* Checks whether STMT defines a linear induction variable and stores its
1397 parameters to IV. */
1398
1399 static bool
1400 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1401 {
1402 tree lhs, stop;
1403 class loop *loop = data->current_loop;
1404
1405 iv->base = NULL_TREE;
1406 iv->step = NULL_TREE;
1407
1408 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1409 return false;
1410
1411 lhs = gimple_assign_lhs (stmt);
1412 if (TREE_CODE (lhs) != SSA_NAME)
1413 return false;
1414
1415 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1416 return false;
1417
1418 /* Stop expanding the iv base at the first ssa var referred to by the iv
1419 step. Ideally we would stop at any such ssa var, but since that is
1420 expensive and unusual to happen, we just do it for the first one.
1421
1422 See PR64705 for the rationale. */
1423 stop = extract_single_var_from_expr (iv->step);
1424 iv->base = expand_simple_operations (iv->base, stop);
1425 if (contains_abnormal_ssa_name_p (iv->base)
1426 || contains_abnormal_ssa_name_p (iv->step))
1427 return false;
1428
1429 /* If STMT could throw, then do not consider STMT as defining a GIV.
1430 While this will suppress optimizations, we cannot safely delete this
1431 GIV and associated statements, even if it appears it is not used. */
1432 if (stmt_could_throw_p (cfun, stmt))
1433 return false;
1434
1435 return true;
1436 }
1437
1438 /* Finds general ivs in statement STMT. */
1439
1440 static void
1441 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1442 {
1443 affine_iv iv;
1444
1445 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1446 return;
1447
1448 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1449 }
1450
1451 /* Finds general ivs in basic block BB. */
1452
1453 static void
1454 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1455 {
1456 gimple_stmt_iterator bsi;
1457
1458 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1459 find_givs_in_stmt (data, gsi_stmt (bsi));
1460 }
1461
1462 /* Finds general ivs. */
1463
1464 static void
1465 find_givs (struct ivopts_data *data)
1466 {
1467 class loop *loop = data->current_loop;
1468 basic_block *body = get_loop_body_in_dom_order (loop);
1469 unsigned i;
1470
1471 for (i = 0; i < loop->num_nodes; i++)
1472 find_givs_in_bb (data, body[i]);
1473 free (body);
1474 }
1475
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477 variable and if so, its initial value and step. */
1478
1479 static bool
1480 find_induction_variables (struct ivopts_data *data)
1481 {
1482 unsigned i;
1483 bitmap_iterator bi;
1484
1485 if (!find_bivs (data))
1486 return false;
1487
1488 find_givs (data);
1489 mark_bivs (data);
1490
1491 if (dump_file && (dump_flags & TDF_DETAILS))
1492 {
1493 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1494
1495 if (niter)
1496 {
1497 fprintf (dump_file, " number of iterations ");
1498 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499 if (!integer_zerop (niter->may_be_zero))
1500 {
1501 fprintf (dump_file, "; zero if ");
1502 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1503 }
1504 fprintf (dump_file, "\n");
1505 };
1506
1507 fprintf (dump_file, "\n<Induction Vars>:\n");
1508 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1509 {
1510 struct version_info *info = ver_info (data, i);
1511 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1513 }
1514 }
1515
1516 return true;
1517 }
1518
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521 is the const offset stripped from IV base and MEM_TYPE is the type
1522 of the memory being addressed. For uses of other types, ADDR_BASE
1523 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1524
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527 gimple *stmt, enum use_type type, tree mem_type,
1528 tree addr_base, poly_uint64 addr_offset)
1529 {
1530 struct iv_use *use = XCNEW (struct iv_use);
1531
1532 use->id = group->vuses.length ();
1533 use->group_id = group->id;
1534 use->type = type;
1535 use->mem_type = mem_type;
1536 use->iv = iv;
1537 use->stmt = stmt;
1538 use->op_p = use_p;
1539 use->addr_base = addr_base;
1540 use->addr_offset = addr_offset;
1541
1542 group->vuses.safe_push (use);
1543 return use;
1544 }
1545
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547 NONLINEAR_USE is true if the invariant is used in a way we do not
1548 handle specially. */
1549
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1552 {
1553 basic_block bb;
1554 struct version_info *info;
1555
1556 if (TREE_CODE (op) != SSA_NAME
1557 || virtual_operand_p (op))
1558 return;
1559
1560 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561 if (bb
1562 && flow_bb_inside_loop_p (data->current_loop, bb))
1563 return;
1564
1565 info = name_info (data, op);
1566 info->name = op;
1567 info->has_nonlin_use |= nonlinear_use;
1568 if (!info->inv_id)
1569 info->inv_id = ++data->max_inv_var_id;
1570 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1571 }
1572
1573 /* Record a group of TYPE. */
1574
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1577 {
1578 struct iv_group *group = XCNEW (struct iv_group);
1579
1580 group->id = data->vgroups.length ();
1581 group->type = type;
1582 group->related_cands = BITMAP_ALLOC (NULL);
1583 group->vuses.create (1);
1584 group->doloop_p = false;
1585
1586 data->vgroups.safe_push (group);
1587 return group;
1588 }
1589
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591 A new group will be created if there is no existing group for the use.
1592 MEM_TYPE is the type of memory being addressed, or NULL if this
1593 isn't an address reference. */
1594
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 struct iv *iv, gimple *stmt, enum use_type type,
1598 tree mem_type)
1599 {
1600 tree addr_base = NULL;
1601 struct iv_group *group = NULL;
1602 poly_uint64 addr_offset = 0;
1603
1604 /* A non address type use is always recorded in a new group; for an address type use, first look for an existing group with the same stripped base and step. */
1605 if (address_p (type))
1606 {
1607 unsigned int i;
1608
1609 addr_base = strip_offset (iv->base, &addr_offset);
1610 for (i = 0; i < data->vgroups.length (); i++)
1611 {
1612 struct iv_use *use;
1613
1614 group = data->vgroups[i];
1615 use = group->vuses[0];
1616 if (!address_p (use->type))
1617 continue;
1618
1619 /* Check if it has the same stripped base and step. */
1620 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621 && operand_equal_p (iv->step, use->iv->step, 0)
1622 && operand_equal_p (addr_base, use->addr_base, 0))
1623 break;
1624 }
1625 if (i == data->vgroups.length ())
1626 group = NULL;
1627 }
1628
1629 if (!group)
1630 group = record_group (data, type);
1631
1632 return record_use (group, use_p, iv, stmt, type, mem_type,
1633 addr_base, addr_offset);
1634 }
1635
1636 /* Checks whether the use OP is interesting and if so, records it. */
1637
1638 static struct iv_use *
1639 find_interesting_uses_op (struct ivopts_data *data, tree op)
1640 {
1641 struct iv *iv;
1642 gimple *stmt;
1643 struct iv_use *use;
1644
1645 if (TREE_CODE (op) != SSA_NAME)
1646 return NULL;
1647
1648 iv = get_iv (data, op);
1649 if (!iv)
1650 return NULL;
1651
1652 if (iv->nonlin_use)
1653 {
1654 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655 return iv->nonlin_use;
1656 }
1657
1658 if (integer_zerop (iv->step))
1659 {
1660 record_invariant (data, op, true);
1661 return NULL;
1662 }
1663
1664 stmt = SSA_NAME_DEF_STMT (op);
1665 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666
1667 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668 iv->nonlin_use = use;
1669 return use;
1670 }
1671
1672 /* Indicate how compare type iv_use can be handled. */
1673 enum comp_iv_rewrite
1674 {
1675 COMP_IV_NA,
1676 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1677 COMP_IV_EXPR,
1678 /* We may rewrite compare type iv_uses on both sides of comparison by
1679 expressing value of each iv_use. */
1680 COMP_IV_EXPR_2,
1681 /* We may rewrite compare type iv_use by expressing value of the iv_use
1682 or by eliminating it with other iv_cand. */
1683 COMP_IV_ELIM
1684 };
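
/* Illustrative mapping (hypothetical conditions): with i an affine iv and n
   a loop invariant, an exit test i < n is a candidate for COMP_IV_ELIM;
   with i and j both affine ivs, i < j is handled as COMP_IV_EXPR_2.  */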
1685
1686 /* Given a condition in statement STMT, checks whether it is a compare
1687 of an induction variable and an invariant. If this is the case,
1688 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1689 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1690 induction variable descriptions, and the appropriate COMP_IV_* value is
1691 returned. Otherwise, CONTROL_VAR and BOUND are set to the arguments of
1692 the condition and COMP_IV_NA is returned. */
1693
1694 static enum comp_iv_rewrite
1695 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696 tree **control_var, tree **bound,
1697 struct iv **iv_var, struct iv **iv_bound)
1698 {
1699 /* The objects returned when COND has constant operands. */
1700 static struct iv const_iv;
1701 static tree zero;
1702 tree *op0 = &zero, *op1 = &zero;
1703 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1705
1706 if (gimple_code (stmt) == GIMPLE_COND)
1707 {
1708 gcond *cond_stmt = as_a <gcond *> (stmt);
1709 op0 = gimple_cond_lhs_ptr (cond_stmt);
1710 op1 = gimple_cond_rhs_ptr (cond_stmt);
1711 }
1712 else
1713 {
1714 op0 = gimple_assign_rhs1_ptr (stmt);
1715 op1 = gimple_assign_rhs2_ptr (stmt);
1716 }
1717
1718 zero = integer_zero_node;
1719 const_iv.step = integer_zero_node;
1720
1721 if (TREE_CODE (*op0) == SSA_NAME)
1722 iv0 = get_iv (data, *op0);
1723 if (TREE_CODE (*op1) == SSA_NAME)
1724 iv1 = get_iv (data, *op1);
1725
1726 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1727 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1728 {
1729 rewrite_type = COMP_IV_EXPR_2;
1730 goto end;
1731 }
1732
1733 /* If neither side of the comparison is an IV. */
1734 if ((!iv0 || integer_zerop (iv0->step))
1735 && (!iv1 || integer_zerop (iv1->step)))
1736 goto end;
1737
1738 /* Control variable may be on the other side. */
1739 if (!iv0 || integer_zerop (iv0->step))
1740 {
1741 std::swap (op0, op1);
1742 std::swap (iv0, iv1);
1743 }
1744 /* If one side is IV and the other side isn't loop invariant. */
1745 if (!iv1)
1746 rewrite_type = COMP_IV_EXPR;
1747 /* If one side is IV and the other side is loop invariant. */
1748 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749 rewrite_type = COMP_IV_ELIM;
1750
1751 end:
1752 if (control_var)
1753 *control_var = op0;
1754 if (iv_var)
1755 *iv_var = iv0;
1756 if (bound)
1757 *bound = op1;
1758 if (iv_bound)
1759 *iv_bound = iv1;
1760
1761 return rewrite_type;
1762 }
1763
1764 /* Checks whether the condition in STMT is interesting and if so,
1765 records it. */
1766
1767 static void
1768 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769 {
1770 tree *var_p, *bound_p;
1771 struct iv *var_iv, *bound_iv;
1772 enum comp_iv_rewrite ret;
1773
1774 ret = extract_cond_operands (data, stmt,
1775 &var_p, &bound_p, &var_iv, &bound_iv);
1776 if (ret == COMP_IV_NA)
1777 {
1778 find_interesting_uses_op (data, *var_p);
1779 find_interesting_uses_op (data, *bound_p);
1780 return;
1781 }
1782
1783 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784 /* Record compare type iv_use for iv on the other side of comparison. */
1785 if (ret == COMP_IV_EXPR_2)
1786 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787 }
1788
1789 /* Returns the outermost loop EXPR is obviously invariant in
1790 relative to the loop LOOP, i.e. if all its operands are defined
1791 outside of the returned loop. Returns NULL if EXPR is not
1792 even obviously invariant in LOOP. */
1793
1794 class loop *
1795 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1796 {
1797 basic_block def_bb;
1798 unsigned i, len;
1799
1800 if (is_gimple_min_invariant (expr))
1801 return current_loops->tree_root;
1802
1803 if (TREE_CODE (expr) == SSA_NAME)
1804 {
1805 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806 if (def_bb)
1807 {
1808 if (flow_bb_inside_loop_p (loop, def_bb))
1809 return NULL;
1810 return superloop_at_depth (loop,
1811 loop_depth (def_bb->loop_father) + 1);
1812 }
1813
1814 return current_loops->tree_root;
1815 }
1816
1817 if (!EXPR_P (expr))
1818 return NULL;
1819
1820 unsigned maxdepth = 0;
1821 len = TREE_OPERAND_LENGTH (expr);
1822 for (i = 0; i < len; i++)
1823 {
1824 class loop *ivloop;
1825 if (!TREE_OPERAND (expr, i))
1826 continue;
1827
1828 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829 if (!ivloop)
1830 return NULL;
1831 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832 }
1833
1834 return superloop_at_depth (loop, maxdepth);
1835 }
1836
1837 /* Returns true if expression EXPR is obviously invariant in LOOP,
1838 i.e. if all its operands are defined outside of the LOOP. LOOP
1839 should not be the function body. */
1840
1841 bool
1842 expr_invariant_in_loop_p (class loop *loop, tree expr)
1843 {
1844 basic_block def_bb;
1845 unsigned i, len;
1846
1847 gcc_assert (loop_depth (loop) > 0);
1848
1849 if (is_gimple_min_invariant (expr))
1850 return true;
1851
1852 if (TREE_CODE (expr) == SSA_NAME)
1853 {
1854 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855 if (def_bb
1856 && flow_bb_inside_loop_p (loop, def_bb))
1857 return false;
1858
1859 return true;
1860 }
1861
1862 if (!EXPR_P (expr))
1863 return false;
1864
1865 len = TREE_OPERAND_LENGTH (expr);
1866 for (i = 0; i < len; i++)
1867 if (TREE_OPERAND (expr, i)
1868 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869 return false;
1870
1871 return true;
1872 }
1873
1874 /* Given expression EXPR which computes inductive values with respect
1875 to the loop recorded in DATA, this function returns the biv from which
1876 EXPR is derived by tracing definition chains of ssa variables in EXPR. */
1877
1878 static struct iv*
1879 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1880 {
1881 struct iv *iv;
1882 unsigned i, n;
1883 tree e2, e1;
1884 enum tree_code code;
1885 gimple *stmt;
1886
1887 if (expr == NULL_TREE)
1888 return NULL;
1889
1890 if (is_gimple_min_invariant (expr))
1891 return NULL;
1892
1893 code = TREE_CODE (expr);
1894 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1895 {
1896 n = TREE_OPERAND_LENGTH (expr);
1897 for (i = 0; i < n; i++)
1898 {
1899 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900 if (iv)
1901 return iv;
1902 }
1903 }
1904
1905 /* Stop if it's not ssa name. */
1906 if (code != SSA_NAME)
1907 return NULL;
1908
1909 iv = get_iv (data, expr);
1910 if (!iv || integer_zerop (iv->step))
1911 return NULL;
1912 else if (iv->biv_p)
1913 return iv;
1914
1915 stmt = SSA_NAME_DEF_STMT (expr);
1916 if (gphi *phi = dyn_cast <gphi *> (stmt))
1917 {
1918 ssa_op_iter iter;
1919 use_operand_p use_p;
1920 basic_block phi_bb = gimple_bb (phi);
1921
1922 /* Skip loop header PHI that doesn't define biv. */
1923 if (phi_bb->loop_father == data->current_loop)
1924 return NULL;
1925
1926 if (virtual_operand_p (gimple_phi_result (phi)))
1927 return NULL;
1928
1929 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1930 {
1931 tree use = USE_FROM_PTR (use_p);
1932 iv = find_deriving_biv_for_expr (data, use);
1933 if (iv)
1934 return iv;
1935 }
1936 return NULL;
1937 }
1938 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939 return NULL;
1940
1941 e1 = gimple_assign_rhs1 (stmt);
1942 code = gimple_assign_rhs_code (stmt);
1943 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944 return find_deriving_biv_for_expr (data, e1);
1945
1946 switch (code)
1947 {
1948 case MULT_EXPR:
1949 case PLUS_EXPR:
1950 case MINUS_EXPR:
1951 case POINTER_PLUS_EXPR:
1952 /* Increments, decrements and multiplications by a constant
1953 are simple. */
1954 e2 = gimple_assign_rhs2 (stmt);
1955 iv = find_deriving_biv_for_expr (data, e2);
1956 if (iv)
1957 return iv;
1958 gcc_fallthrough ();
1959
1960 CASE_CONVERT:
1961 /* Casts are simple. */
1962 return find_deriving_biv_for_expr (data, e1);
1963
1964 default:
1965 break;
1966 }
1967
1968 return NULL;
1969 }
1970
1971 /* Record that BIV is used in address type uses, and do the same for any
1972 biv with the same step whose base differs from BIV's base by one step. */
1973
1974 static void
1975 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1976 {
1977 unsigned i;
1978 tree type, base_1, base_2;
1979 bitmap_iterator bi;
1980
1981 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982 || biv->have_address_use || !biv->no_overflow)
1983 return;
1984
1985 type = TREE_TYPE (biv->base);
1986 if (!INTEGRAL_TYPE_P (type))
1987 return;
1988
1989 biv->have_address_use = true;
1990 data->bivs_not_used_in_addr--;
1991 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1993 {
1994 struct iv *iv = ver_info (data, i)->iv;
1995
1996 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997 || iv->have_address_use || !iv->no_overflow)
1998 continue;
1999
2000 if (type != TREE_TYPE (iv->base)
2001 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002 continue;
2003
2004 if (!operand_equal_p (biv->step, iv->step, 0))
2005 continue;
2006
2007 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008 if (operand_equal_p (base_1, iv->base, 0)
2009 || operand_equal_p (base_2, biv->base, 0))
2010 {
2011 iv->have_address_use = true;
2012 data->bivs_not_used_in_addr--;
2013 }
2014 }
2015 }
2016
2017 /* Accumulates the steps of indices into DATA and replaces their values with
2018 the initial ones. Returns false when the value of the index cannot be
2019 determined. Callback for for_each_index. */
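
/* A rough, hypothetical illustration: for a reference a[i][j] whose rows
   are 40 bytes and whose elements are 4 bytes, with I stepping by 1 and J
   stepping by 2 per iteration, the two callbacks accumulate
   40 * 1 + 4 * 2 = 48 into the step and rewrite both indices to their
   initial values.  */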
2020
2021 struct ifs_ivopts_data
2022 {
2023 struct ivopts_data *ivopts_data;
2024 gimple *stmt;
2025 tree step;
2026 };
2027
2028 static bool
2029 idx_find_step (tree base, tree *idx, void *data)
2030 {
2031 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032 struct iv *iv;
2033 bool use_overflow_semantics = false;
2034 tree step, iv_base, iv_step, lbound, off;
2035 class loop *loop = dta->ivopts_data->current_loop;
2036
2037 /* If base is a component ref, require that the offset of the reference
2038 be invariant. */
2039 if (TREE_CODE (base) == COMPONENT_REF)
2040 {
2041 off = component_ref_field_offset (base);
2042 return expr_invariant_in_loop_p (loop, off);
2043 }
2044
2045 /* If base is array, first check whether we will be able to move the
2046 reference out of the loop (in order to take its address in strength
2047 reduction). In order for this to work we need both lower bound
2048 and step to be loop invariants. */
2049 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2050 {
2051 /* Moreover, for a range, the size needs to be invariant as well. */
2052 if (TREE_CODE (base) == ARRAY_RANGE_REF
2053 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054 return false;
2055
2056 step = array_ref_element_size (base);
2057 lbound = array_ref_low_bound (base);
2058
2059 if (!expr_invariant_in_loop_p (loop, step)
2060 || !expr_invariant_in_loop_p (loop, lbound))
2061 return false;
2062 }
2063
2064 if (TREE_CODE (*idx) != SSA_NAME)
2065 return true;
2066
2067 iv = get_iv (dta->ivopts_data, *idx);
2068 if (!iv)
2069 return false;
2070
2071 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2072 *&x[0], which is not folded and does not trigger the
2073 ARRAY_REF path below. */
2074 *idx = iv->base;
2075
2076 if (integer_zerop (iv->step))
2077 return true;
2078
2079 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2080 {
2081 step = array_ref_element_size (base);
2082
2083 /* We only handle addresses whose step is an integer constant. */
2084 if (TREE_CODE (step) != INTEGER_CST)
2085 return false;
2086 }
2087 else
2088 /* The step for pointer arithmetic is already 1 byte. */
2089 step = size_one_node;
2090
2091 iv_base = iv->base;
2092 iv_step = iv->step;
2093 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094 use_overflow_semantics = true;
2095
2096 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097 sizetype, &iv_base, &iv_step, dta->stmt,
2098 use_overflow_semantics))
2099 {
2100 /* The index might wrap. */
2101 return false;
2102 }
2103
2104 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2106
2107 if (dta->ivopts_data->bivs_not_used_in_addr)
2108 {
2109 if (!iv->biv_p)
2110 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2111
2112 record_biv_for_address_use (dta->ivopts_data, iv);
2113 }
2114 return true;
2115 }
2116
2117 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2118 object is passed to it in DATA. */
2119
2120 static bool
2121 idx_record_use (tree base, tree *idx,
2122 void *vdata)
2123 {
2124 struct ivopts_data *data = (struct ivopts_data *) vdata;
2125 find_interesting_uses_op (data, *idx);
2126 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127 {
2128 find_interesting_uses_op (data, array_ref_element_size (base));
2129 find_interesting_uses_op (data, array_ref_low_bound (base));
2130 }
2131 return true;
2132 }
2133
2134 /* If we can prove that TOP = cst * BOT for some constant cst,
2135 store cst to MUL and return true. Otherwise return false.
2136 The returned value is always sign-extended, regardless of the
2137 signedness of TOP and BOT. */
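
/* A few hypothetical examples: with TOP = "i_1 * 4" and BOT = "i_1" the
   MULT_EXPR case stores 4 in *MUL; with TOP = 12 and BOT = 4 the
   INTEGER_CST case stores 3; with TOP = 7 and BOT = 2 it returns false,
   since the division is not exact.  */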
2138
2139 static bool
2140 constant_multiple_of (tree top, tree bot, widest_int *mul)
2141 {
2142 tree mby;
2143 enum tree_code code;
2144 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2145 widest_int res, p0, p1;
2146
2147 STRIP_NOPS (top);
2148 STRIP_NOPS (bot);
2149
2150 if (operand_equal_p (top, bot, 0))
2151 {
2152 *mul = 1;
2153 return true;
2154 }
2155
2156 code = TREE_CODE (top);
2157 switch (code)
2158 {
2159 case MULT_EXPR:
2160 mby = TREE_OPERAND (top, 1);
2161 if (TREE_CODE (mby) != INTEGER_CST)
2162 return false;
2163
2164 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2165 return false;
2166
2167 *mul = wi::sext (res * wi::to_widest (mby), precision);
2168 return true;
2169
2170 case PLUS_EXPR:
2171 case MINUS_EXPR:
2172 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2173 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2174 return false;
2175
2176 if (code == MINUS_EXPR)
2177 p1 = -p1;
2178 *mul = wi::sext (p0 + p1, precision);
2179 return true;
2180
2181 case INTEGER_CST:
2182 if (TREE_CODE (bot) != INTEGER_CST)
2183 return false;
2184
2185 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2186 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2187 if (p1 == 0)
2188 return false;
2189 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2190 return res == 0;
2191
2192 default:
2193 if (POLY_INT_CST_P (top)
2194 && POLY_INT_CST_P (bot)
2195 && constant_multiple_p (wi::to_poly_widest (top),
2196 wi::to_poly_widest (bot), mul))
2197 return true;
2198
2199 return false;
2200 }
2201 }
2202
2203 /* Return true if memory reference REF with step STEP may be unaligned. */
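
/* For instance (hypothetically), a reference whose type requires 64-bit
   alignment but whose STEP is only 4 bytes has tree_ctz (step) == 2, so
   (1 << 2) * BITS_PER_UNIT == 32 < 64 and we conservatively answer true:
   even if the first access is aligned, subsequent ones need not be.  */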
2204
2205 static bool
2206 may_be_unaligned_p (tree ref, tree step)
2207 {
2208 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2209 thus they are not misaligned. */
2210 if (TREE_CODE (ref) == TARGET_MEM_REF)
2211 return false;
2212
2213 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2214 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2215 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2216
2217 unsigned HOST_WIDE_INT bitpos;
2218 unsigned int ref_align;
2219 get_object_alignment_1 (ref, &ref_align, &bitpos);
2220 if (ref_align < align
2221 || (bitpos % align) != 0
2222 || (bitpos % BITS_PER_UNIT) != 0)
2223 return true;
2224
2225 unsigned int trailing_zeros = tree_ctz (step);
2226 if (trailing_zeros < HOST_BITS_PER_INT
2227 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2228 return true;
2229
2230 return false;
2231 }
2232
2233 /* Return true if EXPR may be non-addressable. */
2234
2235 bool
2236 may_be_nonaddressable_p (tree expr)
2237 {
2238 switch (TREE_CODE (expr))
2239 {
2240 case VAR_DECL:
2241 /* Check if it's a register variable. */
2242 return DECL_HARD_REGISTER (expr);
2243
2244 case TARGET_MEM_REF:
2245 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2246 target, thus they are always addressable. */
2247 return false;
2248
2249 case MEM_REF:
2250 /* Likewise for MEM_REFs, modulo the storage order. */
2251 return REF_REVERSE_STORAGE_ORDER (expr);
2252
2253 case BIT_FIELD_REF:
2254 if (REF_REVERSE_STORAGE_ORDER (expr))
2255 return true;
2256 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2257
2258 case COMPONENT_REF:
2259 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2260 return true;
2261 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2262 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263
2264 case ARRAY_REF:
2265 case ARRAY_RANGE_REF:
2266 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2267 return true;
2268 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269
2270 case VIEW_CONVERT_EXPR:
2271 /* This kind of view-conversions may wrap non-addressable objects
2272 and make them look addressable. After some processing the
2273 non-addressability may be uncovered again, causing ADDR_EXPRs
2274 of inappropriate objects to be built. */
2275 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2276 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2277 return true;
2278 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2279
2280 CASE_CONVERT:
2281 return true;
2282
2283 default:
2284 break;
2285 }
2286
2287 return false;
2288 }
2289
2290 /* Finds addresses in *OP_P inside STMT. */
2291
2292 static void
2293 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2294 tree *op_p)
2295 {
2296 tree base = *op_p, step = size_zero_node;
2297 struct iv *civ;
2298 struct ifs_ivopts_data ifs_ivopts_data;
2299
2300 /* Do not play with volatile memory references. A bit too conservative,
2301 perhaps, but safe. */
2302 if (gimple_has_volatile_ops (stmt))
2303 goto fail;
2304
2305 /* Ignore bitfields for now. Not really something terribly complicated
2306 to handle. TODO. */
2307 if (TREE_CODE (base) == BIT_FIELD_REF)
2308 goto fail;
2309
2310 base = unshare_expr (base);
2311
2312 if (TREE_CODE (base) == TARGET_MEM_REF)
2313 {
2314 tree type = build_pointer_type (TREE_TYPE (base));
2315 tree astep;
2316
2317 if (TMR_BASE (base)
2318 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2319 {
2320 civ = get_iv (data, TMR_BASE (base));
2321 if (!civ)
2322 goto fail;
2323
2324 TMR_BASE (base) = civ->base;
2325 step = civ->step;
2326 }
2327 if (TMR_INDEX2 (base)
2328 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2329 {
2330 civ = get_iv (data, TMR_INDEX2 (base));
2331 if (!civ)
2332 goto fail;
2333
2334 TMR_INDEX2 (base) = civ->base;
2335 step = civ->step;
2336 }
2337 if (TMR_INDEX (base)
2338 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2339 {
2340 civ = get_iv (data, TMR_INDEX (base));
2341 if (!civ)
2342 goto fail;
2343
2344 TMR_INDEX (base) = civ->base;
2345 astep = civ->step;
2346
2347 if (astep)
2348 {
2349 if (TMR_STEP (base))
2350 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2351
2352 step = fold_build2 (PLUS_EXPR, type, step, astep);
2353 }
2354 }
2355
2356 if (integer_zerop (step))
2357 goto fail;
2358 base = tree_mem_ref_addr (type, base);
2359 }
2360 else
2361 {
2362 ifs_ivopts_data.ivopts_data = data;
2363 ifs_ivopts_data.stmt = stmt;
2364 ifs_ivopts_data.step = size_zero_node;
2365 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2366 || integer_zerop (ifs_ivopts_data.step))
2367 goto fail;
2368 step = ifs_ivopts_data.step;
2369
2370 /* Check that the base expression is addressable. This needs
2371 to be done after substituting bases of IVs into it. */
2372 if (may_be_nonaddressable_p (base))
2373 goto fail;
2374
2375 /* Moreover, on strict alignment platforms, check that it is
2376 sufficiently aligned. */
2377 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2378 goto fail;
2379
2380 base = build_fold_addr_expr (base);
2381
2382 /* Substituting bases of IVs into the base expression might
2383 have caused folding opportunities. */
2384 if (TREE_CODE (base) == ADDR_EXPR)
2385 {
2386 tree *ref = &TREE_OPERAND (base, 0);
2387 while (handled_component_p (*ref))
2388 ref = &TREE_OPERAND (*ref, 0);
2389 if (TREE_CODE (*ref) == MEM_REF)
2390 {
2391 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2392 TREE_OPERAND (*ref, 0),
2393 TREE_OPERAND (*ref, 1));
2394 if (tem)
2395 *ref = tem;
2396 }
2397 }
2398 }
2399
2400 civ = alloc_iv (data, base, step);
2401 /* Fail if base object of this memory reference is unknown. */
2402 if (civ->base_object == NULL_TREE)
2403 goto fail;
2404
2405 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2406 return;
2407
2408 fail:
2409 for_each_index (op_p, idx_record_use, data);
2410 }
2411
2412 /* Finds and records invariants used in STMT. */
2413
2414 static void
2415 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2416 {
2417 ssa_op_iter iter;
2418 use_operand_p use_p;
2419 tree op;
2420
2421 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2422 {
2423 op = USE_FROM_PTR (use_p);
2424 record_invariant (data, op, false);
2425 }
2426 }
2427
2428 /* CALL calls an internal function. If operand *OP_P will become an
2429 address when the call is expanded, return the type of the memory
2430 being addressed, otherwise return null. */
2431
2432 static tree
2433 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2434 {
2435 switch (gimple_call_internal_fn (call))
2436 {
2437 case IFN_MASK_LOAD:
2438 case IFN_MASK_LOAD_LANES:
2439 if (op_p == gimple_call_arg_ptr (call, 0))
2440 return TREE_TYPE (gimple_call_lhs (call));
2441 return NULL_TREE;
2442
2443 case IFN_MASK_STORE:
2444 case IFN_MASK_STORE_LANES:
2445 if (op_p == gimple_call_arg_ptr (call, 0))
2446 return TREE_TYPE (gimple_call_arg (call, 3));
2447 return NULL_TREE;
2448
2449 default:
2450 return NULL_TREE;
2451 }
2452 }
2453
2454 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2455 Return true if the operand will become an address when STMT
2456 is expanded and record the associated address use if so. */
2457
2458 static bool
2459 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2460 struct iv *iv)
2461 {
2462 /* Fail if base object of this memory reference is unknown. */
2463 if (iv->base_object == NULL_TREE)
2464 return false;
2465
2466 tree mem_type = NULL_TREE;
2467 if (gcall *call = dyn_cast <gcall *> (stmt))
2468 if (gimple_call_internal_p (call))
2469 mem_type = get_mem_type_for_internal_fn (call, op_p);
2470 if (mem_type)
2471 {
2472 iv = alloc_iv (data, iv->base, iv->step);
2473 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2474 return true;
2475 }
2476 return false;
2477 }
2478
2479 /* Finds interesting uses of induction variables in the statement STMT. */
2480
2481 static void
2482 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2483 {
2484 struct iv *iv;
2485 tree op, *lhs, *rhs;
2486 ssa_op_iter iter;
2487 use_operand_p use_p;
2488 enum tree_code code;
2489
2490 find_invariants_stmt (data, stmt);
2491
2492 if (gimple_code (stmt) == GIMPLE_COND)
2493 {
2494 find_interesting_uses_cond (data, stmt);
2495 return;
2496 }
2497
2498 if (is_gimple_assign (stmt))
2499 {
2500 lhs = gimple_assign_lhs_ptr (stmt);
2501 rhs = gimple_assign_rhs1_ptr (stmt);
2502
2503 if (TREE_CODE (*lhs) == SSA_NAME)
2504 {
2505 /* If the statement defines an induction variable, the uses are not
2506 interesting by themselves. */
2507
2508 iv = get_iv (data, *lhs);
2509
2510 if (iv && !integer_zerop (iv->step))
2511 return;
2512 }
2513
2514 code = gimple_assign_rhs_code (stmt);
2515 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2516 && (REFERENCE_CLASS_P (*rhs)
2517 || is_gimple_val (*rhs)))
2518 {
2519 if (REFERENCE_CLASS_P (*rhs))
2520 find_interesting_uses_address (data, stmt, rhs);
2521 else
2522 find_interesting_uses_op (data, *rhs);
2523
2524 if (REFERENCE_CLASS_P (*lhs))
2525 find_interesting_uses_address (data, stmt, lhs);
2526 return;
2527 }
2528 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2529 {
2530 find_interesting_uses_cond (data, stmt);
2531 return;
2532 }
2533
2534 /* TODO -- we should also handle address uses of type
2535
2536 memory = call (whatever);
2537
2538 and
2539
2540 call (memory). */
2541 }
2542
2543 if (gimple_code (stmt) == GIMPLE_PHI
2544 && gimple_bb (stmt) == data->current_loop->header)
2545 {
2546 iv = get_iv (data, PHI_RESULT (stmt));
2547
2548 if (iv && !integer_zerop (iv->step))
2549 return;
2550 }
2551
2552 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2553 {
2554 op = USE_FROM_PTR (use_p);
2555
2556 if (TREE_CODE (op) != SSA_NAME)
2557 continue;
2558
2559 iv = get_iv (data, op);
2560 if (!iv)
2561 continue;
2562
2563 if (!find_address_like_use (data, stmt, use_p->use, iv))
2564 find_interesting_uses_op (data, op);
2565 }
2566 }
2567
2568 /* Finds interesting uses of induction variables outside of loops
2569 on loop exit edge EXIT. */
2570
2571 static void
2572 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2573 {
2574 gphi *phi;
2575 gphi_iterator psi;
2576 tree def;
2577
2578 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2579 {
2580 phi = psi.phi ();
2581 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2582 if (!virtual_operand_p (def))
2583 find_interesting_uses_op (data, def);
2584 }
2585 }
2586
2587 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2588 mode for memory reference represented by USE. */
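
/* Sketch of the mechanism: one "(reg + offset)" RTX is cached per
   (address space, mode) pair in ADDR_LIST below; its offset operand is
   overwritten and memory_address_addr_space_p is queried.  On a
   hypothetical target accepting 16-bit signed displacements, an offset of
   40000 would therefore be rejected while 4096 would be accepted.  */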
2589
2590 static GTY (()) vec<rtx, va_gc> *addr_list;
2591
2592 static bool
2593 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2594 {
2595 rtx reg, addr;
2596 unsigned list_index;
2597 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2598 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2599
2600 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2601 if (list_index >= vec_safe_length (addr_list))
2602 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2603
2604 addr = (*addr_list)[list_index];
2605 if (!addr)
2606 {
2607 addr_mode = targetm.addr_space.address_mode (as);
2608 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2609 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2610 (*addr_list)[list_index] = addr;
2611 }
2612 else
2613 addr_mode = GET_MODE (addr);
2614
2615 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2616 return (memory_address_addr_space_p (mem_mode, addr, as));
2617 }
2618
2619 /* Comparison function to sort group in ascending order of addr_offset. */
2620
2621 static int
2622 group_compare_offset (const void *a, const void *b)
2623 {
2624 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2625 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2626
2627 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2628 }
2629
2630 /* Check if small groups should be split. Return true if no group
2631 contains more than two uses with distinct addr_offsets. Return
2632 false otherwise. We want to split such groups because:
2633
2634 1) Small groups don't have much benefit and may interfere with
2635 general candidate selection.
2636 2) A problem consisting only of small groups is usually small, so
2637 the general algorithm can handle it well.
2638
2639 TODO -- The above claim may not hold once we want to merge memory
2640 accesses with consecutive addresses. */
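
/* For example (hypothetically), a group whose uses sit at offsets
   {0, 0, 8} contributes only two distinct offsets, whereas a group at
   offsets {0, 4, 8} has three and makes this function return false, so
   the groups are left to the general offset-based splitting below.  */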
2641
2642 static bool
2643 split_small_address_groups_p (struct ivopts_data *data)
2644 {
2645 unsigned int i, j, distinct = 1;
2646 struct iv_use *pre;
2647 struct iv_group *group;
2648
2649 for (i = 0; i < data->vgroups.length (); i++)
2650 {
2651 group = data->vgroups[i];
2652 if (group->vuses.length () == 1)
2653 continue;
2654
2655 gcc_assert (address_p (group->type));
2656 if (group->vuses.length () == 2)
2657 {
2658 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2659 group->vuses[1]->addr_offset) > 0)
2660 std::swap (group->vuses[0], group->vuses[1]);
2661 }
2662 else
2663 group->vuses.qsort (group_compare_offset);
2664
2665 if (distinct > 2)
2666 continue;
2667
2668 distinct = 1;
2669 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2670 {
2671 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2672 {
2673 pre = group->vuses[j];
2674 distinct++;
2675 }
2676
2677 if (distinct > 2)
2678 break;
2679 }
2680 }
2681
2682 return (distinct <= 2);
2683 }
2684
2685 /* For each group of address type uses, this function further groups
2686 these uses according to the maximum offset supported by target's
2687 [base + offset] addressing mode. */
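
/* Hypothetical example: on a target whose [base + offset] mode accepts
   displacements only up to 4095, uses at offsets 0, 64 and 8192 from the
   same base end up in two groups, {0, 64} and {8192}, because the offset
   8192 - 0 does not fit the addressing mode.  */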
2688
2689 static void
2690 split_address_groups (struct ivopts_data *data)
2691 {
2692 unsigned int i, j;
2693 /* Whether every group should be split regardless of the addressing mode. */
2694 bool split_p = split_small_address_groups_p (data);
2695
2696 for (i = 0; i < data->vgroups.length (); i++)
2697 {
2698 struct iv_group *new_group = NULL;
2699 struct iv_group *group = data->vgroups[i];
2700 struct iv_use *use = group->vuses[0];
2701
2702 use->id = 0;
2703 use->group_id = group->id;
2704 if (group->vuses.length () == 1)
2705 continue;
2706
2707 gcc_assert (address_p (use->type));
2708
2709 for (j = 1; j < group->vuses.length ();)
2710 {
2711 struct iv_use *next = group->vuses[j];
2712 poly_int64 offset = next->addr_offset - use->addr_offset;
2713
2714 /* Split the group if asked to, or if the offset against the first
2715 use can't fit in the offset part of the addressing mode. IV uses
2716 having the same offset are still kept in one group. */
2717 if (maybe_ne (offset, 0)
2718 && (split_p || !addr_offset_valid_p (use, offset)))
2719 {
2720 if (!new_group)
2721 new_group = record_group (data, group->type);
2722 group->vuses.ordered_remove (j);
2723 new_group->vuses.safe_push (next);
2724 continue;
2725 }
2726
2727 next->id = j;
2728 next->group_id = group->id;
2729 j++;
2730 }
2731 }
2732 }
2733
2734 /* Finds uses of the induction variables that are interesting. */
2735
2736 static void
2737 find_interesting_uses (struct ivopts_data *data)
2738 {
2739 basic_block bb;
2740 gimple_stmt_iterator bsi;
2741 basic_block *body = get_loop_body (data->current_loop);
2742 unsigned i;
2743 edge e;
2744
2745 for (i = 0; i < data->current_loop->num_nodes; i++)
2746 {
2747 edge_iterator ei;
2748 bb = body[i];
2749
2750 FOR_EACH_EDGE (e, ei, bb->succs)
2751 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2752 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2753 find_interesting_uses_outside (data, e);
2754
2755 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2756 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2757 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2758 if (!is_gimple_debug (gsi_stmt (bsi)))
2759 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2760 }
2761 free (body);
2762
2763 split_address_groups (data);
2764
2765 if (dump_file && (dump_flags & TDF_DETAILS))
2766 {
2767 fprintf (dump_file, "\n<IV Groups>:\n");
2768 dump_groups (dump_file, data);
2769 fprintf (dump_file, "\n");
2770 }
2771 }
2772
2773 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2774 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2775 we are at the top-level of the processed address. */
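
/* A small hypothetical illustration: for EXPR = "&a[i + 3]" with 4-byte
   array elements, the ADDR_EXPR and ARRAY_REF cases cooperate to return
   "&a[i]" and to set *OFFSET to 12, i.e. the constant part of the index
   is folded into the offset.  */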
2776
2777 static tree
2778 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2779 poly_int64 *offset)
2780 {
2781 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2782 enum tree_code code;
2783 tree type, orig_type = TREE_TYPE (expr);
2784 poly_int64 off0, off1;
2785 HOST_WIDE_INT st;
2786 tree orig_expr = expr;
2787
2788 STRIP_NOPS (expr);
2789
2790 type = TREE_TYPE (expr);
2791 code = TREE_CODE (expr);
2792 *offset = 0;
2793
2794 switch (code)
2795 {
2796 case POINTER_PLUS_EXPR:
2797 case PLUS_EXPR:
2798 case MINUS_EXPR:
2799 op0 = TREE_OPERAND (expr, 0);
2800 op1 = TREE_OPERAND (expr, 1);
2801
2802 op0 = strip_offset_1 (op0, false, false, &off0);
2803 op1 = strip_offset_1 (op1, false, false, &off1);
2804
2805 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2806 if (op0 == TREE_OPERAND (expr, 0)
2807 && op1 == TREE_OPERAND (expr, 1))
2808 return orig_expr;
2809
2810 if (integer_zerop (op1))
2811 expr = op0;
2812 else if (integer_zerop (op0))
2813 {
2814 if (code == MINUS_EXPR)
2815 expr = fold_build1 (NEGATE_EXPR, type, op1);
2816 else
2817 expr = op1;
2818 }
2819 else
2820 expr = fold_build2 (code, type, op0, op1);
2821
2822 return fold_convert (orig_type, expr);
2823
2824 case MULT_EXPR:
2825 op1 = TREE_OPERAND (expr, 1);
2826 if (!cst_and_fits_in_hwi (op1))
2827 return orig_expr;
2828
2829 op0 = TREE_OPERAND (expr, 0);
2830 op0 = strip_offset_1 (op0, false, false, &off0);
2831 if (op0 == TREE_OPERAND (expr, 0))
2832 return orig_expr;
2833
2834 *offset = off0 * int_cst_value (op1);
2835 if (integer_zerop (op0))
2836 expr = op0;
2837 else
2838 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2839
2840 return fold_convert (orig_type, expr);
2841
2842 case ARRAY_REF:
2843 case ARRAY_RANGE_REF:
2844 if (!inside_addr)
2845 return orig_expr;
2846
2847 step = array_ref_element_size (expr);
2848 if (!cst_and_fits_in_hwi (step))
2849 break;
2850
2851 st = int_cst_value (step);
2852 op1 = TREE_OPERAND (expr, 1);
2853 op1 = strip_offset_1 (op1, false, false, &off1);
2854 *offset = off1 * st;
2855
2856 if (top_compref
2857 && integer_zerop (op1))
2858 {
2859 /* Strip the component reference completely. */
2860 op0 = TREE_OPERAND (expr, 0);
2861 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2862 *offset += off0;
2863 return op0;
2864 }
2865 break;
2866
2867 case COMPONENT_REF:
2868 {
2869 tree field;
2870
2871 if (!inside_addr)
2872 return orig_expr;
2873
2874 tmp = component_ref_field_offset (expr);
2875 field = TREE_OPERAND (expr, 1);
2876 if (top_compref
2877 && cst_and_fits_in_hwi (tmp)
2878 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2879 {
2880 HOST_WIDE_INT boffset, abs_off;
2881
2882 /* Strip the component reference completely. */
2883 op0 = TREE_OPERAND (expr, 0);
2884 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2885 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2886 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2887 if (boffset < 0)
2888 abs_off = -abs_off;
2889
2890 *offset = off0 + int_cst_value (tmp) + abs_off;
2891 return op0;
2892 }
2893 }
2894 break;
2895
2896 case ADDR_EXPR:
2897 op0 = TREE_OPERAND (expr, 0);
2898 op0 = strip_offset_1 (op0, true, true, &off0);
2899 *offset += off0;
2900
2901 if (op0 == TREE_OPERAND (expr, 0))
2902 return orig_expr;
2903
2904 expr = build_fold_addr_expr (op0);
2905 return fold_convert (orig_type, expr);
2906
2907 case MEM_REF:
2908 /* ??? Offset operand? */
2909 inside_addr = false;
2910 break;
2911
2912 default:
2913 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2914 return build_int_cst (orig_type, 0);
2915 return orig_expr;
2916 }
2917
2918 /* Default handling of expressions for which we want to recurse into
2919 the first operand. */
2920 op0 = TREE_OPERAND (expr, 0);
2921 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2922 *offset += off0;
2923
2924 if (op0 == TREE_OPERAND (expr, 0)
2925 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2926 return orig_expr;
2927
2928 expr = copy_node (expr);
2929 TREE_OPERAND (expr, 0) = op0;
2930 if (op1)
2931 TREE_OPERAND (expr, 1) = op1;
2932
2933 /* Inside address, we might strip the top level component references,
2934 thus changing type of the expression. Handling of ADDR_EXPR
2935 will fix that. */
2936 expr = fold_convert (orig_type, expr);
2937
2938 return expr;
2939 }
2940
2941 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2942
2943 tree
2944 strip_offset (tree expr, poly_uint64_pod *offset)
2945 {
2946 poly_int64 off;
2947 tree core = strip_offset_1 (expr, false, false, &off);
2948 *offset = off;
2949 return core;
2950 }
2951
2952 /* Returns a variant of TYPE that can be used as the base for different uses.
2953 We return an unsigned type with the same precision, which avoids problems
2954 with overflows. */
2955
2956 static tree
2957 generic_type_for (tree type)
2958 {
2959 if (POINTER_TYPE_P (type))
2960 return unsigned_type_for (type);
2961
2962 if (TYPE_UNSIGNED (type))
2963 return type;
2964
2965 return unsigned_type_for (type);
2966 }
2967
2968 /* Private data for walk_tree. */
2969
2970 struct walk_tree_data
2971 {
2972 bitmap *inv_vars;
2973 struct ivopts_data *idata;
2974 };
2975
2976 /* Callback function for walk_tree, it records invariants and symbol
2977 reference in *EXPR_P. DATA is the structure storing result info. */
2978
2979 static tree
2980 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2981 {
2982 tree op = *expr_p;
2983 struct version_info *info;
2984 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2985
2986 if (TREE_CODE (op) != SSA_NAME)
2987 return NULL_TREE;
2988
2989 info = name_info (wdata->idata, op);
2990 /* Because we expand simple operations when finding IVs, loop invariant
2991 variable that isn't referred by the original loop could be used now.
2992 Record such invariant variables here. */
2993 if (!info->iv)
2994 {
2995 struct ivopts_data *idata = wdata->idata;
2996 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2997
2998 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2999 {
3000 tree steptype = TREE_TYPE (op);
3001 if (POINTER_TYPE_P (steptype))
3002 steptype = sizetype;
3003 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3004 record_invariant (idata, op, false);
3005 }
3006 }
3007 if (!info->inv_id || info->has_nonlin_use)
3008 return NULL_TREE;
3009
3010 if (!*wdata->inv_vars)
3011 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3012 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3013
3014 return NULL_TREE;
3015 }
3016
3017 /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
3018 store it. */
3019
3020 static inline void
3021 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3022 {
3023 struct walk_tree_data wdata;
3024
3025 if (!inv_vars)
3026 return;
3027
3028 wdata.idata = data;
3029 wdata.inv_vars = inv_vars;
3030 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3031 }
3032
3033 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3034 will be recorded if it doesn't exist yet. Given the two exprs below:
3035 inv_expr + cst1, inv_expr + cst2
3036 it's hard to decide whether the constant part should be stripped or
3037 not. We choose not to strip it, for the following reasons:
3038 1) We would need to count an ADD cost for the constant part if it were
3039 stripped, which isn't always trivial where this function is called.
3040 2) Stripping the constant away may conflict with the following loop
3041 invariant hoisting pass.
3042 3) Not stripping the constant away results in more invariant exprs,
3043 which usually leads to a decision preferring lower register pressure. */
3044
3045 static iv_inv_expr_ent *
3046 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3047 {
3048 STRIP_NOPS (inv_expr);
3049
3050 if (poly_int_tree_p (inv_expr)
3051 || TREE_CODE (inv_expr) == SSA_NAME)
3052 return NULL;
3053
3054 /* Don't strip constant part away as we used to. */
3055
3056 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3057 struct iv_inv_expr_ent ent;
3058 ent.expr = inv_expr;
3059 ent.hash = iterative_hash_expr (inv_expr, 0);
3060 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3061
3062 if (!*slot)
3063 {
3064 *slot = XNEW (struct iv_inv_expr_ent);
3065 (*slot)->expr = inv_expr;
3066 (*slot)->hash = ent.hash;
3067 (*slot)->id = ++data->max_inv_expr_id;
3068 }
3069
3070 return *slot;
3071 }
3072
3073 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3074 position to POS. If USE is not NULL, the candidate is set as related to
3075 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3076 replacement of the final value of the iv by a direct computation. */
3077
3078 static struct iv_cand *
3079 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3080 enum iv_position pos, struct iv_use *use,
3081 gimple *incremented_at, struct iv *orig_iv = NULL,
3082 bool doloop = false)
3083 {
3084 unsigned i;
3085 struct iv_cand *cand = NULL;
3086 tree type, orig_type;
3087
3088 gcc_assert (base && step);
3089
3090 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3091 live, but the ivopts code may replace a real pointer with one
3092 pointing before or after the memory block that is then adjusted
3093 into the memory block during the loop. FIXME: It would likely be
3094 better to actually force the pointer live and still use ivopts;
3095 for example, it would be enough to write the pointer into memory
3096 and keep it there until after the loop. */
3097 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3098 return NULL;
3099
3100 /* For non-original variables, make sure their values are computed in a type
3101 that does not invoke undefined behavior on overflows (since in general,
3102 we cannot prove that these induction variables are non-wrapping). */
3103 if (pos != IP_ORIGINAL)
3104 {
3105 orig_type = TREE_TYPE (base);
3106 type = generic_type_for (orig_type);
3107 if (type != orig_type)
3108 {
3109 base = fold_convert (type, base);
3110 step = fold_convert (type, step);
3111 }
3112 }
3113
3114 for (i = 0; i < data->vcands.length (); i++)
3115 {
3116 cand = data->vcands[i];
3117
3118 if (cand->pos != pos)
3119 continue;
3120
3121 if (cand->incremented_at != incremented_at
3122 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3123 && cand->ainc_use != use))
3124 continue;
3125
3126 if (operand_equal_p (base, cand->iv->base, 0)
3127 && operand_equal_p (step, cand->iv->step, 0)
3128 && (TYPE_PRECISION (TREE_TYPE (base))
3129 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3130 break;
3131 }
3132
3133 if (i == data->vcands.length ())
3134 {
3135 cand = XCNEW (struct iv_cand);
3136 cand->id = i;
3137 cand->iv = alloc_iv (data, base, step);
3138 cand->pos = pos;
3139 if (pos != IP_ORIGINAL)
3140 {
3141 if (doloop)
3142 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3143 else
3144 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3145 cand->var_after = cand->var_before;
3146 }
3147 cand->important = important;
3148 cand->incremented_at = incremented_at;
3149 cand->doloop_p = doloop;
3150 data->vcands.safe_push (cand);
3151
3152 if (!poly_int_tree_p (step))
3153 {
3154 find_inv_vars (data, &step, &cand->inv_vars);
3155
3156 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3157 /* Share bitmap between inv_vars and inv_exprs for cand. */
3158 if (inv_expr != NULL)
3159 {
3160 cand->inv_exprs = cand->inv_vars;
3161 cand->inv_vars = NULL;
3162 if (cand->inv_exprs)
3163 bitmap_clear (cand->inv_exprs);
3164 else
3165 cand->inv_exprs = BITMAP_ALLOC (NULL);
3166
3167 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168 }
3169 }
3170
3171 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3172 cand->ainc_use = use;
3173 else
3174 cand->ainc_use = NULL;
3175
3176 cand->orig_iv = orig_iv;
3177 if (dump_file && (dump_flags & TDF_DETAILS))
3178 dump_cand (dump_file, cand);
3179 }
3180
3181 cand->important |= important;
3182 cand->doloop_p |= doloop;
3183
3184 /* Relate candidate to the group for which it is added. */
3185 if (use)
3186 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187
3188 return cand;
3189 }
3190
3191 /* Returns true if incrementing the induction variable at the end of the LOOP
3192 is allowed.
3193
3194 The purpose is to avoid splitting the latch edge with a biv increment, thus
3195 creating a jump, possibly confusing other optimization passes and leaving
3196 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3197 available (so we do not have a better alternative), or if the latch edge
3198 is already nonempty. */
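
/* Hypothetical CFG illustration: if the IP_NORMAL position exists and the
   latch block contains nothing but the jump back to the header, inserting
   the increment at IP_END could force the latch edge to be split, creating
   an extra block and jump, so we answer false; if the latch already
   contains statements, IP_END is harmless and we answer true.  */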
3199
3200 static bool
3201 allow_ip_end_pos_p (class loop *loop)
3202 {
3203 if (!ip_normal_pos (loop))
3204 return true;
3205
3206 if (!empty_block_p (ip_end_pos (loop)))
3207 return true;
3208
3209 return false;
3210 }
3211
3212 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3213 Important field is set to IMPORTANT. */
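
/* Hypothetical example: on a target with post-increment addressing for the
   access mode, a use "*p" whose iv has base P and a step equal to the
   access size gets an extra IP_AFTER_USE candidate, so that the access can
   later be expanded as a post-increment load or store.  */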
3214
3215 static void
3216 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3217 bool important, struct iv_use *use)
3218 {
3219 basic_block use_bb = gimple_bb (use->stmt);
3220 machine_mode mem_mode;
3221 unsigned HOST_WIDE_INT cstepi;
3222
3223 /* If we insert the increment in any position other than the standard
3224 ones, we must ensure that it is incremented once per iteration.
3225 It must not be in an inner nested loop, or one side of an if
3226 statement. */
3227 if (use_bb->loop_father != data->current_loop
3228 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3229 || stmt_can_throw_internal (cfun, use->stmt)
3230 || !cst_and_fits_in_hwi (step))
3231 return;
3232
3233 cstepi = int_cst_value (step);
3234
3235 mem_mode = TYPE_MODE (use->mem_type);
3236 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3237 || USE_STORE_PRE_INCREMENT (mem_mode))
3238 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3239 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3240 || USE_STORE_PRE_DECREMENT (mem_mode))
3241 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3242 {
3243 enum tree_code code = MINUS_EXPR;
3244 tree new_base;
3245 tree new_step = step;
3246
3247 if (POINTER_TYPE_P (TREE_TYPE (base)))
3248 {
3249 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3250 code = POINTER_PLUS_EXPR;
3251 }
3252 else
3253 new_step = fold_convert (TREE_TYPE (base), new_step);
3254 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3255 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3256 use->stmt);
3257 }
3258 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3259 || USE_STORE_POST_INCREMENT (mem_mode))
3260 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3261 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3262 || USE_STORE_POST_DECREMENT (mem_mode))
3263 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3264 {
3265 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3266 use->stmt);
3267 }
3268 }
3269
3270 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3271 position to POS. If USE is not NULL, the candidate is set as related to
3272 it. The candidate computation is scheduled before exit condition and at
3273 the end of loop. */
3274
3275 static void
3276 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3277 struct iv_use *use, struct iv *orig_iv = NULL,
3278 bool doloop = false)
3279 {
3280 if (ip_normal_pos (data->current_loop))
3281 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3282 doloop);
3283 /* Exclude the doloop candidate here since it requires decrement then
3284 comparison and jump, which the IP_END position doesn't match. */
3285 if (!doloop && ip_end_pos (data->current_loop)
3286 && allow_ip_end_pos_p (data->current_loop))
3287 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3288 }
3289
3290 /* Adds standard iv candidates. */
3291
3292 static void
3293 add_standard_iv_candidates (struct ivopts_data *data)
3294 {
3295 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3296
3297 /* The same for a double-integer type if it is still fast enough. */
3298 if (TYPE_PRECISION
3299 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3300 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3301 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3302 build_int_cst (long_integer_type_node, 1), true, NULL);
3303
3304 /* The same for a double-integer type if it is still fast enough. */
3305 if (TYPE_PRECISION
3306 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3307 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3308 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3309 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3310 }
3311
3312
3313 /* Adds candidates based on the old induction variable IV. */
3314
3315 static void
3316 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3317 {
3318 gimple *phi;
3319 tree def;
3320 struct iv_cand *cand;
3321
3322 /* Check if this biv is used in address type use. */
3323 if (iv->no_overflow && iv->have_address_use
3324 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3325 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3326 {
3327 tree base = fold_convert (sizetype, iv->base);
3328 tree step = fold_convert (sizetype, iv->step);
3329
3330 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3331 add_candidate (data, base, step, true, NULL, iv);
3332 /* Add iv cand of the original type only if it has nonlinear use. */
3333 if (iv->nonlin_use)
3334 add_candidate (data, iv->base, iv->step, true, NULL);
3335 }
3336 else
3337 add_candidate (data, iv->base, iv->step, true, NULL);
3338
3339 /* The same, but with initial value zero. */
3340 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3341 add_candidate (data, size_int (0), iv->step, true, NULL);
3342 else
3343 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3344 iv->step, true, NULL);
3345
3346 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3347 if (gimple_code (phi) == GIMPLE_PHI)
3348 {
3349 /* Additionally record the possibility of leaving the original iv
3350 untouched. */
3351 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3352 /* Don't add candidate if it's from another PHI node because
3353 it's an affine iv appearing in the form of PEELED_CHREC. */
3354 phi = SSA_NAME_DEF_STMT (def);
3355 if (gimple_code (phi) != GIMPLE_PHI)
3356 {
3357 cand = add_candidate_1 (data,
3358 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3359 SSA_NAME_DEF_STMT (def));
3360 if (cand)
3361 {
3362 cand->var_before = iv->ssa_name;
3363 cand->var_after = def;
3364 }
3365 }
3366 else
3367 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3368 }
3369 }
3370
3371 /* Adds candidates based on the old induction variables. */
3372
3373 static void
3374 add_iv_candidate_for_bivs (struct ivopts_data *data)
3375 {
3376 unsigned i;
3377 struct iv *iv;
3378 bitmap_iterator bi;
3379
3380 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3381 {
3382 iv = ver_info (data, i)->iv;
3383 if (iv && iv->biv_p && !integer_zerop (iv->step))
3384 add_iv_candidate_for_biv (data, iv);
3385 }
3386 }
3387
3388 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3389
3390 static void
3391 record_common_cand (struct ivopts_data *data, tree base,
3392 tree step, struct iv_use *use)
3393 {
3394 class iv_common_cand ent;
3395 class iv_common_cand **slot;
3396
3397 ent.base = base;
3398 ent.step = step;
3399 ent.hash = iterative_hash_expr (base, 0);
3400 ent.hash = iterative_hash_expr (step, ent.hash);
3401
3402 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3403 if (*slot == NULL)
3404 {
3405 *slot = new iv_common_cand ();
3406 (*slot)->base = base;
3407 (*slot)->step = step;
3408 (*slot)->uses.create (8);
3409 (*slot)->hash = ent.hash;
3410 data->iv_common_cands.safe_push ((*slot));
3411 }
3412
3413 gcc_assert (use != NULL);
3414 (*slot)->uses.safe_push (use);
3415 return;
3416 }
3417
3418 /* Comparison function used to sort common candidates. */
3419
3420 static int
3421 common_cand_cmp (const void *p1, const void *p2)
3422 {
3423 unsigned n1, n2;
3424 const class iv_common_cand *const *const ccand1
3425 = (const class iv_common_cand *const *)p1;
3426 const class iv_common_cand *const *const ccand2
3427 = (const class iv_common_cand *const *)p2;
3428
3429 n1 = (*ccand1)->uses.length ();
3430 n2 = (*ccand2)->uses.length ();
3431 return n2 - n1;
3432 }
3433
3434 /* Adds IV candidates based on the common candidates recorded. */
3435
3436 static void
3437 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3438 {
3439 unsigned i, j;
3440 struct iv_cand *cand_1, *cand_2;
3441
3442 data->iv_common_cands.qsort (common_cand_cmp);
3443 for (i = 0; i < data->iv_common_cands.length (); i++)
3444 {
3445 class iv_common_cand *ptr = data->iv_common_cands[i];
3446
3447 /* Only add IV candidate if it's derived from multiple uses. */
3448 if (ptr->uses.length () <= 1)
3449 break;
3450
3451 cand_1 = NULL;
3452 cand_2 = NULL;
3453 if (ip_normal_pos (data->current_loop))
3454 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3455 false, IP_NORMAL, NULL, NULL);
3456
3457 if (ip_end_pos (data->current_loop)
3458 && allow_ip_end_pos_p (data->current_loop))
3459 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3460 false, IP_END, NULL, NULL);
3461
3462 /* Bind deriving uses and the new candidates. */
3463 for (j = 0; j < ptr->uses.length (); j++)
3464 {
3465 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3466 if (cand_1)
3467 bitmap_set_bit (group->related_cands, cand_1->id);
3468 if (cand_2)
3469 bitmap_set_bit (group->related_cands, cand_2->id);
3470 }
3471 }
3472
3473 /* Release data since it is useless from this point. */
3474 data->iv_common_cand_tab->empty ();
3475 data->iv_common_cands.truncate (0);
3476 }
3477
3478 /* Adds candidates based on the value of USE's iv. */
3479
3480 static void
3481 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3482 {
3483 poly_uint64 offset;
3484 tree base;
3485 struct iv *iv = use->iv;
3486 tree basetype = TREE_TYPE (iv->base);
3487
3488 /* Don't add a candidate for an iv_use with a non-integer, non-pointer, or
3489 non-mode-precision type; instead, add a candidate for the corresponding
3490 scev in an unsigned type with the same precision. See PR93674 for more info. */
3491 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3492 || !type_has_mode_precision_p (basetype))
3493 {
3494 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3495 TYPE_UNSIGNED (basetype));
3496 add_candidate (data, fold_convert (basetype, iv->base),
3497 fold_convert (basetype, iv->step), false, NULL);
3498 return;
3499 }
3500
3501 add_candidate (data, iv->base, iv->step, false, use);
3502
3503 /* Record common candidate for use in case it can be shared by others. */
3504 record_common_cand (data, iv->base, iv->step, use);
3505
3506 /* Record common candidate with initial value zero. */
3507 basetype = TREE_TYPE (iv->base);
3508 if (POINTER_TYPE_P (basetype))
3509 basetype = sizetype;
3510 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3511
3512 /* Compare the cost of an address with an unscaled index with the cost of
3513 an address with a scaled index and add candidate if useful. */
3514 poly_int64 step;
3515 if (use != NULL
3516 && poly_int_tree_p (iv->step, &step)
3517 && address_p (use->type))
3518 {
3519 poly_int64 new_step;
3520 unsigned int fact = preferred_mem_scale_factor
3521 (use->iv->base,
3522 TYPE_MODE (use->mem_type),
3523 optimize_loop_for_speed_p (data->current_loop));
3524
3525 if (fact != 1
3526 && multiple_p (step, fact, &new_step))
3527 add_candidate (data, size_int (0),
3528 wide_int_to_tree (sizetype, new_step),
3529 true, NULL);
3530 }
3531
3532 /* Record common candidate with the constant offset stripped from the base.
3533 As for the use itself, we also add a candidate directly for it. */
3534 base = strip_offset (iv->base, &offset);
3535 if (maybe_ne (offset, 0U) || base != iv->base)
3536 {
3537 record_common_cand (data, base, iv->step, use);
3538 add_candidate (data, base, iv->step, false, use);
3539 }
3540
3541 /* Record common candidate with base_object removed in base. */
3542 base = iv->base;
3543 STRIP_NOPS (base);
3544 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3545 {
3546 tree step = iv->step;
3547
3548 STRIP_NOPS (step);
3549 base = TREE_OPERAND (base, 1);
3550 step = fold_convert (sizetype, step);
3551 record_common_cand (data, base, step, use);
3552 /* Also record common candidate with offset stripped. */
3553 base = strip_offset (base, &offset);
3554 if (maybe_ne (offset, 0U))
3555 record_common_cand (data, base, step, use);
3556 }
3557
3558 /* Finally, add auto-increment candidates. Make such variables
3559 important since other iv uses with the same base object may be
3560 based on them. */
3561 if (use != NULL && address_p (use->type))
3562 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3563 }
3564
3565 /* Adds candidates based on the uses. */
3566
3567 static void
3568 add_iv_candidate_for_groups (struct ivopts_data *data)
3569 {
3570 unsigned i;
3571
3572 /* Only add candidate for the first use in group. */
3573 for (i = 0; i < data->vgroups.length (); i++)
3574 {
3575 struct iv_group *group = data->vgroups[i];
3576
3577 gcc_assert (group->vuses[0] != NULL);
3578 add_iv_candidate_for_use (data, group->vuses[0]);
3579 }
3580 add_iv_candidate_derived_from_uses (data);
3581 }
3582
3583 /* Record important candidates and add them to related_cands bitmaps. */
3584
3585 static void
3586 record_important_candidates (struct ivopts_data *data)
3587 {
3588 unsigned i;
3589 struct iv_group *group;
3590
3591 for (i = 0; i < data->vcands.length (); i++)
3592 {
3593 struct iv_cand *cand = data->vcands[i];
3594
3595 if (cand->important)
3596 bitmap_set_bit (data->important_candidates, i);
3597 }
3598
3599 data->consider_all_candidates = (data->vcands.length ()
3600 <= CONSIDER_ALL_CANDIDATES_BOUND);
3601
3602 /* Add important candidates to groups' related_cands bitmaps. */
3603 for (i = 0; i < data->vgroups.length (); i++)
3604 {
3605 group = data->vgroups[i];
3606 bitmap_ior_into (group->related_cands, data->important_candidates);
3607 }
3608 }
3609
3610 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3611 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3612 we allocate a simple list for every use. */
3613
3614 static void
3615 alloc_use_cost_map (struct ivopts_data *data)
3616 {
3617 unsigned i, size, s;
3618
3619 for (i = 0; i < data->vgroups.length (); i++)
3620 {
3621 struct iv_group *group = data->vgroups[i];
3622
3623 if (data->consider_all_candidates)
3624 size = data->vcands.length ();
3625 else
3626 {
3627 s = bitmap_count_bits (group->related_cands);
3628
3629 /* Round up to a power of two, so that taking the modulo by it is fast. */
3630 size = s ? (1 << ceil_log2 (s)) : 1;
3631 }
3632
3633 group->n_map_members = size;
3634 group->cost_map = XCNEWVEC (class cost_pair, size);
3635 }
3636 }
3637
3638 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3639 on invariants INV_VARS and that the value used in expressing it is
3640 VALUE, and in case of iv elimination the comparison operator is COMP. */
3641
3642 static void
3643 set_group_iv_cost (struct ivopts_data *data,
3644 struct iv_group *group, struct iv_cand *cand,
3645 comp_cost cost, bitmap inv_vars, tree value,
3646 enum tree_code comp, bitmap inv_exprs)
3647 {
3648 unsigned i, s;
3649
3650 if (cost.infinite_cost_p ())
3651 {
3652 BITMAP_FREE (inv_vars);
3653 BITMAP_FREE (inv_exprs);
3654 return;
3655 }
3656
3657 if (data->consider_all_candidates)
3658 {
3659 group->cost_map[cand->id].cand = cand;
3660 group->cost_map[cand->id].cost = cost;
3661 group->cost_map[cand->id].inv_vars = inv_vars;
3662 group->cost_map[cand->id].inv_exprs = inv_exprs;
3663 group->cost_map[cand->id].value = value;
3664 group->cost_map[cand->id].comp = comp;
3665 return;
3666 }
3667
3668 /* n_map_members is a power of two, so this computes modulo. */
3669 s = cand->id & (group->n_map_members - 1);
3670 for (i = s; i < group->n_map_members; i++)
3671 if (!group->cost_map[i].cand)
3672 goto found;
3673 for (i = 0; i < s; i++)
3674 if (!group->cost_map[i].cand)
3675 goto found;
3676
3677 gcc_unreachable ();
3678
3679 found:
3680 group->cost_map[i].cand = cand;
3681 group->cost_map[i].cost = cost;
3682 group->cost_map[i].inv_vars = inv_vars;
3683 group->cost_map[i].inv_exprs = inv_exprs;
3684 group->cost_map[i].value = value;
3685 group->cost_map[i].comp = comp;
3686 }
3687
3688 /* Gets cost of (GROUP, CAND) pair. */
3689
3690 static class cost_pair *
3691 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3692 struct iv_cand *cand)
3693 {
3694 unsigned i, s;
3695 class cost_pair *ret;
3696
3697 if (!cand)
3698 return NULL;
3699
3700 if (data->consider_all_candidates)
3701 {
3702 ret = group->cost_map + cand->id;
3703 if (!ret->cand)
3704 return NULL;
3705
3706 return ret;
3707 }
3708
3709 /* n_map_members is a power of two, so this computes modulo. */
3710 s = cand->id & (group->n_map_members - 1);
3711 for (i = s; i < group->n_map_members; i++)
3712 if (group->cost_map[i].cand == cand)
3713 return group->cost_map + i;
3714 else if (group->cost_map[i].cand == NULL)
3715 return NULL;
3716 for (i = 0; i < s; i++)
3717 if (group->cost_map[i].cand == cand)
3718 return group->cost_map + i;
3719 else if (group->cost_map[i].cand == NULL)
3720 return NULL;
3721
3722 return NULL;
3723 }
3724
3725 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3726 static rtx
3727 produce_memory_decl_rtl (tree obj, int *regno)
3728 {
3729 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3730 machine_mode address_mode = targetm.addr_space.address_mode (as);
3731 rtx x;
3732
3733 gcc_assert (obj);
3734 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3735 {
3736 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3737 x = gen_rtx_SYMBOL_REF (address_mode, name);
3738 SET_SYMBOL_REF_DECL (x, obj);
3739 x = gen_rtx_MEM (DECL_MODE (obj), x);
3740 set_mem_addr_space (x, as);
3741 targetm.encode_section_info (obj, x, true);
3742 }
3743 else
3744 {
3745 x = gen_raw_REG (address_mode, (*regno)++);
3746 x = gen_rtx_MEM (DECL_MODE (obj), x);
3747 set_mem_addr_space (x, as);
3748 }
3749
3750 return x;
3751 }
3752
3753 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3754 walk_tree. DATA contains the actual fake register number. */
3755
3756 static tree
3757 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3758 {
3759 tree obj = NULL_TREE;
3760 rtx x = NULL_RTX;
3761 int *regno = (int *) data;
3762
3763 switch (TREE_CODE (*expr_p))
3764 {
3765 case ADDR_EXPR:
3766 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3767 handled_component_p (*expr_p);
3768 expr_p = &TREE_OPERAND (*expr_p, 0))
3769 continue;
3770 obj = *expr_p;
3771 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3772 x = produce_memory_decl_rtl (obj, regno);
3773 break;
3774
3775 case SSA_NAME:
3776 *ws = 0;
3777 obj = SSA_NAME_VAR (*expr_p);
3778 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3779 if (!obj)
3780 return NULL_TREE;
3781 if (!DECL_RTL_SET_P (obj))
3782 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3783 break;
3784
3785 case VAR_DECL:
3786 case PARM_DECL:
3787 case RESULT_DECL:
3788 *ws = 0;
3789 obj = *expr_p;
3790
3791 if (DECL_RTL_SET_P (obj))
3792 break;
3793
3794 if (DECL_MODE (obj) == BLKmode)
3795 x = produce_memory_decl_rtl (obj, regno);
3796 else
3797 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3798
3799 break;
3800
3801 default:
3802 break;
3803 }
3804
3805 if (x)
3806 {
3807 decl_rtl_to_reset.safe_push (obj);
3808 SET_DECL_RTL (obj, x);
3809 }
3810
3811 return NULL_TREE;
3812 }
3813
3814 /* Predict whether the given loop will be transformed in the RTL
3815 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3816 This is only for target independent checks, see targetm.predict_doloop_p
3817 for the target dependent ones.
3818
3819 Note that according to some initial investigation, checks such as the costly
3820 niter check and invalid stmt scanning don't bring much gain in the general
3821 case, so keep this as simple as possible for now.
3822
3823 Some RTL specific checks don't seem checkable on gimple; if any new
3824 checks or easy checks _are_ missing here, please add them. */
3825
3826 static bool
3827 generic_predict_doloop_p (struct ivopts_data *data)
3828 {
3829 class loop *loop = data->current_loop;
3830
3831 /* Call target hook for target dependent checks. */
3832 if (!targetm.predict_doloop_p (loop))
3833 {
3834 if (dump_file && (dump_flags & TDF_DETAILS))
3835 fprintf (dump_file, "Predict doloop failure due to"
3836 " target specific checks.\n");
3837 return false;
3838 }
3839
3840 /* Similar to doloop_optimize, check the iteration description to see
3841 whether it is suitable. Keep it as simple as possible; feel free to
3842 extend it if you find any multiple-exit cases that matter. */
3843 edge exit = single_dom_exit (loop);
3844 class tree_niter_desc *niter_desc;
3845 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3846 {
3847 if (dump_file && (dump_flags & TDF_DETAILS))
3848 fprintf (dump_file, "Predict doloop failure due to"
3849 " unexpected niters.\n");
3850 return false;
3851 }
3852
3853 /* Similar to doloop_optimize, check whether the iteration count is too
3854 small to be profitable. */
3855 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3856 if (est_niter == -1)
3857 est_niter = get_likely_max_loop_iterations_int (loop);
3858 if (est_niter >= 0 && est_niter < 3)
3859 {
3860 if (dump_file && (dump_flags & TDF_DETAILS))
3861 fprintf (dump_file,
3862 "Predict doloop failure due to"
3863 " too few iterations (%u).\n",
3864 (unsigned int) est_niter);
3865 return false;
3866 }
3867
3868 return true;
3869 }
3870
3871 /* Determines cost of the computation of EXPR. */
3872
3873 static unsigned
3874 computation_cost (tree expr, bool speed)
3875 {
3876 rtx_insn *seq;
3877 rtx rslt;
3878 tree type = TREE_TYPE (expr);
3879 unsigned cost;
3880 /* Avoid using hard regs in ways which may be unsupported. */
3881 int regno = LAST_VIRTUAL_REGISTER + 1;
3882 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3883 enum node_frequency real_frequency = node->frequency;
3884
3885 node->frequency = NODE_FREQUENCY_NORMAL;
3886 crtl->maybe_hot_insn_p = speed;
3887 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3888 start_sequence ();
3889 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3890 seq = get_insns ();
3891 end_sequence ();
3892 default_rtl_profile ();
3893 node->frequency = real_frequency;
3894
3895 cost = seq_cost (seq, speed);
3896 if (MEM_P (rslt))
3897 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3898 TYPE_ADDR_SPACE (type), speed);
3899 else if (!REG_P (rslt))
3900 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3901
3902 return cost;
3903 }
3904
3905 /* Returns variable containing the value of candidate CAND at statement AT. */
3906
3907 static tree
3908 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3909 {
3910 if (stmt_after_increment (loop, cand, stmt))
3911 return cand->var_after;
3912 else
3913 return cand->var_before;
3914 }
3915
3916 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3917 same precision that is at least as wide as the precision of TYPE, stores
3918 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3919 type of A and B. */
3920
3921 static tree
3922 determine_common_wider_type (tree *a, tree *b)
3923 {
3924 tree wider_type = NULL;
3925 tree suba, subb;
3926 tree atype = TREE_TYPE (*a);
3927
3928 if (CONVERT_EXPR_P (*a))
3929 {
3930 suba = TREE_OPERAND (*a, 0);
3931 wider_type = TREE_TYPE (suba);
3932 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3933 return atype;
3934 }
3935 else
3936 return atype;
3937
3938 if (CONVERT_EXPR_P (*b))
3939 {
3940 subb = TREE_OPERAND (*b, 0);
3941 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3942 return atype;
3943 }
3944 else
3945 return atype;
3946
3947 *a = suba;
3948 *b = subb;
3949 return wider_type;
3950 }
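
/* Illustrative example of the transformation above (hypothetical types):
   if *a is (unsigned short) x and *b is (unsigned short) y, where x and y
   are unsigned int, both conversions are stripped, *a becomes x, *b becomes
   y, and unsigned int is returned, so the caller can compute the difference
   x - y in the wider type before truncating to the narrow one.  */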
3951
3952 /* Determines the expression by which USE is expressed from induction variable
3953 CAND at statement AT in LOOP. The expression is stored in two parts in a
3954 decomposed form: the invariant part is stored in AFF_INV, while the variant
3955 part is stored in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT
3956 if it's non-null. Returns false if USE cannot be expressed using CAND. */
3957
3958 static bool
3959 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3960 struct iv_cand *cand, class aff_tree *aff_inv,
3961 class aff_tree *aff_var, widest_int *prat = NULL)
3962 {
3963 tree ubase = use->iv->base, ustep = use->iv->step;
3964 tree cbase = cand->iv->base, cstep = cand->iv->step;
3965 tree common_type, uutype, var, cstep_common;
3966 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3967 aff_tree aff_cbase;
3968 widest_int rat;
3969
3970 /* We must have enough precision to express the values of USE. */
3971 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3972 return false;
3973
3974 var = var_at_stmt (loop, cand, at);
3975 uutype = unsigned_type_for (utype);
3976
3977 /* If the conversion is not noop, perform it. */
3978 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3979 {
3980 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3981 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3982 {
3983 tree inner_base, inner_step, inner_type;
3984 inner_base = TREE_OPERAND (cbase, 0);
3985 if (CONVERT_EXPR_P (cstep))
3986 inner_step = TREE_OPERAND (cstep, 0);
3987 else
3988 inner_step = cstep;
3989
3990 inner_type = TREE_TYPE (inner_base);
3991 /* If the candidate is added from a biv whose type is smaller than
3992 ctype, we know neither the candidate nor the biv will overflow.
3993 In this case, it's safe to skip the conversion in the candidate.
3994 As an example, (unsigned short)((unsigned long)A) equals
3995 (unsigned short)A, if A has a type no larger than short. */
3996 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3997 {
3998 cbase = inner_base;
3999 cstep = inner_step;
4000 }
4001 }
4002 cbase = fold_convert (uutype, cbase);
4003 cstep = fold_convert (uutype, cstep);
4004 var = fold_convert (uutype, var);
4005 }
4006
4007 /* Ratio is 1 when computing the value of biv cand by itself.
4008 We can't rely on constant_multiple_of in this case because the
4009 use is created after the original biv is selected. The call
4010 could fail because of inconsistent fold behavior. See PR68021
4011 for more information. */
4012 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4013 {
4014 gcc_assert (is_gimple_assign (use->stmt));
4015 gcc_assert (use->iv->ssa_name == cand->var_after);
4016 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4017 rat = 1;
4018 }
4019 else if (!constant_multiple_of (ustep, cstep, &rat))
4020 return false;
4021
4022 if (prat)
4023 *prat = rat;
4024
4025 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4026 type, we achieve better folding by computing their difference in this
4027 wider type, and casting the result to UUTYPE. We do not need to worry
4028 about overflows, as all the arithmetic will in the end be performed in
4029 UUTYPE anyway. */
4030 common_type = determine_common_wider_type (&ubase, &cbase);
4031
4032 /* use = ubase - ratio * cbase + ratio * var. */
4033 tree_to_aff_combination (ubase, common_type, aff_inv);
4034 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4035 tree_to_aff_combination (var, uutype, aff_var);
4036
4037 /* We need to shift the value if we are after the increment. */
4038 if (stmt_after_increment (loop, cand, at))
4039 {
4040 aff_tree cstep_aff;
4041
4042 if (common_type != uutype)
4043 cstep_common = fold_convert (common_type, cstep);
4044 else
4045 cstep_common = cstep;
4046
4047 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4048 aff_combination_add (&aff_cbase, &cstep_aff);
4049 }
4050
4051 aff_combination_scale (&aff_cbase, -rat);
4052 aff_combination_add (aff_inv, &aff_cbase);
4053 if (common_type != uutype)
4054 aff_combination_convert (aff_inv, uutype);
4055
4056 aff_combination_scale (aff_var, rat);
4057 return true;
4058 }
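
/* A small worked instance of the formula above (illustrative, with made-up
   IVs): let the candidate be a counter with cbase 0 and cstep 1, and let
   the use be the address A + 4*i with ubase A and ustep 4.  Then the ratio
   in the formula is 4 and

     use = A - 4*0 + 4*var == A + 4*var,

   i.e. AFF_INV holds A and AFF_VAR holds 4*var, where var is the
   candidate's value at AT.  */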
4059
4060 /* Determines the expression by which USE is expressed from induction variable
4061 CAND at statement AT in LOOP. The expression is stored in a decomposed
4062 form into AFF. Returns false if USE cannot be expressed using CAND. */
4063
4064 static bool
4065 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4066 struct iv_cand *cand, class aff_tree *aff)
4067 {
4068 aff_tree aff_var;
4069
4070 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4071 return false;
4072
4073 aff_combination_add (aff, &aff_var);
4074 return true;
4075 }
4076
4077 /* Return the type of USE. */
4078
4079 static tree
4080 get_use_type (struct iv_use *use)
4081 {
4082 tree base_type = TREE_TYPE (use->iv->base);
4083 tree type;
4084
4085 if (use->type == USE_REF_ADDRESS)
4086 {
4087 /* The base_type may be a void pointer. Create a pointer type based on
4088 the mem_ref instead. */
4089 type = build_pointer_type (TREE_TYPE (*use->op_p));
4090 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4091 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4092 }
4093 else
4094 type = base_type;
4095
4096 return type;
4097 }
4098
4099 /* Determines the expression by which USE is expressed from induction variable
4100 CAND at statement AT in LOOP. The computation is unshared. */
4101
4102 static tree
4103 get_computation_at (class loop *loop, gimple *at,
4104 struct iv_use *use, struct iv_cand *cand)
4105 {
4106 aff_tree aff;
4107 tree type = get_use_type (use);
4108
4109 if (!get_computation_aff (loop, at, use, cand, &aff))
4110 return NULL_TREE;
4111 unshare_aff_combination (&aff);
4112 return fold_convert (type, aff_combination_to_tree (&aff));
4113 }
4114
4115 /* Like get_computation_at, but try harder, even if the computation
4116 is more expensive. Intended for debug stmts. */
4117
4118 static tree
4119 get_debug_computation_at (class loop *loop, gimple *at,
4120 struct iv_use *use, struct iv_cand *cand)
4121 {
4122 if (tree ret = get_computation_at (loop, at, use, cand))
4123 return ret;
4124
4125 tree ubase = use->iv->base, ustep = use->iv->step;
4126 tree cbase = cand->iv->base, cstep = cand->iv->step;
4127 tree var;
4128 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4129 widest_int rat;
4130
4131 /* We must have enough precision to express the values of USE. */
4132 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4133 return NULL_TREE;
4134
4135 /* Try to handle the case that get_computation_at doesn't handle:
4136 try to express
4137 use = ubase + (var - cbase) / ratio. */
4138 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4139 &rat))
4140 return NULL_TREE;
4141
4142 bool neg_p = false;
4143 if (wi::neg_p (rat))
4144 {
4145 if (TYPE_UNSIGNED (ctype))
4146 return NULL_TREE;
4147 neg_p = true;
4148 rat = wi::neg (rat);
4149 }
4150
4151 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4152 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4153 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4154 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4155 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4156 if (!use->iv->no_overflow
4157 && !cand->iv->no_overflow
4158 && !integer_pow2p (cstep))
4159 return NULL_TREE;
4160
4161 int bits = wi::exact_log2 (rat);
4162 if (bits == -1)
4163 bits = wi::floor_log2 (rat) + 1;
4164 if (!cand->iv->no_overflow
4165 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4166 return NULL_TREE;
4167
4168 var = var_at_stmt (loop, cand, at);
4169
4170 if (POINTER_TYPE_P (ctype))
4171 {
4172 ctype = unsigned_type_for (ctype);
4173 cbase = fold_convert (ctype, cbase);
4174 cstep = fold_convert (ctype, cstep);
4175 var = fold_convert (ctype, var);
4176 }
4177
4178 if (stmt_after_increment (loop, cand, at))
4179 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4180 unshare_expr (cstep));
4181
4182 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4183 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4184 wide_int_to_tree (TREE_TYPE (var), rat));
4185 if (POINTER_TYPE_P (utype))
4186 {
4187 var = fold_convert (sizetype, var);
4188 if (neg_p)
4189 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4190 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4191 }
4192 else
4193 {
4194 var = fold_convert (utype, var);
4195 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4196 ubase, var);
4197 }
4198 return var;
4199 }
4200
4201 /* Adjust the cost COST for being in loop setup rather than loop body.
4202 If we're optimizing for space, the loop setup overhead is constant;
4203 if we're optimizing for speed, amortize it over the per-iteration cost.
4204 If ROUND_UP_P is true, the result is rounded up rather than truncated
4205 toward zero when optimizing for speed. */
4206 static int64_t
4207 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4208 bool round_up_p = false)
4209 {
4210 if (cost == INFTY)
4211 return cost;
4212 else if (optimize_loop_for_speed_p (data->current_loop))
4213 {
4214 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4215 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4216 }
4217 else
4218 return cost;
4219 }
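
/* For example (made-up numbers): with a setup cost of 10 and an average of
   4 loop iterations, the amortized cost is 10/4 == 2 when truncating, or
   (10 + 3)/4 == 3 when ROUND_UP_P asks for rounding up; when optimizing
   for size the full cost of 10 is kept.  */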
4220
4221 /* Calculate the speed or size cost (per SPEED) of shiftadd EXPR in MODE.
4222 MULT is the EXPR operand holding the multiplication implementable as a
4223 shift. COST0 and COST1 are the costs for calculating the operands of
4224 EXPR. Returns true if successful, storing the cost in *COST. */
4225
4226 static bool
4227 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4228 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4229 {
4230 comp_cost res;
4231 tree op1 = TREE_OPERAND (expr, 1);
4232 tree cst = TREE_OPERAND (mult, 1);
4233 tree multop = TREE_OPERAND (mult, 0);
4234 int m = exact_log2 (int_cst_value (cst));
4235 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4236 int as_cost, sa_cost;
4237 bool mult_in_op1;
4238
4239 if (!(m >= 0 && m < maxm))
4240 return false;
4241
4242 STRIP_NOPS (op1);
4243 mult_in_op1 = operand_equal_p (op1, mult, 0);
4244
4245 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4246
4247 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4248 use that in preference to a shift insn followed by an add insn. */
4249 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4250 ? shiftadd_cost (speed, mode, m)
4251 : (mult_in_op1
4252 ? shiftsub1_cost (speed, mode, m)
4253 : shiftsub0_cost (speed, mode, m)));
4254
4255 res = comp_cost (MIN (as_cost, sa_cost), 0);
4256 res += (mult_in_op1 ? cost0 : cost1);
4257
4258 STRIP_NOPS (multop);
4259 if (!is_gimple_val (multop))
4260 res += force_expr_to_var_cost (multop, speed);
4261
4262 *cost = res;
4263 return true;
4264 }
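
/* Illustrative case (hypothetical operands): for EXPR == b + a * 8 the
   MULT operand is a * 8, so m == 3, and the routine picks the cheaper of
   "shift by 3 then add" (as_cost) and a single shift-and-add instruction
   (sa_cost), then adds the cost of whichever operand is not the
   multiplication.  */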
4265
4266 /* Estimates cost of forcing expression EXPR into a variable. */
4267
4268 static comp_cost
4269 force_expr_to_var_cost (tree expr, bool speed)
4270 {
4271 static bool costs_initialized = false;
4272 static unsigned integer_cost [2];
4273 static unsigned symbol_cost [2];
4274 static unsigned address_cost [2];
4275 tree op0, op1;
4276 comp_cost cost0, cost1, cost;
4277 machine_mode mode;
4278 scalar_int_mode int_mode;
4279
4280 if (!costs_initialized)
4281 {
4282 tree type = build_pointer_type (integer_type_node);
4283 tree var, addr;
4284 rtx x;
4285 int i;
4286
4287 var = create_tmp_var_raw (integer_type_node, "test_var");
4288 TREE_STATIC (var) = 1;
4289 x = produce_memory_decl_rtl (var, NULL);
4290 SET_DECL_RTL (var, x);
4291
4292 addr = build1 (ADDR_EXPR, type, var);
4293
4294
4295 for (i = 0; i < 2; i++)
4296 {
4297 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4298 2000), i);
4299
4300 symbol_cost[i] = computation_cost (addr, i) + 1;
4301
4302 address_cost[i]
4303 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4304 if (dump_file && (dump_flags & TDF_DETAILS))
4305 {
4306 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4307 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4308 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4309 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4310 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4311 fprintf (dump_file, "\n");
4312 }
4313 }
4314
4315 costs_initialized = true;
4316 }
4317
4318 STRIP_NOPS (expr);
4319
4320 if (SSA_VAR_P (expr))
4321 return no_cost;
4322
4323 if (is_gimple_min_invariant (expr))
4324 {
4325 if (poly_int_tree_p (expr))
4326 return comp_cost (integer_cost [speed], 0);
4327
4328 if (TREE_CODE (expr) == ADDR_EXPR)
4329 {
4330 tree obj = TREE_OPERAND (expr, 0);
4331
4332 if (VAR_P (obj)
4333 || TREE_CODE (obj) == PARM_DECL
4334 || TREE_CODE (obj) == RESULT_DECL)
4335 return comp_cost (symbol_cost [speed], 0);
4336 }
4337
4338 return comp_cost (address_cost [speed], 0);
4339 }
4340
4341 switch (TREE_CODE (expr))
4342 {
4343 case POINTER_PLUS_EXPR:
4344 case PLUS_EXPR:
4345 case MINUS_EXPR:
4346 case MULT_EXPR:
4347 case TRUNC_DIV_EXPR:
4348 case BIT_AND_EXPR:
4349 case BIT_IOR_EXPR:
4350 case LSHIFT_EXPR:
4351 case RSHIFT_EXPR:
4352 op0 = TREE_OPERAND (expr, 0);
4353 op1 = TREE_OPERAND (expr, 1);
4354 STRIP_NOPS (op0);
4355 STRIP_NOPS (op1);
4356 break;
4357
4358 CASE_CONVERT:
4359 case NEGATE_EXPR:
4360 case BIT_NOT_EXPR:
4361 op0 = TREE_OPERAND (expr, 0);
4362 STRIP_NOPS (op0);
4363 op1 = NULL_TREE;
4364 break;
4365 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case, we
4366 introduce a COND_EXPR for the IV base, so we need to support better cost
4367 estimation for this COND_EXPR and for tcc_comparison codes. */
4368 case COND_EXPR:
4369 op0 = TREE_OPERAND (expr, 1);
4370 STRIP_NOPS (op0);
4371 op1 = TREE_OPERAND (expr, 2);
4372 STRIP_NOPS (op1);
4373 break;
4374 case LT_EXPR:
4375 case LE_EXPR:
4376 case GT_EXPR:
4377 case GE_EXPR:
4378 case EQ_EXPR:
4379 case NE_EXPR:
4380 case UNORDERED_EXPR:
4381 case ORDERED_EXPR:
4382 case UNLT_EXPR:
4383 case UNLE_EXPR:
4384 case UNGT_EXPR:
4385 case UNGE_EXPR:
4386 case UNEQ_EXPR:
4387 case LTGT_EXPR:
4388 case MAX_EXPR:
4389 case MIN_EXPR:
4390 op0 = TREE_OPERAND (expr, 0);
4391 STRIP_NOPS (op0);
4392 op1 = TREE_OPERAND (expr, 1);
4393 STRIP_NOPS (op1);
4394 break;
4395
4396 default:
4397 /* Just an arbitrary value, FIXME. */
4398 return comp_cost (target_spill_cost[speed], 0);
4399 }
4400
4401 if (op0 == NULL_TREE
4402 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4403 cost0 = no_cost;
4404 else
4405 cost0 = force_expr_to_var_cost (op0, speed);
4406
4407 if (op1 == NULL_TREE
4408 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4409 cost1 = no_cost;
4410 else
4411 cost1 = force_expr_to_var_cost (op1, speed);
4412
4413 mode = TYPE_MODE (TREE_TYPE (expr));
4414 switch (TREE_CODE (expr))
4415 {
4416 case POINTER_PLUS_EXPR:
4417 case PLUS_EXPR:
4418 case MINUS_EXPR:
4419 case NEGATE_EXPR:
4420 cost = comp_cost (add_cost (speed, mode), 0);
4421 if (TREE_CODE (expr) != NEGATE_EXPR)
4422 {
4423 tree mult = NULL_TREE;
4424 comp_cost sa_cost;
4425 if (TREE_CODE (op1) == MULT_EXPR)
4426 mult = op1;
4427 else if (TREE_CODE (op0) == MULT_EXPR)
4428 mult = op0;
4429
4430 if (mult != NULL_TREE
4431 && is_a <scalar_int_mode> (mode, &int_mode)
4432 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4433 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4434 speed, &sa_cost))
4435 return sa_cost;
4436 }
4437 break;
4438
4439 CASE_CONVERT:
4440 {
4441 tree inner_mode, outer_mode;
4442 outer_mode = TREE_TYPE (expr);
4443 inner_mode = TREE_TYPE (op0);
4444 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4445 TYPE_MODE (inner_mode), speed), 0);
4446 }
4447 break;
4448
4449 case MULT_EXPR:
4450 if (cst_and_fits_in_hwi (op0))
4451 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4452 mode, speed), 0);
4453 else if (cst_and_fits_in_hwi (op1))
4454 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4455 mode, speed), 0);
4456 else
4457 return comp_cost (target_spill_cost [speed], 0);
4458 break;
4459
4460 case TRUNC_DIV_EXPR:
4461 /* Division by power of two is usually cheap, so we allow it. Forbid
4462 anything else. */
4463 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4464 cost = comp_cost (add_cost (speed, mode), 0);
4465 else
4466 cost = comp_cost (target_spill_cost[speed], 0);
4467 break;
4468
4469 case BIT_AND_EXPR:
4470 case BIT_IOR_EXPR:
4471 case BIT_NOT_EXPR:
4472 case LSHIFT_EXPR:
4473 case RSHIFT_EXPR:
4474 cost = comp_cost (add_cost (speed, mode), 0);
4475 break;
4476 case COND_EXPR:
4477 op0 = TREE_OPERAND (expr, 0);
4478 STRIP_NOPS (op0);
4479 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4480 || CONSTANT_CLASS_P (op0))
4481 cost = no_cost;
4482 else
4483 cost = force_expr_to_var_cost (op0, speed);
4484 break;
4485 case LT_EXPR:
4486 case LE_EXPR:
4487 case GT_EXPR:
4488 case GE_EXPR:
4489 case EQ_EXPR:
4490 case NE_EXPR:
4491 case UNORDERED_EXPR:
4492 case ORDERED_EXPR:
4493 case UNLT_EXPR:
4494 case UNLE_EXPR:
4495 case UNGT_EXPR:
4496 case UNGE_EXPR:
4497 case UNEQ_EXPR:
4498 case LTGT_EXPR:
4499 case MAX_EXPR:
4500 case MIN_EXPR:
4501 /* Simply use the add cost for now; FIXME: use a more accurate cost
4502 evaluation if one becomes available. */
4503 cost = comp_cost (add_cost (speed, mode), 0);
4504 break;
4505
4506 default:
4507 gcc_unreachable ();
4508 }
4509
4510 cost += cost0;
4511 cost += cost1;
4512 return cost;
4513 }
4514
4515 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4516 invariants the computation depends on. */
4517
4518 static comp_cost
4519 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4520 {
4521 if (!expr)
4522 return no_cost;
4523
4524 find_inv_vars (data, &expr, inv_vars);
4525 return force_expr_to_var_cost (expr, data->speed);
4526 }
4527
4528 /* Returns the cost of an auto-modifying address expression of the form
4529 base + offset. AINC_STEP is the step size of the address IV. AINC_OFFSET
4530 is the offset of the address expression. The address expression has
4531 ADDR_MODE in address space AS. The memory access has MEM_MODE. SPEED
4532 indicates whether we are optimizing for speed or size. */
4533
4534 enum ainc_type
4535 {
4536 AINC_PRE_INC, /* Pre increment. */
4537 AINC_PRE_DEC, /* Pre decrement. */
4538 AINC_POST_INC, /* Post increment. */
4539 AINC_POST_DEC, /* Post decrement. */
4540 AINC_NONE /* Also the number of auto increment types. */
4541 };
4542
4543 struct ainc_cost_data
4544 {
4545 int64_t costs[AINC_NONE];
4546 };
4547
4548 static comp_cost
4549 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4550 machine_mode addr_mode, machine_mode mem_mode,
4551 addr_space_t as, bool speed)
4552 {
4553 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4554 && !USE_STORE_PRE_DECREMENT (mem_mode)
4555 && !USE_LOAD_POST_DECREMENT (mem_mode)
4556 && !USE_STORE_POST_DECREMENT (mem_mode)
4557 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4558 && !USE_STORE_PRE_INCREMENT (mem_mode)
4559 && !USE_LOAD_POST_INCREMENT (mem_mode)
4560 && !USE_STORE_POST_INCREMENT (mem_mode))
4561 return infinite_cost;
4562
4563 static vec<ainc_cost_data *> ainc_cost_data_list;
4564 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4565 if (idx >= ainc_cost_data_list.length ())
4566 {
4567 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4568
4569 gcc_assert (nsize > idx);
4570 ainc_cost_data_list.safe_grow_cleared (nsize);
4571 }
4572
4573 ainc_cost_data *data = ainc_cost_data_list[idx];
4574 if (data == NULL)
4575 {
4576 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4577
4578 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4579 data->costs[AINC_PRE_DEC] = INFTY;
4580 data->costs[AINC_POST_DEC] = INFTY;
4581 data->costs[AINC_PRE_INC] = INFTY;
4582 data->costs[AINC_POST_INC] = INFTY;
4583 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4584 || USE_STORE_PRE_DECREMENT (mem_mode))
4585 {
4586 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4587
4588 if (memory_address_addr_space_p (mem_mode, addr, as))
4589 data->costs[AINC_PRE_DEC]
4590 = address_cost (addr, mem_mode, as, speed);
4591 }
4592 if (USE_LOAD_POST_DECREMENT (mem_mode)
4593 || USE_STORE_POST_DECREMENT (mem_mode))
4594 {
4595 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4596
4597 if (memory_address_addr_space_p (mem_mode, addr, as))
4598 data->costs[AINC_POST_DEC]
4599 = address_cost (addr, mem_mode, as, speed);
4600 }
4601 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4602 || USE_STORE_PRE_INCREMENT (mem_mode))
4603 {
4604 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4605
4606 if (memory_address_addr_space_p (mem_mode, addr, as))
4607 data->costs[AINC_PRE_INC]
4608 = address_cost (addr, mem_mode, as, speed);
4609 }
4610 if (USE_LOAD_POST_INCREMENT (mem_mode)
4611 || USE_STORE_POST_INCREMENT (mem_mode))
4612 {
4613 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4614
4615 if (memory_address_addr_space_p (mem_mode, addr, as))
4616 data->costs[AINC_POST_INC]
4617 = address_cost (addr, mem_mode, as, speed);
4618 }
4619 ainc_cost_data_list[idx] = data;
4620 }
4621
4622 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4623 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4624 return comp_cost (data->costs[AINC_POST_INC], 0);
4625 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4626 return comp_cost (data->costs[AINC_POST_DEC], 0);
4627 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4628 return comp_cost (data->costs[AINC_PRE_INC], 0);
4629 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4630 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4631
4632 return infinite_cost;
4633 }
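
/* The four cases above correspond to the usual C idioms (assuming a 4-byte
   access and a step of +/-4): step +4 with offset 0 is *p++ (post
   increment), step -4 with offset 0 is *p-- (post decrement), step +4
   with offset +4 is *++p (pre increment), and step -4 with offset -4 is
   *--p (pre decrement).  */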
4634
4635 /* Return the cost of computing USE's address expression by using CAND.
4636 AFF_INV and AFF_VAR represent the invariant and variant parts of the
4637 address expression, respectively. If AFF_INV is simple, store
4638 the loop invariant variables on which it depends in INV_VARS;
4639 if AFF_INV is complicated, handle it as a new invariant expression
4640 and record it in INV_EXPR. RATIO indicates the ratio between the
4641 steps of USE and CAND. If CAN_AUTOINC is non-NULL, store a boolean
4642 value in it indicating whether this is an auto-increment address. */
4643
4644 static comp_cost
4645 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4646 struct iv_cand *cand, aff_tree *aff_inv,
4647 aff_tree *aff_var, HOST_WIDE_INT ratio,
4648 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4649 bool *can_autoinc, bool speed)
4650 {
4651 rtx addr;
4652 bool simple_inv = true;
4653 tree comp_inv = NULL_TREE, type = aff_var->type;
4654 comp_cost var_cost = no_cost, cost = no_cost;
4655 struct mem_address parts = {NULL_TREE, integer_one_node,
4656 NULL_TREE, NULL_TREE, NULL_TREE};
4657 machine_mode addr_mode = TYPE_MODE (type);
4658 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4659 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4660 /* Only true if ratio != 1. */
4661 bool ok_with_ratio_p = false;
4662 bool ok_without_ratio_p = false;
4663
4664 if (!aff_combination_const_p (aff_inv))
4665 {
4666 parts.index = integer_one_node;
4667 /* Addressing mode "base + index". */
4668 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4669 if (ratio != 1)
4670 {
4671 parts.step = wide_int_to_tree (type, ratio);
4672 /* Addressing mode "base + index << scale". */
4673 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4674 if (!ok_with_ratio_p)
4675 parts.step = NULL_TREE;
4676 }
4677 if (ok_with_ratio_p || ok_without_ratio_p)
4678 {
4679 if (maybe_ne (aff_inv->offset, 0))
4680 {
4681 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4682 /* Addressing mode "base + index [<< scale] + offset". */
4683 if (!valid_mem_ref_p (mem_mode, as, &parts))
4684 parts.offset = NULL_TREE;
4685 else
4686 aff_inv->offset = 0;
4687 }
4688
4689 move_fixed_address_to_symbol (&parts, aff_inv);
4690 /* Base is fixed address and is moved to symbol part. */
4691 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4692 parts.base = NULL_TREE;
4693
4694 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4695 if (parts.symbol != NULL_TREE
4696 && !valid_mem_ref_p (mem_mode, as, &parts))
4697 {
4698 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4699 parts.symbol = NULL_TREE;
4700 /* Reset SIMPLE_INV since the symbol address needs to be computed
4701 outside of the address expression in this case. */
4702 simple_inv = false;
4703 /* Symbol part is moved back to base part, it can't be NULL. */
4704 parts.base = integer_one_node;
4705 }
4706 }
4707 else
4708 parts.index = NULL_TREE;
4709 }
4710 else
4711 {
4712 poly_int64 ainc_step;
4713 if (can_autoinc
4714 && ratio == 1
4715 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4716 {
4717 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4718
4719 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4720 ainc_offset += ainc_step;
4721 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4722 addr_mode, mem_mode, as, speed);
4723 if (!cost.infinite_cost_p ())
4724 {
4725 *can_autoinc = true;
4726 return cost;
4727 }
4728 cost = no_cost;
4729 }
4730 if (!aff_combination_zero_p (aff_inv))
4731 {
4732 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4733 /* Addressing mode "base + offset". */
4734 if (!valid_mem_ref_p (mem_mode, as, &parts))
4735 parts.offset = NULL_TREE;
4736 else
4737 aff_inv->offset = 0;
4738 }
4739 }
4740
4741 if (simple_inv)
4742 simple_inv = (aff_inv == NULL
4743 || aff_combination_const_p (aff_inv)
4744 || aff_combination_singleton_var_p (aff_inv));
4745 if (!aff_combination_zero_p (aff_inv))
4746 comp_inv = aff_combination_to_tree (aff_inv);
4747 if (comp_inv != NULL_TREE)
4748 cost = force_var_cost (data, comp_inv, inv_vars);
4749 if (ratio != 1 && parts.step == NULL_TREE)
4750 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4751 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4752 var_cost += add_cost (speed, addr_mode);
4753
4754 if (comp_inv && inv_expr && !simple_inv)
4755 {
4756 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4757 /* Clear depends on. */
4758 if (*inv_expr != NULL && inv_vars && *inv_vars)
4759 bitmap_clear (*inv_vars);
4760
4761 /* The cost of a small invariant expression adjusted against loop niters
4762 is usually zero, which makes it difficult to differentiate from a
4763 candidate based on loop invariant variables. Secondly, the generated
4764 invariant expression may not be hoisted out of the loop by a
4765 following pass. We penalize the cost by rounding up in order to
4766 neutralize such effects. */
4767 cost.cost = adjust_setup_cost (data, cost.cost, true);
4768 cost.scratch = cost.cost;
4769 }
4770
4771 cost += var_cost;
4772 addr = addr_for_mem_ref (&parts, as, false);
4773 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4774 cost += address_cost (addr, mem_mode, as, speed);
4775
4776 if (parts.symbol != NULL_TREE)
4777 cost.complexity += 1;
4778 /* Don't increase the complexity of adding a scaled index if it's
4779 the only kind of index that the target allows. */
4780 if (parts.step != NULL_TREE && ok_without_ratio_p)
4781 cost.complexity += 1;
4782 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4783 cost.complexity += 1;
4784 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4785 cost.complexity += 1;
4786
4787 return cost;
4788 }
4789
4790 /* Scale (multiply) the computed COST (except the scratch part, which should
4791 be hoisted out of the loop) by header->frequency / AT->frequency, which
4792 makes the expected cost more accurate. */
4793
4794 static comp_cost
4795 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4796 {
4797 if (data->speed
4798 && data->current_loop->header->count.to_frequency (cfun) > 0)
4799 {
4800 basic_block bb = gimple_bb (at);
4801 gcc_assert (cost.scratch <= cost.cost);
4802 int scale_factor = (int)(intptr_t) bb->aux;
4803 if (scale_factor == 1)
4804 return cost;
4805
4806 int64_t scaled_cost
4807 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4808
4809 if (dump_file && (dump_flags & TDF_DETAILS))
4810 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4811 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4812 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4813
4814 cost.cost = scaled_cost;
4815 }
4816
4817 return cost;
4818 }
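
/* For instance (made-up numbers): with cost 10, scratch 4 and a
   scale_factor of 3 recorded in bb->aux, the scaled cost is
   4 + (10 - 4) * 3 == 22; the scratch (setup) part stays unscaled since
   it is expected to be computed outside the loop body.  */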
4819
4820 /* Determines the cost of the computation by which USE is expressed
4821 from induction variable CAND. If ADDRESS_P is true, we just need
4822 to create an address from it, otherwise we want to get it into a
4823 register. A set of invariants we depend on is stored in INV_VARS.
4824 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4825 addressing is likely. If INV_EXPR is nonnull, record the invariant
4826 expr entry in it. */
4827
4828 static comp_cost
4829 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4830 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4831 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4832 {
4833 gimple *at = use->stmt;
4834 tree ubase = use->iv->base, cbase = cand->iv->base;
4835 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4836 tree comp_inv = NULL_TREE;
4837 HOST_WIDE_INT ratio, aratio;
4838 comp_cost cost;
4839 widest_int rat;
4840 aff_tree aff_inv, aff_var;
4841 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4842
4843 if (inv_vars)
4844 *inv_vars = NULL;
4845 if (can_autoinc)
4846 *can_autoinc = false;
4847 if (inv_expr)
4848 *inv_expr = NULL;
4849
4850 /* Check if we have enough precision to express the values of use. */
4851 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4852 return infinite_cost;
4853
4854 if (address_p
4855 || (use->iv->base_object
4856 && cand->iv->base_object
4857 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4858 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4859 {
4860 /* Do not try to express address of an object with computation based
4861 on address of a different object. This may cause problems in rtl
4862 level alias analysis (that does not expect this to be happening,
4863 as this is illegal in C), and would be unlikely to be useful
4864 anyway. */
4865 if (use->iv->base_object
4866 && cand->iv->base_object
4867 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4868 return infinite_cost;
4869 }
4870
4871 if (!get_computation_aff_1 (data->current_loop, at, use,
4872 cand, &aff_inv, &aff_var, &rat)
4873 || !wi::fits_shwi_p (rat))
4874 return infinite_cost;
4875
4876 ratio = rat.to_shwi ();
4877 if (address_p)
4878 {
4879 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4880 inv_vars, inv_expr, can_autoinc, speed);
4881 cost = get_scaled_computation_cost_at (data, at, cost);
4882 /* For doloop IV cand, add on the extra cost. */
4883 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4884 return cost;
4885 }
4886
4887 bool simple_inv = (aff_combination_const_p (&aff_inv)
4888 || aff_combination_singleton_var_p (&aff_inv));
4889 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4890 aff_combination_convert (&aff_inv, signed_type);
4891 if (!aff_combination_zero_p (&aff_inv))
4892 comp_inv = aff_combination_to_tree (&aff_inv);
4893
4894 cost = force_var_cost (data, comp_inv, inv_vars);
4895 if (comp_inv && inv_expr && !simple_inv)
4896 {
4897 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4898 /* Clear depends on. */
4899 if (*inv_expr != NULL && inv_vars && *inv_vars)
4900 bitmap_clear (*inv_vars);
4901
4902 cost.cost = adjust_setup_cost (data, cost.cost);
4903 /* Record setup cost in scratch field. */
4904 cost.scratch = cost.cost;
4905 }
4906 /* The cost of a constant integer can be covered when adding the invariant
4907 part to the variant part. */
4908 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4909 cost = no_cost;
4910
4911 /* Need type narrowing to represent use with cand. */
4912 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4913 {
4914 machine_mode outer_mode = TYPE_MODE (utype);
4915 machine_mode inner_mode = TYPE_MODE (ctype);
4916 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4917 }
4918
4919 /* Turn a + i * (-c) into a - i * c. */
4920 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4921 aratio = -ratio;
4922 else
4923 aratio = ratio;
4924
4925 if (ratio != 1)
4926 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4927
4928 /* TODO: We may also need to check if we can compute a + i * 4 in one
4929 instruction. */
4930 /* Need to add up the invariant and variant parts. */
4931 if (comp_inv && !integer_zerop (comp_inv))
4932 cost += add_cost (speed, TYPE_MODE (utype));
4933
4934 cost = get_scaled_computation_cost_at (data, at, cost);
4935
4936 /* For doloop IV cand, add on the extra cost. */
4937 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4938 cost += targetm.doloop_cost_for_generic;
4939
4940 return cost;
4941 }
4942
4943 /* Determines cost of computing the use in GROUP with CAND in a generic
4944 expression. */
4945
4946 static bool
4947 determine_group_iv_cost_generic (struct ivopts_data *data,
4948 struct iv_group *group, struct iv_cand *cand)
4949 {
4950 comp_cost cost;
4951 iv_inv_expr_ent *inv_expr = NULL;
4952 bitmap inv_vars = NULL, inv_exprs = NULL;
4953 struct iv_use *use = group->vuses[0];
4954
4955 /* The simple case first -- if we need to express the value of the preserved
4956 original biv, the cost is 0. This also prevents us from counting the
4957 cost of the increment twice -- once at this use and once in the cost of
4958 the candidate. */
4959 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4960 cost = no_cost;
4961 else
4962 cost = get_computation_cost (data, use, cand, false,
4963 &inv_vars, NULL, &inv_expr);
4964
4965 if (inv_expr)
4966 {
4967 inv_exprs = BITMAP_ALLOC (NULL);
4968 bitmap_set_bit (inv_exprs, inv_expr->id);
4969 }
4970 set_group_iv_cost (data, group, cand, cost, inv_vars,
4971 NULL_TREE, ERROR_MARK, inv_exprs);
4972 return !cost.infinite_cost_p ();
4973 }
4974
4975 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4976
4977 static bool
4978 determine_group_iv_cost_address (struct ivopts_data *data,
4979 struct iv_group *group, struct iv_cand *cand)
4980 {
4981 unsigned i;
4982 bitmap inv_vars = NULL, inv_exprs = NULL;
4983 bool can_autoinc;
4984 iv_inv_expr_ent *inv_expr = NULL;
4985 struct iv_use *use = group->vuses[0];
4986 comp_cost sum_cost = no_cost, cost;
4987
4988 cost = get_computation_cost (data, use, cand, true,
4989 &inv_vars, &can_autoinc, &inv_expr);
4990
4991 if (inv_expr)
4992 {
4993 inv_exprs = BITMAP_ALLOC (NULL);
4994 bitmap_set_bit (inv_exprs, inv_expr->id);
4995 }
4996 sum_cost = cost;
4997 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4998 {
4999 if (can_autoinc)
5000 sum_cost -= cand->cost_step;
5001 /* If we generated the candidate solely for exploiting autoincrement
5002 opportunities, and it turns out it can't be used, set the cost to
5003 infinity to make sure we ignore it. */
5004 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5005 sum_cost = infinite_cost;
5006 }
5007
5008 /* Uses in a group can share setup code, so only add setup cost once. */
5009 cost -= cost.scratch;
5010 /* Compute and add costs for the remaining uses of this group. */
5011 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5012 {
5013 struct iv_use *next = group->vuses[i];
5014
5015 /* TODO: We could skip computing cost for sub iv_use when it has the
5016 same cost as the first iv_use, but the cost really depends on the
5017 offset and where the iv_use is. */
5018 cost = get_computation_cost (data, next, cand, true,
5019 NULL, &can_autoinc, &inv_expr);
5020 if (inv_expr)
5021 {
5022 if (!inv_exprs)
5023 inv_exprs = BITMAP_ALLOC (NULL);
5024
5025 bitmap_set_bit (inv_exprs, inv_expr->id);
5026 }
5027 sum_cost += cost;
5028 }
5029 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5030 NULL_TREE, ERROR_MARK, inv_exprs);
5031
5032 return !sum_cost.infinite_cost_p ();
5033 }
5034
5035 /* Computes the value of candidate CAND at position AT in iteration NITER,
5036 and stores it in VAL. */
5037
5038 static void
5039 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5040 aff_tree *val)
5041 {
5042 aff_tree step, delta, nit;
5043 struct iv *iv = cand->iv;
5044 tree type = TREE_TYPE (iv->base);
5045 tree steptype;
5046 if (POINTER_TYPE_P (type))
5047 steptype = sizetype;
5048 else
5049 steptype = unsigned_type_for (type);
5050
5051 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5052 aff_combination_convert (&step, steptype);
5053 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5054 aff_combination_convert (&nit, steptype);
5055 aff_combination_mult (&nit, &step, &delta);
5056 if (stmt_after_increment (loop, cand, at))
5057 aff_combination_add (&delta, &step);
5058
5059 tree_to_aff_combination (iv->base, type, val);
5060 if (!POINTER_TYPE_P (type))
5061 aff_combination_convert (val, steptype);
5062 aff_combination_add (val, &delta);
5063 }
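
/* Worked example (illustrative IV): for a candidate with base 16 and step
   4, the value after NITER iterations is 16 + 4 * NITER; if AT lies after
   the increment, one more step is added, giving 16 + 4 * (NITER + 1).  */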
5064
5065 /* Returns the period of induction variable IV. */
5066
5067 static tree
5068 iv_period (struct iv *iv)
5069 {
5070 tree step = iv->step, period, type;
5071 tree pow2div;
5072
5073 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5074
5075 type = unsigned_type_for (TREE_TYPE (step));
5076 /* The period of the iv is lcm (step, type_range)/step - 1,
5077 i.e., N*type_range/step - 1. Since type_range is a power
5078 of two, N == step >> num_of_ending_zeros_binary (step),
5079 so the final result is
5080
5081 (type_range >> num_of_ending_zeros_binary (step)) - 1
5082
5083 */
5084 pow2div = num_ending_zeros (step);
5085
5086 period = build_low_bits_mask (type,
5087 (TYPE_PRECISION (type)
5088 - tree_to_uhwi (pow2div)));
5089
5090 return period;
5091 }
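
/* Concrete instance (hypothetical type): for an 8-bit unsigned iv with
   step 4, num_ending_zeros is 2 and the period is (256 >> 2) - 1 == 63,
   i.e. the iv takes 64 distinct values before it wraps back to its
   starting value.  */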
5092
5093 /* Returns the comparison operator used when eliminating the iv USE. */
5094
5095 static enum tree_code
5096 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5097 {
5098 class loop *loop = data->current_loop;
5099 basic_block ex_bb;
5100 edge exit;
5101
5102 ex_bb = gimple_bb (use->stmt);
5103 exit = EDGE_SUCC (ex_bb, 0);
5104 if (flow_bb_inside_loop_p (loop, exit->dest))
5105 exit = EDGE_SUCC (ex_bb, 1);
5106
5107 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5108 }
5109
5110 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5111 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5112 calculation is performed in a non-wrapping type.
5113
5114 TODO: More generally, we could test for the situation that
5115 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5116 This would require knowing the sign of OFFSET. */
5117
5118 static bool
5119 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5120 {
5121 enum tree_code code;
5122 tree e1, e2;
5123 aff_tree aff_e1, aff_e2, aff_offset;
5124
5125 if (!nowrap_type_p (TREE_TYPE (base)))
5126 return false;
5127
5128 base = expand_simple_operations (base);
5129
5130 if (TREE_CODE (base) == SSA_NAME)
5131 {
5132 gimple *stmt = SSA_NAME_DEF_STMT (base);
5133
5134 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5135 return false;
5136
5137 code = gimple_assign_rhs_code (stmt);
5138 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5139 return false;
5140
5141 e1 = gimple_assign_rhs1 (stmt);
5142 e2 = gimple_assign_rhs2 (stmt);
5143 }
5144 else
5145 {
5146 code = TREE_CODE (base);
5147 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5148 return false;
5149 e1 = TREE_OPERAND (base, 0);
5150 e2 = TREE_OPERAND (base, 1);
5151 }
5152
5153 /* Use affine expansion as deeper inspection to prove the equality. */
5154 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5155 &aff_e2, &data->name_expansion_cache);
5156 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5157 &aff_offset, &data->name_expansion_cache);
5158 aff_combination_scale (&aff_offset, -1);
5159 switch (code)
5160 {
5161 case PLUS_EXPR:
5162 aff_combination_add (&aff_e2, &aff_offset);
5163 if (aff_combination_zero_p (&aff_e2))
5164 return true;
5165
5166 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5167 &aff_e1, &data->name_expansion_cache);
5168 aff_combination_add (&aff_e1, &aff_offset);
5169 return aff_combination_zero_p (&aff_e1);
5170
5171 case POINTER_PLUS_EXPR:
5172 aff_combination_add (&aff_e2, &aff_offset);
5173 return aff_combination_zero_p (&aff_e2);
5174
5175 default:
5176 return false;
5177 }
5178 }
5179
5180 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5181 comparison with CAND. NITER describes the number of iterations of
5182 the loops. If successful, the comparison in COMP_P is altered accordingly.
5183
5184 We aim to handle the following situation:
5185
5186 sometype *base, *p;
5187 int a, b, i;
5188
5189 i = a;
5190 p = p_0 = base + a;
5191
5192 do
5193 {
5194 bla (*p);
5195 p++;
5196 i++;
5197 }
5198 while (i < b);
5199
5200 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5201 We aim to optimize this to
5202
5203 p = p_0 = base + a;
5204 do
5205 {
5206 bla (*p);
5207 p++;
5208 }
5209 while (p < p_0 - a + b);
5210
5211 This preserves correctness, since the pointer arithmetic does not
5212 overflow. More precisely:
5213
5214 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5215 overflow in computing it or the values of p.
5216 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5217 overflow. To prove this, we use the fact that p_0 = base + a. */
5218
5219 static bool
5220 iv_elimination_compare_lt (struct ivopts_data *data,
5221 struct iv_cand *cand, enum tree_code *comp_p,
5222 class tree_niter_desc *niter)
5223 {
5224 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5225 class aff_tree nit, tmpa, tmpb;
5226 enum tree_code comp;
5227 HOST_WIDE_INT step;
5228
5229 /* We need to know that the candidate induction variable does not overflow.
5230 While more complex analysis may be used to prove this, for now just
5231 check that the variable appears in the original program and that it
5232 is computed in a type that guarantees no overflows. */
5233 cand_type = TREE_TYPE (cand->iv->base);
5234 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5235 return false;
5236
5237 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5238 the calculation of the BOUND could overflow, making the comparison
5239 invalid. */
5240 if (!data->loop_single_exit_p)
5241 return false;
5242
5243 /* We need to be able to decide whether candidate is increasing or decreasing
5244 in order to choose the right comparison operator. */
5245 if (!cst_and_fits_in_hwi (cand->iv->step))
5246 return false;
5247 step = int_cst_value (cand->iv->step);
5248
5249 /* Check that the number of iterations matches the expected pattern:
5250 a + 1 > b ? 0 : b - a - 1. */
5251 mbz = niter->may_be_zero;
5252 if (TREE_CODE (mbz) == GT_EXPR)
5253 {
5254 /* Handle a + 1 > b. */
5255 tree op0 = TREE_OPERAND (mbz, 0);
5256 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5257 {
5258 a = TREE_OPERAND (op0, 0);
5259 b = TREE_OPERAND (mbz, 1);
5260 }
5261 else
5262 return false;
5263 }
5264 else if (TREE_CODE (mbz) == LT_EXPR)
5265 {
5266 tree op1 = TREE_OPERAND (mbz, 1);
5267
5268 /* Handle b < a + 1. */
5269 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5270 {
5271 a = TREE_OPERAND (op1, 0);
5272 b = TREE_OPERAND (mbz, 0);
5273 }
5274 else
5275 return false;
5276 }
5277 else
5278 return false;
5279
5280 /* Expected number of iterations is B - A - 1. Check that it matches
5281 the actual number, i.e., that B - A - NITER = 1. */
5282 tree_to_aff_combination (niter->niter, nit_type, &nit);
5283 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5284 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5285 aff_combination_scale (&nit, -1);
5286 aff_combination_scale (&tmpa, -1);
5287 aff_combination_add (&tmpb, &tmpa);
5288 aff_combination_add (&tmpb, &nit);
5289 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5290 return false;
5291
5292 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5293 overflow. */
5294 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5295 cand->iv->step,
5296 fold_convert (TREE_TYPE (cand->iv->step), a));
5297 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5298 return false;
5299
5300 /* Determine the new comparison operator. */
5301 comp = step < 0 ? GT_EXPR : LT_EXPR;
5302 if (*comp_p == NE_EXPR)
5303 *comp_p = comp;
5304 else if (*comp_p == EQ_EXPR)
5305 *comp_p = invert_tree_comparison (comp, false);
5306 else
5307 gcc_unreachable ();
5308
5309 return true;
5310 }
5311
5312 /* Check whether it is possible to express the condition in USE by comparison
5313 of candidate CAND. If so, store the value compared with to BOUND, and the
5314 comparison operator to COMP. */
5315
5316 static bool
5317 may_eliminate_iv (struct ivopts_data *data,
5318 struct iv_use *use, struct iv_cand *cand, tree *bound,
5319 enum tree_code *comp)
5320 {
5321 basic_block ex_bb;
5322 edge exit;
5323 tree period;
5324 class loop *loop = data->current_loop;
5325 aff_tree bnd;
5326 class tree_niter_desc *desc = NULL;
5327
5328 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5329 return false;
5330
5331 /* For now this works only for exits that dominate the loop latch.
5332 TODO: extend to other conditions inside the loop body. */
5333 ex_bb = gimple_bb (use->stmt);
5334 if (use->stmt != last_stmt (ex_bb)
5335 || gimple_code (use->stmt) != GIMPLE_COND
5336 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5337 return false;
5338
5339 exit = EDGE_SUCC (ex_bb, 0);
5340 if (flow_bb_inside_loop_p (loop, exit->dest))
5341 exit = EDGE_SUCC (ex_bb, 1);
5342 if (flow_bb_inside_loop_p (loop, exit->dest))
5343 return false;
5344
5345 desc = niter_for_exit (data, exit);
5346 if (!desc)
5347 return false;
5348
5349 /* Determine whether we can use the variable to test the exit condition.
5350 This is the case iff the period of the induction variable is greater
5351 than the number of iterations for which the exit condition is true. */
5352 period = iv_period (cand->iv);
5353
5354 /* If the number of iterations is constant, compare against it directly. */
5355 if (TREE_CODE (desc->niter) == INTEGER_CST)
5356 {
5357 /* See cand_value_at. */
5358 if (stmt_after_increment (loop, cand, use->stmt))
5359 {
5360 if (!tree_int_cst_lt (desc->niter, period))
5361 return false;
5362 }
5363 else
5364 {
5365 if (tree_int_cst_lt (period, desc->niter))
5366 return false;
5367 }
5368 }
5369
5370 /* If not, and if this is the only possible exit of the loop, see whether
5371 we can get a conservative estimate on the number of iterations of the
5372 entire loop and compare against that instead. */
5373 else
5374 {
5375 widest_int period_value, max_niter;
5376
5377 max_niter = desc->max;
5378 if (stmt_after_increment (loop, cand, use->stmt))
5379 max_niter += 1;
5380 period_value = wi::to_widest (period);
5381 if (wi::gtu_p (max_niter, period_value))
5382 {
5383 /* See if we can take advantage of inferred loop bound
5384 information. */
5385 if (data->loop_single_exit_p)
5386 {
5387 if (!max_loop_iterations (loop, &max_niter))
5388 return false;
5389 /* The loop bound is already adjusted by adding 1. */
5390 if (wi::gtu_p (max_niter, period_value))
5391 return false;
5392 }
5393 else
5394 return false;
5395 }
5396 }
5397
5398 /* For a doloop IV cand, the bound would be zero. It's safe whether
5399 may_be_zero is set or not. */
5400 if (cand->doloop_p)
5401 {
5402 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5403 *comp = iv_elimination_compare (data, use);
5404 return true;
5405 }
5406
5407 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5408
5409 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5410 aff_combination_to_tree (&bnd));
5411 *comp = iv_elimination_compare (data, use);
5412
5413 /* It is unlikely that computing the number of iterations using division
5414 would be more profitable than keeping the original induction variable. */
5415 if (expression_expensive_p (*bound))
5416 return false;
5417
5418 /* Sometimes, when the number of iterations may be zero unless additional
5419 assumptions hold, it is still possible to handle the situation by using
5420 < instead of != in the exit condition.
5421
5422 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5423 base the exit condition on it. However, that is often too
5424 expensive. */
5425 if (!integer_zerop (desc->may_be_zero))
5426 return iv_elimination_compare_lt (data, cand, comp, desc);
5427
5428 return true;
5429 }
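
/* A typical elimination enabled by this test (sketch with invented names):
   a loop controlled by "i != n" with i stepping by 1 and a candidate
   pointer p = base + 4*i can drop i entirely, with cand_value_at yielding
   BOUND == base + 4*n and COMP chosen as EQ_EXPR or NE_EXPR depending on
   which successor edge leaves the loop.  */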
5430
5431 /* Calculates the cost of BOUND if it is a PARM_DECL. A PARM_DECL must
5432 be copied if it is used in the loop body and DATA->body_includes_call. */
5433
5434 static int
5435 parm_decl_cost (struct ivopts_data *data, tree bound)
5436 {
5437 tree sbound = bound;
5438 STRIP_NOPS (sbound);
5439
5440 if (TREE_CODE (sbound) == SSA_NAME
5441 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5442 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5443 && data->body_includes_call)
5444 return COSTS_N_INSNS (1);
5445
5446 return 0;
5447 }
5448
5449 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5450
5451 static bool
5452 determine_group_iv_cost_cond (struct ivopts_data *data,
5453 struct iv_group *group, struct iv_cand *cand)
5454 {
5455 tree bound = NULL_TREE;
5456 struct iv *cmp_iv;
5457 bitmap inv_exprs = NULL;
5458 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5459 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5460 enum comp_iv_rewrite rewrite_type;
5461 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5462 tree *control_var, *bound_cst;
5463 enum tree_code comp = ERROR_MARK;
5464 struct iv_use *use = group->vuses[0];
5465
5466 /* Extract condition operands. */
5467 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5468 &bound_cst, NULL, &cmp_iv);
5469 gcc_assert (rewrite_type != COMP_IV_NA);
5470
5471 /* Try iv elimination. */
5472 if (rewrite_type == COMP_IV_ELIM
5473 && may_eliminate_iv (data, use, cand, &bound, &comp))
5474 {
5475 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5476 if (elim_cost.cost == 0)
5477 elim_cost.cost = parm_decl_cost (data, bound);
5478 else if (TREE_CODE (bound) == INTEGER_CST)
5479 elim_cost.cost = 0;
5480 /* If we replace a loop condition 'i < n' with 'p < base + n',
5481 inv_vars_elim will have 'base' and 'n' set, which implies that both
5482 'base' and 'n' will be live during the loop. More likely,
5483 'base + n' will be loop invariant, resulting in only one live value
5484 during the loop. So in that case we clear inv_vars_elim and set
5485 inv_expr_elim instead. */
5486 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5487 {
5488 inv_expr_elim = get_loop_invariant_expr (data, bound);
5489 bitmap_clear (inv_vars_elim);
5490 }
5491 /* The bound is a loop invariant, so it will be computed only
5492 once. */
5493 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5494 }
5495
5496 /* When the condition is a comparison of the candidate IV against
5497 zero, prefer this IV.
5498
5499 TODO: The constant that we're subtracting from the cost should
5500 be target-dependent. This information should be added to the
5501 target costs for each backend. */
5502 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5503 && integer_zerop (*bound_cst)
5504 && (operand_equal_p (*control_var, cand->var_after, 0)
5505 || operand_equal_p (*control_var, cand->var_before, 0)))
5506 elim_cost -= 1;
5507
5508 express_cost = get_computation_cost (data, use, cand, false,
5509 &inv_vars_express, NULL,
5510 &inv_expr_express);
5511 if (cmp_iv != NULL)
5512 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5513
5514 /* Count the cost of the original bound as well. */
5515 bound_cost = force_var_cost (data, *bound_cst, NULL);
5516 if (bound_cost.cost == 0)
5517 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5518 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5519 bound_cost.cost = 0;
5520 express_cost += bound_cost;
5521
5522 /* Choose the better approach, preferring the eliminated IV. */
5523 if (elim_cost <= express_cost)
5524 {
5525 cost = elim_cost;
5526 inv_vars = inv_vars_elim;
5527 inv_vars_elim = NULL;
5528 inv_expr = inv_expr_elim;
5529 /* For doloop candidate/use pair, adjust to zero cost. */
5530 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5531 cost = no_cost;
5532 }
5533 else
5534 {
5535 cost = express_cost;
5536 inv_vars = inv_vars_express;
5537 inv_vars_express = NULL;
5538 bound = NULL_TREE;
5539 comp = ERROR_MARK;
5540 inv_expr = inv_expr_express;
5541 }
5542
5543 if (inv_expr)
5544 {
5545 inv_exprs = BITMAP_ALLOC (NULL);
5546 bitmap_set_bit (inv_exprs, inv_expr->id);
5547 }
5548 set_group_iv_cost (data, group, cand, cost,
5549 inv_vars, bound, comp, inv_exprs);
5550
5551 if (inv_vars_elim)
5552 BITMAP_FREE (inv_vars_elim);
5553 if (inv_vars_express)
5554 BITMAP_FREE (inv_vars_express);
5555
5556 return !cost.infinite_cost_p ();
5557 }
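/* A rough sketch of the trade-off made above: for a loop such as

     for (i = 0; i < n; i++)
       *p++ = 0;

   iv elimination may rewrite the exit test 'i < n' into something like
   'p < base + n' using the pointer candidate.  The bound 'base + n' is
   loop invariant, so elim_cost only pays its (setup-adjusted) forcing
   cost, while express_cost would keep the original counter alive merely
   to feed the comparison.  The 'elim_cost <= express_cost' check above
   then picks whichever variant is cheaper, with a thumb on the scale for
   elimination and for doloop candidate/use pairs.  */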
5558
5559 /* Determines cost of computing uses in GROUP with CAND. Returns false
5560 if the uses in GROUP cannot be represented with CAND. */
5561
5562 static bool
5563 determine_group_iv_cost (struct ivopts_data *data,
5564 struct iv_group *group, struct iv_cand *cand)
5565 {
5566 switch (group->type)
5567 {
5568 case USE_NONLINEAR_EXPR:
5569 return determine_group_iv_cost_generic (data, group, cand);
5570
5571 case USE_REF_ADDRESS:
5572 case USE_PTR_ADDRESS:
5573 return determine_group_iv_cost_address (data, group, cand);
5574
5575 case USE_COMPARE:
5576 return determine_group_iv_cost_cond (data, group, cand);
5577
5578 default:
5579 gcc_unreachable ();
5580 }
5581 }
5582
5583 /* Return true if get_computation_cost indicates that autoincrement is
5584 a possibility for the pair of USE and CAND, false otherwise. */
5585
5586 static bool
5587 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5588 struct iv_cand *cand)
5589 {
5590 if (!address_p (use->type))
5591 return false;
5592
5593 bool can_autoinc = false;
5594 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5595 return can_autoinc;
5596 }
5597
5598 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5599 use that allows autoincrement, and set their AINC_USE if possible. */
5600
5601 static void
5602 set_autoinc_for_original_candidates (struct ivopts_data *data)
5603 {
5604 unsigned i, j;
5605
5606 for (i = 0; i < data->vcands.length (); i++)
5607 {
5608 struct iv_cand *cand = data->vcands[i];
5609 struct iv_use *closest_before = NULL;
5610 struct iv_use *closest_after = NULL;
5611 if (cand->pos != IP_ORIGINAL)
5612 continue;
5613
5614 for (j = 0; j < data->vgroups.length (); j++)
5615 {
5616 struct iv_group *group = data->vgroups[j];
5617 struct iv_use *use = group->vuses[0];
5618 unsigned uid = gimple_uid (use->stmt);
5619
5620 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5621 continue;
5622
5623 if (uid < gimple_uid (cand->incremented_at)
5624 && (closest_before == NULL
5625 || uid > gimple_uid (closest_before->stmt)))
5626 closest_before = use;
5627
5628 if (uid > gimple_uid (cand->incremented_at)
5629 && (closest_after == NULL
5630 || uid < gimple_uid (closest_after->stmt)))
5631 closest_after = use;
5632 }
5633
5634 if (closest_before != NULL
5635 && autoinc_possible_for_pair (data, closest_before, cand))
5636 cand->ainc_use = closest_before;
5637 else if (closest_after != NULL
5638 && autoinc_possible_for_pair (data, closest_after, cand))
5639 cand->ainc_use = closest_after;
5640 }
5641 }
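/* For illustration (uids invented for the example): if the increment of
   an IP_ORIGINAL candidate has gimple uid 30 and the same basic block
   contains address uses with uids 10, 20 and 40, then the use with uid 20
   is CLOSEST_BEFORE and the one with uid 40 is CLOSEST_AFTER; the
   candidate's AINC_USE becomes whichever of the two
   autoinc_possible_for_pair accepts, trying the preceding use first.  */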
5642
5643 /* Relate compare use with all candidates. */
5644
5645 static void
5646 relate_compare_use_with_all_cands (struct ivopts_data *data)
5647 {
5648 unsigned i, count = data->vcands.length ();
5649 for (i = 0; i < data->vgroups.length (); i++)
5650 {
5651 struct iv_group *group = data->vgroups[i];
5652
5653 if (group->type == USE_COMPARE)
5654 bitmap_set_range (group->related_cands, 0, count);
5655 }
5656 }
5657
5658 /* Add one doloop dedicated IV candidate:
5659 - Base is (may_be_zero ? 1 : (niter + 1)).
5660 - Step is -1. */
5661
5662 static void
5663 add_iv_candidate_for_doloop (struct ivopts_data *data)
5664 {
5665 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5666 gcc_assert (niter_desc && niter_desc->assumptions);
5667
5668 tree niter = niter_desc->niter;
5669 tree ntype = TREE_TYPE (niter);
5670 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5671
5672 tree may_be_zero = niter_desc->may_be_zero;
5673 if (may_be_zero && integer_zerop (may_be_zero))
5674 may_be_zero = NULL_TREE;
5675 if (may_be_zero)
5676 {
5677 if (COMPARISON_CLASS_P (may_be_zero))
5678 {
5679 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5680 build_int_cst (ntype, 0),
5681 rewrite_to_non_trapping_overflow (niter));
5682 }
5683 /* Don't try to obtain the iteration count expression when may_be_zero is
5684 integer_nonzerop (the iteration count is then one) or has any other non-comparison form. */
5685 else
5686 return;
5687 }
5688
5689 tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5690 build_int_cst (ntype, 1));
5691 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5692 }
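/* For example, if niter analysis yields NITER = n - 1 with no may_be_zero
   condition, the candidate added above has base 'n' and step -1, i.e. it
   counts the remaining iterations down towards zero, which is the shape a
   low-overhead loop (doloop) counter register expects.  */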
5693
5694 /* Finds the candidates for the induction variables. */
5695
5696 static void
5697 find_iv_candidates (struct ivopts_data *data)
5698 {
5699 /* Add commonly used ivs. */
5700 add_standard_iv_candidates (data);
5701
5702 /* Add doloop dedicated ivs. */
5703 if (data->doloop_use_p)
5704 add_iv_candidate_for_doloop (data);
5705
5706 /* Add old induction variables. */
5707 add_iv_candidate_for_bivs (data);
5708
5709 /* Add induction variables derived from uses. */
5710 add_iv_candidate_for_groups (data);
5711
5712 set_autoinc_for_original_candidates (data);
5713
5714 /* Record the important candidates. */
5715 record_important_candidates (data);
5716
5717 /* Relate compare iv_use with all candidates. */
5718 if (!data->consider_all_candidates)
5719 relate_compare_use_with_all_cands (data);
5720
5721 if (dump_file && (dump_flags & TDF_DETAILS))
5722 {
5723 unsigned i;
5724
5725 fprintf (dump_file, "\n<Important Candidates>:\t");
5726 for (i = 0; i < data->vcands.length (); i++)
5727 if (data->vcands[i]->important)
5728 fprintf (dump_file, " %d,", data->vcands[i]->id);
5729 fprintf (dump_file, "\n");
5730
5731 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5732 for (i = 0; i < data->vgroups.length (); i++)
5733 {
5734 struct iv_group *group = data->vgroups[i];
5735
5736 if (group->related_cands)
5737 {
5738 fprintf (dump_file, " Group %d:\t", group->id);
5739 dump_bitmap (dump_file, group->related_cands);
5740 }
5741 }
5742 fprintf (dump_file, "\n");
5743 }
5744 }
5745
5746 /* Determines costs of computing use of iv with an iv candidate. */
5747
5748 static void
5749 determine_group_iv_costs (struct ivopts_data *data)
5750 {
5751 unsigned i, j;
5752 struct iv_cand *cand;
5753 struct iv_group *group;
5754 bitmap to_clear = BITMAP_ALLOC (NULL);
5755
5756 alloc_use_cost_map (data);
5757
5758 for (i = 0; i < data->vgroups.length (); i++)
5759 {
5760 group = data->vgroups[i];
5761
5762 if (data->consider_all_candidates)
5763 {
5764 for (j = 0; j < data->vcands.length (); j++)
5765 {
5766 cand = data->vcands[j];
5767 determine_group_iv_cost (data, group, cand);
5768 }
5769 }
5770 else
5771 {
5772 bitmap_iterator bi;
5773
5774 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5775 {
5776 cand = data->vcands[j];
5777 if (!determine_group_iv_cost (data, group, cand))
5778 bitmap_set_bit (to_clear, j);
5779 }
5780
5781 /* Remove the candidates for which the cost is infinite from
5782 the list of related candidates. */
5783 bitmap_and_compl_into (group->related_cands, to_clear);
5784 bitmap_clear (to_clear);
5785 }
5786 }
5787
5788 BITMAP_FREE (to_clear);
5789
5790 if (dump_file && (dump_flags & TDF_DETAILS))
5791 {
5792 bitmap_iterator bi;
5793
5794 /* Dump invariant variables. */
5795 fprintf (dump_file, "\n<Invariant Vars>:\n");
5796 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5797 {
5798 struct version_info *info = ver_info (data, i);
5799 if (info->inv_id)
5800 {
5801 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5802 print_generic_expr (dump_file, info->name, TDF_SLIM);
5803 fprintf (dump_file, "%s\n",
5804 info->has_nonlin_use ? "" : "\t(eliminable)");
5805 }
5806 }
5807
5808 /* Dump invariant expressions. */
5809 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5810 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5811
5812 for (hash_table<iv_inv_expr_hasher>::iterator it
5813 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5814 ++it)
5815 list.safe_push (*it);
5816
5817 list.qsort (sort_iv_inv_expr_ent);
5818
5819 for (i = 0; i < list.length (); ++i)
5820 {
5821 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5822 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5823 fprintf (dump_file, "\n");
5824 }
5825
5826 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5827
5828 for (i = 0; i < data->vgroups.length (); i++)
5829 {
5830 group = data->vgroups[i];
5831
5832 fprintf (dump_file, "Group %d:\n", i);
5833 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5834 for (j = 0; j < group->n_map_members; j++)
5835 {
5836 if (!group->cost_map[j].cand
5837 || group->cost_map[j].cost.infinite_cost_p ())
5838 continue;
5839
5840 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5841 group->cost_map[j].cand->id,
5842 group->cost_map[j].cost.cost,
5843 group->cost_map[j].cost.complexity);
5844 if (!group->cost_map[j].inv_exprs
5845 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5846 fprintf (dump_file, "NIL;\t");
5847 else
5848 bitmap_print (dump_file,
5849 group->cost_map[j].inv_exprs, "", ";\t");
5850 if (!group->cost_map[j].inv_vars
5851 || bitmap_empty_p (group->cost_map[j].inv_vars))
5852 fprintf (dump_file, "NIL;\n");
5853 else
5854 bitmap_print (dump_file,
5855 group->cost_map[j].inv_vars, "", "\n");
5856 }
5857
5858 fprintf (dump_file, "\n");
5859 }
5860 fprintf (dump_file, "\n");
5861 }
5862 }
5863
5864 /* Determines cost of the candidate CAND. */
5865
5866 static void
5867 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5868 {
5869 comp_cost cost_base;
5870 int64_t cost, cost_step;
5871 tree base;
5872
5873 gcc_assert (cand->iv != NULL);
5874
5875 /* There are two costs associated with the candidate -- its increment
5876 and its initialization. The second is almost negligible for any loop
5877 that rolls enough, so we give it only a small weight. */
5878
5879 base = cand->iv->base;
5880 cost_base = force_var_cost (data, base, NULL);
5881 /* It will be exceptional that the iv register happens to be initialized with
5882 the proper value at no cost. In general, there will at least be a regcopy
5883 or a const set. */
5884 if (cost_base.cost == 0)
5885 cost_base.cost = COSTS_N_INSNS (1);
5886 /* Doloop decrement should be considered as zero cost. */
5887 if (cand->doloop_p)
5888 cost_step = 0;
5889 else
5890 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5891 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5892
5893 /* Prefer the original ivs unless we may gain something by replacing them.
5894 The reason is to make debugging simpler; so this is not relevant for
5895 artificial ivs created by other optimization passes. */
5896 if ((cand->pos != IP_ORIGINAL
5897 || !SSA_NAME_VAR (cand->var_before)
5898 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5899 /* Prefer doloop as well. */
5900 && !cand->doloop_p)
5901 cost++;
5902
5903 /* Prefer not to insert statements into latch unless there are some
5904 already (so that we do not create unnecessary jumps). */
5905 if (cand->pos == IP_END
5906 && empty_block_p (ip_end_pos (data->current_loop)))
5907 cost++;
5908
5909 cand->cost = cost;
5910 cand->cost_step = cost_step;
5911 }
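/* As a rough example of the resulting numbers: for an ordinary candidate,
   COST_STEP is the add_cost of one increment per iteration, while the
   cost of forcing the base (at least one regcopy or constant set) is
   scaled down by adjust_setup_cost for loops that roll enough, so the
   per-iteration increment usually dominates CAND->cost.  Doloop
   candidates skip the step cost entirely, and the two 'cost++' tweaks
   above only bias the selection towards original ivs and away from
   inserting new statements into an otherwise empty latch.  */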
5912
5913 /* Determines costs of computation of the candidates. */
5914
5915 static void
5916 determine_iv_costs (struct ivopts_data *data)
5917 {
5918 unsigned i;
5919
5920 if (dump_file && (dump_flags & TDF_DETAILS))
5921 {
5922 fprintf (dump_file, "<Candidate Costs>:\n");
5923 fprintf (dump_file, " cand\tcost\n");
5924 }
5925
5926 for (i = 0; i < data->vcands.length (); i++)
5927 {
5928 struct iv_cand *cand = data->vcands[i];
5929
5930 determine_iv_cost (data, cand);
5931
5932 if (dump_file && (dump_flags & TDF_DETAILS))
5933 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5934 }
5935
5936 if (dump_file && (dump_flags & TDF_DETAILS))
5937 fprintf (dump_file, "\n");
5938 }
5939
5940 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5941 induction variables. Note N_INVS includes both invariant variables and
5942 invariant expressions. */
5943
5944 static unsigned
5945 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5946 unsigned n_cands)
5947 {
5948 unsigned cost;
5949 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5950 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5951 bool speed = data->speed;
5952
5953 /* If there is a call in the loop body, the call-clobbered registers
5954 are not available for loop invariants. */
5955 if (data->body_includes_call)
5956 available_regs = available_regs - target_clobbered_regs;
5957
5958 /* If we have enough registers. */
5959 if (regs_needed + target_res_regs < available_regs)
5960 cost = n_new;
5961 /* If close to running out of registers, try to preserve them. */
5962 else if (regs_needed <= available_regs)
5963 cost = target_reg_cost [speed] * regs_needed;
5964 /* If regs_needed exceeds the available registers but the number of
5965 candidates does not, we penalize the extra registers using target_spill_cost. */
5966 else if (n_cands <= available_regs)
5967 cost = target_reg_cost [speed] * available_regs
5968 + target_spill_cost [speed] * (regs_needed - available_regs);
5969 /* If even the number of candidates exceeds the available registers, we
5970 penalize the extra candidate registers using target_spill_cost * 2,
5971 since spilling an induction variable is more expensive than spilling an invariant. */
5972 else
5973 cost = target_reg_cost [speed] * available_regs
5974 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5975 + target_spill_cost [speed] * (regs_needed - n_cands);
5976
5977 /* Finally, add the number of candidates, so that we prefer eliminating
5978 induction variables if possible. */
5979 return cost + n_cands;
5980 }
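/* A small worked example with illustrative numbers: assume
   available_regs = 16, target_res_regs = 3, n_old = 2, reg_cost = 2 and
   spill_cost = 8.  With n_invs + n_cands = 5 we get regs_needed = 7 and
   7 + 3 < 16, so the cost is just n_new = 5 (plus the final '+ n_cands').
   With n_invs + n_cands = 15 we get regs_needed = 17 > 16 while the
   candidates alone still fit, so the cost becomes 2*16 + 8*(17-16) = 40
   before the final '+ n_cands'.  Only when even the candidates no longer
   fit are the excess candidate registers charged twice the spill cost.  */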
5981
5982 /* For each size of the induction variable set determine the penalty. */
5983
5984 static void
5985 determine_set_costs (struct ivopts_data *data)
5986 {
5987 unsigned j, n;
5988 gphi *phi;
5989 gphi_iterator psi;
5990 tree op;
5991 class loop *loop = data->current_loop;
5992 bitmap_iterator bi;
5993
5994 if (dump_file && (dump_flags & TDF_DETAILS))
5995 {
5996 fprintf (dump_file, "<Global Costs>:\n");
5997 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5998 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5999 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6000 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6001 }
6002
6003 n = 0;
6004 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6005 {
6006 phi = psi.phi ();
6007 op = PHI_RESULT (phi);
6008
6009 if (virtual_operand_p (op))
6010 continue;
6011
6012 if (get_iv (data, op))
6013 continue;
6014
6015 if (!POINTER_TYPE_P (TREE_TYPE (op))
6016 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6017 continue;
6018
6019 n++;
6020 }
6021
6022 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6023 {
6024 struct version_info *info = ver_info (data, j);
6025
6026 if (info->inv_id && info->has_nonlin_use)
6027 n++;
6028 }
6029
6030 data->regs_used = n;
6031 if (dump_file && (dump_flags & TDF_DETAILS))
6032 fprintf (dump_file, " regs_used %d\n", n);
6033
6034 if (dump_file && (dump_flags & TDF_DETAILS))
6035 {
6036 fprintf (dump_file, " cost for size:\n");
6037 fprintf (dump_file, " ivs\tcost\n");
6038 for (j = 0; j <= 2 * target_avail_regs; j++)
6039 fprintf (dump_file, " %d\t%d\n", j,
6040 ivopts_estimate_reg_pressure (data, 0, j));
6041 fprintf (dump_file, "\n");
6042 }
6043 }
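/* For instance, a loop whose header has integral PHIs for a running sum
   and a flag that are not recognized as ivs, plus one invariant with a
   nonlinear use, would get regs_used = 3; the table dumped above then
   shows how ivopts_estimate_reg_pressure grows as candidate ivs are added
   on top of that baseline.  */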
6044
6045 /* Returns true if A is a cheaper cost pair than B. */
6046
6047 static bool
6048 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6049 {
6050 if (!a)
6051 return false;
6052
6053 if (!b)
6054 return true;
6055
6056 if (a->cost < b->cost)
6057 return true;
6058
6059 if (b->cost < a->cost)
6060 return false;
6061
6062 /* In case the costs are the same, prefer the cheaper candidate. */
6063 if (a->cand->cost < b->cand->cost)
6064 return true;
6065
6066 return false;
6067 }
6068
6069 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6070 for more expensive, equal and cheaper respectively. */
6071
6072 static int
6073 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6074 {
6075 if (cheaper_cost_pair (a, b))
6076 return -1;
6077 if (cheaper_cost_pair (b, a))
6078 return 1;
6079
6080 return 0;
6081 }
6082
6083 /* Returns the cost pair for the candidate by which GROUP is expressed in IVS. */
6084
6085 static class cost_pair *
6086 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6087 {
6088 return ivs->cand_for_group[group->id];
6089 }
6090
6091 /* Computes the cost field of IVS structure. */
6092
6093 static void
6094 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6095 {
6096 comp_cost cost = ivs->cand_use_cost;
6097
6098 cost += ivs->cand_cost;
6099 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6100 ivs->cost = cost;
6101 }
6102
6103 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6104 and IVS. */
6105
6106 static void
6107 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6108 {
6109 bitmap_iterator bi;
6110 unsigned iid;
6111
6112 if (!invs)
6113 return;
6114
6115 gcc_assert (n_inv_uses != NULL);
6116 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6117 {
6118 n_inv_uses[iid]--;
6119 if (n_inv_uses[iid] == 0)
6120 ivs->n_invs--;
6121 }
6122 }
6123
6124 /* Set GROUP not to be expressed by any candidate in IVS. */
6125
6126 static void
6127 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6128 struct iv_group *group)
6129 {
6130 unsigned gid = group->id, cid;
6131 class cost_pair *cp;
6132
6133 cp = ivs->cand_for_group[gid];
6134 if (!cp)
6135 return;
6136 cid = cp->cand->id;
6137
6138 ivs->bad_groups++;
6139 ivs->cand_for_group[gid] = NULL;
6140 ivs->n_cand_uses[cid]--;
6141
6142 if (ivs->n_cand_uses[cid] == 0)
6143 {
6144 bitmap_clear_bit (ivs->cands, cid);
6145 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6146 ivs->n_cands--;
6147 ivs->cand_cost -= cp->cand->cost;
6148 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6149 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6150 }
6151
6152 ivs->cand_use_cost -= cp->cost;
6153 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6154 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6155 iv_ca_recount_cost (data, ivs);
6156 }
6157
6158 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6159 IVS. */
6160
6161 static void
6162 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6163 {
6164 bitmap_iterator bi;
6165 unsigned iid;
6166
6167 if (!invs)
6168 return;
6169
6170 gcc_assert (n_inv_uses != NULL);
6171 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6172 {
6173 n_inv_uses[iid]++;
6174 if (n_inv_uses[iid] == 1)
6175 ivs->n_invs++;
6176 }
6177 }
6178
6179 /* Set cost pair for GROUP in set IVS to CP. */
6180
6181 static void
6182 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6183 struct iv_group *group, class cost_pair *cp)
6184 {
6185 unsigned gid = group->id, cid;
6186
6187 if (ivs->cand_for_group[gid] == cp)
6188 return;
6189
6190 if (ivs->cand_for_group[gid])
6191 iv_ca_set_no_cp (data, ivs, group);
6192
6193 if (cp)
6194 {
6195 cid = cp->cand->id;
6196
6197 ivs->bad_groups--;
6198 ivs->cand_for_group[gid] = cp;
6199 ivs->n_cand_uses[cid]++;
6200 if (ivs->n_cand_uses[cid] == 1)
6201 {
6202 bitmap_set_bit (ivs->cands, cid);
6203 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6204 ivs->n_cands++;
6205 ivs->cand_cost += cp->cand->cost;
6206 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6207 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6208 }
6209
6210 ivs->cand_use_cost += cp->cost;
6211 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6212 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6213 iv_ca_recount_cost (data, ivs);
6214 }
6215 }
6216
6217 /* Extend set IVS by expressing GROUP by some of the candidates in it
6218 if possible. Consider all important candidates if candidates in
6219 set IVS don't give any result. */
6220
6221 static void
6222 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6223 struct iv_group *group)
6224 {
6225 class cost_pair *best_cp = NULL, *cp;
6226 bitmap_iterator bi;
6227 unsigned i;
6228 struct iv_cand *cand;
6229
6230 gcc_assert (ivs->upto >= group->id);
6231 ivs->upto++;
6232 ivs->bad_groups++;
6233
6234 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6235 {
6236 cand = data->vcands[i];
6237 cp = get_group_iv_cost (data, group, cand);
6238 if (cheaper_cost_pair (cp, best_cp))
6239 best_cp = cp;
6240 }
6241
6242 if (best_cp == NULL)
6243 {
6244 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6245 {
6246 cand = data->vcands[i];
6247 cp = get_group_iv_cost (data, group, cand);
6248 if (cheaper_cost_pair (cp, best_cp))
6249 best_cp = cp;
6250 }
6251 }
6252
6253 iv_ca_set_cp (data, ivs, group, best_cp);
6254 }
6255
6256 /* Get cost for assignment IVS. */
6257
6258 static comp_cost
6259 iv_ca_cost (class iv_ca *ivs)
6260 {
6261 /* This was a conditional expression but it triggered a bug in
6262 Sun C 5.5. */
6263 if (ivs->bad_groups)
6264 return infinite_cost;
6265 else
6266 return ivs->cost;
6267 }
6268
6269 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6270 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6271 respectively. */
6272
6273 static int
6274 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6275 struct iv_group *group, class cost_pair *old_cp,
6276 class cost_pair *new_cp)
6277 {
6278 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6279 unsigned old_n_invs = ivs->n_invs;
6280 iv_ca_set_cp (data, ivs, group, new_cp);
6281 unsigned new_n_invs = ivs->n_invs;
6282 iv_ca_set_cp (data, ivs, group, old_cp);
6283
6284 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6285 }
6286
6287 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6288 it before NEXT. */
6289
6290 static struct iv_ca_delta *
6291 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6292 class cost_pair *new_cp, struct iv_ca_delta *next)
6293 {
6294 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6295
6296 change->group = group;
6297 change->old_cp = old_cp;
6298 change->new_cp = new_cp;
6299 change->next = next;
6300
6301 return change;
6302 }
6303
6304 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6305 are rewritten. */
6306
6307 static struct iv_ca_delta *
6308 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6309 {
6310 struct iv_ca_delta *last;
6311
6312 if (!l2)
6313 return l1;
6314
6315 if (!l1)
6316 return l2;
6317
6318 for (last = l1; last->next; last = last->next)
6319 continue;
6320 last->next = l2;
6321
6322 return l1;
6323 }
6324
6325 /* Reverse the list of changes DELTA, forming the inverse to it. */
6326
6327 static struct iv_ca_delta *
6328 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6329 {
6330 struct iv_ca_delta *act, *next, *prev = NULL;
6331
6332 for (act = delta; act; act = next)
6333 {
6334 next = act->next;
6335 act->next = prev;
6336 prev = act;
6337
6338 std::swap (act->old_cp, act->new_cp);
6339 }
6340
6341 return prev;
6342 }
6343
6344 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6345 reverted instead. */
6346
6347 static void
6348 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6349 struct iv_ca_delta *delta, bool forward)
6350 {
6351 class cost_pair *from, *to;
6352 struct iv_ca_delta *act;
6353
6354 if (!forward)
6355 delta = iv_ca_delta_reverse (delta);
6356
6357 for (act = delta; act; act = act->next)
6358 {
6359 from = act->old_cp;
6360 to = act->new_cp;
6361 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6362 iv_ca_set_cp (data, ivs, act->group, to);
6363 }
6364
6365 if (!forward)
6366 iv_ca_delta_reverse (delta);
6367 }
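/* The delta list therefore works as an undo log.  The typical pattern
   used elsewhere in this file is:

     delta = iv_ca_delta_add (group, old_cp, new_cp, NULL);
     iv_ca_delta_commit (data, ivs, delta, true);    <- apply the change
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);   <- and undo it

   Committing with FORWARD == false first reverses the list and swaps each
   old/new pair, so the very same walk restores the previous assignment.  */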
6368
6369 /* Returns true if CAND is used in IVS. */
6370
6371 static bool
6372 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6373 {
6374 return ivs->n_cand_uses[cand->id] > 0;
6375 }
6376
6377 /* Returns number of induction variable candidates in the set IVS. */
6378
6379 static unsigned
6380 iv_ca_n_cands (class iv_ca *ivs)
6381 {
6382 return ivs->n_cands;
6383 }
6384
6385 /* Free the list of changes DELTA. */
6386
6387 static void
6388 iv_ca_delta_free (struct iv_ca_delta **delta)
6389 {
6390 struct iv_ca_delta *act, *next;
6391
6392 for (act = *delta; act; act = next)
6393 {
6394 next = act->next;
6395 free (act);
6396 }
6397
6398 *delta = NULL;
6399 }
6400
6401 /* Allocates new iv candidates assignment. */
6402
6403 static class iv_ca *
6404 iv_ca_new (struct ivopts_data *data)
6405 {
6406 class iv_ca *nw = XNEW (class iv_ca);
6407
6408 nw->upto = 0;
6409 nw->bad_groups = 0;
6410 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6411 data->vgroups.length ());
6412 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6413 nw->cands = BITMAP_ALLOC (NULL);
6414 nw->n_cands = 0;
6415 nw->n_invs = 0;
6416 nw->cand_use_cost = no_cost;
6417 nw->cand_cost = 0;
6418 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6419 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6420 nw->cost = no_cost;
6421
6422 return nw;
6423 }
6424
6425 /* Free memory occupied by the set IVS. */
6426
6427 static void
6428 iv_ca_free (class iv_ca **ivs)
6429 {
6430 free ((*ivs)->cand_for_group);
6431 free ((*ivs)->n_cand_uses);
6432 BITMAP_FREE ((*ivs)->cands);
6433 free ((*ivs)->n_inv_var_uses);
6434 free ((*ivs)->n_inv_expr_uses);
6435 free (*ivs);
6436 *ivs = NULL;
6437 }
6438
6439 /* Dumps IVS to FILE. */
6440
6441 static void
6442 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6443 {
6444 unsigned i;
6445 comp_cost cost = iv_ca_cost (ivs);
6446
6447 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6448 cost.complexity);
6449 fprintf (file, " reg_cost: %d\n",
6450 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6451 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6452 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6453 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6454 bitmap_print (file, ivs->cands, " candidates: ","\n");
6455
6456 for (i = 0; i < ivs->upto; i++)
6457 {
6458 struct iv_group *group = data->vgroups[i];
6459 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6460 if (cp)
6461 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6462 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6463 cp->cost.cost, cp->cost.complexity);
6464 else
6465 fprintf (file, " group:%d --> ??\n", group->id);
6466 }
6467
6468 const char *pref = "";
6469 fprintf (file, " invariant variables: ");
6470 for (i = 1; i <= data->max_inv_var_id; i++)
6471 if (ivs->n_inv_var_uses[i])
6472 {
6473 fprintf (file, "%s%d", pref, i);
6474 pref = ", ";
6475 }
6476
6477 pref = "";
6478 fprintf (file, "\n invariant expressions: ");
6479 for (i = 1; i <= data->max_inv_expr_id; i++)
6480 if (ivs->n_inv_expr_uses[i])
6481 {
6482 fprintf (file, "%s%d", pref, i);
6483 pref = ", ";
6484 }
6485
6486 fprintf (file, "\n\n");
6487 }
6488
6489 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6490 new set, and store differences in DELTA. Number of induction variables
6491 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
6492 the function tries to find a solution with a minimal number of iv candidates. */
6493
6494 static comp_cost
6495 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6496 struct iv_cand *cand, struct iv_ca_delta **delta,
6497 unsigned *n_ivs, bool min_ncand)
6498 {
6499 unsigned i;
6500 comp_cost cost;
6501 struct iv_group *group;
6502 class cost_pair *old_cp, *new_cp;
6503
6504 *delta = NULL;
6505 for (i = 0; i < ivs->upto; i++)
6506 {
6507 group = data->vgroups[i];
6508 old_cp = iv_ca_cand_for_group (ivs, group);
6509
6510 if (old_cp
6511 && old_cp->cand == cand)
6512 continue;
6513
6514 new_cp = get_group_iv_cost (data, group, cand);
6515 if (!new_cp)
6516 continue;
6517
6518 if (!min_ncand)
6519 {
6520 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6521 /* Skip if new_cp depends on more invariants. */
6522 if (cmp_invs > 0)
6523 continue;
6524
6525 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6526 /* Skip if new_cp is not cheaper. */
6527 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6528 continue;
6529 }
6530
6531 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6532 }
6533
6534 iv_ca_delta_commit (data, ivs, *delta, true);
6535 cost = iv_ca_cost (ivs);
6536 if (n_ivs)
6537 *n_ivs = iv_ca_n_cands (ivs);
6538 iv_ca_delta_commit (data, ivs, *delta, false);
6539
6540 return cost;
6541 }
6542
6543 /* Try narrowing set IVS by removing CAND. Return the cost of
6544 the new set and store the differences in DELTA. START is
6545 the candidate with which we start narrowing. */
6546
6547 static comp_cost
6548 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6549 struct iv_cand *cand, struct iv_cand *start,
6550 struct iv_ca_delta **delta)
6551 {
6552 unsigned i, ci;
6553 struct iv_group *group;
6554 class cost_pair *old_cp, *new_cp, *cp;
6555 bitmap_iterator bi;
6556 struct iv_cand *cnd;
6557 comp_cost cost, best_cost, acost;
6558
6559 *delta = NULL;
6560 for (i = 0; i < data->vgroups.length (); i++)
6561 {
6562 group = data->vgroups[i];
6563
6564 old_cp = iv_ca_cand_for_group (ivs, group);
6565 if (old_cp->cand != cand)
6566 continue;
6567
6568 best_cost = iv_ca_cost (ivs);
6569 /* Start narrowing with START. */
6570 new_cp = get_group_iv_cost (data, group, start);
6571
6572 if (data->consider_all_candidates)
6573 {
6574 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6575 {
6576 if (ci == cand->id || (start && ci == start->id))
6577 continue;
6578
6579 cnd = data->vcands[ci];
6580
6581 cp = get_group_iv_cost (data, group, cnd);
6582 if (!cp)
6583 continue;
6584
6585 iv_ca_set_cp (data, ivs, group, cp);
6586 acost = iv_ca_cost (ivs);
6587
6588 if (acost < best_cost)
6589 {
6590 best_cost = acost;
6591 new_cp = cp;
6592 }
6593 }
6594 }
6595 else
6596 {
6597 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6598 {
6599 if (ci == cand->id || (start && ci == start->id))
6600 continue;
6601
6602 cnd = data->vcands[ci];
6603
6604 cp = get_group_iv_cost (data, group, cnd);
6605 if (!cp)
6606 continue;
6607
6608 iv_ca_set_cp (data, ivs, group, cp);
6609 acost = iv_ca_cost (ivs);
6610
6611 if (acost < best_cost)
6612 {
6613 best_cost = acost;
6614 new_cp = cp;
6615 }
6616 }
6617 }
6618 /* Restore to old cp for use. */
6619 iv_ca_set_cp (data, ivs, group, old_cp);
6620
6621 if (!new_cp)
6622 {
6623 iv_ca_delta_free (delta);
6624 return infinite_cost;
6625 }
6626
6627 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6628 }
6629
6630 iv_ca_delta_commit (data, ivs, *delta, true);
6631 cost = iv_ca_cost (ivs);
6632 iv_ca_delta_commit (data, ivs, *delta, false);
6633
6634 return cost;
6635 }
6636
6637 /* Try optimizing the set of candidates IVS by removing candidates other
6638 than EXCEPT_CAND from it. Return cost of the new set, and store
6639 differences in DELTA. */
6640
6641 static comp_cost
6642 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6643 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6644 {
6645 bitmap_iterator bi;
6646 struct iv_ca_delta *act_delta, *best_delta;
6647 unsigned i;
6648 comp_cost best_cost, acost;
6649 struct iv_cand *cand;
6650
6651 best_delta = NULL;
6652 best_cost = iv_ca_cost (ivs);
6653
6654 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6655 {
6656 cand = data->vcands[i];
6657
6658 if (cand == except_cand)
6659 continue;
6660
6661 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6662
6663 if (acost < best_cost)
6664 {
6665 best_cost = acost;
6666 iv_ca_delta_free (&best_delta);
6667 best_delta = act_delta;
6668 }
6669 else
6670 iv_ca_delta_free (&act_delta);
6671 }
6672
6673 if (!best_delta)
6674 {
6675 *delta = NULL;
6676 return best_cost;
6677 }
6678
6679 /* Recurse to possibly remove other unnecessary ivs. */
6680 iv_ca_delta_commit (data, ivs, best_delta, true);
6681 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6682 iv_ca_delta_commit (data, ivs, best_delta, false);
6683 *delta = iv_ca_delta_join (best_delta, *delta);
6684 return best_cost;
6685 }
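/* In other words, pruning iterates to a fixed point: each invocation asks
   iv_ca_narrow to remove the single candidate whose removal lowers the
   cost most, commits that change, and recurses in case dropping it made
   yet another candidate redundant, joining the partial undo lists into
   the one returned through DELTA.  */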
6686
6687 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6688 cheaper local cost for GROUP than BEST_CP. Return pointer to
6689 the corresponding cost_pair, otherwise just return BEST_CP. */
6690
6691 static class cost_pair*
6692 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6693 unsigned int cand_idx, struct iv_cand *old_cand,
6694 class cost_pair *best_cp)
6695 {
6696 struct iv_cand *cand;
6697 class cost_pair *cp;
6698
6699 gcc_assert (old_cand != NULL && best_cp != NULL);
6700 if (cand_idx == old_cand->id)
6701 return best_cp;
6702
6703 cand = data->vcands[cand_idx];
6704 cp = get_group_iv_cost (data, group, cand);
6705 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6706 return cp;
6707
6708 return best_cp;
6709 }
6710
6711 /* Try breaking the locally optimal fixed point for IVS by replacing
6712 candidates that are used by more than one iv use. For each of those
6713 candidates, this function tries to represent the iv uses under that
6714 candidate using other ones with lower local cost, then tries to prune
6715 the new set. If the new set has lower cost, it returns the new cost
6716 after recording the candidate replacement in list DELTA. */
6717
6718 static comp_cost
6719 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6720 struct iv_ca_delta **delta)
6721 {
6722 bitmap_iterator bi, bj;
6723 unsigned int i, j, k;
6724 struct iv_cand *cand;
6725 comp_cost orig_cost, acost;
6726 struct iv_ca_delta *act_delta, *tmp_delta;
6727 class cost_pair *old_cp, *best_cp = NULL;
6728
6729 *delta = NULL;
6730 orig_cost = iv_ca_cost (ivs);
6731
6732 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6733 {
6734 if (ivs->n_cand_uses[i] == 1
6735 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6736 continue;
6737
6738 cand = data->vcands[i];
6739
6740 act_delta = NULL;
6741 /* Represent uses under current candidate using other ones with
6742 lower local cost. */
6743 for (j = 0; j < ivs->upto; j++)
6744 {
6745 struct iv_group *group = data->vgroups[j];
6746 old_cp = iv_ca_cand_for_group (ivs, group);
6747
6748 if (old_cp->cand != cand)
6749 continue;
6750
6751 best_cp = old_cp;
6752 if (data->consider_all_candidates)
6753 for (k = 0; k < data->vcands.length (); k++)
6754 best_cp = cheaper_cost_with_cand (data, group, k,
6755 old_cp->cand, best_cp);
6756 else
6757 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6758 best_cp = cheaper_cost_with_cand (data, group, k,
6759 old_cp->cand, best_cp);
6760
6761 if (best_cp == old_cp)
6762 continue;
6763
6764 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6765 }
6766 /* No need for further pruning. */
6767 if (!act_delta)
6768 continue;
6769
6770 /* Prune the new candidate set. */
6771 iv_ca_delta_commit (data, ivs, act_delta, true);
6772 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6773 iv_ca_delta_commit (data, ivs, act_delta, false);
6774 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6775
6776 if (acost < orig_cost)
6777 {
6778 *delta = act_delta;
6779 return acost;
6780 }
6781 else
6782 iv_ca_delta_free (&act_delta);
6783 }
6784
6785 return orig_cost;
6786 }
6787
6788 /* Tries to extend the set IVS in the best possible way in order to
6789 express GROUP. If ORIGINALP is true, prefer candidates from
6790 the original set of IVs, otherwise favor important candidates not
6791 based on any memory object. */
6792
6793 static bool
6794 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6795 struct iv_group *group, bool originalp)
6796 {
6797 comp_cost best_cost, act_cost;
6798 unsigned i;
6799 bitmap_iterator bi;
6800 struct iv_cand *cand;
6801 struct iv_ca_delta *best_delta = NULL, *act_delta;
6802 class cost_pair *cp;
6803
6804 iv_ca_add_group (data, ivs, group);
6805 best_cost = iv_ca_cost (ivs);
6806 cp = iv_ca_cand_for_group (ivs, group);
6807 if (cp)
6808 {
6809 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6810 iv_ca_set_no_cp (data, ivs, group);
6811 }
6812
6813 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6814 first try important candidates not based on any memory object. Only if
6815 this fails, try the specific ones. Rationale -- in loops with many
6816 variables the best choice often is to use just one generic biv. If we
6817 added here many ivs specific to the uses, the optimization algorithm later
6818 would be likely to get stuck in a local minimum, thus causing us to create
6819 too many ivs. The approach from few ivs to more seems more likely to be
6820 successful -- starting from few ivs, replacing an expensive use by a
6821 specific iv should always be a win. */
6822 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6823 {
6824 cand = data->vcands[i];
6825
6826 if (originalp && cand->pos != IP_ORIGINAL)
6827 continue;
6828
6829 if (!originalp && cand->iv->base_object != NULL_TREE)
6830 continue;
6831
6832 if (iv_ca_cand_used_p (ivs, cand))
6833 continue;
6834
6835 cp = get_group_iv_cost (data, group, cand);
6836 if (!cp)
6837 continue;
6838
6839 iv_ca_set_cp (data, ivs, group, cp);
6840 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6841 true);
6842 iv_ca_set_no_cp (data, ivs, group);
6843 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6844
6845 if (act_cost < best_cost)
6846 {
6847 best_cost = act_cost;
6848
6849 iv_ca_delta_free (&best_delta);
6850 best_delta = act_delta;
6851 }
6852 else
6853 iv_ca_delta_free (&act_delta);
6854 }
6855
6856 if (best_cost.infinite_cost_p ())
6857 {
6858 for (i = 0; i < group->n_map_members; i++)
6859 {
6860 cp = group->cost_map + i;
6861 cand = cp->cand;
6862 if (!cand)
6863 continue;
6864
6865 /* Already tried this. */
6866 if (cand->important)
6867 {
6868 if (originalp && cand->pos == IP_ORIGINAL)
6869 continue;
6870 if (!originalp && cand->iv->base_object == NULL_TREE)
6871 continue;
6872 }
6873
6874 if (iv_ca_cand_used_p (ivs, cand))
6875 continue;
6876
6877 act_delta = NULL;
6878 iv_ca_set_cp (data, ivs, group, cp);
6879 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6880 iv_ca_set_no_cp (data, ivs, group);
6881 act_delta = iv_ca_delta_add (group,
6882 iv_ca_cand_for_group (ivs, group),
6883 cp, act_delta);
6884
6885 if (act_cost < best_cost)
6886 {
6887 best_cost = act_cost;
6888
6889 if (best_delta)
6890 iv_ca_delta_free (&best_delta);
6891 best_delta = act_delta;
6892 }
6893 else
6894 iv_ca_delta_free (&act_delta);
6895 }
6896 }
6897
6898 iv_ca_delta_commit (data, ivs, best_delta, true);
6899 iv_ca_delta_free (&best_delta);
6900
6901 return !best_cost.infinite_cost_p ();
6902 }
6903
6904 /* Finds an initial assignment of candidates to uses. */
6905
6906 static class iv_ca *
6907 get_initial_solution (struct ivopts_data *data, bool originalp)
6908 {
6909 unsigned i;
6910 class iv_ca *ivs = iv_ca_new (data);
6911
6912 for (i = 0; i < data->vgroups.length (); i++)
6913 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6914 {
6915 iv_ca_free (&ivs);
6916 return NULL;
6917 }
6918
6919 return ivs;
6920 }
6921
6922 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6923 points to a bool variable; if it is true, this function also tries to
6924 break a locally optimal fixed point by replacing candidates in IVS. */
6925
6926 static bool
6927 try_improve_iv_set (struct ivopts_data *data,
6928 class iv_ca *ivs, bool *try_replace_p)
6929 {
6930 unsigned i, n_ivs;
6931 comp_cost acost, best_cost = iv_ca_cost (ivs);
6932 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6933 struct iv_cand *cand;
6934
6935 /* Try extending the set of induction variables by one. */
6936 for (i = 0; i < data->vcands.length (); i++)
6937 {
6938 cand = data->vcands[i];
6939
6940 if (iv_ca_cand_used_p (ivs, cand))
6941 continue;
6942
6943 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6944 if (!act_delta)
6945 continue;
6946
6947 /* If we successfully added the candidate and the set is small enough,
6948 try optimizing it by removing other candidates. */
6949 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6950 {
6951 iv_ca_delta_commit (data, ivs, act_delta, true);
6952 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6953 iv_ca_delta_commit (data, ivs, act_delta, false);
6954 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6955 }
6956
6957 if (acost < best_cost)
6958 {
6959 best_cost = acost;
6960 iv_ca_delta_free (&best_delta);
6961 best_delta = act_delta;
6962 }
6963 else
6964 iv_ca_delta_free (&act_delta);
6965 }
6966
6967 if (!best_delta)
6968 {
6969 /* Try removing the candidates from the set instead. */
6970 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6971
6972 if (!best_delta && *try_replace_p)
6973 {
6974 *try_replace_p = false;
6975 /* So far the candidate-selection algorithm tends to choose few IVs,
6976 which lets it handle loops that have many variables but whose best
6977 choice is often to use only one general biv. One weakness is that
6978 it can't handle the opposite case, in which different candidates
6979 should be chosen with respect to each use. To solve this problem,
6980 we replace candidates in the manner described by the comment of
6981 iv_ca_replace, thus giving the general algorithm a chance to break
6982 the locally optimal fixed point in these cases. */
6983 best_cost = iv_ca_replace (data, ivs, &best_delta);
6984 }
6985
6986 if (!best_delta)
6987 return false;
6988 }
6989
6990 iv_ca_delta_commit (data, ivs, best_delta, true);
6991 iv_ca_delta_free (&best_delta);
6992 return best_cost == iv_ca_cost (ivs);
6993 }
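/* Summing up, a single improvement step either (a) adds one new candidate
   (re-pruning the set if it is small enough), (b) only prunes existing
   candidates, or (c) as a last resort lets iv_ca_replace swap candidates
   to escape a local optimum; the caller keeps iterating for as long as
   the committed step reproduces exactly the cost it predicted.  */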
6994
6995 /* Attempts to find the optimal set of induction variables. We do simple
6996 greedy heuristic -- we try to replace at most one candidate in the selected
6997 solution and remove the unused ivs while this improves the cost. */
6998
6999 static class iv_ca *
7000 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7001 {
7002 class iv_ca *set;
7003 bool try_replace_p = true;
7004
7005 /* Get the initial solution. */
7006 set = get_initial_solution (data, originalp);
7007 if (!set)
7008 {
7009 if (dump_file && (dump_flags & TDF_DETAILS))
7010 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7011 return NULL;
7012 }
7013
7014 if (dump_file && (dump_flags & TDF_DETAILS))
7015 {
7016 fprintf (dump_file, "Initial set of candidates:\n");
7017 iv_ca_dump (data, dump_file, set);
7018 }
7019
7020 while (try_improve_iv_set (data, set, &try_replace_p))
7021 {
7022 if (dump_file && (dump_flags & TDF_DETAILS))
7023 {
7024 fprintf (dump_file, "Improved to:\n");
7025 iv_ca_dump (data, dump_file, set);
7026 }
7027 }
7028
7029 /* If the set has infinite_cost, it can't be optimal. */
7030 if (iv_ca_cost (set).infinite_cost_p ())
7031 {
7032 if (dump_file && (dump_flags & TDF_DETAILS))
7033 fprintf (dump_file,
7034 "Overflow to infinite cost in try_improve_iv_set.\n");
7035 iv_ca_free (&set);
7036 }
7037 return set;
7038 }
7039
7040 static class iv_ca *
7041 find_optimal_iv_set (struct ivopts_data *data)
7042 {
7043 unsigned i;
7044 comp_cost cost, origcost;
7045 class iv_ca *set, *origset;
7046
7047 /* Determine the cost based on a strategy that starts with the original
7048 IVs, and try again using a strategy that prefers important candidates
7049 not based on any memory object. */
7050 origset = find_optimal_iv_set_1 (data, true);
7051 set = find_optimal_iv_set_1 (data, false);
7052
7053 if (!origset && !set)
7054 return NULL;
7055
7056 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7057 cost = set ? iv_ca_cost (set) : infinite_cost;
7058
7059 if (dump_file && (dump_flags & TDF_DETAILS))
7060 {
7061 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7062 origcost.cost, origcost.complexity);
7063 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7064 cost.cost, cost.complexity);
7065 }
7066
7067 /* Choose the one with the best cost. */
7068 if (origcost <= cost)
7069 {
7070 if (set)
7071 iv_ca_free (&set);
7072 set = origset;
7073 }
7074 else if (origset)
7075 iv_ca_free (&origset);
7076
7077 for (i = 0; i < data->vgroups.length (); i++)
7078 {
7079 struct iv_group *group = data->vgroups[i];
7080 group->selected = iv_ca_cand_for_group (set, group)->cand;
7081 }
7082
7083 return set;
7084 }
7085
7086 /* Creates a new induction variable corresponding to CAND. */
7087
7088 static void
7089 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7090 {
7091 gimple_stmt_iterator incr_pos;
7092 tree base;
7093 struct iv_use *use;
7094 struct iv_group *group;
7095 bool after = false;
7096
7097 gcc_assert (cand->iv != NULL);
7098
7099 switch (cand->pos)
7100 {
7101 case IP_NORMAL:
7102 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7103 break;
7104
7105 case IP_END:
7106 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7107 after = true;
7108 break;
7109
7110 case IP_AFTER_USE:
7111 after = true;
7112 /* fall through */
7113 case IP_BEFORE_USE:
7114 incr_pos = gsi_for_stmt (cand->incremented_at);
7115 break;
7116
7117 case IP_ORIGINAL:
7118 /* Mark that the iv is preserved. */
7119 name_info (data, cand->var_before)->preserve_biv = true;
7120 name_info (data, cand->var_after)->preserve_biv = true;
7121
7122 /* Rewrite the increment so that it uses var_before directly. */
7123 use = find_interesting_uses_op (data, cand->var_after);
7124 group = data->vgroups[use->group_id];
7125 group->selected = cand;
7126 return;
7127 }
7128
7129 gimple_add_tmp_var (cand->var_before);
7130
7131 base = unshare_expr (cand->iv->base);
7132
7133 create_iv (base, unshare_expr (cand->iv->step),
7134 cand->var_before, data->current_loop,
7135 &incr_pos, after, &cand->var_before, &cand->var_after);
7136 }
7137
7138 /* Creates new induction variables described in SET. */
7139
7140 static void
7141 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7142 {
7143 unsigned i;
7144 struct iv_cand *cand;
7145 bitmap_iterator bi;
7146
7147 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7148 {
7149 cand = data->vcands[i];
7150 create_new_iv (data, cand);
7151 }
7152
7153 if (dump_file && (dump_flags & TDF_DETAILS))
7154 {
7155 fprintf (dump_file, "Selected IV set for loop %d",
7156 data->current_loop->num);
7157 if (data->loop_loc != UNKNOWN_LOCATION)
7158 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7159 LOCATION_LINE (data->loop_loc));
7160 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7161 avg_loop_niter (data->current_loop));
7162 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7163 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7164 {
7165 cand = data->vcands[i];
7166 dump_cand (dump_file, cand);
7167 }
7168 fprintf (dump_file, "\n");
7169 }
7170 }
7171
7172 /* Rewrites USE (definition of iv used in a nonlinear expression)
7173 using candidate CAND. */
7174
7175 static void
7176 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7177 struct iv_use *use, struct iv_cand *cand)
7178 {
7179 gassign *ass;
7180 gimple_stmt_iterator bsi;
7181 tree comp, type = get_use_type (use), tgt;
7182
7183 /* An important special case -- if we are asked to express value of
7184 the original iv by itself, just exit; there is no need to
7185 introduce a new computation (that might also need casting the
7186 variable to unsigned and back). */
7187 if (cand->pos == IP_ORIGINAL
7188 && cand->incremented_at == use->stmt)
7189 {
7190 tree op = NULL_TREE;
7191 enum tree_code stmt_code;
7192
7193 gcc_assert (is_gimple_assign (use->stmt));
7194 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7195
7196 /* Check whether we may leave the computation unchanged.
7197 This is the case only if it does not rely on other
7198 computations in the loop -- otherwise, the computation
7199 we rely upon may be removed in remove_unused_ivs,
7200 thus leading to ICE. */
7201 stmt_code = gimple_assign_rhs_code (use->stmt);
7202 if (stmt_code == PLUS_EXPR
7203 || stmt_code == MINUS_EXPR
7204 || stmt_code == POINTER_PLUS_EXPR)
7205 {
7206 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7207 op = gimple_assign_rhs2 (use->stmt);
7208 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7209 op = gimple_assign_rhs1 (use->stmt);
7210 }
7211
7212 if (op != NULL_TREE)
7213 {
7214 if (expr_invariant_in_loop_p (data->current_loop, op))
7215 return;
7216 if (TREE_CODE (op) == SSA_NAME)
7217 {
7218 struct iv *iv = get_iv (data, op);
7219 if (iv != NULL && integer_zerop (iv->step))
7220 return;
7221 }
7222 }
7223 }
7224
7225 switch (gimple_code (use->stmt))
7226 {
7227 case GIMPLE_PHI:
7228 tgt = PHI_RESULT (use->stmt);
7229
7230 /* If we should keep the biv, do not replace it. */
7231 if (name_info (data, tgt)->preserve_biv)
7232 return;
7233
7234 bsi = gsi_after_labels (gimple_bb (use->stmt));
7235 break;
7236
7237 case GIMPLE_ASSIGN:
7238 tgt = gimple_assign_lhs (use->stmt);
7239 bsi = gsi_for_stmt (use->stmt);
7240 break;
7241
7242 default:
7243 gcc_unreachable ();
7244 }
7245
7246 aff_tree aff_inv, aff_var;
7247 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7248 use, cand, &aff_inv, &aff_var))
7249 gcc_unreachable ();
7250
7251 unshare_aff_combination (&aff_inv);
7252 unshare_aff_combination (&aff_var);
7253 /* Prefer a CSE opportunity over hoisting a loop invariant by adding the
7254 offset last, so that iv_uses with different offsets can be CSEed. */
7255 poly_widest_int offset = aff_inv.offset;
7256 aff_inv.offset = 0;
7257
7258 gimple_seq stmt_list = NULL, seq = NULL;
7259 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7260 tree comp_op2 = aff_combination_to_tree (&aff_var);
7261 gcc_assert (comp_op1 && comp_op2);
7262
7263 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7264 gimple_seq_add_seq (&stmt_list, seq);
7265 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7266 gimple_seq_add_seq (&stmt_list, seq);
7267
7268 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7269 std::swap (comp_op1, comp_op2);
7270
7271 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7272 {
7273 comp = fold_build_pointer_plus (comp_op1,
7274 fold_convert (sizetype, comp_op2));
7275 comp = fold_build_pointer_plus (comp,
7276 wide_int_to_tree (sizetype, offset));
7277 }
7278 else
7279 {
7280 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7281 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7282 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7283 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7284 }
7285
7286 comp = fold_convert (type, comp);
7287 if (!valid_gimple_rhs_p (comp)
7288 || (gimple_code (use->stmt) != GIMPLE_PHI
7289 /* We can't allow re-allocating the stmt as it might be pointed
7290 to still. */
7291 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7292 >= gimple_num_ops (gsi_stmt (bsi)))))
7293 {
7294 comp = force_gimple_operand (comp, &seq, true, NULL);
7295 gimple_seq_add_seq (&stmt_list, seq);
7296 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7297 {
7298 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7299 /* As this isn't a plain copy we have to reset alignment
7300 information. */
7301 if (SSA_NAME_PTR_INFO (comp))
7302 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7303 }
7304 }
7305
7306 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7307 if (gimple_code (use->stmt) == GIMPLE_PHI)
7308 {
7309 ass = gimple_build_assign (tgt, comp);
7310 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7311
7312 bsi = gsi_for_stmt (use->stmt);
7313 remove_phi_node (&bsi, false);
7314 }
7315 else
7316 {
7317 gimple_assign_set_rhs_from_tree (&bsi, comp);
7318 use->stmt = gsi_stmt (bsi);
7319 }
7320 }
7321
7322 /* Performs a peephole optimization to reorder the iv update statement with
7323 a mem ref to enable instruction combining in later phases. The mem ref uses
7324 the iv value before the update, so the reordering transformation requires
7325 adjustment of the offset. CAND is the selected IV_CAND.
7326
7327 Example:
7328
7329 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7330 iv2 = iv1 + 1;
7331
7332 if (t < val) (1)
7333 goto L;
7334 goto Head;
7335
7336
7337 directly propagating t over to (1) will introduce an overlapping live range,
7338 thus increasing register pressure. This peephole transforms it into:
7339
7340
7341 iv2 = iv1 + 1;
7342 t = MEM_REF (base, iv2, 8, 8);
7343 if (t < val)
7344 goto L;
7345 goto Head;
7346 */
7347
7348 static void
7349 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7350 {
7351 tree var_after;
7352 gimple *iv_update, *stmt;
7353 basic_block bb;
7354 gimple_stmt_iterator gsi, gsi_iv;
7355
7356 if (cand->pos != IP_NORMAL)
7357 return;
7358
7359 var_after = cand->var_after;
7360 iv_update = SSA_NAME_DEF_STMT (var_after);
7361
7362 bb = gimple_bb (iv_update);
7363 gsi = gsi_last_nondebug_bb (bb);
7364 stmt = gsi_stmt (gsi);
7365
7366 /* Only handle conditional statements for now. */
7367 if (gimple_code (stmt) != GIMPLE_COND)
7368 return;
7369
7370 gsi_prev_nondebug (&gsi);
7371 stmt = gsi_stmt (gsi);
7372 if (stmt != iv_update)
7373 return;
7374
7375 gsi_prev_nondebug (&gsi);
7376 if (gsi_end_p (gsi))
7377 return;
7378
7379 stmt = gsi_stmt (gsi);
7380 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7381 return;
7382
7383 if (stmt != use->stmt)
7384 return;
7385
7386 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7387 return;
7388
7389 if (dump_file && (dump_flags & TDF_DETAILS))
7390 {
7391 fprintf (dump_file, "Reordering \n");
7392 print_gimple_stmt (dump_file, iv_update, 0);
7393 print_gimple_stmt (dump_file, use->stmt, 0);
7394 fprintf (dump_file, "\n");
7395 }
7396
7397 gsi = gsi_for_stmt (use->stmt);
7398 gsi_iv = gsi_for_stmt (iv_update);
7399 gsi_move_before (&gsi_iv, &gsi);
7400
7401 cand->pos = IP_BEFORE_USE;
7402 cand->incremented_at = use->stmt;
7403 }
7404
7405 /* Return the alias pointer type that should be used for a MEM_REF
7406 associated with USE, which has type USE_PTR_ADDRESS. */
7407
7408 static tree
7409 get_alias_ptr_type_for_ptr_address (iv_use *use)
7410 {
7411 gcall *call = as_a <gcall *> (use->stmt);
7412 switch (gimple_call_internal_fn (call))
7413 {
7414 case IFN_MASK_LOAD:
7415 case IFN_MASK_STORE:
7416 case IFN_MASK_LOAD_LANES:
7417 case IFN_MASK_STORE_LANES:
7418 /* The second argument contains the correct alias type. */
7419 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7420 return TREE_TYPE (gimple_call_arg (call, 1));
7421
7422 default:
7423 gcc_unreachable ();
7424 }
7425 }
7426
7427
7428 /* Rewrites USE (address that is an iv) using candidate CAND. */
7429
7430 static void
7431 rewrite_use_address (struct ivopts_data *data,
7432 struct iv_use *use, struct iv_cand *cand)
7433 {
7434 aff_tree aff;
7435 bool ok;
7436
7437 adjust_iv_update_pos (cand, use);
7438 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7439 gcc_assert (ok);
7440 unshare_aff_combination (&aff);
7441
7442 /* To avoid undefined overflow problems, all IV candidates use unsigned
7443 integer types. The drawback is that this makes it impossible for
7444 create_mem_ref to distinguish an IV that is based on a memory object
7445 from one that represents simply an offset.
7446
7447 To work around this problem, we pass a hint to create_mem_ref that
7448 indicates which variable (if any) in aff is an IV based on a memory
7449 object. Note that we only consider the candidate. If this is not
7450 based on an object, the base of the reference is in some subexpression
7451 of the use -- but these will use pointer types, so they are recognized
7452 by the create_mem_ref heuristics anyway. */
7453 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7454 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7455 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7456 tree type = use->mem_type;
7457 tree alias_ptr_type;
7458 if (use->type == USE_PTR_ADDRESS)
7459 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7460 else
7461 {
7462 gcc_assert (type == TREE_TYPE (*use->op_p));
7463 unsigned int align = get_object_alignment (*use->op_p);
7464 if (align != TYPE_ALIGN (type))
7465 type = build_aligned_type (type, align);
7466 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7467 }
7468 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7469 iv, base_hint, data->speed);
7470
7471 if (use->type == USE_PTR_ADDRESS)
7472 {
7473 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7474 ref = fold_convert (get_use_type (use), ref);
7475 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7476 true, GSI_SAME_STMT);
7477 }
7478 else
7479 copy_ref_info (ref, *use->op_p);
7480
7481 *use->op_p = ref;
7482 }
7483
7484 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7485 candidate CAND. */
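/* Sketch of the successful case, with invented SSA names: an exit test

     if (i_5 < n_4)

   is rewritten as

     if (ivtmp_17 < _23)

   where ivtmp_17 is the candidate's value at the use and _23 is the
   bound, forced to a gimple operand on the loop preheader edge.  */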
7486
7487 static void
7488 rewrite_use_compare (struct ivopts_data *data,
7489 struct iv_use *use, struct iv_cand *cand)
7490 {
7491 tree comp, op, bound;
7492 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7493 enum tree_code compare;
7494 struct iv_group *group = data->vgroups[use->group_id];
7495 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7496
7497 bound = cp->value;
7498 if (bound)
7499 {
7500 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7501 tree var_type = TREE_TYPE (var);
7502 gimple_seq stmts;
7503
7504 if (dump_file && (dump_flags & TDF_DETAILS))
7505 {
7506 fprintf (dump_file, "Replacing exit test: ");
7507 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7508 }
7509 compare = cp->comp;
7510 bound = unshare_expr (fold_convert (var_type, bound));
7511 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7512 if (stmts)
7513 gsi_insert_seq_on_edge_immediate (
7514 loop_preheader_edge (data->current_loop),
7515 stmts);
7516
7517 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7518 gimple_cond_set_lhs (cond_stmt, var);
7519 gimple_cond_set_code (cond_stmt, compare);
7520 gimple_cond_set_rhs (cond_stmt, op);
7521 return;
7522 }
7523
7524 /* The induction variable elimination failed; just express the original
7525 giv. */
7526 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7527 gcc_assert (comp != NULL_TREE);
7528 gcc_assert (use->op_p != NULL);
7529 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7530 SSA_NAME_VAR (*use->op_p),
7531 true, GSI_SAME_STMT);
7532 }
7533
7534 /* Rewrite the groups using the selected induction variables. */
7535
7536 static void
7537 rewrite_groups (struct ivopts_data *data)
7538 {
7539 unsigned i, j;
7540
7541 for (i = 0; i < data->vgroups.length (); i++)
7542 {
7543 struct iv_group *group = data->vgroups[i];
7544 struct iv_cand *cand = group->selected;
7545
7546 gcc_assert (cand);
7547
7548 if (group->type == USE_NONLINEAR_EXPR)
7549 {
7550 for (j = 0; j < group->vuses.length (); j++)
7551 {
7552 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7553 update_stmt (group->vuses[j]->stmt);
7554 }
7555 }
7556 else if (address_p (group->type))
7557 {
7558 for (j = 0; j < group->vuses.length (); j++)
7559 {
7560 rewrite_use_address (data, group->vuses[j], cand);
7561 update_stmt (group->vuses[j]->stmt);
7562 }
7563 }
7564 else
7565 {
7566 gcc_assert (group->type == USE_COMPARE);
7567
7568 for (j = 0; j < group->vuses.length (); j++)
7569 {
7570 rewrite_use_compare (data, group->vuses[j], cand);
7571 update_stmt (group->vuses[j]->stmt);
7572 }
7573 }
7574 }
7575 }
7576
7577 /* Removes the ivs that are not used after rewriting. */
7578
7579 static void
7580 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7581 {
7582 unsigned j;
7583 bitmap_iterator bi;
7584
7585 /* Figure out an order in which to release SSA DEFs so that we don't
7586 release something that we'd have to propagate into a debug stmt
7587 afterwards. */
7588 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7589 {
7590 struct version_info *info;
7591
7592 info = ver_info (data, j);
7593 if (info->iv
7594 && !integer_zerop (info->iv->step)
7595 && !info->inv_id
7596 && !info->iv->nonlin_use
7597 && !info->preserve_biv)
7598 {
7599 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7600
7601 tree def = info->iv->ssa_name;
7602
7603 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7604 {
7605 imm_use_iterator imm_iter;
7606 use_operand_p use_p;
7607 gimple *stmt;
7608 int count = 0;
7609
7610 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7611 {
7612 if (!gimple_debug_bind_p (stmt))
7613 continue;
7614
7615 /* We just want to determine whether to do nothing
7616 (count == 0), to substitute the computed
7617 expression into a single use of the SSA DEF by
7618 itself (count == 1), or to use a debug temp
7619 because the SSA DEF is used multiple times or as
7620 part of a larger expression (count > 1). */
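/* For example (hypothetical binds): a single "# DEBUG x => i_1"
   leaves count == 1 and the computed expression is substituted
   into that bind directly, whereas "# DEBUG x => i_1 + 4" or
   several binds of i_1 push count above 1 and a debug temporary
   is used instead.  */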
7621 count++;
7622 if (gimple_debug_bind_get_value (stmt) != def)
7623 count++;
7624
7625 if (count > 1)
7626 BREAK_FROM_IMM_USE_STMT (imm_iter);
7627 }
7628
7629 if (!count)
7630 continue;
7631
7632 struct iv_use dummy_use;
7633 struct iv_cand *best_cand = NULL, *cand;
7634 unsigned i, best_pref = 0, cand_pref;
7635 tree comp = NULL_TREE;
7636
7637 memset (&dummy_use, 0, sizeof (dummy_use));
7638 dummy_use.iv = info->iv;
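/* The preference below is a simple weighted score: 4 points for a
   step identical to the dying IV's step, 2 for a base with the same
   machine mode, 1 for a constant base.  E.g. a candidate matching
   all three scores 7 and is preferred over one that matches only
   the step (4).  */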
7639 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7640 {
7641 cand = data->vgroups[i]->selected;
7642 if (cand == best_cand)
7643 continue;
7644 cand_pref = operand_equal_p (cand->iv->step,
7645 info->iv->step, 0)
7646 ? 4 : 0;
7647 cand_pref
7648 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7649 == TYPE_MODE (TREE_TYPE (info->iv->base))
7650 ? 2 : 0;
7651 cand_pref
7652 += TREE_CODE (cand->iv->base) == INTEGER_CST
7653 ? 1 : 0;
7654 if (best_cand == NULL || best_pref < cand_pref)
7655 {
7656 tree this_comp
7657 = get_debug_computation_at (data->current_loop,
7658 SSA_NAME_DEF_STMT (def),
7659 &dummy_use, cand);
7660 if (this_comp)
7661 {
7662 best_cand = cand;
7663 best_pref = cand_pref;
7664 comp = this_comp;
7665 }
7666 }
7667 }
7668
7669 if (!best_cand)
7670 continue;
7671
7672 comp = unshare_expr (comp);
7673 if (count > 1)
7674 {
7675 tree vexpr = make_node (DEBUG_EXPR_DECL);
7676 DECL_ARTIFICIAL (vexpr) = 1;
7677 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7678 if (SSA_NAME_VAR (def))
7679 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7680 else
7681 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7682 gdebug *def_temp
7683 = gimple_build_debug_bind (vexpr, comp, NULL);
7684 gimple_stmt_iterator gsi;
7685
7686 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7687 gsi = gsi_after_labels (gimple_bb
7688 (SSA_NAME_DEF_STMT (def)));
7689 else
7690 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7691
7692 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7693 comp = vexpr;
7694 }
7695
7696 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7697 {
7698 if (!gimple_debug_bind_p (stmt))
7699 continue;
7700
7701 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7702 SET_USE (use_p, comp);
7703
7704 update_stmt (stmt);
7705 }
7706 }
7707 }
7708 }
7709 }
7710
7711 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7712 for hash_map::traverse. */
7713
7714 bool
7715 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7716 {
7717 free (value);
7718 return true;
7719 }
7720
7721 /* Frees data allocated by the optimization of a single loop. */
7722
7723 static void
7724 free_loop_data (struct ivopts_data *data)
7725 {
7726 unsigned i, j;
7727 bitmap_iterator bi;
7728 tree obj;
7729
7730 if (data->niters)
7731 {
7732 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7733 delete data->niters;
7734 data->niters = NULL;
7735 }
7736
7737 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7738 {
7739 struct version_info *info;
7740
7741 info = ver_info (data, i);
7742 info->iv = NULL;
7743 info->has_nonlin_use = false;
7744 info->preserve_biv = false;
7745 info->inv_id = 0;
7746 }
7747 bitmap_clear (data->relevant);
7748 bitmap_clear (data->important_candidates);
7749
7750 for (i = 0; i < data->vgroups.length (); i++)
7751 {
7752 struct iv_group *group = data->vgroups[i];
7753
7754 for (j = 0; j < group->vuses.length (); j++)
7755 free (group->vuses[j]);
7756 group->vuses.release ();
7757
7758 BITMAP_FREE (group->related_cands);
7759 for (j = 0; j < group->n_map_members; j++)
7760 {
7761 if (group->cost_map[j].inv_vars)
7762 BITMAP_FREE (group->cost_map[j].inv_vars);
7763 if (group->cost_map[j].inv_exprs)
7764 BITMAP_FREE (group->cost_map[j].inv_exprs);
7765 }
7766
7767 free (group->cost_map);
7768 free (group);
7769 }
7770 data->vgroups.truncate (0);
7771
7772 for (i = 0; i < data->vcands.length (); i++)
7773 {
7774 struct iv_cand *cand = data->vcands[i];
7775
7776 if (cand->inv_vars)
7777 BITMAP_FREE (cand->inv_vars);
7778 if (cand->inv_exprs)
7779 BITMAP_FREE (cand->inv_exprs);
7780 free (cand);
7781 }
7782 data->vcands.truncate (0);
7783
7784 if (data->version_info_size < num_ssa_names)
7785 {
7786 data->version_info_size = 2 * num_ssa_names;
7787 free (data->version_info);
7788 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7789 }
7790
7791 data->max_inv_var_id = 0;
7792 data->max_inv_expr_id = 0;
7793
7794 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7795 SET_DECL_RTL (obj, NULL_RTX);
7796
7797 decl_rtl_to_reset.truncate (0);
7798
7799 data->inv_expr_tab->empty ();
7800
7801 data->iv_common_cand_tab->empty ();
7802 data->iv_common_cands.truncate (0);
7803 }
7804
7805 /* Finalizes the data structures used by the iv optimization pass, held
7806 in DATA. */
7807
7808 static void
7809 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7810 {
7811 free_loop_data (data);
7812 free (data->version_info);
7813 BITMAP_FREE (data->relevant);
7814 BITMAP_FREE (data->important_candidates);
7815
7816 decl_rtl_to_reset.release ();
7817 data->vgroups.release ();
7818 data->vcands.release ();
7819 delete data->inv_expr_tab;
7820 data->inv_expr_tab = NULL;
7821 free_affine_expand_cache (&data->name_expansion_cache);
7822 if (data->base_object_map)
7823 delete data->base_object_map;
7824 delete data->iv_common_cand_tab;
7825 data->iv_common_cand_tab = NULL;
7826 data->iv_common_cands.release ();
7827 obstack_free (&data->iv_obstack, NULL);
7828 }
7829
7830 /* Returns true if the loop body BODY, made up of NUM_NODES blocks, includes any function calls. */
7831
7832 static bool
7833 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7834 {
7835 gimple_stmt_iterator gsi;
7836 unsigned i;
7837
7838 for (i = 0; i < num_nodes; i++)
7839 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7840 {
7841 gimple *stmt = gsi_stmt (gsi);
7842 if (is_gimple_call (stmt)
7843 && !gimple_call_internal_p (stmt)
7844 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7845 return true;
7846 }
7847 return false;
7848 }
7849
7850 /* Determine cost scaling factor for basic blocks in loop. */
7851 #define COST_SCALING_FACTOR_BOUND (20)
7852
7853 static void
7854 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7855 {
7856 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7857 if (!data->speed || lfreq <= 0)
7858 return;
7859
7860 int max_freq = lfreq;
7861 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7862 {
7863 body[i]->aux = (void *)(intptr_t) 1;
7864 if (max_freq < body[i]->count.to_frequency (cfun))
7865 max_freq = body[i]->count.to_frequency (cfun);
7866 }
7867 if (max_freq > lfreq)
7868 {
7869 int divisor, factor;
7870 /* Check if scaling factor itself needs to be scaled by the bound. This
7871 is to avoid overflow when scaling cost according to profile info. */
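/* Worked example with made-up frequencies: lfreq == 10 and
   max_freq == 1000 give a ratio of 100, above the bound, so
   factor == COST_SCALING_FACTOR_BOUND and divisor == max_freq;
   a block with bfreq == 1000 is then scaled by 20 * 1000 / 1000
   == 20 instead of the unbounded 1000 / 10 == 100.  */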
7872 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7873 {
7874 divisor = max_freq;
7875 factor = COST_SCALING_FACTOR_BOUND;
7876 }
7877 else
7878 {
7879 divisor = lfreq;
7880 factor = 1;
7881 }
7882 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7883 {
7884 int bfreq = body[i]->count.to_frequency (cfun);
7885 if (bfreq <= lfreq)
7886 continue;
7887
7888 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7889 }
7890 }
7891 }
7892
7893 /* Find the doloop comparison use and set its doloop_p flag if found. */
7894
7895 static bool
7896 find_doloop_use (struct ivopts_data *data)
7897 {
7898 struct loop *loop = data->current_loop;
7899
7900 for (unsigned i = 0; i < data->vgroups.length (); i++)
7901 {
7902 struct iv_group *group = data->vgroups[i];
7903 if (group->type == USE_COMPARE)
7904 {
7905 gcc_assert (group->vuses.length () == 1);
7906 struct iv_use *use = group->vuses[0];
7907 gimple *stmt = use->stmt;
7908 if (gimple_code (stmt) == GIMPLE_COND)
7909 {
7910 basic_block bb = gimple_bb (stmt);
7911 edge true_edge, false_edge;
7912 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
7913 /* This comparison branches to the loop latch. Require the latch to be
7914 empty for now. */
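/* Illustrative shape, with invented block names:

     <cond>:   if (i_7 != 0) goto <latch>; else goto <exit>;
     <latch>:  goto <header>;   // empty latch

   One successor of the condition is the empty latch, so this
   compare group is marked as the doloop use.  */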
7915 if ((loop->latch == true_edge->dest
7916 || loop->latch == false_edge->dest)
7917 && empty_block_p (loop->latch))
7918 {
7919 group->doloop_p = true;
7920 if (dump_file && (dump_flags & TDF_DETAILS))
7921 {
7922 fprintf (dump_file, "Doloop cmp iv use: ");
7923 print_gimple_stmt (dump_file, stmt, 0, TDF_DETAILS);
7924 }
7925 return true;
7926 }
7927 }
7928 }
7929 }
7930
7931 return false;
7932 }
7933
7934 /* For targets that support doloop, predict whether the later RTL doloop
7935 transformation will be applied to this loop; if so, detect the doloop use
7936 and set the doloop_use_p flag. */
7937
7938 void
7939 analyze_and_mark_doloop_use (struct ivopts_data *data)
7940 {
7941 data->doloop_use_p = false;
7942
7943 if (!flag_branch_on_count_reg)
7944 return;
7945
7946 if (!generic_predict_doloop_p (data))
7947 return;
7948
7949 if (find_doloop_use (data))
7950 {
7951 data->doloop_use_p = true;
7952 if (dump_file && (dump_flags & TDF_DETAILS))
7953 {
7954 struct loop *loop = data->current_loop;
7955 fprintf (dump_file,
7956 "Predict loop %d can perform"
7957 " doloop optimization later.\n",
7958 loop->num);
7959 flow_loop_dump (loop, dump_file, NULL, 1);
7960 }
7961 }
7962 }
7963
7964 /* Optimizes the LOOP. Returns true if anything changed. */
7965
7966 static bool
7967 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
7968 bitmap toremove)
7969 {
7970 bool changed = false;
7971 class iv_ca *iv_ca;
7972 edge exit = single_dom_exit (loop);
7973 basic_block *body;
7974
7975 gcc_assert (!data->niters);
7976 data->current_loop = loop;
7977 data->loop_loc = find_loop_location (loop).get_location_t ();
7978 data->speed = optimize_loop_for_speed_p (loop);
7979
7980 if (dump_file && (dump_flags & TDF_DETAILS))
7981 {
7982 fprintf (dump_file, "Processing loop %d", loop->num);
7983 if (data->loop_loc != UNKNOWN_LOCATION)
7984 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7985 LOCATION_LINE (data->loop_loc));
7986 fprintf (dump_file, "\n");
7987
7988 if (exit)
7989 {
7990 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7991 exit->src->index, exit->dest->index);
7992 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7993 fprintf (dump_file, "\n");
7994 }
7995
7996 fprintf (dump_file, "\n");
7997 }
7998
7999 body = get_loop_body (loop);
8000 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8001 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8002
8003 data->loop_single_exit_p
8004 = exit != NULL && loop_only_exit_p (loop, body, exit);
8005
8006 /* For each ssa name determines whether it behaves as an induction variable
8007 in some loop. */
8008 if (!find_induction_variables (data))
8009 goto finish;
8010
8011 /* Finds interesting uses (item 1). */
8012 find_interesting_uses (data);
8013 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8014 goto finish;
8015
8016 /* Determine cost scaling factor for basic blocks in loop. */
8017 determine_scaling_factor (data, body);
8018
8019 /* Analyze doloop possibility and mark the doloop use if predicted. */
8020 analyze_and_mark_doloop_use (data);
8021
8022 /* Finds candidates for the induction variables (item 2). */
8023 find_iv_candidates (data);
8024
8025 /* Calculates the costs (item 3, part 1). */
8026 determine_iv_costs (data);
8027 determine_group_iv_costs (data);
8028 determine_set_costs (data);
8029
8030 /* Find the optimal set of induction variables (item 3, part 2). */
8031 iv_ca = find_optimal_iv_set (data);
8032 /* Cleanup basic block aux field. */
8033 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8034 body[i]->aux = NULL;
8035 if (!iv_ca)
8036 goto finish;
8037 changed = true;
8038
8039 /* Create the new induction variables (item 4, part 1). */
8040 create_new_ivs (data, iv_ca);
8041 iv_ca_free (&iv_ca);
8042
8043 /* Rewrite the uses (item 4, part 2). */
8044 rewrite_groups (data);
8045
8046 /* Remove the ivs that are unused after rewriting. */
8047 remove_unused_ivs (data, toremove);
8048
8049 finish:
8050 free (body);
8051 free_loop_data (data);
8052
8053 return changed;
8054 }
8055
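/* Illustrative source-level example (hypothetical, not from a dump):
   for a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   the pass may keep a single pointer-like candidate, rewrite the
   access a[i] as a MEM_REF based on that candidate, express the exit
   test through the same candidate where possible, and remove the
   original counter as unused.  */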
8056 /* Main entry point. Optimizes induction variables in loops. */
8057
8058 void
8059 tree_ssa_iv_optimize (void)
8060 {
8061 class loop *loop;
8062 struct ivopts_data data;
8063 auto_bitmap toremove;
8064
8065 tree_ssa_iv_optimize_init (&data);
8066
8067 /* Optimize the loops starting with the innermost ones. */
8068 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8069 {
8070 if (!dbg_cnt (ivopts_loop))
8071 continue;
8072
8073 if (dump_file && (dump_flags & TDF_DETAILS))
8074 flow_loop_dump (loop, dump_file, NULL, 1);
8075
8076 tree_ssa_iv_optimize_loop (&data, loop, toremove);
8077 }
8078
8079 /* Remove eliminated IV defs. */
8080 release_defs_bitset (toremove);
8081
8082 /* We have changed the structure of induction variables; it might happen
8083 that definitions in the scev database refer to some of them that were
8084 eliminated. */
8085 scev_reset_htab ();
8086 /* Likewise niter and control-IV information. */
8087 free_numbers_of_iterations_estimates (cfun);
8088
8089 tree_ssa_iv_optimize_finalize (&data);
8090 }
8091
8092 #include "gt-tree-ssa-loop-ivopts.h"