gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "memmodel.h"
  79 #include "tm_p.h"
  80 #include "ssa.h"
  81 #include "expmed.h"
  82 #include "insn-config.h"
  83 #include "emit-rtl.h"
  84 #include "recog.h"
  85 #include "cgraph.h"
  86 #include "gimple-pretty-print.h"
  87 #include "alias.h"
  88 #include "fold-const.h"
  89 #include "stor-layout.h"
  90 #include "tree-eh.h"
  91 #include "gimplify.h"
  92 #include "gimple-iterator.h"
  93 #include "gimplify-me.h"
  94 #include "tree-cfg.h"
  95 #include "tree-ssa-loop-ivopts.h"
  96 #include "tree-ssa-loop-manip.h"
  97 #include "tree-ssa-loop-niter.h"
  98 #include "tree-ssa-loop.h"
  99 #include "explow.h"
 100 #include "expr.h"
 101 #include "tree-dfa.h"
 102 #include "tree-ssa.h"
 103 #include "cfgloop.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "tree-affine.h"
 107 #include "tree-ssa-propagate.h"
 108 #include "tree-ssa-address.h"
 109 #include "builtins.h"
 110 #include "tree-vectorizer.h"
 111
 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 113    cost of different addressing modes.  This should be moved to a TBD
 114    interface between the GIMPLE and RTL worlds.  */
 115
 116 /* The infinite cost.  */
 117 #define INFTY 10000000
 118
 119 /* Returns the expected number of loop iterations for LOOP.
 120    The average trip count is computed from profile data if it
 121    exists. */
 122
 123 static inline HOST_WIDE_INT
 124 avg_loop_niter (struct loop *loop)
 125 {
 126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 127   if (niter == -1)
 128     {
 129       niter = likely_max_stmt_executions_int (loop);
 130
 131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
 132         return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 133     }
 134
 135   return niter;
 136 }
 137
 138 struct iv_use;
 139
 140 /* Representation of the induction variable.  */
 141 struct iv
 142 {
 143   tree base;            /* Initial value of the iv.  */
 144   tree base_object;     /* A memory object to that the induction variable points.  */
 145   tree step;            /* Step of the iv (constant only).  */
 146   tree ssa_name;        /* The ssa name with the value.  */
 147   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 148   bool biv_p;           /* Is it a biv?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_REF_ADDRESS,      /* Use is an address for an explicit memory
 170                            reference.  */
 171   USE_PTR_ADDRESS,      /* Use is a pointer argument to a function in
 172                            cases where the expansion of the function
 173                            will turn the argument into a normal address.  */
 174   USE_COMPARE           /* Use is a compare.  */
 175 };
 176
 177 /* Cost of a computation.  */
 178 struct comp_cost
 179 {
 180   comp_cost (): cost (0), complexity (0), scratch (0)
 181   {}
 182
 183   comp_cost (int cost, unsigned complexity, int scratch = 0)
 184     : cost (cost), complexity (complexity), scratch (scratch)
 185   {}
 186
 187   /* Returns true if COST is infinite.  */
 188   bool infinite_cost_p ();
 189
 190   /* Adds costs COST1 and COST2.  */
 191   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 192
 193   /* Adds COST to the comp_cost.  */
 194   comp_cost operator+= (comp_cost cost);
 195
 196   /* Adds constant C to this comp_cost.  */
 197   comp_cost operator+= (HOST_WIDE_INT c);
 198
 199   /* Subtracts constant C to this comp_cost.  */
 200   comp_cost operator-= (HOST_WIDE_INT c);
 201
 202   /* Divide the comp_cost by constant C.  */
 203   comp_cost operator/= (HOST_WIDE_INT c);
 204
 205   /* Multiply the comp_cost by constant C.  */
 206   comp_cost operator*= (HOST_WIDE_INT c);
 207
 208   /* Subtracts costs COST1 and COST2.  */
 209   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 210
 211   /* Subtracts COST from this comp_cost.  */
 212   comp_cost operator-= (comp_cost cost);
 213
 214   /* Returns true if COST1 is smaller than COST2.  */
 215   friend bool operator< (comp_cost cost1, comp_cost cost2);
 216
 217   /* Returns true if COST1 and COST2 are equal.  */
 218   friend bool operator== (comp_cost cost1, comp_cost cost2);
 219
 220   /* Returns true if COST1 is smaller or equal than COST2.  */
 221   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 222
 223   int cost;             /* The runtime cost.  */
 224   unsigned complexity;  /* The estimate of the complexity of the code for
 225                            the computation (in no concrete units --
 226                            complexity field should be larger for more
 227                            complex expressions and addressing modes).  */
 228   int scratch;          /* Scratch used during cost computation.  */
 229 };
 230
 231 static const comp_cost no_cost;
 232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 233
 234 bool
 235 comp_cost::infinite_cost_p ()
 236 {
 237   return cost == INFTY;
 238 }
 239
 240 comp_cost
 241 operator+ (comp_cost cost1, comp_cost cost2)
 242 {
 243   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 244     return infinite_cost;
 245
 246   cost1.cost += cost2.cost;
 247   cost1.complexity += cost2.complexity;
 248
 249   return cost1;
 250 }
 251
 252 comp_cost
 253 operator- (comp_cost cost1, comp_cost cost2)
 254 {
 255   if (cost1.infinite_cost_p ())
 256     return infinite_cost;
 257
 258   gcc_assert (!cost2.infinite_cost_p ());
 259
 260   cost1.cost -= cost2.cost;
 261   cost1.complexity -= cost2.complexity;
 262
 263   return cost1;
 264 }
 265
 266 comp_cost
 267 comp_cost::operator+= (comp_cost cost)
 268 {
 269   *this = *this + cost;
 270   return *this;
 271 }
 272
 273 comp_cost
 274 comp_cost::operator+= (HOST_WIDE_INT c)
 275 {
 276   if (infinite_cost_p ())
 277     return *this;
 278
 279   this->cost += c;
 280
 281   return *this;
 282 }
 283
 284 comp_cost
 285 comp_cost::operator-= (HOST_WIDE_INT c)
 286 {
 287   if (infinite_cost_p ())
 288     return *this;
 289
 290   this->cost -= c;
 291
 292   return *this;
 293 }
 294
 295 comp_cost
 296 comp_cost::operator/= (HOST_WIDE_INT c)
 297 {
 298   if (infinite_cost_p ())
 299     return *this;
 300
 301   this->cost /= c;
 302
 303   return *this;
 304 }
 305
 306 comp_cost
 307 comp_cost::operator*= (HOST_WIDE_INT c)
 308 {
 309   if (infinite_cost_p ())
 310     return *this;
 311
 312   this->cost *= c;
 313
 314   return *this;
 315 }
 316
 317 comp_cost
 318 comp_cost::operator-= (comp_cost cost)
 319 {
 320   *this = *this - cost;
 321   return *this;
 322 }
 323
 324 bool
 325 operator< (comp_cost cost1, comp_cost cost2)
 326 {
 327   if (cost1.cost == cost2.cost)
 328     return cost1.complexity < cost2.complexity;
 329
 330   return cost1.cost < cost2.cost;
 331 }
 332
 333 bool
 334 operator== (comp_cost cost1, comp_cost cost2)
 335 {
 336   return cost1.cost == cost2.cost
 337     && cost1.complexity == cost2.complexity;
 338 }
 339
 340 bool
 341 operator<= (comp_cost cost1, comp_cost cost2)
 342 {
 343   return cost1 < cost2 || cost1 == cost2;
 344 }
 345
 346 struct iv_inv_expr_ent;
 347
 348 /* The candidate - cost pair.  */
 349 struct cost_pair
 350 {
 351   struct iv_cand *cand; /* The candidate.  */
 352   comp_cost cost;       /* The cost.  */
 353   enum tree_code comp;  /* For iv elimination, the comparison.  */
 354   bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
 355                            preserved when representing iv_use with iv_cand.  */
 356   bitmap inv_exprs;     /* The list of newly created invariant expressions
 357                            when representing iv_use with iv_cand.  */
 358   tree value;           /* For final value elimination, the expression for
 359                            the final value of the iv.  For iv elimination,
 360                            the new bound to compare with.  */
 361 };
 362
 363 /* Use.  */
 364 struct iv_use
 365 {
 366   unsigned id;          /* The id of the use.  */
 367   unsigned group_id;    /* The group id the use belongs to.  */
 368   enum use_type type;   /* Type of the use.  */
 369   tree mem_type;        /* The memory type to use when testing whether an
 370                            address is legitimate, and what the address's
 371                            cost is.  */
 372   struct iv *iv;        /* The induction variable it is based on.  */
 373   gimple *stmt;         /* Statement in that it occurs.  */
 374   tree *op_p;           /* The place where it occurs.  */
 375
 376   tree addr_base;       /* Base address with const offset stripped.  */
 377   poly_uint64_pod addr_offset;
 378                         /* Const offset stripped from base address.  */
 379 };
 380
 381 /* Group of uses.  */
 382 struct iv_group
 383 {
 384   /* The id of the group.  */
 385   unsigned id;
 386   /* Uses of the group are of the same type.  */
 387   enum use_type type;
 388   /* The set of "related" IV candidates, plus the important ones.  */
 389   bitmap related_cands;
 390   /* Number of IV candidates in the cost_map.  */
 391   unsigned n_map_members;
 392   /* The costs wrto the iv candidates.  */
 393   struct cost_pair *cost_map;
 394   /* The selected candidate for the group.  */
 395   struct iv_cand *selected;
 396   /* Uses in the group.  */
 397   vec<struct iv_use *> vuses;
 398 };
 399
 400 /* The position where the iv is computed.  */
 401 enum iv_position
 402 {
 403   IP_NORMAL,            /* At the end, just before the exit condition.  */
 404   IP_END,               /* At the end of the latch block.  */
 405   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 406   IP_AFTER_USE,         /* Immediately after a specific use.  */
 407   IP_ORIGINAL           /* The original biv.  */
 408 };
 409
 410 /* The induction variable candidate.  */
 411 struct iv_cand
 412 {
 413   unsigned id;          /* The number of the candidate.  */
 414   bool important;       /* Whether this is an "important" candidate, i.e. such
 415                            that it should be considered by all uses.  */
 416   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 417   gimple *incremented_at;/* For original biv, the statement where it is
 418                            incremented.  */
 419   tree var_before;      /* The variable used for it before increment.  */
 420   tree var_after;       /* The variable used for it after increment.  */
 421   struct iv *iv;        /* The value of the candidate.  NULL for
 422                            "pseudocandidate" used to indicate the possibility
 423                            to replace the final value of an iv by direct
 424                            computation of the value.  */
 425   unsigned cost;        /* Cost of the candidate.  */
 426   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 427   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 428                               where it is incremented.  */
 429   bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
 430                            iv_cand.  */
 431   bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
 432                            hanlde it as a new invariant expression which will
 433                            be hoisted out of loop.  */
 434   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 435                            smaller type.  */
 436 };
 437
 438 /* Hashtable entry for common candidate derived from iv uses.  */
 439 struct iv_common_cand
 440 {
 441   tree base;
 442   tree step;
 443   /* IV uses from which this common candidate is derived.  */
 444   auto_vec<struct iv_use *> uses;
 445   hashval_t hash;
 446 };
 447
 448 /* Hashtable helpers.  */
 449
 450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 451 {
 452   static inline hashval_t hash (const iv_common_cand *);
 453   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 454 };
 455
 456 /* Hash function for possible common candidates.  */
 457
 458 inline hashval_t
 459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 460 {
 461   return ccand->hash;
 462 }
 463
 464 /* Hash table equality function for common candidates.  */
 465
 466 inline bool
 467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 468                               const iv_common_cand *ccand2)
 469 {
 470   return (ccand1->hash == ccand2->hash
 471           && operand_equal_p (ccand1->base, ccand2->base, 0)
 472           && operand_equal_p (ccand1->step, ccand2->step, 0)
 473           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 474               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 475 }
 476
 477 /* Loop invariant expression hashtable entry.  */
 478
 479 struct iv_inv_expr_ent
 480 {
 481   /* Tree expression of the entry.  */
 482   tree expr;
 483   /* Unique indentifier.  */
 484   int id;
 485   /* Hash value.  */
 486   hashval_t hash;
 487 };
 488
 489 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 490
 491 static int
 492 sort_iv_inv_expr_ent (const void *a, const void *b)
 493 {
 494   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 495   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 496
 497   unsigned id1 = (*e1)->id;
 498   unsigned id2 = (*e2)->id;
 499
 500   if (id1 < id2)
 501     return -1;
 502   else if (id1 > id2)
 503     return 1;
 504   else
 505     return 0;
 506 }
 507
 508 /* Hashtable helpers.  */
 509
 510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 511 {
 512   static inline hashval_t hash (const iv_inv_expr_ent *);
 513   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 514 };
 515
 516 /* Return true if uses of type TYPE represent some form of address.  */
 517
 518 inline bool
 519 address_p (use_type type)
 520 {
 521   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
 522 }
 523
 524 /* Hash function for loop invariant expressions.  */
 525
 526 inline hashval_t
 527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 528 {
 529   return expr->hash;
 530 }
 531
 532 /* Hash table equality function for expressions.  */
 533
 534 inline bool
 535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 536                            const iv_inv_expr_ent *expr2)
 537 {
 538   return expr1->hash == expr2->hash
 539          && operand_equal_p (expr1->expr, expr2->expr, 0);
 540 }
 541
 542 struct ivopts_data
 543 {
 544   /* The currently optimized loop.  */
 545   struct loop *current_loop;
 546   location_t loop_loc;
 547
 548   /* Numbers of iterations for all exits of the current loop.  */
 549   hash_map<edge, tree_niter_desc *> *niters;
 550
 551   /* Number of registers used in it.  */
 552   unsigned regs_used;
 553
 554   /* The size of version_info array allocated.  */
 555   unsigned version_info_size;
 556
 557   /* The array of information for the ssa names.  */
 558   struct version_info *version_info;
 559
 560   /* The hashtable of loop invariant expressions created
 561      by ivopt.  */
 562   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 563
 564   /* The bitmap of indices in version_info whose value was changed.  */
 565   bitmap relevant;
 566
 567   /* The uses of induction variables.  */
 568   vec<iv_group *> vgroups;
 569
 570   /* The candidates.  */
 571   vec<iv_cand *> vcands;
 572
 573   /* A bitmap of important candidates.  */
 574   bitmap important_candidates;
 575
 576   /* Cache used by tree_to_aff_combination_expand.  */
 577   hash_map<tree, name_expansion *> *name_expansion_cache;
 578
 579   /* The hashtable of common candidates derived from iv uses.  */
 580   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 581
 582   /* The common candidates.  */
 583   vec<iv_common_cand *> iv_common_cands;
 584
 585   /* The maximum invariant variable id.  */
 586   unsigned max_inv_var_id;
 587
 588   /* The maximum invariant expression id.  */
 589   unsigned max_inv_expr_id;
 590
 591   /* Number of no_overflow BIVs which are not used in memory address.  */
 592   unsigned bivs_not_used_in_addr;
 593
 594   /* Obstack for iv structure.  */
 595   struct obstack iv_obstack;
 596
 597   /* Whether to consider just related and important candidates when replacing a
 598      use.  */
 599   bool consider_all_candidates;
 600
 601   /* Are we optimizing for speed?  */
 602   bool speed;
 603
 604   /* Whether the loop body includes any function calls.  */
 605   bool body_includes_call;
 606
 607   /* Whether the loop body can only be exited via single exit.  */
 608   bool loop_single_exit_p;
 609 };
 610
 611 /* An assignment of iv candidates to uses.  */
 612
 613 struct iv_ca
 614 {
 615   /* The number of uses covered by the assignment.  */
 616   unsigned upto;
 617
 618   /* Number of uses that cannot be expressed by the candidates in the set.  */
 619   unsigned bad_groups;
 620
 621   /* Candidate assigned to a use, together with the related costs.  */
 622   struct cost_pair **cand_for_group;
 623
 624   /* Number of times each candidate is used.  */
 625   unsigned *n_cand_uses;
 626
 627   /* The candidates used.  */
 628   bitmap cands;
 629
 630   /* The number of candidates in the set.  */
 631   unsigned n_cands;
 632
 633   /* The number of invariants needed, including both invariant variants and
 634      invariant expressions.  */
 635   unsigned n_invs;
 636
 637   /* Total cost of expressing uses.  */
 638   comp_cost cand_use_cost;
 639
 640   /* Total cost of candidates.  */
 641   unsigned cand_cost;
 642
 643   /* Number of times each invariant variable is used.  */
 644   unsigned *n_inv_var_uses;
 645
 646   /* Number of times each invariant expression is used.  */
 647   unsigned *n_inv_expr_uses;
 648
 649   /* Total cost of the assignment.  */
 650   comp_cost cost;
 651 };
 652
 653 /* Difference of two iv candidate assignments.  */
 654
 655 struct iv_ca_delta
 656 {
 657   /* Changed group.  */
 658   struct iv_group *group;
 659
 660   /* An old assignment (for rollback purposes).  */
 661   struct cost_pair *old_cp;
 662
 663   /* A new assignment.  */
 664   struct cost_pair *new_cp;
 665
 666   /* Next change in the list.  */
 667   struct iv_ca_delta *next;
 668 };
 669
 670 /* Bound on number of candidates below that all candidates are considered.  */
 671
 672 #define CONSIDER_ALL_CANDIDATES_BOUND \
 673   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 674
 675 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 676    optimizing such a loop would help, and it would take ages).  */
 677
 678 #define MAX_CONSIDERED_GROUPS \
 679   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 680
 681 /* If there are at most this number of ivs in the set, try removing unnecessary
 682    ivs from the set always.  */
 683
 684 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 685   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 686
 687 /* The list of trees for that the decl_rtl field must be reset is stored
 688    here.  */
 689
 690 static vec<tree> decl_rtl_to_reset;
 691
 692 static comp_cost force_expr_to_var_cost (tree, bool);
 693
 694 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 695
 696 edge
 697 single_dom_exit (struct loop *loop)
 698 {
 699   edge exit = single_exit (loop);
 700
 701   if (!exit)
 702     return NULL;
 703
 704   if (!just_once_each_iteration_p (loop, exit->src))
 705     return NULL;
 706
 707   return exit;
 708 }
 709
 710 /* Dumps information about the induction variable IV to FILE.  Don't dump
 711    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 712    preceding spaces indicated by INDENT_LEVEL.  */
 713
 714 void
 715 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 716 {
 717   const char *p;
 718   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 719
 720   if (indent_level > 4)
 721     indent_level = 4;
 722   p = spaces + 8 - (indent_level << 1);
 723
 724   fprintf (file, "%sIV struct:\n", p);
 725   if (iv->ssa_name && dump_name)
 726     {
 727       fprintf (file, "%s  SSA_NAME:\t", p);
 728       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 729       fprintf (file, "\n");
 730     }
 731
 732   fprintf (file, "%s  Type:\t", p);
 733   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 734   fprintf (file, "\n");
 735
 736   fprintf (file, "%s  Base:\t", p);
 737   print_generic_expr (file, iv->base, TDF_SLIM);
 738   fprintf (file, "\n");
 739
 740   fprintf (file, "%s  Step:\t", p);
 741   print_generic_expr (file, iv->step, TDF_SLIM);
 742   fprintf (file, "\n");
 743
 744   if (iv->base_object)
 745     {
 746       fprintf (file, "%s  Object:\t", p);
 747       print_generic_expr (file, iv->base_object, TDF_SLIM);
 748       fprintf (file, "\n");
 749     }
 750
 751   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 752
 753   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 754            p, iv->no_overflow ? "No-overflow" : "Overflow");
 755 }
 756
 757 /* Dumps information about the USE to FILE.  */
 758
 759 void
 760 dump_use (FILE *file, struct iv_use *use)
 761 {
 762   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 763   fprintf (file, "    At stmt:\t");
 764   print_gimple_stmt (file, use->stmt, 0);
 765   fprintf (file, "    At pos:\t");
 766   if (use->op_p)
 767     print_generic_expr (file, *use->op_p, TDF_SLIM);
 768   fprintf (file, "\n");
 769   dump_iv (file, use->iv, false, 2);
 770 }
 771
 772 /* Dumps information about the uses to FILE.  */
 773
 774 void
 775 dump_groups (FILE *file, struct ivopts_data *data)
 776 {
 777   unsigned i, j;
 778   struct iv_group *group;
 779
 780   for (i = 0; i < data->vgroups.length (); i++)
 781     {
 782       group = data->vgroups[i];
 783       fprintf (file, "Group %d:\n", group->id);
 784       if (group->type == USE_NONLINEAR_EXPR)
 785         fprintf (file, "  Type:\tGENERIC\n");
 786       else if (group->type == USE_REF_ADDRESS)
 787         fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
 788       else if (group->type == USE_PTR_ADDRESS)
 789         fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
 790       else
 791         {
 792           gcc_assert (group->type == USE_COMPARE);
 793           fprintf (file, "  Type:\tCOMPARE\n");
 794         }
 795       for (j = 0; j < group->vuses.length (); j++)
 796         dump_use (file, group->vuses[j]);
 797     }
 798 }
 799
 800 /* Dumps information about induction variable candidate CAND to FILE.  */
 801
 802 void
 803 dump_cand (FILE *file, struct iv_cand *cand)
 804 {
 805   struct iv *iv = cand->iv;
 806
 807   fprintf (file, "Candidate %d:\n", cand->id);
 808   if (cand->inv_vars)
 809     {
 810       fprintf (file, "  Depend on inv.vars: ");
 811       dump_bitmap (file, cand->inv_vars);
 812     }
 813   if (cand->inv_exprs)
 814     {
 815       fprintf (file, "  Depend on inv.exprs: ");
 816       dump_bitmap (file, cand->inv_exprs);
 817     }
 818
 819   if (cand->var_before)
 820     {
 821       fprintf (file, "  Var befor: ");
 822       print_generic_expr (file, cand->var_before, TDF_SLIM);
 823       fprintf (file, "\n");
 824     }
 825   if (cand->var_after)
 826     {
 827       fprintf (file, "  Var after: ");
 828       print_generic_expr (file, cand->var_after, TDF_SLIM);
 829       fprintf (file, "\n");
 830     }
 831
 832   switch (cand->pos)
 833     {
 834     case IP_NORMAL:
 835       fprintf (file, "  Incr POS: before exit test\n");
 836       break;
 837
 838     case IP_BEFORE_USE:
 839       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 840       break;
 841
 842     case IP_AFTER_USE:
 843       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 844       break;
 845
 846     case IP_END:
 847       fprintf (file, "  Incr POS: at end\n");
 848       break;
 849
 850     case IP_ORIGINAL:
 851       fprintf (file, "  Incr POS: orig biv\n");
 852       break;
 853     }
 854
 855   dump_iv (file, iv, false, 1);
 856 }
 857
 858 /* Returns the info for ssa version VER.  */
 859
 860 static inline struct version_info *
 861 ver_info (struct ivopts_data *data, unsigned ver)
 862 {
 863   return data->version_info + ver;
 864 }
 865
 866 /* Returns the info for ssa name NAME.  */
 867
 868 static inline struct version_info *
 869 name_info (struct ivopts_data *data, tree name)
 870 {
 871   return ver_info (data, SSA_NAME_VERSION (name));
 872 }
 873
 874 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 875    emitted in LOOP.  */
 876
 877 static bool
 878 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 879 {
 880   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 881
 882   gcc_assert (bb);
 883
 884   if (sbb == loop->latch)
 885     return true;
 886
 887   if (sbb != bb)
 888     return false;
 889
 890   return stmt == last_stmt (bb);
 891 }
 892
 893 /* Returns true if STMT if after the place where the original induction
 894    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 895    if the positions are identical.  */
 896
 897 static bool
 898 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 899 {
 900   basic_block cand_bb = gimple_bb (cand->incremented_at);
 901   basic_block stmt_bb = gimple_bb (stmt);
 902
 903   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 904     return false;
 905
 906   if (stmt_bb != cand_bb)
 907     return true;
 908
 909   if (true_if_equal
 910       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 911     return true;
 912   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 913 }
 914
 915 /* Returns true if STMT if after the place where the induction variable
 916    CAND is incremented in LOOP.  */
 917
 918 static bool
 919 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 920 {
 921   switch (cand->pos)
 922     {
 923     case IP_END:
 924       return false;
 925
 926     case IP_NORMAL:
 927       return stmt_after_ip_normal_pos (loop, stmt);
 928
 929     case IP_ORIGINAL:
 930     case IP_AFTER_USE:
 931       return stmt_after_inc_pos (cand, stmt, false);
 932
 933     case IP_BEFORE_USE:
 934       return stmt_after_inc_pos (cand, stmt, true);
 935
 936     default:
 937       gcc_unreachable ();
 938     }
 939 }
 940
 941 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 942
 943 static bool
 944 abnormal_ssa_name_p (tree exp)
 945 {
 946   if (!exp)
 947     return false;
 948
 949   if (TREE_CODE (exp) != SSA_NAME)
 950     return false;
 951
 952   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 953 }
 954
 955 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 956    abnormal phi node.  Callback for for_each_index.  */
 957
 958 static bool
 959 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 960                                   void *data ATTRIBUTE_UNUSED)
 961 {
 962   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 963     {
 964       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 965         return false;
 966       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 967         return false;
 968     }
 969
 970   return !abnormal_ssa_name_p (*index);
 971 }
 972
 973 /* Returns true if EXPR contains a ssa name that occurs in an
 974    abnormal phi node.  */
 975
 976 bool
 977 contains_abnormal_ssa_name_p (tree expr)
 978 {
 979   enum tree_code code;
 980   enum tree_code_class codeclass;
 981
 982   if (!expr)
 983     return false;
 984
 985   code = TREE_CODE (expr);
 986   codeclass = TREE_CODE_CLASS (code);
 987
 988   if (code == CALL_EXPR)
 989     {
 990       tree arg;
 991       call_expr_arg_iterator iter;
 992       FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
 993         if (contains_abnormal_ssa_name_p (arg))
 994           return true;
 995       return false;
 996     }
 997
 998   if (code == SSA_NAME)
 999     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
1000
1001   if (code == INTEGER_CST
1002       || is_gimple_min_invariant (expr))
1003     return false;
1004
1005   if (code == ADDR_EXPR)
1006     return !for_each_index (&TREE_OPERAND (expr, 0),
1007                             idx_contains_abnormal_ssa_name_p,
1008                             NULL);
1009
1010   if (code == COND_EXPR)
1011     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1012       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1013       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1014
1015   switch (codeclass)
1016     {
1017     case tcc_binary:
1018     case tcc_comparison:
1019       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1020         return true;
1021
1022       /* Fallthru.  */
1023     case tcc_unary:
1024       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1025         return true;
1026
1027       break;
1028
1029     default:
1030       gcc_unreachable ();
1031     }
1032
1033   return false;
1034 }
1035
1036 /*  Returns the structure describing number of iterations determined from
1037     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1038
1039 static struct tree_niter_desc *
1040 niter_for_exit (struct ivopts_data *data, edge exit)
1041 {
1042   struct tree_niter_desc *desc;
1043   tree_niter_desc **slot;
1044
1045   if (!data->niters)
1046     {
1047       data->niters = new hash_map<edge, tree_niter_desc *>;
1048       slot = NULL;
1049     }
1050   else
1051     slot = data->niters->get (exit);
1052
1053   if (!slot)
1054     {
1055       /* Try to determine number of iterations.  We cannot safely work with ssa
1056          names that appear in phi nodes on abnormal edges, so that we do not
1057          create overlapping life ranges for them (PR 27283).  */
1058       desc = XNEW (struct tree_niter_desc);
1059       if (!number_of_iterations_exit (data->current_loop,
1060                                       exit, desc, true)
1061           || contains_abnormal_ssa_name_p (desc->niter))
1062         {
1063           XDELETE (desc);
1064           desc = NULL;
1065         }
1066       data->niters->put (exit, desc);
1067     }
1068   else
1069     desc = *slot;
1070
1071   return desc;
1072 }
1073
1074 /* Returns the structure describing number of iterations determined from
1075    single dominating exit of DATA->current_loop, or NULL if something
1076    goes wrong.  */
1077
1078 static struct tree_niter_desc *
1079 niter_for_single_dom_exit (struct ivopts_data *data)
1080 {
1081   edge exit = single_dom_exit (data->current_loop);
1082
1083   if (!exit)
1084     return NULL;
1085
1086   return niter_for_exit (data, exit);
1087 }
1088
1089 /* Initializes data structures used by the iv optimization pass, stored
1090    in DATA.  */
1091
1092 static void
1093 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1094 {
1095   data->version_info_size = 2 * num_ssa_names;
1096   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1097   data->relevant = BITMAP_ALLOC (NULL);
1098   data->important_candidates = BITMAP_ALLOC (NULL);
1099   data->max_inv_var_id = 0;
1100   data->max_inv_expr_id = 0;
1101   data->niters = NULL;
1102   data->vgroups.create (20);
1103   data->vcands.create (20);
1104   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1105   data->name_expansion_cache = NULL;
1106   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1107   data->iv_common_cands.create (20);
1108   decl_rtl_to_reset.create (20);
1109   gcc_obstack_init (&data->iv_obstack);
1110 }
1111
1112 /* Returns a memory object to that EXPR points.  In case we are able to
1113    determine that it does not point to any such object, NULL is returned.  */
1114
1115 static tree
1116 determine_base_object (tree expr)
1117 {
1118   enum tree_code code = TREE_CODE (expr);
1119   tree base, obj;
1120
1121   /* If this is a pointer casted to any type, we need to determine
1122      the base object for the pointer; so handle conversions before
1123      throwing away non-pointer expressions.  */
1124   if (CONVERT_EXPR_P (expr))
1125     return determine_base_object (TREE_OPERAND (expr, 0));
1126
1127   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1128     return NULL_TREE;
1129
1130   switch (code)
1131     {
1132     case INTEGER_CST:
1133       return NULL_TREE;
1134
1135     case ADDR_EXPR:
1136       obj = TREE_OPERAND (expr, 0);
1137       base = get_base_address (obj);
1138
1139       if (!base)
1140         return expr;
1141
1142       if (TREE_CODE (base) == MEM_REF)
1143         return determine_base_object (TREE_OPERAND (base, 0));
1144
1145       return fold_convert (ptr_type_node,
1146                            build_fold_addr_expr (base));
1147
1148     case POINTER_PLUS_EXPR:
1149       return determine_base_object (TREE_OPERAND (expr, 0));
1150
1151     case PLUS_EXPR:
1152     case MINUS_EXPR:
1153       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1154       gcc_unreachable ();
1155
1156     default:
1157       if (POLY_INT_CST_P (expr))
1158         return NULL_TREE;
1159       return fold_convert (ptr_type_node, expr);
1160     }
1161 }
1162
1163 /* Return true if address expression with non-DECL_P operand appears
1164    in EXPR.  */
1165
1166 static bool
1167 contain_complex_addr_expr (tree expr)
1168 {
1169   bool res = false;
1170
1171   STRIP_NOPS (expr);
1172   switch (TREE_CODE (expr))
1173     {
1174     case POINTER_PLUS_EXPR:
1175     case PLUS_EXPR:
1176     case MINUS_EXPR:
1177       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1178       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1179       break;
1180
1181     case ADDR_EXPR:
1182       return (!DECL_P (TREE_OPERAND (expr, 0)));
1183
1184     default:
1185       return false;
1186     }
1187
1188   return res;
1189 }
1190
1191 /* Allocates an induction variable with given initial value BASE and step STEP
1192    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1193
1194 static struct iv *
1195 alloc_iv (struct ivopts_data *data, tree base, tree step,
1196           bool no_overflow = false)
1197 {
1198   tree expr = base;
1199   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1200                                               sizeof (struct iv));
1201   gcc_assert (step != NULL_TREE);
1202
1203   /* Lower address expression in base except ones with DECL_P as operand.
1204      By doing this:
1205        1) More accurate cost can be computed for address expressions;
1206        2) Duplicate candidates won't be created for bases in different
1207           forms, like &a[0] and &a.  */
1208   STRIP_NOPS (expr);
1209   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1210       || contain_complex_addr_expr (expr))
1211     {
1212       aff_tree comb;
1213       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1214       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1215     }
1216
1217   iv->base = base;
1218   iv->base_object = determine_base_object (base);
1219   iv->step = step;
1220   iv->biv_p = false;
1221   iv->nonlin_use = NULL;
1222   iv->ssa_name = NULL_TREE;
1223   if (!no_overflow
1224        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1225                               base, step))
1226     no_overflow = true;
1227   iv->no_overflow = no_overflow;
1228   iv->have_address_use = false;
1229
1230   return iv;
1231 }
1232
1233 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1234    doesn't overflow.  */
1235
1236 static void
1237 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1238         bool no_overflow)
1239 {
1240   struct version_info *info = name_info (data, iv);
1241
1242   gcc_assert (!info->iv);
1243
1244   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1245   info->iv = alloc_iv (data, base, step, no_overflow);
1246   info->iv->ssa_name = iv;
1247 }
1248
1249 /* Finds induction variable declaration for VAR.  */
1250
1251 static struct iv *
1252 get_iv (struct ivopts_data *data, tree var)
1253 {
1254   basic_block bb;
1255   tree type = TREE_TYPE (var);
1256
1257   if (!POINTER_TYPE_P (type)
1258       && !INTEGRAL_TYPE_P (type))
1259     return NULL;
1260
1261   if (!name_info (data, var)->iv)
1262     {
1263       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1264
1265       if (!bb
1266           || !flow_bb_inside_loop_p (data->current_loop, bb))
1267         set_iv (data, var, var, build_int_cst (type, 0), true);
1268     }
1269
1270   return name_info (data, var)->iv;
1271 }
1272
1273 /* Return the first non-invariant ssa var found in EXPR.  */
1274
1275 static tree
1276 extract_single_var_from_expr (tree expr)
1277 {
1278   int i, n;
1279   tree tmp;
1280   enum tree_code code;
1281
1282   if (!expr || is_gimple_min_invariant (expr))
1283     return NULL;
1284
1285   code = TREE_CODE (expr);
1286   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1287     {
1288       n = TREE_OPERAND_LENGTH (expr);
1289       for (i = 0; i < n; i++)
1290         {
1291           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1292
1293           if (tmp)
1294             return tmp;
1295         }
1296     }
1297   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1298 }
1299
1300 /* Finds basic ivs.  */
1301
1302 static bool
1303 find_bivs (struct ivopts_data *data)
1304 {
1305   gphi *phi;
1306   affine_iv iv;
1307   tree step, type, base, stop;
1308   bool found = false;
1309   struct loop *loop = data->current_loop;
1310   gphi_iterator psi;
1311
1312   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1313     {
1314       phi = psi.phi ();
1315
1316       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1317         continue;
1318
1319       if (virtual_operand_p (PHI_RESULT (phi)))
1320         continue;
1321
1322       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1323         continue;
1324
1325       if (integer_zerop (iv.step))
1326         continue;
1327
1328       step = iv.step;
1329       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1330       /* Stop expanding iv base at the first ssa var referred by iv step.
1331          Ideally we should stop at any ssa var, because that's expensive
1332          and unusual to happen, we just do it on the first one.
1333
1334          See PR64705 for the rationale.  */
1335       stop = extract_single_var_from_expr (step);
1336       base = expand_simple_operations (base, stop);
1337       if (contains_abnormal_ssa_name_p (base)
1338           || contains_abnormal_ssa_name_p (step))
1339         continue;
1340
1341       type = TREE_TYPE (PHI_RESULT (phi));
1342       base = fold_convert (type, base);
1343       if (step)
1344         {
1345           if (POINTER_TYPE_P (type))
1346             step = convert_to_ptrofftype (step);
1347           else
1348             step = fold_convert (type, step);
1349         }
1350
1351       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1352       found = true;
1353     }
1354
1355   return found;
1356 }
1357
1358 /* Marks basic ivs.  */
1359
1360 static void
1361 mark_bivs (struct ivopts_data *data)
1362 {
1363   gphi *phi;
1364   gimple *def;
1365   tree var;
1366   struct iv *iv, *incr_iv;
1367   struct loop *loop = data->current_loop;
1368   basic_block incr_bb;
1369   gphi_iterator psi;
1370
1371   data->bivs_not_used_in_addr = 0;
1372   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1373     {
1374       phi = psi.phi ();
1375
1376       iv = get_iv (data, PHI_RESULT (phi));
1377       if (!iv)
1378         continue;
1379
1380       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1381       def = SSA_NAME_DEF_STMT (var);
1382       /* Don't mark iv peeled from other one as biv.  */
1383       if (def
1384           && gimple_code (def) == GIMPLE_PHI
1385           && gimple_bb (def) == loop->header)
1386         continue;
1387
1388       incr_iv = get_iv (data, var);
1389       if (!incr_iv)
1390         continue;
1391
1392       /* If the increment is in the subloop, ignore it.  */
1393       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1394       if (incr_bb->loop_father != data->current_loop
1395           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1396         continue;
1397
1398       iv->biv_p = true;
1399       incr_iv->biv_p = true;
1400       if (iv->no_overflow)
1401         data->bivs_not_used_in_addr++;
1402       if (incr_iv->no_overflow)
1403         data->bivs_not_used_in_addr++;
1404     }
1405 }
1406
1407 /* Checks whether STMT defines a linear induction variable and stores its
1408    parameters to IV.  */
1409
1410 static bool
1411 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1412 {
1413   tree lhs, stop;
1414   struct loop *loop = data->current_loop;
1415
1416   iv->base = NULL_TREE;
1417   iv->step = NULL_TREE;
1418
1419   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1420     return false;
1421
1422   lhs = gimple_assign_lhs (stmt);
1423   if (TREE_CODE (lhs) != SSA_NAME)
1424     return false;
1425
1426   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1427     return false;
1428
1429   /* Stop expanding iv base at the first ssa var referred by iv step.
1430      Ideally we should stop at any ssa var, because that's expensive
1431      and unusual to happen, we just do it on the first one.
1432
1433      See PR64705 for the rationale.  */
1434   stop = extract_single_var_from_expr (iv->step);
1435   iv->base = expand_simple_operations (iv->base, stop);
1436   if (contains_abnormal_ssa_name_p (iv->base)
1437       || contains_abnormal_ssa_name_p (iv->step))
1438     return false;
1439
1440   /* If STMT could throw, then do not consider STMT as defining a GIV.
1441      While this will suppress optimizations, we cannot safely delete this
1442      GIV and associated statements, even if it appears it is not used.  */
1443   if (stmt_could_throw_p (cfun, stmt))
1444     return false;
1445
1446   return true;
1447 }
1448
1449 /* Finds general ivs in statement STMT.  */
1450
1451 static void
1452 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1453 {
1454   affine_iv iv;
1455
1456   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1457     return;
1458
1459   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1460 }
1461
1462 /* Finds general ivs in basic block BB.  */
1463
1464 static void
1465 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1466 {
1467   gimple_stmt_iterator bsi;
1468
1469   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1470     find_givs_in_stmt (data, gsi_stmt (bsi));
1471 }
1472
1473 /* Finds general ivs.  */
1474
1475 static void
1476 find_givs (struct ivopts_data *data)
1477 {
1478   struct loop *loop = data->current_loop;
1479   basic_block *body = get_loop_body_in_dom_order (loop);
1480   unsigned i;
1481
1482   for (i = 0; i < loop->num_nodes; i++)
1483     find_givs_in_bb (data, body[i]);
1484   free (body);
1485 }
1486
1487 /* For each ssa name defined in LOOP determines whether it is an induction
1488    variable and if so, its initial value and step.  */
1489
1490 static bool
1491 find_induction_variables (struct ivopts_data *data)
1492 {
1493   unsigned i;
1494   bitmap_iterator bi;
1495
1496   if (!find_bivs (data))
1497     return false;
1498
1499   find_givs (data);
1500   mark_bivs (data);
1501
1502   if (dump_file && (dump_flags & TDF_DETAILS))
1503     {
1504       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1505
1506       if (niter)
1507         {
1508           fprintf (dump_file, "  number of iterations ");
1509           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1510           if (!integer_zerop (niter->may_be_zero))
1511             {
1512               fprintf (dump_file, "; zero if ");
1513               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1514             }
1515           fprintf (dump_file, "\n");
1516         };
1517
1518       fprintf (dump_file, "\n<Induction Vars>:\n");
1519       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1520         {
1521           struct version_info *info = ver_info (data, i);
1522           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1523             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1524         }
1525     }
1526
1527   return true;
1528 }
1529
1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1531    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1532    is the const offset stripped from IV base and MEM_TYPE is the type
1533    of the memory being addressed.  For uses of other types, ADDR_BASE
1534    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1535
1536 static struct iv_use *
1537 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1538             gimple *stmt, enum use_type type, tree mem_type,
1539             tree addr_base, poly_uint64 addr_offset)
1540 {
1541   struct iv_use *use = XCNEW (struct iv_use);
1542
1543   use->id = group->vuses.length ();
1544   use->group_id = group->id;
1545   use->type = type;
1546   use->mem_type = mem_type;
1547   use->iv = iv;
1548   use->stmt = stmt;
1549   use->op_p = use_p;
1550   use->addr_base = addr_base;
1551   use->addr_offset = addr_offset;
1552
1553   group->vuses.safe_push (use);
1554   return use;
1555 }
1556
1557 /* Checks whether OP is a loop-level invariant and if so, records it.
1558    NONLINEAR_USE is true if the invariant is used in a way we do not
1559    handle specially.  */
1560
1561 static void
1562 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1563 {
1564   basic_block bb;
1565   struct version_info *info;
1566
1567   if (TREE_CODE (op) != SSA_NAME
1568       || virtual_operand_p (op))
1569     return;
1570
1571   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1572   if (bb
1573       && flow_bb_inside_loop_p (data->current_loop, bb))
1574     return;
1575
1576   info = name_info (data, op);
1577   info->name = op;
1578   info->has_nonlin_use |= nonlinear_use;
1579   if (!info->inv_id)
1580     info->inv_id = ++data->max_inv_var_id;
1581   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1582 }
1583
1584 /* Record a group of TYPE.  */
1585
1586 static struct iv_group *
1587 record_group (struct ivopts_data *data, enum use_type type)
1588 {
1589   struct iv_group *group = XCNEW (struct iv_group);
1590
1591   group->id = data->vgroups.length ();
1592   group->type = type;
1593   group->related_cands = BITMAP_ALLOC (NULL);
1594   group->vuses.create (1);
1595
1596   data->vgroups.safe_push (group);
1597   return group;
1598 }
1599
1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1601    New group will be created if there is no existing group for the use.
1602    MEM_TYPE is the type of memory being addressed, or NULL if this
1603    isn't an address reference.  */
1604
1605 static struct iv_use *
1606 record_group_use (struct ivopts_data *data, tree *use_p,
1607                   struct iv *iv, gimple *stmt, enum use_type type,
1608                   tree mem_type)
1609 {
1610   tree addr_base = NULL;
1611   struct iv_group *group = NULL;
1612   poly_uint64 addr_offset = 0;
1613
1614   /* Record non address type use in a new group.  */
1615   if (address_p (type))
1616     {
1617       unsigned int i;
1618
1619       addr_base = strip_offset (iv->base, &addr_offset);
1620       for (i = 0; i < data->vgroups.length (); i++)
1621         {
1622           struct iv_use *use;
1623
1624           group = data->vgroups[i];
1625           use = group->vuses[0];
1626           if (!address_p (use->type))
1627             continue;
1628
1629           /* Check if it has the same stripped base and step.  */
1630           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1631               && operand_equal_p (iv->step, use->iv->step, 0)
1632               && operand_equal_p (addr_base, use->addr_base, 0))
1633             break;
1634         }
1635       if (i == data->vgroups.length ())
1636         group = NULL;
1637     }
1638
1639   if (!group)
1640     group = record_group (data, type);
1641
1642   return record_use (group, use_p, iv, stmt, type, mem_type,
1643                      addr_base, addr_offset);
1644 }
1645
1646 /* Checks whether the use OP is interesting and if so, records it.  */
1647
1648 static struct iv_use *
1649 find_interesting_uses_op (struct ivopts_data *data, tree op)
1650 {
1651   struct iv *iv;
1652   gimple *stmt;
1653   struct iv_use *use;
1654
1655   if (TREE_CODE (op) != SSA_NAME)
1656     return NULL;
1657
1658   iv = get_iv (data, op);
1659   if (!iv)
1660     return NULL;
1661
1662   if (iv->nonlin_use)
1663     {
1664       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1665       return iv->nonlin_use;
1666     }
1667
1668   if (integer_zerop (iv->step))
1669     {
1670       record_invariant (data, op, true);
1671       return NULL;
1672     }
1673
1674   stmt = SSA_NAME_DEF_STMT (op);
1675   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1676
1677   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1678   iv->nonlin_use = use;
1679   return use;
1680 }
1681
1682 /* Indicate how compare type iv_use can be handled.  */
1683 enum comp_iv_rewrite
1684 {
1685   COMP_IV_NA,
1686   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1687   COMP_IV_EXPR,
1688   /* We may rewrite compare type iv_uses on both sides of comparison by
1689      expressing value of each iv_use.  */
1690   COMP_IV_EXPR_2,
1691   /* We may rewrite compare type iv_use by expressing value of the iv_use
1692      or by eliminating it with other iv_cand.  */
1693   COMP_IV_ELIM
1694 };
1695
1696 /* Given a condition in statement STMT, checks whether it is a compare
1697    of an induction variable and an invariant.  If this is the case,
1698    CONTROL_VAR is set to location of the iv, BOUND to the location of
1699    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1700    induction variable descriptions, and true is returned.  If this is not
1701    the case, CONTROL_VAR and BOUND are set to the arguments of the
1702    condition and false is returned.  */
1703
1704 static enum comp_iv_rewrite
1705 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1706                        tree **control_var, tree **bound,
1707                        struct iv **iv_var, struct iv **iv_bound)
1708 {
1709   /* The objects returned when COND has constant operands.  */
1710   static struct iv const_iv;
1711   static tree zero;
1712   tree *op0 = &zero, *op1 = &zero;
1713   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1714   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1715
1716   if (gimple_code (stmt) == GIMPLE_COND)
1717     {
1718       gcond *cond_stmt = as_a <gcond *> (stmt);
1719       op0 = gimple_cond_lhs_ptr (cond_stmt);
1720       op1 = gimple_cond_rhs_ptr (cond_stmt);
1721     }
1722   else
1723     {
1724       op0 = gimple_assign_rhs1_ptr (stmt);
1725       op1 = gimple_assign_rhs2_ptr (stmt);
1726     }
1727
1728   zero = integer_zero_node;
1729   const_iv.step = integer_zero_node;
1730
1731   if (TREE_CODE (*op0) == SSA_NAME)
1732     iv0 = get_iv (data, *op0);
1733   if (TREE_CODE (*op1) == SSA_NAME)
1734     iv1 = get_iv (data, *op1);
1735
1736   /* If both sides of comparison are IVs.  We can express ivs on both end.  */
1737   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1738     {
1739       rewrite_type = COMP_IV_EXPR_2;
1740       goto end;
1741     }
1742
1743   /* If none side of comparison is IV.  */
1744   if ((!iv0 || integer_zerop (iv0->step))
1745       && (!iv1 || integer_zerop (iv1->step)))
1746     goto end;
1747
1748   /* Control variable may be on the other side.  */
1749   if (!iv0 || integer_zerop (iv0->step))
1750     {
1751       std::swap (op0, op1);
1752       std::swap (iv0, iv1);
1753     }
1754   /* If one side is IV and the other side isn't loop invariant.  */
1755   if (!iv1)
1756     rewrite_type = COMP_IV_EXPR;
1757   /* If one side is IV and the other side is loop invariant.  */
1758   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1759     rewrite_type = COMP_IV_ELIM;
1760
1761 end:
1762   if (control_var)
1763     *control_var = op0;
1764   if (iv_var)
1765     *iv_var = iv0;
1766   if (bound)
1767     *bound = op1;
1768   if (iv_bound)
1769     *iv_bound = iv1;
1770
1771   return rewrite_type;
1772 }
1773
1774 /* Checks whether the condition in STMT is interesting and if so,
1775    records it.  */
1776
1777 static void
1778 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1779 {
1780   tree *var_p, *bound_p;
1781   struct iv *var_iv, *bound_iv;
1782   enum comp_iv_rewrite ret;
1783
1784   ret = extract_cond_operands (data, stmt,
1785                                &var_p, &bound_p, &var_iv, &bound_iv);
1786   if (ret == COMP_IV_NA)
1787     {
1788       find_interesting_uses_op (data, *var_p);
1789       find_interesting_uses_op (data, *bound_p);
1790       return;
1791     }
1792
1793   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1794   /* Record compare type iv_use for iv on the other side of comparison.  */
1795   if (ret == COMP_IV_EXPR_2)
1796     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1797 }
1798
1799 /* Returns the outermost loop EXPR is obviously invariant in
1800    relative to the loop LOOP, i.e. if all its operands are defined
1801    outside of the returned loop.  Returns NULL if EXPR is not
1802    even obviously invariant in LOOP.  */
1803
1804 struct loop *
1805 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1806 {
1807   basic_block def_bb;
1808   unsigned i, len;
1809
1810   if (is_gimple_min_invariant (expr))
1811     return current_loops->tree_root;
1812
1813   if (TREE_CODE (expr) == SSA_NAME)
1814     {
1815       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1816       if (def_bb)
1817         {
1818           if (flow_bb_inside_loop_p (loop, def_bb))
1819             return NULL;
1820           return superloop_at_depth (loop,
1821                                      loop_depth (def_bb->loop_father) + 1);
1822         }
1823
1824       return current_loops->tree_root;
1825     }
1826
1827   if (!EXPR_P (expr))
1828     return NULL;
1829
1830   unsigned maxdepth = 0;
1831   len = TREE_OPERAND_LENGTH (expr);
1832   for (i = 0; i < len; i++)
1833     {
1834       struct loop *ivloop;
1835       if (!TREE_OPERAND (expr, i))
1836         continue;
1837
1838       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1839       if (!ivloop)
1840         return NULL;
1841       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1842     }
1843
1844   return superloop_at_depth (loop, maxdepth);
1845 }
1846
1847 /* Returns true if expression EXPR is obviously invariant in LOOP,
1848    i.e. if all its operands are defined outside of the LOOP.  LOOP
1849    should not be the function body.  */
1850
1851 bool
1852 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1853 {
1854   basic_block def_bb;
1855   unsigned i, len;
1856
1857   gcc_assert (loop_depth (loop) > 0);
1858
1859   if (is_gimple_min_invariant (expr))
1860     return true;
1861
1862   if (TREE_CODE (expr) == SSA_NAME)
1863     {
1864       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1865       if (def_bb
1866           && flow_bb_inside_loop_p (loop, def_bb))
1867         return false;
1868
1869       return true;
1870     }
1871
1872   if (!EXPR_P (expr))
1873     return false;
1874
1875   len = TREE_OPERAND_LENGTH (expr);
1876   for (i = 0; i < len; i++)
1877     if (TREE_OPERAND (expr, i)
1878         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1879       return false;
1880
1881   return true;
1882 }
1883
1884 /* Given expression EXPR which computes inductive values with respect
1885    to loop recorded in DATA, this function returns biv from which EXPR
1886    is derived by tracing definition chains of ssa variables in EXPR.  */
1887
1888 static struct iv*
1889 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1890 {
1891   struct iv *iv;
1892   unsigned i, n;
1893   tree e2, e1;
1894   enum tree_code code;
1895   gimple *stmt;
1896
1897   if (expr == NULL_TREE)
1898     return NULL;
1899
1900   if (is_gimple_min_invariant (expr))
1901     return NULL;
1902
1903   code = TREE_CODE (expr);
1904   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1905     {
1906       n = TREE_OPERAND_LENGTH (expr);
1907       for (i = 0; i < n; i++)
1908         {
1909           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1910           if (iv)
1911             return iv;
1912         }
1913     }
1914
1915   /* Stop if it's not ssa name.  */
1916   if (code != SSA_NAME)
1917     return NULL;
1918
1919   iv = get_iv (data, expr);
1920   if (!iv || integer_zerop (iv->step))
1921     return NULL;
1922   else if (iv->biv_p)
1923     return iv;
1924
1925   stmt = SSA_NAME_DEF_STMT (expr);
1926   if (gphi *phi = dyn_cast <gphi *> (stmt))
1927     {
1928       ssa_op_iter iter;
1929       use_operand_p use_p;
1930       basic_block phi_bb = gimple_bb (phi);
1931
1932       /* Skip loop header PHI that doesn't define biv.  */
1933       if (phi_bb->loop_father == data->current_loop)
1934         return NULL;
1935
1936       if (virtual_operand_p (gimple_phi_result (phi)))
1937         return NULL;
1938
1939       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1940         {
1941           tree use = USE_FROM_PTR (use_p);
1942           iv = find_deriving_biv_for_expr (data, use);
1943           if (iv)
1944             return iv;
1945         }
1946       return NULL;
1947     }
1948   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1949     return NULL;
1950
1951   e1 = gimple_assign_rhs1 (stmt);
1952   code = gimple_assign_rhs_code (stmt);
1953   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1954     return find_deriving_biv_for_expr (data, e1);
1955
1956   switch (code)
1957     {
1958     case MULT_EXPR:
1959     case PLUS_EXPR:
1960     case MINUS_EXPR:
1961     case POINTER_PLUS_EXPR:
1962       /* Increments, decrements and multiplications by a constant
1963          are simple.  */
1964       e2 = gimple_assign_rhs2 (stmt);
1965       iv = find_deriving_biv_for_expr (data, e2);
1966       if (iv)
1967         return iv;
1968       gcc_fallthrough ();
1969
1970     CASE_CONVERT:
1971       /* Casts are simple.  */
1972       return find_deriving_biv_for_expr (data, e1);
1973
1974     default:
1975       break;
1976     }
1977
1978   return NULL;
1979 }
1980
1981 /* Record BIV, its predecessor and successor that they are used in
1982    address type uses.  */
1983
1984 static void
1985 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1986 {
1987   unsigned i;
1988   tree type, base_1, base_2;
1989   bitmap_iterator bi;
1990
1991   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1992       || biv->have_address_use || !biv->no_overflow)
1993     return;
1994
1995   type = TREE_TYPE (biv->base);
1996   if (!INTEGRAL_TYPE_P (type))
1997     return;
1998
1999   biv->have_address_use = true;
2000   data->bivs_not_used_in_addr--;
2001   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2002   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2003     {
2004       struct iv *iv = ver_info (data, i)->iv;
2005
2006       if (!iv || !iv->biv_p || integer_zerop (iv->step)
2007           || iv->have_address_use || !iv->no_overflow)
2008         continue;
2009
2010       if (type != TREE_TYPE (iv->base)
2011           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2012         continue;
2013
2014       if (!operand_equal_p (biv->step, iv->step, 0))
2015         continue;
2016
2017       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2018       if (operand_equal_p (base_1, iv->base, 0)
2019           || operand_equal_p (base_2, biv->base, 0))
2020         {
2021           iv->have_address_use = true;
2022           data->bivs_not_used_in_addr--;
2023         }
2024     }
2025 }
2026
2027 /* Cumulates the steps of indices into DATA and replaces their values with the
2028    initial ones.  Returns false when the value of the index cannot be determined.
2029    Callback for for_each_index.  */
2030
2031 struct ifs_ivopts_data
2032 {
2033   struct ivopts_data *ivopts_data;
2034   gimple *stmt;
2035   tree step;
2036 };
2037
2038 static bool
2039 idx_find_step (tree base, tree *idx, void *data)
2040 {
2041   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2042   struct iv *iv;
2043   bool use_overflow_semantics = false;
2044   tree step, iv_base, iv_step, lbound, off;
2045   struct loop *loop = dta->ivopts_data->current_loop;
2046
2047   /* If base is a component ref, require that the offset of the reference
2048      be invariant.  */
2049   if (TREE_CODE (base) == COMPONENT_REF)
2050     {
2051       off = component_ref_field_offset (base);
2052       return expr_invariant_in_loop_p (loop, off);
2053     }
2054
2055   /* If base is array, first check whether we will be able to move the
2056      reference out of the loop (in order to take its address in strength
2057      reduction).  In order for this to work we need both lower bound
2058      and step to be loop invariants.  */
2059   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2060     {
2061       /* Moreover, for a range, the size needs to be invariant as well.  */
2062       if (TREE_CODE (base) == ARRAY_RANGE_REF
2063           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2064         return false;
2065
2066       step = array_ref_element_size (base);
2067       lbound = array_ref_low_bound (base);
2068
2069       if (!expr_invariant_in_loop_p (loop, step)
2070           || !expr_invariant_in_loop_p (loop, lbound))
2071         return false;
2072     }
2073
2074   if (TREE_CODE (*idx) != SSA_NAME)
2075     return true;
2076
2077   iv = get_iv (dta->ivopts_data, *idx);
2078   if (!iv)
2079     return false;
2080
2081   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2082           *&x[0], which is not folded and does not trigger the
2083           ARRAY_REF path below.  */
2084   *idx = iv->base;
2085
2086   if (integer_zerop (iv->step))
2087     return true;
2088
2089   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2090     {
2091       step = array_ref_element_size (base);
2092
2093       /* We only handle addresses whose step is an integer constant.  */
2094       if (TREE_CODE (step) != INTEGER_CST)
2095         return false;
2096     }
2097   else
2098     /* The step for pointer arithmetics already is 1 byte.  */
2099     step = size_one_node;
2100
2101   iv_base = iv->base;
2102   iv_step = iv->step;
2103   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2104     use_overflow_semantics = true;
2105
2106   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2107                             sizetype, &iv_base, &iv_step, dta->stmt,
2108                             use_overflow_semantics))
2109     {
2110       /* The index might wrap.  */
2111       return false;
2112     }
2113
2114   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2115   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2116
2117   if (dta->ivopts_data->bivs_not_used_in_addr)
2118     {
2119       if (!iv->biv_p)
2120         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2121
2122       record_biv_for_address_use (dta->ivopts_data, iv);
2123     }
2124   return true;
2125 }
2126
2127 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2128    object is passed to it in DATA.  */
2129
2130 static bool
2131 idx_record_use (tree base, tree *idx,
2132                 void *vdata)
2133 {
2134   struct ivopts_data *data = (struct ivopts_data *) vdata;
2135   find_interesting_uses_op (data, *idx);
2136   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2137     {
2138       find_interesting_uses_op (data, array_ref_element_size (base));
2139       find_interesting_uses_op (data, array_ref_low_bound (base));
2140     }
2141   return true;
2142 }
2143
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145    store cst to MUL and return true.  Otherwise return false.
2146    The returned value is always sign-extended, regardless of the
2147    signedness of TOP and BOT.  */
2148
2149 static bool
2150 constant_multiple_of (tree top, tree bot, widest_int *mul)
2151 {
2152   tree mby;
2153   enum tree_code code;
2154   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2155   widest_int res, p0, p1;
2156
2157   STRIP_NOPS (top);
2158   STRIP_NOPS (bot);
2159
2160   if (operand_equal_p (top, bot, 0))
2161     {
2162       *mul = 1;
2163       return true;
2164     }
2165
2166   code = TREE_CODE (top);
2167   switch (code)
2168     {
2169     case MULT_EXPR:
2170       mby = TREE_OPERAND (top, 1);
2171       if (TREE_CODE (mby) != INTEGER_CST)
2172         return false;
2173
2174       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2175         return false;
2176
2177       *mul = wi::sext (res * wi::to_widest (mby), precision);
2178       return true;
2179
2180     case PLUS_EXPR:
2181     case MINUS_EXPR:
2182       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2183           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2184         return false;
2185
2186       if (code == MINUS_EXPR)
2187         p1 = -p1;
2188       *mul = wi::sext (p0 + p1, precision);
2189       return true;
2190
2191     case INTEGER_CST:
2192       if (TREE_CODE (bot) != INTEGER_CST)
2193         return false;
2194
2195       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2196       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2197       if (p1 == 0)
2198         return false;
2199       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2200       return res == 0;
2201
2202     default:
2203       if (POLY_INT_CST_P (top)
2204           && POLY_INT_CST_P (bot)
2205           && constant_multiple_p (wi::to_poly_widest (top),
2206                                   wi::to_poly_widest (bot), mul))
2207         return true;
2208
2209       return false;
2210     }
2211 }
2212
2213 /* Return true if memory reference REF with step STEP may be unaligned.  */
2214
2215 static bool
2216 may_be_unaligned_p (tree ref, tree step)
2217 {
2218   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219      thus they are not misaligned.  */
2220   if (TREE_CODE (ref) == TARGET_MEM_REF)
2221     return false;
2222
2223   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2224   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2225     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2226
2227   unsigned HOST_WIDE_INT bitpos;
2228   unsigned int ref_align;
2229   get_object_alignment_1 (ref, &ref_align, &bitpos);
2230   if (ref_align < align
2231       || (bitpos % align) != 0
2232       || (bitpos % BITS_PER_UNIT) != 0)
2233     return true;
2234
2235   unsigned int trailing_zeros = tree_ctz (step);
2236   if (trailing_zeros < HOST_BITS_PER_INT
2237       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2238     return true;
2239
2240   return false;
2241 }
2242
2243 /* Return true if EXPR may be non-addressable.   */
2244
2245 bool
2246 may_be_nonaddressable_p (tree expr)
2247 {
2248   switch (TREE_CODE (expr))
2249     {
2250     case VAR_DECL:
2251       /* Check if it's a register variable.  */
2252       return DECL_HARD_REGISTER (expr);
2253
2254     case TARGET_MEM_REF:
2255       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2256          target, thus they are always addressable.  */
2257       return false;
2258
2259     case MEM_REF:
2260       /* Likewise for MEM_REFs, modulo the storage order.  */
2261       return REF_REVERSE_STORAGE_ORDER (expr);
2262
2263     case BIT_FIELD_REF:
2264       if (REF_REVERSE_STORAGE_ORDER (expr))
2265         return true;
2266       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2267
2268     case COMPONENT_REF:
2269       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2270         return true;
2271       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2272              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2273
2274     case ARRAY_REF:
2275     case ARRAY_RANGE_REF:
2276       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2277         return true;
2278       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2279
2280     case VIEW_CONVERT_EXPR:
2281       /* This kind of view-conversions may wrap non-addressable objects
2282          and make them look addressable.  After some processing the
2283          non-addressability may be uncovered again, causing ADDR_EXPRs
2284          of inappropriate objects to be built.  */
2285       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2286           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2287         return true;
2288       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2289
2290     CASE_CONVERT:
2291       return true;
2292
2293     default:
2294       break;
2295     }
2296
2297   return false;
2298 }
2299
2300 /* Finds addresses in *OP_P inside STMT.  */
2301
2302 static void
2303 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2304                                tree *op_p)
2305 {
2306   tree base = *op_p, step = size_zero_node;
2307   struct iv *civ;
2308   struct ifs_ivopts_data ifs_ivopts_data;
2309
2310   /* Do not play with volatile memory references.  A bit too conservative,
2311      perhaps, but safe.  */
2312   if (gimple_has_volatile_ops (stmt))
2313     goto fail;
2314
2315   /* Ignore bitfields for now.  Not really something terribly complicated
2316      to handle.  TODO.  */
2317   if (TREE_CODE (base) == BIT_FIELD_REF)
2318     goto fail;
2319
2320   base = unshare_expr (base);
2321
2322   if (TREE_CODE (base) == TARGET_MEM_REF)
2323     {
2324       tree type = build_pointer_type (TREE_TYPE (base));
2325       tree astep;
2326
2327       if (TMR_BASE (base)
2328           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2329         {
2330           civ = get_iv (data, TMR_BASE (base));
2331           if (!civ)
2332             goto fail;
2333
2334           TMR_BASE (base) = civ->base;
2335           step = civ->step;
2336         }
2337       if (TMR_INDEX2 (base)
2338           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2339         {
2340           civ = get_iv (data, TMR_INDEX2 (base));
2341           if (!civ)
2342             goto fail;
2343
2344           TMR_INDEX2 (base) = civ->base;
2345           step = civ->step;
2346         }
2347       if (TMR_INDEX (base)
2348           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2349         {
2350           civ = get_iv (data, TMR_INDEX (base));
2351           if (!civ)
2352             goto fail;
2353
2354           TMR_INDEX (base) = civ->base;
2355           astep = civ->step;
2356
2357           if (astep)
2358             {
2359               if (TMR_STEP (base))
2360                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2361
2362               step = fold_build2 (PLUS_EXPR, type, step, astep);
2363             }
2364         }
2365
2366       if (integer_zerop (step))
2367         goto fail;
2368       base = tree_mem_ref_addr (type, base);
2369     }
2370   else
2371     {
2372       ifs_ivopts_data.ivopts_data = data;
2373       ifs_ivopts_data.stmt = stmt;
2374       ifs_ivopts_data.step = size_zero_node;
2375       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2376           || integer_zerop (ifs_ivopts_data.step))
2377         goto fail;
2378       step = ifs_ivopts_data.step;
2379
2380       /* Check that the base expression is addressable.  This needs
2381          to be done after substituting bases of IVs into it.  */
2382       if (may_be_nonaddressable_p (base))
2383         goto fail;
2384
2385       /* Moreover, on strict alignment platforms, check that it is
2386          sufficiently aligned.  */
2387       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2388         goto fail;
2389
2390       base = build_fold_addr_expr (base);
2391
2392       /* Substituting bases of IVs into the base expression might
2393          have caused folding opportunities.  */
2394       if (TREE_CODE (base) == ADDR_EXPR)
2395         {
2396           tree *ref = &TREE_OPERAND (base, 0);
2397           while (handled_component_p (*ref))
2398             ref = &TREE_OPERAND (*ref, 0);
2399           if (TREE_CODE (*ref) == MEM_REF)
2400             {
2401               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2402                                       TREE_OPERAND (*ref, 0),
2403                                       TREE_OPERAND (*ref, 1));
2404               if (tem)
2405                 *ref = tem;
2406             }
2407         }
2408     }
2409
2410   civ = alloc_iv (data, base, step);
2411   /* Fail if base object of this memory reference is unknown.  */
2412   if (civ->base_object == NULL_TREE)
2413     goto fail;
2414
2415   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2416   return;
2417
2418 fail:
2419   for_each_index (op_p, idx_record_use, data);
2420 }
2421
2422 /* Finds and records invariants used in STMT.  */
2423
2424 static void
2425 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2426 {
2427   ssa_op_iter iter;
2428   use_operand_p use_p;
2429   tree op;
2430
2431   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2432     {
2433       op = USE_FROM_PTR (use_p);
2434       record_invariant (data, op, false);
2435     }
2436 }
2437
2438 /* CALL calls an internal function.  If operand *OP_P will become an
2439    address when the call is expanded, return the type of the memory
2440    being addressed, otherwise return null.  */
2441
2442 static tree
2443 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2444 {
2445   switch (gimple_call_internal_fn (call))
2446     {
2447     case IFN_MASK_LOAD:
2448       if (op_p == gimple_call_arg_ptr (call, 0))
2449         return TREE_TYPE (gimple_call_lhs (call));
2450       return NULL_TREE;
2451
2452     case IFN_MASK_STORE:
2453       if (op_p == gimple_call_arg_ptr (call, 0))
2454         return TREE_TYPE (gimple_call_arg (call, 3));
2455       return NULL_TREE;
2456
2457     default:
2458       return NULL_TREE;
2459     }
2460 }
2461
2462 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2463    Return true if the operand will become an address when STMT
2464    is expanded and record the associated address use if so.  */
2465
2466 static bool
2467 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2468                        struct iv *iv)
2469 {
2470   /* Fail if base object of this memory reference is unknown.  */
2471   if (iv->base_object == NULL_TREE)
2472     return false;
2473
2474   tree mem_type = NULL_TREE;
2475   if (gcall *call = dyn_cast <gcall *> (stmt))
2476     if (gimple_call_internal_p (call))
2477       mem_type = get_mem_type_for_internal_fn (call, op_p);
2478   if (mem_type)
2479     {
2480       iv = alloc_iv (data, iv->base, iv->step);
2481       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2482       return true;
2483     }
2484   return false;
2485 }
2486
2487 /* Finds interesting uses of induction variables in the statement STMT.  */
2488
2489 static void
2490 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2491 {
2492   struct iv *iv;
2493   tree op, *lhs, *rhs;
2494   ssa_op_iter iter;
2495   use_operand_p use_p;
2496   enum tree_code code;
2497
2498   find_invariants_stmt (data, stmt);
2499
2500   if (gimple_code (stmt) == GIMPLE_COND)
2501     {
2502       find_interesting_uses_cond (data, stmt);
2503       return;
2504     }
2505
2506   if (is_gimple_assign (stmt))
2507     {
2508       lhs = gimple_assign_lhs_ptr (stmt);
2509       rhs = gimple_assign_rhs1_ptr (stmt);
2510
2511       if (TREE_CODE (*lhs) == SSA_NAME)
2512         {
2513           /* If the statement defines an induction variable, the uses are not
2514              interesting by themselves.  */
2515
2516           iv = get_iv (data, *lhs);
2517
2518           if (iv && !integer_zerop (iv->step))
2519             return;
2520         }
2521
2522       code = gimple_assign_rhs_code (stmt);
2523       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2524           && (REFERENCE_CLASS_P (*rhs)
2525               || is_gimple_val (*rhs)))
2526         {
2527           if (REFERENCE_CLASS_P (*rhs))
2528             find_interesting_uses_address (data, stmt, rhs);
2529           else
2530             find_interesting_uses_op (data, *rhs);
2531
2532           if (REFERENCE_CLASS_P (*lhs))
2533             find_interesting_uses_address (data, stmt, lhs);
2534           return;
2535         }
2536       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2537         {
2538           find_interesting_uses_cond (data, stmt);
2539           return;
2540         }
2541
2542       /* TODO -- we should also handle address uses of type
2543
2544          memory = call (whatever);
2545
2546          and
2547
2548          call (memory).  */
2549     }
2550
2551   if (gimple_code (stmt) == GIMPLE_PHI
2552       && gimple_bb (stmt) == data->current_loop->header)
2553     {
2554       iv = get_iv (data, PHI_RESULT (stmt));
2555
2556       if (iv && !integer_zerop (iv->step))
2557         return;
2558     }
2559
2560   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2561     {
2562       op = USE_FROM_PTR (use_p);
2563
2564       if (TREE_CODE (op) != SSA_NAME)
2565         continue;
2566
2567       iv = get_iv (data, op);
2568       if (!iv)
2569         continue;
2570
2571       if (!find_address_like_use (data, stmt, use_p->use, iv))
2572         find_interesting_uses_op (data, op);
2573     }
2574 }
2575
2576 /* Finds interesting uses of induction variables outside of loops
2577    on loop exit edge EXIT.  */
2578
2579 static void
2580 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2581 {
2582   gphi *phi;
2583   gphi_iterator psi;
2584   tree def;
2585
2586   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2587     {
2588       phi = psi.phi ();
2589       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2590       if (!virtual_operand_p (def))
2591         find_interesting_uses_op (data, def);
2592     }
2593 }
2594
2595 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2596    mode for memory reference represented by USE.  */
2597
2598 static GTY (()) vec<rtx, va_gc> *addr_list;
2599
2600 static bool
2601 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2602 {
2603   rtx reg, addr;
2604   unsigned list_index;
2605   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2606   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2607
2608   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2609   if (list_index >= vec_safe_length (addr_list))
2610     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2611
2612   addr = (*addr_list)[list_index];
2613   if (!addr)
2614     {
2615       addr_mode = targetm.addr_space.address_mode (as);
2616       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2617       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2618       (*addr_list)[list_index] = addr;
2619     }
2620   else
2621     addr_mode = GET_MODE (addr);
2622
2623   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2624   return (memory_address_addr_space_p (mem_mode, addr, as));
2625 }
2626
2627 /* Comparison function to sort group in ascending order of addr_offset.  */
2628
2629 static int
2630 group_compare_offset (const void *a, const void *b)
2631 {
2632   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2633   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2634
2635   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2636 }
2637
2638 /* Check if small groups should be split.  Return true if no group
2639    contains more than two uses with distinct addr_offsets.  Return
2640    false otherwise.  We want to split such groups because:
2641
2642      1) Small groups don't have much benefit and may interfer with
2643         general candidate selection.
2644      2) Size for problem with only small groups is usually small and
2645         general algorithm can handle it well.
2646
2647    TODO -- Above claim may not hold when we want to merge memory
2648    accesses with conseuctive addresses.  */
2649
2650 static bool
2651 split_small_address_groups_p (struct ivopts_data *data)
2652 {
2653   unsigned int i, j, distinct = 1;
2654   struct iv_use *pre;
2655   struct iv_group *group;
2656
2657   for (i = 0; i < data->vgroups.length (); i++)
2658     {
2659       group = data->vgroups[i];
2660       if (group->vuses.length () == 1)
2661         continue;
2662
2663       gcc_assert (address_p (group->type));
2664       if (group->vuses.length () == 2)
2665         {
2666           if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2667                                       group->vuses[1]->addr_offset) > 0)
2668             std::swap (group->vuses[0], group->vuses[1]);
2669         }
2670       else
2671         group->vuses.qsort (group_compare_offset);
2672
2673       if (distinct > 2)
2674         continue;
2675
2676       distinct = 1;
2677       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2678         {
2679           if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2680             {
2681               pre = group->vuses[j];
2682               distinct++;
2683             }
2684
2685           if (distinct > 2)
2686             break;
2687         }
2688     }
2689
2690   return (distinct <= 2);
2691 }
2692
2693 /* For each group of address type uses, this function further groups
2694    these uses according to the maximum offset supported by target's
2695    [base + offset] addressing mode.  */
2696
2697 static void
2698 split_address_groups (struct ivopts_data *data)
2699 {
2700   unsigned int i, j;
2701   /* Always split group.  */
2702   bool split_p = split_small_address_groups_p (data);
2703
2704   for (i = 0; i < data->vgroups.length (); i++)
2705     {
2706       struct iv_group *new_group = NULL;
2707       struct iv_group *group = data->vgroups[i];
2708       struct iv_use *use = group->vuses[0];
2709
2710       use->id = 0;
2711       use->group_id = group->id;
2712       if (group->vuses.length () == 1)
2713         continue;
2714
2715       gcc_assert (address_p (use->type));
2716
2717       for (j = 1; j < group->vuses.length ();)
2718         {
2719           struct iv_use *next = group->vuses[j];
2720           poly_int64 offset = next->addr_offset - use->addr_offset;
2721
2722           /* Split group if aksed to, or the offset against the first
2723              use can't fit in offset part of addressing mode.  IV uses
2724              having the same offset are still kept in one group.  */
2725           if (maybe_ne (offset, 0)
2726               && (split_p || !addr_offset_valid_p (use, offset)))
2727             {
2728               if (!new_group)
2729                 new_group = record_group (data, group->type);
2730               group->vuses.ordered_remove (j);
2731               new_group->vuses.safe_push (next);
2732               continue;
2733             }
2734
2735           next->id = j;
2736           next->group_id = group->id;
2737           j++;
2738         }
2739     }
2740 }
2741
2742 /* Finds uses of the induction variables that are interesting.  */
2743
2744 static void
2745 find_interesting_uses (struct ivopts_data *data)
2746 {
2747   basic_block bb;
2748   gimple_stmt_iterator bsi;
2749   basic_block *body = get_loop_body (data->current_loop);
2750   unsigned i;
2751   edge e;
2752
2753   for (i = 0; i < data->current_loop->num_nodes; i++)
2754     {
2755       edge_iterator ei;
2756       bb = body[i];
2757
2758       FOR_EACH_EDGE (e, ei, bb->succs)
2759         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2760             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2761           find_interesting_uses_outside (data, e);
2762
2763       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2764         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2765       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2766         if (!is_gimple_debug (gsi_stmt (bsi)))
2767           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2768     }
2769   free (body);
2770
2771   split_address_groups (data);
2772
2773   if (dump_file && (dump_flags & TDF_DETAILS))
2774     {
2775       fprintf (dump_file, "\n<IV Groups>:\n");
2776       dump_groups (dump_file, data);
2777       fprintf (dump_file, "\n");
2778     }
2779 }
2780
2781 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2782    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2783    we are at the top-level of the processed address.  */
2784
2785 static tree
2786 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2787                 poly_int64 *offset)
2788 {
2789   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2790   enum tree_code code;
2791   tree type, orig_type = TREE_TYPE (expr);
2792   poly_int64 off0, off1;
2793   HOST_WIDE_INT st;
2794   tree orig_expr = expr;
2795
2796   STRIP_NOPS (expr);
2797
2798   type = TREE_TYPE (expr);
2799   code = TREE_CODE (expr);
2800   *offset = 0;
2801
2802   switch (code)
2803     {
2804     case POINTER_PLUS_EXPR:
2805     case PLUS_EXPR:
2806     case MINUS_EXPR:
2807       op0 = TREE_OPERAND (expr, 0);
2808       op1 = TREE_OPERAND (expr, 1);
2809
2810       op0 = strip_offset_1 (op0, false, false, &off0);
2811       op1 = strip_offset_1 (op1, false, false, &off1);
2812
2813       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2814       if (op0 == TREE_OPERAND (expr, 0)
2815           && op1 == TREE_OPERAND (expr, 1))
2816         return orig_expr;
2817
2818       if (integer_zerop (op1))
2819         expr = op0;
2820       else if (integer_zerop (op0))
2821         {
2822           if (code == MINUS_EXPR)
2823             expr = fold_build1 (NEGATE_EXPR, type, op1);
2824           else
2825             expr = op1;
2826         }
2827       else
2828         expr = fold_build2 (code, type, op0, op1);
2829
2830       return fold_convert (orig_type, expr);
2831
2832     case MULT_EXPR:
2833       op1 = TREE_OPERAND (expr, 1);
2834       if (!cst_and_fits_in_hwi (op1))
2835         return orig_expr;
2836
2837       op0 = TREE_OPERAND (expr, 0);
2838       op0 = strip_offset_1 (op0, false, false, &off0);
2839       if (op0 == TREE_OPERAND (expr, 0))
2840         return orig_expr;
2841
2842       *offset = off0 * int_cst_value (op1);
2843       if (integer_zerop (op0))
2844         expr = op0;
2845       else
2846         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2847
2848       return fold_convert (orig_type, expr);
2849
2850     case ARRAY_REF:
2851     case ARRAY_RANGE_REF:
2852       if (!inside_addr)
2853         return orig_expr;
2854
2855       step = array_ref_element_size (expr);
2856       if (!cst_and_fits_in_hwi (step))
2857         break;
2858
2859       st = int_cst_value (step);
2860       op1 = TREE_OPERAND (expr, 1);
2861       op1 = strip_offset_1 (op1, false, false, &off1);
2862       *offset = off1 * st;
2863
2864       if (top_compref
2865           && integer_zerop (op1))
2866         {
2867           /* Strip the component reference completely.  */
2868           op0 = TREE_OPERAND (expr, 0);
2869           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2870           *offset += off0;
2871           return op0;
2872         }
2873       break;
2874
2875     case COMPONENT_REF:
2876       {
2877         tree field;
2878
2879         if (!inside_addr)
2880           return orig_expr;
2881
2882         tmp = component_ref_field_offset (expr);
2883         field = TREE_OPERAND (expr, 1);
2884         if (top_compref
2885             && cst_and_fits_in_hwi (tmp)
2886             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2887           {
2888             HOST_WIDE_INT boffset, abs_off;
2889
2890             /* Strip the component reference completely.  */
2891             op0 = TREE_OPERAND (expr, 0);
2892             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2893             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2894             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2895             if (boffset < 0)
2896               abs_off = -abs_off;
2897
2898             *offset = off0 + int_cst_value (tmp) + abs_off;
2899             return op0;
2900           }
2901       }
2902       break;
2903
2904     case ADDR_EXPR:
2905       op0 = TREE_OPERAND (expr, 0);
2906       op0 = strip_offset_1 (op0, true, true, &off0);
2907       *offset += off0;
2908
2909       if (op0 == TREE_OPERAND (expr, 0))
2910         return orig_expr;
2911
2912       expr = build_fold_addr_expr (op0);
2913       return fold_convert (orig_type, expr);
2914
2915     case MEM_REF:
2916       /* ???  Offset operand?  */
2917       inside_addr = false;
2918       break;
2919
2920     default:
2921       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2922         return build_int_cst (orig_type, 0);
2923       return orig_expr;
2924     }
2925
2926   /* Default handling of expressions for that we want to recurse into
2927      the first operand.  */
2928   op0 = TREE_OPERAND (expr, 0);
2929   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2930   *offset += off0;
2931
2932   if (op0 == TREE_OPERAND (expr, 0)
2933       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2934     return orig_expr;
2935
2936   expr = copy_node (expr);
2937   TREE_OPERAND (expr, 0) = op0;
2938   if (op1)
2939     TREE_OPERAND (expr, 1) = op1;
2940
2941   /* Inside address, we might strip the top level component references,
2942      thus changing type of the expression.  Handling of ADDR_EXPR
2943      will fix that.  */
2944   expr = fold_convert (orig_type, expr);
2945
2946   return expr;
2947 }
2948
2949 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2950
2951 tree
2952 strip_offset (tree expr, poly_uint64_pod *offset)
2953 {
2954   poly_int64 off;
2955   tree core = strip_offset_1 (expr, false, false, &off);
2956   *offset = off;
2957   return core;
2958 }
2959
2960 /* Returns variant of TYPE that can be used as base for different uses.
2961    We return unsigned type with the same precision, which avoids problems
2962    with overflows.  */
2963
2964 static tree
2965 generic_type_for (tree type)
2966 {
2967   if (POINTER_TYPE_P (type))
2968     return unsigned_type_for (type);
2969
2970   if (TYPE_UNSIGNED (type))
2971     return type;
2972
2973   return unsigned_type_for (type);
2974 }
2975
2976 /* Private data for walk_tree.  */
2977
2978 struct walk_tree_data
2979 {
2980   bitmap *inv_vars;
2981   struct ivopts_data *idata;
2982 };
2983
2984 /* Callback function for walk_tree, it records invariants and symbol
2985    reference in *EXPR_P.  DATA is the structure storing result info.  */
2986
2987 static tree
2988 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2989 {
2990   tree op = *expr_p;
2991   struct version_info *info;
2992   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2993
2994   if (TREE_CODE (op) != SSA_NAME)
2995     return NULL_TREE;
2996
2997   info = name_info (wdata->idata, op);
2998   /* Because we expand simple operations when finding IVs, loop invariant
2999      variable that isn't referred by the original loop could be used now.
3000      Record such invariant variables here.  */
3001   if (!info->iv)
3002     {
3003       struct ivopts_data *idata = wdata->idata;
3004       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3005
3006       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3007         {
3008           set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3009           record_invariant (idata, op, false);
3010         }
3011     }
3012   if (!info->inv_id || info->has_nonlin_use)
3013     return NULL_TREE;
3014
3015   if (!*wdata->inv_vars)
3016     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3017   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3018
3019   return NULL_TREE;
3020 }
3021
3022 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
3023    store it.  */
3024
3025 static inline void
3026 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3027 {
3028   struct walk_tree_data wdata;
3029
3030   if (!inv_vars)
3031     return;
3032
3033   wdata.idata = data;
3034   wdata.inv_vars = inv_vars;
3035   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3036 }
3037
3038 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3039    will be recorded if it doesn't exist yet.  Given below two exprs:
3040      inv_expr + cst1, inv_expr + cst2
3041    It's hard to make decision whether constant part should be stripped
3042    or not.  We choose to not strip based on below facts:
3043      1) We need to count ADD cost for constant part if it's stripped,
3044         which isn't always trivial where this functions is called.
3045      2) Stripping constant away may be conflict with following loop
3046         invariant hoisting pass.
3047      3) Not stripping constant away results in more invariant exprs,
3048         which usually leads to decision preferring lower reg pressure.  */
3049
3050 static iv_inv_expr_ent *
3051 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3052 {
3053   STRIP_NOPS (inv_expr);
3054
3055   if (poly_int_tree_p (inv_expr)
3056       || TREE_CODE (inv_expr) == SSA_NAME)
3057     return NULL;
3058
3059   /* Don't strip constant part away as we used to.  */
3060
3061   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3062   struct iv_inv_expr_ent ent;
3063   ent.expr = inv_expr;
3064   ent.hash = iterative_hash_expr (inv_expr, 0);
3065   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3066
3067   if (!*slot)
3068     {
3069       *slot = XNEW (struct iv_inv_expr_ent);
3070       (*slot)->expr = inv_expr;
3071       (*slot)->hash = ent.hash;
3072       (*slot)->id = ++data->max_inv_expr_id;
3073     }
3074
3075   return *slot;
3076 }
3077
3078 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3079    position to POS.  If USE is not NULL, the candidate is set as related to
3080    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3081    replacement of the final value of the iv by a direct computation.  */
3082
3083 static struct iv_cand *
3084 add_candidate_1 (struct ivopts_data *data,
3085                  tree base, tree step, bool important, enum iv_position pos,
3086                  struct iv_use *use, gimple *incremented_at,
3087                  struct iv *orig_iv = NULL)
3088 {
3089   unsigned i;
3090   struct iv_cand *cand = NULL;
3091   tree type, orig_type;
3092
3093   gcc_assert (base && step);
3094
3095   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3096      live, but the ivopts code may replace a real pointer with one
3097      pointing before or after the memory block that is then adjusted
3098      into the memory block during the loop.  FIXME: It would likely be
3099      better to actually force the pointer live and still use ivopts;
3100      for example, it would be enough to write the pointer into memory
3101      and keep it there until after the loop.  */
3102   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3103     return NULL;
3104
3105   /* For non-original variables, make sure their values are computed in a type
3106      that does not invoke undefined behavior on overflows (since in general,
3107      we cannot prove that these induction variables are non-wrapping).  */
3108   if (pos != IP_ORIGINAL)
3109     {
3110       orig_type = TREE_TYPE (base);
3111       type = generic_type_for (orig_type);
3112       if (type != orig_type)
3113         {
3114           base = fold_convert (type, base);
3115           step = fold_convert (type, step);
3116         }
3117     }
3118
3119   for (i = 0; i < data->vcands.length (); i++)
3120     {
3121       cand = data->vcands[i];
3122
3123       if (cand->pos != pos)
3124         continue;
3125
3126       if (cand->incremented_at != incremented_at
3127           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3128               && cand->ainc_use != use))
3129         continue;
3130
3131       if (operand_equal_p (base, cand->iv->base, 0)
3132           && operand_equal_p (step, cand->iv->step, 0)
3133           && (TYPE_PRECISION (TREE_TYPE (base))
3134               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3135         break;
3136     }
3137
3138   if (i == data->vcands.length ())
3139     {
3140       cand = XCNEW (struct iv_cand);
3141       cand->id = i;
3142       cand->iv = alloc_iv (data, base, step);
3143       cand->pos = pos;
3144       if (pos != IP_ORIGINAL)
3145         {
3146           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3147           cand->var_after = cand->var_before;
3148         }
3149       cand->important = important;
3150       cand->incremented_at = incremented_at;
3151       data->vcands.safe_push (cand);
3152
3153       if (!poly_int_tree_p (step))
3154         {
3155           find_inv_vars (data, &step, &cand->inv_vars);
3156
3157           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3158           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3159           if (inv_expr != NULL)
3160             {
3161               cand->inv_exprs = cand->inv_vars;
3162               cand->inv_vars = NULL;
3163               if (cand->inv_exprs)
3164                 bitmap_clear (cand->inv_exprs);
3165               else
3166                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3167
3168               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3169             }
3170         }
3171
3172       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3173         cand->ainc_use = use;
3174       else
3175         cand->ainc_use = NULL;
3176
3177       cand->orig_iv = orig_iv;
3178       if (dump_file && (dump_flags & TDF_DETAILS))
3179         dump_cand (dump_file, cand);
3180     }
3181
3182   cand->important |= important;
3183
3184   /* Relate candidate to the group for which it is added.  */
3185   if (use)
3186     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187
3188   return cand;
3189 }
3190
3191 /* Returns true if incrementing the induction variable at the end of the LOOP
3192    is allowed.
3193
3194    The purpose is to avoid splitting latch edge with a biv increment, thus
3195    creating a jump, possibly confusing other optimization passes and leaving
3196    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3197    available (so we do not have a better alternative), or if the latch edge
3198    is already nonempty.  */
3199
3200 static bool
3201 allow_ip_end_pos_p (struct loop *loop)
3202 {
3203   if (!ip_normal_pos (loop))
3204     return true;
3205
3206   if (!empty_block_p (ip_end_pos (loop)))
3207     return true;
3208
3209   return false;
3210 }
3211
3212 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3213    Important field is set to IMPORTANT.  */
3214
3215 static void
3216 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3217                         bool important, struct iv_use *use)
3218 {
3219   basic_block use_bb = gimple_bb (use->stmt);
3220   machine_mode mem_mode;
3221   unsigned HOST_WIDE_INT cstepi;
3222
3223   /* If we insert the increment in any position other than the standard
3224      ones, we must ensure that it is incremented once per iteration.
3225      It must not be in an inner nested loop, or one side of an if
3226      statement.  */
3227   if (use_bb->loop_father != data->current_loop
3228       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3229       || stmt_can_throw_internal (cfun, use->stmt)
3230       || !cst_and_fits_in_hwi (step))
3231     return;
3232
3233   cstepi = int_cst_value (step);
3234
3235   mem_mode = TYPE_MODE (use->mem_type);
3236   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3237         || USE_STORE_PRE_INCREMENT (mem_mode))
3238        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3239       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3240            || USE_STORE_PRE_DECREMENT (mem_mode))
3241           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3242     {
3243       enum tree_code code = MINUS_EXPR;
3244       tree new_base;
3245       tree new_step = step;
3246
3247       if (POINTER_TYPE_P (TREE_TYPE (base)))
3248         {
3249           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3250           code = POINTER_PLUS_EXPR;
3251         }
3252       else
3253         new_step = fold_convert (TREE_TYPE (base), new_step);
3254       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3255       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3256                        use->stmt);
3257     }
3258   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3259         || USE_STORE_POST_INCREMENT (mem_mode))
3260        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3261       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3262            || USE_STORE_POST_DECREMENT (mem_mode))
3263           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3264     {
3265       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3266                        use->stmt);
3267     }
3268 }
3269
3270 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3271    position to POS.  If USE is not NULL, the candidate is set as related to
3272    it.  The candidate computation is scheduled before exit condition and at
3273    the end of loop.  */
3274
3275 static void
3276 add_candidate (struct ivopts_data *data,
3277                tree base, tree step, bool important, struct iv_use *use,
3278                struct iv *orig_iv = NULL)
3279 {
3280   if (ip_normal_pos (data->current_loop))
3281     add_candidate_1 (data, base, step, important,
3282                      IP_NORMAL, use, NULL, orig_iv);
3283   if (ip_end_pos (data->current_loop)
3284       && allow_ip_end_pos_p (data->current_loop))
3285     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3286 }
3287
3288 /* Adds standard iv candidates.  */
3289
3290 static void
3291 add_standard_iv_candidates (struct ivopts_data *data)
3292 {
3293   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3294
3295   /* The same for a double-integer type if it is still fast enough.  */
3296   if (TYPE_PRECISION
3297         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3298       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3299     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3300                    build_int_cst (long_integer_type_node, 1), true, NULL);
3301
3302   /* The same for a double-integer type if it is still fast enough.  */
3303   if (TYPE_PRECISION
3304         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3305       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3306     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3307                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3308 }
3309
3310
3311 /* Adds candidates bases on the old induction variable IV.  */
3312
3313 static void
3314 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3315 {
3316   gimple *phi;
3317   tree def;
3318   struct iv_cand *cand;
3319
3320   /* Check if this biv is used in address type use.  */
3321   if (iv->no_overflow  && iv->have_address_use
3322       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3323       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3324     {
3325       tree base = fold_convert (sizetype, iv->base);
3326       tree step = fold_convert (sizetype, iv->step);
3327
3328       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3329       add_candidate (data, base, step, true, NULL, iv);
3330       /* Add iv cand of the original type only if it has nonlinear use.  */
3331       if (iv->nonlin_use)
3332         add_candidate (data, iv->base, iv->step, true, NULL);
3333     }
3334   else
3335     add_candidate (data, iv->base, iv->step, true, NULL);
3336
3337   /* The same, but with initial value zero.  */
3338   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3339     add_candidate (data, size_int (0), iv->step, true, NULL);
3340   else
3341     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3342                    iv->step, true, NULL);
3343
3344   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3345   if (gimple_code (phi) == GIMPLE_PHI)
3346     {
3347       /* Additionally record the possibility of leaving the original iv
3348          untouched.  */
3349       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3350       /* Don't add candidate if it's from another PHI node because
3351          it's an affine iv appearing in the form of PEELED_CHREC.  */
3352       phi = SSA_NAME_DEF_STMT (def);
3353       if (gimple_code (phi) != GIMPLE_PHI)
3354         {
3355           cand = add_candidate_1 (data,
3356                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3357                                   SSA_NAME_DEF_STMT (def));
3358           if (cand)
3359             {
3360               cand->var_before = iv->ssa_name;
3361               cand->var_after = def;
3362             }
3363         }
3364       else
3365         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3366     }
3367 }
3368
3369 /* Adds candidates based on the old induction variables.  */
3370
3371 static void
3372 add_iv_candidate_for_bivs (struct ivopts_data *data)
3373 {
3374   unsigned i;
3375   struct iv *iv;
3376   bitmap_iterator bi;
3377
3378   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3379     {
3380       iv = ver_info (data, i)->iv;
3381       if (iv && iv->biv_p && !integer_zerop (iv->step))
3382         add_iv_candidate_for_biv (data, iv);
3383     }
3384 }
3385
3386 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3387
3388 static void
3389 record_common_cand (struct ivopts_data *data, tree base,
3390                     tree step, struct iv_use *use)
3391 {
3392   struct iv_common_cand ent;
3393   struct iv_common_cand **slot;
3394
3395   ent.base = base;
3396   ent.step = step;
3397   ent.hash = iterative_hash_expr (base, 0);
3398   ent.hash = iterative_hash_expr (step, ent.hash);
3399
3400   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3401   if (*slot == NULL)
3402     {
3403       *slot = new iv_common_cand ();
3404       (*slot)->base = base;
3405       (*slot)->step = step;
3406       (*slot)->uses.create (8);
3407       (*slot)->hash = ent.hash;
3408       data->iv_common_cands.safe_push ((*slot));
3409     }
3410
3411   gcc_assert (use != NULL);
3412   (*slot)->uses.safe_push (use);
3413   return;
3414 }
3415
3416 /* Comparison function used to sort common candidates.  */
3417
3418 static int
3419 common_cand_cmp (const void *p1, const void *p2)
3420 {
3421   unsigned n1, n2;
3422   const struct iv_common_cand *const *const ccand1
3423     = (const struct iv_common_cand *const *)p1;
3424   const struct iv_common_cand *const *const ccand2
3425     = (const struct iv_common_cand *const *)p2;
3426
3427   n1 = (*ccand1)->uses.length ();
3428   n2 = (*ccand2)->uses.length ();
3429   return n2 - n1;
3430 }
3431
3432 /* Adds IV candidates based on common candidated recorded.  */
3433
3434 static void
3435 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3436 {
3437   unsigned i, j;
3438   struct iv_cand *cand_1, *cand_2;
3439
3440   data->iv_common_cands.qsort (common_cand_cmp);
3441   for (i = 0; i < data->iv_common_cands.length (); i++)
3442     {
3443       struct iv_common_cand *ptr = data->iv_common_cands[i];
3444
3445       /* Only add IV candidate if it's derived from multiple uses.  */
3446       if (ptr->uses.length () <= 1)
3447         break;
3448
3449       cand_1 = NULL;
3450       cand_2 = NULL;
3451       if (ip_normal_pos (data->current_loop))
3452         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3453                                   false, IP_NORMAL, NULL, NULL);
3454
3455       if (ip_end_pos (data->current_loop)
3456           && allow_ip_end_pos_p (data->current_loop))
3457         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3458                                   false, IP_END, NULL, NULL);
3459
3460       /* Bind deriving uses and the new candidates.  */
3461       for (j = 0; j < ptr->uses.length (); j++)
3462         {
3463           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3464           if (cand_1)
3465             bitmap_set_bit (group->related_cands, cand_1->id);
3466           if (cand_2)
3467             bitmap_set_bit (group->related_cands, cand_2->id);
3468         }
3469     }
3470
3471   /* Release data since it is useless from this point.  */
3472   data->iv_common_cand_tab->empty ();
3473   data->iv_common_cands.truncate (0);
3474 }
3475
3476 /* Adds candidates based on the value of USE's iv.  */
3477
3478 static void
3479 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3480 {
3481   poly_uint64 offset;
3482   tree base;
3483   tree basetype;
3484   struct iv *iv = use->iv;
3485
3486   add_candidate (data, iv->base, iv->step, false, use);
3487
3488   /* Record common candidate for use in case it can be shared by others.  */
3489   record_common_cand (data, iv->base, iv->step, use);
3490
3491   /* Record common candidate with initial value zero.  */
3492   basetype = TREE_TYPE (iv->base);
3493   if (POINTER_TYPE_P (basetype))
3494     basetype = sizetype;
3495   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3496
3497   /* Record common candidate with constant offset stripped in base.
3498      Like the use itself, we also add candidate directly for it.  */
3499   base = strip_offset (iv->base, &offset);
3500   if (maybe_ne (offset, 0U) || base != iv->base)
3501     {
3502       record_common_cand (data, base, iv->step, use);
3503       add_candidate (data, base, iv->step, false, use);
3504     }
3505
3506   /* Record common candidate with base_object removed in base.  */
3507   base = iv->base;
3508   STRIP_NOPS (base);
3509   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3510     {
3511       tree step = iv->step;
3512
3513       STRIP_NOPS (step);
3514       base = TREE_OPERAND (base, 1);
3515       step = fold_convert (sizetype, step);
3516       record_common_cand (data, base, step, use);
3517       /* Also record common candidate with offset stripped.  */
3518       base = strip_offset (base, &offset);
3519       if (maybe_ne (offset, 0U))
3520         record_common_cand (data, base, step, use);
3521     }
3522
3523   /* At last, add auto-incremental candidates.  Make such variables
3524      important since other iv uses with same base object may be based
3525      on it.  */
3526   if (use != NULL && address_p (use->type))
3527     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3528 }
3529
3530 /* Adds candidates based on the uses.  */
3531
3532 static void
3533 add_iv_candidate_for_groups (struct ivopts_data *data)
3534 {
3535   unsigned i;
3536
3537   /* Only add candidate for the first use in group.  */
3538   for (i = 0; i < data->vgroups.length (); i++)
3539     {
3540       struct iv_group *group = data->vgroups[i];
3541
3542       gcc_assert (group->vuses[0] != NULL);
3543       add_iv_candidate_for_use (data, group->vuses[0]);
3544     }
3545   add_iv_candidate_derived_from_uses (data);
3546 }
3547
3548 /* Record important candidates and add them to related_cands bitmaps.  */
3549
3550 static void
3551 record_important_candidates (struct ivopts_data *data)
3552 {
3553   unsigned i;
3554   struct iv_group *group;
3555
3556   for (i = 0; i < data->vcands.length (); i++)
3557     {
3558       struct iv_cand *cand = data->vcands[i];
3559
3560       if (cand->important)
3561         bitmap_set_bit (data->important_candidates, i);
3562     }
3563
3564   data->consider_all_candidates = (data->vcands.length ()
3565                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3566
3567   /* Add important candidates to groups' related_cands bitmaps.  */
3568   for (i = 0; i < data->vgroups.length (); i++)
3569     {
3570       group = data->vgroups[i];
3571       bitmap_ior_into (group->related_cands, data->important_candidates);
3572     }
3573 }
3574
3575 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3576    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3577    we allocate a simple list to every use.  */
3578
3579 static void
3580 alloc_use_cost_map (struct ivopts_data *data)
3581 {
3582   unsigned i, size, s;
3583
3584   for (i = 0; i < data->vgroups.length (); i++)
3585     {
3586       struct iv_group *group = data->vgroups[i];
3587
3588       if (data->consider_all_candidates)
3589         size = data->vcands.length ();
3590       else
3591         {
3592           s = bitmap_count_bits (group->related_cands);
3593
3594           /* Round up to the power of two, so that moduling by it is fast.  */
3595           size = s ? (1 << ceil_log2 (s)) : 1;
3596         }
3597
3598       group->n_map_members = size;
3599       group->cost_map = XCNEWVEC (struct cost_pair, size);
3600     }
3601 }
3602
3603 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3604    on invariants INV_VARS and that the value used in expressing it is
3605    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3606
3607 static void
3608 set_group_iv_cost (struct ivopts_data *data,
3609                    struct iv_group *group, struct iv_cand *cand,
3610                    comp_cost cost, bitmap inv_vars, tree value,
3611                    enum tree_code comp, bitmap inv_exprs)
3612 {
3613   unsigned i, s;
3614
3615   if (cost.infinite_cost_p ())
3616     {
3617       BITMAP_FREE (inv_vars);
3618       BITMAP_FREE (inv_exprs);
3619       return;
3620     }
3621
3622   if (data->consider_all_candidates)
3623     {
3624       group->cost_map[cand->id].cand = cand;
3625       group->cost_map[cand->id].cost = cost;
3626       group->cost_map[cand->id].inv_vars = inv_vars;
3627       group->cost_map[cand->id].inv_exprs = inv_exprs;
3628       group->cost_map[cand->id].value = value;
3629       group->cost_map[cand->id].comp = comp;
3630       return;
3631     }
3632
3633   /* n_map_members is a power of two, so this computes modulo.  */
3634   s = cand->id & (group->n_map_members - 1);
3635   for (i = s; i < group->n_map_members; i++)
3636     if (!group->cost_map[i].cand)
3637       goto found;
3638   for (i = 0; i < s; i++)
3639     if (!group->cost_map[i].cand)
3640       goto found;
3641
3642   gcc_unreachable ();
3643
3644 found:
3645   group->cost_map[i].cand = cand;
3646   group->cost_map[i].cost = cost;
3647   group->cost_map[i].inv_vars = inv_vars;
3648   group->cost_map[i].inv_exprs = inv_exprs;
3649   group->cost_map[i].value = value;
3650   group->cost_map[i].comp = comp;
3651 }
3652
3653 /* Gets cost of (GROUP, CAND) pair.  */
3654
3655 static struct cost_pair *
3656 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3657                    struct iv_cand *cand)
3658 {
3659   unsigned i, s;
3660   struct cost_pair *ret;
3661
3662   if (!cand)
3663     return NULL;
3664
3665   if (data->consider_all_candidates)
3666     {
3667       ret = group->cost_map + cand->id;
3668       if (!ret->cand)
3669         return NULL;
3670
3671       return ret;
3672     }
3673
3674   /* n_map_members is a power of two, so this computes modulo.  */
3675   s = cand->id & (group->n_map_members - 1);
3676   for (i = s; i < group->n_map_members; i++)
3677     if (group->cost_map[i].cand == cand)
3678       return group->cost_map + i;
3679     else if (group->cost_map[i].cand == NULL)
3680       return NULL;
3681   for (i = 0; i < s; i++)
3682     if (group->cost_map[i].cand == cand)
3683       return group->cost_map + i;
3684     else if (group->cost_map[i].cand == NULL)
3685       return NULL;
3686
3687   return NULL;
3688 }
3689
3690 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3691 static rtx
3692 produce_memory_decl_rtl (tree obj, int *regno)
3693 {
3694   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3695   machine_mode address_mode = targetm.addr_space.address_mode (as);
3696   rtx x;
3697
3698   gcc_assert (obj);
3699   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3700     {
3701       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3702       x = gen_rtx_SYMBOL_REF (address_mode, name);
3703       SET_SYMBOL_REF_DECL (x, obj);
3704       x = gen_rtx_MEM (DECL_MODE (obj), x);
3705       set_mem_addr_space (x, as);
3706       targetm.encode_section_info (obj, x, true);
3707     }
3708   else
3709     {
3710       x = gen_raw_REG (address_mode, (*regno)++);
3711       x = gen_rtx_MEM (DECL_MODE (obj), x);
3712       set_mem_addr_space (x, as);
3713     }
3714
3715   return x;
3716 }
3717
3718 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3719    walk_tree.  DATA contains the actual fake register number.  */
3720
3721 static tree
3722 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3723 {
3724   tree obj = NULL_TREE;
3725   rtx x = NULL_RTX;
3726   int *regno = (int *) data;
3727
3728   switch (TREE_CODE (*expr_p))
3729     {
3730     case ADDR_EXPR:
3731       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3732            handled_component_p (*expr_p);
3733            expr_p = &TREE_OPERAND (*expr_p, 0))
3734         continue;
3735       obj = *expr_p;
3736       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3737         x = produce_memory_decl_rtl (obj, regno);
3738       break;
3739
3740     case SSA_NAME:
3741       *ws = 0;
3742       obj = SSA_NAME_VAR (*expr_p);
3743       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3744       if (!obj)
3745         return NULL_TREE;
3746       if (!DECL_RTL_SET_P (obj))
3747         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3748       break;
3749
3750     case VAR_DECL:
3751     case PARM_DECL:
3752     case RESULT_DECL:
3753       *ws = 0;
3754       obj = *expr_p;
3755
3756       if (DECL_RTL_SET_P (obj))
3757         break;
3758
3759       if (DECL_MODE (obj) == BLKmode)
3760         x = produce_memory_decl_rtl (obj, regno);
3761       else
3762         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3763
3764       break;
3765
3766     default:
3767       break;
3768     }
3769
3770   if (x)
3771     {
3772       decl_rtl_to_reset.safe_push (obj);
3773       SET_DECL_RTL (obj, x);
3774     }
3775
3776   return NULL_TREE;
3777 }
3778
3779 /* Determines cost of the computation of EXPR.  */
3780
3781 static unsigned
3782 computation_cost (tree expr, bool speed)
3783 {
3784   rtx_insn *seq;
3785   rtx rslt;
3786   tree type = TREE_TYPE (expr);
3787   unsigned cost;
3788   /* Avoid using hard regs in ways which may be unsupported.  */
3789   int regno = LAST_VIRTUAL_REGISTER + 1;
3790   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3791   enum node_frequency real_frequency = node->frequency;
3792
3793   node->frequency = NODE_FREQUENCY_NORMAL;
3794   crtl->maybe_hot_insn_p = speed;
3795   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3796   start_sequence ();
3797   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3798   seq = get_insns ();
3799   end_sequence ();
3800   default_rtl_profile ();
3801   node->frequency = real_frequency;
3802
3803   cost = seq_cost (seq, speed);
3804   if (MEM_P (rslt))
3805     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3806                           TYPE_ADDR_SPACE (type), speed);
3807   else if (!REG_P (rslt))
3808     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3809
3810   return cost;
3811 }
3812
3813 /* Returns variable containing the value of candidate CAND at statement AT.  */
3814
3815 static tree
3816 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3817 {
3818   if (stmt_after_increment (loop, cand, stmt))
3819     return cand->var_after;
3820   else
3821     return cand->var_before;
3822 }
3823
3824 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3825    same precision that is at least as wide as the precision of TYPE, stores
3826    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3827    type of A and B.  */
3828
3829 static tree
3830 determine_common_wider_type (tree *a, tree *b)
3831 {
3832   tree wider_type = NULL;
3833   tree suba, subb;
3834   tree atype = TREE_TYPE (*a);
3835
3836   if (CONVERT_EXPR_P (*a))
3837     {
3838       suba = TREE_OPERAND (*a, 0);
3839       wider_type = TREE_TYPE (suba);
3840       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3841         return atype;
3842     }
3843   else
3844     return atype;
3845
3846   if (CONVERT_EXPR_P (*b))
3847     {
3848       subb = TREE_OPERAND (*b, 0);
3849       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3850         return atype;
3851     }
3852   else
3853     return atype;
3854
3855   *a = suba;
3856   *b = subb;
3857   return wider_type;
3858 }
3859
3860 /* Determines the expression by that USE is expressed from induction variable
3861    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3862    decomposed form.  The invariant part is stored in AFF_INV; while variant
3863    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3864    non-null.  Returns false if USE cannot be expressed using CAND.  */
3865
3866 static bool
3867 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3868                        struct iv_cand *cand, struct aff_tree *aff_inv,
3869                        struct aff_tree *aff_var, widest_int *prat = NULL)
3870 {
3871   tree ubase = use->iv->base, ustep = use->iv->step;
3872   tree cbase = cand->iv->base, cstep = cand->iv->step;
3873   tree common_type, uutype, var, cstep_common;
3874   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3875   aff_tree aff_cbase;
3876   widest_int rat;
3877
3878   /* We must have a precision to express the values of use.  */
3879   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3880     return false;
3881
3882   var = var_at_stmt (loop, cand, at);
3883   uutype = unsigned_type_for (utype);
3884
3885   /* If the conversion is not noop, perform it.  */
3886   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3887     {
3888       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3889           && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3890         {
3891           tree inner_base, inner_step, inner_type;
3892           inner_base = TREE_OPERAND (cbase, 0);
3893           if (CONVERT_EXPR_P (cstep))
3894             inner_step = TREE_OPERAND (cstep, 0);
3895           else
3896             inner_step = cstep;
3897
3898           inner_type = TREE_TYPE (inner_base);
3899           /* If candidate is added from a biv whose type is smaller than
3900              ctype, we know both candidate and the biv won't overflow.
3901              In this case, it's safe to skip the convertion in candidate.
3902              As an example, (unsigned short)((unsigned long)A) equals to
3903              (unsigned short)A, if A has a type no larger than short.  */
3904           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3905             {
3906               cbase = inner_base;
3907               cstep = inner_step;
3908             }
3909         }
3910       cbase = fold_convert (uutype, cbase);
3911       cstep = fold_convert (uutype, cstep);
3912       var = fold_convert (uutype, var);
3913     }
3914
3915   /* Ratio is 1 when computing the value of biv cand by itself.
3916      We can't rely on constant_multiple_of in this case because the
3917      use is created after the original biv is selected.  The call
3918      could fail because of inconsistent fold behavior.  See PR68021
3919      for more information.  */
3920   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3921     {
3922       gcc_assert (is_gimple_assign (use->stmt));
3923       gcc_assert (use->iv->ssa_name == cand->var_after);
3924       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3925       rat = 1;
3926     }
3927   else if (!constant_multiple_of (ustep, cstep, &rat))
3928     return false;
3929
3930   if (prat)
3931     *prat = rat;
3932
3933   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3934      type, we achieve better folding by computing their difference in this
3935      wider type, and cast the result to UUTYPE.  We do not need to worry about
3936      overflows, as all the arithmetics will in the end be performed in UUTYPE
3937      anyway.  */
3938   common_type = determine_common_wider_type (&ubase, &cbase);
3939
3940   /* use = ubase - ratio * cbase + ratio * var.  */
3941   tree_to_aff_combination (ubase, common_type, aff_inv);
3942   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3943   tree_to_aff_combination (var, uutype, aff_var);
3944
3945   /* We need to shift the value if we are after the increment.  */
3946   if (stmt_after_increment (loop, cand, at))
3947     {
3948       aff_tree cstep_aff;
3949
3950       if (common_type != uutype)
3951         cstep_common = fold_convert (common_type, cstep);
3952       else
3953         cstep_common = cstep;
3954
3955       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3956       aff_combination_add (&aff_cbase, &cstep_aff);
3957     }
3958
3959   aff_combination_scale (&aff_cbase, -rat);
3960   aff_combination_add (aff_inv, &aff_cbase);
3961   if (common_type != uutype)
3962     aff_combination_convert (aff_inv, uutype);
3963
3964   aff_combination_scale (aff_var, rat);
3965   return true;
3966 }
3967
3968 /* Determines the expression by that USE is expressed from induction variable
3969    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3970    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3971
3972 static bool
3973 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3974                      struct iv_cand *cand, struct aff_tree *aff)
3975 {
3976   aff_tree aff_var;
3977
3978   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3979     return false;
3980
3981   aff_combination_add (aff, &aff_var);
3982   return true;
3983 }
3984
3985 /* Return the type of USE.  */
3986
3987 static tree
3988 get_use_type (struct iv_use *use)
3989 {
3990   tree base_type = TREE_TYPE (use->iv->base);
3991   tree type;
3992
3993   if (use->type == USE_REF_ADDRESS)
3994     {
3995       /* The base_type may be a void pointer.  Create a pointer type based on
3996          the mem_ref instead.  */
3997       type = build_pointer_type (TREE_TYPE (*use->op_p));
3998       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3999                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4000     }
4001   else
4002     type = base_type;
4003
4004   return type;
4005 }
4006
4007 /* Determines the expression by that USE is expressed from induction variable
4008    CAND at statement AT in LOOP.  The computation is unshared.  */
4009
4010 static tree
4011 get_computation_at (struct loop *loop, gimple *at,
4012                     struct iv_use *use, struct iv_cand *cand)
4013 {
4014   aff_tree aff;
4015   tree type = get_use_type (use);
4016
4017   if (!get_computation_aff (loop, at, use, cand, &aff))
4018     return NULL_TREE;
4019   unshare_aff_combination (&aff);
4020   return fold_convert (type, aff_combination_to_tree (&aff));
4021 }
4022
4023 /* Adjust the cost COST for being in loop setup rather than loop body.
4024    If we're optimizing for space, the loop setup overhead is constant;
4025    if we're optimizing for speed, amortize it over the per-iteration cost.
4026    If ROUND_UP_P is true, the result is round up rather than to zero when
4027    optimizing for speed.  */
4028 static unsigned
4029 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4030                    bool round_up_p = false)
4031 {
4032   if (cost == INFTY)
4033     return cost;
4034   else if (optimize_loop_for_speed_p (data->current_loop))
4035     {
4036       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4037       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4038     }
4039   else
4040     return cost;
4041 }
4042
4043 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4044    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4045    calculating the operands of EXPR.  Returns true if successful, and returns
4046    the cost in COST.  */
4047
4048 static bool
4049 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4050                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4051 {
4052   comp_cost res;
4053   tree op1 = TREE_OPERAND (expr, 1);
4054   tree cst = TREE_OPERAND (mult, 1);
4055   tree multop = TREE_OPERAND (mult, 0);
4056   int m = exact_log2 (int_cst_value (cst));
4057   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4058   int as_cost, sa_cost;
4059   bool mult_in_op1;
4060
4061   if (!(m >= 0 && m < maxm))
4062     return false;
4063
4064   STRIP_NOPS (op1);
4065   mult_in_op1 = operand_equal_p (op1, mult, 0);
4066
4067   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4068
4069   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4070      use that in preference to a shift insn followed by an add insn.  */
4071   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4072              ? shiftadd_cost (speed, mode, m)
4073              : (mult_in_op1
4074                 ? shiftsub1_cost (speed, mode, m)
4075                 : shiftsub0_cost (speed, mode, m)));
4076
4077   res = comp_cost (MIN (as_cost, sa_cost), 0);
4078   res += (mult_in_op1 ? cost0 : cost1);
4079
4080   STRIP_NOPS (multop);
4081   if (!is_gimple_val (multop))
4082     res += force_expr_to_var_cost (multop, speed);
4083
4084   *cost = res;
4085   return true;
4086 }
4087
4088 /* Estimates cost of forcing expression EXPR into a variable.  */
4089
4090 static comp_cost
4091 force_expr_to_var_cost (tree expr, bool speed)
4092 {
4093   static bool costs_initialized = false;
4094   static unsigned integer_cost [2];
4095   static unsigned symbol_cost [2];
4096   static unsigned address_cost [2];
4097   tree op0, op1;
4098   comp_cost cost0, cost1, cost;
4099   machine_mode mode;
4100   scalar_int_mode int_mode;
4101
4102   if (!costs_initialized)
4103     {
4104       tree type = build_pointer_type (integer_type_node);
4105       tree var, addr;
4106       rtx x;
4107       int i;
4108
4109       var = create_tmp_var_raw (integer_type_node, "test_var");
4110       TREE_STATIC (var) = 1;
4111       x = produce_memory_decl_rtl (var, NULL);
4112       SET_DECL_RTL (var, x);
4113
4114       addr = build1 (ADDR_EXPR, type, var);
4115
4116
4117       for (i = 0; i < 2; i++)
4118         {
4119           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4120                                                              2000), i);
4121
4122           symbol_cost[i] = computation_cost (addr, i) + 1;
4123
4124           address_cost[i]
4125             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4126           if (dump_file && (dump_flags & TDF_DETAILS))
4127             {
4128               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4129               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4130               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4131               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4132               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4133               fprintf (dump_file, "\n");
4134             }
4135         }
4136
4137       costs_initialized = true;
4138     }
4139
4140   STRIP_NOPS (expr);
4141
4142   if (SSA_VAR_P (expr))
4143     return no_cost;
4144
4145   if (is_gimple_min_invariant (expr))
4146     {
4147       if (poly_int_tree_p (expr))
4148         return comp_cost (integer_cost [speed], 0);
4149
4150       if (TREE_CODE (expr) == ADDR_EXPR)
4151         {
4152           tree obj = TREE_OPERAND (expr, 0);
4153
4154           if (VAR_P (obj)
4155               || TREE_CODE (obj) == PARM_DECL
4156               || TREE_CODE (obj) == RESULT_DECL)
4157             return comp_cost (symbol_cost [speed], 0);
4158         }
4159
4160       return comp_cost (address_cost [speed], 0);
4161     }
4162
4163   switch (TREE_CODE (expr))
4164     {
4165     case POINTER_PLUS_EXPR:
4166     case PLUS_EXPR:
4167     case MINUS_EXPR:
4168     case MULT_EXPR:
4169     case TRUNC_DIV_EXPR:
4170     case BIT_AND_EXPR:
4171     case BIT_IOR_EXPR:
4172     case LSHIFT_EXPR:
4173     case RSHIFT_EXPR:
4174       op0 = TREE_OPERAND (expr, 0);
4175       op1 = TREE_OPERAND (expr, 1);
4176       STRIP_NOPS (op0);
4177       STRIP_NOPS (op1);
4178       break;
4179
4180     CASE_CONVERT:
4181     case NEGATE_EXPR:
4182     case BIT_NOT_EXPR:
4183       op0 = TREE_OPERAND (expr, 0);
4184       STRIP_NOPS (op0);
4185       op1 = NULL_TREE;
4186       break;
4187
4188     default:
4189       /* Just an arbitrary value, FIXME.  */
4190       return comp_cost (target_spill_cost[speed], 0);
4191     }
4192
4193   if (op0 == NULL_TREE
4194       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4195     cost0 = no_cost;
4196   else
4197     cost0 = force_expr_to_var_cost (op0, speed);
4198
4199   if (op1 == NULL_TREE
4200       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4201     cost1 = no_cost;
4202   else
4203     cost1 = force_expr_to_var_cost (op1, speed);
4204
4205   mode = TYPE_MODE (TREE_TYPE (expr));
4206   switch (TREE_CODE (expr))
4207     {
4208     case POINTER_PLUS_EXPR:
4209     case PLUS_EXPR:
4210     case MINUS_EXPR:
4211     case NEGATE_EXPR:
4212       cost = comp_cost (add_cost (speed, mode), 0);
4213       if (TREE_CODE (expr) != NEGATE_EXPR)
4214         {
4215           tree mult = NULL_TREE;
4216           comp_cost sa_cost;
4217           if (TREE_CODE (op1) == MULT_EXPR)
4218             mult = op1;
4219           else if (TREE_CODE (op0) == MULT_EXPR)
4220             mult = op0;
4221
4222           if (mult != NULL_TREE
4223               && is_a <scalar_int_mode> (mode, &int_mode)
4224               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4225               && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4226                                     speed, &sa_cost))
4227             return sa_cost;
4228         }
4229       break;
4230
4231     CASE_CONVERT:
4232       {
4233         tree inner_mode, outer_mode;
4234         outer_mode = TREE_TYPE (expr);
4235         inner_mode = TREE_TYPE (op0);
4236         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4237                                        TYPE_MODE (inner_mode), speed), 0);
4238       }
4239       break;
4240
4241     case MULT_EXPR:
4242       if (cst_and_fits_in_hwi (op0))
4243         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4244                                              mode, speed), 0);
4245       else if (cst_and_fits_in_hwi (op1))
4246         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4247                                              mode, speed), 0);
4248       else
4249         return comp_cost (target_spill_cost [speed], 0);
4250       break;
4251
4252     case TRUNC_DIV_EXPR:
4253       /* Division by power of two is usually cheap, so we allow it.  Forbid
4254          anything else.  */
4255       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4256         cost = comp_cost (add_cost (speed, mode), 0);
4257       else
4258         cost = comp_cost (target_spill_cost[speed], 0);
4259       break;
4260
4261     case BIT_AND_EXPR:
4262     case BIT_IOR_EXPR:
4263     case BIT_NOT_EXPR:
4264     case LSHIFT_EXPR:
4265     case RSHIFT_EXPR:
4266       cost = comp_cost (add_cost (speed, mode), 0);
4267       break;
4268
4269     default:
4270       gcc_unreachable ();
4271     }
4272
4273   cost += cost0;
4274   cost += cost1;
4275   return cost;
4276 }
4277
4278 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4279    invariants the computation depends on.  */
4280
4281 static comp_cost
4282 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4283 {
4284   if (!expr)
4285     return no_cost;
4286
4287   find_inv_vars (data, &expr, inv_vars);
4288   return force_expr_to_var_cost (expr, data->speed);
4289 }
4290
4291 /* Returns cost of auto-modifying address expression in shape base + offset.
4292    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4293    address expression.  The address expression has ADDR_MODE in addr space
4294    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4295    speed or size.  */
4296
4297 enum ainc_type
4298 {
4299   AINC_PRE_INC,         /* Pre increment.  */
4300   AINC_PRE_DEC,         /* Pre decrement.  */
4301   AINC_POST_INC,        /* Post increment.  */
4302   AINC_POST_DEC,        /* Post decrement.  */
4303   AINC_NONE             /* Also the number of auto increment types.  */
4304 };
4305
4306 struct ainc_cost_data
4307 {
4308   unsigned costs[AINC_NONE];
4309 };
4310
4311 static comp_cost
4312 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4313                        machine_mode addr_mode, machine_mode mem_mode,
4314                        addr_space_t as, bool speed)
4315 {
4316   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4317       && !USE_STORE_PRE_DECREMENT (mem_mode)
4318       && !USE_LOAD_POST_DECREMENT (mem_mode)
4319       && !USE_STORE_POST_DECREMENT (mem_mode)
4320       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4321       && !USE_STORE_PRE_INCREMENT (mem_mode)
4322       && !USE_LOAD_POST_INCREMENT (mem_mode)
4323       && !USE_STORE_POST_INCREMENT (mem_mode))
4324     return infinite_cost;
4325
4326   static vec<ainc_cost_data *> ainc_cost_data_list;
4327   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4328   if (idx >= ainc_cost_data_list.length ())
4329     {
4330       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4331
4332       gcc_assert (nsize > idx);
4333       ainc_cost_data_list.safe_grow_cleared (nsize);
4334     }
4335
4336   ainc_cost_data *data = ainc_cost_data_list[idx];
4337   if (data == NULL)
4338     {
4339       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4340
4341       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4342       data->costs[AINC_PRE_DEC] = INFTY;
4343       data->costs[AINC_POST_DEC] = INFTY;
4344       data->costs[AINC_PRE_INC] = INFTY;
4345       data->costs[AINC_POST_INC] = INFTY;
4346       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4347           || USE_STORE_PRE_DECREMENT (mem_mode))
4348         {
4349           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4350
4351           if (memory_address_addr_space_p (mem_mode, addr, as))
4352             data->costs[AINC_PRE_DEC]
4353               = address_cost (addr, mem_mode, as, speed);
4354         }
4355       if (USE_LOAD_POST_DECREMENT (mem_mode)
4356           || USE_STORE_POST_DECREMENT (mem_mode))
4357         {
4358           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4359
4360           if (memory_address_addr_space_p (mem_mode, addr, as))
4361             data->costs[AINC_POST_DEC]
4362               = address_cost (addr, mem_mode, as, speed);
4363         }
4364       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4365           || USE_STORE_PRE_INCREMENT (mem_mode))
4366         {
4367           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4368
4369           if (memory_address_addr_space_p (mem_mode, addr, as))
4370             data->costs[AINC_PRE_INC]
4371               = address_cost (addr, mem_mode, as, speed);
4372         }
4373       if (USE_LOAD_POST_INCREMENT (mem_mode)
4374           || USE_STORE_POST_INCREMENT (mem_mode))
4375         {
4376           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4377
4378           if (memory_address_addr_space_p (mem_mode, addr, as))
4379             data->costs[AINC_POST_INC]
4380               = address_cost (addr, mem_mode, as, speed);
4381         }
4382       ainc_cost_data_list[idx] = data;
4383     }
4384
4385   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4386   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4387     return comp_cost (data->costs[AINC_POST_INC], 0);
4388   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4389     return comp_cost (data->costs[AINC_POST_DEC], 0);
4390   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4391     return comp_cost (data->costs[AINC_PRE_INC], 0);
4392   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4393     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4394
4395   return infinite_cost;
4396 }
4397
4398 /* Return cost of computing USE's address expression by using CAND.
4399    AFF_INV and AFF_VAR represent invariant and variant parts of the
4400    address expression, respectively.  If AFF_INV is simple, store
4401    the loop invariant variables which are depended by it in INV_VARS;
4402    if AFF_INV is complicated, handle it as a new invariant expression
4403    and record it in INV_EXPR.  RATIO indicates multiple times between
4404    steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
4405    value to it indicating if this is an auto-increment address.  */
4406
4407 static comp_cost
4408 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4409                   struct iv_cand *cand, aff_tree *aff_inv,
4410                   aff_tree *aff_var, HOST_WIDE_INT ratio,
4411                   bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4412                   bool *can_autoinc, bool speed)
4413 {
4414   rtx addr;
4415   bool simple_inv = true;
4416   tree comp_inv = NULL_TREE, type = aff_var->type;
4417   comp_cost var_cost = no_cost, cost = no_cost;
4418   struct mem_address parts = {NULL_TREE, integer_one_node,
4419                               NULL_TREE, NULL_TREE, NULL_TREE};
4420   machine_mode addr_mode = TYPE_MODE (type);
4421   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4422   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4423   /* Only true if ratio != 1.  */
4424   bool ok_with_ratio_p = false;
4425   bool ok_without_ratio_p = false;
4426
4427   if (!aff_combination_const_p (aff_inv))
4428     {
4429       parts.index = integer_one_node;
4430       /* Addressing mode "base + index".  */
4431       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4432       if (ratio != 1)
4433         {
4434           parts.step = wide_int_to_tree (type, ratio);
4435           /* Addressing mode "base + index << scale".  */
4436           ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4437           if (!ok_with_ratio_p)
4438             parts.step = NULL_TREE;
4439         }
4440       if (ok_with_ratio_p || ok_without_ratio_p)
4441         {
4442           if (maybe_ne (aff_inv->offset, 0))
4443             {
4444               parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4445               /* Addressing mode "base + index [<< scale] + offset".  */
4446               if (!valid_mem_ref_p (mem_mode, as, &parts))
4447                 parts.offset = NULL_TREE;
4448               else
4449                 aff_inv->offset = 0;
4450             }
4451
4452           move_fixed_address_to_symbol (&parts, aff_inv);
4453           /* Base is fixed address and is moved to symbol part.  */
4454           if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4455             parts.base = NULL_TREE;
4456
4457           /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4458           if (parts.symbol != NULL_TREE
4459               && !valid_mem_ref_p (mem_mode, as, &parts))
4460             {
4461               aff_combination_add_elt (aff_inv, parts.symbol, 1);
4462               parts.symbol = NULL_TREE;
4463               /* Reset SIMPLE_INV since symbol address needs to be computed
4464                  outside of address expression in this case.  */
4465               simple_inv = false;
4466               /* Symbol part is moved back to base part, it can't be NULL.  */
4467               parts.base = integer_one_node;
4468             }
4469         }
4470       else
4471         parts.index = NULL_TREE;
4472     }
4473   else
4474     {
4475       poly_int64 ainc_step;
4476       if (can_autoinc
4477           && ratio == 1
4478           && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4479         {
4480           poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4481
4482           if (stmt_after_increment (data->current_loop, cand, use->stmt))
4483             ainc_offset += ainc_step;
4484           cost = get_address_cost_ainc (ainc_step, ainc_offset,
4485                                         addr_mode, mem_mode, as, speed);
4486           if (!cost.infinite_cost_p ())
4487             {
4488               *can_autoinc = true;
4489               return cost;
4490             }
4491           cost = no_cost;
4492         }
4493       if (!aff_combination_zero_p (aff_inv))
4494         {
4495           parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4496           /* Addressing mode "base + offset".  */
4497           if (!valid_mem_ref_p (mem_mode, as, &parts))
4498             parts.offset = NULL_TREE;
4499           else
4500             aff_inv->offset = 0;
4501         }
4502     }
4503
4504   if (simple_inv)
4505     simple_inv = (aff_inv == NULL
4506                   || aff_combination_const_p (aff_inv)
4507                   || aff_combination_singleton_var_p (aff_inv));
4508   if (!aff_combination_zero_p (aff_inv))
4509     comp_inv = aff_combination_to_tree (aff_inv);
4510   if (comp_inv != NULL_TREE)
4511     cost = force_var_cost (data, comp_inv, inv_vars);
4512   if (ratio != 1 && parts.step == NULL_TREE)
4513     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4514   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4515     var_cost += add_cost (speed, addr_mode);
4516
4517   if (comp_inv && inv_expr && !simple_inv)
4518     {
4519       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4520       /* Clear depends on.  */
4521       if (*inv_expr != NULL && inv_vars && *inv_vars)
4522         bitmap_clear (*inv_vars);
4523
4524       /* Cost of small invariant expression adjusted against loop niters
4525          is usually zero, which makes it difficult to be differentiated
4526          from candidate based on loop invariant variables.  Secondly, the
4527          generated invariant expression may not be hoisted out of loop by
4528          following pass.  We penalize the cost by rounding up in order to
4529          neutralize such effects.  */
4530       cost.cost = adjust_setup_cost (data, cost.cost, true);
4531       cost.scratch = cost.cost;
4532     }
4533
4534   cost += var_cost;
4535   addr = addr_for_mem_ref (&parts, as, false);
4536   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4537   cost += address_cost (addr, mem_mode, as, speed);
4538
4539   if (parts.symbol != NULL_TREE)
4540     cost.complexity += 1;
4541   /* Don't increase the complexity of adding a scaled index if it's
4542      the only kind of index that the target allows.  */
4543   if (parts.step != NULL_TREE && ok_without_ratio_p)
4544     cost.complexity += 1;
4545   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4546     cost.complexity += 1;
4547   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4548     cost.complexity += 1;
4549
4550   return cost;
4551 }
4552
4553 /* Scale (multiply) the computed COST (except scratch part that should be
4554    hoisted out a loop) by header->frequency / AT->frequency, which makes
4555    expected cost more accurate.  */
4556
4557 static comp_cost
4558 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4559 {
4560   if (data->speed
4561       && data->current_loop->header->count.to_frequency (cfun) > 0)
4562     {
4563       basic_block bb = gimple_bb (at);
4564       gcc_assert (cost.scratch <= cost.cost);
4565       int scale_factor = (int)(intptr_t) bb->aux;
4566       if (scale_factor == 1)
4567         return cost;
4568
4569       int scaled_cost
4570         = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4571
4572       if (dump_file && (dump_flags & TDF_DETAILS))
4573         fprintf (dump_file, "Scaling cost based on bb prob "
4574                  "by %2.2f: %d (scratch: %d) -> %d\n",
4575                  1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4576
4577       cost.cost = scaled_cost;
4578     }
4579
4580   return cost;
4581 }
4582
4583 /* Determines the cost of the computation by that USE is expressed
4584    from induction variable CAND.  If ADDRESS_P is true, we just need
4585    to create an address from it, otherwise we want to get it into
4586    register.  A set of invariants we depend on is stored in INV_VARS.
4587    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4588    addressing is likely.  If INV_EXPR is nonnull, record invariant
4589    expr entry in it.  */
4590
4591 static comp_cost
4592 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4593                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4594                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4595 {
4596   gimple *at = use->stmt;
4597   tree ubase = use->iv->base, cbase = cand->iv->base;
4598   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4599   tree comp_inv = NULL_TREE;
4600   HOST_WIDE_INT ratio, aratio;
4601   comp_cost cost;
4602   widest_int rat;
4603   aff_tree aff_inv, aff_var;
4604   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4605
4606   if (inv_vars)
4607     *inv_vars = NULL;
4608   if (can_autoinc)
4609     *can_autoinc = false;
4610   if (inv_expr)
4611     *inv_expr = NULL;
4612
4613   /* Check if we have enough precision to express the values of use.  */
4614   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4615     return infinite_cost;
4616
4617   if (address_p
4618       || (use->iv->base_object
4619           && cand->iv->base_object
4620           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4621           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4622     {
4623       /* Do not try to express address of an object with computation based
4624          on address of a different object.  This may cause problems in rtl
4625          level alias analysis (that does not expect this to be happening,
4626          as this is illegal in C), and would be unlikely to be useful
4627          anyway.  */
4628       if (use->iv->base_object
4629           && cand->iv->base_object
4630           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4631         return infinite_cost;
4632     }
4633
4634   if (!get_computation_aff_1 (data->current_loop, at, use,
4635                               cand, &aff_inv, &aff_var, &rat)
4636       || !wi::fits_shwi_p (rat))
4637     return infinite_cost;
4638
4639   ratio = rat.to_shwi ();
4640   if (address_p)
4641     {
4642       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4643                                inv_vars, inv_expr, can_autoinc, speed);
4644       return get_scaled_computation_cost_at (data, at, cost);
4645     }
4646
4647   bool simple_inv = (aff_combination_const_p (&aff_inv)
4648                      || aff_combination_singleton_var_p (&aff_inv));
4649   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4650   aff_combination_convert (&aff_inv, signed_type);
4651   if (!aff_combination_zero_p (&aff_inv))
4652     comp_inv = aff_combination_to_tree (&aff_inv);
4653
4654   cost = force_var_cost (data, comp_inv, inv_vars);
4655   if (comp_inv && inv_expr && !simple_inv)
4656     {
4657       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4658       /* Clear depends on.  */
4659       if (*inv_expr != NULL && inv_vars && *inv_vars)
4660         bitmap_clear (*inv_vars);
4661
4662       cost.cost = adjust_setup_cost (data, cost.cost);
4663       /* Record setup cost in scratch field.  */
4664       cost.scratch = cost.cost;
4665     }
4666   /* Cost of constant integer can be covered when adding invariant part to
4667      variant part.  */
4668   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4669     cost = no_cost;
4670
4671   /* Need type narrowing to represent use with cand.  */
4672   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4673     {
4674       machine_mode outer_mode = TYPE_MODE (utype);
4675       machine_mode inner_mode = TYPE_MODE (ctype);
4676       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4677     }
4678
4679   /* Turn a + i * (-c) into a - i * c.  */
4680   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4681     aratio = -ratio;
4682   else
4683     aratio = ratio;
4684
4685   if (ratio != 1)
4686     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4687
4688   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4689      instruction.  */
4690   /* Need to add up the invariant and variant parts.  */
4691   if (comp_inv && !integer_zerop (comp_inv))
4692     cost += add_cost (speed, TYPE_MODE (utype));
4693
4694   return get_scaled_computation_cost_at (data, at, cost);
4695 }
4696
4697 /* Determines cost of computing the use in GROUP with CAND in a generic
4698    expression.  */
4699
4700 static bool
4701 determine_group_iv_cost_generic (struct ivopts_data *data,
4702                                  struct iv_group *group, struct iv_cand *cand)
4703 {
4704   comp_cost cost;
4705   iv_inv_expr_ent *inv_expr = NULL;
4706   bitmap inv_vars = NULL, inv_exprs = NULL;
4707   struct iv_use *use = group->vuses[0];
4708
4709   /* The simple case first -- if we need to express value of the preserved
4710      original biv, the cost is 0.  This also prevents us from counting the
4711      cost of increment twice -- once at this use and once in the cost of
4712      the candidate.  */
4713   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4714     cost = no_cost;
4715   else
4716     cost = get_computation_cost (data, use, cand, false,
4717                                  &inv_vars, NULL, &inv_expr);
4718
4719   if (inv_expr)
4720     {
4721       inv_exprs = BITMAP_ALLOC (NULL);
4722       bitmap_set_bit (inv_exprs, inv_expr->id);
4723     }
4724   set_group_iv_cost (data, group, cand, cost, inv_vars,
4725                      NULL_TREE, ERROR_MARK, inv_exprs);
4726   return !cost.infinite_cost_p ();
4727 }
4728
4729 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4730
4731 static bool
4732 determine_group_iv_cost_address (struct ivopts_data *data,
4733                                  struct iv_group *group, struct iv_cand *cand)
4734 {
4735   unsigned i;
4736   bitmap inv_vars = NULL, inv_exprs = NULL;
4737   bool can_autoinc;
4738   iv_inv_expr_ent *inv_expr = NULL;
4739   struct iv_use *use = group->vuses[0];
4740   comp_cost sum_cost = no_cost, cost;
4741
4742   cost = get_computation_cost (data, use, cand, true,
4743                                &inv_vars, &can_autoinc, &inv_expr);
4744
4745   if (inv_expr)
4746     {
4747       inv_exprs = BITMAP_ALLOC (NULL);
4748       bitmap_set_bit (inv_exprs, inv_expr->id);
4749     }
4750   sum_cost = cost;
4751   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4752     {
4753       if (can_autoinc)
4754         sum_cost -= cand->cost_step;
4755       /* If we generated the candidate solely for exploiting autoincrement
4756          opportunities, and it turns out it can't be used, set the cost to
4757          infinity to make sure we ignore it.  */
4758       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4759         sum_cost = infinite_cost;
4760     }
4761
4762   /* Uses in a group can share setup code, so only add setup cost once.  */
4763   cost -= cost.scratch;
4764   /* Compute and add costs for rest uses of this group.  */
4765   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4766     {
4767       struct iv_use *next = group->vuses[i];
4768
4769       /* TODO: We could skip computing cost for sub iv_use when it has the
4770          same cost as the first iv_use, but the cost really depends on the
4771          offset and where the iv_use is.  */
4772         cost = get_computation_cost (data, next, cand, true,
4773                                      NULL, &can_autoinc, &inv_expr);
4774         if (inv_expr)
4775           {
4776             if (!inv_exprs)
4777               inv_exprs = BITMAP_ALLOC (NULL);
4778
4779             bitmap_set_bit (inv_exprs, inv_expr->id);
4780           }
4781       sum_cost += cost;
4782     }
4783   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4784                      NULL_TREE, ERROR_MARK, inv_exprs);
4785
4786   return !sum_cost.infinite_cost_p ();
4787 }
4788
4789 /* Computes value of candidate CAND at position AT in iteration NITER, and
4790    stores it to VAL.  */
4791
4792 static void
4793 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4794                aff_tree *val)
4795 {
4796   aff_tree step, delta, nit;
4797   struct iv *iv = cand->iv;
4798   tree type = TREE_TYPE (iv->base);
4799   tree steptype;
4800   if (POINTER_TYPE_P (type))
4801     steptype = sizetype;
4802   else
4803     steptype = unsigned_type_for (type);
4804
4805   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4806   aff_combination_convert (&step, steptype);
4807   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4808   aff_combination_convert (&nit, steptype);
4809   aff_combination_mult (&nit, &step, &delta);
4810   if (stmt_after_increment (loop, cand, at))
4811     aff_combination_add (&delta, &step);
4812
4813   tree_to_aff_combination (iv->base, type, val);
4814   if (!POINTER_TYPE_P (type))
4815     aff_combination_convert (val, steptype);
4816   aff_combination_add (val, &delta);
4817 }
4818
4819 /* Returns period of induction variable iv.  */
4820
4821 static tree
4822 iv_period (struct iv *iv)
4823 {
4824   tree step = iv->step, period, type;
4825   tree pow2div;
4826
4827   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4828
4829   type = unsigned_type_for (TREE_TYPE (step));
4830   /* Period of the iv is lcm (step, type_range)/step -1,
4831      i.e., N*type_range/step - 1. Since type range is power
4832      of two, N == (step >> num_of_ending_zeros_binary (step),
4833      so the final result is
4834
4835        (type_range >> num_of_ending_zeros_binary (step)) - 1
4836
4837   */
4838   pow2div = num_ending_zeros (step);
4839
4840   period = build_low_bits_mask (type,
4841                                 (TYPE_PRECISION (type)
4842                                  - tree_to_uhwi (pow2div)));
4843
4844   return period;
4845 }
4846
4847 /* Returns the comparison operator used when eliminating the iv USE.  */
4848
4849 static enum tree_code
4850 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4851 {
4852   struct loop *loop = data->current_loop;
4853   basic_block ex_bb;
4854   edge exit;
4855
4856   ex_bb = gimple_bb (use->stmt);
4857   exit = EDGE_SUCC (ex_bb, 0);
4858   if (flow_bb_inside_loop_p (loop, exit->dest))
4859     exit = EDGE_SUCC (ex_bb, 1);
4860
4861   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4862 }
4863
4864 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4865    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4866    calculation is performed in non-wrapping type.
4867
4868    TODO: More generally, we could test for the situation that
4869          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4870          This would require knowing the sign of OFFSET.  */
4871
4872 static bool
4873 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4874 {
4875   enum tree_code code;
4876   tree e1, e2;
4877   aff_tree aff_e1, aff_e2, aff_offset;
4878
4879   if (!nowrap_type_p (TREE_TYPE (base)))
4880     return false;
4881
4882   base = expand_simple_operations (base);
4883
4884   if (TREE_CODE (base) == SSA_NAME)
4885     {
4886       gimple *stmt = SSA_NAME_DEF_STMT (base);
4887
4888       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4889         return false;
4890
4891       code = gimple_assign_rhs_code (stmt);
4892       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4893         return false;
4894
4895       e1 = gimple_assign_rhs1 (stmt);
4896       e2 = gimple_assign_rhs2 (stmt);
4897     }
4898   else
4899     {
4900       code = TREE_CODE (base);
4901       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4902         return false;
4903       e1 = TREE_OPERAND (base, 0);
4904       e2 = TREE_OPERAND (base, 1);
4905     }
4906
4907   /* Use affine expansion as deeper inspection to prove the equality.  */
4908   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4909                                   &aff_e2, &data->name_expansion_cache);
4910   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4911                                   &aff_offset, &data->name_expansion_cache);
4912   aff_combination_scale (&aff_offset, -1);
4913   switch (code)
4914     {
4915     case PLUS_EXPR:
4916       aff_combination_add (&aff_e2, &aff_offset);
4917       if (aff_combination_zero_p (&aff_e2))
4918         return true;
4919
4920       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4921                                       &aff_e1, &data->name_expansion_cache);
4922       aff_combination_add (&aff_e1, &aff_offset);
4923       return aff_combination_zero_p (&aff_e1);
4924
4925     case POINTER_PLUS_EXPR:
4926       aff_combination_add (&aff_e2, &aff_offset);
4927       return aff_combination_zero_p (&aff_e2);
4928
4929     default:
4930       return false;
4931     }
4932 }
4933
4934 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4935    comparison with CAND.  NITER describes the number of iterations of
4936    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4937
4938    We aim to handle the following situation:
4939
4940    sometype *base, *p;
4941    int a, b, i;
4942
4943    i = a;
4944    p = p_0 = base + a;
4945
4946    do
4947      {
4948        bla (*p);
4949        p++;
4950        i++;
4951      }
4952    while (i < b);
4953
4954    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4955    We aim to optimize this to
4956
4957    p = p_0 = base + a;
4958    do
4959      {
4960        bla (*p);
4961        p++;
4962      }
4963    while (p < p_0 - a + b);
4964
4965    This preserves the correctness, since the pointer arithmetics does not
4966    overflow.  More precisely:
4967
4968    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4969       overflow in computing it or the values of p.
4970    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4971       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4972
4973 static bool
4974 iv_elimination_compare_lt (struct ivopts_data *data,
4975                            struct iv_cand *cand, enum tree_code *comp_p,
4976                            struct tree_niter_desc *niter)
4977 {
4978   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4979   struct aff_tree nit, tmpa, tmpb;
4980   enum tree_code comp;
4981   HOST_WIDE_INT step;
4982
4983   /* We need to know that the candidate induction variable does not overflow.
4984      While more complex analysis may be used to prove this, for now just
4985      check that the variable appears in the original program and that it
4986      is computed in a type that guarantees no overflows.  */
4987   cand_type = TREE_TYPE (cand->iv->base);
4988   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4989     return false;
4990
4991   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4992      the calculation of the BOUND could overflow, making the comparison
4993      invalid.  */
4994   if (!data->loop_single_exit_p)
4995     return false;
4996
4997   /* We need to be able to decide whether candidate is increasing or decreasing
4998      in order to choose the right comparison operator.  */
4999   if (!cst_and_fits_in_hwi (cand->iv->step))
5000     return false;
5001   step = int_cst_value (cand->iv->step);
5002
5003   /* Check that the number of iterations matches the expected pattern:
5004      a + 1 > b ? 0 : b - a - 1.  */
5005   mbz = niter->may_be_zero;
5006   if (TREE_CODE (mbz) == GT_EXPR)
5007     {
5008       /* Handle a + 1 > b.  */
5009       tree op0 = TREE_OPERAND (mbz, 0);
5010       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5011         {
5012           a = TREE_OPERAND (op0, 0);
5013           b = TREE_OPERAND (mbz, 1);
5014         }
5015       else
5016         return false;
5017     }
5018   else if (TREE_CODE (mbz) == LT_EXPR)
5019     {
5020       tree op1 = TREE_OPERAND (mbz, 1);
5021
5022       /* Handle b < a + 1.  */
5023       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5024         {
5025           a = TREE_OPERAND (op1, 0);
5026           b = TREE_OPERAND (mbz, 0);
5027         }
5028       else
5029         return false;
5030     }
5031   else
5032     return false;
5033
5034   /* Expected number of iterations is B - A - 1.  Check that it matches
5035      the actual number, i.e., that B - A - NITER = 1.  */
5036   tree_to_aff_combination (niter->niter, nit_type, &nit);
5037   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5038   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5039   aff_combination_scale (&nit, -1);
5040   aff_combination_scale (&tmpa, -1);
5041   aff_combination_add (&tmpb, &tmpa);
5042   aff_combination_add (&tmpb, &nit);
5043   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5044     return false;
5045
5046   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5047      overflow.  */
5048   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5049                         cand->iv->step,
5050                         fold_convert (TREE_TYPE (cand->iv->step), a));
5051   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5052     return false;
5053
5054   /* Determine the new comparison operator.  */
5055   comp = step < 0 ? GT_EXPR : LT_EXPR;
5056   if (*comp_p == NE_EXPR)
5057     *comp_p = comp;
5058   else if (*comp_p == EQ_EXPR)
5059     *comp_p = invert_tree_comparison (comp, false);
5060   else
5061     gcc_unreachable ();
5062
5063   return true;
5064 }
5065
5066 /* Check whether it is possible to express the condition in USE by comparison
5067    of candidate CAND.  If so, store the value compared with to BOUND, and the
5068    comparison operator to COMP.  */
5069
5070 static bool
5071 may_eliminate_iv (struct ivopts_data *data,
5072                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5073                   enum tree_code *comp)
5074 {
5075   basic_block ex_bb;
5076   edge exit;
5077   tree period;
5078   struct loop *loop = data->current_loop;
5079   aff_tree bnd;
5080   struct tree_niter_desc *desc = NULL;
5081
5082   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5083     return false;
5084
5085   /* For now works only for exits that dominate the loop latch.
5086      TODO: extend to other conditions inside loop body.  */
5087   ex_bb = gimple_bb (use->stmt);
5088   if (use->stmt != last_stmt (ex_bb)
5089       || gimple_code (use->stmt) != GIMPLE_COND
5090       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5091     return false;
5092
5093   exit = EDGE_SUCC (ex_bb, 0);
5094   if (flow_bb_inside_loop_p (loop, exit->dest))
5095     exit = EDGE_SUCC (ex_bb, 1);
5096   if (flow_bb_inside_loop_p (loop, exit->dest))
5097     return false;
5098
5099   desc = niter_for_exit (data, exit);
5100   if (!desc)
5101     return false;
5102
5103   /* Determine whether we can use the variable to test the exit condition.
5104      This is the case iff the period of the induction variable is greater
5105      than the number of iterations for which the exit condition is true.  */
5106   period = iv_period (cand->iv);
5107
5108   /* If the number of iterations is constant, compare against it directly.  */
5109   if (TREE_CODE (desc->niter) == INTEGER_CST)
5110     {
5111       /* See cand_value_at.  */
5112       if (stmt_after_increment (loop, cand, use->stmt))
5113         {
5114           if (!tree_int_cst_lt (desc->niter, period))
5115             return false;
5116         }
5117       else
5118         {
5119           if (tree_int_cst_lt (period, desc->niter))
5120             return false;
5121         }
5122     }
5123
5124   /* If not, and if this is the only possible exit of the loop, see whether
5125      we can get a conservative estimate on the number of iterations of the
5126      entire loop and compare against that instead.  */
5127   else
5128     {
5129       widest_int period_value, max_niter;
5130
5131       max_niter = desc->max;
5132       if (stmt_after_increment (loop, cand, use->stmt))
5133         max_niter += 1;
5134       period_value = wi::to_widest (period);
5135       if (wi::gtu_p (max_niter, period_value))
5136         {
5137           /* See if we can take advantage of inferred loop bound
5138              information.  */
5139           if (data->loop_single_exit_p)
5140             {
5141               if (!max_loop_iterations (loop, &max_niter))
5142                 return false;
5143               /* The loop bound is already adjusted by adding 1.  */
5144               if (wi::gtu_p (max_niter, period_value))
5145                 return false;
5146             }
5147           else
5148             return false;
5149         }
5150     }
5151
5152   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5153
5154   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5155                          aff_combination_to_tree (&bnd));
5156   *comp = iv_elimination_compare (data, use);
5157
5158   /* It is unlikely that computing the number of iterations using division
5159      would be more profitable than keeping the original induction variable.  */
5160   if (expression_expensive_p (*bound))
5161     return false;
5162
5163   /* Sometimes, it is possible to handle the situation that the number of
5164      iterations may be zero unless additional assumptions by using <
5165      instead of != in the exit condition.
5166
5167      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5168            base the exit condition on it.  However, that is often too
5169            expensive.  */
5170   if (!integer_zerop (desc->may_be_zero))
5171     return iv_elimination_compare_lt (data, cand, comp, desc);
5172
5173   return true;
5174 }
5175
5176  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5177     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5178
5179 static int
5180 parm_decl_cost (struct ivopts_data *data, tree bound)
5181 {
5182   tree sbound = bound;
5183   STRIP_NOPS (sbound);
5184
5185   if (TREE_CODE (sbound) == SSA_NAME
5186       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5187       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5188       && data->body_includes_call)
5189     return COSTS_N_INSNS (1);
5190
5191   return 0;
5192 }
5193
5194 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5195
5196 static bool
5197 determine_group_iv_cost_cond (struct ivopts_data *data,
5198                               struct iv_group *group, struct iv_cand *cand)
5199 {
5200   tree bound = NULL_TREE;
5201   struct iv *cmp_iv;
5202   bitmap inv_exprs = NULL;
5203   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5204   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5205   enum comp_iv_rewrite rewrite_type;
5206   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5207   tree *control_var, *bound_cst;
5208   enum tree_code comp = ERROR_MARK;
5209   struct iv_use *use = group->vuses[0];
5210
5211   /* Extract condition operands.  */
5212   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5213                                         &bound_cst, NULL, &cmp_iv);
5214   gcc_assert (rewrite_type != COMP_IV_NA);
5215
5216   /* Try iv elimination.  */
5217   if (rewrite_type == COMP_IV_ELIM
5218       && may_eliminate_iv (data, use, cand, &bound, &comp))
5219     {
5220       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5221       if (elim_cost.cost == 0)
5222         elim_cost.cost = parm_decl_cost (data, bound);
5223       else if (TREE_CODE (bound) == INTEGER_CST)
5224         elim_cost.cost = 0;
5225       /* If we replace a loop condition 'i < n' with 'p < base + n',
5226          inv_vars_elim will have 'base' and 'n' set, which implies that both
5227          'base' and 'n' will be live during the loop.    More likely,
5228          'base + n' will be loop invariant, resulting in only one live value
5229          during the loop.  So in that case we clear inv_vars_elim and set
5230          inv_expr_elim instead.  */
5231       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5232         {
5233           inv_expr_elim = get_loop_invariant_expr (data, bound);
5234           bitmap_clear (inv_vars_elim);
5235         }
5236       /* The bound is a loop invariant, so it will be only computed
5237          once.  */
5238       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5239     }
5240
5241   /* When the condition is a comparison of the candidate IV against
5242      zero, prefer this IV.
5243
5244      TODO: The constant that we're subtracting from the cost should
5245      be target-dependent.  This information should be added to the
5246      target costs for each backend.  */
5247   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5248       && integer_zerop (*bound_cst)
5249       && (operand_equal_p (*control_var, cand->var_after, 0)
5250           || operand_equal_p (*control_var, cand->var_before, 0)))
5251     elim_cost -= 1;
5252
5253   express_cost = get_computation_cost (data, use, cand, false,
5254                                        &inv_vars_express, NULL,
5255                                        &inv_expr_express);
5256   if (cmp_iv != NULL)
5257     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5258
5259   /* Count the cost of the original bound as well.  */
5260   bound_cost = force_var_cost (data, *bound_cst, NULL);
5261   if (bound_cost.cost == 0)
5262     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5263   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5264     bound_cost.cost = 0;
5265   express_cost += bound_cost;
5266
5267   /* Choose the better approach, preferring the eliminated IV. */
5268   if (elim_cost <= express_cost)
5269     {
5270       cost = elim_cost;
5271       inv_vars = inv_vars_elim;
5272       inv_vars_elim = NULL;
5273       inv_expr = inv_expr_elim;
5274     }
5275   else
5276     {
5277       cost = express_cost;
5278       inv_vars = inv_vars_express;
5279       inv_vars_express = NULL;
5280       bound = NULL_TREE;
5281       comp = ERROR_MARK;
5282       inv_expr = inv_expr_express;
5283     }
5284
5285   if (inv_expr)
5286     {
5287       inv_exprs = BITMAP_ALLOC (NULL);
5288       bitmap_set_bit (inv_exprs, inv_expr->id);
5289     }
5290   set_group_iv_cost (data, group, cand, cost,
5291                      inv_vars, bound, comp, inv_exprs);
5292
5293   if (inv_vars_elim)
5294     BITMAP_FREE (inv_vars_elim);
5295   if (inv_vars_express)
5296     BITMAP_FREE (inv_vars_express);
5297
5298   return !cost.infinite_cost_p ();
5299 }
5300
5301 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5302    if USE cannot be represented with CAND.  */
5303
5304 static bool
5305 determine_group_iv_cost (struct ivopts_data *data,
5306                          struct iv_group *group, struct iv_cand *cand)
5307 {
5308   switch (group->type)
5309     {
5310     case USE_NONLINEAR_EXPR:
5311       return determine_group_iv_cost_generic (data, group, cand);
5312
5313     case USE_REF_ADDRESS:
5314     case USE_PTR_ADDRESS:
5315       return determine_group_iv_cost_address (data, group, cand);
5316
5317     case USE_COMPARE:
5318       return determine_group_iv_cost_cond (data, group, cand);
5319
5320     default:
5321       gcc_unreachable ();
5322     }
5323 }
5324
5325 /* Return true if get_computation_cost indicates that autoincrement is
5326    a possibility for the pair of USE and CAND, false otherwise.  */
5327
5328 static bool
5329 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5330                            struct iv_cand *cand)
5331 {
5332   if (!address_p (use->type))
5333     return false;
5334
5335   bool can_autoinc = false;
5336   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5337   return can_autoinc;
5338 }
5339
5340 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5341    use that allows autoincrement, and set their AINC_USE if possible.  */
5342
5343 static void
5344 set_autoinc_for_original_candidates (struct ivopts_data *data)
5345 {
5346   unsigned i, j;
5347
5348   for (i = 0; i < data->vcands.length (); i++)
5349     {
5350       struct iv_cand *cand = data->vcands[i];
5351       struct iv_use *closest_before = NULL;
5352       struct iv_use *closest_after = NULL;
5353       if (cand->pos != IP_ORIGINAL)
5354         continue;
5355
5356       for (j = 0; j < data->vgroups.length (); j++)
5357         {
5358           struct iv_group *group = data->vgroups[j];
5359           struct iv_use *use = group->vuses[0];
5360           unsigned uid = gimple_uid (use->stmt);
5361
5362           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5363             continue;
5364
5365           if (uid < gimple_uid (cand->incremented_at)
5366               && (closest_before == NULL
5367                   || uid > gimple_uid (closest_before->stmt)))
5368             closest_before = use;
5369
5370           if (uid > gimple_uid (cand->incremented_at)
5371               && (closest_after == NULL
5372                   || uid < gimple_uid (closest_after->stmt)))
5373             closest_after = use;
5374         }
5375
5376       if (closest_before != NULL
5377           && autoinc_possible_for_pair (data, closest_before, cand))
5378         cand->ainc_use = closest_before;
5379       else if (closest_after != NULL
5380                && autoinc_possible_for_pair (data, closest_after, cand))
5381         cand->ainc_use = closest_after;
5382     }
5383 }
5384
5385 /* Relate compare use with all candidates.  */
5386
5387 static void
5388 relate_compare_use_with_all_cands (struct ivopts_data *data)
5389 {
5390   unsigned i, count = data->vcands.length ();
5391   for (i = 0; i < data->vgroups.length (); i++)
5392     {
5393       struct iv_group *group = data->vgroups[i];
5394
5395       if (group->type == USE_COMPARE)
5396         bitmap_set_range (group->related_cands, 0, count);
5397     }
5398 }
5399
5400 /* Finds the candidates for the induction variables.  */
5401
5402 static void
5403 find_iv_candidates (struct ivopts_data *data)
5404 {
5405   /* Add commonly used ivs.  */
5406   add_standard_iv_candidates (data);
5407
5408   /* Add old induction variables.  */
5409   add_iv_candidate_for_bivs (data);
5410
5411   /* Add induction variables derived from uses.  */
5412   add_iv_candidate_for_groups (data);
5413
5414   set_autoinc_for_original_candidates (data);
5415
5416   /* Record the important candidates.  */
5417   record_important_candidates (data);
5418
5419   /* Relate compare iv_use with all candidates.  */
5420   if (!data->consider_all_candidates)
5421     relate_compare_use_with_all_cands (data);
5422
5423   if (dump_file && (dump_flags & TDF_DETAILS))
5424     {
5425       unsigned i;
5426
5427       fprintf (dump_file, "\n<Important Candidates>:\t");
5428       for (i = 0; i < data->vcands.length (); i++)
5429         if (data->vcands[i]->important)
5430           fprintf (dump_file, " %d,", data->vcands[i]->id);
5431       fprintf (dump_file, "\n");
5432
5433       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5434       for (i = 0; i < data->vgroups.length (); i++)
5435         {
5436           struct iv_group *group = data->vgroups[i];
5437
5438           if (group->related_cands)
5439             {
5440               fprintf (dump_file, "  Group %d:\t", group->id);
5441               dump_bitmap (dump_file, group->related_cands);
5442             }
5443         }
5444       fprintf (dump_file, "\n");
5445     }
5446 }
5447
5448 /* Determines costs of computing use of iv with an iv candidate.  */
5449
5450 static void
5451 determine_group_iv_costs (struct ivopts_data *data)
5452 {
5453   unsigned i, j;
5454   struct iv_cand *cand;
5455   struct iv_group *group;
5456   bitmap to_clear = BITMAP_ALLOC (NULL);
5457
5458   alloc_use_cost_map (data);
5459
5460   for (i = 0; i < data->vgroups.length (); i++)
5461     {
5462       group = data->vgroups[i];
5463
5464       if (data->consider_all_candidates)
5465         {
5466           for (j = 0; j < data->vcands.length (); j++)
5467             {
5468               cand = data->vcands[j];
5469               determine_group_iv_cost (data, group, cand);
5470             }
5471         }
5472       else
5473         {
5474           bitmap_iterator bi;
5475
5476           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5477             {
5478               cand = data->vcands[j];
5479               if (!determine_group_iv_cost (data, group, cand))
5480                 bitmap_set_bit (to_clear, j);
5481             }
5482
5483           /* Remove the candidates for that the cost is infinite from
5484              the list of related candidates.  */
5485           bitmap_and_compl_into (group->related_cands, to_clear);
5486           bitmap_clear (to_clear);
5487         }
5488     }
5489
5490   BITMAP_FREE (to_clear);
5491
5492   if (dump_file && (dump_flags & TDF_DETAILS))
5493     {
5494       bitmap_iterator bi;
5495
5496       /* Dump invariant variables.  */
5497       fprintf (dump_file, "\n<Invariant Vars>:\n");
5498       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5499         {
5500           struct version_info *info = ver_info (data, i);
5501           if (info->inv_id)
5502             {
5503               fprintf (dump_file, "Inv %d:\t", info->inv_id);
5504               print_generic_expr (dump_file, info->name, TDF_SLIM);
5505               fprintf (dump_file, "%s\n",
5506                        info->has_nonlin_use ? "" : "\t(eliminable)");
5507             }
5508         }
5509
5510       /* Dump invariant expressions.  */
5511       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5512       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5513
5514       for (hash_table<iv_inv_expr_hasher>::iterator it
5515            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5516            ++it)
5517         list.safe_push (*it);
5518
5519       list.qsort (sort_iv_inv_expr_ent);
5520
5521       for (i = 0; i < list.length (); ++i)
5522         {
5523           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5524           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5525           fprintf (dump_file, "\n");
5526         }
5527
5528       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5529
5530       for (i = 0; i < data->vgroups.length (); i++)
5531         {
5532           group = data->vgroups[i];
5533
5534           fprintf (dump_file, "Group %d:\n", i);
5535           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5536           for (j = 0; j < group->n_map_members; j++)
5537             {
5538               if (!group->cost_map[j].cand
5539                   || group->cost_map[j].cost.infinite_cost_p ())
5540                 continue;
5541
5542               fprintf (dump_file, "  %d\t%d\t%d\t",
5543                        group->cost_map[j].cand->id,
5544                        group->cost_map[j].cost.cost,
5545                        group->cost_map[j].cost.complexity);
5546               if (!group->cost_map[j].inv_exprs
5547                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
5548                 fprintf (dump_file, "NIL;\t");
5549               else
5550                 bitmap_print (dump_file,
5551                               group->cost_map[j].inv_exprs, "", ";\t");
5552               if (!group->cost_map[j].inv_vars
5553                   || bitmap_empty_p (group->cost_map[j].inv_vars))
5554                 fprintf (dump_file, "NIL;\n");
5555               else
5556                 bitmap_print (dump_file,
5557                               group->cost_map[j].inv_vars, "", "\n");
5558             }
5559
5560           fprintf (dump_file, "\n");
5561         }
5562       fprintf (dump_file, "\n");
5563     }
5564 }
5565
5566 /* Determines cost of the candidate CAND.  */
5567
5568 static void
5569 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5570 {
5571   comp_cost cost_base;
5572   unsigned cost, cost_step;
5573   tree base;
5574
5575   gcc_assert (cand->iv != NULL);
5576
5577   /* There are two costs associated with the candidate -- its increment
5578      and its initialization.  The second is almost negligible for any loop
5579      that rolls enough, so we take it just very little into account.  */
5580
5581   base = cand->iv->base;
5582   cost_base = force_var_cost (data, base, NULL);
5583   /* It will be exceptional that the iv register happens to be initialized with
5584      the proper value at no cost.  In general, there will at least be a regcopy
5585      or a const set.  */
5586   if (cost_base.cost == 0)
5587     cost_base.cost = COSTS_N_INSNS (1);
5588   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5589
5590   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5591
5592   /* Prefer the original ivs unless we may gain something by replacing it.
5593      The reason is to make debugging simpler; so this is not relevant for
5594      artificial ivs created by other optimization passes.  */
5595   if (cand->pos != IP_ORIGINAL
5596       || !SSA_NAME_VAR (cand->var_before)
5597       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5598     cost++;
5599
5600   /* Prefer not to insert statements into latch unless there are some
5601      already (so that we do not create unnecessary jumps).  */
5602   if (cand->pos == IP_END
5603       && empty_block_p (ip_end_pos (data->current_loop)))
5604     cost++;
5605
5606   cand->cost = cost;
5607   cand->cost_step = cost_step;
5608 }
5609
5610 /* Determines costs of computation of the candidates.  */
5611
5612 static void
5613 determine_iv_costs (struct ivopts_data *data)
5614 {
5615   unsigned i;
5616
5617   if (dump_file && (dump_flags & TDF_DETAILS))
5618     {
5619       fprintf (dump_file, "<Candidate Costs>:\n");
5620       fprintf (dump_file, "  cand\tcost\n");
5621     }
5622
5623   for (i = 0; i < data->vcands.length (); i++)
5624     {
5625       struct iv_cand *cand = data->vcands[i];
5626
5627       determine_iv_cost (data, cand);
5628
5629       if (dump_file && (dump_flags & TDF_DETAILS))
5630         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5631     }
5632
5633   if (dump_file && (dump_flags & TDF_DETAILS))
5634     fprintf (dump_file, "\n");
5635 }
5636
5637 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5638    induction variables.  Note N_INVS includes both invariant variables and
5639    invariant expressions.  */
5640
5641 static unsigned
5642 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5643                               unsigned n_cands)
5644 {
5645   unsigned cost;
5646   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5647   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5648   bool speed = data->speed;
5649
5650   /* If there is a call in the loop body, the call-clobbered registers
5651      are not available for loop invariants.  */
5652   if (data->body_includes_call)
5653     available_regs = available_regs - target_clobbered_regs;
5654
5655   /* If we have enough registers.  */
5656   if (regs_needed + target_res_regs < available_regs)
5657     cost = n_new;
5658   /* If close to running out of registers, try to preserve them.  */
5659   else if (regs_needed <= available_regs)
5660     cost = target_reg_cost [speed] * regs_needed;
5661   /* If we run out of available registers but the number of candidates
5662      does not, we penalize extra registers using target_spill_cost.  */
5663   else if (n_cands <= available_regs)
5664     cost = target_reg_cost [speed] * available_regs
5665            + target_spill_cost [speed] * (regs_needed - available_regs);
5666   /* If the number of candidates runs out available registers, we penalize
5667      extra candidate registers using target_spill_cost * 2.  Because it is
5668      more expensive to spill induction variable than invariant.  */
5669   else
5670     cost = target_reg_cost [speed] * available_regs
5671            + target_spill_cost [speed] * (n_cands - available_regs) * 2
5672            + target_spill_cost [speed] * (regs_needed - n_cands);
5673
5674   /* Finally, add the number of candidates, so that we prefer eliminating
5675      induction variables if possible.  */
5676   return cost + n_cands;
5677 }
5678
5679 /* For each size of the induction variable set determine the penalty.  */
5680
5681 static void
5682 determine_set_costs (struct ivopts_data *data)
5683 {
5684   unsigned j, n;
5685   gphi *phi;
5686   gphi_iterator psi;
5687   tree op;
5688   struct loop *loop = data->current_loop;
5689   bitmap_iterator bi;
5690
5691   if (dump_file && (dump_flags & TDF_DETAILS))
5692     {
5693       fprintf (dump_file, "<Global Costs>:\n");
5694       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5695       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5696       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5697       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5698     }
5699
5700   n = 0;
5701   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5702     {
5703       phi = psi.phi ();
5704       op = PHI_RESULT (phi);
5705
5706       if (virtual_operand_p (op))
5707         continue;
5708
5709       if (get_iv (data, op))
5710         continue;
5711
5712       if (!POINTER_TYPE_P (TREE_TYPE (op))
5713           && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5714         continue;
5715
5716       n++;
5717     }
5718
5719   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5720     {
5721       struct version_info *info = ver_info (data, j);
5722
5723       if (info->inv_id && info->has_nonlin_use)
5724         n++;
5725     }
5726
5727   data->regs_used = n;
5728   if (dump_file && (dump_flags & TDF_DETAILS))
5729     fprintf (dump_file, "  regs_used %d\n", n);
5730
5731   if (dump_file && (dump_flags & TDF_DETAILS))
5732     {
5733       fprintf (dump_file, "  cost for size:\n");
5734       fprintf (dump_file, "  ivs\tcost\n");
5735       for (j = 0; j <= 2 * target_avail_regs; j++)
5736         fprintf (dump_file, "  %d\t%d\n", j,
5737                  ivopts_estimate_reg_pressure (data, 0, j));
5738       fprintf (dump_file, "\n");
5739     }
5740 }
5741
5742 /* Returns true if A is a cheaper cost pair than B.  */
5743
5744 static bool
5745 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5746 {
5747   if (!a)
5748     return false;
5749
5750   if (!b)
5751     return true;
5752
5753   if (a->cost < b->cost)
5754     return true;
5755
5756   if (b->cost < a->cost)
5757     return false;
5758
5759   /* In case the costs are the same, prefer the cheaper candidate.  */
5760   if (a->cand->cost < b->cand->cost)
5761     return true;
5762
5763   return false;
5764 }
5765
5766 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
5767    for more expensive, equal and cheaper respectively.  */
5768
5769 static int
5770 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5771 {
5772   if (cheaper_cost_pair (a, b))
5773     return -1;
5774   if (cheaper_cost_pair (b, a))
5775     return 1;
5776
5777   return 0;
5778 }
5779
5780 /* Returns candidate by that USE is expressed in IVS.  */
5781
5782 static struct cost_pair *
5783 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5784 {
5785   return ivs->cand_for_group[group->id];
5786 }
5787
5788 /* Computes the cost field of IVS structure.  */
5789
5790 static void
5791 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5792 {
5793   comp_cost cost = ivs->cand_use_cost;
5794
5795   cost += ivs->cand_cost;
5796   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5797   ivs->cost = cost;
5798 }
5799
5800 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5801    and IVS.  */
5802
5803 static void
5804 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5805 {
5806   bitmap_iterator bi;
5807   unsigned iid;
5808
5809   if (!invs)
5810     return;
5811
5812   gcc_assert (n_inv_uses != NULL);
5813   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5814     {
5815       n_inv_uses[iid]--;
5816       if (n_inv_uses[iid] == 0)
5817         ivs->n_invs--;
5818     }
5819 }
5820
5821 /* Set USE not to be expressed by any candidate in IVS.  */
5822
5823 static void
5824 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5825                  struct iv_group *group)
5826 {
5827   unsigned gid = group->id, cid;
5828   struct cost_pair *cp;
5829
5830   cp = ivs->cand_for_group[gid];
5831   if (!cp)
5832     return;
5833   cid = cp->cand->id;
5834
5835   ivs->bad_groups++;
5836   ivs->cand_for_group[gid] = NULL;
5837   ivs->n_cand_uses[cid]--;
5838
5839   if (ivs->n_cand_uses[cid] == 0)
5840     {
5841       bitmap_clear_bit (ivs->cands, cid);
5842       ivs->n_cands--;
5843       ivs->cand_cost -= cp->cand->cost;
5844       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5845       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5846     }
5847
5848   ivs->cand_use_cost -= cp->cost;
5849   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5850   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5851   iv_ca_recount_cost (data, ivs);
5852 }
5853
5854 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5855    IVS.  */
5856
5857 static void
5858 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5859 {
5860   bitmap_iterator bi;
5861   unsigned iid;
5862
5863   if (!invs)
5864     return;
5865
5866   gcc_assert (n_inv_uses != NULL);
5867   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5868     {
5869       n_inv_uses[iid]++;
5870       if (n_inv_uses[iid] == 1)
5871         ivs->n_invs++;
5872     }
5873 }
5874
5875 /* Set cost pair for GROUP in set IVS to CP.  */
5876
5877 static void
5878 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5879               struct iv_group *group, struct cost_pair *cp)
5880 {
5881   unsigned gid = group->id, cid;
5882
5883   if (ivs->cand_for_group[gid] == cp)
5884     return;
5885
5886   if (ivs->cand_for_group[gid])
5887     iv_ca_set_no_cp (data, ivs, group);
5888
5889   if (cp)
5890     {
5891       cid = cp->cand->id;
5892
5893       ivs->bad_groups--;
5894       ivs->cand_for_group[gid] = cp;
5895       ivs->n_cand_uses[cid]++;
5896       if (ivs->n_cand_uses[cid] == 1)
5897         {
5898           bitmap_set_bit (ivs->cands, cid);
5899           ivs->n_cands++;
5900           ivs->cand_cost += cp->cand->cost;
5901           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5902           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5903         }
5904
5905       ivs->cand_use_cost += cp->cost;
5906       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5907       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5908       iv_ca_recount_cost (data, ivs);
5909     }
5910 }
5911
5912 /* Extend set IVS by expressing USE by some of the candidates in it
5913    if possible.  Consider all important candidates if candidates in
5914    set IVS don't give any result.  */
5915
5916 static void
5917 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5918                struct iv_group *group)
5919 {
5920   struct cost_pair *best_cp = NULL, *cp;
5921   bitmap_iterator bi;
5922   unsigned i;
5923   struct iv_cand *cand;
5924
5925   gcc_assert (ivs->upto >= group->id);
5926   ivs->upto++;
5927   ivs->bad_groups++;
5928
5929   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5930     {
5931       cand = data->vcands[i];
5932       cp = get_group_iv_cost (data, group, cand);
5933       if (cheaper_cost_pair (cp, best_cp))
5934         best_cp = cp;
5935     }
5936
5937   if (best_cp == NULL)
5938     {
5939       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5940         {
5941           cand = data->vcands[i];
5942           cp = get_group_iv_cost (data, group, cand);
5943           if (cheaper_cost_pair (cp, best_cp))
5944             best_cp = cp;
5945         }
5946     }
5947
5948   iv_ca_set_cp (data, ivs, group, best_cp);
5949 }
5950
5951 /* Get cost for assignment IVS.  */
5952
5953 static comp_cost
5954 iv_ca_cost (struct iv_ca *ivs)
5955 {
5956   /* This was a conditional expression but it triggered a bug in
5957      Sun C 5.5.  */
5958   if (ivs->bad_groups)
5959     return infinite_cost;
5960   else
5961     return ivs->cost;
5962 }
5963
5964 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5965    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5966    respectively.  */
5967
5968 static int
5969 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5970                     struct iv_group *group, struct cost_pair *old_cp,
5971                     struct cost_pair *new_cp)
5972 {
5973   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5974   unsigned old_n_invs = ivs->n_invs;
5975   iv_ca_set_cp (data, ivs, group, new_cp);
5976   unsigned new_n_invs = ivs->n_invs;
5977   iv_ca_set_cp (data, ivs, group, old_cp);
5978
5979   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5980 }
5981
5982 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5983    it before NEXT.  */
5984
5985 static struct iv_ca_delta *
5986 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5987                  struct cost_pair *new_cp, struct iv_ca_delta *next)
5988 {
5989   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5990
5991   change->group = group;
5992   change->old_cp = old_cp;
5993   change->new_cp = new_cp;
5994   change->next = next;
5995
5996   return change;
5997 }
5998
5999 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6000    are rewritten.  */
6001
6002 static struct iv_ca_delta *
6003 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6004 {
6005   struct iv_ca_delta *last;
6006
6007   if (!l2)
6008     return l1;
6009
6010   if (!l1)
6011     return l2;
6012
6013   for (last = l1; last->next; last = last->next)
6014     continue;
6015   last->next = l2;
6016
6017   return l1;
6018 }
6019
6020 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6021
6022 static struct iv_ca_delta *
6023 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6024 {
6025   struct iv_ca_delta *act, *next, *prev = NULL;
6026
6027   for (act = delta; act; act = next)
6028     {
6029       next = act->next;
6030       act->next = prev;
6031       prev = act;
6032
6033       std::swap (act->old_cp, act->new_cp);
6034     }
6035
6036   return prev;
6037 }
6038
6039 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6040    reverted instead.  */
6041
6042 static void
6043 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6044                     struct iv_ca_delta *delta, bool forward)
6045 {
6046   struct cost_pair *from, *to;
6047   struct iv_ca_delta *act;
6048
6049   if (!forward)
6050     delta = iv_ca_delta_reverse (delta);
6051
6052   for (act = delta; act; act = act->next)
6053     {
6054       from = act->old_cp;
6055       to = act->new_cp;
6056       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6057       iv_ca_set_cp (data, ivs, act->group, to);
6058     }
6059
6060   if (!forward)
6061     iv_ca_delta_reverse (delta);
6062 }
6063
6064 /* Returns true if CAND is used in IVS.  */
6065
6066 static bool
6067 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6068 {
6069   return ivs->n_cand_uses[cand->id] > 0;
6070 }
6071
6072 /* Returns number of induction variable candidates in the set IVS.  */
6073
6074 static unsigned
6075 iv_ca_n_cands (struct iv_ca *ivs)
6076 {
6077   return ivs->n_cands;
6078 }
6079
6080 /* Free the list of changes DELTA.  */
6081
6082 static void
6083 iv_ca_delta_free (struct iv_ca_delta **delta)
6084 {
6085   struct iv_ca_delta *act, *next;
6086
6087   for (act = *delta; act; act = next)
6088     {
6089       next = act->next;
6090       free (act);
6091     }
6092
6093   *delta = NULL;
6094 }
6095
6096 /* Allocates new iv candidates assignment.  */
6097
6098 static struct iv_ca *
6099 iv_ca_new (struct ivopts_data *data)
6100 {
6101   struct iv_ca *nw = XNEW (struct iv_ca);
6102
6103   nw->upto = 0;
6104   nw->bad_groups = 0;
6105   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6106                                  data->vgroups.length ());
6107   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6108   nw->cands = BITMAP_ALLOC (NULL);
6109   nw->n_cands = 0;
6110   nw->n_invs = 0;
6111   nw->cand_use_cost = no_cost;
6112   nw->cand_cost = 0;
6113   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6114   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6115   nw->cost = no_cost;
6116
6117   return nw;
6118 }
6119
6120 /* Free memory occupied by the set IVS.  */
6121
6122 static void
6123 iv_ca_free (struct iv_ca **ivs)
6124 {
6125   free ((*ivs)->cand_for_group);
6126   free ((*ivs)->n_cand_uses);
6127   BITMAP_FREE ((*ivs)->cands);
6128   free ((*ivs)->n_inv_var_uses);
6129   free ((*ivs)->n_inv_expr_uses);
6130   free (*ivs);
6131   *ivs = NULL;
6132 }
6133
6134 /* Dumps IVS to FILE.  */
6135
6136 static void
6137 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6138 {
6139   unsigned i;
6140   comp_cost cost = iv_ca_cost (ivs);
6141
6142   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6143            cost.complexity);
6144   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6145            ivs->cand_cost, ivs->cand_use_cost.cost,
6146            ivs->cand_use_cost.complexity);
6147   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6148
6149   for (i = 0; i < ivs->upto; i++)
6150     {
6151       struct iv_group *group = data->vgroups[i];
6152       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6153       if (cp)
6154         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6155                  group->id, cp->cand->id, cp->cost.cost,
6156                  cp->cost.complexity);
6157       else
6158         fprintf (file, "   group:%d --> ??\n", group->id);
6159     }
6160
6161   const char *pref = "";
6162   fprintf (file, "  invariant variables: ");
6163   for (i = 1; i <= data->max_inv_var_id; i++)
6164     if (ivs->n_inv_var_uses[i])
6165       {
6166         fprintf (file, "%s%d", pref, i);
6167         pref = ", ";
6168       }
6169
6170   pref = "";
6171   fprintf (file, "\n  invariant expressions: ");
6172   for (i = 1; i <= data->max_inv_expr_id; i++)
6173     if (ivs->n_inv_expr_uses[i])
6174       {
6175         fprintf (file, "%s%d", pref, i);
6176         pref = ", ";
6177       }
6178
6179   fprintf (file, "\n\n");
6180 }
6181
6182 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6183    new set, and store differences in DELTA.  Number of induction variables
6184    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6185    the function will try to find a solution with mimimal iv candidates.  */
6186
6187 static comp_cost
6188 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6189               struct iv_cand *cand, struct iv_ca_delta **delta,
6190               unsigned *n_ivs, bool min_ncand)
6191 {
6192   unsigned i;
6193   comp_cost cost;
6194   struct iv_group *group;
6195   struct cost_pair *old_cp, *new_cp;
6196
6197   *delta = NULL;
6198   for (i = 0; i < ivs->upto; i++)
6199     {
6200       group = data->vgroups[i];
6201       old_cp = iv_ca_cand_for_group (ivs, group);
6202
6203       if (old_cp
6204           && old_cp->cand == cand)
6205         continue;
6206
6207       new_cp = get_group_iv_cost (data, group, cand);
6208       if (!new_cp)
6209         continue;
6210
6211       if (!min_ncand)
6212         {
6213           int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6214           /* Skip if new_cp depends on more invariants.  */
6215           if (cmp_invs > 0)
6216             continue;
6217
6218           int cmp_cost = compare_cost_pair (new_cp, old_cp);
6219           /* Skip if new_cp is not cheaper.  */
6220           if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6221             continue;
6222         }
6223
6224       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6225     }
6226
6227   iv_ca_delta_commit (data, ivs, *delta, true);
6228   cost = iv_ca_cost (ivs);
6229   if (n_ivs)
6230     *n_ivs = iv_ca_n_cands (ivs);
6231   iv_ca_delta_commit (data, ivs, *delta, false);
6232
6233   return cost;
6234 }
6235
6236 /* Try narrowing set IVS by removing CAND.  Return the cost of
6237    the new set and store the differences in DELTA.  START is
6238    the candidate with which we start narrowing.  */
6239
6240 static comp_cost
6241 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6242               struct iv_cand *cand, struct iv_cand *start,
6243               struct iv_ca_delta **delta)
6244 {
6245   unsigned i, ci;
6246   struct iv_group *group;
6247   struct cost_pair *old_cp, *new_cp, *cp;
6248   bitmap_iterator bi;
6249   struct iv_cand *cnd;
6250   comp_cost cost, best_cost, acost;
6251
6252   *delta = NULL;
6253   for (i = 0; i < data->vgroups.length (); i++)
6254     {
6255       group = data->vgroups[i];
6256
6257       old_cp = iv_ca_cand_for_group (ivs, group);
6258       if (old_cp->cand != cand)
6259         continue;
6260
6261       best_cost = iv_ca_cost (ivs);
6262       /* Start narrowing with START.  */
6263       new_cp = get_group_iv_cost (data, group, start);
6264
6265       if (data->consider_all_candidates)
6266         {
6267           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6268             {
6269               if (ci == cand->id || (start && ci == start->id))
6270                 continue;
6271
6272               cnd = data->vcands[ci];
6273
6274               cp = get_group_iv_cost (data, group, cnd);
6275               if (!cp)
6276                 continue;
6277
6278               iv_ca_set_cp (data, ivs, group, cp);
6279               acost = iv_ca_cost (ivs);
6280
6281               if (acost < best_cost)
6282                 {
6283                   best_cost = acost;
6284                   new_cp = cp;
6285                 }
6286             }
6287         }
6288       else
6289         {
6290           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6291             {
6292               if (ci == cand->id || (start && ci == start->id))
6293                 continue;
6294
6295               cnd = data->vcands[ci];
6296
6297               cp = get_group_iv_cost (data, group, cnd);
6298               if (!cp)
6299                 continue;
6300
6301               iv_ca_set_cp (data, ivs, group, cp);
6302               acost = iv_ca_cost (ivs);
6303
6304               if (acost < best_cost)
6305                 {
6306                   best_cost = acost;
6307                   new_cp = cp;
6308                 }
6309             }
6310         }
6311       /* Restore to old cp for use.  */
6312       iv_ca_set_cp (data, ivs, group, old_cp);
6313
6314       if (!new_cp)
6315         {
6316           iv_ca_delta_free (delta);
6317           return infinite_cost;
6318         }
6319
6320       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6321     }
6322
6323   iv_ca_delta_commit (data, ivs, *delta, true);
6324   cost = iv_ca_cost (ivs);
6325   iv_ca_delta_commit (data, ivs, *delta, false);
6326
6327   return cost;
6328 }
6329
6330 /* Try optimizing the set of candidates IVS by removing candidates different
6331    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6332    differences in DELTA.  */
6333
6334 static comp_cost
6335 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6336              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6337 {
6338   bitmap_iterator bi;
6339   struct iv_ca_delta *act_delta, *best_delta;
6340   unsigned i;
6341   comp_cost best_cost, acost;
6342   struct iv_cand *cand;
6343
6344   best_delta = NULL;
6345   best_cost = iv_ca_cost (ivs);
6346
6347   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6348     {
6349       cand = data->vcands[i];
6350
6351       if (cand == except_cand)
6352         continue;
6353
6354       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6355
6356       if (acost < best_cost)
6357         {
6358           best_cost = acost;
6359           iv_ca_delta_free (&best_delta);
6360           best_delta = act_delta;
6361         }
6362       else
6363         iv_ca_delta_free (&act_delta);
6364     }
6365
6366   if (!best_delta)
6367     {
6368       *delta = NULL;
6369       return best_cost;
6370     }
6371
6372   /* Recurse to possibly remove other unnecessary ivs.  */
6373   iv_ca_delta_commit (data, ivs, best_delta, true);
6374   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6375   iv_ca_delta_commit (data, ivs, best_delta, false);
6376   *delta = iv_ca_delta_join (best_delta, *delta);
6377   return best_cost;
6378 }
6379
6380 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6381    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6382    the corresponding cost_pair, otherwise just return BEST_CP.  */
6383
6384 static struct cost_pair*
6385 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6386                         unsigned int cand_idx, struct iv_cand *old_cand,
6387                         struct cost_pair *best_cp)
6388 {
6389   struct iv_cand *cand;
6390   struct cost_pair *cp;
6391
6392   gcc_assert (old_cand != NULL && best_cp != NULL);
6393   if (cand_idx == old_cand->id)
6394     return best_cp;
6395
6396   cand = data->vcands[cand_idx];
6397   cp = get_group_iv_cost (data, group, cand);
6398   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6399     return cp;
6400
6401   return best_cp;
6402 }
6403
6404 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6405    which are used by more than one iv uses.  For each of those candidates,
6406    this function tries to represent iv uses under that candidate using
6407    other ones with lower local cost, then tries to prune the new set.
6408    If the new set has lower cost, It returns the new cost after recording
6409    candidate replacement in list DELTA.  */
6410
6411 static comp_cost
6412 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6413                struct iv_ca_delta **delta)
6414 {
6415   bitmap_iterator bi, bj;
6416   unsigned int i, j, k;
6417   struct iv_cand *cand;
6418   comp_cost orig_cost, acost;
6419   struct iv_ca_delta *act_delta, *tmp_delta;
6420   struct cost_pair *old_cp, *best_cp = NULL;
6421
6422   *delta = NULL;
6423   orig_cost = iv_ca_cost (ivs);
6424
6425   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6426     {
6427       if (ivs->n_cand_uses[i] == 1
6428           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6429         continue;
6430
6431       cand = data->vcands[i];
6432
6433       act_delta = NULL;
6434       /*  Represent uses under current candidate using other ones with
6435           lower local cost.  */
6436       for (j = 0; j < ivs->upto; j++)
6437         {
6438           struct iv_group *group = data->vgroups[j];
6439           old_cp = iv_ca_cand_for_group (ivs, group);
6440
6441           if (old_cp->cand != cand)
6442             continue;
6443
6444           best_cp = old_cp;
6445           if (data->consider_all_candidates)
6446             for (k = 0; k < data->vcands.length (); k++)
6447               best_cp = cheaper_cost_with_cand (data, group, k,
6448                                                 old_cp->cand, best_cp);
6449           else
6450             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6451               best_cp = cheaper_cost_with_cand (data, group, k,
6452                                                 old_cp->cand, best_cp);
6453
6454           if (best_cp == old_cp)
6455             continue;
6456
6457           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6458         }
6459       /* No need for further prune.  */
6460       if (!act_delta)
6461         continue;
6462
6463       /* Prune the new candidate set.  */
6464       iv_ca_delta_commit (data, ivs, act_delta, true);
6465       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6466       iv_ca_delta_commit (data, ivs, act_delta, false);
6467       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6468
6469       if (acost < orig_cost)
6470         {
6471           *delta = act_delta;
6472           return acost;
6473         }
6474       else
6475         iv_ca_delta_free (&act_delta);
6476     }
6477
6478   return orig_cost;
6479 }
6480
6481 /* Tries to extend the sets IVS in the best possible way in order to
6482    express the GROUP.  If ORIGINALP is true, prefer candidates from
6483    the original set of IVs, otherwise favor important candidates not
6484    based on any memory object.  */
6485
6486 static bool
6487 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6488                   struct iv_group *group, bool originalp)
6489 {
6490   comp_cost best_cost, act_cost;
6491   unsigned i;
6492   bitmap_iterator bi;
6493   struct iv_cand *cand;
6494   struct iv_ca_delta *best_delta = NULL, *act_delta;
6495   struct cost_pair *cp;
6496
6497   iv_ca_add_group (data, ivs, group);
6498   best_cost = iv_ca_cost (ivs);
6499   cp = iv_ca_cand_for_group (ivs, group);
6500   if (cp)
6501     {
6502       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6503       iv_ca_set_no_cp (data, ivs, group);
6504     }
6505
6506   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6507      first try important candidates not based on any memory object.  Only if
6508      this fails, try the specific ones.  Rationale -- in loops with many
6509      variables the best choice often is to use just one generic biv.  If we
6510      added here many ivs specific to the uses, the optimization algorithm later
6511      would be likely to get stuck in a local minimum, thus causing us to create
6512      too many ivs.  The approach from few ivs to more seems more likely to be
6513      successful -- starting from few ivs, replacing an expensive use by a
6514      specific iv should always be a win.  */
6515   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6516     {
6517       cand = data->vcands[i];
6518
6519       if (originalp && cand->pos !=IP_ORIGINAL)
6520         continue;
6521
6522       if (!originalp && cand->iv->base_object != NULL_TREE)
6523         continue;
6524
6525       if (iv_ca_cand_used_p (ivs, cand))
6526         continue;
6527
6528       cp = get_group_iv_cost (data, group, cand);
6529       if (!cp)
6530         continue;
6531
6532       iv_ca_set_cp (data, ivs, group, cp);
6533       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6534                                true);
6535       iv_ca_set_no_cp (data, ivs, group);
6536       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6537
6538       if (act_cost < best_cost)
6539         {
6540           best_cost = act_cost;
6541
6542           iv_ca_delta_free (&best_delta);
6543           best_delta = act_delta;
6544         }
6545       else
6546         iv_ca_delta_free (&act_delta);
6547     }
6548
6549   if (best_cost.infinite_cost_p ())
6550     {
6551       for (i = 0; i < group->n_map_members; i++)
6552         {
6553           cp = group->cost_map + i;
6554           cand = cp->cand;
6555           if (!cand)
6556             continue;
6557
6558           /* Already tried this.  */
6559           if (cand->important)
6560             {
6561               if (originalp && cand->pos == IP_ORIGINAL)
6562                 continue;
6563               if (!originalp && cand->iv->base_object == NULL_TREE)
6564                 continue;
6565             }
6566
6567           if (iv_ca_cand_used_p (ivs, cand))
6568             continue;
6569
6570           act_delta = NULL;
6571           iv_ca_set_cp (data, ivs, group, cp);
6572           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6573           iv_ca_set_no_cp (data, ivs, group);
6574           act_delta = iv_ca_delta_add (group,
6575                                        iv_ca_cand_for_group (ivs, group),
6576                                        cp, act_delta);
6577
6578           if (act_cost < best_cost)
6579             {
6580               best_cost = act_cost;
6581
6582               if (best_delta)
6583                 iv_ca_delta_free (&best_delta);
6584               best_delta = act_delta;
6585             }
6586           else
6587             iv_ca_delta_free (&act_delta);
6588         }
6589     }
6590
6591   iv_ca_delta_commit (data, ivs, best_delta, true);
6592   iv_ca_delta_free (&best_delta);
6593
6594   return !best_cost.infinite_cost_p ();
6595 }
6596
6597 /* Finds an initial assignment of candidates to uses.  */
6598
6599 static struct iv_ca *
6600 get_initial_solution (struct ivopts_data *data, bool originalp)
6601 {
6602   unsigned i;
6603   struct iv_ca *ivs = iv_ca_new (data);
6604
6605   for (i = 0; i < data->vgroups.length (); i++)
6606     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6607       {
6608         iv_ca_free (&ivs);
6609         return NULL;
6610       }
6611
6612   return ivs;
6613 }
6614
6615 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6616    points to a bool variable, this function tries to break local
6617    optimal fixed-point by replacing candidates in IVS if it's true.  */
6618
6619 static bool
6620 try_improve_iv_set (struct ivopts_data *data,
6621                     struct iv_ca *ivs, bool *try_replace_p)
6622 {
6623   unsigned i, n_ivs;
6624   comp_cost acost, best_cost = iv_ca_cost (ivs);
6625   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6626   struct iv_cand *cand;
6627
6628   /* Try extending the set of induction variables by one.  */
6629   for (i = 0; i < data->vcands.length (); i++)
6630     {
6631       cand = data->vcands[i];
6632
6633       if (iv_ca_cand_used_p (ivs, cand))
6634         continue;
6635
6636       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6637       if (!act_delta)
6638         continue;
6639
6640       /* If we successfully added the candidate and the set is small enough,
6641          try optimizing it by removing other candidates.  */
6642       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6643         {
6644           iv_ca_delta_commit (data, ivs, act_delta, true);
6645           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6646           iv_ca_delta_commit (data, ivs, act_delta, false);
6647           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6648         }
6649
6650       if (acost < best_cost)
6651         {
6652           best_cost = acost;
6653           iv_ca_delta_free (&best_delta);
6654           best_delta = act_delta;
6655         }
6656       else
6657         iv_ca_delta_free (&act_delta);
6658     }
6659
6660   if (!best_delta)
6661     {
6662       /* Try removing the candidates from the set instead.  */
6663       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6664
6665       if (!best_delta && *try_replace_p)
6666         {
6667           *try_replace_p = false;
6668           /* So far candidate selecting algorithm tends to choose fewer IVs
6669              so that it can handle cases in which loops have many variables
6670              but the best choice is often to use only one general biv.  One
6671              weakness is it can't handle opposite cases, in which different
6672              candidates should be chosen with respect to each use.  To solve
6673              the problem, we replace candidates in a manner described by the
6674              comments of iv_ca_replace, thus give general algorithm a chance
6675              to break local optimal fixed-point in these cases.  */
6676           best_cost = iv_ca_replace (data, ivs, &best_delta);
6677         }
6678
6679       if (!best_delta)
6680         return false;
6681     }
6682
6683   iv_ca_delta_commit (data, ivs, best_delta, true);
6684   iv_ca_delta_free (&best_delta);
6685   return best_cost == iv_ca_cost (ivs);
6686 }
6687
6688 /* Attempts to find the optimal set of induction variables.  We do simple
6689    greedy heuristic -- we try to replace at most one candidate in the selected
6690    solution and remove the unused ivs while this improves the cost.  */
6691
6692 static struct iv_ca *
6693 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6694 {
6695   struct iv_ca *set;
6696   bool try_replace_p = true;
6697
6698   /* Get the initial solution.  */
6699   set = get_initial_solution (data, originalp);
6700   if (!set)
6701     {
6702       if (dump_file && (dump_flags & TDF_DETAILS))
6703         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6704       return NULL;
6705     }
6706
6707   if (dump_file && (dump_flags & TDF_DETAILS))
6708     {
6709       fprintf (dump_file, "Initial set of candidates:\n");
6710       iv_ca_dump (data, dump_file, set);
6711     }
6712
6713   while (try_improve_iv_set (data, set, &try_replace_p))
6714     {
6715       if (dump_file && (dump_flags & TDF_DETAILS))
6716         {
6717           fprintf (dump_file, "Improved to:\n");
6718           iv_ca_dump (data, dump_file, set);
6719         }
6720     }
6721
6722   /* If the set has infinite_cost, it can't be optimal.  */
6723   if (iv_ca_cost (set).infinite_cost_p ())
6724     {
6725       if (dump_file && (dump_flags & TDF_DETAILS))
6726         fprintf (dump_file,
6727                  "Overflow to infinite cost in try_improve_iv_set.\n");
6728       iv_ca_free (&set);
6729     }
6730   return set;
6731 }
6732
6733 static struct iv_ca *
6734 find_optimal_iv_set (struct ivopts_data *data)
6735 {
6736   unsigned i;
6737   comp_cost cost, origcost;
6738   struct iv_ca *set, *origset;
6739
6740   /* Determine the cost based on a strategy that starts with original IVs,
6741      and try again using a strategy that prefers candidates not based
6742      on any IVs.  */
6743   origset = find_optimal_iv_set_1 (data, true);
6744   set = find_optimal_iv_set_1 (data, false);
6745
6746   if (!origset && !set)
6747     return NULL;
6748
6749   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6750   cost = set ? iv_ca_cost (set) : infinite_cost;
6751
6752   if (dump_file && (dump_flags & TDF_DETAILS))
6753     {
6754       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6755                origcost.cost, origcost.complexity);
6756       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6757                cost.cost, cost.complexity);
6758     }
6759
6760   /* Choose the one with the best cost.  */
6761   if (origcost <= cost)
6762     {
6763       if (set)
6764         iv_ca_free (&set);
6765       set = origset;
6766     }
6767   else if (origset)
6768     iv_ca_free (&origset);
6769
6770   for (i = 0; i < data->vgroups.length (); i++)
6771     {
6772       struct iv_group *group = data->vgroups[i];
6773       group->selected = iv_ca_cand_for_group (set, group)->cand;
6774     }
6775
6776   return set;
6777 }
6778
6779 /* Creates a new induction variable corresponding to CAND.  */
6780
6781 static void
6782 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6783 {
6784   gimple_stmt_iterator incr_pos;
6785   tree base;
6786   struct iv_use *use;
6787   struct iv_group *group;
6788   bool after = false;
6789
6790   gcc_assert (cand->iv != NULL);
6791
6792   switch (cand->pos)
6793     {
6794     case IP_NORMAL:
6795       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6796       break;
6797
6798     case IP_END:
6799       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6800       after = true;
6801       break;
6802
6803     case IP_AFTER_USE:
6804       after = true;
6805       /* fall through */
6806     case IP_BEFORE_USE:
6807       incr_pos = gsi_for_stmt (cand->incremented_at);
6808       break;
6809
6810     case IP_ORIGINAL:
6811       /* Mark that the iv is preserved.  */
6812       name_info (data, cand->var_before)->preserve_biv = true;
6813       name_info (data, cand->var_after)->preserve_biv = true;
6814
6815       /* Rewrite the increment so that it uses var_before directly.  */
6816       use = find_interesting_uses_op (data, cand->var_after);
6817       group = data->vgroups[use->group_id];
6818       group->selected = cand;
6819       return;
6820     }
6821
6822   gimple_add_tmp_var (cand->var_before);
6823
6824   base = unshare_expr (cand->iv->base);
6825
6826   create_iv (base, unshare_expr (cand->iv->step),
6827              cand->var_before, data->current_loop,
6828              &incr_pos, after, &cand->var_before, &cand->var_after);
6829 }
6830
6831 /* Creates new induction variables described in SET.  */
6832
6833 static void
6834 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6835 {
6836   unsigned i;
6837   struct iv_cand *cand;
6838   bitmap_iterator bi;
6839
6840   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6841     {
6842       cand = data->vcands[i];
6843       create_new_iv (data, cand);
6844     }
6845
6846   if (dump_file && (dump_flags & TDF_DETAILS))
6847     {
6848       fprintf (dump_file, "Selected IV set for loop %d",
6849                data->current_loop->num);
6850       if (data->loop_loc != UNKNOWN_LOCATION)
6851         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6852                  LOCATION_LINE (data->loop_loc));
6853       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6854                avg_loop_niter (data->current_loop));
6855       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6856       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6857         {
6858           cand = data->vcands[i];
6859           dump_cand (dump_file, cand);
6860         }
6861       fprintf (dump_file, "\n");
6862     }
6863 }
6864
6865 /* Rewrites USE (definition of iv used in a nonlinear expression)
6866    using candidate CAND.  */
6867
6868 static void
6869 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6870                             struct iv_use *use, struct iv_cand *cand)
6871 {
6872   gassign *ass;
6873   gimple_stmt_iterator bsi;
6874   tree comp, type = get_use_type (use), tgt;
6875
6876   /* An important special case -- if we are asked to express value of
6877      the original iv by itself, just exit; there is no need to
6878      introduce a new computation (that might also need casting the
6879      variable to unsigned and back).  */
6880   if (cand->pos == IP_ORIGINAL
6881       && cand->incremented_at == use->stmt)
6882     {
6883       tree op = NULL_TREE;
6884       enum tree_code stmt_code;
6885
6886       gcc_assert (is_gimple_assign (use->stmt));
6887       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6888
6889       /* Check whether we may leave the computation unchanged.
6890          This is the case only if it does not rely on other
6891          computations in the loop -- otherwise, the computation
6892          we rely upon may be removed in remove_unused_ivs,
6893          thus leading to ICE.  */
6894       stmt_code = gimple_assign_rhs_code (use->stmt);
6895       if (stmt_code == PLUS_EXPR
6896           || stmt_code == MINUS_EXPR
6897           || stmt_code == POINTER_PLUS_EXPR)
6898         {
6899           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6900             op = gimple_assign_rhs2 (use->stmt);
6901           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6902             op = gimple_assign_rhs1 (use->stmt);
6903         }
6904
6905       if (op != NULL_TREE)
6906         {
6907           if (expr_invariant_in_loop_p (data->current_loop, op))
6908             return;
6909           if (TREE_CODE (op) == SSA_NAME)
6910             {
6911               struct iv *iv = get_iv (data, op);
6912               if (iv != NULL && integer_zerop (iv->step))
6913                 return;
6914             }
6915         }
6916     }
6917
6918   switch (gimple_code (use->stmt))
6919     {
6920     case GIMPLE_PHI:
6921       tgt = PHI_RESULT (use->stmt);
6922
6923       /* If we should keep the biv, do not replace it.  */
6924       if (name_info (data, tgt)->preserve_biv)
6925         return;
6926
6927       bsi = gsi_after_labels (gimple_bb (use->stmt));
6928       break;
6929
6930     case GIMPLE_ASSIGN:
6931       tgt = gimple_assign_lhs (use->stmt);
6932       bsi = gsi_for_stmt (use->stmt);
6933       break;
6934
6935     default:
6936       gcc_unreachable ();
6937     }
6938
6939   aff_tree aff_inv, aff_var;
6940   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6941                               use, cand, &aff_inv, &aff_var))
6942     gcc_unreachable ();
6943
6944   unshare_aff_combination (&aff_inv);
6945   unshare_aff_combination (&aff_var);
6946   /* Prefer CSE opportunity than loop invariant by adding offset at last
6947      so that iv_uses have different offsets can be CSEed.  */
6948   poly_widest_int offset = aff_inv.offset;
6949   aff_inv.offset = 0;
6950
6951   gimple_seq stmt_list = NULL, seq = NULL;
6952   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6953   tree comp_op2 = aff_combination_to_tree (&aff_var);
6954   gcc_assert (comp_op1 && comp_op2);
6955
6956   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6957   gimple_seq_add_seq (&stmt_list, seq);
6958   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6959   gimple_seq_add_seq (&stmt_list, seq);
6960
6961   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6962     std::swap (comp_op1, comp_op2);
6963
6964   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6965     {
6966       comp = fold_build_pointer_plus (comp_op1,
6967                                       fold_convert (sizetype, comp_op2));
6968       comp = fold_build_pointer_plus (comp,
6969                                       wide_int_to_tree (sizetype, offset));
6970     }
6971   else
6972     {
6973       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6974                           fold_convert (TREE_TYPE (comp_op1), comp_op2));
6975       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6976                           wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6977     }
6978
6979   comp = fold_convert (type, comp);
6980   if (!valid_gimple_rhs_p (comp)
6981       || (gimple_code (use->stmt) != GIMPLE_PHI
6982           /* We can't allow re-allocating the stmt as it might be pointed
6983              to still.  */
6984           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6985               >= gimple_num_ops (gsi_stmt (bsi)))))
6986     {
6987       comp = force_gimple_operand (comp, &seq, true, NULL);
6988       gimple_seq_add_seq (&stmt_list, seq);
6989       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6990         {
6991           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6992           /* As this isn't a plain copy we have to reset alignment
6993              information.  */
6994           if (SSA_NAME_PTR_INFO (comp))
6995             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6996         }
6997     }
6998
6999   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7000   if (gimple_code (use->stmt) == GIMPLE_PHI)
7001     {
7002       ass = gimple_build_assign (tgt, comp);
7003       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7004
7005       bsi = gsi_for_stmt (use->stmt);
7006       remove_phi_node (&bsi, false);
7007     }
7008   else
7009     {
7010       gimple_assign_set_rhs_from_tree (&bsi, comp);
7011       use->stmt = gsi_stmt (bsi);
7012     }
7013 }
7014
7015 /* Performs a peephole optimization to reorder the iv update statement with
7016    a mem ref to enable instruction combining in later phases. The mem ref uses
7017    the iv value before the update, so the reordering transformation requires
7018    adjustment of the offset. CAND is the selected IV_CAND.
7019
7020    Example:
7021
7022    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7023    iv2 = iv1 + 1;
7024
7025    if (t < val)      (1)
7026      goto L;
7027    goto Head;
7028
7029
7030    directly propagating t over to (1) will introduce overlapping live range
7031    thus increase register pressure. This peephole transform it into:
7032
7033
7034    iv2 = iv1 + 1;
7035    t = MEM_REF (base, iv2, 8, 8);
7036    if (t < val)
7037      goto L;
7038    goto Head;
7039 */
7040
7041 static void
7042 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7043 {
7044   tree var_after;
7045   gimple *iv_update, *stmt;
7046   basic_block bb;
7047   gimple_stmt_iterator gsi, gsi_iv;
7048
7049   if (cand->pos != IP_NORMAL)
7050     return;
7051
7052   var_after = cand->var_after;
7053   iv_update = SSA_NAME_DEF_STMT (var_after);
7054
7055   bb = gimple_bb (iv_update);
7056   gsi = gsi_last_nondebug_bb (bb);
7057   stmt = gsi_stmt (gsi);
7058
7059   /* Only handle conditional statement for now.  */
7060   if (gimple_code (stmt) != GIMPLE_COND)
7061     return;
7062
7063   gsi_prev_nondebug (&gsi);
7064   stmt = gsi_stmt (gsi);
7065   if (stmt != iv_update)
7066     return;
7067
7068   gsi_prev_nondebug (&gsi);
7069   if (gsi_end_p (gsi))
7070     return;
7071
7072   stmt = gsi_stmt (gsi);
7073   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7074     return;
7075
7076   if (stmt != use->stmt)
7077     return;
7078
7079   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7080     return;
7081
7082   if (dump_file && (dump_flags & TDF_DETAILS))
7083     {
7084       fprintf (dump_file, "Reordering \n");
7085       print_gimple_stmt (dump_file, iv_update, 0);
7086       print_gimple_stmt (dump_file, use->stmt, 0);
7087       fprintf (dump_file, "\n");
7088     }
7089
7090   gsi = gsi_for_stmt (use->stmt);
7091   gsi_iv = gsi_for_stmt (iv_update);
7092   gsi_move_before (&gsi_iv, &gsi);
7093
7094   cand->pos = IP_BEFORE_USE;
7095   cand->incremented_at = use->stmt;
7096 }
7097
7098 /* Return the alias pointer type that should be used for a MEM_REF
7099    associated with USE, which has type USE_PTR_ADDRESS.  */
7100
7101 static tree
7102 get_alias_ptr_type_for_ptr_address (iv_use *use)
7103 {
7104   gcall *call = as_a <gcall *> (use->stmt);
7105   switch (gimple_call_internal_fn (call))
7106     {
7107     case IFN_MASK_LOAD:
7108     case IFN_MASK_STORE:
7109       /* The second argument contains the correct alias type.  */
7110       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7111       return TREE_TYPE (gimple_call_arg (call, 1));
7112
7113     default:
7114       gcc_unreachable ();
7115     }
7116 }
7117
7118
7119 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7120
7121 static void
7122 rewrite_use_address (struct ivopts_data *data,
7123                      struct iv_use *use, struct iv_cand *cand)
7124 {
7125   aff_tree aff;
7126   bool ok;
7127
7128   adjust_iv_update_pos (cand, use);
7129   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7130   gcc_assert (ok);
7131   unshare_aff_combination (&aff);
7132
7133   /* To avoid undefined overflow problems, all IV candidates use unsigned
7134      integer types.  The drawback is that this makes it impossible for
7135      create_mem_ref to distinguish an IV that is based on a memory object
7136      from one that represents simply an offset.
7137
7138      To work around this problem, we pass a hint to create_mem_ref that
7139      indicates which variable (if any) in aff is an IV based on a memory
7140      object.  Note that we only consider the candidate.  If this is not
7141      based on an object, the base of the reference is in some subexpression
7142      of the use -- but these will use pointer types, so they are recognized
7143      by the create_mem_ref heuristics anyway.  */
7144   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7145   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7146   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7147   tree type = use->mem_type;
7148   tree alias_ptr_type;
7149   if (use->type == USE_PTR_ADDRESS)
7150     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7151   else
7152     {
7153       gcc_assert (type == TREE_TYPE (*use->op_p));
7154       unsigned int align = get_object_alignment (*use->op_p);
7155       if (align != TYPE_ALIGN (type))
7156         type = build_aligned_type (type, align);
7157       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7158     }
7159   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7160                              iv, base_hint, data->speed);
7161
7162   if (use->type == USE_PTR_ADDRESS)
7163     {
7164       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7165       ref = fold_convert (get_use_type (use), ref);
7166       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7167                                       true, GSI_SAME_STMT);
7168     }
7169   else
7170     copy_ref_info (ref, *use->op_p);
7171
7172   *use->op_p = ref;
7173 }
7174
7175 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7176    candidate CAND.  */
7177
7178 static void
7179 rewrite_use_compare (struct ivopts_data *data,
7180                      struct iv_use *use, struct iv_cand *cand)
7181 {
7182   tree comp, op, bound;
7183   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7184   enum tree_code compare;
7185   struct iv_group *group = data->vgroups[use->group_id];
7186   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7187
7188   bound = cp->value;
7189   if (bound)
7190     {
7191       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7192       tree var_type = TREE_TYPE (var);
7193       gimple_seq stmts;
7194
7195       if (dump_file && (dump_flags & TDF_DETAILS))
7196         {
7197           fprintf (dump_file, "Replacing exit test: ");
7198           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7199         }
7200       compare = cp->comp;
7201       bound = unshare_expr (fold_convert (var_type, bound));
7202       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7203       if (stmts)
7204         gsi_insert_seq_on_edge_immediate (
7205                 loop_preheader_edge (data->current_loop),
7206                 stmts);
7207
7208       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7209       gimple_cond_set_lhs (cond_stmt, var);
7210       gimple_cond_set_code (cond_stmt, compare);
7211       gimple_cond_set_rhs (cond_stmt, op);
7212       return;
7213     }
7214
7215   /* The induction variable elimination failed; just express the original
7216      giv.  */
7217   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7218   gcc_assert (comp != NULL_TREE);
7219   gcc_assert (use->op_p != NULL);
7220   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7221                                          SSA_NAME_VAR (*use->op_p),
7222                                          true, GSI_SAME_STMT);
7223 }
7224
7225 /* Rewrite the groups using the selected induction variables.  */
7226
7227 static void
7228 rewrite_groups (struct ivopts_data *data)
7229 {
7230   unsigned i, j;
7231
7232   for (i = 0; i < data->vgroups.length (); i++)
7233     {
7234       struct iv_group *group = data->vgroups[i];
7235       struct iv_cand *cand = group->selected;
7236
7237       gcc_assert (cand);
7238
7239       if (group->type == USE_NONLINEAR_EXPR)
7240         {
7241           for (j = 0; j < group->vuses.length (); j++)
7242             {
7243               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7244               update_stmt (group->vuses[j]->stmt);
7245             }
7246         }
7247       else if (address_p (group->type))
7248         {
7249           for (j = 0; j < group->vuses.length (); j++)
7250             {
7251               rewrite_use_address (data, group->vuses[j], cand);
7252               update_stmt (group->vuses[j]->stmt);
7253             }
7254         }
7255       else
7256         {
7257           gcc_assert (group->type == USE_COMPARE);
7258
7259           for (j = 0; j < group->vuses.length (); j++)
7260             {
7261               rewrite_use_compare (data, group->vuses[j], cand);
7262               update_stmt (group->vuses[j]->stmt);
7263             }
7264         }
7265     }
7266 }
7267
7268 /* Removes the ivs that are not used after rewriting.  */
7269
7270 static void
7271 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7272 {
7273   unsigned j;
7274   bitmap_iterator bi;
7275
7276   /* Figure out an order in which to release SSA DEFs so that we don't
7277      release something that we'd have to propagate into a debug stmt
7278      afterwards.  */
7279   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7280     {
7281       struct version_info *info;
7282
7283       info = ver_info (data, j);
7284       if (info->iv
7285           && !integer_zerop (info->iv->step)
7286           && !info->inv_id
7287           && !info->iv->nonlin_use
7288           && !info->preserve_biv)
7289         {
7290           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7291
7292           tree def = info->iv->ssa_name;
7293
7294           if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7295             {
7296               imm_use_iterator imm_iter;
7297               use_operand_p use_p;
7298               gimple *stmt;
7299               int count = 0;
7300
7301               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7302                 {
7303                   if (!gimple_debug_bind_p (stmt))
7304                     continue;
7305
7306                   /* We just want to determine whether to do nothing
7307                      (count == 0), to substitute the computed
7308                      expression into a single use of the SSA DEF by
7309                      itself (count == 1), or to use a debug temp
7310                      because the SSA DEF is used multiple times or as
7311                      part of a larger expression (count > 1). */
7312                   count++;
7313                   if (gimple_debug_bind_get_value (stmt) != def)
7314                     count++;
7315
7316                   if (count > 1)
7317                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7318                 }
7319
7320               if (!count)
7321                 continue;
7322
7323               struct iv_use dummy_use;
7324               struct iv_cand *best_cand = NULL, *cand;
7325               unsigned i, best_pref = 0, cand_pref;
7326
7327               memset (&dummy_use, 0, sizeof (dummy_use));
7328               dummy_use.iv = info->iv;
7329               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7330                 {
7331                   cand = data->vgroups[i]->selected;
7332                   if (cand == best_cand)
7333                     continue;
7334                   cand_pref = operand_equal_p (cand->iv->step,
7335                                                info->iv->step, 0)
7336                     ? 4 : 0;
7337                   cand_pref
7338                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7339                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7340                     ? 2 : 0;
7341                   cand_pref
7342                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7343                     ? 1 : 0;
7344                   if (best_cand == NULL || best_pref < cand_pref)
7345                     {
7346                       best_cand = cand;
7347                       best_pref = cand_pref;
7348                     }
7349                 }
7350
7351               if (!best_cand)
7352                 continue;
7353
7354               tree comp = get_computation_at (data->current_loop,
7355                                               SSA_NAME_DEF_STMT (def),
7356                                               &dummy_use, best_cand);
7357               if (!comp)
7358                 continue;
7359
7360               if (count > 1)
7361                 {
7362                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7363                   DECL_ARTIFICIAL (vexpr) = 1;
7364                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7365                   if (SSA_NAME_VAR (def))
7366                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7367                   else
7368                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7369                   gdebug *def_temp
7370                     = gimple_build_debug_bind (vexpr, comp, NULL);
7371                   gimple_stmt_iterator gsi;
7372
7373                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7374                     gsi = gsi_after_labels (gimple_bb
7375                                             (SSA_NAME_DEF_STMT (def)));
7376                   else
7377                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7378
7379                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7380                   comp = vexpr;
7381                 }
7382
7383               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7384                 {
7385                   if (!gimple_debug_bind_p (stmt))
7386                     continue;
7387
7388                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7389                     SET_USE (use_p, comp);
7390
7391                   update_stmt (stmt);
7392                 }
7393             }
7394         }
7395     }
7396 }
7397
7398 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7399    for hash_map::traverse.  */
7400
7401 bool
7402 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7403 {
7404   free (value);
7405   return true;
7406 }
7407
7408 /* Frees data allocated by the optimization of a single loop.  */
7409
7410 static void
7411 free_loop_data (struct ivopts_data *data)
7412 {
7413   unsigned i, j;
7414   bitmap_iterator bi;
7415   tree obj;
7416
7417   if (data->niters)
7418     {
7419       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7420       delete data->niters;
7421       data->niters = NULL;
7422     }
7423
7424   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7425     {
7426       struct version_info *info;
7427
7428       info = ver_info (data, i);
7429       info->iv = NULL;
7430       info->has_nonlin_use = false;
7431       info->preserve_biv = false;
7432       info->inv_id = 0;
7433     }
7434   bitmap_clear (data->relevant);
7435   bitmap_clear (data->important_candidates);
7436
7437   for (i = 0; i < data->vgroups.length (); i++)
7438     {
7439       struct iv_group *group = data->vgroups[i];
7440
7441       for (j = 0; j < group->vuses.length (); j++)
7442         free (group->vuses[j]);
7443       group->vuses.release ();
7444
7445       BITMAP_FREE (group->related_cands);
7446       for (j = 0; j < group->n_map_members; j++)
7447         {
7448           if (group->cost_map[j].inv_vars)
7449             BITMAP_FREE (group->cost_map[j].inv_vars);
7450           if (group->cost_map[j].inv_exprs)
7451             BITMAP_FREE (group->cost_map[j].inv_exprs);
7452         }
7453
7454       free (group->cost_map);
7455       free (group);
7456     }
7457   data->vgroups.truncate (0);
7458
7459   for (i = 0; i < data->vcands.length (); i++)
7460     {
7461       struct iv_cand *cand = data->vcands[i];
7462
7463       if (cand->inv_vars)
7464         BITMAP_FREE (cand->inv_vars);
7465       if (cand->inv_exprs)
7466         BITMAP_FREE (cand->inv_exprs);
7467       free (cand);
7468     }
7469   data->vcands.truncate (0);
7470
7471   if (data->version_info_size < num_ssa_names)
7472     {
7473       data->version_info_size = 2 * num_ssa_names;
7474       free (data->version_info);
7475       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7476     }
7477
7478   data->max_inv_var_id = 0;
7479   data->max_inv_expr_id = 0;
7480
7481   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7482     SET_DECL_RTL (obj, NULL_RTX);
7483
7484   decl_rtl_to_reset.truncate (0);
7485
7486   data->inv_expr_tab->empty ();
7487
7488   data->iv_common_cand_tab->empty ();
7489   data->iv_common_cands.truncate (0);
7490 }
7491
7492 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7493    loop tree.  */
7494
7495 static void
7496 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7497 {
7498   free_loop_data (data);
7499   free (data->version_info);
7500   BITMAP_FREE (data->relevant);
7501   BITMAP_FREE (data->important_candidates);
7502
7503   decl_rtl_to_reset.release ();
7504   data->vgroups.release ();
7505   data->vcands.release ();
7506   delete data->inv_expr_tab;
7507   data->inv_expr_tab = NULL;
7508   free_affine_expand_cache (&data->name_expansion_cache);
7509   delete data->iv_common_cand_tab;
7510   data->iv_common_cand_tab = NULL;
7511   data->iv_common_cands.release ();
7512   obstack_free (&data->iv_obstack, NULL);
7513 }
7514
7515 /* Returns true if the loop body BODY includes any function calls.  */
7516
7517 static bool
7518 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7519 {
7520   gimple_stmt_iterator gsi;
7521   unsigned i;
7522
7523   for (i = 0; i < num_nodes; i++)
7524     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7525       {
7526         gimple *stmt = gsi_stmt (gsi);
7527         if (is_gimple_call (stmt)
7528             && !gimple_call_internal_p (stmt)
7529             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7530           return true;
7531       }
7532   return false;
7533 }
7534
7535 /* Determine cost scaling factor for basic blocks in loop.  */
7536 #define COST_SCALING_FACTOR_BOUND (20)
7537
7538 static void
7539 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7540 {
7541   int lfreq = data->current_loop->header->count.to_frequency (cfun);
7542   if (!data->speed || lfreq <= 0)
7543     return;
7544
7545   int max_freq = lfreq;
7546   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7547     {
7548       body[i]->aux = (void *)(intptr_t) 1;
7549       if (max_freq < body[i]->count.to_frequency (cfun))
7550         max_freq = body[i]->count.to_frequency (cfun);
7551     }
7552   if (max_freq > lfreq)
7553     {
7554       int divisor, factor;
7555       /* Check if scaling factor itself needs to be scaled by the bound.  This
7556          is to avoid overflow when scaling cost according to profile info.  */
7557       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7558         {
7559           divisor = max_freq;
7560           factor = COST_SCALING_FACTOR_BOUND;
7561         }
7562       else
7563         {
7564           divisor = lfreq;
7565           factor = 1;
7566         }
7567       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7568         {
7569           int bfreq = body[i]->count.to_frequency (cfun);
7570           if (bfreq <= lfreq)
7571             continue;
7572
7573           body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7574         }
7575     }
7576 }
7577
7578 /* Optimizes the LOOP.  Returns true if anything changed.  */
7579
7580 static bool
7581 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7582                            bitmap toremove)
7583 {
7584   bool changed = false;
7585   struct iv_ca *iv_ca;
7586   edge exit = single_dom_exit (loop);
7587   basic_block *body;
7588
7589   gcc_assert (!data->niters);
7590   data->current_loop = loop;
7591   data->loop_loc = find_loop_location (loop).get_location_t ();
7592   data->speed = optimize_loop_for_speed_p (loop);
7593
7594   if (dump_file && (dump_flags & TDF_DETAILS))
7595     {
7596       fprintf (dump_file, "Processing loop %d", loop->num);
7597       if (data->loop_loc != UNKNOWN_LOCATION)
7598         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7599                  LOCATION_LINE (data->loop_loc));
7600       fprintf (dump_file, "\n");
7601
7602       if (exit)
7603         {
7604           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7605                    exit->src->index, exit->dest->index);
7606           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7607           fprintf (dump_file, "\n");
7608         }
7609
7610       fprintf (dump_file, "\n");
7611     }
7612
7613   body = get_loop_body (loop);
7614   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7615   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7616
7617   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7618
7619   /* For each ssa name determines whether it behaves as an induction variable
7620      in some loop.  */
7621   if (!find_induction_variables (data))
7622     goto finish;
7623
7624   /* Finds interesting uses (item 1).  */
7625   find_interesting_uses (data);
7626   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7627     goto finish;
7628
7629   /* Determine cost scaling factor for basic blocks in loop.  */
7630   determine_scaling_factor (data, body);
7631
7632   /* Finds candidates for the induction variables (item 2).  */
7633   find_iv_candidates (data);
7634
7635   /* Calculates the costs (item 3, part 1).  */
7636   determine_iv_costs (data);
7637   determine_group_iv_costs (data);
7638   determine_set_costs (data);
7639
7640   /* Find the optimal set of induction variables (item 3, part 2).  */
7641   iv_ca = find_optimal_iv_set (data);
7642   /* Cleanup basic block aux field.  */
7643   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7644     body[i]->aux = NULL;
7645   if (!iv_ca)
7646     goto finish;
7647   changed = true;
7648
7649   /* Create the new induction variables (item 4, part 1).  */
7650   create_new_ivs (data, iv_ca);
7651   iv_ca_free (&iv_ca);
7652
7653   /* Rewrite the uses (item 4, part 2).  */
7654   rewrite_groups (data);
7655
7656   /* Remove the ivs that are unused after rewriting.  */
7657   remove_unused_ivs (data, toremove);
7658
7659 finish:
7660   free (body);
7661   free_loop_data (data);
7662
7663   return changed;
7664 }
7665
7666 /* Main entry point.  Optimizes induction variables in loops.  */
7667
7668 void
7669 tree_ssa_iv_optimize (void)
7670 {
7671   struct loop *loop;
7672   struct ivopts_data data;
7673   auto_bitmap toremove;
7674
7675   tree_ssa_iv_optimize_init (&data);
7676
7677   /* Optimize the loops starting with the innermost ones.  */
7678   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7679     {
7680       if (dump_file && (dump_flags & TDF_DETAILS))
7681         flow_loop_dump (loop, dump_file, NULL, 1);
7682
7683       tree_ssa_iv_optimize_loop (&data, loop, toremove);
7684     }
7685
7686   /* Remove eliminated IV defs.  */
7687   release_defs_bitset (toremove);
7688
7689   /* We have changed the structure of induction variables; it might happen
7690      that definitions in the scev database refer to some of them that were
7691      eliminated.  */
7692   scev_reset_htab ();
7693   /* Likewise niter and control-IV information.  */
7694   free_numbers_of_iterations_estimates (cfun);
7695
7696   tree_ssa_iv_optimize_finalize (&data);
7697 }
7698
7699 #include "gt-tree-ssa-loop-ivopts.h"