1 /* Induction variable optimizations.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33       Note the interesting uses are categorized and handled in groups.
34       Generally, address type uses are grouped together if their iv bases
35       differ only by a constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49          The cost reflects the time spent on modifying the induction variable's
50          value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64 All of this is done loop by loop. Doing it globally is theoretically
65    possible; it might give better performance and enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated. */
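/* An illustrative sketch of the transformation described above (assumed
   example, not taken from the original sources).  Given

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address &a[i] is an address type use of the induction variable i.
   The pass may select a pointer candidate p with base &a[0] and step
   sizeof (a[0]) and rewrite the loop roughly as

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   strength-reducing the multiplication hidden in a[i]; if i has no other
   uses, the original counter is then eliminated as dead code.  */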
68
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
115
116 /* The infinite cost. */
117 #define INFTY 10000000
118
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
122
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
128 {
129 niter = likely_max_stmt_executions_int (loop);
130
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133 }
134
135 return niter;
136 }
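/* A worked example of the fallback logic above (illustrative, not from the
   original sources): if profile data estimates, say, 1000 executions, 1000
   is returned directly; if no estimate exists (-1), the likely maximum is
   used instead, but capped by PARAM_AVG_LOOP_NITER (10 by default), so a
   loop with an unknown or huge trip count is costed as if it iterated
   about ten times.  */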
137
138 struct iv_use;
139
140 /* Representation of the induction variable. */
141 struct iv
142 {
143 tree base; /* Initial value of the iv. */
144   tree base_object;	/* A memory object to which the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147   struct iv_use *nonlin_use;	/* The nonlinear use this iv value appears in, if any. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
152 };
153
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
156 {
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
163 };
164
165 /* Types of uses. */
166 enum use_type
167 {
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_ADDRESS, /* Use in an address. */
170 USE_COMPARE /* Use is a compare. */
171 };
172
173 /* Cost of a computation. */
174 struct comp_cost
175 {
176 comp_cost (): cost (0), complexity (0), scratch (0)
177 {}
178
179 comp_cost (int cost, unsigned complexity, int scratch = 0)
180 : cost (cost), complexity (complexity), scratch (scratch)
181 {}
182
183 /* Returns true if COST is infinite. */
184 bool infinite_cost_p ();
185
186 /* Adds costs COST1 and COST2. */
187 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
188
189 /* Adds COST to the comp_cost. */
190 comp_cost operator+= (comp_cost cost);
191
192 /* Adds constant C to this comp_cost. */
193 comp_cost operator+= (HOST_WIDE_INT c);
194
195   /* Subtracts constant C from this comp_cost. */
196 comp_cost operator-= (HOST_WIDE_INT c);
197
198   /* Divides the comp_cost by constant C. */
199 comp_cost operator/= (HOST_WIDE_INT c);
200
201   /* Multiplies the comp_cost by constant C. */
202 comp_cost operator*= (HOST_WIDE_INT c);
203
204   /* Subtracts cost COST2 from COST1. */
205 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
206
207 /* Subtracts COST from this comp_cost. */
208 comp_cost operator-= (comp_cost cost);
209
210 /* Returns true if COST1 is smaller than COST2. */
211 friend bool operator< (comp_cost cost1, comp_cost cost2);
212
213 /* Returns true if COST1 and COST2 are equal. */
214 friend bool operator== (comp_cost cost1, comp_cost cost2);
215
216   /* Returns true if COST1 is smaller than or equal to COST2. */
217 friend bool operator<= (comp_cost cost1, comp_cost cost2);
218
219 int cost; /* The runtime cost. */
220 unsigned complexity; /* The estimate of the complexity of the code for
221 the computation (in no concrete units --
222 complexity field should be larger for more
223 complex expressions and addressing modes). */
224 int scratch; /* Scratch used during cost computation. */
225 };
226
227 static const comp_cost no_cost;
228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
229
230 bool
231 comp_cost::infinite_cost_p ()
232 {
233 return cost == INFTY;
234 }
235
236 comp_cost
237 operator+ (comp_cost cost1, comp_cost cost2)
238 {
239 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
240 return infinite_cost;
241
242 cost1.cost += cost2.cost;
243 cost1.complexity += cost2.complexity;
244
245 return cost1;
246 }
247
248 comp_cost
249 operator- (comp_cost cost1, comp_cost cost2)
250 {
251 if (cost1.infinite_cost_p ())
252 return infinite_cost;
253
254 gcc_assert (!cost2.infinite_cost_p ());
255
256 cost1.cost -= cost2.cost;
257 cost1.complexity -= cost2.complexity;
258
259 return cost1;
260 }
261
262 comp_cost
263 comp_cost::operator+= (comp_cost cost)
264 {
265 *this = *this + cost;
266 return *this;
267 }
268
269 comp_cost
270 comp_cost::operator+= (HOST_WIDE_INT c)
271 {
272 if (infinite_cost_p ())
273 return *this;
274
275 this->cost += c;
276
277 return *this;
278 }
279
280 comp_cost
281 comp_cost::operator-= (HOST_WIDE_INT c)
282 {
283 if (infinite_cost_p ())
284 return *this;
285
286 this->cost -= c;
287
288 return *this;
289 }
290
291 comp_cost
292 comp_cost::operator/= (HOST_WIDE_INT c)
293 {
294 if (infinite_cost_p ())
295 return *this;
296
297 this->cost /= c;
298
299 return *this;
300 }
301
302 comp_cost
303 comp_cost::operator*= (HOST_WIDE_INT c)
304 {
305 if (infinite_cost_p ())
306 return *this;
307
308 this->cost *= c;
309
310 return *this;
311 }
312
313 comp_cost
314 comp_cost::operator-= (comp_cost cost)
315 {
316 *this = *this - cost;
317 return *this;
318 }
319
320 bool
321 operator< (comp_cost cost1, comp_cost cost2)
322 {
323 if (cost1.cost == cost2.cost)
324 return cost1.complexity < cost2.complexity;
325
326 return cost1.cost < cost2.cost;
327 }
328
329 bool
330 operator== (comp_cost cost1, comp_cost cost2)
331 {
332 return cost1.cost == cost2.cost
333 && cost1.complexity == cost2.complexity;
334 }
335
336 bool
337 operator<= (comp_cost cost1, comp_cost cost2)
338 {
339 return cost1 < cost2 || cost1 == cost2;
340 }
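/* A small worked example of the ordering defined above (illustrative, not
   part of the original file).  Costs compare primarily on the runtime COST
   field and fall back to COMPLEXITY only to break ties:

     comp_cost (4, 2) < comp_cost (5, 0)   -- true, since 4 < 5
     comp_cost (4, 1) < comp_cost (4, 2)   -- true: equal cost, 1 < 2

   INFTY is simply a very large value (10000000), so infinite costs compare
   larger than any realistic finite cost.  */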
341
342 struct iv_inv_expr_ent;
343
344 /* The candidate - cost pair. */
345 struct cost_pair
346 {
347 struct iv_cand *cand; /* The candidate. */
348 comp_cost cost; /* The cost. */
349 enum tree_code comp; /* For iv elimination, the comparison. */
350 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
351 preserved when representing iv_use with iv_cand. */
352 bitmap inv_exprs; /* The list of newly created invariant expressions
353 when representing iv_use with iv_cand. */
354 tree value; /* For final value elimination, the expression for
355 the final value of the iv. For iv elimination,
356 the new bound to compare with. */
357 };
358
359 /* Use. */
360 struct iv_use
361 {
362 unsigned id; /* The id of the use. */
363 unsigned group_id; /* The group id the use belongs to. */
364 enum use_type type; /* Type of the use. */
365 struct iv *iv; /* The induction variable it is based on. */
366 gimple *stmt; /* Statement in that it occurs. */
367 tree *op_p; /* The place where it occurs. */
368
369 tree addr_base; /* Base address with const offset stripped. */
370 unsigned HOST_WIDE_INT addr_offset;
371 /* Const offset stripped from base address. */
372 };
373
374 /* Group of uses. */
375 struct iv_group
376 {
377 /* The id of the group. */
378 unsigned id;
379 /* Uses of the group are of the same type. */
380 enum use_type type;
381 /* The set of "related" IV candidates, plus the important ones. */
382 bitmap related_cands;
383 /* Number of IV candidates in the cost_map. */
384 unsigned n_map_members;
385   /* The costs with respect to the iv candidates. */
386 struct cost_pair *cost_map;
387 /* The selected candidate for the group. */
388 struct iv_cand *selected;
389 /* Uses in the group. */
390 vec<struct iv_use *> vuses;
391 };
392
393 /* The position where the iv is computed. */
394 enum iv_position
395 {
396 IP_NORMAL, /* At the end, just before the exit condition. */
397 IP_END, /* At the end of the latch block. */
398 IP_BEFORE_USE, /* Immediately before a specific use. */
399 IP_AFTER_USE, /* Immediately after a specific use. */
400 IP_ORIGINAL /* The original biv. */
401 };
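/* An illustrative sketch (assumed example, not from the original sources)
   of where the positions above place a candidate's increment in a simple
   loop:

     body:
       ... uses ...            <- IP_BEFORE_USE / IP_AFTER_USE step around one use
       ivtmp = ivtmp + step    <- IP_NORMAL: just before the exit test
       if (cond) goto latch; else goto exit;
     latch:
                               <- IP_END: at the end of the latch block
       goto body;

   IP_ORIGINAL reuses the increment statement of an existing biv.  */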
402
403 /* The induction variable candidate. */
404 struct iv_cand
405 {
406 unsigned id; /* The number of the candidate. */
407 bool important; /* Whether this is an "important" candidate, i.e. such
408 that it should be considered by all uses. */
409 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
410 gimple *incremented_at;/* For original biv, the statement where it is
411 incremented. */
412 tree var_before; /* The variable used for it before increment. */
413 tree var_after; /* The variable used for it after increment. */
414 struct iv *iv; /* The value of the candidate. NULL for
415 "pseudocandidate" used to indicate the possibility
416 to replace the final value of an iv by direct
417 computation of the value. */
418 unsigned cost; /* Cost of the candidate. */
419 unsigned cost_step; /* Cost of the candidate's increment operation. */
420 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
421 where it is incremented. */
422 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
423 iv_cand. */
424 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
425 			   handle it as a new invariant expression which will
426 			   be hoisted out of the loop. */
427 struct iv *orig_iv; /* The original iv if this cand is added from biv with
428 smaller type. */
429 };
430
431 /* Hashtable entry for common candidate derived from iv uses. */
432 struct iv_common_cand
433 {
434 tree base;
435 tree step;
436 /* IV uses from which this common candidate is derived. */
437 auto_vec<struct iv_use *> uses;
438 hashval_t hash;
439 };
440
441 /* Hashtable helpers. */
442
443 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
444 {
445 static inline hashval_t hash (const iv_common_cand *);
446 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
447 };
448
449 /* Hash function for possible common candidates. */
450
451 inline hashval_t
452 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
453 {
454 return ccand->hash;
455 }
456
457 /* Hash table equality function for common candidates. */
458
459 inline bool
460 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
461 const iv_common_cand *ccand2)
462 {
463 return (ccand1->hash == ccand2->hash
464 && operand_equal_p (ccand1->base, ccand2->base, 0)
465 && operand_equal_p (ccand1->step, ccand2->step, 0)
466 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
467 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
468 }
469
470 /* Loop invariant expression hashtable entry. */
471
472 struct iv_inv_expr_ent
473 {
474 /* Tree expression of the entry. */
475 tree expr;
476   /* Unique identifier. */
477 int id;
478 /* Hash value. */
479 hashval_t hash;
480 };
481
482 /* Sort iv_inv_expr_ent pair A and B by id field. */
483
484 static int
485 sort_iv_inv_expr_ent (const void *a, const void *b)
486 {
487 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
488 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
489
490 unsigned id1 = (*e1)->id;
491 unsigned id2 = (*e2)->id;
492
493 if (id1 < id2)
494 return -1;
495 else if (id1 > id2)
496 return 1;
497 else
498 return 0;
499 }
500
501 /* Hashtable helpers. */
502
503 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
504 {
505 static inline hashval_t hash (const iv_inv_expr_ent *);
506 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
507 };
508
509 /* Hash function for loop invariant expressions. */
510
511 inline hashval_t
512 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
513 {
514 return expr->hash;
515 }
516
517 /* Hash table equality function for expressions. */
518
519 inline bool
520 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
521 const iv_inv_expr_ent *expr2)
522 {
523 return expr1->hash == expr2->hash
524 && operand_equal_p (expr1->expr, expr2->expr, 0);
525 }
526
527 struct ivopts_data
528 {
529 /* The currently optimized loop. */
530 struct loop *current_loop;
531 source_location loop_loc;
532
533 /* Numbers of iterations for all exits of the current loop. */
534 hash_map<edge, tree_niter_desc *> *niters;
535
536 /* Number of registers used in it. */
537 unsigned regs_used;
538
539 /* The size of version_info array allocated. */
540 unsigned version_info_size;
541
542 /* The array of information for the ssa names. */
543 struct version_info *version_info;
544
545 /* The hashtable of loop invariant expressions created
546 by ivopt. */
547 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
548
549 /* The bitmap of indices in version_info whose value was changed. */
550 bitmap relevant;
551
552 /* The uses of induction variables. */
553 vec<iv_group *> vgroups;
554
555 /* The candidates. */
556 vec<iv_cand *> vcands;
557
558 /* A bitmap of important candidates. */
559 bitmap important_candidates;
560
561 /* Cache used by tree_to_aff_combination_expand. */
562 hash_map<tree, name_expansion *> *name_expansion_cache;
563
564 /* The hashtable of common candidates derived from iv uses. */
565 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
566
567 /* The common candidates. */
568 vec<iv_common_cand *> iv_common_cands;
569
570 /* The maximum invariant variable id. */
571 unsigned max_inv_var_id;
572
573 /* The maximum invariant expression id. */
574 unsigned max_inv_expr_id;
575
576 /* Number of no_overflow BIVs which are not used in memory address. */
577 unsigned bivs_not_used_in_addr;
578
579 /* Obstack for iv structure. */
580 struct obstack iv_obstack;
581
582 /* Whether to consider just related and important candidates when replacing a
583 use. */
584 bool consider_all_candidates;
585
586 /* Are we optimizing for speed? */
587 bool speed;
588
589 /* Whether the loop body includes any function calls. */
590 bool body_includes_call;
591
592 /* Whether the loop body can only be exited via single exit. */
593 bool loop_single_exit_p;
594 };
595
596 /* An assignment of iv candidates to uses. */
597
598 struct iv_ca
599 {
600 /* The number of uses covered by the assignment. */
601 unsigned upto;
602
603 /* Number of uses that cannot be expressed by the candidates in the set. */
604 unsigned bad_groups;
605
606 /* Candidate assigned to a use, together with the related costs. */
607 struct cost_pair **cand_for_group;
608
609 /* Number of times each candidate is used. */
610 unsigned *n_cand_uses;
611
612 /* The candidates used. */
613 bitmap cands;
614
615 /* The number of candidates in the set. */
616 unsigned n_cands;
617
618   /* The number of invariants needed, including both invariant variables and
619 invariant expressions. */
620 unsigned n_invs;
621
622 /* Total cost of expressing uses. */
623 comp_cost cand_use_cost;
624
625 /* Total cost of candidates. */
626 unsigned cand_cost;
627
628 /* Number of times each invariant variable is used. */
629 unsigned *n_inv_var_uses;
630
631 /* Number of times each invariant expression is used. */
632 unsigned *n_inv_expr_uses;
633
634 /* Total cost of the assignment. */
635 comp_cost cost;
636 };
637
638 /* Difference of two iv candidate assignments. */
639
640 struct iv_ca_delta
641 {
642 /* Changed group. */
643 struct iv_group *group;
644
645 /* An old assignment (for rollback purposes). */
646 struct cost_pair *old_cp;
647
648 /* A new assignment. */
649 struct cost_pair *new_cp;
650
651 /* Next change in the list. */
652 struct iv_ca_delta *next;
653 };
654
655 /* Bound on the number of candidates below which all candidates are considered. */
656
657 #define CONSIDER_ALL_CANDIDATES_BOUND \
658 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
659
660 /* If there are more iv occurrences, we just give up (it is quite unlikely that
661 optimizing such a loop would help, and it would take ages). */
662
663 #define MAX_CONSIDERED_GROUPS \
664 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
665
666 /* If there are at most this number of ivs in the set, always try removing
667    unnecessary ivs from the set. */
668
669 #define ALWAYS_PRUNE_CAND_SET_BOUND \
670 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
671
672 /* The list of trees for which the decl_rtl field must be reset is stored
673 here. */
674
675 static vec<tree> decl_rtl_to_reset;
676
677 static comp_cost force_expr_to_var_cost (tree, bool);
678
679 /* The single loop exit if it dominates the latch, NULL otherwise. */
680
681 edge
682 single_dom_exit (struct loop *loop)
683 {
684 edge exit = single_exit (loop);
685
686 if (!exit)
687 return NULL;
688
689 if (!just_once_each_iteration_p (loop, exit->src))
690 return NULL;
691
692 return exit;
693 }
694
695 /* Dumps information about the induction variable IV to FILE. Don't dump
696 variable's name if DUMP_NAME is FALSE. The information is dumped with
697 preceding spaces indicated by INDENT_LEVEL. */
698
699 void
700 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
701 {
702 const char *p;
703 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
704
705 if (indent_level > 4)
706 indent_level = 4;
707 p = spaces + 8 - (indent_level << 1);
708
709 fprintf (file, "%sIV struct:\n", p);
710 if (iv->ssa_name && dump_name)
711 {
712 fprintf (file, "%s SSA_NAME:\t", p);
713 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
714 fprintf (file, "\n");
715 }
716
717 fprintf (file, "%s Type:\t", p);
718 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
719 fprintf (file, "\n");
720
721 fprintf (file, "%s Base:\t", p);
722 print_generic_expr (file, iv->base, TDF_SLIM);
723 fprintf (file, "\n");
724
725 fprintf (file, "%s Step:\t", p);
726 print_generic_expr (file, iv->step, TDF_SLIM);
727 fprintf (file, "\n");
728
729 if (iv->base_object)
730 {
731 fprintf (file, "%s Object:\t", p);
732 print_generic_expr (file, iv->base_object, TDF_SLIM);
733 fprintf (file, "\n");
734 }
735
736 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
737
738 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
739 p, iv->no_overflow ? "No-overflow" : "Overflow");
740 }
741
742 /* Dumps information about the USE to FILE. */
743
744 void
745 dump_use (FILE *file, struct iv_use *use)
746 {
747 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
748 fprintf (file, " At stmt:\t");
749 print_gimple_stmt (file, use->stmt, 0);
750 fprintf (file, " At pos:\t");
751 if (use->op_p)
752 print_generic_expr (file, *use->op_p, TDF_SLIM);
753 fprintf (file, "\n");
754 dump_iv (file, use->iv, false, 2);
755 }
756
757 /* Dumps information about the uses to FILE. */
758
759 void
760 dump_groups (FILE *file, struct ivopts_data *data)
761 {
762 unsigned i, j;
763 struct iv_group *group;
764
765 for (i = 0; i < data->vgroups.length (); i++)
766 {
767 group = data->vgroups[i];
768 fprintf (file, "Group %d:\n", group->id);
769 if (group->type == USE_NONLINEAR_EXPR)
770 fprintf (file, " Type:\tGENERIC\n");
771 else if (group->type == USE_ADDRESS)
772 fprintf (file, " Type:\tADDRESS\n");
773 else
774 {
775 gcc_assert (group->type == USE_COMPARE);
776 fprintf (file, " Type:\tCOMPARE\n");
777 }
778 for (j = 0; j < group->vuses.length (); j++)
779 dump_use (file, group->vuses[j]);
780 }
781 }
782
783 /* Dumps information about induction variable candidate CAND to FILE. */
784
785 void
786 dump_cand (FILE *file, struct iv_cand *cand)
787 {
788 struct iv *iv = cand->iv;
789
790 fprintf (file, "Candidate %d:\n", cand->id);
791 if (cand->inv_vars)
792 {
793 fprintf (file, " Depend on inv.vars: ");
794 dump_bitmap (file, cand->inv_vars);
795 }
796 if (cand->inv_exprs)
797 {
798 fprintf (file, " Depend on inv.exprs: ");
799 dump_bitmap (file, cand->inv_exprs);
800 }
801
802 if (cand->var_before)
803 {
804 fprintf (file, " Var befor: ");
805 print_generic_expr (file, cand->var_before, TDF_SLIM);
806 fprintf (file, "\n");
807 }
808 if (cand->var_after)
809 {
810 fprintf (file, " Var after: ");
811 print_generic_expr (file, cand->var_after, TDF_SLIM);
812 fprintf (file, "\n");
813 }
814
815 switch (cand->pos)
816 {
817 case IP_NORMAL:
818 fprintf (file, " Incr POS: before exit test\n");
819 break;
820
821 case IP_BEFORE_USE:
822 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
823 break;
824
825 case IP_AFTER_USE:
826 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
827 break;
828
829 case IP_END:
830 fprintf (file, " Incr POS: at end\n");
831 break;
832
833 case IP_ORIGINAL:
834 fprintf (file, " Incr POS: orig biv\n");
835 break;
836 }
837
838 dump_iv (file, iv, false, 1);
839 }
840
841 /* Returns the info for ssa version VER. */
842
843 static inline struct version_info *
844 ver_info (struct ivopts_data *data, unsigned ver)
845 {
846 return data->version_info + ver;
847 }
848
849 /* Returns the info for ssa name NAME. */
850
851 static inline struct version_info *
852 name_info (struct ivopts_data *data, tree name)
853 {
854 return ver_info (data, SSA_NAME_VERSION (name));
855 }
856
857 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
858 emitted in LOOP. */
859
860 static bool
861 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
862 {
863 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
864
865 gcc_assert (bb);
866
867 if (sbb == loop->latch)
868 return true;
869
870 if (sbb != bb)
871 return false;
872
873 return stmt == last_stmt (bb);
874 }
875
876 /* Returns true if STMT is after the place where the original induction
877 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
878 if the positions are identical. */
879
880 static bool
881 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
882 {
883 basic_block cand_bb = gimple_bb (cand->incremented_at);
884 basic_block stmt_bb = gimple_bb (stmt);
885
886 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
887 return false;
888
889 if (stmt_bb != cand_bb)
890 return true;
891
892 if (true_if_equal
893 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
894 return true;
895 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
896 }
897
898 /* Returns true if STMT is after the place where the induction variable
899 CAND is incremented in LOOP. */
900
901 static bool
902 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
903 {
904 switch (cand->pos)
905 {
906 case IP_END:
907 return false;
908
909 case IP_NORMAL:
910 return stmt_after_ip_normal_pos (loop, stmt);
911
912 case IP_ORIGINAL:
913 case IP_AFTER_USE:
914 return stmt_after_inc_pos (cand, stmt, false);
915
916 case IP_BEFORE_USE:
917 return stmt_after_inc_pos (cand, stmt, true);
918
919 default:
920 gcc_unreachable ();
921 }
922 }
923
924 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
925
926 static bool
927 abnormal_ssa_name_p (tree exp)
928 {
929 if (!exp)
930 return false;
931
932 if (TREE_CODE (exp) != SSA_NAME)
933 return false;
934
935 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
936 }
937
938 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
939 abnormal phi node. Callback for for_each_index. */
940
941 static bool
942 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
943 void *data ATTRIBUTE_UNUSED)
944 {
945 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
946 {
947 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
948 return false;
949 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
950 return false;
951 }
952
953 return !abnormal_ssa_name_p (*index);
954 }
955
956 /* Returns true if EXPR contains a ssa name that occurs in an
957 abnormal phi node. */
958
959 bool
960 contains_abnormal_ssa_name_p (tree expr)
961 {
962 enum tree_code code;
963 enum tree_code_class codeclass;
964
965 if (!expr)
966 return false;
967
968 code = TREE_CODE (expr);
969 codeclass = TREE_CODE_CLASS (code);
970
971 if (code == SSA_NAME)
972 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
973
974 if (code == INTEGER_CST
975 || is_gimple_min_invariant (expr))
976 return false;
977
978 if (code == ADDR_EXPR)
979 return !for_each_index (&TREE_OPERAND (expr, 0),
980 idx_contains_abnormal_ssa_name_p,
981 NULL);
982
983 if (code == COND_EXPR)
984 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
985 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
986 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
987
988 switch (codeclass)
989 {
990 case tcc_binary:
991 case tcc_comparison:
992 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
993 return true;
994
995 /* Fallthru. */
996 case tcc_unary:
997 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
998 return true;
999
1000 break;
1001
1002 default:
1003 gcc_unreachable ();
1004 }
1005
1006 return false;
1007 }
1008
1009 /* Returns the structure describing number of iterations determined from
1010 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1011
1012 static struct tree_niter_desc *
1013 niter_for_exit (struct ivopts_data *data, edge exit)
1014 {
1015 struct tree_niter_desc *desc;
1016 tree_niter_desc **slot;
1017
1018 if (!data->niters)
1019 {
1020 data->niters = new hash_map<edge, tree_niter_desc *>;
1021 slot = NULL;
1022 }
1023 else
1024 slot = data->niters->get (exit);
1025
1026 if (!slot)
1027 {
1028 /* Try to determine number of iterations. We cannot safely work with ssa
1029 names that appear in phi nodes on abnormal edges, so that we do not
1030 create overlapping life ranges for them (PR 27283). */
1031 desc = XNEW (struct tree_niter_desc);
1032 if (!number_of_iterations_exit (data->current_loop,
1033 exit, desc, true)
1034 || contains_abnormal_ssa_name_p (desc->niter))
1035 {
1036 XDELETE (desc);
1037 desc = NULL;
1038 }
1039 data->niters->put (exit, desc);
1040 }
1041 else
1042 desc = *slot;
1043
1044 return desc;
1045 }
1046
1047 /* Returns the structure describing number of iterations determined from
1048 single dominating exit of DATA->current_loop, or NULL if something
1049 goes wrong. */
1050
1051 static struct tree_niter_desc *
1052 niter_for_single_dom_exit (struct ivopts_data *data)
1053 {
1054 edge exit = single_dom_exit (data->current_loop);
1055
1056 if (!exit)
1057 return NULL;
1058
1059 return niter_for_exit (data, exit);
1060 }
1061
1062 /* Initializes data structures used by the iv optimization pass, stored
1063 in DATA. */
1064
1065 static void
1066 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1067 {
1068 data->version_info_size = 2 * num_ssa_names;
1069 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1070 data->relevant = BITMAP_ALLOC (NULL);
1071 data->important_candidates = BITMAP_ALLOC (NULL);
1072 data->max_inv_var_id = 0;
1073 data->max_inv_expr_id = 0;
1074 data->niters = NULL;
1075 data->vgroups.create (20);
1076 data->vcands.create (20);
1077 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1078 data->name_expansion_cache = NULL;
1079 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1080 data->iv_common_cands.create (20);
1081 decl_rtl_to_reset.create (20);
1082 gcc_obstack_init (&data->iv_obstack);
1083 }
1084
1085 /* Returns a memory object to which EXPR points. In case we are able to
1086 determine that it does not point to any such object, NULL is returned. */
1087
1088 static tree
1089 determine_base_object (tree expr)
1090 {
1091 enum tree_code code = TREE_CODE (expr);
1092 tree base, obj;
1093
1094 /* If this is a pointer casted to any type, we need to determine
1095 the base object for the pointer; so handle conversions before
1096 throwing away non-pointer expressions. */
1097 if (CONVERT_EXPR_P (expr))
1098 return determine_base_object (TREE_OPERAND (expr, 0));
1099
1100 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1101 return NULL_TREE;
1102
1103 switch (code)
1104 {
1105 case INTEGER_CST:
1106 return NULL_TREE;
1107
1108 case ADDR_EXPR:
1109 obj = TREE_OPERAND (expr, 0);
1110 base = get_base_address (obj);
1111
1112 if (!base)
1113 return expr;
1114
1115 if (TREE_CODE (base) == MEM_REF)
1116 return determine_base_object (TREE_OPERAND (base, 0));
1117
1118 return fold_convert (ptr_type_node,
1119 build_fold_addr_expr (base));
1120
1121 case POINTER_PLUS_EXPR:
1122 return determine_base_object (TREE_OPERAND (expr, 0));
1123
1124 case PLUS_EXPR:
1125 case MINUS_EXPR:
1126 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1127 gcc_unreachable ();
1128
1129 default:
1130 if (POLY_INT_CST_P (expr))
1131 return NULL_TREE;
1132 return fold_convert (ptr_type_node, expr);
1133 }
1134 }
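/* A few illustrative cases for determine_base_object (assumed examples,
   not from the original sources): for &a[i] the base object is &a
   converted to ptr_type_node; a conversion such as (char *) p is looked
   through and the base is determined from p; a plain pointer SSA name
   falls to the default case and is itself returned as the base; an
   INTEGER_CST or any non-pointer expression yields NULL_TREE, meaning no
   base object could be identified.  */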
1135
1136 /* Return true if an address expression with a non-DECL_P operand appears
1137 in EXPR. */
1138
1139 static bool
1140 contain_complex_addr_expr (tree expr)
1141 {
1142 bool res = false;
1143
1144 STRIP_NOPS (expr);
1145 switch (TREE_CODE (expr))
1146 {
1147 case POINTER_PLUS_EXPR:
1148 case PLUS_EXPR:
1149 case MINUS_EXPR:
1150 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1151 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1152 break;
1153
1154 case ADDR_EXPR:
1155 return (!DECL_P (TREE_OPERAND (expr, 0)));
1156
1157 default:
1158 return false;
1159 }
1160
1161 return res;
1162 }
1163
1164 /* Allocates an induction variable with given initial value BASE and step STEP
1165 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1166
1167 static struct iv *
1168 alloc_iv (struct ivopts_data *data, tree base, tree step,
1169 bool no_overflow = false)
1170 {
1171 tree expr = base;
1172 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1173 sizeof (struct iv));
1174 gcc_assert (step != NULL_TREE);
1175
1176 /* Lower address expression in base except ones with DECL_P as operand.
1177 By doing this:
1178 1) More accurate cost can be computed for address expressions;
1179 2) Duplicate candidates won't be created for bases in different
1180 forms, like &a[0] and &a. */
1181 STRIP_NOPS (expr);
1182 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1183 || contain_complex_addr_expr (expr))
1184 {
1185 aff_tree comb;
1186 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1187 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1188 }
1189
1190 iv->base = base;
1191 iv->base_object = determine_base_object (base);
1192 iv->step = step;
1193 iv->biv_p = false;
1194 iv->nonlin_use = NULL;
1195 iv->ssa_name = NULL_TREE;
1196 if (!no_overflow
1197 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1198 base, step))
1199 no_overflow = true;
1200 iv->no_overflow = no_overflow;
1201 iv->have_address_use = false;
1202
1203 return iv;
1204 }
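/* An illustrative example of the lowering done in alloc_iv (assumed
   example, not from the original sources): bases written as &a[4] and as
   &a + 16 (assuming 4-byte elements) are both expanded through
   tree_to_aff_combination into the affine form &a + 16 and folded back,
   so later candidate generation sees one canonical base instead of two
   syntactically different but equivalent ones.  */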
1205
1206 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1207 doesn't overflow. */
1208
1209 static void
1210 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1211 bool no_overflow)
1212 {
1213 struct version_info *info = name_info (data, iv);
1214
1215 gcc_assert (!info->iv);
1216
1217 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1218 info->iv = alloc_iv (data, base, step, no_overflow);
1219 info->iv->ssa_name = iv;
1220 }
1221
1222 /* Finds induction variable declaration for VAR. */
1223
1224 static struct iv *
1225 get_iv (struct ivopts_data *data, tree var)
1226 {
1227 basic_block bb;
1228 tree type = TREE_TYPE (var);
1229
1230 if (!POINTER_TYPE_P (type)
1231 && !INTEGRAL_TYPE_P (type))
1232 return NULL;
1233
1234 if (!name_info (data, var)->iv)
1235 {
1236 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1237
1238 if (!bb
1239 || !flow_bb_inside_loop_p (data->current_loop, bb))
1240 set_iv (data, var, var, build_int_cst (type, 0), true);
1241 }
1242
1243 return name_info (data, var)->iv;
1244 }
1245
1246 /* Return the first non-invariant ssa var found in EXPR. */
1247
1248 static tree
1249 extract_single_var_from_expr (tree expr)
1250 {
1251 int i, n;
1252 tree tmp;
1253 enum tree_code code;
1254
1255 if (!expr || is_gimple_min_invariant (expr))
1256 return NULL;
1257
1258 code = TREE_CODE (expr);
1259 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1260 {
1261 n = TREE_OPERAND_LENGTH (expr);
1262 for (i = 0; i < n; i++)
1263 {
1264 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1265
1266 if (tmp)
1267 return tmp;
1268 }
1269 }
1270 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1271 }
1272
1273 /* Finds basic ivs. */
1274
1275 static bool
1276 find_bivs (struct ivopts_data *data)
1277 {
1278 gphi *phi;
1279 affine_iv iv;
1280 tree step, type, base, stop;
1281 bool found = false;
1282 struct loop *loop = data->current_loop;
1283 gphi_iterator psi;
1284
1285 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1286 {
1287 phi = psi.phi ();
1288
1289 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1290 continue;
1291
1292 if (virtual_operand_p (PHI_RESULT (phi)))
1293 continue;
1294
1295 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1296 continue;
1297
1298 if (integer_zerop (iv.step))
1299 continue;
1300
1301 step = iv.step;
1302 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1303       /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1304 	 Ideally we would stop at any ssa var, but since that is expensive
1305 	 and such cases are unusual, we only do it for the first one.
1306
1307 See PR64705 for the rationale. */
1308 stop = extract_single_var_from_expr (step);
1309 base = expand_simple_operations (base, stop);
1310 if (contains_abnormal_ssa_name_p (base)
1311 || contains_abnormal_ssa_name_p (step))
1312 continue;
1313
1314 type = TREE_TYPE (PHI_RESULT (phi));
1315 base = fold_convert (type, base);
1316 if (step)
1317 {
1318 if (POINTER_TYPE_P (type))
1319 step = convert_to_ptrofftype (step);
1320 else
1321 step = fold_convert (type, step);
1322 }
1323
1324 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1325 found = true;
1326 }
1327
1328 return found;
1329 }
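/* An illustrative example for find_bivs (assumed, not from the original
   sources).  For a loop with header

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   simple_iv recognizes i_1 as an affine iv with base 0 and step 1, so it
   is recorded here; mark_bivs later flags both i_1 and i_2 as bivs.  A PHI
   whose step folds to zero is skipped.  */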
1330
1331 /* Marks basic ivs. */
1332
1333 static void
1334 mark_bivs (struct ivopts_data *data)
1335 {
1336 gphi *phi;
1337 gimple *def;
1338 tree var;
1339 struct iv *iv, *incr_iv;
1340 struct loop *loop = data->current_loop;
1341 basic_block incr_bb;
1342 gphi_iterator psi;
1343
1344 data->bivs_not_used_in_addr = 0;
1345 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1346 {
1347 phi = psi.phi ();
1348
1349 iv = get_iv (data, PHI_RESULT (phi));
1350 if (!iv)
1351 continue;
1352
1353 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1354 def = SSA_NAME_DEF_STMT (var);
1355       /* Don't mark an iv peeled from another one as a biv. */
1356 if (def
1357 && gimple_code (def) == GIMPLE_PHI
1358 && gimple_bb (def) == loop->header)
1359 continue;
1360
1361 incr_iv = get_iv (data, var);
1362 if (!incr_iv)
1363 continue;
1364
1365 /* If the increment is in the subloop, ignore it. */
1366 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1367 if (incr_bb->loop_father != data->current_loop
1368 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1369 continue;
1370
1371 iv->biv_p = true;
1372 incr_iv->biv_p = true;
1373 if (iv->no_overflow)
1374 data->bivs_not_used_in_addr++;
1375 if (incr_iv->no_overflow)
1376 data->bivs_not_used_in_addr++;
1377 }
1378 }
1379
1380 /* Checks whether STMT defines a linear induction variable and stores its
1381 parameters to IV. */
1382
1383 static bool
1384 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1385 {
1386 tree lhs, stop;
1387 struct loop *loop = data->current_loop;
1388
1389 iv->base = NULL_TREE;
1390 iv->step = NULL_TREE;
1391
1392 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1393 return false;
1394
1395 lhs = gimple_assign_lhs (stmt);
1396 if (TREE_CODE (lhs) != SSA_NAME)
1397 return false;
1398
1399 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1400 return false;
1401
1402   /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1403      Ideally we would stop at any ssa var, but since that is expensive
1404      and such cases are unusual, we only do it for the first one.
1405
1406 See PR64705 for the rationale. */
1407 stop = extract_single_var_from_expr (iv->step);
1408 iv->base = expand_simple_operations (iv->base, stop);
1409 if (contains_abnormal_ssa_name_p (iv->base)
1410 || contains_abnormal_ssa_name_p (iv->step))
1411 return false;
1412
1413 /* If STMT could throw, then do not consider STMT as defining a GIV.
1414      While this will suppress optimizations, we cannot safely delete this
1415 GIV and associated statements, even if it appears it is not used. */
1416 if (stmt_could_throw_p (stmt))
1417 return false;
1418
1419 return true;
1420 }
1421
1422 /* Finds general ivs in statement STMT. */
1423
1424 static void
1425 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1426 {
1427 affine_iv iv;
1428
1429 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1430 return;
1431
1432 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1433 }
1434
1435 /* Finds general ivs in basic block BB. */
1436
1437 static void
1438 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1439 {
1440 gimple_stmt_iterator bsi;
1441
1442 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1443 find_givs_in_stmt (data, gsi_stmt (bsi));
1444 }
1445
1446 /* Finds general ivs. */
1447
1448 static void
1449 find_givs (struct ivopts_data *data)
1450 {
1451 struct loop *loop = data->current_loop;
1452 basic_block *body = get_loop_body_in_dom_order (loop);
1453 unsigned i;
1454
1455 for (i = 0; i < loop->num_nodes; i++)
1456 find_givs_in_bb (data, body[i]);
1457 free (body);
1458 }
1459
1460 /* For each ssa name defined in LOOP determines whether it is an induction
1461 variable and if so, its initial value and step. */
1462
1463 static bool
1464 find_induction_variables (struct ivopts_data *data)
1465 {
1466 unsigned i;
1467 bitmap_iterator bi;
1468
1469 if (!find_bivs (data))
1470 return false;
1471
1472 find_givs (data);
1473 mark_bivs (data);
1474
1475 if (dump_file && (dump_flags & TDF_DETAILS))
1476 {
1477 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1478
1479 if (niter)
1480 {
1481 fprintf (dump_file, " number of iterations ");
1482 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1483 if (!integer_zerop (niter->may_be_zero))
1484 {
1485 fprintf (dump_file, "; zero if ");
1486 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1487 }
1488 fprintf (dump_file, "\n");
1489 };
1490
1491 fprintf (dump_file, "\n<Induction Vars>:\n");
1492 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1493 {
1494 struct version_info *info = ver_info (data, i);
1495 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1496 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1497 }
1498 }
1499
1500 return true;
1501 }
1502
1503 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1504    For an address type use, ADDR_BASE is the stripped IV base and ADDR_OFFSET
1505    is the const offset stripped from the IV base; for uses of other types, both
1506    are zero by default. */
1507
1508 static struct iv_use *
1509 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1510 gimple *stmt, enum use_type type, tree addr_base,
1511 unsigned HOST_WIDE_INT addr_offset)
1512 {
1513 struct iv_use *use = XCNEW (struct iv_use);
1514
1515 use->id = group->vuses.length ();
1516 use->group_id = group->id;
1517 use->type = type;
1518 use->iv = iv;
1519 use->stmt = stmt;
1520 use->op_p = use_p;
1521 use->addr_base = addr_base;
1522 use->addr_offset = addr_offset;
1523
1524 group->vuses.safe_push (use);
1525 return use;
1526 }
1527
1528 /* Checks whether OP is a loop-level invariant and if so, records it.
1529 NONLINEAR_USE is true if the invariant is used in a way we do not
1530 handle specially. */
1531
1532 static void
1533 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1534 {
1535 basic_block bb;
1536 struct version_info *info;
1537
1538 if (TREE_CODE (op) != SSA_NAME
1539 || virtual_operand_p (op))
1540 return;
1541
1542 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1543 if (bb
1544 && flow_bb_inside_loop_p (data->current_loop, bb))
1545 return;
1546
1547 info = name_info (data, op);
1548 info->name = op;
1549 info->has_nonlin_use |= nonlinear_use;
1550 if (!info->inv_id)
1551 info->inv_id = ++data->max_inv_var_id;
1552 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1553 }
1554
1555 /* Record a group of TYPE. */
1556
1557 static struct iv_group *
1558 record_group (struct ivopts_data *data, enum use_type type)
1559 {
1560 struct iv_group *group = XCNEW (struct iv_group);
1561
1562 group->id = data->vgroups.length ();
1563 group->type = type;
1564 group->related_cands = BITMAP_ALLOC (NULL);
1565 group->vuses.create (1);
1566
1567 data->vgroups.safe_push (group);
1568 return group;
1569 }
1570
1571 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1572    A new group will be created if there is no existing group for the use. */
1573
1574 static struct iv_use *
1575 record_group_use (struct ivopts_data *data, tree *use_p,
1576 struct iv *iv, gimple *stmt, enum use_type type)
1577 {
1578 tree addr_base = NULL;
1579 struct iv_group *group = NULL;
1580 unsigned HOST_WIDE_INT addr_offset = 0;
1581
1582   /* Non-address type uses always get a new group; for address uses, try to find an existing group first. */
1583 if (type == USE_ADDRESS && iv->base_object)
1584 {
1585 unsigned int i;
1586
1587 addr_base = strip_offset (iv->base, &addr_offset);
1588 for (i = 0; i < data->vgroups.length (); i++)
1589 {
1590 struct iv_use *use;
1591
1592 group = data->vgroups[i];
1593 use = group->vuses[0];
1594 if (use->type != USE_ADDRESS || !use->iv->base_object)
1595 continue;
1596
1597 /* Check if it has the same stripped base and step. */
1598 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1599 && operand_equal_p (iv->step, use->iv->step, 0)
1600 && operand_equal_p (addr_base, use->addr_base, 0))
1601 break;
1602 }
1603 if (i == data->vgroups.length ())
1604 group = NULL;
1605 }
1606
1607 if (!group)
1608 group = record_group (data, type);
1609
1610 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1611 }
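/* An illustrative example of the grouping above (assumed, not from the
   original sources): address uses a[i] and a[i + 1] share the same base
   object, the same step and the same stripped base; they differ only by a
   constant offset, so the second use is appended to the first use's group.
   A use of b[i] has a different base object and starts a new group, as
   does every non-address use.  */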
1612
1613 /* Checks whether the use OP is interesting and if so, records it. */
1614
1615 static struct iv_use *
1616 find_interesting_uses_op (struct ivopts_data *data, tree op)
1617 {
1618 struct iv *iv;
1619 gimple *stmt;
1620 struct iv_use *use;
1621
1622 if (TREE_CODE (op) != SSA_NAME)
1623 return NULL;
1624
1625 iv = get_iv (data, op);
1626 if (!iv)
1627 return NULL;
1628
1629 if (iv->nonlin_use)
1630 {
1631 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1632 return iv->nonlin_use;
1633 }
1634
1635 if (integer_zerop (iv->step))
1636 {
1637 record_invariant (data, op, true);
1638 return NULL;
1639 }
1640
1641 stmt = SSA_NAME_DEF_STMT (op);
1642 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1643
1644 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1645 iv->nonlin_use = use;
1646 return use;
1647 }
1648
1649 /* Indicate how compare type iv_use can be handled. */
1650 enum comp_iv_rewrite
1651 {
1652 COMP_IV_NA,
1653 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1654 COMP_IV_EXPR,
1655 /* We may rewrite compare type iv_uses on both sides of comparison by
1656 expressing value of each iv_use. */
1657 COMP_IV_EXPR_2,
1658 /* We may rewrite compare type iv_use by expressing value of the iv_use
1659 or by eliminating it with other iv_cand. */
1660 COMP_IV_ELIM
1661 };
1662
1663 /* Given a condition in statement STMT, checks whether it is a compare
1664 of an induction variable and an invariant. If this is the case,
1665 CONTROL_VAR is set to location of the iv, BOUND to the location of
1666 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1667    induction variable descriptions, and the applicable comp_iv_rewrite kind
1668    is returned.  If this is not the case, CONTROL_VAR and BOUND are set to
1669    the arguments of the condition and COMP_IV_NA is returned. */
1670
1671 static enum comp_iv_rewrite
1672 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1673 tree **control_var, tree **bound,
1674 struct iv **iv_var, struct iv **iv_bound)
1675 {
1676 /* The objects returned when COND has constant operands. */
1677 static struct iv const_iv;
1678 static tree zero;
1679 tree *op0 = &zero, *op1 = &zero;
1680 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1681 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1682
1683 if (gimple_code (stmt) == GIMPLE_COND)
1684 {
1685 gcond *cond_stmt = as_a <gcond *> (stmt);
1686 op0 = gimple_cond_lhs_ptr (cond_stmt);
1687 op1 = gimple_cond_rhs_ptr (cond_stmt);
1688 }
1689 else
1690 {
1691 op0 = gimple_assign_rhs1_ptr (stmt);
1692 op1 = gimple_assign_rhs2_ptr (stmt);
1693 }
1694
1695 zero = integer_zero_node;
1696 const_iv.step = integer_zero_node;
1697
1698 if (TREE_CODE (*op0) == SSA_NAME)
1699 iv0 = get_iv (data, *op0);
1700 if (TREE_CODE (*op1) == SSA_NAME)
1701 iv1 = get_iv (data, *op1);
1702
1703   /* If both sides of the comparison are IVs, we can express ivs on both ends. */
1704 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1705 {
1706 rewrite_type = COMP_IV_EXPR_2;
1707 goto end;
1708 }
1709
1710   /* If neither side of the comparison is an IV. */
1711 if ((!iv0 || integer_zerop (iv0->step))
1712 && (!iv1 || integer_zerop (iv1->step)))
1713 goto end;
1714
1715 /* Control variable may be on the other side. */
1716 if (!iv0 || integer_zerop (iv0->step))
1717 {
1718 std::swap (op0, op1);
1719 std::swap (iv0, iv1);
1720 }
1721 /* If one side is IV and the other side isn't loop invariant. */
1722 if (!iv1)
1723 rewrite_type = COMP_IV_EXPR;
1724 /* If one side is IV and the other side is loop invariant. */
1725 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1726 rewrite_type = COMP_IV_ELIM;
1727
1728 end:
1729 if (control_var)
1730 *control_var = op0;
1731 if (iv_var)
1732 *iv_var = iv0;
1733 if (bound)
1734 *bound = op1;
1735 if (iv_bound)
1736 *iv_bound = iv1;
1737
1738 return rewrite_type;
1739 }
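/* Illustrative classifications (assumed examples, not from the original
   sources): for "if (i < n)" with i an iv of nonzero step and n loop
   invariant, the result is COMP_IV_ELIM (the exit test may be replaced by
   a test on another candidate); for "if (i < j)" with both operands ivs it
   is COMP_IV_EXPR_2; for "if (i < t)" where t is defined inside the loop
   but is not an iv it is COMP_IV_EXPR; a compare of two invariants yields
   COMP_IV_NA.  */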
1740
1741 /* Checks whether the condition in STMT is interesting and if so,
1742 records it. */
1743
1744 static void
1745 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1746 {
1747 tree *var_p, *bound_p;
1748 struct iv *var_iv, *bound_iv;
1749 enum comp_iv_rewrite ret;
1750
1751 ret = extract_cond_operands (data, stmt,
1752 &var_p, &bound_p, &var_iv, &bound_iv);
1753 if (ret == COMP_IV_NA)
1754 {
1755 find_interesting_uses_op (data, *var_p);
1756 find_interesting_uses_op (data, *bound_p);
1757 return;
1758 }
1759
1760 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE);
1761 /* Record compare type iv_use for iv on the other side of comparison. */
1762 if (ret == COMP_IV_EXPR_2)
1763 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE);
1764 }
1765
1766 /* Returns the outermost loop EXPR is obviously invariant in
1767 relative to the loop LOOP, i.e. if all its operands are defined
1768 outside of the returned loop. Returns NULL if EXPR is not
1769 even obviously invariant in LOOP. */
1770
1771 struct loop *
1772 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1773 {
1774 basic_block def_bb;
1775 unsigned i, len;
1776
1777 if (is_gimple_min_invariant (expr))
1778 return current_loops->tree_root;
1779
1780 if (TREE_CODE (expr) == SSA_NAME)
1781 {
1782 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1783 if (def_bb)
1784 {
1785 if (flow_bb_inside_loop_p (loop, def_bb))
1786 return NULL;
1787 return superloop_at_depth (loop,
1788 loop_depth (def_bb->loop_father) + 1);
1789 }
1790
1791 return current_loops->tree_root;
1792 }
1793
1794 if (!EXPR_P (expr))
1795 return NULL;
1796
1797 unsigned maxdepth = 0;
1798 len = TREE_OPERAND_LENGTH (expr);
1799 for (i = 0; i < len; i++)
1800 {
1801 struct loop *ivloop;
1802 if (!TREE_OPERAND (expr, i))
1803 continue;
1804
1805 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1806 if (!ivloop)
1807 return NULL;
1808 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1809 }
1810
1811 return superloop_at_depth (loop, maxdepth);
1812 }
1813
1814 /* Returns true if expression EXPR is obviously invariant in LOOP,
1815 i.e. if all its operands are defined outside of the LOOP. LOOP
1816 should not be the function body. */
1817
1818 bool
1819 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1820 {
1821 basic_block def_bb;
1822 unsigned i, len;
1823
1824 gcc_assert (loop_depth (loop) > 0);
1825
1826 if (is_gimple_min_invariant (expr))
1827 return true;
1828
1829 if (TREE_CODE (expr) == SSA_NAME)
1830 {
1831 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1832 if (def_bb
1833 && flow_bb_inside_loop_p (loop, def_bb))
1834 return false;
1835
1836 return true;
1837 }
1838
1839 if (!EXPR_P (expr))
1840 return false;
1841
1842 len = TREE_OPERAND_LENGTH (expr);
1843 for (i = 0; i < len; i++)
1844 if (TREE_OPERAND (expr, i)
1845 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1846 return false;
1847
1848 return true;
1849 }
1850
1851 /* Given expression EXPR which computes inductive values with respect
1852    to the loop recorded in DATA, this function returns the biv from which EXPR
1853    is derived by tracing definition chains of ssa variables in EXPR. */
1854
1855 static struct iv*
1856 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1857 {
1858 struct iv *iv;
1859 unsigned i, n;
1860 tree e2, e1;
1861 enum tree_code code;
1862 gimple *stmt;
1863
1864 if (expr == NULL_TREE)
1865 return NULL;
1866
1867 if (is_gimple_min_invariant (expr))
1868 return NULL;
1869
1870 code = TREE_CODE (expr);
1871 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1872 {
1873 n = TREE_OPERAND_LENGTH (expr);
1874 for (i = 0; i < n; i++)
1875 {
1876 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1877 if (iv)
1878 return iv;
1879 }
1880 }
1881
1882   /* Stop if it's not an ssa name. */
1883 if (code != SSA_NAME)
1884 return NULL;
1885
1886 iv = get_iv (data, expr);
1887 if (!iv || integer_zerop (iv->step))
1888 return NULL;
1889 else if (iv->biv_p)
1890 return iv;
1891
1892 stmt = SSA_NAME_DEF_STMT (expr);
1893 if (gphi *phi = dyn_cast <gphi *> (stmt))
1894 {
1895 ssa_op_iter iter;
1896 use_operand_p use_p;
1897 basic_block phi_bb = gimple_bb (phi);
1898
1899 /* Skip loop header PHI that doesn't define biv. */
1900 if (phi_bb->loop_father == data->current_loop)
1901 return NULL;
1902
1903 if (virtual_operand_p (gimple_phi_result (phi)))
1904 return NULL;
1905
1906 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1907 {
1908 tree use = USE_FROM_PTR (use_p);
1909 iv = find_deriving_biv_for_expr (data, use);
1910 if (iv)
1911 return iv;
1912 }
1913 return NULL;
1914 }
1915 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1916 return NULL;
1917
1918 e1 = gimple_assign_rhs1 (stmt);
1919 code = gimple_assign_rhs_code (stmt);
1920 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1921 return find_deriving_biv_for_expr (data, e1);
1922
1923 switch (code)
1924 {
1925 case MULT_EXPR:
1926 case PLUS_EXPR:
1927 case MINUS_EXPR:
1928 case POINTER_PLUS_EXPR:
1929 /* Increments, decrements and multiplications by a constant
1930 are simple. */
1931 e2 = gimple_assign_rhs2 (stmt);
1932 iv = find_deriving_biv_for_expr (data, e2);
1933 if (iv)
1934 return iv;
1935 gcc_fallthrough ();
1936
1937 CASE_CONVERT:
1938 /* Casts are simple. */
1939 return find_deriving_biv_for_expr (data, e1);
1940
1941 default:
1942 break;
1943 }
1944
1945 return NULL;
1946 }
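/* An illustrative trace (assumed example, not from the original sources).
   For an address computed as

     off_1 = i_1 * 4;
     p_1 = base_2 + off_1;

   starting from p_1 the walk follows the POINTER_PLUS_EXPR and MULT_EXPR
   operands down to i_1; if i_1 is a biv, it is returned as the biv from
   which the whole expression derives.  */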
1947
1948 /* Record that BIV, as well as its predecessor and successor ivs, is used
1949    in address type uses. */
1950
1951 static void
1952 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1953 {
1954 unsigned i;
1955 tree type, base_1, base_2;
1956 bitmap_iterator bi;
1957
1958 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1959 || biv->have_address_use || !biv->no_overflow)
1960 return;
1961
1962 type = TREE_TYPE (biv->base);
1963 if (!INTEGRAL_TYPE_P (type))
1964 return;
1965
1966 biv->have_address_use = true;
1967 data->bivs_not_used_in_addr--;
1968 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1969 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1970 {
1971 struct iv *iv = ver_info (data, i)->iv;
1972
1973 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1974 || iv->have_address_use || !iv->no_overflow)
1975 continue;
1976
1977 if (type != TREE_TYPE (iv->base)
1978 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1979 continue;
1980
1981 if (!operand_equal_p (biv->step, iv->step, 0))
1982 continue;
1983
1984 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1985 if (operand_equal_p (base_1, iv->base, 0)
1986 || operand_equal_p (base_2, biv->base, 0))
1987 {
1988 iv->have_address_use = true;
1989 data->bivs_not_used_in_addr--;
1990 }
1991 }
1992 }
1993
1994 /* Accumulates the steps of indices into DATA and replaces their values with
1995 the initial ones. Returns false when the value of the index cannot be
1996 determined. Callback for for_each_index. */
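/* E.g., for an access a[i_1] with 4-byte elements where i_1 has step 1,
   a step of 4 is accumulated into DATA->step and *IDX is replaced by the
   initial value of i_1.  */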
1997
1998 struct ifs_ivopts_data
1999 {
2000 struct ivopts_data *ivopts_data;
2001 gimple *stmt;
2002 tree step;
2003 };
2004
2005 static bool
2006 idx_find_step (tree base, tree *idx, void *data)
2007 {
2008 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2009 struct iv *iv;
2010 bool use_overflow_semantics = false;
2011 tree step, iv_base, iv_step, lbound, off;
2012 struct loop *loop = dta->ivopts_data->current_loop;
2013
2014 /* If base is a component ref, require that the offset of the reference
2015 be invariant. */
2016 if (TREE_CODE (base) == COMPONENT_REF)
2017 {
2018 off = component_ref_field_offset (base);
2019 return expr_invariant_in_loop_p (loop, off);
2020 }
2021
2022 /* If base is array, first check whether we will be able to move the
2023 reference out of the loop (in order to take its address in strength
2024 reduction). In order for this to work we need both lower bound
2025 and step to be loop invariants. */
2026 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2027 {
2028 /* Moreover, for a range, the size needs to be invariant as well. */
2029 if (TREE_CODE (base) == ARRAY_RANGE_REF
2030 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2031 return false;
2032
2033 step = array_ref_element_size (base);
2034 lbound = array_ref_low_bound (base);
2035
2036 if (!expr_invariant_in_loop_p (loop, step)
2037 || !expr_invariant_in_loop_p (loop, lbound))
2038 return false;
2039 }
2040
2041 if (TREE_CODE (*idx) != SSA_NAME)
2042 return true;
2043
2044 iv = get_iv (dta->ivopts_data, *idx);
2045 if (!iv)
2046 return false;
2047
2048 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2049 *&x[0], which is not folded and does not trigger the
2050 ARRAY_REF path below. */
2051 *idx = iv->base;
2052
2053 if (integer_zerop (iv->step))
2054 return true;
2055
2056 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2057 {
2058 step = array_ref_element_size (base);
2059
2060 /* We only handle addresses whose step is an integer constant. */
2061 if (TREE_CODE (step) != INTEGER_CST)
2062 return false;
2063 }
2064 else
2065 /* The step for pointer arithmetic is already 1 byte. */
2066 step = size_one_node;
2067
2068 iv_base = iv->base;
2069 iv_step = iv->step;
2070 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2071 use_overflow_semantics = true;
2072
2073 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2074 sizetype, &iv_base, &iv_step, dta->stmt,
2075 use_overflow_semantics))
2076 {
2077 /* The index might wrap. */
2078 return false;
2079 }
2080
2081 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2082 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2083
2084 if (dta->ivopts_data->bivs_not_used_in_addr)
2085 {
2086 if (!iv->biv_p)
2087 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2088
2089 record_biv_for_address_use (dta->ivopts_data, iv);
2090 }
2091 return true;
2092 }
2093
2094 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2095 object is passed to it in DATA. */
2096
2097 static bool
2098 idx_record_use (tree base, tree *idx,
2099 void *vdata)
2100 {
2101 struct ivopts_data *data = (struct ivopts_data *) vdata;
2102 find_interesting_uses_op (data, *idx);
2103 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2104 {
2105 find_interesting_uses_op (data, array_ref_element_size (base));
2106 find_interesting_uses_op (data, array_ref_low_bound (base));
2107 }
2108 return true;
2109 }
2110
2111 /* If we can prove that TOP = cst * BOT for some constant cst,
2112 store cst to MUL and return true. Otherwise return false.
2113 The returned value is always sign-extended, regardless of the
2114 signedness of TOP and BOT. */
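/* E.g., for TOP = i_1 * 4 and BOT = i_1 this stores 4 to MUL; for TOP = 12
   and BOT = 4 it stores 3.  */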
2115
2116 static bool
2117 constant_multiple_of (tree top, tree bot, widest_int *mul)
2118 {
2119 tree mby;
2120 enum tree_code code;
2121 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2122 widest_int res, p0, p1;
2123
2124 STRIP_NOPS (top);
2125 STRIP_NOPS (bot);
2126
2127 if (operand_equal_p (top, bot, 0))
2128 {
2129 *mul = 1;
2130 return true;
2131 }
2132
2133 code = TREE_CODE (top);
2134 switch (code)
2135 {
2136 case MULT_EXPR:
2137 mby = TREE_OPERAND (top, 1);
2138 if (TREE_CODE (mby) != INTEGER_CST)
2139 return false;
2140
2141 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2142 return false;
2143
2144 *mul = wi::sext (res * wi::to_widest (mby), precision);
2145 return true;
2146
2147 case PLUS_EXPR:
2148 case MINUS_EXPR:
2149 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2150 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2151 return false;
2152
2153 if (code == MINUS_EXPR)
2154 p1 = -p1;
2155 *mul = wi::sext (p0 + p1, precision);
2156 return true;
2157
2158 case INTEGER_CST:
2159 if (TREE_CODE (bot) != INTEGER_CST)
2160 return false;
2161
2162 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2163 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2164 if (p1 == 0)
2165 return false;
2166 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2167 return res == 0;
2168
2169 default:
2170 if (POLY_INT_CST_P (top)
2171 && POLY_INT_CST_P (bot)
2172 && constant_multiple_p (wi::to_poly_widest (top),
2173 wi::to_poly_widest (bot), mul))
2174 return true;
2175
2176 return false;
2177 }
2178 }
2179
2180 /* Return true if memory reference REF with step STEP may be unaligned. */
2181
2182 static bool
2183 may_be_unaligned_p (tree ref, tree step)
2184 {
2185 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2186 thus they are not misaligned. */
2187 if (TREE_CODE (ref) == TARGET_MEM_REF)
2188 return false;
2189
2190 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2191 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2192 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2193
2194 unsigned HOST_WIDE_INT bitpos;
2195 unsigned int ref_align;
2196 get_object_alignment_1 (ref, &ref_align, &bitpos);
2197 if (ref_align < align
2198 || (bitpos % align) != 0
2199 || (bitpos % BITS_PER_UNIT) != 0)
2200 return true;
2201
2202 unsigned int trailing_zeros = tree_ctz (step);
2203 if (trailing_zeros < HOST_BITS_PER_INT
2204 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2205 return true;
2206
2207 return false;
2208 }
2209
2210 /* Return true if EXPR may be non-addressable. */
2211
2212 bool
2213 may_be_nonaddressable_p (tree expr)
2214 {
2215 switch (TREE_CODE (expr))
2216 {
2217 case TARGET_MEM_REF:
2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2219 target, thus they are always addressable. */
2220 return false;
2221
2222 case MEM_REF:
2223 /* Likewise for MEM_REFs, modulo the storage order. */
2224 return REF_REVERSE_STORAGE_ORDER (expr);
2225
2226 case BIT_FIELD_REF:
2227 if (REF_REVERSE_STORAGE_ORDER (expr))
2228 return true;
2229 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2230
2231 case COMPONENT_REF:
2232 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2233 return true;
2234 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2235 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2236
2237 case ARRAY_REF:
2238 case ARRAY_RANGE_REF:
2239 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2240 return true;
2241 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2242
2243 case VIEW_CONVERT_EXPR:
2244 /* This kind of view-conversions may wrap non-addressable objects
2245 and make them look addressable. After some processing the
2246 non-addressability may be uncovered again, causing ADDR_EXPRs
2247 of inappropriate objects to be built. */
2248 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2249 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2250 return true;
2251 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2252
2253 CASE_CONVERT:
2254 return true;
2255
2256 default:
2257 break;
2258 }
2259
2260 return false;
2261 }
2262
2263 /* Finds addresses in *OP_P inside STMT. */
2264
2265 static void
2266 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2267 tree *op_p)
2268 {
2269 tree base = *op_p, step = size_zero_node;
2270 struct iv *civ;
2271 struct ifs_ivopts_data ifs_ivopts_data;
2272
2273 /* Do not play with volatile memory references. A bit too conservative,
2274 perhaps, but safe. */
2275 if (gimple_has_volatile_ops (stmt))
2276 goto fail;
2277
2278 /* Ignore bitfields for now. Not really something terribly complicated
2279 to handle. TODO. */
2280 if (TREE_CODE (base) == BIT_FIELD_REF)
2281 goto fail;
2282
2283 base = unshare_expr (base);
2284
2285 if (TREE_CODE (base) == TARGET_MEM_REF)
2286 {
2287 tree type = build_pointer_type (TREE_TYPE (base));
2288 tree astep;
2289
2290 if (TMR_BASE (base)
2291 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2292 {
2293 civ = get_iv (data, TMR_BASE (base));
2294 if (!civ)
2295 goto fail;
2296
2297 TMR_BASE (base) = civ->base;
2298 step = civ->step;
2299 }
2300 if (TMR_INDEX2 (base)
2301 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2302 {
2303 civ = get_iv (data, TMR_INDEX2 (base));
2304 if (!civ)
2305 goto fail;
2306
2307 TMR_INDEX2 (base) = civ->base;
2308 step = civ->step;
2309 }
2310 if (TMR_INDEX (base)
2311 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2312 {
2313 civ = get_iv (data, TMR_INDEX (base));
2314 if (!civ)
2315 goto fail;
2316
2317 TMR_INDEX (base) = civ->base;
2318 astep = civ->step;
2319
2320 if (astep)
2321 {
2322 if (TMR_STEP (base))
2323 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2324
2325 step = fold_build2 (PLUS_EXPR, type, step, astep);
2326 }
2327 }
2328
2329 if (integer_zerop (step))
2330 goto fail;
2331 base = tree_mem_ref_addr (type, base);
2332 }
2333 else
2334 {
2335 ifs_ivopts_data.ivopts_data = data;
2336 ifs_ivopts_data.stmt = stmt;
2337 ifs_ivopts_data.step = size_zero_node;
2338 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2339 || integer_zerop (ifs_ivopts_data.step))
2340 goto fail;
2341 step = ifs_ivopts_data.step;
2342
2343 /* Check that the base expression is addressable. This needs
2344 to be done after substituting bases of IVs into it. */
2345 if (may_be_nonaddressable_p (base))
2346 goto fail;
2347
2348 /* Moreover, on strict alignment platforms, check that it is
2349 sufficiently aligned. */
2350 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2351 goto fail;
2352
2353 base = build_fold_addr_expr (base);
2354
2355 /* Substituting bases of IVs into the base expression might
2356 have caused folding opportunities. */
2357 if (TREE_CODE (base) == ADDR_EXPR)
2358 {
2359 tree *ref = &TREE_OPERAND (base, 0);
2360 while (handled_component_p (*ref))
2361 ref = &TREE_OPERAND (*ref, 0);
2362 if (TREE_CODE (*ref) == MEM_REF)
2363 {
2364 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2365 TREE_OPERAND (*ref, 0),
2366 TREE_OPERAND (*ref, 1));
2367 if (tem)
2368 *ref = tem;
2369 }
2370 }
2371 }
2372
2373 civ = alloc_iv (data, base, step);
2374 /* Fail if base object of this memory reference is unknown. */
2375 if (civ->base_object == NULL_TREE)
2376 goto fail;
2377
2378 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2379 return;
2380
2381 fail:
2382 for_each_index (op_p, idx_record_use, data);
2383 }
2384
2385 /* Finds and records invariants used in STMT. */
2386
2387 static void
2388 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2389 {
2390 ssa_op_iter iter;
2391 use_operand_p use_p;
2392 tree op;
2393
2394 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2395 {
2396 op = USE_FROM_PTR (use_p);
2397 record_invariant (data, op, false);
2398 }
2399 }
2400
2401 /* Finds interesting uses of induction variables in the statement STMT. */
2402
2403 static void
2404 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2405 {
2406 struct iv *iv;
2407 tree op, *lhs, *rhs;
2408 ssa_op_iter iter;
2409 use_operand_p use_p;
2410 enum tree_code code;
2411
2412 find_invariants_stmt (data, stmt);
2413
2414 if (gimple_code (stmt) == GIMPLE_COND)
2415 {
2416 find_interesting_uses_cond (data, stmt);
2417 return;
2418 }
2419
2420 if (is_gimple_assign (stmt))
2421 {
2422 lhs = gimple_assign_lhs_ptr (stmt);
2423 rhs = gimple_assign_rhs1_ptr (stmt);
2424
2425 if (TREE_CODE (*lhs) == SSA_NAME)
2426 {
2427 /* If the statement defines an induction variable, the uses are not
2428 interesting by themselves. */
2429
2430 iv = get_iv (data, *lhs);
2431
2432 if (iv && !integer_zerop (iv->step))
2433 return;
2434 }
2435
2436 code = gimple_assign_rhs_code (stmt);
2437 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2438 && (REFERENCE_CLASS_P (*rhs)
2439 || is_gimple_val (*rhs)))
2440 {
2441 if (REFERENCE_CLASS_P (*rhs))
2442 find_interesting_uses_address (data, stmt, rhs);
2443 else
2444 find_interesting_uses_op (data, *rhs);
2445
2446 if (REFERENCE_CLASS_P (*lhs))
2447 find_interesting_uses_address (data, stmt, lhs);
2448 return;
2449 }
2450 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2451 {
2452 find_interesting_uses_cond (data, stmt);
2453 return;
2454 }
2455
2456 /* TODO -- we should also handle address uses of type
2457
2458 memory = call (whatever);
2459
2460 and
2461
2462 call (memory). */
2463 }
2464
2465 if (gimple_code (stmt) == GIMPLE_PHI
2466 && gimple_bb (stmt) == data->current_loop->header)
2467 {
2468 iv = get_iv (data, PHI_RESULT (stmt));
2469
2470 if (iv && !integer_zerop (iv->step))
2471 return;
2472 }
2473
2474 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2475 {
2476 op = USE_FROM_PTR (use_p);
2477
2478 if (TREE_CODE (op) != SSA_NAME)
2479 continue;
2480
2481 iv = get_iv (data, op);
2482 if (!iv)
2483 continue;
2484
2485 find_interesting_uses_op (data, op);
2486 }
2487 }
2488
2489 /* Finds interesting uses of induction variables outside of loops
2490 on loop exit edge EXIT. */
2491
2492 static void
2493 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2494 {
2495 gphi *phi;
2496 gphi_iterator psi;
2497 tree def;
2498
2499 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2500 {
2501 phi = psi.phi ();
2502 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2503 if (!virtual_operand_p (def))
2504 find_interesting_uses_op (data, def);
2505 }
2506 }
2507
2508 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2509 mode for memory reference represented by USE. */
2510
2511 static GTY (()) vec<rtx, va_gc> *addr_list;
2512
2513 static bool
2514 addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset)
2515 {
2516 rtx reg, addr;
2517 unsigned list_index;
2518 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2519 machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2520
2521 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2522 if (list_index >= vec_safe_length (addr_list))
2523 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2524
2525 addr = (*addr_list)[list_index];
2526 if (!addr)
2527 {
2528 addr_mode = targetm.addr_space.address_mode (as);
2529 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2530 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2531 (*addr_list)[list_index] = addr;
2532 }
2533 else
2534 addr_mode = GET_MODE (addr);
2535
2536 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2537 return (memory_address_addr_space_p (mem_mode, addr, as));
2538 }
2539
2540 /* Comparison function to sort group in ascending order of addr_offset. */
2541
2542 static int
2543 group_compare_offset (const void *a, const void *b)
2544 {
2545 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2546 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2547
2548 if ((*u1)->addr_offset != (*u2)->addr_offset)
2549 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2550 else
2551 return 0;
2552 }
2553
2554 /* Check if small groups should be split. Return true if no group
2555 contains more than two uses with distinct addr_offsets. Return
2556 false otherwise. We want to split such groups because:
2557
2558 1) Small groups don't have much benefit and may interfere with
2559 general candidate selection.
2560 2) The problem size with only small groups is usually small and the
2561 general algorithm can handle it well.
2562
2563 TODO -- The above claim may not hold when we want to merge memory
2564 accesses with consecutive addresses. */
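/* E.g., a group with uses at offsets {0, 4, 4, 8} has three distinct
   offsets, so this function returns false.  */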
2565
2566 static bool
2567 split_small_address_groups_p (struct ivopts_data *data)
2568 {
2569 unsigned int i, j, distinct = 1;
2570 struct iv_use *pre;
2571 struct iv_group *group;
2572
2573 for (i = 0; i < data->vgroups.length (); i++)
2574 {
2575 group = data->vgroups[i];
2576 if (group->vuses.length () == 1)
2577 continue;
2578
2579 gcc_assert (group->type == USE_ADDRESS);
2580 if (group->vuses.length () == 2)
2581 {
2582 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2583 std::swap (group->vuses[0], group->vuses[1]);
2584 }
2585 else
2586 group->vuses.qsort (group_compare_offset);
2587
2588 if (distinct > 2)
2589 continue;
2590
2591 distinct = 1;
2592 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2593 {
2594 if (group->vuses[j]->addr_offset != pre->addr_offset)
2595 {
2596 pre = group->vuses[j];
2597 distinct++;
2598 }
2599
2600 if (distinct > 2)
2601 break;
2602 }
2603 }
2604
2605 return (distinct <= 2);
2606 }
2607
2608 /* For each group of address type uses, this function further groups
2609 these uses according to the maximum offset supported by target's
2610 [base + offset] addressing mode. */
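/* E.g., assuming a target whose [base + offset] addressing accepts offsets
   up to 4095 only, uses at offsets {0, 8, 4104} relative to the first use
   would be split into the groups {0, 8} and {4104}.  */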
2611
2612 static void
2613 split_address_groups (struct ivopts_data *data)
2614 {
2615 unsigned int i, j;
2616 /* Whether to split all groups so that each contains a single offset. */
2617 bool split_p = split_small_address_groups_p (data);
2618
2619 for (i = 0; i < data->vgroups.length (); i++)
2620 {
2621 struct iv_group *new_group = NULL;
2622 struct iv_group *group = data->vgroups[i];
2623 struct iv_use *use = group->vuses[0];
2624
2625 use->id = 0;
2626 use->group_id = group->id;
2627 if (group->vuses.length () == 1)
2628 continue;
2629
2630 gcc_assert (group->type == USE_ADDRESS);
2631
2632 for (j = 1; j < group->vuses.length ();)
2633 {
2634 struct iv_use *next = group->vuses[j];
2635 HOST_WIDE_INT offset = next->addr_offset - use->addr_offset;
2636
2637 /* Split the group if asked to, or if the offset against the first
2638 use can't fit in the offset part of the addressing mode. IV uses
2639 having the same offset are still kept in one group. */
2640 if (offset != 0 &&
2641 (split_p || !addr_offset_valid_p (use, offset)))
2642 {
2643 if (!new_group)
2644 new_group = record_group (data, group->type);
2645 group->vuses.ordered_remove (j);
2646 new_group->vuses.safe_push (next);
2647 continue;
2648 }
2649
2650 next->id = j;
2651 next->group_id = group->id;
2652 j++;
2653 }
2654 }
2655 }
2656
2657 /* Finds uses of the induction variables that are interesting. */
2658
2659 static void
2660 find_interesting_uses (struct ivopts_data *data)
2661 {
2662 basic_block bb;
2663 gimple_stmt_iterator bsi;
2664 basic_block *body = get_loop_body (data->current_loop);
2665 unsigned i;
2666 edge e;
2667
2668 for (i = 0; i < data->current_loop->num_nodes; i++)
2669 {
2670 edge_iterator ei;
2671 bb = body[i];
2672
2673 FOR_EACH_EDGE (e, ei, bb->succs)
2674 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2675 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2676 find_interesting_uses_outside (data, e);
2677
2678 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2679 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2680 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2681 if (!is_gimple_debug (gsi_stmt (bsi)))
2682 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2683 }
2684 free (body);
2685
2686 split_address_groups (data);
2687
2688 if (dump_file && (dump_flags & TDF_DETAILS))
2689 {
2690 fprintf (dump_file, "\n<IV Groups>:\n");
2691 dump_groups (dump_file, data);
2692 fprintf (dump_file, "\n");
2693 }
2694 }
2695
2696 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2697 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2698 we are at the top-level of the processed address. */
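/* E.g., stripping &a[i_1 + 3], with 4-byte array elements, yields &a[i_1]
   and stores 12 to OFFSET.  */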
2699
2700 static tree
2701 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2702 HOST_WIDE_INT *offset)
2703 {
2704 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2705 enum tree_code code;
2706 tree type, orig_type = TREE_TYPE (expr);
2707 HOST_WIDE_INT off0, off1, st;
2708 tree orig_expr = expr;
2709
2710 STRIP_NOPS (expr);
2711
2712 type = TREE_TYPE (expr);
2713 code = TREE_CODE (expr);
2714 *offset = 0;
2715
2716 switch (code)
2717 {
2718 case INTEGER_CST:
2719 if (!cst_and_fits_in_hwi (expr)
2720 || integer_zerop (expr))
2721 return orig_expr;
2722
2723 *offset = int_cst_value (expr);
2724 return build_int_cst (orig_type, 0);
2725
2726 case POINTER_PLUS_EXPR:
2727 case PLUS_EXPR:
2728 case MINUS_EXPR:
2729 op0 = TREE_OPERAND (expr, 0);
2730 op1 = TREE_OPERAND (expr, 1);
2731
2732 op0 = strip_offset_1 (op0, false, false, &off0);
2733 op1 = strip_offset_1 (op1, false, false, &off1);
2734
2735 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2736 if (op0 == TREE_OPERAND (expr, 0)
2737 && op1 == TREE_OPERAND (expr, 1))
2738 return orig_expr;
2739
2740 if (integer_zerop (op1))
2741 expr = op0;
2742 else if (integer_zerop (op0))
2743 {
2744 if (code == MINUS_EXPR)
2745 expr = fold_build1 (NEGATE_EXPR, type, op1);
2746 else
2747 expr = op1;
2748 }
2749 else
2750 expr = fold_build2 (code, type, op0, op1);
2751
2752 return fold_convert (orig_type, expr);
2753
2754 case MULT_EXPR:
2755 op1 = TREE_OPERAND (expr, 1);
2756 if (!cst_and_fits_in_hwi (op1))
2757 return orig_expr;
2758
2759 op0 = TREE_OPERAND (expr, 0);
2760 op0 = strip_offset_1 (op0, false, false, &off0);
2761 if (op0 == TREE_OPERAND (expr, 0))
2762 return orig_expr;
2763
2764 *offset = off0 * int_cst_value (op1);
2765 if (integer_zerop (op0))
2766 expr = op0;
2767 else
2768 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2769
2770 return fold_convert (orig_type, expr);
2771
2772 case ARRAY_REF:
2773 case ARRAY_RANGE_REF:
2774 if (!inside_addr)
2775 return orig_expr;
2776
2777 step = array_ref_element_size (expr);
2778 if (!cst_and_fits_in_hwi (step))
2779 break;
2780
2781 st = int_cst_value (step);
2782 op1 = TREE_OPERAND (expr, 1);
2783 op1 = strip_offset_1 (op1, false, false, &off1);
2784 *offset = off1 * st;
2785
2786 if (top_compref
2787 && integer_zerop (op1))
2788 {
2789 /* Strip the component reference completely. */
2790 op0 = TREE_OPERAND (expr, 0);
2791 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2792 *offset += off0;
2793 return op0;
2794 }
2795 break;
2796
2797 case COMPONENT_REF:
2798 {
2799 tree field;
2800
2801 if (!inside_addr)
2802 return orig_expr;
2803
2804 tmp = component_ref_field_offset (expr);
2805 field = TREE_OPERAND (expr, 1);
2806 if (top_compref
2807 && cst_and_fits_in_hwi (tmp)
2808 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2809 {
2810 HOST_WIDE_INT boffset, abs_off;
2811
2812 /* Strip the component reference completely. */
2813 op0 = TREE_OPERAND (expr, 0);
2814 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2815 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2816 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2817 if (boffset < 0)
2818 abs_off = -abs_off;
2819
2820 *offset = off0 + int_cst_value (tmp) + abs_off;
2821 return op0;
2822 }
2823 }
2824 break;
2825
2826 case ADDR_EXPR:
2827 op0 = TREE_OPERAND (expr, 0);
2828 op0 = strip_offset_1 (op0, true, true, &off0);
2829 *offset += off0;
2830
2831 if (op0 == TREE_OPERAND (expr, 0))
2832 return orig_expr;
2833
2834 expr = build_fold_addr_expr (op0);
2835 return fold_convert (orig_type, expr);
2836
2837 case MEM_REF:
2838 /* ??? Offset operand? */
2839 inside_addr = false;
2840 break;
2841
2842 default:
2843 return orig_expr;
2844 }
2845
2846 /* Default handling of expressions for which we want to recurse into
2847 the first operand. */
2848 op0 = TREE_OPERAND (expr, 0);
2849 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2850 *offset += off0;
2851
2852 if (op0 == TREE_OPERAND (expr, 0)
2853 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2854 return orig_expr;
2855
2856 expr = copy_node (expr);
2857 TREE_OPERAND (expr, 0) = op0;
2858 if (op1)
2859 TREE_OPERAND (expr, 1) = op1;
2860
2861 /* Inside address, we might strip the top level component references,
2862 thus changing type of the expression. Handling of ADDR_EXPR
2863 will fix that. */
2864 expr = fold_convert (orig_type, expr);
2865
2866 return expr;
2867 }
2868
2869 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2870
2871 tree
2872 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2873 {
2874 HOST_WIDE_INT off;
2875 tree core = strip_offset_1 (expr, false, false, &off);
2876 *offset = off;
2877 return core;
2878 }
2879
2880 /* Returns a variant of TYPE that can be used as the base for different uses.
2881 We return an unsigned type with the same precision, which avoids problems
2882 with overflows. */
2883
2884 static tree
2885 generic_type_for (tree type)
2886 {
2887 if (POINTER_TYPE_P (type))
2888 return unsigned_type_for (type);
2889
2890 if (TYPE_UNSIGNED (type))
2891 return type;
2892
2893 return unsigned_type_for (type);
2894 }
2895
2896 /* Private data for walk_tree. */
2897
2898 struct walk_tree_data
2899 {
2900 bitmap *inv_vars;
2901 struct ivopts_data *idata;
2902 };
2903
2904 /* Callback function for walk_tree; it records invariants and symbol
2905 references in *EXPR_P. DATA is the structure storing result info. */
2906
2907 static tree
2908 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2909 {
2910 tree op = *expr_p;
2911 struct version_info *info;
2912 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2913
2914 if (TREE_CODE (op) != SSA_NAME)
2915 return NULL_TREE;
2916
2917 info = name_info (wdata->idata, op);
2918 /* Because we expand simple operations when finding IVs, a loop invariant
2919 variable that isn't referred to by the original loop could be used now.
2920 Record such invariant variables here. */
2921 if (!info->iv)
2922 {
2923 struct ivopts_data *idata = wdata->idata;
2924 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2925
2926 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2927 {
2928 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2929 record_invariant (idata, op, false);
2930 }
2931 }
2932 if (!info->inv_id || info->has_nonlin_use)
2933 return NULL_TREE;
2934
2935 if (!*wdata->inv_vars)
2936 *wdata->inv_vars = BITMAP_ALLOC (NULL);
2937 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2938
2939 return NULL_TREE;
2940 }
2941
2942 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
2943 store them. */
2944
2945 static inline void
2946 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2947 {
2948 struct walk_tree_data wdata;
2949
2950 if (!inv_vars)
2951 return;
2952
2953 wdata.idata = data;
2954 wdata.inv_vars = inv_vars;
2955 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2956 }
2957
2958 /* Get the entry from the invariant expr hash table for INV_EXPR. A new
2959 entry will be recorded if it doesn't exist yet. Given the two exprs:
2960 inv_expr + cst1, inv_expr + cst2
2961 it's hard to decide whether the constant part should be stripped
2962 or not. We choose not to strip it, based on the following facts:
2963 1) We need to count the ADD cost for the constant part if it's stripped,
2964 which isn't always trivial where this function is called.
2965 2) Stripping the constant away may conflict with the subsequent loop
2966 invariant hoisting pass.
2967 3) Not stripping the constant away results in more invariant exprs,
2968 which usually leads to decisions preferring lower register pressure. */
2969
2970 static iv_inv_expr_ent *
2971 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2972 {
2973 STRIP_NOPS (inv_expr);
2974
2975 if (poly_int_tree_p (inv_expr)
2976 || TREE_CODE (inv_expr) == SSA_NAME)
2977 return NULL;
2978
2979 /* Don't strip constant part away as we used to. */
2980
2981 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
2982 struct iv_inv_expr_ent ent;
2983 ent.expr = inv_expr;
2984 ent.hash = iterative_hash_expr (inv_expr, 0);
2985 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
2986
2987 if (!*slot)
2988 {
2989 *slot = XNEW (struct iv_inv_expr_ent);
2990 (*slot)->expr = inv_expr;
2991 (*slot)->hash = ent.hash;
2992 (*slot)->id = ++data->max_inv_expr_id;
2993 }
2994
2995 return *slot;
2996 }
2997
2998 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2999 position to POS. If USE is not NULL, the candidate is set as related to
3000 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3001 replacement of the final value of the iv by a direct computation. */
3002
3003 static struct iv_cand *
3004 add_candidate_1 (struct ivopts_data *data,
3005 tree base, tree step, bool important, enum iv_position pos,
3006 struct iv_use *use, gimple *incremented_at,
3007 struct iv *orig_iv = NULL)
3008 {
3009 unsigned i;
3010 struct iv_cand *cand = NULL;
3011 tree type, orig_type;
3012
3013 gcc_assert (base && step);
3014
3015 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3016 live, but the ivopts code may replace a real pointer with one
3017 pointing before or after the memory block that is then adjusted
3018 into the memory block during the loop. FIXME: It would likely be
3019 better to actually force the pointer live and still use ivopts;
3020 for example, it would be enough to write the pointer into memory
3021 and keep it there until after the loop. */
3022 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3023 return NULL;
3024
3025 /* For non-original variables, make sure their values are computed in a type
3026 that does not invoke undefined behavior on overflows (since in general,
3027 we cannot prove that these induction variables are non-wrapping). */
3028 if (pos != IP_ORIGINAL)
3029 {
3030 orig_type = TREE_TYPE (base);
3031 type = generic_type_for (orig_type);
3032 if (type != orig_type)
3033 {
3034 base = fold_convert (type, base);
3035 step = fold_convert (type, step);
3036 }
3037 }
3038
3039 for (i = 0; i < data->vcands.length (); i++)
3040 {
3041 cand = data->vcands[i];
3042
3043 if (cand->pos != pos)
3044 continue;
3045
3046 if (cand->incremented_at != incremented_at
3047 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3048 && cand->ainc_use != use))
3049 continue;
3050
3051 if (operand_equal_p (base, cand->iv->base, 0)
3052 && operand_equal_p (step, cand->iv->step, 0)
3053 && (TYPE_PRECISION (TREE_TYPE (base))
3054 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3055 break;
3056 }
3057
3058 if (i == data->vcands.length ())
3059 {
3060 cand = XCNEW (struct iv_cand);
3061 cand->id = i;
3062 cand->iv = alloc_iv (data, base, step);
3063 cand->pos = pos;
3064 if (pos != IP_ORIGINAL)
3065 {
3066 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3067 cand->var_after = cand->var_before;
3068 }
3069 cand->important = important;
3070 cand->incremented_at = incremented_at;
3071 data->vcands.safe_push (cand);
3072
3073 if (!poly_int_tree_p (step))
3074 {
3075 find_inv_vars (data, &step, &cand->inv_vars);
3076
3077 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3078 /* Share bitmap between inv_vars and inv_exprs for cand. */
3079 if (inv_expr != NULL)
3080 {
3081 cand->inv_exprs = cand->inv_vars;
3082 cand->inv_vars = NULL;
3083 if (cand->inv_exprs)
3084 bitmap_clear (cand->inv_exprs);
3085 else
3086 cand->inv_exprs = BITMAP_ALLOC (NULL);
3087
3088 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3089 }
3090 }
3091
3092 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3093 cand->ainc_use = use;
3094 else
3095 cand->ainc_use = NULL;
3096
3097 cand->orig_iv = orig_iv;
3098 if (dump_file && (dump_flags & TDF_DETAILS))
3099 dump_cand (dump_file, cand);
3100 }
3101
3102 cand->important |= important;
3103
3104 /* Relate candidate to the group for which it is added. */
3105 if (use)
3106 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3107
3108 return cand;
3109 }
3110
3111 /* Returns true if incrementing the induction variable at the end of the LOOP
3112 is allowed.
3113
3114 The purpose is to avoid splitting the latch edge with a biv increment, thus
3115 creating a jump, possibly confusing other optimization passes and leaving
3116 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3117 available (so we do not have a better alternative), or if the latch edge
3118 is already nonempty. */
3119
3120 static bool
3121 allow_ip_end_pos_p (struct loop *loop)
3122 {
3123 if (!ip_normal_pos (loop))
3124 return true;
3125
3126 if (!empty_block_p (ip_end_pos (loop)))
3127 return true;
3128
3129 return false;
3130 }
3131
3132 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3133 Important field is set to IMPORTANT. */
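/* E.g., on a target supporting post-increment addressing, a memory use whose
   iv steps by exactly the access size gets an IP_AFTER_USE candidate.  */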
3134
3135 static void
3136 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3137 bool important, struct iv_use *use)
3138 {
3139 basic_block use_bb = gimple_bb (use->stmt);
3140 machine_mode mem_mode;
3141 unsigned HOST_WIDE_INT cstepi;
3142
3143 /* If we insert the increment in any position other than the standard
3144 ones, we must ensure that it is incremented once per iteration.
3145 It must not be in an inner nested loop, or one side of an if
3146 statement. */
3147 if (use_bb->loop_father != data->current_loop
3148 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3149 || stmt_can_throw_internal (use->stmt)
3150 || !cst_and_fits_in_hwi (step))
3151 return;
3152
3153 cstepi = int_cst_value (step);
3154
3155 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3156 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3157 || USE_STORE_PRE_INCREMENT (mem_mode))
3158 && GET_MODE_SIZE (mem_mode) == cstepi)
3159 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3160 || USE_STORE_PRE_DECREMENT (mem_mode))
3161 && GET_MODE_SIZE (mem_mode) == -cstepi))
3162 {
3163 enum tree_code code = MINUS_EXPR;
3164 tree new_base;
3165 tree new_step = step;
3166
3167 if (POINTER_TYPE_P (TREE_TYPE (base)))
3168 {
3169 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3170 code = POINTER_PLUS_EXPR;
3171 }
3172 else
3173 new_step = fold_convert (TREE_TYPE (base), new_step);
3174 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3175 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3176 use->stmt);
3177 }
3178 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3179 || USE_STORE_POST_INCREMENT (mem_mode))
3180 && GET_MODE_SIZE (mem_mode) == cstepi)
3181 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3182 || USE_STORE_POST_DECREMENT (mem_mode))
3183 && GET_MODE_SIZE (mem_mode) == -cstepi))
3184 {
3185 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3186 use->stmt);
3187 }
3188 }
3189
3190 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3191 position to POS. If USE is not NULL, the candidate is set as related to
3192 it. The candidate computation is scheduled before the exit condition and at
3193 the end of the loop. */
3194
3195 static void
3196 add_candidate (struct ivopts_data *data,
3197 tree base, tree step, bool important, struct iv_use *use,
3198 struct iv *orig_iv = NULL)
3199 {
3200 if (ip_normal_pos (data->current_loop))
3201 add_candidate_1 (data, base, step, important,
3202 IP_NORMAL, use, NULL, orig_iv);
3203 if (ip_end_pos (data->current_loop)
3204 && allow_ip_end_pos_p (data->current_loop))
3205 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3206 }
3207
3208 /* Adds standard iv candidates. */
3209
3210 static void
3211 add_standard_iv_candidates (struct ivopts_data *data)
3212 {
3213 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3214
3215 /* The same for a double-integer type if it is still fast enough. */
3216 if (TYPE_PRECISION
3217 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3218 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3219 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3220 build_int_cst (long_integer_type_node, 1), true, NULL);
3221
3222 /* The same for a double-integer type if it is still fast enough. */
3223 if (TYPE_PRECISION
3224 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3225 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3226 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3227 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3228 }
3229
3230
3231 /* Adds candidates based on the old induction variable IV. */
3232
3233 static void
3234 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3235 {
3236 gimple *phi;
3237 tree def;
3238 struct iv_cand *cand;
3239
3240 /* Check if this biv is used in address type use. */
3241 if (iv->no_overflow && iv->have_address_use
3242 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3243 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3244 {
3245 tree base = fold_convert (sizetype, iv->base);
3246 tree step = fold_convert (sizetype, iv->step);
3247
3248 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3249 add_candidate (data, base, step, true, NULL, iv);
3250 /* Add iv cand of the original type only if it has nonlinear use. */
3251 if (iv->nonlin_use)
3252 add_candidate (data, iv->base, iv->step, true, NULL);
3253 }
3254 else
3255 add_candidate (data, iv->base, iv->step, true, NULL);
3256
3257 /* The same, but with initial value zero. */
3258 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3259 add_candidate (data, size_int (0), iv->step, true, NULL);
3260 else
3261 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3262 iv->step, true, NULL);
3263
3264 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3265 if (gimple_code (phi) == GIMPLE_PHI)
3266 {
3267 /* Additionally record the possibility of leaving the original iv
3268 untouched. */
3269 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3270 /* Don't add candidate if it's from another PHI node because
3271 it's an affine iv appearing in the form of PEELED_CHREC. */
3272 phi = SSA_NAME_DEF_STMT (def);
3273 if (gimple_code (phi) != GIMPLE_PHI)
3274 {
3275 cand = add_candidate_1 (data,
3276 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3277 SSA_NAME_DEF_STMT (def));
3278 if (cand)
3279 {
3280 cand->var_before = iv->ssa_name;
3281 cand->var_after = def;
3282 }
3283 }
3284 else
3285 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3286 }
3287 }
3288
3289 /* Adds candidates based on the old induction variables. */
3290
3291 static void
3292 add_iv_candidate_for_bivs (struct ivopts_data *data)
3293 {
3294 unsigned i;
3295 struct iv *iv;
3296 bitmap_iterator bi;
3297
3298 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3299 {
3300 iv = ver_info (data, i)->iv;
3301 if (iv && iv->biv_p && !integer_zerop (iv->step))
3302 add_iv_candidate_for_biv (data, iv);
3303 }
3304 }
3305
3306 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3307
3308 static void
3309 record_common_cand (struct ivopts_data *data, tree base,
3310 tree step, struct iv_use *use)
3311 {
3312 struct iv_common_cand ent;
3313 struct iv_common_cand **slot;
3314
3315 ent.base = base;
3316 ent.step = step;
3317 ent.hash = iterative_hash_expr (base, 0);
3318 ent.hash = iterative_hash_expr (step, ent.hash);
3319
3320 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3321 if (*slot == NULL)
3322 {
3323 *slot = new iv_common_cand ();
3324 (*slot)->base = base;
3325 (*slot)->step = step;
3326 (*slot)->uses.create (8);
3327 (*slot)->hash = ent.hash;
3328 data->iv_common_cands.safe_push ((*slot));
3329 }
3330
3331 gcc_assert (use != NULL);
3332 (*slot)->uses.safe_push (use);
3333 return;
3334 }
3335
3336 /* Comparison function used to sort common candidates. */
3337
3338 static int
3339 common_cand_cmp (const void *p1, const void *p2)
3340 {
3341 unsigned n1, n2;
3342 const struct iv_common_cand *const *const ccand1
3343 = (const struct iv_common_cand *const *)p1;
3344 const struct iv_common_cand *const *const ccand2
3345 = (const struct iv_common_cand *const *)p2;
3346
3347 n1 = (*ccand1)->uses.length ();
3348 n2 = (*ccand2)->uses.length ();
3349 return n2 - n1;
3350 }
3351
3352 /* Adds IV candidates based on the common candidates recorded. */
3353
3354 static void
3355 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3356 {
3357 unsigned i, j;
3358 struct iv_cand *cand_1, *cand_2;
3359
3360 data->iv_common_cands.qsort (common_cand_cmp);
3361 for (i = 0; i < data->iv_common_cands.length (); i++)
3362 {
3363 struct iv_common_cand *ptr = data->iv_common_cands[i];
3364
3365 /* Only add IV candidate if it's derived from multiple uses. */
3366 if (ptr->uses.length () <= 1)
3367 break;
3368
3369 cand_1 = NULL;
3370 cand_2 = NULL;
3371 if (ip_normal_pos (data->current_loop))
3372 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3373 false, IP_NORMAL, NULL, NULL);
3374
3375 if (ip_end_pos (data->current_loop)
3376 && allow_ip_end_pos_p (data->current_loop))
3377 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3378 false, IP_END, NULL, NULL);
3379
3380 /* Bind deriving uses and the new candidates. */
3381 for (j = 0; j < ptr->uses.length (); j++)
3382 {
3383 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3384 if (cand_1)
3385 bitmap_set_bit (group->related_cands, cand_1->id);
3386 if (cand_2)
3387 bitmap_set_bit (group->related_cands, cand_2->id);
3388 }
3389 }
3390
3391 /* Release data since it is useless from this point. */
3392 data->iv_common_cand_tab->empty ();
3393 data->iv_common_cands.truncate (0);
3394 }
3395
3396 /* Adds candidates based on the value of USE's iv. */
3397
3398 static void
3399 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3400 {
3401 unsigned HOST_WIDE_INT offset;
3402 tree base;
3403 tree basetype;
3404 struct iv *iv = use->iv;
3405
3406 add_candidate (data, iv->base, iv->step, false, use);
3407
3408 /* Record common candidate for use in case it can be shared by others. */
3409 record_common_cand (data, iv->base, iv->step, use);
3410
3411 /* Record common candidate with initial value zero. */
3412 basetype = TREE_TYPE (iv->base);
3413 if (POINTER_TYPE_P (basetype))
3414 basetype = sizetype;
3415 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3416
3417 /* Record common candidate with constant offset stripped in base.
3418 Like the use itself, we also add candidate directly for it. */
3419 base = strip_offset (iv->base, &offset);
3420 if (offset || base != iv->base)
3421 {
3422 record_common_cand (data, base, iv->step, use);
3423 add_candidate (data, base, iv->step, false, use);
3424 }
3425
3426 /* Record common candidate with base_object removed in base. */
3427 base = iv->base;
3428 STRIP_NOPS (base);
3429 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3430 {
3431 tree step = iv->step;
3432
3433 STRIP_NOPS (step);
3434 base = TREE_OPERAND (base, 1);
3435 step = fold_convert (sizetype, step);
3436 record_common_cand (data, base, step, use);
3437 /* Also record common candidate with offset stripped. */
3438 base = strip_offset (base, &offset);
3439 if (offset)
3440 record_common_cand (data, base, step, use);
3441 }
3442
3443 /* Finally, add auto-increment candidates. Make such variables
3444 important since other iv uses with the same base object may be based
3445 on them. */
3446 if (use != NULL && use->type == USE_ADDRESS)
3447 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3448 }
3449
3450 /* Adds candidates based on the uses. */
3451
3452 static void
3453 add_iv_candidate_for_groups (struct ivopts_data *data)
3454 {
3455 unsigned i;
3456
3457 /* Only add candidate for the first use in group. */
3458 for (i = 0; i < data->vgroups.length (); i++)
3459 {
3460 struct iv_group *group = data->vgroups[i];
3461
3462 gcc_assert (group->vuses[0] != NULL);
3463 add_iv_candidate_for_use (data, group->vuses[0]);
3464 }
3465 add_iv_candidate_derived_from_uses (data);
3466 }
3467
3468 /* Record important candidates and add them to related_cands bitmaps. */
3469
3470 static void
3471 record_important_candidates (struct ivopts_data *data)
3472 {
3473 unsigned i;
3474 struct iv_group *group;
3475
3476 for (i = 0; i < data->vcands.length (); i++)
3477 {
3478 struct iv_cand *cand = data->vcands[i];
3479
3480 if (cand->important)
3481 bitmap_set_bit (data->important_candidates, i);
3482 }
3483
3484 data->consider_all_candidates = (data->vcands.length ()
3485 <= CONSIDER_ALL_CANDIDATES_BOUND);
3486
3487 /* Add important candidates to groups' related_cands bitmaps. */
3488 for (i = 0; i < data->vgroups.length (); i++)
3489 {
3490 group = data->vgroups[i];
3491 bitmap_ior_into (group->related_cands, data->important_candidates);
3492 }
3493 }
3494
3495 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3496 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3497 we allocate a simple list to every use. */
3498
3499 static void
3500 alloc_use_cost_map (struct ivopts_data *data)
3501 {
3502 unsigned i, size, s;
3503
3504 for (i = 0; i < data->vgroups.length (); i++)
3505 {
3506 struct iv_group *group = data->vgroups[i];
3507
3508 if (data->consider_all_candidates)
3509 size = data->vcands.length ();
3510 else
3511 {
3512 s = bitmap_count_bits (group->related_cands);
3513
3514 /* Round up to a power of two, so that computing the modulo by it is fast. */
3515 size = s ? (1 << ceil_log2 (s)) : 1;
3516 }
3517
3518 group->n_map_members = size;
3519 group->cost_map = XCNEWVEC (struct cost_pair, size);
3520 }
3521 }
3522
3523 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3524 on invariants INV_VARS and that the value used in expressing it is
3525 VALUE, and in case of iv elimination the comparison operator is COMP. */
3526
3527 static void
3528 set_group_iv_cost (struct ivopts_data *data,
3529 struct iv_group *group, struct iv_cand *cand,
3530 comp_cost cost, bitmap inv_vars, tree value,
3531 enum tree_code comp, bitmap inv_exprs)
3532 {
3533 unsigned i, s;
3534
3535 if (cost.infinite_cost_p ())
3536 {
3537 BITMAP_FREE (inv_vars);
3538 BITMAP_FREE (inv_exprs);
3539 return;
3540 }
3541
3542 if (data->consider_all_candidates)
3543 {
3544 group->cost_map[cand->id].cand = cand;
3545 group->cost_map[cand->id].cost = cost;
3546 group->cost_map[cand->id].inv_vars = inv_vars;
3547 group->cost_map[cand->id].inv_exprs = inv_exprs;
3548 group->cost_map[cand->id].value = value;
3549 group->cost_map[cand->id].comp = comp;
3550 return;
3551 }
3552
3553 /* n_map_members is a power of two, so this computes modulo. */
3554 s = cand->id & (group->n_map_members - 1);
3555 for (i = s; i < group->n_map_members; i++)
3556 if (!group->cost_map[i].cand)
3557 goto found;
3558 for (i = 0; i < s; i++)
3559 if (!group->cost_map[i].cand)
3560 goto found;
3561
3562 gcc_unreachable ();
3563
3564 found:
3565 group->cost_map[i].cand = cand;
3566 group->cost_map[i].cost = cost;
3567 group->cost_map[i].inv_vars = inv_vars;
3568 group->cost_map[i].inv_exprs = inv_exprs;
3569 group->cost_map[i].value = value;
3570 group->cost_map[i].comp = comp;
3571 }
3572
3573 /* Gets cost of (GROUP, CAND) pair. */
3574
3575 static struct cost_pair *
3576 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3577 struct iv_cand *cand)
3578 {
3579 unsigned i, s;
3580 struct cost_pair *ret;
3581
3582 if (!cand)
3583 return NULL;
3584
3585 if (data->consider_all_candidates)
3586 {
3587 ret = group->cost_map + cand->id;
3588 if (!ret->cand)
3589 return NULL;
3590
3591 return ret;
3592 }
3593
3594 /* n_map_members is a power of two, so this computes modulo. */
3595 s = cand->id & (group->n_map_members - 1);
3596 for (i = s; i < group->n_map_members; i++)
3597 if (group->cost_map[i].cand == cand)
3598 return group->cost_map + i;
3599 else if (group->cost_map[i].cand == NULL)
3600 return NULL;
3601 for (i = 0; i < s; i++)
3602 if (group->cost_map[i].cand == cand)
3603 return group->cost_map + i;
3604 else if (group->cost_map[i].cand == NULL)
3605 return NULL;
3606
3607 return NULL;
3608 }
3609
3610 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3611 static rtx
3612 produce_memory_decl_rtl (tree obj, int *regno)
3613 {
3614 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3615 machine_mode address_mode = targetm.addr_space.address_mode (as);
3616 rtx x;
3617
3618 gcc_assert (obj);
3619 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3620 {
3621 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3622 x = gen_rtx_SYMBOL_REF (address_mode, name);
3623 SET_SYMBOL_REF_DECL (x, obj);
3624 x = gen_rtx_MEM (DECL_MODE (obj), x);
3625 set_mem_addr_space (x, as);
3626 targetm.encode_section_info (obj, x, true);
3627 }
3628 else
3629 {
3630 x = gen_raw_REG (address_mode, (*regno)++);
3631 x = gen_rtx_MEM (DECL_MODE (obj), x);
3632 set_mem_addr_space (x, as);
3633 }
3634
3635 return x;
3636 }
3637
3638 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3639 walk_tree. DATA contains the actual fake register number. */
3640
3641 static tree
3642 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3643 {
3644 tree obj = NULL_TREE;
3645 rtx x = NULL_RTX;
3646 int *regno = (int *) data;
3647
3648 switch (TREE_CODE (*expr_p))
3649 {
3650 case ADDR_EXPR:
3651 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3652 handled_component_p (*expr_p);
3653 expr_p = &TREE_OPERAND (*expr_p, 0))
3654 continue;
3655 obj = *expr_p;
3656 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3657 x = produce_memory_decl_rtl (obj, regno);
3658 break;
3659
3660 case SSA_NAME:
3661 *ws = 0;
3662 obj = SSA_NAME_VAR (*expr_p);
3663 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3664 if (!obj)
3665 return NULL_TREE;
3666 if (!DECL_RTL_SET_P (obj))
3667 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3668 break;
3669
3670 case VAR_DECL:
3671 case PARM_DECL:
3672 case RESULT_DECL:
3673 *ws = 0;
3674 obj = *expr_p;
3675
3676 if (DECL_RTL_SET_P (obj))
3677 break;
3678
3679 if (DECL_MODE (obj) == BLKmode)
3680 x = produce_memory_decl_rtl (obj, regno);
3681 else
3682 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3683
3684 break;
3685
3686 default:
3687 break;
3688 }
3689
3690 if (x)
3691 {
3692 decl_rtl_to_reset.safe_push (obj);
3693 SET_DECL_RTL (obj, x);
3694 }
3695
3696 return NULL_TREE;
3697 }
3698
3699 /* Determines cost of the computation of EXPR. */
3700
3701 static unsigned
3702 computation_cost (tree expr, bool speed)
3703 {
3704 rtx_insn *seq;
3705 rtx rslt;
3706 tree type = TREE_TYPE (expr);
3707 unsigned cost;
3708 /* Avoid using hard regs in ways which may be unsupported. */
3709 int regno = LAST_VIRTUAL_REGISTER + 1;
3710 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3711 enum node_frequency real_frequency = node->frequency;
3712
3713 node->frequency = NODE_FREQUENCY_NORMAL;
3714 crtl->maybe_hot_insn_p = speed;
3715 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3716 start_sequence ();
3717 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3718 seq = get_insns ();
3719 end_sequence ();
3720 default_rtl_profile ();
3721 node->frequency = real_frequency;
3722
3723 cost = seq_cost (seq, speed);
3724 if (MEM_P (rslt))
3725 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3726 TYPE_ADDR_SPACE (type), speed);
3727 else if (!REG_P (rslt))
3728 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3729
3730 return cost;
3731 }
3732
3733 /* Returns variable containing the value of candidate CAND at statement AT. */
3734
3735 static tree
3736 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3737 {
3738 if (stmt_after_increment (loop, cand, stmt))
3739 return cand->var_after;
3740 else
3741 return cand->var_before;
3742 }
3743
3744 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3745 same precision that is at least as wide as the precision of TYPE, stores
3746 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3747 type of A and B. */
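/* E.g., if *A is (unsigned int) a_1 and *B is (unsigned int) b_2, with a_1
   and b_2 both of a wider 64-bit type, a_1 and b_2 are stored back to *A
   and *B and the 64-bit type is returned.  */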
3748
3749 static tree
3750 determine_common_wider_type (tree *a, tree *b)
3751 {
3752 tree wider_type = NULL;
3753 tree suba, subb;
3754 tree atype = TREE_TYPE (*a);
3755
3756 if (CONVERT_EXPR_P (*a))
3757 {
3758 suba = TREE_OPERAND (*a, 0);
3759 wider_type = TREE_TYPE (suba);
3760 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3761 return atype;
3762 }
3763 else
3764 return atype;
3765
3766 if (CONVERT_EXPR_P (*b))
3767 {
3768 subb = TREE_OPERAND (*b, 0);
3769 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3770 return atype;
3771 }
3772 else
3773 return atype;
3774
3775 *a = suba;
3776 *b = subb;
3777 return wider_type;
3778 }
3779
3780 /* Determines the expression by which USE is expressed from induction variable
3781 CAND at statement AT in LOOP. The expression is stored in two parts in a
3782 decomposed form. The invariant part is stored in AFF_INV, while the variant
3783 part is stored in AFF_VAR. Store the ratio of USE.step over CAND.step in PRAT
3784 if it's non-null. Returns false if USE cannot be expressed using CAND. */
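/* E.g., for USE with {base, step} = {b_1, 8} and CAND with {base, step}
   = {0, 4}, the ratio is 2, AFF_INV holds b_1 and AFF_VAR holds 2 * var,
   where var is CAND's variable at AT.  */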
3785
3786 static bool
3787 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3788 struct iv_cand *cand, struct aff_tree *aff_inv,
3789 struct aff_tree *aff_var, widest_int *prat = NULL)
3790 {
3791 tree ubase = use->iv->base, ustep = use->iv->step;
3792 tree cbase = cand->iv->base, cstep = cand->iv->step;
3793 tree common_type, uutype, var, cstep_common;
3794 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3795 aff_tree aff_cbase;
3796 widest_int rat;
3797
3798 /* We must have a precision to express the values of use. */
3799 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3800 return false;
3801
3802 var = var_at_stmt (loop, cand, at);
3803 uutype = unsigned_type_for (utype);
3804
3805 /* If the conversion is not noop, perform it. */
3806 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3807 {
3808 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3809 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3810 {
3811 tree inner_base, inner_step, inner_type;
3812 inner_base = TREE_OPERAND (cbase, 0);
3813 if (CONVERT_EXPR_P (cstep))
3814 inner_step = TREE_OPERAND (cstep, 0);
3815 else
3816 inner_step = cstep;
3817
3818 inner_type = TREE_TYPE (inner_base);
3819 /* If the candidate is added from a biv whose type is smaller than
3820 ctype, we know both the candidate and the biv won't overflow.
3821 In this case, it's safe to skip the conversion in the candidate.
3822 As an example, (unsigned short)((unsigned long)A) equals
3823 (unsigned short)A, if A has a type no larger than short. */
3824 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3825 {
3826 cbase = inner_base;
3827 cstep = inner_step;
3828 }
3829 }
3830 cbase = fold_convert (uutype, cbase);
3831 cstep = fold_convert (uutype, cstep);
3832 var = fold_convert (uutype, var);
3833 }
3834
3835 /* Ratio is 1 when computing the value of biv cand by itself.
3836 We can't rely on constant_multiple_of in this case because the
3837 use is created after the original biv is selected. The call
3838 could fail because of inconsistent fold behavior. See PR68021
3839 for more information. */
3840 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3841 {
3842 gcc_assert (is_gimple_assign (use->stmt));
3843 gcc_assert (use->iv->ssa_name == cand->var_after);
3844 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3845 rat = 1;
3846 }
3847 else if (!constant_multiple_of (ustep, cstep, &rat))
3848 return false;
3849
3850 if (prat)
3851 *prat = rat;
3852
3853 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3854 type, we achieve better folding by computing their difference in this
3855 wider type, and cast the result to UUTYPE. We do not need to worry about
3856 overflows, as all the arithmetic will in the end be performed in UUTYPE
3857 anyway. */
3858 common_type = determine_common_wider_type (&ubase, &cbase);
3859
3860 /* use = ubase - ratio * cbase + ratio * var. */
3861 tree_to_aff_combination (ubase, common_type, aff_inv);
3862 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3863 tree_to_aff_combination (var, uutype, aff_var);
3864
3865 /* We need to shift the value if we are after the increment. */
3866 if (stmt_after_increment (loop, cand, at))
3867 {
3868 aff_tree cstep_aff;
3869
3870 if (common_type != uutype)
3871 cstep_common = fold_convert (common_type, cstep);
3872 else
3873 cstep_common = cstep;
3874
3875 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3876 aff_combination_add (&aff_cbase, &cstep_aff);
3877 }
3878
3879 aff_combination_scale (&aff_cbase, -rat);
3880 aff_combination_add (aff_inv, &aff_cbase);
3881 if (common_type != uutype)
3882 aff_combination_convert (aff_inv, uutype);
3883
3884 aff_combination_scale (aff_var, rat);
3885 return true;
3886 }
3887
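/* A minimal standalone sketch, not part of this pass, of the identity the
   function above relies on: with rat = ustep / cstep, the use value
   ubase + i * ustep equals ubase - rat * cbase + rat * var, where
   var = cbase + i * cstep is the candidate's value at iteration i.  The
   names and the plain unsigned arithmetic (mirroring UUTYPE) are local to
   this sketch.  */

static unsigned
sketch_use_value_direct (unsigned ubase, unsigned ustep, unsigned i)
{
  return ubase + i * ustep;
}

static unsigned
sketch_use_value_via_cand (unsigned ubase, unsigned cbase, unsigned cstep,
                           unsigned rat, unsigned i)
{
  unsigned var = cbase + i * cstep;   /* Candidate value at iteration i.  */
  return ubase - rat * cbase + rat * var;
}

/* For instance, with ubase = 100, ustep = 8, cbase = 0, cstep = 4 and thus
   rat = 2, both functions yield 100, 108, 116, ... for i = 0, 1, 2, ...  */
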
3888 /* Determines the expression by which USE is expressed from induction variable
3889 CAND at statement AT in LOOP. The expression is stored in a decomposed
3890 form into AFF. Returns false if USE cannot be expressed using CAND. */
3891
3892 static bool
3893 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3894 struct iv_cand *cand, struct aff_tree *aff)
3895 {
3896 aff_tree aff_var;
3897
3898 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3899 return false;
3900
3901 aff_combination_add (aff, &aff_var);
3902 return true;
3903 }
3904
3905 /* Return the type of USE. */
3906
3907 static tree
3908 get_use_type (struct iv_use *use)
3909 {
3910 tree base_type = TREE_TYPE (use->iv->base);
3911 tree type;
3912
3913 if (use->type == USE_ADDRESS)
3914 {
3915 /* The base_type may be a void pointer. Create a pointer type based on
3916 the mem_ref instead. */
3917 type = build_pointer_type (TREE_TYPE (*use->op_p));
3918 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3919 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3920 }
3921 else
3922 type = base_type;
3923
3924 return type;
3925 }
3926
3927 /* Determines the expression by which USE is expressed from induction variable
3928 CAND at statement AT in LOOP. The computation is unshared. */
3929
3930 static tree
3931 get_computation_at (struct loop *loop, gimple *at,
3932 struct iv_use *use, struct iv_cand *cand)
3933 {
3934 aff_tree aff;
3935 tree type = get_use_type (use);
3936
3937 if (!get_computation_aff (loop, at, use, cand, &aff))
3938 return NULL_TREE;
3939 unshare_aff_combination (&aff);
3940 return fold_convert (type, aff_combination_to_tree (&aff));
3941 }
3942
3943 /* Adjust the cost COST for being in loop setup rather than loop body.
3944 If we're optimizing for space, the loop setup overhead is constant;
3945 if we're optimizing for speed, amortize it over the per-iteration cost.
3946 If ROUND_UP_P is true, the result is rounded up rather than truncated
3947 toward zero when optimizing for speed.  */
3948 static unsigned
3949 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
3950 bool round_up_p = false)
3951 {
3952 if (cost == INFTY)
3953 return cost;
3954 else if (optimize_loop_for_speed_p (data->current_loop))
3955 {
3956 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
3957 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
3958 }
3959 else
3960 return cost;
3961 }
3962
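/* An illustrative standalone sketch, not part of this pass, of the
   amortization performed above: when optimizing for speed, a setup cost is
   divided by the average number of loop iterations, either truncating or
   rounding up.  The names below are local to this sketch.  */

static unsigned
sketch_adjust_setup_cost (unsigned cost, unsigned avg_niters, int round_up_p)
{
  if (avg_niters == 0)
    return cost;
  return (cost + (round_up_p ? avg_niters - 1 : 0)) / avg_niters;
}

/* E.g. a setup cost of 4 amortized over an average of 10 iterations is 0
   when truncated but 1 when rounded up; get_address_cost uses the rounded-up
   variant so that small invariant expressions do not appear free.  */
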
3963 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3964 EXPR operand holding the shift. COST0 and COST1 are the costs for
3965 calculating the operands of EXPR. Returns true if successful, and returns
3966 the cost in COST. */
3967
3968 static bool
3969 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
3970 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3971 {
3972 comp_cost res;
3973 tree op1 = TREE_OPERAND (expr, 1);
3974 tree cst = TREE_OPERAND (mult, 1);
3975 tree multop = TREE_OPERAND (mult, 0);
3976 int m = exact_log2 (int_cst_value (cst));
3977 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3978 int as_cost, sa_cost;
3979 bool mult_in_op1;
3980
3981 if (!(m >= 0 && m < maxm))
3982 return false;
3983
3984 STRIP_NOPS (op1);
3985 mult_in_op1 = operand_equal_p (op1, mult, 0);
3986
3987 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3988
3989 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3990 use that in preference to a shift insn followed by an add insn. */
3991 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3992 ? shiftadd_cost (speed, mode, m)
3993 : (mult_in_op1
3994 ? shiftsub1_cost (speed, mode, m)
3995 : shiftsub0_cost (speed, mode, m)));
3996
3997 res = comp_cost (MIN (as_cost, sa_cost), 0);
3998 res += (mult_in_op1 ? cost0 : cost1);
3999
4000 STRIP_NOPS (multop);
4001 if (!is_gimple_val (multop))
4002 res += force_expr_to_var_cost (multop, speed);
4003
4004 *cost = res;
4005 return true;
4006 }
4007
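/* An illustrative standalone sketch, not part of this pass, of the test the
   function above performs: the shift-and-add form applies only when the
   multiplier is a power of two 2^m with m below the word width.  The helper
   below is local to this sketch.  */

static int
sketch_shiftadd_applicable (unsigned long long cst, int bits_per_word)
{
  int m = 0;

  if (cst == 0 || (cst & (cst - 1)) != 0)  /* Not a power of two.  */
    return -1;
  while ((cst >>= 1) != 0)
    m++;                                   /* m = exact_log2 of the constant.  */
  return m < bits_per_word ? m : -1;
}

/* E.g. a + b * 8 qualifies with m = 3, i.e. a + (b << 3), whereas a + b * 6
   does not and keeps the separate multiply cost.  */
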
4008 /* Estimates cost of forcing expression EXPR into a variable. */
4009
4010 static comp_cost
4011 force_expr_to_var_cost (tree expr, bool speed)
4012 {
4013 static bool costs_initialized = false;
4014 static unsigned integer_cost [2];
4015 static unsigned symbol_cost [2];
4016 static unsigned address_cost [2];
4017 tree op0, op1;
4018 comp_cost cost0, cost1, cost;
4019 machine_mode mode;
4020 scalar_int_mode int_mode;
4021
4022 if (!costs_initialized)
4023 {
4024 tree type = build_pointer_type (integer_type_node);
4025 tree var, addr;
4026 rtx x;
4027 int i;
4028
4029 var = create_tmp_var_raw (integer_type_node, "test_var");
4030 TREE_STATIC (var) = 1;
4031 x = produce_memory_decl_rtl (var, NULL);
4032 SET_DECL_RTL (var, x);
4033
4034 addr = build1 (ADDR_EXPR, type, var);
4035
4036
4037 for (i = 0; i < 2; i++)
4038 {
4039 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4040 2000), i);
4041
4042 symbol_cost[i] = computation_cost (addr, i) + 1;
4043
4044 address_cost[i]
4045 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4046 if (dump_file && (dump_flags & TDF_DETAILS))
4047 {
4048 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4049 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4050 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4051 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4052 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4053 fprintf (dump_file, "\n");
4054 }
4055 }
4056
4057 costs_initialized = true;
4058 }
4059
4060 STRIP_NOPS (expr);
4061
4062 if (SSA_VAR_P (expr))
4063 return no_cost;
4064
4065 if (is_gimple_min_invariant (expr))
4066 {
4067 if (poly_int_tree_p (expr))
4068 return comp_cost (integer_cost [speed], 0);
4069
4070 if (TREE_CODE (expr) == ADDR_EXPR)
4071 {
4072 tree obj = TREE_OPERAND (expr, 0);
4073
4074 if (VAR_P (obj)
4075 || TREE_CODE (obj) == PARM_DECL
4076 || TREE_CODE (obj) == RESULT_DECL)
4077 return comp_cost (symbol_cost [speed], 0);
4078 }
4079
4080 return comp_cost (address_cost [speed], 0);
4081 }
4082
4083 switch (TREE_CODE (expr))
4084 {
4085 case POINTER_PLUS_EXPR:
4086 case PLUS_EXPR:
4087 case MINUS_EXPR:
4088 case MULT_EXPR:
4089 case TRUNC_DIV_EXPR:
4090 case BIT_AND_EXPR:
4091 case BIT_IOR_EXPR:
4092 case LSHIFT_EXPR:
4093 case RSHIFT_EXPR:
4094 op0 = TREE_OPERAND (expr, 0);
4095 op1 = TREE_OPERAND (expr, 1);
4096 STRIP_NOPS (op0);
4097 STRIP_NOPS (op1);
4098 break;
4099
4100 CASE_CONVERT:
4101 case NEGATE_EXPR:
4102 case BIT_NOT_EXPR:
4103 op0 = TREE_OPERAND (expr, 0);
4104 STRIP_NOPS (op0);
4105 op1 = NULL_TREE;
4106 break;
4107
4108 default:
4109 /* Just an arbitrary value, FIXME. */
4110 return comp_cost (target_spill_cost[speed], 0);
4111 }
4112
4113 if (op0 == NULL_TREE
4114 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4115 cost0 = no_cost;
4116 else
4117 cost0 = force_expr_to_var_cost (op0, speed);
4118
4119 if (op1 == NULL_TREE
4120 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4121 cost1 = no_cost;
4122 else
4123 cost1 = force_expr_to_var_cost (op1, speed);
4124
4125 mode = TYPE_MODE (TREE_TYPE (expr));
4126 switch (TREE_CODE (expr))
4127 {
4128 case POINTER_PLUS_EXPR:
4129 case PLUS_EXPR:
4130 case MINUS_EXPR:
4131 case NEGATE_EXPR:
4132 cost = comp_cost (add_cost (speed, mode), 0);
4133 if (TREE_CODE (expr) != NEGATE_EXPR)
4134 {
4135 tree mult = NULL_TREE;
4136 comp_cost sa_cost;
4137 if (TREE_CODE (op1) == MULT_EXPR)
4138 mult = op1;
4139 else if (TREE_CODE (op0) == MULT_EXPR)
4140 mult = op0;
4141
4142 if (mult != NULL_TREE
4143 && is_a <scalar_int_mode> (mode, &int_mode)
4144 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4145 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4146 speed, &sa_cost))
4147 return sa_cost;
4148 }
4149 break;
4150
4151 CASE_CONVERT:
4152 {
4153 tree inner_mode, outer_mode;
4154 outer_mode = TREE_TYPE (expr);
4155 inner_mode = TREE_TYPE (op0);
4156 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4157 TYPE_MODE (inner_mode), speed), 0);
4158 }
4159 break;
4160
4161 case MULT_EXPR:
4162 if (cst_and_fits_in_hwi (op0))
4163 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4164 mode, speed), 0);
4165 else if (cst_and_fits_in_hwi (op1))
4166 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4167 mode, speed), 0);
4168 else
4169 return comp_cost (target_spill_cost [speed], 0);
4170 break;
4171
4172 case TRUNC_DIV_EXPR:
4173 /* Division by power of two is usually cheap, so we allow it. Forbid
4174 anything else. */
4175 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4176 cost = comp_cost (add_cost (speed, mode), 0);
4177 else
4178 cost = comp_cost (target_spill_cost[speed], 0);
4179 break;
4180
4181 case BIT_AND_EXPR:
4182 case BIT_IOR_EXPR:
4183 case BIT_NOT_EXPR:
4184 case LSHIFT_EXPR:
4185 case RSHIFT_EXPR:
4186 cost = comp_cost (add_cost (speed, mode), 0);
4187 break;
4188
4189 default:
4190 gcc_unreachable ();
4191 }
4192
4193 cost += cost0;
4194 cost += cost1;
4195 return cost;
4196 }
4197
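/* An illustrative standalone sketch, not part of this pass, of the recursion
   above on a toy expression tree: the cost of forcing an expression into a
   variable is the operator's cost plus the costs of its non-trivial operands,
   with leaves that are already variables costing nothing.  The types and the
   constants below are local to this sketch.  */

enum sketch_op { SKETCH_VAR, SKETCH_CST, SKETCH_PLUS, SKETCH_MULT };

struct sketch_expr
{
  enum sketch_op op;
  struct sketch_expr *op0, *op1;   /* NULL for leaves.  */
};

static unsigned
sketch_force_cost (const struct sketch_expr *e)
{
  const unsigned sketch_add_cost = 1, sketch_mult_cost = 4, sketch_cst_cost = 1;

  switch (e->op)
    {
    case SKETCH_VAR:
      return 0;                    /* Already available in a register.  */
    case SKETCH_CST:
      return sketch_cst_cost;
    case SKETCH_PLUS:
      return sketch_add_cost + sketch_force_cost (e->op0)
             + sketch_force_cost (e->op1);
    case SKETCH_MULT:
      return sketch_mult_cost + sketch_force_cost (e->op0)
             + sketch_force_cost (e->op1);
    }
  return 0;
}
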
4198 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4199 invariants the computation depends on. */
4200
4201 static comp_cost
4202 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4203 {
4204 if (!expr)
4205 return no_cost;
4206
4207 find_inv_vars (data, &expr, inv_vars);
4208 return force_expr_to_var_cost (expr, data->speed);
4209 }
4210
4211 /* Returns the cost of an auto-modifying address expression of the shape
4212 base + offset.  AINC_STEP is the step size of the address IV.  AINC_OFFSET
4213 is the offset of the address expression.  The address expression has
4214 ADDR_MODE in address space AS.  The memory access has MEM_MODE.  SPEED is
4215 true if we are optimizing for speed rather than size.  */
4216
4217 enum ainc_type
4218 {
4219 AINC_PRE_INC, /* Pre increment. */
4220 AINC_PRE_DEC, /* Pre decrement. */
4221 AINC_POST_INC, /* Post increment. */
4222 AINC_POST_DEC, /* Post decrement. */
4223 AINC_NONE /* Also the number of auto increment types. */
4224 };
4225
4226 struct ainc_cost_data
4227 {
4228 unsigned costs[AINC_NONE];
4229 };
4230
4231 static comp_cost
4232 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4233 machine_mode addr_mode, machine_mode mem_mode,
4234 addr_space_t as, bool speed)
4235 {
4236 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4237 && !USE_STORE_PRE_DECREMENT (mem_mode)
4238 && !USE_LOAD_POST_DECREMENT (mem_mode)
4239 && !USE_STORE_POST_DECREMENT (mem_mode)
4240 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4241 && !USE_STORE_PRE_INCREMENT (mem_mode)
4242 && !USE_LOAD_POST_INCREMENT (mem_mode)
4243 && !USE_STORE_POST_INCREMENT (mem_mode))
4244 return infinite_cost;
4245
4246 static vec<ainc_cost_data *> ainc_cost_data_list;
4247 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4248 if (idx >= ainc_cost_data_list.length ())
4249 {
4250 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4251
4252 gcc_assert (nsize > idx);
4253 ainc_cost_data_list.safe_grow_cleared (nsize);
4254 }
4255
4256 ainc_cost_data *data = ainc_cost_data_list[idx];
4257 if (data == NULL)
4258 {
4259 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4260
4261 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4262 data->costs[AINC_PRE_DEC] = INFTY;
4263 data->costs[AINC_POST_DEC] = INFTY;
4264 data->costs[AINC_PRE_INC] = INFTY;
4265 data->costs[AINC_POST_INC] = INFTY;
4266 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4267 || USE_STORE_PRE_DECREMENT (mem_mode))
4268 {
4269 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4270
4271 if (memory_address_addr_space_p (mem_mode, addr, as))
4272 data->costs[AINC_PRE_DEC]
4273 = address_cost (addr, mem_mode, as, speed);
4274 }
4275 if (USE_LOAD_POST_DECREMENT (mem_mode)
4276 || USE_STORE_POST_DECREMENT (mem_mode))
4277 {
4278 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4279
4280 if (memory_address_addr_space_p (mem_mode, addr, as))
4281 data->costs[AINC_POST_DEC]
4282 = address_cost (addr, mem_mode, as, speed);
4283 }
4284 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4285 || USE_STORE_PRE_INCREMENT (mem_mode))
4286 {
4287 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4288
4289 if (memory_address_addr_space_p (mem_mode, addr, as))
4290 data->costs[AINC_PRE_INC]
4291 = address_cost (addr, mem_mode, as, speed);
4292 }
4293 if (USE_LOAD_POST_INCREMENT (mem_mode)
4294 || USE_STORE_POST_INCREMENT (mem_mode))
4295 {
4296 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4297
4298 if (memory_address_addr_space_p (mem_mode, addr, as))
4299 data->costs[AINC_POST_INC]
4300 = address_cost (addr, mem_mode, as, speed);
4301 }
4302 ainc_cost_data_list[idx] = data;
4303 }
4304
4305 HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
4306 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4307 return comp_cost (data->costs[AINC_POST_INC], 0);
4308 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4309 return comp_cost (data->costs[AINC_POST_DEC], 0);
4310 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4311 return comp_cost (data->costs[AINC_PRE_INC], 0);
4312 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4313 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4314
4315 return infinite_cost;
4316 }
4317
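/* An illustrative standalone sketch, not part of this pass, of the matching
   done above: given the access size, the candidate step and the remaining
   constant offset, only four (offset, step) combinations can be expressed
   as an auto-modified address.  The function reuses the ainc_type enum
   defined above but is otherwise local to this sketch.  */

static enum ainc_type
sketch_classify_ainc (long long size, long long step, long long offset)
{
  if (offset == 0 && step == size)
    return AINC_POST_INC;      /* Like *p++ on the access size.  */
  if (offset == 0 && step == -size)
    return AINC_POST_DEC;      /* Like *p--.  */
  if (offset == size && step == size)
    return AINC_PRE_INC;       /* Like *++p.  */
  if (offset == -size && step == -size)
    return AINC_PRE_DEC;       /* Like *--p.  */
  return AINC_NONE;
}
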
4318 /* Return cost of computing USE's address expression by using CAND.
4319 AFF_INV and AFF_VAR represent invariant and variant parts of the
4320 address expression, respectively. If AFF_INV is simple, store
4321 the loop invariant variables it depends on in INV_VARS;
4322 if AFF_INV is complicated, handle it as a new invariant expression
4323 and record it in INV_EXPR.  RATIO is the ratio of USE's step over
4324 CAND's step.  If CAN_AUTOINC is non-NULL, store a boolean value in it
4325 indicating whether this is an auto-increment address.  */
4326
4327 static comp_cost
4328 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4329 struct iv_cand *cand, aff_tree *aff_inv,
4330 aff_tree *aff_var, HOST_WIDE_INT ratio,
4331 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4332 bool *can_autoinc, bool speed)
4333 {
4334 rtx addr;
4335 bool simple_inv = true;
4336 tree comp_inv = NULL_TREE, type = aff_var->type;
4337 comp_cost var_cost = no_cost, cost = no_cost;
4338 struct mem_address parts = {NULL_TREE, integer_one_node,
4339 NULL_TREE, NULL_TREE, NULL_TREE};
4340 machine_mode addr_mode = TYPE_MODE (type);
4341 machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
4342 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4343 /* Only true if ratio != 1. */
4344 bool ok_with_ratio_p = false;
4345 bool ok_without_ratio_p = false;
4346
4347 if (!aff_combination_const_p (aff_inv))
4348 {
4349 parts.index = integer_one_node;
4350 /* Addressing mode "base + index". */
4351 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4352 if (ratio != 1)
4353 {
4354 parts.step = wide_int_to_tree (type, ratio);
4355 /* Addressing mode "base + index << scale". */
4356 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4357 if (!ok_with_ratio_p)
4358 parts.step = NULL_TREE;
4359 }
4360 if (ok_with_ratio_p || ok_without_ratio_p)
4361 {
4362 if (maybe_ne (aff_inv->offset, 0))
4363 {
4364 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4365 /* Addressing mode "base + index [<< scale] + offset". */
4366 if (!valid_mem_ref_p (mem_mode, as, &parts))
4367 parts.offset = NULL_TREE;
4368 else
4369 aff_inv->offset = 0;
4370 }
4371
4372 move_fixed_address_to_symbol (&parts, aff_inv);
4373 /* Base is fixed address and is moved to symbol part. */
4374 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4375 parts.base = NULL_TREE;
4376
4377 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4378 if (parts.symbol != NULL_TREE
4379 && !valid_mem_ref_p (mem_mode, as, &parts))
4380 {
4381 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4382 parts.symbol = NULL_TREE;
4383 /* Reset SIMPLE_INV since symbol address needs to be computed
4384 outside of address expression in this case. */
4385 simple_inv = false;
4386 /* Symbol part is moved back to base part, it can't be NULL. */
4387 parts.base = integer_one_node;
4388 }
4389 }
4390 else
4391 parts.index = NULL_TREE;
4392 }
4393 else
4394 {
4395 poly_int64 ainc_step;
4396 if (can_autoinc
4397 && ratio == 1
4398 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4399 {
4400 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4401
4402 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4403 ainc_offset += ainc_step;
4404 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4405 addr_mode, mem_mode, as, speed);
4406 if (!cost.infinite_cost_p ())
4407 {
4408 *can_autoinc = true;
4409 return cost;
4410 }
4411 cost = no_cost;
4412 }
4413 if (!aff_combination_zero_p (aff_inv))
4414 {
4415 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4416 /* Addressing mode "base + offset". */
4417 if (!valid_mem_ref_p (mem_mode, as, &parts))
4418 parts.offset = NULL_TREE;
4419 else
4420 aff_inv->offset = 0;
4421 }
4422 }
4423
4424 if (simple_inv)
4425 simple_inv = (aff_inv == NULL
4426 || aff_combination_const_p (aff_inv)
4427 || aff_combination_singleton_var_p (aff_inv));
4428 if (!aff_combination_zero_p (aff_inv))
4429 comp_inv = aff_combination_to_tree (aff_inv);
4430 if (comp_inv != NULL_TREE)
4431 cost = force_var_cost (data, comp_inv, inv_vars);
4432 if (ratio != 1 && parts.step == NULL_TREE)
4433 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4434 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4435 var_cost += add_cost (speed, addr_mode);
4436
4437 if (comp_inv && inv_expr && !simple_inv)
4438 {
4439 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4440 /* Clear depends on. */
4441 if (*inv_expr != NULL && inv_vars && *inv_vars)
4442 bitmap_clear (*inv_vars);
4443
4444 /* The cost of a small invariant expression amortized against the loop
4445 iteration count is usually zero, which makes it hard to differentiate
4446 from a candidate based on loop invariant variables.  Secondly, the
4447 generated invariant expression may not be hoisted out of the loop by a
4448 following pass.  We penalize the cost by rounding it up in order to
4449 neutralize such effects.  */
4450 cost.cost = adjust_setup_cost (data, cost.cost, true);
4451 cost.scratch = cost.cost;
4452 }
4453
4454 cost += var_cost;
4455 addr = addr_for_mem_ref (&parts, as, false);
4456 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4457 cost += address_cost (addr, mem_mode, as, speed);
4458
4459 if (parts.symbol != NULL_TREE)
4460 cost.complexity += 1;
4461 /* Don't increase the complexity of adding a scaled index if it's
4462 the only kind of index that the target allows. */
4463 if (parts.step != NULL_TREE && ok_without_ratio_p)
4464 cost.complexity += 1;
4465 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4466 cost.complexity += 1;
4467 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4468 cost.complexity += 1;
4469
4470 return cost;
4471 }
4472
4473 /* Scale (multiply) the computed COST (except the scratch part that should
4474 be hoisted out of the loop) by AT->frequency / header->frequency, which
4475 makes the expected cost more accurate.  */
4476
4477 static comp_cost
4478 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4479 {
4480 int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4481 int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4482 if (loop_freq != 0)
4483 {
4484 gcc_assert (cost.scratch <= cost.cost);
4485 int scaled_cost
4486 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4487
4488 if (dump_file && (dump_flags & TDF_DETAILS))
4489 fprintf (dump_file, "Scaling cost based on bb prob "
4490 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4491 1.0f * bb_freq / loop_freq, cost.cost,
4492 cost.scratch, scaled_cost, bb_freq, loop_freq);
4493
4494 cost.cost = scaled_cost;
4495 }
4496
4497 return cost;
4498 }
4499
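/* An illustrative standalone sketch, not part of this pass, of the scaling
   above: the non-scratch part of the cost is scaled by the execution
   frequency of the use's block relative to the loop header, while the
   scratch (setup) part is kept as is because it is expected to be hoisted.
   The names below are local to this sketch.  */

static int
sketch_scale_cost (int cost, int scratch, int bb_freq, int loop_freq)
{
  if (loop_freq == 0)
    return cost;
  return scratch + (cost - scratch) * bb_freq / loop_freq;
}

/* E.g. a cost of 10 with scratch 4 in a block executed half as often as the
   header (bb_freq 50, loop_freq 100) becomes 4 + 6 * 50 / 100 = 7.  */
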
4500 /* Determines the cost of the computation by which USE is expressed
4501 from induction variable CAND.  If ADDRESS_P is true, we just need
4502 to create an address from it, otherwise we want to get it into a
4503 register.  A set of invariants we depend on is stored in INV_VARS.
4504 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4505 addressing is likely. If INV_EXPR is nonnull, record invariant
4506 expr entry in it. */
4507
4508 static comp_cost
4509 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4510 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4511 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4512 {
4513 gimple *at = use->stmt;
4514 tree ubase = use->iv->base, cbase = cand->iv->base;
4515 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4516 tree comp_inv = NULL_TREE;
4517 HOST_WIDE_INT ratio, aratio;
4518 comp_cost cost;
4519 widest_int rat;
4520 aff_tree aff_inv, aff_var;
4521 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4522
4523 if (inv_vars)
4524 *inv_vars = NULL;
4525 if (can_autoinc)
4526 *can_autoinc = false;
4527 if (inv_expr)
4528 *inv_expr = NULL;
4529
4530 /* Check if we have enough precision to express the values of use. */
4531 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4532 return infinite_cost;
4533
4534 if (address_p
4535 || (use->iv->base_object
4536 && cand->iv->base_object
4537 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4538 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4539 {
4540 /* Do not try to express the address of an object with a computation
4541 based on the address of a different object.  This may cause problems
4542 in rtl level alias analysis (which does not expect this to happen,
4543 as it is invalid in C), and would be unlikely to be useful
4544 anyway.  */
4545 if (use->iv->base_object
4546 && cand->iv->base_object
4547 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4548 return infinite_cost;
4549 }
4550
4551 if (!get_computation_aff_1 (data->current_loop, at, use,
4552 cand, &aff_inv, &aff_var, &rat)
4553 || !wi::fits_shwi_p (rat))
4554 return infinite_cost;
4555
4556 ratio = rat.to_shwi ();
4557 if (address_p)
4558 {
4559 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4560 inv_vars, inv_expr, can_autoinc, speed);
4561 return get_scaled_computation_cost_at (data, at, cost);
4562 }
4563
4564 bool simple_inv = (aff_combination_const_p (&aff_inv)
4565 || aff_combination_singleton_var_p (&aff_inv));
4566 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4567 aff_combination_convert (&aff_inv, signed_type);
4568 if (!aff_combination_zero_p (&aff_inv))
4569 comp_inv = aff_combination_to_tree (&aff_inv);
4570
4571 cost = force_var_cost (data, comp_inv, inv_vars);
4572 if (comp_inv && inv_expr && !simple_inv)
4573 {
4574 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4575 /* Clear depends on. */
4576 if (*inv_expr != NULL && inv_vars && *inv_vars)
4577 bitmap_clear (*inv_vars);
4578
4579 cost.cost = adjust_setup_cost (data, cost.cost);
4580 /* Record setup cost in scratch field. */
4581 cost.scratch = cost.cost;
4582 }
4583 /* The cost of a constant integer can be covered when adding the invariant
4584 part to the variant part.  */
4585 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4586 cost = no_cost;
4587
4588 /* Need type narrowing to represent use with cand. */
4589 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4590 {
4591 machine_mode outer_mode = TYPE_MODE (utype);
4592 machine_mode inner_mode = TYPE_MODE (ctype);
4593 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4594 }
4595
4596 /* Turn a + i * (-c) into a - i * c. */
4597 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4598 aratio = -ratio;
4599 else
4600 aratio = ratio;
4601
4602 if (ratio != 1)
4603 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4604
4605 /* TODO: We may also need to check if we can compute a + i * 4 in one
4606 instruction. */
4607 /* Need to add up the invariant and variant parts. */
4608 if (comp_inv && !integer_zerop (comp_inv))
4609 cost += add_cost (speed, TYPE_MODE (utype));
4610
4611 return get_scaled_computation_cost_at (data, at, cost);
4612 }
4613
4614 /* Determines cost of computing the use in GROUP with CAND in a generic
4615 expression. */
4616
4617 static bool
4618 determine_group_iv_cost_generic (struct ivopts_data *data,
4619 struct iv_group *group, struct iv_cand *cand)
4620 {
4621 comp_cost cost;
4622 iv_inv_expr_ent *inv_expr = NULL;
4623 bitmap inv_vars = NULL, inv_exprs = NULL;
4624 struct iv_use *use = group->vuses[0];
4625
4626 /* The simple case first -- if we need to express value of the preserved
4627 original biv, the cost is 0. This also prevents us from counting the
4628 cost of increment twice -- once at this use and once in the cost of
4629 the candidate. */
4630 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4631 cost = no_cost;
4632 else
4633 cost = get_computation_cost (data, use, cand, false,
4634 &inv_vars, NULL, &inv_expr);
4635
4636 if (inv_expr)
4637 {
4638 inv_exprs = BITMAP_ALLOC (NULL);
4639 bitmap_set_bit (inv_exprs, inv_expr->id);
4640 }
4641 set_group_iv_cost (data, group, cand, cost, inv_vars,
4642 NULL_TREE, ERROR_MARK, inv_exprs);
4643 return !cost.infinite_cost_p ();
4644 }
4645
4646 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4647
4648 static bool
4649 determine_group_iv_cost_address (struct ivopts_data *data,
4650 struct iv_group *group, struct iv_cand *cand)
4651 {
4652 unsigned i;
4653 bitmap inv_vars = NULL, inv_exprs = NULL;
4654 bool can_autoinc;
4655 iv_inv_expr_ent *inv_expr = NULL;
4656 struct iv_use *use = group->vuses[0];
4657 comp_cost sum_cost = no_cost, cost;
4658
4659 cost = get_computation_cost (data, use, cand, true,
4660 &inv_vars, &can_autoinc, &inv_expr);
4661
4662 if (inv_expr)
4663 {
4664 inv_exprs = BITMAP_ALLOC (NULL);
4665 bitmap_set_bit (inv_exprs, inv_expr->id);
4666 }
4667 sum_cost = cost;
4668 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4669 {
4670 if (can_autoinc)
4671 sum_cost -= cand->cost_step;
4672 /* If we generated the candidate solely for exploiting autoincrement
4673 opportunities, and it turns out it can't be used, set the cost to
4674 infinity to make sure we ignore it. */
4675 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4676 sum_cost = infinite_cost;
4677 }
4678
4679 /* Uses in a group can share setup code, so only add setup cost once. */
4680 cost -= cost.scratch;
4681 /* Compute and add costs for the remaining uses of this group.  */
4682 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4683 {
4684 struct iv_use *next = group->vuses[i];
4685
4686 /* TODO: We could skip computing cost for sub iv_use when it has the
4687 same cost as the first iv_use, but the cost really depends on the
4688 offset and where the iv_use is. */
4689 cost = get_computation_cost (data, next, cand, true,
4690 NULL, &can_autoinc, &inv_expr);
4691 if (inv_expr)
4692 {
4693 if (!inv_exprs)
4694 inv_exprs = BITMAP_ALLOC (NULL);
4695
4696 bitmap_set_bit (inv_exprs, inv_expr->id);
4697 }
4698 sum_cost += cost;
4699 }
4700 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4701 NULL_TREE, ERROR_MARK, inv_exprs);
4702
4703 return !sum_cost.infinite_cost_p ();
4704 }
4705
4706 /* Computes value of candidate CAND at position AT in iteration NITER, and
4707 stores it to VAL. */
4708
4709 static void
4710 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4711 aff_tree *val)
4712 {
4713 aff_tree step, delta, nit;
4714 struct iv *iv = cand->iv;
4715 tree type = TREE_TYPE (iv->base);
4716 tree steptype;
4717 if (POINTER_TYPE_P (type))
4718 steptype = sizetype;
4719 else
4720 steptype = unsigned_type_for (type);
4721
4722 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4723 aff_combination_convert (&step, steptype);
4724 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4725 aff_combination_convert (&nit, steptype);
4726 aff_combination_mult (&nit, &step, &delta);
4727 if (stmt_after_increment (loop, cand, at))
4728 aff_combination_add (&delta, &step);
4729
4730 tree_to_aff_combination (iv->base, type, val);
4731 if (!POINTER_TYPE_P (type))
4732 aff_combination_convert (val, steptype);
4733 aff_combination_add (val, &delta);
4734 }
4735
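/* An illustrative standalone sketch, not part of this pass, of what the
   function above computes: the candidate's value after NITER iterations is
   base + niter * step, plus one extra step when AT is after the increment.
   The plain unsigned arithmetic mirrors STEPTYPE; the names are local to
   this sketch.  */

static unsigned long long
sketch_cand_value_at (unsigned long long base, unsigned long long step,
                      unsigned long long niter, int after_increment_p)
{
  unsigned long long delta = niter * step;

  if (after_increment_p)
    delta += step;
  return base + delta;
}
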
4736 /* Returns period of induction variable iv. */
4737
4738 static tree
4739 iv_period (struct iv *iv)
4740 {
4741 tree step = iv->step, period, type;
4742 tree pow2div;
4743
4744 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4745
4746 type = unsigned_type_for (TREE_TYPE (step));
4747 /* The period of the iv is lcm (step, type_range)/step - 1,
4748 i.e., N*type_range/step - 1.  Since the type range is a power
4749 of two, N == step >> num_of_ending_zeros_binary (step),
4750 so the final result is
4751
4752 (type_range >> num_of_ending_zeros_binary (step)) - 1
4753
4754 */
4755 pow2div = num_ending_zeros (step);
4756
4757 period = build_low_bits_mask (type,
4758 (TYPE_PRECISION (type)
4759 - tree_to_uhwi (pow2div)));
4760
4761 return period;
4762 }
4763
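/* An illustrative standalone sketch, not part of this pass, of the formula
   above for a 32-bit iv with a nonzero step: the period is the type range
   shifted right by the number of trailing zero bits of the step, minus one.
   The helper below is local to this sketch.  */

static unsigned
sketch_iv_period_32 (unsigned step)
{
  unsigned tz = 0;

  if (step == 0)
    return 0;                  /* Avoid an endless loop for a zero step.  */
  while ((step & 1) == 0)
    {
      step >>= 1;
      tz++;                    /* Count trailing zero bits of the step.  */
    }
  return ~0u >> tz;            /* Low-bits mask: 2^(32 - tz) - 1.  */
}

/* E.g. step 1 gives period 0xffffffff and step 4 gives 0x3fffffff: the iv
   can take that many increments without repeating a value.  */
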
4764 /* Returns the comparison operator used when eliminating the iv USE. */
4765
4766 static enum tree_code
4767 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4768 {
4769 struct loop *loop = data->current_loop;
4770 basic_block ex_bb;
4771 edge exit;
4772
4773 ex_bb = gimple_bb (use->stmt);
4774 exit = EDGE_SUCC (ex_bb, 0);
4775 if (flow_bb_inside_loop_p (loop, exit->dest))
4776 exit = EDGE_SUCC (ex_bb, 1);
4777
4778 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4779 }
4780
4781 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4782 we only detect the situation in which BASE = SOMETHING + OFFSET, where the
4783 calculation is performed in a non-wrapping type.
4784
4785 TODO: More generally, we could test for the situation that
4786 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4787 This would require knowing the sign of OFFSET. */
4788
4789 static bool
4790 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4791 {
4792 enum tree_code code;
4793 tree e1, e2;
4794 aff_tree aff_e1, aff_e2, aff_offset;
4795
4796 if (!nowrap_type_p (TREE_TYPE (base)))
4797 return false;
4798
4799 base = expand_simple_operations (base);
4800
4801 if (TREE_CODE (base) == SSA_NAME)
4802 {
4803 gimple *stmt = SSA_NAME_DEF_STMT (base);
4804
4805 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4806 return false;
4807
4808 code = gimple_assign_rhs_code (stmt);
4809 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4810 return false;
4811
4812 e1 = gimple_assign_rhs1 (stmt);
4813 e2 = gimple_assign_rhs2 (stmt);
4814 }
4815 else
4816 {
4817 code = TREE_CODE (base);
4818 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4819 return false;
4820 e1 = TREE_OPERAND (base, 0);
4821 e2 = TREE_OPERAND (base, 1);
4822 }
4823
4824 /* Use affine expansion as deeper inspection to prove the equality. */
4825 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4826 &aff_e2, &data->name_expansion_cache);
4827 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4828 &aff_offset, &data->name_expansion_cache);
4829 aff_combination_scale (&aff_offset, -1);
4830 switch (code)
4831 {
4832 case PLUS_EXPR:
4833 aff_combination_add (&aff_e2, &aff_offset);
4834 if (aff_combination_zero_p (&aff_e2))
4835 return true;
4836
4837 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4838 &aff_e1, &data->name_expansion_cache);
4839 aff_combination_add (&aff_e1, &aff_offset);
4840 return aff_combination_zero_p (&aff_e1);
4841
4842 case POINTER_PLUS_EXPR:
4843 aff_combination_add (&aff_e2, &aff_offset);
4844 return aff_combination_zero_p (&aff_e2);
4845
4846 default:
4847 return false;
4848 }
4849 }
4850
4851 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4852 comparison with CAND. NITER describes the number of iterations of
4853 the loops. If successful, the comparison in COMP_P is altered accordingly.
4854
4855 We aim to handle the following situation:
4856
4857 sometype *base, *p;
4858 int a, b, i;
4859
4860 i = a;
4861 p = p_0 = base + a;
4862
4863 do
4864 {
4865 bla (*p);
4866 p++;
4867 i++;
4868 }
4869 while (i < b);
4870
4871 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4872 We aim to optimize this to
4873
4874 p = p_0 = base + a;
4875 do
4876 {
4877 bla (*p);
4878 p++;
4879 }
4880 while (p < p_0 - a + b);
4881
4882 This preserves correctness, since the pointer arithmetic does not
4883 overflow.  More precisely:
4884
4885 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4886 overflow in computing it or the values of p.
4887 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4888 overflow. To prove this, we use the fact that p_0 = base + a. */
4889
4890 static bool
4891 iv_elimination_compare_lt (struct ivopts_data *data,
4892 struct iv_cand *cand, enum tree_code *comp_p,
4893 struct tree_niter_desc *niter)
4894 {
4895 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4896 struct aff_tree nit, tmpa, tmpb;
4897 enum tree_code comp;
4898 HOST_WIDE_INT step;
4899
4900 /* We need to know that the candidate induction variable does not overflow.
4901 While more complex analysis may be used to prove this, for now just
4902 check that the variable appears in the original program and that it
4903 is computed in a type that guarantees no overflows. */
4904 cand_type = TREE_TYPE (cand->iv->base);
4905 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4906 return false;
4907
4908 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4909 the calculation of the BOUND could overflow, making the comparison
4910 invalid. */
4911 if (!data->loop_single_exit_p)
4912 return false;
4913
4914 /* We need to be able to decide whether candidate is increasing or decreasing
4915 in order to choose the right comparison operator. */
4916 if (!cst_and_fits_in_hwi (cand->iv->step))
4917 return false;
4918 step = int_cst_value (cand->iv->step);
4919
4920 /* Check that the number of iterations matches the expected pattern:
4921 a + 1 > b ? 0 : b - a - 1. */
4922 mbz = niter->may_be_zero;
4923 if (TREE_CODE (mbz) == GT_EXPR)
4924 {
4925 /* Handle a + 1 > b. */
4926 tree op0 = TREE_OPERAND (mbz, 0);
4927 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4928 {
4929 a = TREE_OPERAND (op0, 0);
4930 b = TREE_OPERAND (mbz, 1);
4931 }
4932 else
4933 return false;
4934 }
4935 else if (TREE_CODE (mbz) == LT_EXPR)
4936 {
4937 tree op1 = TREE_OPERAND (mbz, 1);
4938
4939 /* Handle b < a + 1. */
4940 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4941 {
4942 a = TREE_OPERAND (op1, 0);
4943 b = TREE_OPERAND (mbz, 0);
4944 }
4945 else
4946 return false;
4947 }
4948 else
4949 return false;
4950
4951 /* Expected number of iterations is B - A - 1. Check that it matches
4952 the actual number, i.e., that B - A - NITER = 1. */
4953 tree_to_aff_combination (niter->niter, nit_type, &nit);
4954 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4955 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4956 aff_combination_scale (&nit, -1);
4957 aff_combination_scale (&tmpa, -1);
4958 aff_combination_add (&tmpb, &tmpa);
4959 aff_combination_add (&tmpb, &nit);
4960 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
4961 return false;
4962
4963 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4964 overflow. */
4965 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4966 cand->iv->step,
4967 fold_convert (TREE_TYPE (cand->iv->step), a));
4968 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4969 return false;
4970
4971 /* Determine the new comparison operator. */
4972 comp = step < 0 ? GT_EXPR : LT_EXPR;
4973 if (*comp_p == NE_EXPR)
4974 *comp_p = comp;
4975 else if (*comp_p == EQ_EXPR)
4976 *comp_p = invert_tree_comparison (comp, false);
4977 else
4978 gcc_unreachable ();
4979
4980 return true;
4981 }
4982
4983 /* Check whether it is possible to express the condition in USE by comparison
4984 of candidate CAND. If so, store the value compared with to BOUND, and the
4985 comparison operator to COMP. */
4986
4987 static bool
4988 may_eliminate_iv (struct ivopts_data *data,
4989 struct iv_use *use, struct iv_cand *cand, tree *bound,
4990 enum tree_code *comp)
4991 {
4992 basic_block ex_bb;
4993 edge exit;
4994 tree period;
4995 struct loop *loop = data->current_loop;
4996 aff_tree bnd;
4997 struct tree_niter_desc *desc = NULL;
4998
4999 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5000 return false;
5001
5002 /* For now works only for exits that dominate the loop latch.
5003 TODO: extend to other conditions inside loop body. */
5004 ex_bb = gimple_bb (use->stmt);
5005 if (use->stmt != last_stmt (ex_bb)
5006 || gimple_code (use->stmt) != GIMPLE_COND
5007 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5008 return false;
5009
5010 exit = EDGE_SUCC (ex_bb, 0);
5011 if (flow_bb_inside_loop_p (loop, exit->dest))
5012 exit = EDGE_SUCC (ex_bb, 1);
5013 if (flow_bb_inside_loop_p (loop, exit->dest))
5014 return false;
5015
5016 desc = niter_for_exit (data, exit);
5017 if (!desc)
5018 return false;
5019
5020 /* Determine whether we can use the variable to test the exit condition.
5021 This is the case iff the period of the induction variable is greater
5022 than the number of iterations for which the exit condition is true. */
5023 period = iv_period (cand->iv);
5024
5025 /* If the number of iterations is constant, compare against it directly. */
5026 if (TREE_CODE (desc->niter) == INTEGER_CST)
5027 {
5028 /* See cand_value_at. */
5029 if (stmt_after_increment (loop, cand, use->stmt))
5030 {
5031 if (!tree_int_cst_lt (desc->niter, period))
5032 return false;
5033 }
5034 else
5035 {
5036 if (tree_int_cst_lt (period, desc->niter))
5037 return false;
5038 }
5039 }
5040
5041 /* If not, and if this is the only possible exit of the loop, see whether
5042 we can get a conservative estimate on the number of iterations of the
5043 entire loop and compare against that instead. */
5044 else
5045 {
5046 widest_int period_value, max_niter;
5047
5048 max_niter = desc->max;
5049 if (stmt_after_increment (loop, cand, use->stmt))
5050 max_niter += 1;
5051 period_value = wi::to_widest (period);
5052 if (wi::gtu_p (max_niter, period_value))
5053 {
5054 /* See if we can take advantage of inferred loop bound
5055 information. */
5056 if (data->loop_single_exit_p)
5057 {
5058 if (!max_loop_iterations (loop, &max_niter))
5059 return false;
5060 /* The loop bound is already adjusted by adding 1. */
5061 if (wi::gtu_p (max_niter, period_value))
5062 return false;
5063 }
5064 else
5065 return false;
5066 }
5067 }
5068
5069 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5070
5071 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5072 aff_combination_to_tree (&bnd));
5073 *comp = iv_elimination_compare (data, use);
5074
5075 /* It is unlikely that computing the number of iterations using division
5076 would be more profitable than keeping the original induction variable. */
5077 if (expression_expensive_p (*bound))
5078 return false;
5079
5080 /* Sometimes, it is possible to handle the situation in which the number of
5081 iterations may be zero unless additional assumptions hold, by using <
5082 instead of != in the exit condition.
5083
5084 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5085 base the exit condition on it. However, that is often too
5086 expensive. */
5087 if (!integer_zerop (desc->may_be_zero))
5088 return iv_elimination_compare_lt (data, cand, comp, desc);
5089
5090 return true;
5091 }
5092
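/* An illustrative standalone sketch, not part of this pass, of the period
   check above for a constant iteration count: the candidate may replace the
   exit test only if its period covers the number of iterations, strictly so
   when the use is after the increment.  The names below are local to this
   sketch.  */

static int
sketch_period_covers_niter (unsigned long long period,
                            unsigned long long niter,
                            int after_increment_p)
{
  return after_increment_p ? niter < period : niter <= period;
}
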
5093 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5094 be copied if it is used in the loop body and DATA->body_includes_call is set.  */
5095
5096 static int
5097 parm_decl_cost (struct ivopts_data *data, tree bound)
5098 {
5099 tree sbound = bound;
5100 STRIP_NOPS (sbound);
5101
5102 if (TREE_CODE (sbound) == SSA_NAME
5103 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5104 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5105 && data->body_includes_call)
5106 return COSTS_N_INSNS (1);
5107
5108 return 0;
5109 }
5110
5111 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5112
5113 static bool
5114 determine_group_iv_cost_cond (struct ivopts_data *data,
5115 struct iv_group *group, struct iv_cand *cand)
5116 {
5117 tree bound = NULL_TREE;
5118 struct iv *cmp_iv;
5119 bitmap inv_exprs = NULL;
5120 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5121 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5122 enum comp_iv_rewrite rewrite_type;
5123 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5124 tree *control_var, *bound_cst;
5125 enum tree_code comp = ERROR_MARK;
5126 struct iv_use *use = group->vuses[0];
5127
5128 /* Extract condition operands. */
5129 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5130 &bound_cst, NULL, &cmp_iv);
5131 gcc_assert (rewrite_type != COMP_IV_NA);
5132
5133 /* Try iv elimination. */
5134 if (rewrite_type == COMP_IV_ELIM
5135 && may_eliminate_iv (data, use, cand, &bound, &comp))
5136 {
5137 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5138 if (elim_cost.cost == 0)
5139 elim_cost.cost = parm_decl_cost (data, bound);
5140 else if (TREE_CODE (bound) == INTEGER_CST)
5141 elim_cost.cost = 0;
5142 /* If we replace a loop condition 'i < n' with 'p < base + n',
5143 inv_vars_elim will have 'base' and 'n' set, which implies that both
5144 'base' and 'n' will be live during the loop. More likely,
5145 'base + n' will be loop invariant, resulting in only one live value
5146 during the loop. So in that case we clear inv_vars_elim and set
5147 inv_expr_elim instead. */
5148 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5149 {
5150 inv_expr_elim = get_loop_invariant_expr (data, bound);
5151 bitmap_clear (inv_vars_elim);
5152 }
5153 /* The bound is a loop invariant, so it will be only computed
5154 once. */
5155 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5156 }
5157
5158 /* When the condition is a comparison of the candidate IV against
5159 zero, prefer this IV.
5160
5161 TODO: The constant that we're subtracting from the cost should
5162 be target-dependent. This information should be added to the
5163 target costs for each backend. */
5164 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5165 && integer_zerop (*bound_cst)
5166 && (operand_equal_p (*control_var, cand->var_after, 0)
5167 || operand_equal_p (*control_var, cand->var_before, 0)))
5168 elim_cost -= 1;
5169
5170 express_cost = get_computation_cost (data, use, cand, false,
5171 &inv_vars_express, NULL,
5172 &inv_expr_express);
5173 if (cmp_iv != NULL)
5174 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5175
5176 /* Count the cost of the original bound as well. */
5177 bound_cost = force_var_cost (data, *bound_cst, NULL);
5178 if (bound_cost.cost == 0)
5179 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5180 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5181 bound_cost.cost = 0;
5182 express_cost += bound_cost;
5183
5184 /* Choose the better approach, preferring the eliminated IV. */
5185 if (elim_cost <= express_cost)
5186 {
5187 cost = elim_cost;
5188 inv_vars = inv_vars_elim;
5189 inv_vars_elim = NULL;
5190 inv_expr = inv_expr_elim;
5191 }
5192 else
5193 {
5194 cost = express_cost;
5195 inv_vars = inv_vars_express;
5196 inv_vars_express = NULL;
5197 bound = NULL_TREE;
5198 comp = ERROR_MARK;
5199 inv_expr = inv_expr_express;
5200 }
5201
5202 if (inv_expr)
5203 {
5204 inv_exprs = BITMAP_ALLOC (NULL);
5205 bitmap_set_bit (inv_exprs, inv_expr->id);
5206 }
5207 set_group_iv_cost (data, group, cand, cost,
5208 inv_vars, bound, comp, inv_exprs);
5209
5210 if (inv_vars_elim)
5211 BITMAP_FREE (inv_vars_elim);
5212 if (inv_vars_express)
5213 BITMAP_FREE (inv_vars_express);
5214
5215 return !cost.infinite_cost_p ();
5216 }
5217
5218 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5219 if the uses in GROUP cannot be represented with CAND.  */
5220
5221 static bool
5222 determine_group_iv_cost (struct ivopts_data *data,
5223 struct iv_group *group, struct iv_cand *cand)
5224 {
5225 switch (group->type)
5226 {
5227 case USE_NONLINEAR_EXPR:
5228 return determine_group_iv_cost_generic (data, group, cand);
5229
5230 case USE_ADDRESS:
5231 return determine_group_iv_cost_address (data, group, cand);
5232
5233 case USE_COMPARE:
5234 return determine_group_iv_cost_cond (data, group, cand);
5235
5236 default:
5237 gcc_unreachable ();
5238 }
5239 }
5240
5241 /* Return true if get_computation_cost indicates that autoincrement is
5242 a possibility for the pair of USE and CAND, false otherwise. */
5243
5244 static bool
5245 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5246 struct iv_cand *cand)
5247 {
5248 if (use->type != USE_ADDRESS)
5249 return false;
5250
5251 bool can_autoinc = false;
5252 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5253 return can_autoinc;
5254 }
5255
5256 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5257 use that allows autoincrement, and set their AINC_USE if possible. */
5258
5259 static void
5260 set_autoinc_for_original_candidates (struct ivopts_data *data)
5261 {
5262 unsigned i, j;
5263
5264 for (i = 0; i < data->vcands.length (); i++)
5265 {
5266 struct iv_cand *cand = data->vcands[i];
5267 struct iv_use *closest_before = NULL;
5268 struct iv_use *closest_after = NULL;
5269 if (cand->pos != IP_ORIGINAL)
5270 continue;
5271
5272 for (j = 0; j < data->vgroups.length (); j++)
5273 {
5274 struct iv_group *group = data->vgroups[j];
5275 struct iv_use *use = group->vuses[0];
5276 unsigned uid = gimple_uid (use->stmt);
5277
5278 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5279 continue;
5280
5281 if (uid < gimple_uid (cand->incremented_at)
5282 && (closest_before == NULL
5283 || uid > gimple_uid (closest_before->stmt)))
5284 closest_before = use;
5285
5286 if (uid > gimple_uid (cand->incremented_at)
5287 && (closest_after == NULL
5288 || uid < gimple_uid (closest_after->stmt)))
5289 closest_after = use;
5290 }
5291
5292 if (closest_before != NULL
5293 && autoinc_possible_for_pair (data, closest_before, cand))
5294 cand->ainc_use = closest_before;
5295 else if (closest_after != NULL
5296 && autoinc_possible_for_pair (data, closest_after, cand))
5297 cand->ainc_use = closest_after;
5298 }
5299 }
5300
5301 /* Relate compare use with all candidates. */
5302
5303 static void
5304 relate_compare_use_with_all_cands (struct ivopts_data *data)
5305 {
5306 unsigned i, count = data->vcands.length ();
5307 for (i = 0; i < data->vgroups.length (); i++)
5308 {
5309 struct iv_group *group = data->vgroups[i];
5310
5311 if (group->type == USE_COMPARE)
5312 bitmap_set_range (group->related_cands, 0, count);
5313 }
5314 }
5315
5316 /* Finds the candidates for the induction variables. */
5317
5318 static void
5319 find_iv_candidates (struct ivopts_data *data)
5320 {
5321 /* Add commonly used ivs. */
5322 add_standard_iv_candidates (data);
5323
5324 /* Add old induction variables. */
5325 add_iv_candidate_for_bivs (data);
5326
5327 /* Add induction variables derived from uses. */
5328 add_iv_candidate_for_groups (data);
5329
5330 set_autoinc_for_original_candidates (data);
5331
5332 /* Record the important candidates. */
5333 record_important_candidates (data);
5334
5335 /* Relate compare iv_use with all candidates. */
5336 if (!data->consider_all_candidates)
5337 relate_compare_use_with_all_cands (data);
5338
5339 if (dump_file && (dump_flags & TDF_DETAILS))
5340 {
5341 unsigned i;
5342
5343 fprintf (dump_file, "\n<Important Candidates>:\t");
5344 for (i = 0; i < data->vcands.length (); i++)
5345 if (data->vcands[i]->important)
5346 fprintf (dump_file, " %d,", data->vcands[i]->id);
5347 fprintf (dump_file, "\n");
5348
5349 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5350 for (i = 0; i < data->vgroups.length (); i++)
5351 {
5352 struct iv_group *group = data->vgroups[i];
5353
5354 if (group->related_cands)
5355 {
5356 fprintf (dump_file, " Group %d:\t", group->id);
5357 dump_bitmap (dump_file, group->related_cands);
5358 }
5359 }
5360 fprintf (dump_file, "\n");
5361 }
5362 }
5363
5364 /* Determines costs of computing use of iv with an iv candidate. */
5365
5366 static void
5367 determine_group_iv_costs (struct ivopts_data *data)
5368 {
5369 unsigned i, j;
5370 struct iv_cand *cand;
5371 struct iv_group *group;
5372 bitmap to_clear = BITMAP_ALLOC (NULL);
5373
5374 alloc_use_cost_map (data);
5375
5376 for (i = 0; i < data->vgroups.length (); i++)
5377 {
5378 group = data->vgroups[i];
5379
5380 if (data->consider_all_candidates)
5381 {
5382 for (j = 0; j < data->vcands.length (); j++)
5383 {
5384 cand = data->vcands[j];
5385 determine_group_iv_cost (data, group, cand);
5386 }
5387 }
5388 else
5389 {
5390 bitmap_iterator bi;
5391
5392 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5393 {
5394 cand = data->vcands[j];
5395 if (!determine_group_iv_cost (data, group, cand))
5396 bitmap_set_bit (to_clear, j);
5397 }
5398
5399 /* Remove the candidates for which the cost is infinite from
5400 the list of related candidates.  */
5401 bitmap_and_compl_into (group->related_cands, to_clear);
5402 bitmap_clear (to_clear);
5403 }
5404 }
5405
5406 BITMAP_FREE (to_clear);
5407
5408 if (dump_file && (dump_flags & TDF_DETAILS))
5409 {
5410 bitmap_iterator bi;
5411
5412 /* Dump invariant variables. */
5413 fprintf (dump_file, "\n<Invariant Vars>:\n");
5414 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5415 {
5416 struct version_info *info = ver_info (data, i);
5417 if (info->inv_id)
5418 {
5419 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5420 print_generic_expr (dump_file, info->name, TDF_SLIM);
5421 fprintf (dump_file, "%s\n",
5422 info->has_nonlin_use ? "" : "\t(eliminable)");
5423 }
5424 }
5425
5426 /* Dump invariant expressions. */
5427 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5428 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5429
5430 for (hash_table<iv_inv_expr_hasher>::iterator it
5431 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5432 ++it)
5433 list.safe_push (*it);
5434
5435 list.qsort (sort_iv_inv_expr_ent);
5436
5437 for (i = 0; i < list.length (); ++i)
5438 {
5439 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5440 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5441 fprintf (dump_file, "\n");
5442 }
5443
5444 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5445
5446 for (i = 0; i < data->vgroups.length (); i++)
5447 {
5448 group = data->vgroups[i];
5449
5450 fprintf (dump_file, "Group %d:\n", i);
5451 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5452 for (j = 0; j < group->n_map_members; j++)
5453 {
5454 if (!group->cost_map[j].cand
5455 || group->cost_map[j].cost.infinite_cost_p ())
5456 continue;
5457
5458 fprintf (dump_file, " %d\t%d\t%d\t",
5459 group->cost_map[j].cand->id,
5460 group->cost_map[j].cost.cost,
5461 group->cost_map[j].cost.complexity);
5462 if (!group->cost_map[j].inv_exprs
5463 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5464 fprintf (dump_file, "NIL;\t");
5465 else
5466 bitmap_print (dump_file,
5467 group->cost_map[j].inv_exprs, "", ";\t");
5468 if (!group->cost_map[j].inv_vars
5469 || bitmap_empty_p (group->cost_map[j].inv_vars))
5470 fprintf (dump_file, "NIL;\n");
5471 else
5472 bitmap_print (dump_file,
5473 group->cost_map[j].inv_vars, "", "\n");
5474 }
5475
5476 fprintf (dump_file, "\n");
5477 }
5478 fprintf (dump_file, "\n");
5479 }
5480 }
5481
5482 /* Determines cost of the candidate CAND. */
5483
5484 static void
5485 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5486 {
5487 comp_cost cost_base;
5488 unsigned cost, cost_step;
5489 tree base;
5490
5491 gcc_assert (cand->iv != NULL);
5492
5493 /* There are two costs associated with the candidate -- its increment
5494 and its initialization.  The second is almost negligible for any loop
5495 that rolls enough, so we take it into account only a little.  */
5496
5497 base = cand->iv->base;
5498 cost_base = force_var_cost (data, base, NULL);
5499 /* It will be exceptional that the iv register happens to be initialized with
5500 the proper value at no cost. In general, there will at least be a regcopy
5501 or a const set. */
5502 if (cost_base.cost == 0)
5503 cost_base.cost = COSTS_N_INSNS (1);
5504 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5505
5506 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5507
5508 /* Prefer the original ivs unless we may gain something by replacing them.
5509 The reason is to make debugging simpler; this is therefore not relevant
5510 for artificial ivs created by other optimization passes.  */
5511 if (cand->pos != IP_ORIGINAL
5512 || !SSA_NAME_VAR (cand->var_before)
5513 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5514 cost++;
5515
5516 /* Prefer not to insert statements into latch unless there are some
5517 already (so that we do not create unnecessary jumps). */
5518 if (cand->pos == IP_END
5519 && empty_block_p (ip_end_pos (data->current_loop)))
5520 cost++;
5521
5522 cand->cost = cost;
5523 cand->cost_step = cost_step;
5524 }
5525
5526 /* Determines costs of computation of the candidates. */
5527
5528 static void
5529 determine_iv_costs (struct ivopts_data *data)
5530 {
5531 unsigned i;
5532
5533 if (dump_file && (dump_flags & TDF_DETAILS))
5534 {
5535 fprintf (dump_file, "<Candidate Costs>:\n");
5536 fprintf (dump_file, " cand\tcost\n");
5537 }
5538
5539 for (i = 0; i < data->vcands.length (); i++)
5540 {
5541 struct iv_cand *cand = data->vcands[i];
5542
5543 determine_iv_cost (data, cand);
5544
5545 if (dump_file && (dump_flags & TDF_DETAILS))
5546 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5547 }
5548
5549 if (dump_file && (dump_flags & TDF_DETAILS))
5550 fprintf (dump_file, "\n");
5551 }
5552
5553 /* Estimate register pressure for a loop having N_INVS invariants and N_CANDS
5554 induction variables.  Note that N_INVS includes both invariant variables and
5555 invariant expressions.  */
5556
5557 static unsigned
5558 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5559 unsigned n_cands)
5560 {
5561 unsigned cost;
5562 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5563 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5564 bool speed = data->speed;
5565
5566 /* If there is a call in the loop body, the call-clobbered registers
5567 are not available for loop invariants. */
5568 if (data->body_includes_call)
5569 available_regs = available_regs - target_clobbered_regs;
5570
5571 /* If we have enough registers. */
5572 if (regs_needed + target_res_regs < available_regs)
5573 cost = n_new;
5574 /* If close to running out of registers, try to preserve them. */
5575 else if (regs_needed <= available_regs)
5576 cost = target_reg_cost [speed] * regs_needed;
5577 /* If the needed registers exceed the available ones but the number of
5578 candidates alone does not, we penalize the extra registers using target_spill_cost.  */
5579 else if (n_cands <= available_regs)
5580 cost = target_reg_cost [speed] * available_regs
5581 + target_spill_cost [speed] * (regs_needed - available_regs);
5582 /* If even the number of candidates exceeds the available registers, we
5583 penalize the extra candidate registers using target_spill_cost * 2,
5584 because it is more expensive to spill an induction variable than an invariant.  */
5585 else
5586 cost = target_reg_cost [speed] * available_regs
5587 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5588 + target_spill_cost [speed] * (regs_needed - n_cands);
5589
5590 /* Finally, add the number of candidates, so that we prefer eliminating
5591 induction variables if possible. */
5592 return cost + n_cands;
5593 }
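/* Illustrative-only example of the estimate above, with made-up numbers
   that do not correspond to any real target: assume target_avail_regs == 12,
   target_res_regs == 3, no call in the loop body, regs_used (n_old) == 6,
   and target_reg_cost / target_spill_cost of 2 / 6 for the current speed
   setting.  For n_invs == 3 and n_cands == 2, n_new == 5 and
   regs_needed == 11; 11 + 3 is not below 12 but 11 <= 12, so the second
   case applies and the estimate is 2 * 11 + 2 == 24.  For n_invs == 6 and
   n_cands == 8, regs_needed == 20 exceeds 12 while n_cands does not, so the
   third case applies and the estimate is 2 * 12 + 6 * (20 - 12) + 8 == 80.  */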
5594
5595 /* For each size of the induction variable set determine the penalty. */
5596
5597 static void
5598 determine_set_costs (struct ivopts_data *data)
5599 {
5600 unsigned j, n;
5601 gphi *phi;
5602 gphi_iterator psi;
5603 tree op;
5604 struct loop *loop = data->current_loop;
5605 bitmap_iterator bi;
5606
5607 if (dump_file && (dump_flags & TDF_DETAILS))
5608 {
5609 fprintf (dump_file, "<Global Costs>:\n");
5610 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5611 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5612 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5613 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5614 }
5615
5616 n = 0;
5617 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5618 {
5619 phi = psi.phi ();
5620 op = PHI_RESULT (phi);
5621
5622 if (virtual_operand_p (op))
5623 continue;
5624
5625 if (get_iv (data, op))
5626 continue;
5627
5628 if (!POINTER_TYPE_P (TREE_TYPE (op))
5629 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5630 continue;
5631
5632 n++;
5633 }
5634
5635 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5636 {
5637 struct version_info *info = ver_info (data, j);
5638
5639 if (info->inv_id && info->has_nonlin_use)
5640 n++;
5641 }
5642
5643 data->regs_used = n;
5644 if (dump_file && (dump_flags & TDF_DETAILS))
5645 fprintf (dump_file, " regs_used %d\n", n);
5646
5647 if (dump_file && (dump_flags & TDF_DETAILS))
5648 {
5649 fprintf (dump_file, " cost for size:\n");
5650 fprintf (dump_file, " ivs\tcost\n");
5651 for (j = 0; j <= 2 * target_avail_regs; j++)
5652 fprintf (dump_file, " %d\t%d\n", j,
5653 ivopts_estimate_reg_pressure (data, 0, j));
5654 fprintf (dump_file, "\n");
5655 }
5656 }
5657
5658 /* Returns true if A is a cheaper cost pair than B. */
5659
5660 static bool
5661 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5662 {
5663 if (!a)
5664 return false;
5665
5666 if (!b)
5667 return true;
5668
5669 if (a->cost < b->cost)
5670 return true;
5671
5672 if (b->cost < a->cost)
5673 return false;
5674
5675 /* In case the costs are the same, prefer the cheaper candidate. */
5676 if (a->cand->cost < b->cand->cost)
5677 return true;
5678
5679 return false;
5680 }
5681
5682 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5683 for more expensive, equal and cheaper respectively. */
5684
5685 static int
5686 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5687 {
5688 if (cheaper_cost_pair (a, b))
5689 return -1;
5690 if (cheaper_cost_pair (b, a))
5691 return 1;
5692
5693 return 0;
5694 }
5695
5696 /* Returns the cost pair by which GROUP is expressed in IVS. */
5697
5698 static struct cost_pair *
5699 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5700 {
5701 return ivs->cand_for_group[group->id];
5702 }
5703
5704 /* Computes the cost field of IVS structure. */
5705
5706 static void
5707 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5708 {
5709 comp_cost cost = ivs->cand_use_cost;
5710
5711 cost += ivs->cand_cost;
5712 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5713 ivs->cost = cost;
5714 }
5715
5716 /* Remove uses of the invariants in set INVS by decreasing the counters in
5717 N_INV_USES and in IVS. */
5718
5719 static void
5720 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5721 {
5722 bitmap_iterator bi;
5723 unsigned iid;
5724
5725 if (!invs)
5726 return;
5727
5728 gcc_assert (n_inv_uses != NULL);
5729 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5730 {
5731 n_inv_uses[iid]--;
5732 if (n_inv_uses[iid] == 0)
5733 ivs->n_invs--;
5734 }
5735 }
5736
5737 /* Set USE not to be expressed by any candidate in IVS. */
5738
5739 static void
5740 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5741 struct iv_group *group)
5742 {
5743 unsigned gid = group->id, cid;
5744 struct cost_pair *cp;
5745
5746 cp = ivs->cand_for_group[gid];
5747 if (!cp)
5748 return;
5749 cid = cp->cand->id;
5750
5751 ivs->bad_groups++;
5752 ivs->cand_for_group[gid] = NULL;
5753 ivs->n_cand_uses[cid]--;
5754
5755 if (ivs->n_cand_uses[cid] == 0)
5756 {
5757 bitmap_clear_bit (ivs->cands, cid);
5758 ivs->n_cands--;
5759 ivs->cand_cost -= cp->cand->cost;
5760 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5761 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5762 }
5763
5764 ivs->cand_use_cost -= cp->cost;
5765 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5766 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5767 iv_ca_recount_cost (data, ivs);
5768 }
5769
5770 /* Add uses of the invariants in set INVS by increasing the counters in
5771 N_INV_USES and in IVS. */
5772
5773 static void
5774 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5775 {
5776 bitmap_iterator bi;
5777 unsigned iid;
5778
5779 if (!invs)
5780 return;
5781
5782 gcc_assert (n_inv_uses != NULL);
5783 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5784 {
5785 n_inv_uses[iid]++;
5786 if (n_inv_uses[iid] == 1)
5787 ivs->n_invs++;
5788 }
5789 }
5790
5791 /* Set cost pair for GROUP in set IVS to CP. */
5792
5793 static void
5794 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5795 struct iv_group *group, struct cost_pair *cp)
5796 {
5797 unsigned gid = group->id, cid;
5798
5799 if (ivs->cand_for_group[gid] == cp)
5800 return;
5801
5802 if (ivs->cand_for_group[gid])
5803 iv_ca_set_no_cp (data, ivs, group);
5804
5805 if (cp)
5806 {
5807 cid = cp->cand->id;
5808
5809 ivs->bad_groups--;
5810 ivs->cand_for_group[gid] = cp;
5811 ivs->n_cand_uses[cid]++;
5812 if (ivs->n_cand_uses[cid] == 1)
5813 {
5814 bitmap_set_bit (ivs->cands, cid);
5815 ivs->n_cands++;
5816 ivs->cand_cost += cp->cand->cost;
5817 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5818 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5819 }
5820
5821 ivs->cand_use_cost += cp->cost;
5822 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5823 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5824 iv_ca_recount_cost (data, ivs);
5825 }
5826 }
5827
5828 /* Extend set IVS by expressing GROUP by some of the candidates in it
5829 if possible. Consider all important candidates if the candidates in
5830 set IVS don't give any result. */
5831
5832 static void
5833 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5834 struct iv_group *group)
5835 {
5836 struct cost_pair *best_cp = NULL, *cp;
5837 bitmap_iterator bi;
5838 unsigned i;
5839 struct iv_cand *cand;
5840
5841 gcc_assert (ivs->upto >= group->id);
5842 ivs->upto++;
5843 ivs->bad_groups++;
5844
5845 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5846 {
5847 cand = data->vcands[i];
5848 cp = get_group_iv_cost (data, group, cand);
5849 if (cheaper_cost_pair (cp, best_cp))
5850 best_cp = cp;
5851 }
5852
5853 if (best_cp == NULL)
5854 {
5855 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5856 {
5857 cand = data->vcands[i];
5858 cp = get_group_iv_cost (data, group, cand);
5859 if (cheaper_cost_pair (cp, best_cp))
5860 best_cp = cp;
5861 }
5862 }
5863
5864 iv_ca_set_cp (data, ivs, group, best_cp);
5865 }
5866
5867 /* Get cost for assignment IVS. */
5868
5869 static comp_cost
5870 iv_ca_cost (struct iv_ca *ivs)
5871 {
5872 /* This was a conditional expression but it triggered a bug in
5873 Sun C 5.5. */
5874 if (ivs->bad_groups)
5875 return infinite_cost;
5876 else
5877 return ivs->cost;
5878 }
5879
5880 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5881 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5882 respectively. */
5883
5884 static int
5885 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5886 struct iv_group *group, struct cost_pair *old_cp,
5887 struct cost_pair *new_cp)
5888 {
5889 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5890 unsigned old_n_invs = ivs->n_invs;
5891 iv_ca_set_cp (data, ivs, group, new_cp);
5892 unsigned new_n_invs = ivs->n_invs;
5893 iv_ca_set_cp (data, ivs, group, old_cp);
5894
5895 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5896 }
5897
5898 /* Creates a change record expressing GROUP by NEW_CP instead of OLD_CP and
5899 chains it before NEXT. */
5900
5901 static struct iv_ca_delta *
5902 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5903 struct cost_pair *new_cp, struct iv_ca_delta *next)
5904 {
5905 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5906
5907 change->group = group;
5908 change->old_cp = old_cp;
5909 change->new_cp = new_cp;
5910 change->next = next;
5911
5912 return change;
5913 }
5914
5915 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5916 are rewritten. */
5917
5918 static struct iv_ca_delta *
5919 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5920 {
5921 struct iv_ca_delta *last;
5922
5923 if (!l2)
5924 return l1;
5925
5926 if (!l1)
5927 return l2;
5928
5929 for (last = l1; last->next; last = last->next)
5930 continue;
5931 last->next = l2;
5932
5933 return l1;
5934 }
5935
5936 /* Reverse the list of changes DELTA, forming the inverse to it. */
5937
5938 static struct iv_ca_delta *
5939 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5940 {
5941 struct iv_ca_delta *act, *next, *prev = NULL;
5942
5943 for (act = delta; act; act = next)
5944 {
5945 next = act->next;
5946 act->next = prev;
5947 prev = act;
5948
5949 std::swap (act->old_cp, act->new_cp);
5950 }
5951
5952 return prev;
5953 }
5954
5955 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5956 reverted instead. */
5957
5958 static void
5959 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5960 struct iv_ca_delta *delta, bool forward)
5961 {
5962 struct cost_pair *from, *to;
5963 struct iv_ca_delta *act;
5964
5965 if (!forward)
5966 delta = iv_ca_delta_reverse (delta);
5967
5968 for (act = delta; act; act = act->next)
5969 {
5970 from = act->old_cp;
5971 to = act->new_cp;
5972 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
5973 iv_ca_set_cp (data, ivs, act->group, to);
5974 }
5975
5976 if (!forward)
5977 iv_ca_delta_reverse (delta);
5978 }
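/* The delta lists above are typically used to evaluate a change tentatively:
   build a delta, commit it, read the resulting cost, then commit it again
   with FORWARD == false to restore the previous assignment.  A minimal
   sketch of that idiom (mirroring what iv_ca_extend does further below;
   GROUP, OLD_CP and NEW_CP stand for values the caller has already
   computed):

     struct iv_ca_delta *delta = iv_ca_delta_add (group, old_cp, new_cp, NULL);
     iv_ca_delta_commit (data, ivs, delta, true);
     comp_cost cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);
     iv_ca_delta_free (&delta);  */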
5979
5980 /* Returns true if CAND is used in IVS. */
5981
5982 static bool
5983 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5984 {
5985 return ivs->n_cand_uses[cand->id] > 0;
5986 }
5987
5988 /* Returns number of induction variable candidates in the set IVS. */
5989
5990 static unsigned
5991 iv_ca_n_cands (struct iv_ca *ivs)
5992 {
5993 return ivs->n_cands;
5994 }
5995
5996 /* Free the list of changes DELTA. */
5997
5998 static void
5999 iv_ca_delta_free (struct iv_ca_delta **delta)
6000 {
6001 struct iv_ca_delta *act, *next;
6002
6003 for (act = *delta; act; act = next)
6004 {
6005 next = act->next;
6006 free (act);
6007 }
6008
6009 *delta = NULL;
6010 }
6011
6012 /* Allocates a new iv candidate assignment. */
6013
6014 static struct iv_ca *
6015 iv_ca_new (struct ivopts_data *data)
6016 {
6017 struct iv_ca *nw = XNEW (struct iv_ca);
6018
6019 nw->upto = 0;
6020 nw->bad_groups = 0;
6021 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6022 data->vgroups.length ());
6023 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6024 nw->cands = BITMAP_ALLOC (NULL);
6025 nw->n_cands = 0;
6026 nw->n_invs = 0;
6027 nw->cand_use_cost = no_cost;
6028 nw->cand_cost = 0;
6029 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6030 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6031 nw->cost = no_cost;
6032
6033 return nw;
6034 }
6035
6036 /* Free memory occupied by the set IVS. */
6037
6038 static void
6039 iv_ca_free (struct iv_ca **ivs)
6040 {
6041 free ((*ivs)->cand_for_group);
6042 free ((*ivs)->n_cand_uses);
6043 BITMAP_FREE ((*ivs)->cands);
6044 free ((*ivs)->n_inv_var_uses);
6045 free ((*ivs)->n_inv_expr_uses);
6046 free (*ivs);
6047 *ivs = NULL;
6048 }
6049
6050 /* Dumps IVS to FILE. */
6051
6052 static void
6053 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6054 {
6055 unsigned i;
6056 comp_cost cost = iv_ca_cost (ivs);
6057
6058 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6059 cost.complexity);
6060 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6061 ivs->cand_cost, ivs->cand_use_cost.cost,
6062 ivs->cand_use_cost.complexity);
6063 bitmap_print (file, ivs->cands, " candidates: ","\n");
6064
6065 for (i = 0; i < ivs->upto; i++)
6066 {
6067 struct iv_group *group = data->vgroups[i];
6068 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6069 if (cp)
6070 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6071 group->id, cp->cand->id, cp->cost.cost,
6072 cp->cost.complexity);
6073 else
6074 fprintf (file, " group:%d --> ??\n", group->id);
6075 }
6076
6077 const char *pref = "";
6078 fprintf (file, " invariant variables: ");
6079 for (i = 1; i <= data->max_inv_var_id; i++)
6080 if (ivs->n_inv_var_uses[i])
6081 {
6082 fprintf (file, "%s%d", pref, i);
6083 pref = ", ";
6084 }
6085
6086 pref = "";
6087 fprintf (file, "\n invariant expressions: ");
6088 for (i = 1; i <= data->max_inv_expr_id; i++)
6089 if (ivs->n_inv_expr_uses[i])
6090 {
6091 fprintf (file, "%s%d", pref, i);
6092 pref = ", ";
6093 }
6094
6095 fprintf (file, "\n\n");
6096 }
6097
6098 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
6099 the new set, and store the differences in DELTA. The number of induction
6100 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
6101 is true the function tries to find a solution with minimal iv candidates. */
6102
6103 static comp_cost
6104 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6105 struct iv_cand *cand, struct iv_ca_delta **delta,
6106 unsigned *n_ivs, bool min_ncand)
6107 {
6108 unsigned i;
6109 comp_cost cost;
6110 struct iv_group *group;
6111 struct cost_pair *old_cp, *new_cp;
6112
6113 *delta = NULL;
6114 for (i = 0; i < ivs->upto; i++)
6115 {
6116 group = data->vgroups[i];
6117 old_cp = iv_ca_cand_for_group (ivs, group);
6118
6119 if (old_cp
6120 && old_cp->cand == cand)
6121 continue;
6122
6123 new_cp = get_group_iv_cost (data, group, cand);
6124 if (!new_cp)
6125 continue;
6126
6127 if (!min_ncand)
6128 {
6129 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6130 /* Skip if new_cp depends on more invariants. */
6131 if (cmp_invs > 0)
6132 continue;
6133
6134 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6135 /* Skip if new_cp is not cheaper. */
6136 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6137 continue;
6138 }
6139
6140 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6141 }
6142
6143 iv_ca_delta_commit (data, ivs, *delta, true);
6144 cost = iv_ca_cost (ivs);
6145 if (n_ivs)
6146 *n_ivs = iv_ca_n_cands (ivs);
6147 iv_ca_delta_commit (data, ivs, *delta, false);
6148
6149 return cost;
6150 }
6151
6152 /* Try narrowing set IVS by removing CAND. Return the cost of
6153 the new set and store the differences in DELTA. START is
6154 the candidate with which we start narrowing. */
6155
6156 static comp_cost
6157 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6158 struct iv_cand *cand, struct iv_cand *start,
6159 struct iv_ca_delta **delta)
6160 {
6161 unsigned i, ci;
6162 struct iv_group *group;
6163 struct cost_pair *old_cp, *new_cp, *cp;
6164 bitmap_iterator bi;
6165 struct iv_cand *cnd;
6166 comp_cost cost, best_cost, acost;
6167
6168 *delta = NULL;
6169 for (i = 0; i < data->vgroups.length (); i++)
6170 {
6171 group = data->vgroups[i];
6172
6173 old_cp = iv_ca_cand_for_group (ivs, group);
6174 if (old_cp->cand != cand)
6175 continue;
6176
6177 best_cost = iv_ca_cost (ivs);
6178 /* Start narrowing with START. */
6179 new_cp = get_group_iv_cost (data, group, start);
6180
6181 if (data->consider_all_candidates)
6182 {
6183 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6184 {
6185 if (ci == cand->id || (start && ci == start->id))
6186 continue;
6187
6188 cnd = data->vcands[ci];
6189
6190 cp = get_group_iv_cost (data, group, cnd);
6191 if (!cp)
6192 continue;
6193
6194 iv_ca_set_cp (data, ivs, group, cp);
6195 acost = iv_ca_cost (ivs);
6196
6197 if (acost < best_cost)
6198 {
6199 best_cost = acost;
6200 new_cp = cp;
6201 }
6202 }
6203 }
6204 else
6205 {
6206 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6207 {
6208 if (ci == cand->id || (start && ci == start->id))
6209 continue;
6210
6211 cnd = data->vcands[ci];
6212
6213 cp = get_group_iv_cost (data, group, cnd);
6214 if (!cp)
6215 continue;
6216
6217 iv_ca_set_cp (data, ivs, group, cp);
6218 acost = iv_ca_cost (ivs);
6219
6220 if (acost < best_cost)
6221 {
6222 best_cost = acost;
6223 new_cp = cp;
6224 }
6225 }
6226 }
6227 /* Restore to old cp for use. */
6228 iv_ca_set_cp (data, ivs, group, old_cp);
6229
6230 if (!new_cp)
6231 {
6232 iv_ca_delta_free (delta);
6233 return infinite_cost;
6234 }
6235
6236 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6237 }
6238
6239 iv_ca_delta_commit (data, ivs, *delta, true);
6240 cost = iv_ca_cost (ivs);
6241 iv_ca_delta_commit (data, ivs, *delta, false);
6242
6243 return cost;
6244 }
6245
6246 /* Try optimizing the set of candidates IVS by removing candidates other
6247 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6248 differences in DELTA. */
6249
6250 static comp_cost
6251 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6252 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6253 {
6254 bitmap_iterator bi;
6255 struct iv_ca_delta *act_delta, *best_delta;
6256 unsigned i;
6257 comp_cost best_cost, acost;
6258 struct iv_cand *cand;
6259
6260 best_delta = NULL;
6261 best_cost = iv_ca_cost (ivs);
6262
6263 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6264 {
6265 cand = data->vcands[i];
6266
6267 if (cand == except_cand)
6268 continue;
6269
6270 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6271
6272 if (acost < best_cost)
6273 {
6274 best_cost = acost;
6275 iv_ca_delta_free (&best_delta);
6276 best_delta = act_delta;
6277 }
6278 else
6279 iv_ca_delta_free (&act_delta);
6280 }
6281
6282 if (!best_delta)
6283 {
6284 *delta = NULL;
6285 return best_cost;
6286 }
6287
6288 /* Recurse to possibly remove other unnecessary ivs. */
6289 iv_ca_delta_commit (data, ivs, best_delta, true);
6290 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6291 iv_ca_delta_commit (data, ivs, best_delta, false);
6292 *delta = iv_ca_delta_join (best_delta, *delta);
6293 return best_cost;
6294 }
6295
6296 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a
6297 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6298 the corresponding cost_pair; otherwise just return BEST_CP. */
6299
6300 static struct cost_pair*
6301 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6302 unsigned int cand_idx, struct iv_cand *old_cand,
6303 struct cost_pair *best_cp)
6304 {
6305 struct iv_cand *cand;
6306 struct cost_pair *cp;
6307
6308 gcc_assert (old_cand != NULL && best_cp != NULL);
6309 if (cand_idx == old_cand->id)
6310 return best_cp;
6311
6312 cand = data->vcands[cand_idx];
6313 cp = get_group_iv_cost (data, group, cand);
6314 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6315 return cp;
6316
6317 return best_cp;
6318 }
6319
6320 /* Try breaking the locally optimal fixed-point for IVS by replacing candidates
6321 which are used by more than one iv use. For each of those candidates,
6322 this function tries to represent the iv uses under that candidate using
6323 other ones with lower local cost, then tries to prune the new set.
6324 If the new set has lower cost, it returns the new cost after recording
6325 the candidate replacement in list DELTA. */
6326
6327 static comp_cost
6328 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6329 struct iv_ca_delta **delta)
6330 {
6331 bitmap_iterator bi, bj;
6332 unsigned int i, j, k;
6333 struct iv_cand *cand;
6334 comp_cost orig_cost, acost;
6335 struct iv_ca_delta *act_delta, *tmp_delta;
6336 struct cost_pair *old_cp, *best_cp = NULL;
6337
6338 *delta = NULL;
6339 orig_cost = iv_ca_cost (ivs);
6340
6341 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6342 {
6343 if (ivs->n_cand_uses[i] == 1
6344 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6345 continue;
6346
6347 cand = data->vcands[i];
6348
6349 act_delta = NULL;
6350 /* Represent uses under current candidate using other ones with
6351 lower local cost. */
6352 for (j = 0; j < ivs->upto; j++)
6353 {
6354 struct iv_group *group = data->vgroups[j];
6355 old_cp = iv_ca_cand_for_group (ivs, group);
6356
6357 if (old_cp->cand != cand)
6358 continue;
6359
6360 best_cp = old_cp;
6361 if (data->consider_all_candidates)
6362 for (k = 0; k < data->vcands.length (); k++)
6363 best_cp = cheaper_cost_with_cand (data, group, k,
6364 old_cp->cand, best_cp);
6365 else
6366 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6367 best_cp = cheaper_cost_with_cand (data, group, k,
6368 old_cp->cand, best_cp);
6369
6370 if (best_cp == old_cp)
6371 continue;
6372
6373 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6374 }
6375 /* No need for further pruning. */
6376 if (!act_delta)
6377 continue;
6378
6379 /* Prune the new candidate set. */
6380 iv_ca_delta_commit (data, ivs, act_delta, true);
6381 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6382 iv_ca_delta_commit (data, ivs, act_delta, false);
6383 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6384
6385 if (acost < orig_cost)
6386 {
6387 *delta = act_delta;
6388 return acost;
6389 }
6390 else
6391 iv_ca_delta_free (&act_delta);
6392 }
6393
6394 return orig_cost;
6395 }
6396
6397 /* Tries to extend the set IVS in the best possible way in order to
6398 express GROUP. If ORIGINALP is true, prefer candidates from
6399 the original set of IVs, otherwise favor important candidates not
6400 based on any memory object. */
6401
6402 static bool
6403 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6404 struct iv_group *group, bool originalp)
6405 {
6406 comp_cost best_cost, act_cost;
6407 unsigned i;
6408 bitmap_iterator bi;
6409 struct iv_cand *cand;
6410 struct iv_ca_delta *best_delta = NULL, *act_delta;
6411 struct cost_pair *cp;
6412
6413 iv_ca_add_group (data, ivs, group);
6414 best_cost = iv_ca_cost (ivs);
6415 cp = iv_ca_cand_for_group (ivs, group);
6416 if (cp)
6417 {
6418 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6419 iv_ca_set_no_cp (data, ivs, group);
6420 }
6421
6422 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6423 first try important candidates not based on any memory object. Only if
6424 this fails, try the specific ones. Rationale -- in loops with many
6425 variables the best choice often is to use just one generic biv. If we
6426 added here many ivs specific to the uses, the optimization algorithm later
6427 would be likely to get stuck in a local minimum, thus causing us to create
6428 too many ivs. The approach from few ivs to more seems more likely to be
6429 successful -- starting from few ivs, replacing an expensive use by a
6430 specific iv should always be a win. */
6431 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6432 {
6433 cand = data->vcands[i];
6434
6435 if (originalp && cand->pos != IP_ORIGINAL)
6436 continue;
6437
6438 if (!originalp && cand->iv->base_object != NULL_TREE)
6439 continue;
6440
6441 if (iv_ca_cand_used_p (ivs, cand))
6442 continue;
6443
6444 cp = get_group_iv_cost (data, group, cand);
6445 if (!cp)
6446 continue;
6447
6448 iv_ca_set_cp (data, ivs, group, cp);
6449 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6450 true);
6451 iv_ca_set_no_cp (data, ivs, group);
6452 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6453
6454 if (act_cost < best_cost)
6455 {
6456 best_cost = act_cost;
6457
6458 iv_ca_delta_free (&best_delta);
6459 best_delta = act_delta;
6460 }
6461 else
6462 iv_ca_delta_free (&act_delta);
6463 }
6464
6465 if (best_cost.infinite_cost_p ())
6466 {
6467 for (i = 0; i < group->n_map_members; i++)
6468 {
6469 cp = group->cost_map + i;
6470 cand = cp->cand;
6471 if (!cand)
6472 continue;
6473
6474 /* Already tried this. */
6475 if (cand->important)
6476 {
6477 if (originalp && cand->pos == IP_ORIGINAL)
6478 continue;
6479 if (!originalp && cand->iv->base_object == NULL_TREE)
6480 continue;
6481 }
6482
6483 if (iv_ca_cand_used_p (ivs, cand))
6484 continue;
6485
6486 act_delta = NULL;
6487 iv_ca_set_cp (data, ivs, group, cp);
6488 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6489 iv_ca_set_no_cp (data, ivs, group);
6490 act_delta = iv_ca_delta_add (group,
6491 iv_ca_cand_for_group (ivs, group),
6492 cp, act_delta);
6493
6494 if (act_cost < best_cost)
6495 {
6496 best_cost = act_cost;
6497
6498 if (best_delta)
6499 iv_ca_delta_free (&best_delta);
6500 best_delta = act_delta;
6501 }
6502 else
6503 iv_ca_delta_free (&act_delta);
6504 }
6505 }
6506
6507 iv_ca_delta_commit (data, ivs, best_delta, true);
6508 iv_ca_delta_free (&best_delta);
6509
6510 return !best_cost.infinite_cost_p ();
6511 }
6512
6513 /* Finds an initial assignment of candidates to uses. */
6514
6515 static struct iv_ca *
6516 get_initial_solution (struct ivopts_data *data, bool originalp)
6517 {
6518 unsigned i;
6519 struct iv_ca *ivs = iv_ca_new (data);
6520
6521 for (i = 0; i < data->vgroups.length (); i++)
6522 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6523 {
6524 iv_ca_free (&ivs);
6525 return NULL;
6526 }
6527
6528 return ivs;
6529 }
6530
6531 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6532 points to a bool variable; if it is true, this function tries to break
6533 the locally optimal fixed-point by replacing candidates in IVS. */
6534
6535 static bool
6536 try_improve_iv_set (struct ivopts_data *data,
6537 struct iv_ca *ivs, bool *try_replace_p)
6538 {
6539 unsigned i, n_ivs;
6540 comp_cost acost, best_cost = iv_ca_cost (ivs);
6541 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6542 struct iv_cand *cand;
6543
6544 /* Try extending the set of induction variables by one. */
6545 for (i = 0; i < data->vcands.length (); i++)
6546 {
6547 cand = data->vcands[i];
6548
6549 if (iv_ca_cand_used_p (ivs, cand))
6550 continue;
6551
6552 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6553 if (!act_delta)
6554 continue;
6555
6556 /* If we successfully added the candidate and the set is small enough,
6557 try optimizing it by removing other candidates. */
6558 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6559 {
6560 iv_ca_delta_commit (data, ivs, act_delta, true);
6561 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6562 iv_ca_delta_commit (data, ivs, act_delta, false);
6563 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6564 }
6565
6566 if (acost < best_cost)
6567 {
6568 best_cost = acost;
6569 iv_ca_delta_free (&best_delta);
6570 best_delta = act_delta;
6571 }
6572 else
6573 iv_ca_delta_free (&act_delta);
6574 }
6575
6576 if (!best_delta)
6577 {
6578 /* Try removing the candidates from the set instead. */
6579 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6580
6581 if (!best_delta && *try_replace_p)
6582 {
6583 *try_replace_p = false;
6584 /* So far the candidate selection algorithm tends to choose fewer IVs
6585 so that it can handle cases in which loops have many variables
6586 but the best choice is often to use only one general biv. One
6587 weakness is that it can't handle the opposite case, in which
6588 different candidates should be chosen with respect to each use.
6589 To solve the problem, we replace candidates in the manner described
6590 in the comments of iv_ca_replace, thus giving the general algorithm
6591 a chance to break the locally optimal fixed-point in these cases. */
6592 best_cost = iv_ca_replace (data, ivs, &best_delta);
6593 }
6594
6595 if (!best_delta)
6596 return false;
6597 }
6598
6599 iv_ca_delta_commit (data, ivs, best_delta, true);
6600 gcc_assert (best_cost == iv_ca_cost (ivs));
6601 iv_ca_delta_free (&best_delta);
6602 return true;
6603 }
6604
6605 /* Attempts to find the optimal set of induction variables. We use a simple
6606 greedy heuristic: we try to replace at most one candidate in the selected
6607 solution and remove the unused ivs while this improves the cost. */
6608
6609 static struct iv_ca *
6610 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6611 {
6612 struct iv_ca *set;
6613 bool try_replace_p = true;
6614
6615 /* Get the initial solution. */
6616 set = get_initial_solution (data, originalp);
6617 if (!set)
6618 {
6619 if (dump_file && (dump_flags & TDF_DETAILS))
6620 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6621 return NULL;
6622 }
6623
6624 if (dump_file && (dump_flags & TDF_DETAILS))
6625 {
6626 fprintf (dump_file, "Initial set of candidates:\n");
6627 iv_ca_dump (data, dump_file, set);
6628 }
6629
6630 while (try_improve_iv_set (data, set, &try_replace_p))
6631 {
6632 if (dump_file && (dump_flags & TDF_DETAILS))
6633 {
6634 fprintf (dump_file, "Improved to:\n");
6635 iv_ca_dump (data, dump_file, set);
6636 }
6637 }
6638
6639 return set;
6640 }
6641
6642 static struct iv_ca *
6643 find_optimal_iv_set (struct ivopts_data *data)
6644 {
6645 unsigned i;
6646 comp_cost cost, origcost;
6647 struct iv_ca *set, *origset;
6648
6649 /* Determine the cost based on a strategy that starts with the original IVs,
6650 then try again using a strategy that prefers important candidates not
6651 based on any memory object. */
6652 origset = find_optimal_iv_set_1 (data, true);
6653 set = find_optimal_iv_set_1 (data, false);
6654
6655 if (!origset && !set)
6656 return NULL;
6657
6658 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6659 cost = set ? iv_ca_cost (set) : infinite_cost;
6660
6661 if (dump_file && (dump_flags & TDF_DETAILS))
6662 {
6663 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6664 origcost.cost, origcost.complexity);
6665 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6666 cost.cost, cost.complexity);
6667 }
6668
6669 /* Choose the one with the best cost. */
6670 if (origcost <= cost)
6671 {
6672 if (set)
6673 iv_ca_free (&set);
6674 set = origset;
6675 }
6676 else if (origset)
6677 iv_ca_free (&origset);
6678
6679 for (i = 0; i < data->vgroups.length (); i++)
6680 {
6681 struct iv_group *group = data->vgroups[i];
6682 group->selected = iv_ca_cand_for_group (set, group)->cand;
6683 }
6684
6685 return set;
6686 }
6687
6688 /* Creates a new induction variable corresponding to CAND. */
6689
6690 static void
6691 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6692 {
6693 gimple_stmt_iterator incr_pos;
6694 tree base;
6695 struct iv_use *use;
6696 struct iv_group *group;
6697 bool after = false;
6698
6699 gcc_assert (cand->iv != NULL);
6700
6701 switch (cand->pos)
6702 {
6703 case IP_NORMAL:
6704 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6705 break;
6706
6707 case IP_END:
6708 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6709 after = true;
6710 break;
6711
6712 case IP_AFTER_USE:
6713 after = true;
6714 /* fall through */
6715 case IP_BEFORE_USE:
6716 incr_pos = gsi_for_stmt (cand->incremented_at);
6717 break;
6718
6719 case IP_ORIGINAL:
6720 /* Mark that the iv is preserved. */
6721 name_info (data, cand->var_before)->preserve_biv = true;
6722 name_info (data, cand->var_after)->preserve_biv = true;
6723
6724 /* Rewrite the increment so that it uses var_before directly. */
6725 use = find_interesting_uses_op (data, cand->var_after);
6726 group = data->vgroups[use->group_id];
6727 group->selected = cand;
6728 return;
6729 }
6730
6731 gimple_add_tmp_var (cand->var_before);
6732
6733 base = unshare_expr (cand->iv->base);
6734
6735 create_iv (base, unshare_expr (cand->iv->step),
6736 cand->var_before, data->current_loop,
6737 &incr_pos, after, &cand->var_before, &cand->var_after);
6738 }
6739
6740 /* Creates new induction variables described in SET. */
6741
6742 static void
6743 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6744 {
6745 unsigned i;
6746 struct iv_cand *cand;
6747 bitmap_iterator bi;
6748
6749 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6750 {
6751 cand = data->vcands[i];
6752 create_new_iv (data, cand);
6753 }
6754
6755 if (dump_file && (dump_flags & TDF_DETAILS))
6756 {
6757 fprintf (dump_file, "Selected IV set for loop %d",
6758 data->current_loop->num);
6759 if (data->loop_loc != UNKNOWN_LOCATION)
6760 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6761 LOCATION_LINE (data->loop_loc));
6762 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6763 avg_loop_niter (data->current_loop));
6764 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6765 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6766 {
6767 cand = data->vcands[i];
6768 dump_cand (dump_file, cand);
6769 }
6770 fprintf (dump_file, "\n");
6771 }
6772 }
6773
6774 /* Rewrites USE (definition of iv used in a nonlinear expression)
6775 using candidate CAND. */
6776
6777 static void
6778 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6779 struct iv_use *use, struct iv_cand *cand)
6780 {
6781 gassign *ass;
6782 gimple_stmt_iterator bsi;
6783 tree comp, type = get_use_type (use), tgt;
6784
6785 /* An important special case -- if we are asked to express value of
6786 the original iv by itself, just exit; there is no need to
6787 introduce a new computation (that might also need casting the
6788 variable to unsigned and back). */
6789 if (cand->pos == IP_ORIGINAL
6790 && cand->incremented_at == use->stmt)
6791 {
6792 tree op = NULL_TREE;
6793 enum tree_code stmt_code;
6794
6795 gcc_assert (is_gimple_assign (use->stmt));
6796 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6797
6798 /* Check whether we may leave the computation unchanged.
6799 This is the case only if it does not rely on other
6800 computations in the loop -- otherwise, the computation
6801 we rely upon may be removed in remove_unused_ivs,
6802 thus leading to ICE. */
6803 stmt_code = gimple_assign_rhs_code (use->stmt);
6804 if (stmt_code == PLUS_EXPR
6805 || stmt_code == MINUS_EXPR
6806 || stmt_code == POINTER_PLUS_EXPR)
6807 {
6808 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6809 op = gimple_assign_rhs2 (use->stmt);
6810 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6811 op = gimple_assign_rhs1 (use->stmt);
6812 }
6813
6814 if (op != NULL_TREE)
6815 {
6816 if (expr_invariant_in_loop_p (data->current_loop, op))
6817 return;
6818 if (TREE_CODE (op) == SSA_NAME)
6819 {
6820 struct iv *iv = get_iv (data, op);
6821 if (iv != NULL && integer_zerop (iv->step))
6822 return;
6823 }
6824 }
6825 }
6826
6827 switch (gimple_code (use->stmt))
6828 {
6829 case GIMPLE_PHI:
6830 tgt = PHI_RESULT (use->stmt);
6831
6832 /* If we should keep the biv, do not replace it. */
6833 if (name_info (data, tgt)->preserve_biv)
6834 return;
6835
6836 bsi = gsi_after_labels (gimple_bb (use->stmt));
6837 break;
6838
6839 case GIMPLE_ASSIGN:
6840 tgt = gimple_assign_lhs (use->stmt);
6841 bsi = gsi_for_stmt (use->stmt);
6842 break;
6843
6844 default:
6845 gcc_unreachable ();
6846 }
6847
6848 aff_tree aff_inv, aff_var;
6849 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6850 use, cand, &aff_inv, &aff_var))
6851 gcc_unreachable ();
6852
6853 unshare_aff_combination (&aff_inv);
6854 unshare_aff_combination (&aff_var);
6855 /* Prefer a CSE opportunity over hoisting a loop invariant by adding the
6856 offset last, so that iv_uses with different offsets can be CSEed. */
6857 poly_widest_int offset = aff_inv.offset;
6858 aff_inv.offset = 0;
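/* Hypothetical illustration (not taken from any test case): two uses such as
   *(p + i * 4 + 8) and *(p + i * 4 + 24) expressed by the same candidate
   share the invariant and variable parts built below and differ only in
   OFFSET, so the common computation can be CSEed and only the final
   additions of 8 and 24 stay distinct.  */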
6859
6860 gimple_seq stmt_list = NULL, seq = NULL;
6861 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6862 tree comp_op2 = aff_combination_to_tree (&aff_var);
6863 gcc_assert (comp_op1 && comp_op2);
6864
6865 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6866 gimple_seq_add_seq (&stmt_list, seq);
6867 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6868 gimple_seq_add_seq (&stmt_list, seq);
6869
6870 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6871 std::swap (comp_op1, comp_op2);
6872
6873 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6874 {
6875 comp = fold_build_pointer_plus (comp_op1,
6876 fold_convert (sizetype, comp_op2));
6877 comp = fold_build_pointer_plus (comp,
6878 wide_int_to_tree (sizetype, offset));
6879 }
6880 else
6881 {
6882 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6883 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6884 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6885 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6886 }
6887
6888 comp = fold_convert (type, comp);
6889 if (!valid_gimple_rhs_p (comp)
6890 || (gimple_code (use->stmt) != GIMPLE_PHI
6891 /* We can't allow re-allocating the stmt as it might be pointed
6892 to still. */
6893 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6894 >= gimple_num_ops (gsi_stmt (bsi)))))
6895 {
6896 comp = force_gimple_operand (comp, &seq, true, NULL);
6897 gimple_seq_add_seq (&stmt_list, seq);
6898 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6899 {
6900 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6901 /* As this isn't a plain copy we have to reset alignment
6902 information. */
6903 if (SSA_NAME_PTR_INFO (comp))
6904 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6905 }
6906 }
6907
6908 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6909 if (gimple_code (use->stmt) == GIMPLE_PHI)
6910 {
6911 ass = gimple_build_assign (tgt, comp);
6912 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6913
6914 bsi = gsi_for_stmt (use->stmt);
6915 remove_phi_node (&bsi, false);
6916 }
6917 else
6918 {
6919 gimple_assign_set_rhs_from_tree (&bsi, comp);
6920 use->stmt = gsi_stmt (bsi);
6921 }
6922 }
6923
6924 /* Performs a peephole optimization to reorder the iv update statement with
6925 a mem ref to enable instruction combining in later phases. The mem ref uses
6926 the iv value before the update, so the reordering transformation requires
6927 adjustment of the offset. CAND is the selected IV_CAND.
6928
6929 Example:
6930
6931 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6932 iv2 = iv1 + 1;
6933
6934 if (t < val) (1)
6935 goto L;
6936 goto Head;
6937
6938
6939 Directly propagating t over to (1) would introduce an overlapping live range
6940 and thus increase register pressure. This peephole transforms it into:
6941
6942
6943 iv2 = iv1 + 1;
6944 t = MEM_REF (base, iv2, 8, 8);
6945 if (t < val)
6946 goto L;
6947 goto Head;
6948 */
6949
6950 static void
6951 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6952 {
6953 tree var_after;
6954 gimple *iv_update, *stmt;
6955 basic_block bb;
6956 gimple_stmt_iterator gsi, gsi_iv;
6957
6958 if (cand->pos != IP_NORMAL)
6959 return;
6960
6961 var_after = cand->var_after;
6962 iv_update = SSA_NAME_DEF_STMT (var_after);
6963
6964 bb = gimple_bb (iv_update);
6965 gsi = gsi_last_nondebug_bb (bb);
6966 stmt = gsi_stmt (gsi);
6967
6968 /* Only handle conditional statement for now. */
6969 if (gimple_code (stmt) != GIMPLE_COND)
6970 return;
6971
6972 gsi_prev_nondebug (&gsi);
6973 stmt = gsi_stmt (gsi);
6974 if (stmt != iv_update)
6975 return;
6976
6977 gsi_prev_nondebug (&gsi);
6978 if (gsi_end_p (gsi))
6979 return;
6980
6981 stmt = gsi_stmt (gsi);
6982 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6983 return;
6984
6985 if (stmt != use->stmt)
6986 return;
6987
6988 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6989 return;
6990
6991 if (dump_file && (dump_flags & TDF_DETAILS))
6992 {
6993 fprintf (dump_file, "Reordering \n");
6994 print_gimple_stmt (dump_file, iv_update, 0);
6995 print_gimple_stmt (dump_file, use->stmt, 0);
6996 fprintf (dump_file, "\n");
6997 }
6998
6999 gsi = gsi_for_stmt (use->stmt);
7000 gsi_iv = gsi_for_stmt (iv_update);
7001 gsi_move_before (&gsi_iv, &gsi);
7002
7003 cand->pos = IP_BEFORE_USE;
7004 cand->incremented_at = use->stmt;
7005 }
7006
7007 /* Rewrites USE (address that is an iv) using candidate CAND. */
7008
7009 static void
7010 rewrite_use_address (struct ivopts_data *data,
7011 struct iv_use *use, struct iv_cand *cand)
7012 {
7013 aff_tree aff;
7014 bool ok;
7015
7016 adjust_iv_update_pos (cand, use);
7017 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7018 gcc_assert (ok);
7019 unshare_aff_combination (&aff);
7020
7021 /* To avoid undefined overflow problems, all IV candidates use unsigned
7022 integer types. The drawback is that this makes it impossible for
7023 create_mem_ref to distinguish an IV that is based on a memory object
7024 from one that represents simply an offset.
7025
7026 To work around this problem, we pass a hint to create_mem_ref that
7027 indicates which variable (if any) in aff is an IV based on a memory
7028 object. Note that we only consider the candidate. If this is not
7029 based on an object, the base of the reference is in some subexpression
7030 of the use -- but these will use pointer types, so they are recognized
7031 by the create_mem_ref heuristics anyway. */
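/* Hypothetical illustration of the hint: for a use like p[i] rewritten by a
   candidate whose base object is p, AFF may decompose into the candidate
   variable plus an offset; passing that variable as BASE_HINT suggests to
   create_mem_ref that it should sit in the base position of the generated
   memory reference rather than be treated as a scaled index.  */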
7032 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7033 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7034 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7035 tree type = TREE_TYPE (*use->op_p);
7036 unsigned int align = get_object_alignment (*use->op_p);
7037 if (align != TYPE_ALIGN (type))
7038 type = build_aligned_type (type, align);
7039
7040 tree ref = create_mem_ref (&bsi, type, &aff,
7041 reference_alias_ptr_type (*use->op_p),
7042 iv, base_hint, data->speed);
7043
7044 copy_ref_info (ref, *use->op_p);
7045 *use->op_p = ref;
7046 }
7047
7048 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7049 candidate CAND. */
7050
7051 static void
7052 rewrite_use_compare (struct ivopts_data *data,
7053 struct iv_use *use, struct iv_cand *cand)
7054 {
7055 tree comp, op, bound;
7056 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7057 enum tree_code compare;
7058 struct iv_group *group = data->vgroups[use->group_id];
7059 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7060
7061 bound = cp->value;
7062 if (bound)
7063 {
7064 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7065 tree var_type = TREE_TYPE (var);
7066 gimple_seq stmts;
7067
7068 if (dump_file && (dump_flags & TDF_DETAILS))
7069 {
7070 fprintf (dump_file, "Replacing exit test: ");
7071 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7072 }
7073 compare = cp->comp;
7074 bound = unshare_expr (fold_convert (var_type, bound));
7075 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7076 if (stmts)
7077 gsi_insert_seq_on_edge_immediate (
7078 loop_preheader_edge (data->current_loop),
7079 stmts);
7080
7081 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7082 gimple_cond_set_lhs (cond_stmt, var);
7083 gimple_cond_set_code (cond_stmt, compare);
7084 gimple_cond_set_rhs (cond_stmt, op);
7085 return;
7086 }
7087
7088 /* The induction variable elimination failed; just express the original
7089 giv. */
7090 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7091 gcc_assert (comp != NULL_TREE);
7092 gcc_assert (use->op_p != NULL);
7093 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7094 SSA_NAME_VAR (*use->op_p),
7095 true, GSI_SAME_STMT);
7096 }
7097
7098 /* Rewrite the groups using the selected induction variables. */
7099
7100 static void
7101 rewrite_groups (struct ivopts_data *data)
7102 {
7103 unsigned i, j;
7104
7105 for (i = 0; i < data->vgroups.length (); i++)
7106 {
7107 struct iv_group *group = data->vgroups[i];
7108 struct iv_cand *cand = group->selected;
7109
7110 gcc_assert (cand);
7111
7112 if (group->type == USE_NONLINEAR_EXPR)
7113 {
7114 for (j = 0; j < group->vuses.length (); j++)
7115 {
7116 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7117 update_stmt (group->vuses[j]->stmt);
7118 }
7119 }
7120 else if (group->type == USE_ADDRESS)
7121 {
7122 for (j = 0; j < group->vuses.length (); j++)
7123 {
7124 rewrite_use_address (data, group->vuses[j], cand);
7125 update_stmt (group->vuses[j]->stmt);
7126 }
7127 }
7128 else
7129 {
7130 gcc_assert (group->type == USE_COMPARE);
7131
7132 for (j = 0; j < group->vuses.length (); j++)
7133 {
7134 rewrite_use_compare (data, group->vuses[j], cand);
7135 update_stmt (group->vuses[j]->stmt);
7136 }
7137 }
7138 }
7139 }
7140
7141 /* Removes the ivs that are not used after rewriting. */
7142
7143 static void
7144 remove_unused_ivs (struct ivopts_data *data)
7145 {
7146 unsigned j;
7147 bitmap_iterator bi;
7148 bitmap toremove = BITMAP_ALLOC (NULL);
7149
7150 /* Figure out an order in which to release SSA DEFs so that we don't
7151 release something that we'd have to propagate into a debug stmt
7152 afterwards. */
7153 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7154 {
7155 struct version_info *info;
7156
7157 info = ver_info (data, j);
7158 if (info->iv
7159 && !integer_zerop (info->iv->step)
7160 && !info->inv_id
7161 && !info->iv->nonlin_use
7162 && !info->preserve_biv)
7163 {
7164 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7165
7166 tree def = info->iv->ssa_name;
7167
7168 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7169 {
7170 imm_use_iterator imm_iter;
7171 use_operand_p use_p;
7172 gimple *stmt;
7173 int count = 0;
7174
7175 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7176 {
7177 if (!gimple_debug_bind_p (stmt))
7178 continue;
7179
7180 /* We just want to determine whether to do nothing
7181 (count == 0), to substitute the computed
7182 expression into a single use of the SSA DEF by
7183 itself (count == 1), or to use a debug temp
7184 because the SSA DEF is used multiple times or as
7185 part of a larger expression (count > 1). */
7186 count++;
7187 if (gimple_debug_bind_get_value (stmt) != def)
7188 count++;
7189
7190 if (count > 1)
7191 BREAK_FROM_IMM_USE_STMT (imm_iter);
7192 }
7193
7194 if (!count)
7195 continue;
7196
7197 struct iv_use dummy_use;
7198 struct iv_cand *best_cand = NULL, *cand;
7199 unsigned i, best_pref = 0, cand_pref;
7200
7201 memset (&dummy_use, 0, sizeof (dummy_use));
7202 dummy_use.iv = info->iv;
7203 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7204 {
7205 cand = data->vgroups[i]->selected;
7206 if (cand == best_cand)
7207 continue;
7208 cand_pref = operand_equal_p (cand->iv->step,
7209 info->iv->step, 0)
7210 ? 4 : 0;
7211 cand_pref
7212 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7213 == TYPE_MODE (TREE_TYPE (info->iv->base))
7214 ? 2 : 0;
7215 cand_pref
7216 += TREE_CODE (cand->iv->base) == INTEGER_CST
7217 ? 1 : 0;
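/* (Hypothetical example of the scoring above: a candidate with the same
   step and mode as the removed iv but a non-constant base scores
   4 + 2 + 0 == 6, beating one that matches only the mode, which scores 2.) */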
7218 if (best_cand == NULL || best_pref < cand_pref)
7219 {
7220 best_cand = cand;
7221 best_pref = cand_pref;
7222 }
7223 }
7224
7225 if (!best_cand)
7226 continue;
7227
7228 tree comp = get_computation_at (data->current_loop,
7229 SSA_NAME_DEF_STMT (def),
7230 &dummy_use, best_cand);
7231 if (!comp)
7232 continue;
7233
7234 if (count > 1)
7235 {
7236 tree vexpr = make_node (DEBUG_EXPR_DECL);
7237 DECL_ARTIFICIAL (vexpr) = 1;
7238 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7239 if (SSA_NAME_VAR (def))
7240 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7241 else
7242 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7243 gdebug *def_temp
7244 = gimple_build_debug_bind (vexpr, comp, NULL);
7245 gimple_stmt_iterator gsi;
7246
7247 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7248 gsi = gsi_after_labels (gimple_bb
7249 (SSA_NAME_DEF_STMT (def)));
7250 else
7251 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7252
7253 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7254 comp = vexpr;
7255 }
7256
7257 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7258 {
7259 if (!gimple_debug_bind_p (stmt))
7260 continue;
7261
7262 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7263 SET_USE (use_p, comp);
7264
7265 update_stmt (stmt);
7266 }
7267 }
7268 }
7269 }
7270
7271 release_defs_bitset (toremove);
7272
7273 BITMAP_FREE (toremove);
7274 }
7275
7276 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7277 for hash_map::traverse. */
7278
7279 bool
7280 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7281 {
7282 free (value);
7283 return true;
7284 }
7285
7286 /* Frees data allocated by the optimization of a single loop. */
7287
7288 static void
7289 free_loop_data (struct ivopts_data *data)
7290 {
7291 unsigned i, j;
7292 bitmap_iterator bi;
7293 tree obj;
7294
7295 if (data->niters)
7296 {
7297 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7298 delete data->niters;
7299 data->niters = NULL;
7300 }
7301
7302 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7303 {
7304 struct version_info *info;
7305
7306 info = ver_info (data, i);
7307 info->iv = NULL;
7308 info->has_nonlin_use = false;
7309 info->preserve_biv = false;
7310 info->inv_id = 0;
7311 }
7312 bitmap_clear (data->relevant);
7313 bitmap_clear (data->important_candidates);
7314
7315 for (i = 0; i < data->vgroups.length (); i++)
7316 {
7317 struct iv_group *group = data->vgroups[i];
7318
7319 for (j = 0; j < group->vuses.length (); j++)
7320 free (group->vuses[j]);
7321 group->vuses.release ();
7322
7323 BITMAP_FREE (group->related_cands);
7324 for (j = 0; j < group->n_map_members; j++)
7325 {
7326 if (group->cost_map[j].inv_vars)
7327 BITMAP_FREE (group->cost_map[j].inv_vars);
7328 if (group->cost_map[j].inv_exprs)
7329 BITMAP_FREE (group->cost_map[j].inv_exprs);
7330 }
7331
7332 free (group->cost_map);
7333 free (group);
7334 }
7335 data->vgroups.truncate (0);
7336
7337 for (i = 0; i < data->vcands.length (); i++)
7338 {
7339 struct iv_cand *cand = data->vcands[i];
7340
7341 if (cand->inv_vars)
7342 BITMAP_FREE (cand->inv_vars);
7343 if (cand->inv_exprs)
7344 BITMAP_FREE (cand->inv_exprs);
7345 free (cand);
7346 }
7347 data->vcands.truncate (0);
7348
7349 if (data->version_info_size < num_ssa_names)
7350 {
7351 data->version_info_size = 2 * num_ssa_names;
7352 free (data->version_info);
7353 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7354 }
7355
7356 data->max_inv_var_id = 0;
7357 data->max_inv_expr_id = 0;
7358
7359 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7360 SET_DECL_RTL (obj, NULL_RTX);
7361
7362 decl_rtl_to_reset.truncate (0);
7363
7364 data->inv_expr_tab->empty ();
7365
7366 data->iv_common_cand_tab->empty ();
7367 data->iv_common_cands.truncate (0);
7368 }
7369
7370 /* Finalizes the data structures used by the iv optimization pass. */
7372
7373 static void
7374 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7375 {
7376 free_loop_data (data);
7377 free (data->version_info);
7378 BITMAP_FREE (data->relevant);
7379 BITMAP_FREE (data->important_candidates);
7380
7381 decl_rtl_to_reset.release ();
7382 data->vgroups.release ();
7383 data->vcands.release ();
7384 delete data->inv_expr_tab;
7385 data->inv_expr_tab = NULL;
7386 free_affine_expand_cache (&data->name_expansion_cache);
7387 delete data->iv_common_cand_tab;
7388 data->iv_common_cand_tab = NULL;
7389 data->iv_common_cands.release ();
7390 obstack_free (&data->iv_obstack, NULL);
7391 }
7392
7393 /* Returns true if the loop body BODY includes any function calls. */
7394
7395 static bool
7396 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7397 {
7398 gimple_stmt_iterator gsi;
7399 unsigned i;
7400
7401 for (i = 0; i < num_nodes; i++)
7402 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7403 {
7404 gimple *stmt = gsi_stmt (gsi);
7405 if (is_gimple_call (stmt)
7406 && !gimple_call_internal_p (stmt)
7407 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7408 return true;
7409 }
7410 return false;
7411 }
7412
7413 /* Optimizes the LOOP. Returns true if anything changed. */
7414
7415 static bool
7416 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7417 {
7418 bool changed = false;
7419 struct iv_ca *iv_ca;
7420 edge exit = single_dom_exit (loop);
7421 basic_block *body;
7422
7423 gcc_assert (!data->niters);
7424 data->current_loop = loop;
7425 data->loop_loc = find_loop_location (loop);
7426 data->speed = optimize_loop_for_speed_p (loop);
7427
7428 if (dump_file && (dump_flags & TDF_DETAILS))
7429 {
7430 fprintf (dump_file, "Processing loop %d", loop->num);
7431 if (data->loop_loc != UNKNOWN_LOCATION)
7432 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7433 LOCATION_LINE (data->loop_loc));
7434 fprintf (dump_file, "\n");
7435
7436 if (exit)
7437 {
7438 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7439 exit->src->index, exit->dest->index);
7440 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7441 fprintf (dump_file, "\n");
7442 }
7443
7444 fprintf (dump_file, "\n");
7445 }
7446
7447 body = get_loop_body (loop);
7448 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7449 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7450 free (body);
7451
7452 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7453
7454 /* For each ssa name determines whether it behaves as an induction variable
7455 in some loop. */
7456 if (!find_induction_variables (data))
7457 goto finish;
7458
7459 /* Finds interesting uses (item 1). */
7460 find_interesting_uses (data);
7461 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7462 goto finish;
7463
7464 /* Finds candidates for the induction variables (item 2). */
7465 find_iv_candidates (data);
7466
7467 /* Calculates the costs (item 3, part 1). */
7468 determine_iv_costs (data);
7469 determine_group_iv_costs (data);
7470 determine_set_costs (data);
7471
7472 /* Find the optimal set of induction variables (item 3, part 2). */
7473 iv_ca = find_optimal_iv_set (data);
7474 if (!iv_ca)
7475 goto finish;
7476 changed = true;
7477
7478 /* Create the new induction variables (item 4, part 1). */
7479 create_new_ivs (data, iv_ca);
7480 iv_ca_free (&iv_ca);
7481
7482 /* Rewrite the uses (item 4, part 2). */
7483 rewrite_groups (data);
7484
7485 /* Remove the ivs that are unused after rewriting. */
7486 remove_unused_ivs (data);
7487
7488 /* We have changed the structure of induction variables; it might happen
7489 that definitions in the scev database refer to some of them that were
7490 eliminated. */
7491 scev_reset ();
7492
7493 finish:
7494 free_loop_data (data);
7495
7496 return changed;
7497 }
7498
7499 /* Main entry point. Optimizes induction variables in loops. */
7500
7501 void
7502 tree_ssa_iv_optimize (void)
7503 {
7504 struct loop *loop;
7505 struct ivopts_data data;
7506
7507 tree_ssa_iv_optimize_init (&data);
7508
7509 /* Optimize the loops starting with the innermost ones. */
7510 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7511 {
7512 if (dump_file && (dump_flags & TDF_DETAILS))
7513 flow_loop_dump (loop, dump_file, NULL, 1);
7514
7515 tree_ssa_iv_optimize_loop (&data, loop);
7516 }
7517
7518 tree_ssa_iv_optimize_finalize (&data);
7519 }
7520
7521 #include "gt-tree-ssa-loop-ivopts.h"