1 /* Induction variable optimizations.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does so in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated. */
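
/* A purely illustrative example (not from this file): on a target where
   pointer-increment style addressing is cheap, a loop like

	for (i = 0; i < n; i++)
	  a[i] = b[i] + c;

   may end up being rewritten by this pass to use pointer-style ivs,
   conceptually

	for (p = a, q = b; p < a + n; p++, q++)
	  *p = *q + c;

   i.e. the index i is eliminated, the per-iteration address computations
   a + i and b + i are strength-reduced to pointer increments, and the
   exit test is rewritten against a bound computed before the loop.  The
   actual choice depends on the target cost model described above.  */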
68
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
115
116 /* The infinite cost. */
117 #define INFTY 10000000
118
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
122
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
128 {
129 niter = likely_max_stmt_executions_int (loop);
130
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133 }
134
135 return niter;
136 }
137
138 struct iv_use;
139
140 /* Representation of the induction variable. */
141 struct iv
142 {
143 tree base; /* Initial value of the iv. */
144 tree base_object; /* A memory object to which the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147 struct iv_use *nonlin_use; /* The use recorded for this iv in a nonlinear expression, if any. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
152 };
153
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
156 {
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
163 };
164
165 /* Types of uses. */
166 enum use_type
167 {
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_ADDRESS, /* Use in an address. */
170 USE_COMPARE /* Use is a compare. */
171 };
172
173 /* Cost of a computation. */
174 struct comp_cost
175 {
176 comp_cost (): cost (0), complexity (0), scratch (0)
177 {}
178
179 comp_cost (int cost, unsigned complexity, int scratch = 0)
180 : cost (cost), complexity (complexity), scratch (scratch)
181 {}
182
183 /* Returns true if COST is infinite. */
184 bool infinite_cost_p ();
185
186 /* Adds costs COST1 and COST2. */
187 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
188
189 /* Adds COST to the comp_cost. */
190 comp_cost operator+= (comp_cost cost);
191
192 /* Adds constant C to this comp_cost. */
193 comp_cost operator+= (HOST_WIDE_INT c);
194
195 /* Subtracts constant C from this comp_cost. */
196 comp_cost operator-= (HOST_WIDE_INT c);
197
198 /* Divide the comp_cost by constant C. */
199 comp_cost operator/= (HOST_WIDE_INT c);
200
201 /* Multiply the comp_cost by constant C. */
202 comp_cost operator*= (HOST_WIDE_INT c);
203
204 /* Subtracts cost COST2 from COST1. */
205 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
206
207 /* Subtracts COST from this comp_cost. */
208 comp_cost operator-= (comp_cost cost);
209
210 /* Returns true if COST1 is smaller than COST2. */
211 friend bool operator< (comp_cost cost1, comp_cost cost2);
212
213 /* Returns true if COST1 and COST2 are equal. */
214 friend bool operator== (comp_cost cost1, comp_cost cost2);
215
216 /* Returns true if COST1 is smaller than or equal to COST2. */
217 friend bool operator<= (comp_cost cost1, comp_cost cost2);
218
219 int cost; /* The runtime cost. */
220 unsigned complexity; /* The estimate of the complexity of the code for
221 the computation (in no concrete units --
222 complexity field should be larger for more
223 complex expressions and addressing modes). */
224 int scratch; /* Scratch used during cost computation. */
225 };
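
/* Note on comparison (descriptive, see the operators below): costs are
   ordered primarily by the runtime COST field; COMPLEXITY only breaks
   ties, acting as a secondary preference for simpler expressions and
   addressing modes.  */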
226
227 static const comp_cost no_cost;
228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
229
230 bool
231 comp_cost::infinite_cost_p ()
232 {
233 return cost == INFTY;
234 }
235
236 comp_cost
237 operator+ (comp_cost cost1, comp_cost cost2)
238 {
239 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
240 return infinite_cost;
241
242 cost1.cost += cost2.cost;
243 cost1.complexity += cost2.complexity;
244
245 return cost1;
246 }
247
248 comp_cost
249 operator- (comp_cost cost1, comp_cost cost2)
250 {
251 if (cost1.infinite_cost_p ())
252 return infinite_cost;
253
254 gcc_assert (!cost2.infinite_cost_p ());
255
256 cost1.cost -= cost2.cost;
257 cost1.complexity -= cost2.complexity;
258
259 return cost1;
260 }
261
262 comp_cost
263 comp_cost::operator+= (comp_cost cost)
264 {
265 *this = *this + cost;
266 return *this;
267 }
268
269 comp_cost
270 comp_cost::operator+= (HOST_WIDE_INT c)
271 {
272 if (infinite_cost_p ())
273 return *this;
274
275 this->cost += c;
276
277 return *this;
278 }
279
280 comp_cost
281 comp_cost::operator-= (HOST_WIDE_INT c)
282 {
283 if (infinite_cost_p ())
284 return *this;
285
286 this->cost -= c;
287
288 return *this;
289 }
290
291 comp_cost
292 comp_cost::operator/= (HOST_WIDE_INT c)
293 {
294 if (infinite_cost_p ())
295 return *this;
296
297 this->cost /= c;
298
299 return *this;
300 }
301
302 comp_cost
303 comp_cost::operator*= (HOST_WIDE_INT c)
304 {
305 if (infinite_cost_p ())
306 return *this;
307
308 this->cost *= c;
309
310 return *this;
311 }
312
313 comp_cost
314 comp_cost::operator-= (comp_cost cost)
315 {
316 *this = *this - cost;
317 return *this;
318 }
319
320 bool
321 operator< (comp_cost cost1, comp_cost cost2)
322 {
323 if (cost1.cost == cost2.cost)
324 return cost1.complexity < cost2.complexity;
325
326 return cost1.cost < cost2.cost;
327 }
328
329 bool
330 operator== (comp_cost cost1, comp_cost cost2)
331 {
332 return cost1.cost == cost2.cost
333 && cost1.complexity == cost2.complexity;
334 }
335
336 bool
337 operator<= (comp_cost cost1, comp_cost cost2)
338 {
339 return cost1 < cost2 || cost1 == cost2;
340 }
341
342 struct iv_inv_expr_ent;
343
344 /* The candidate - cost pair. */
345 struct cost_pair
346 {
347 struct iv_cand *cand; /* The candidate. */
348 comp_cost cost; /* The cost. */
349 enum tree_code comp; /* For iv elimination, the comparison. */
350 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
351 preserved when representing iv_use with iv_cand. */
352 bitmap inv_exprs; /* The list of newly created invariant expressions
353 when representing iv_use with iv_cand. */
354 tree value; /* For final value elimination, the expression for
355 the final value of the iv. For iv elimination,
356 the new bound to compare with. */
357 };
358
359 /* Use. */
360 struct iv_use
361 {
362 unsigned id; /* The id of the use. */
363 unsigned group_id; /* The group id the use belongs to. */
364 enum use_type type; /* Type of the use. */
365 struct iv *iv; /* The induction variable it is based on. */
366 gimple *stmt; /* Statement in that it occurs. */
367 tree *op_p; /* The place where it occurs. */
368
369 tree addr_base; /* Base address with const offset stripped. */
370 poly_uint64_pod addr_offset;
371 /* Const offset stripped from base address. */
372 };
373
374 /* Group of uses. */
375 struct iv_group
376 {
377 /* The id of the group. */
378 unsigned id;
379 /* Uses of the group are of the same type. */
380 enum use_type type;
381 /* The set of "related" IV candidates, plus the important ones. */
382 bitmap related_cands;
383 /* Number of IV candidates in the cost_map. */
384 unsigned n_map_members;
385 /* The costs with respect to the iv candidates. */
386 struct cost_pair *cost_map;
387 /* The selected candidate for the group. */
388 struct iv_cand *selected;
389 /* Uses in the group. */
390 vec<struct iv_use *> vuses;
391 };
392
393 /* The position where the iv is computed. */
394 enum iv_position
395 {
396 IP_NORMAL, /* At the end, just before the exit condition. */
397 IP_END, /* At the end of the latch block. */
398 IP_BEFORE_USE, /* Immediately before a specific use. */
399 IP_AFTER_USE, /* Immediately after a specific use. */
400 IP_ORIGINAL /* The original biv. */
401 };
402
403 /* The induction variable candidate. */
404 struct iv_cand
405 {
406 unsigned id; /* The number of the candidate. */
407 bool important; /* Whether this is an "important" candidate, i.e. such
408 that it should be considered by all uses. */
409 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
410 gimple *incremented_at;/* For original biv, the statement where it is
411 incremented. */
412 tree var_before; /* The variable used for it before increment. */
413 tree var_after; /* The variable used for it after increment. */
414 struct iv *iv; /* The value of the candidate. NULL for
415 "pseudocandidate" used to indicate the possibility
416 to replace the final value of an iv by direct
417 computation of the value. */
418 unsigned cost; /* Cost of the candidate. */
419 unsigned cost_step; /* Cost of the candidate's increment operation. */
420 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
421 where it is incremented. */
422 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
423 iv_cand. */
424 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
425 handle it as a new invariant expression which will
426 be hoisted out of loop. */
427 struct iv *orig_iv; /* The original iv if this cand is added from biv with
428 smaller type. */
429 };
430
431 /* Hashtable entry for common candidate derived from iv uses. */
432 struct iv_common_cand
433 {
434 tree base;
435 tree step;
436 /* IV uses from which this common candidate is derived. */
437 auto_vec<struct iv_use *> uses;
438 hashval_t hash;
439 };
440
441 /* Hashtable helpers. */
442
443 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
444 {
445 static inline hashval_t hash (const iv_common_cand *);
446 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
447 };
448
449 /* Hash function for possible common candidates. */
450
451 inline hashval_t
452 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
453 {
454 return ccand->hash;
455 }
456
457 /* Hash table equality function for common candidates. */
458
459 inline bool
460 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
461 const iv_common_cand *ccand2)
462 {
463 return (ccand1->hash == ccand2->hash
464 && operand_equal_p (ccand1->base, ccand2->base, 0)
465 && operand_equal_p (ccand1->step, ccand2->step, 0)
466 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
467 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
468 }
469
470 /* Loop invariant expression hashtable entry. */
471
472 struct iv_inv_expr_ent
473 {
474 /* Tree expression of the entry. */
475 tree expr;
476 /* Unique identifier. */
477 int id;
478 /* Hash value. */
479 hashval_t hash;
480 };
481
482 /* Sort iv_inv_expr_ent pair A and B by id field. */
483
484 static int
485 sort_iv_inv_expr_ent (const void *a, const void *b)
486 {
487 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
488 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
489
490 unsigned id1 = (*e1)->id;
491 unsigned id2 = (*e2)->id;
492
493 if (id1 < id2)
494 return -1;
495 else if (id1 > id2)
496 return 1;
497 else
498 return 0;
499 }
500
501 /* Hashtable helpers. */
502
503 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
504 {
505 static inline hashval_t hash (const iv_inv_expr_ent *);
506 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
507 };
508
509 /* Hash function for loop invariant expressions. */
510
511 inline hashval_t
512 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
513 {
514 return expr->hash;
515 }
516
517 /* Hash table equality function for expressions. */
518
519 inline bool
520 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
521 const iv_inv_expr_ent *expr2)
522 {
523 return expr1->hash == expr2->hash
524 && operand_equal_p (expr1->expr, expr2->expr, 0);
525 }
526
527 struct ivopts_data
528 {
529 /* The currently optimized loop. */
530 struct loop *current_loop;
531 source_location loop_loc;
532
533 /* Numbers of iterations for all exits of the current loop. */
534 hash_map<edge, tree_niter_desc *> *niters;
535
536 /* Number of registers used in it. */
537 unsigned regs_used;
538
539 /* The size of version_info array allocated. */
540 unsigned version_info_size;
541
542 /* The array of information for the ssa names. */
543 struct version_info *version_info;
544
545 /* The hashtable of loop invariant expressions created
546 by ivopt. */
547 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
548
549 /* The bitmap of indices in version_info whose value was changed. */
550 bitmap relevant;
551
552 /* The uses of induction variables. */
553 vec<iv_group *> vgroups;
554
555 /* The candidates. */
556 vec<iv_cand *> vcands;
557
558 /* A bitmap of important candidates. */
559 bitmap important_candidates;
560
561 /* Cache used by tree_to_aff_combination_expand. */
562 hash_map<tree, name_expansion *> *name_expansion_cache;
563
564 /* The hashtable of common candidates derived from iv uses. */
565 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
566
567 /* The common candidates. */
568 vec<iv_common_cand *> iv_common_cands;
569
570 /* The maximum invariant variable id. */
571 unsigned max_inv_var_id;
572
573 /* The maximum invariant expression id. */
574 unsigned max_inv_expr_id;
575
576 /* Number of no_overflow BIVs which are not used in memory address. */
577 unsigned bivs_not_used_in_addr;
578
579 /* Obstack for iv structure. */
580 struct obstack iv_obstack;
581
582 /* Whether to consider just related and important candidates when replacing a
583 use. */
584 bool consider_all_candidates;
585
586 /* Are we optimizing for speed? */
587 bool speed;
588
589 /* Whether the loop body includes any function calls. */
590 bool body_includes_call;
591
592 /* Whether the loop body can only be exited via single exit. */
593 bool loop_single_exit_p;
594 };
595
596 /* An assignment of iv candidates to uses. */
597
598 struct iv_ca
599 {
600 /* The number of uses covered by the assignment. */
601 unsigned upto;
602
603 /* Number of uses that cannot be expressed by the candidates in the set. */
604 unsigned bad_groups;
605
606 /* Candidate assigned to a use, together with the related costs. */
607 struct cost_pair **cand_for_group;
608
609 /* Number of times each candidate is used. */
610 unsigned *n_cand_uses;
611
612 /* The candidates used. */
613 bitmap cands;
614
615 /* The number of candidates in the set. */
616 unsigned n_cands;
617
618 /* The number of invariants needed, including both invariant variables and
619 invariant expressions. */
620 unsigned n_invs;
621
622 /* Total cost of expressing uses. */
623 comp_cost cand_use_cost;
624
625 /* Total cost of candidates. */
626 unsigned cand_cost;
627
628 /* Number of times each invariant variable is used. */
629 unsigned *n_inv_var_uses;
630
631 /* Number of times each invariant expression is used. */
632 unsigned *n_inv_expr_uses;
633
634 /* Total cost of the assignment. */
635 comp_cost cost;
636 };
637
638 /* Difference of two iv candidate assignments. */
639
640 struct iv_ca_delta
641 {
642 /* Changed group. */
643 struct iv_group *group;
644
645 /* An old assignment (for rollback purposes). */
646 struct cost_pair *old_cp;
647
648 /* A new assignment. */
649 struct cost_pair *new_cp;
650
651 /* Next change in the list. */
652 struct iv_ca_delta *next;
653 };
654
655 /* Bound on the number of candidates below which all candidates are considered. */
656
657 #define CONSIDER_ALL_CANDIDATES_BOUND \
658 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
659
660 /* If there are more iv occurrences, we just give up (it is quite unlikely that
661 optimizing such a loop would help, and it would take ages). */
662
663 #define MAX_CONSIDERED_GROUPS \
664 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
665
666 /* If there are at most this number of ivs in the set, always try removing
667 unnecessary ivs from the set. */
668
669 #define ALWAYS_PRUNE_CAND_SET_BOUND \
670 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
671
672 /* The list of trees for which the decl_rtl field must be reset is stored
673 here. */
674
675 static vec<tree> decl_rtl_to_reset;
676
677 static comp_cost force_expr_to_var_cost (tree, bool);
678
679 /* The single loop exit if it dominates the latch, NULL otherwise. */
680
681 edge
682 single_dom_exit (struct loop *loop)
683 {
684 edge exit = single_exit (loop);
685
686 if (!exit)
687 return NULL;
688
689 if (!just_once_each_iteration_p (loop, exit->src))
690 return NULL;
691
692 return exit;
693 }
694
695 /* Dumps information about the induction variable IV to FILE. Don't dump
696 variable's name if DUMP_NAME is FALSE. The information is dumped with
697 preceding spaces indicated by INDENT_LEVEL. */
698
699 void
700 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
701 {
702 const char *p;
703 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
704
705 if (indent_level > 4)
706 indent_level = 4;
707 p = spaces + 8 - (indent_level << 1);
708
709 fprintf (file, "%sIV struct:\n", p);
710 if (iv->ssa_name && dump_name)
711 {
712 fprintf (file, "%s SSA_NAME:\t", p);
713 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
714 fprintf (file, "\n");
715 }
716
717 fprintf (file, "%s Type:\t", p);
718 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
719 fprintf (file, "\n");
720
721 fprintf (file, "%s Base:\t", p);
722 print_generic_expr (file, iv->base, TDF_SLIM);
723 fprintf (file, "\n");
724
725 fprintf (file, "%s Step:\t", p);
726 print_generic_expr (file, iv->step, TDF_SLIM);
727 fprintf (file, "\n");
728
729 if (iv->base_object)
730 {
731 fprintf (file, "%s Object:\t", p);
732 print_generic_expr (file, iv->base_object, TDF_SLIM);
733 fprintf (file, "\n");
734 }
735
736 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
737
738 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
739 p, iv->no_overflow ? "No-overflow" : "Overflow");
740 }
741
742 /* Dumps information about the USE to FILE. */
743
744 void
745 dump_use (FILE *file, struct iv_use *use)
746 {
747 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
748 fprintf (file, " At stmt:\t");
749 print_gimple_stmt (file, use->stmt, 0);
750 fprintf (file, " At pos:\t");
751 if (use->op_p)
752 print_generic_expr (file, *use->op_p, TDF_SLIM);
753 fprintf (file, "\n");
754 dump_iv (file, use->iv, false, 2);
755 }
756
757 /* Dumps information about the uses to FILE. */
758
759 void
760 dump_groups (FILE *file, struct ivopts_data *data)
761 {
762 unsigned i, j;
763 struct iv_group *group;
764
765 for (i = 0; i < data->vgroups.length (); i++)
766 {
767 group = data->vgroups[i];
768 fprintf (file, "Group %d:\n", group->id);
769 if (group->type == USE_NONLINEAR_EXPR)
770 fprintf (file, " Type:\tGENERIC\n");
771 else if (group->type == USE_ADDRESS)
772 fprintf (file, " Type:\tADDRESS\n");
773 else
774 {
775 gcc_assert (group->type == USE_COMPARE);
776 fprintf (file, " Type:\tCOMPARE\n");
777 }
778 for (j = 0; j < group->vuses.length (); j++)
779 dump_use (file, group->vuses[j]);
780 }
781 }
782
783 /* Dumps information about induction variable candidate CAND to FILE. */
784
785 void
786 dump_cand (FILE *file, struct iv_cand *cand)
787 {
788 struct iv *iv = cand->iv;
789
790 fprintf (file, "Candidate %d:\n", cand->id);
791 if (cand->inv_vars)
792 {
793 fprintf (file, " Depend on inv.vars: ");
794 dump_bitmap (file, cand->inv_vars);
795 }
796 if (cand->inv_exprs)
797 {
798 fprintf (file, " Depend on inv.exprs: ");
799 dump_bitmap (file, cand->inv_exprs);
800 }
801
802 if (cand->var_before)
803 {
804 fprintf (file, " Var befor: ");
805 print_generic_expr (file, cand->var_before, TDF_SLIM);
806 fprintf (file, "\n");
807 }
808 if (cand->var_after)
809 {
810 fprintf (file, " Var after: ");
811 print_generic_expr (file, cand->var_after, TDF_SLIM);
812 fprintf (file, "\n");
813 }
814
815 switch (cand->pos)
816 {
817 case IP_NORMAL:
818 fprintf (file, " Incr POS: before exit test\n");
819 break;
820
821 case IP_BEFORE_USE:
822 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
823 break;
824
825 case IP_AFTER_USE:
826 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
827 break;
828
829 case IP_END:
830 fprintf (file, " Incr POS: at end\n");
831 break;
832
833 case IP_ORIGINAL:
834 fprintf (file, " Incr POS: orig biv\n");
835 break;
836 }
837
838 dump_iv (file, iv, false, 1);
839 }
840
841 /* Returns the info for ssa version VER. */
842
843 static inline struct version_info *
844 ver_info (struct ivopts_data *data, unsigned ver)
845 {
846 return data->version_info + ver;
847 }
848
849 /* Returns the info for ssa name NAME. */
850
851 static inline struct version_info *
852 name_info (struct ivopts_data *data, tree name)
853 {
854 return ver_info (data, SSA_NAME_VERSION (name));
855 }
856
857 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
858 emitted in LOOP. */
859
860 static bool
861 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
862 {
863 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
864
865 gcc_assert (bb);
866
867 if (sbb == loop->latch)
868 return true;
869
870 if (sbb != bb)
871 return false;
872
873 return stmt == last_stmt (bb);
874 }
875
876 /* Returns true if STMT is after the place where the original induction
877 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
878 if the positions are identical. */
879
880 static bool
881 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
882 {
883 basic_block cand_bb = gimple_bb (cand->incremented_at);
884 basic_block stmt_bb = gimple_bb (stmt);
885
886 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
887 return false;
888
889 if (stmt_bb != cand_bb)
890 return true;
891
892 if (true_if_equal
893 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
894 return true;
895 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
896 }
897
898 /* Returns true if STMT is after the place where the induction variable
899 CAND is incremented in LOOP. */
900
901 static bool
902 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
903 {
904 switch (cand->pos)
905 {
906 case IP_END:
907 return false;
908
909 case IP_NORMAL:
910 return stmt_after_ip_normal_pos (loop, stmt);
911
912 case IP_ORIGINAL:
913 case IP_AFTER_USE:
914 return stmt_after_inc_pos (cand, stmt, false);
915
916 case IP_BEFORE_USE:
917 return stmt_after_inc_pos (cand, stmt, true);
918
919 default:
920 gcc_unreachable ();
921 }
922 }
923
924 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
925
926 static bool
927 abnormal_ssa_name_p (tree exp)
928 {
929 if (!exp)
930 return false;
931
932 if (TREE_CODE (exp) != SSA_NAME)
933 return false;
934
935 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
936 }
937
938 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
939 abnormal phi node. Callback for for_each_index. */
940
941 static bool
942 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
943 void *data ATTRIBUTE_UNUSED)
944 {
945 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
946 {
947 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
948 return false;
949 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
950 return false;
951 }
952
953 return !abnormal_ssa_name_p (*index);
954 }
955
956 /* Returns true if EXPR contains a ssa name that occurs in an
957 abnormal phi node. */
958
959 bool
960 contains_abnormal_ssa_name_p (tree expr)
961 {
962 enum tree_code code;
963 enum tree_code_class codeclass;
964
965 if (!expr)
966 return false;
967
968 code = TREE_CODE (expr);
969 codeclass = TREE_CODE_CLASS (code);
970
971 if (code == SSA_NAME)
972 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
973
974 if (code == INTEGER_CST
975 || is_gimple_min_invariant (expr))
976 return false;
977
978 if (code == ADDR_EXPR)
979 return !for_each_index (&TREE_OPERAND (expr, 0),
980 idx_contains_abnormal_ssa_name_p,
981 NULL);
982
983 if (code == COND_EXPR)
984 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
985 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
986 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
987
988 switch (codeclass)
989 {
990 case tcc_binary:
991 case tcc_comparison:
992 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
993 return true;
994
995 /* Fallthru. */
996 case tcc_unary:
997 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
998 return true;
999
1000 break;
1001
1002 default:
1003 gcc_unreachable ();
1004 }
1005
1006 return false;
1007 }
1008
1009 /* Returns the structure describing number of iterations determined from
1010 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1011
1012 static struct tree_niter_desc *
1013 niter_for_exit (struct ivopts_data *data, edge exit)
1014 {
1015 struct tree_niter_desc *desc;
1016 tree_niter_desc **slot;
1017
1018 if (!data->niters)
1019 {
1020 data->niters = new hash_map<edge, tree_niter_desc *>;
1021 slot = NULL;
1022 }
1023 else
1024 slot = data->niters->get (exit);
1025
1026 if (!slot)
1027 {
1028 /* Try to determine number of iterations. We cannot safely work with ssa
1029 names that appear in phi nodes on abnormal edges, so that we do not
1030 create overlapping life ranges for them (PR 27283). */
1031 desc = XNEW (struct tree_niter_desc);
1032 if (!number_of_iterations_exit (data->current_loop,
1033 exit, desc, true)
1034 || contains_abnormal_ssa_name_p (desc->niter))
1035 {
1036 XDELETE (desc);
1037 desc = NULL;
1038 }
1039 data->niters->put (exit, desc);
1040 }
1041 else
1042 desc = *slot;
1043
1044 return desc;
1045 }
1046
1047 /* Returns the structure describing number of iterations determined from
1048 single dominating exit of DATA->current_loop, or NULL if something
1049 goes wrong. */
1050
1051 static struct tree_niter_desc *
1052 niter_for_single_dom_exit (struct ivopts_data *data)
1053 {
1054 edge exit = single_dom_exit (data->current_loop);
1055
1056 if (!exit)
1057 return NULL;
1058
1059 return niter_for_exit (data, exit);
1060 }
1061
1062 /* Initializes data structures used by the iv optimization pass, stored
1063 in DATA. */
1064
1065 static void
1066 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1067 {
1068 data->version_info_size = 2 * num_ssa_names;
1069 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1070 data->relevant = BITMAP_ALLOC (NULL);
1071 data->important_candidates = BITMAP_ALLOC (NULL);
1072 data->max_inv_var_id = 0;
1073 data->max_inv_expr_id = 0;
1074 data->niters = NULL;
1075 data->vgroups.create (20);
1076 data->vcands.create (20);
1077 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1078 data->name_expansion_cache = NULL;
1079 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1080 data->iv_common_cands.create (20);
1081 decl_rtl_to_reset.create (20);
1082 gcc_obstack_init (&data->iv_obstack);
1083 }
1084
1085 /* Returns a memory object to which EXPR points. If we are able to
1086 determine that it does not point to any such object, NULL is returned. */
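
/* Illustrative examples (hypothetical expressions, not from this file):
   for &a[i] with A a declared array, the result is &a converted to
   ptr_type_node; for a POINTER_PLUS_EXPR such as p_1 + 4 * i_2, the
   function simply recurses on the pointer operand and returns p_1.  */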
1087
1088 static tree
1089 determine_base_object (tree expr)
1090 {
1091 enum tree_code code = TREE_CODE (expr);
1092 tree base, obj;
1093
1094 /* If this is a pointer casted to any type, we need to determine
1095 the base object for the pointer; so handle conversions before
1096 throwing away non-pointer expressions. */
1097 if (CONVERT_EXPR_P (expr))
1098 return determine_base_object (TREE_OPERAND (expr, 0));
1099
1100 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1101 return NULL_TREE;
1102
1103 switch (code)
1104 {
1105 case INTEGER_CST:
1106 return NULL_TREE;
1107
1108 case ADDR_EXPR:
1109 obj = TREE_OPERAND (expr, 0);
1110 base = get_base_address (obj);
1111
1112 if (!base)
1113 return expr;
1114
1115 if (TREE_CODE (base) == MEM_REF)
1116 return determine_base_object (TREE_OPERAND (base, 0));
1117
1118 return fold_convert (ptr_type_node,
1119 build_fold_addr_expr (base));
1120
1121 case POINTER_PLUS_EXPR:
1122 return determine_base_object (TREE_OPERAND (expr, 0));
1123
1124 case PLUS_EXPR:
1125 case MINUS_EXPR:
1126 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1127 gcc_unreachable ();
1128
1129 default:
1130 if (POLY_INT_CST_P (expr))
1131 return NULL_TREE;
1132 return fold_convert (ptr_type_node, expr);
1133 }
1134 }
1135
1136 /* Return true if address expression with non-DECL_P operand appears
1137 in EXPR. */
1138
1139 static bool
1140 contain_complex_addr_expr (tree expr)
1141 {
1142 bool res = false;
1143
1144 STRIP_NOPS (expr);
1145 switch (TREE_CODE (expr))
1146 {
1147 case POINTER_PLUS_EXPR:
1148 case PLUS_EXPR:
1149 case MINUS_EXPR:
1150 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1151 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1152 break;
1153
1154 case ADDR_EXPR:
1155 return (!DECL_P (TREE_OPERAND (expr, 0)));
1156
1157 default:
1158 return false;
1159 }
1160
1161 return res;
1162 }
1163
1164 /* Allocates an induction variable with given initial value BASE and step STEP
1165 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1166
1167 static struct iv *
1168 alloc_iv (struct ivopts_data *data, tree base, tree step,
1169 bool no_overflow = false)
1170 {
1171 tree expr = base;
1172 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1173 sizeof (struct iv));
1174 gcc_assert (step != NULL_TREE);
1175
1176 /* Lower address expression in base except ones with DECL_P as operand.
1177 By doing this:
1178 1) More accurate cost can be computed for address expressions;
1179 2) Duplicate candidates won't be created for bases in different
1180 forms, like &a[0] and &a. */
1181 STRIP_NOPS (expr);
1182 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1183 || contain_complex_addr_expr (expr))
1184 {
1185 aff_tree comb;
1186 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1187 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1188 }
1189
1190 iv->base = base;
1191 iv->base_object = determine_base_object (base);
1192 iv->step = step;
1193 iv->biv_p = false;
1194 iv->nonlin_use = NULL;
1195 iv->ssa_name = NULL_TREE;
1196 if (!no_overflow
1197 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1198 base, step))
1199 no_overflow = true;
1200 iv->no_overflow = no_overflow;
1201 iv->have_address_use = false;
1202
1203 return iv;
1204 }
1205
1206 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1207 doesn't overflow. */
1208
1209 static void
1210 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1211 bool no_overflow)
1212 {
1213 struct version_info *info = name_info (data, iv);
1214
1215 gcc_assert (!info->iv);
1216
1217 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1218 info->iv = alloc_iv (data, base, step, no_overflow);
1219 info->iv->ssa_name = iv;
1220 }
1221
1222 /* Finds induction variable declaration for VAR. */
1223
1224 static struct iv *
1225 get_iv (struct ivopts_data *data, tree var)
1226 {
1227 basic_block bb;
1228 tree type = TREE_TYPE (var);
1229
1230 if (!POINTER_TYPE_P (type)
1231 && !INTEGRAL_TYPE_P (type))
1232 return NULL;
1233
1234 if (!name_info (data, var)->iv)
1235 {
1236 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1237
1238 if (!bb
1239 || !flow_bb_inside_loop_p (data->current_loop, bb))
1240 set_iv (data, var, var, build_int_cst (type, 0), true);
1241 }
1242
1243 return name_info (data, var)->iv;
1244 }
1245
1246 /* Return the first non-invariant ssa var found in EXPR. */
1247
1248 static tree
1249 extract_single_var_from_expr (tree expr)
1250 {
1251 int i, n;
1252 tree tmp;
1253 enum tree_code code;
1254
1255 if (!expr || is_gimple_min_invariant (expr))
1256 return NULL;
1257
1258 code = TREE_CODE (expr);
1259 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1260 {
1261 n = TREE_OPERAND_LENGTH (expr);
1262 for (i = 0; i < n; i++)
1263 {
1264 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1265
1266 if (tmp)
1267 return tmp;
1268 }
1269 }
1270 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1271 }
1272
1273 /* Finds basic ivs. */
1274
1275 static bool
1276 find_bivs (struct ivopts_data *data)
1277 {
1278 gphi *phi;
1279 affine_iv iv;
1280 tree step, type, base, stop;
1281 bool found = false;
1282 struct loop *loop = data->current_loop;
1283 gphi_iterator psi;
1284
1285 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1286 {
1287 phi = psi.phi ();
1288
1289 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1290 continue;
1291
1292 if (virtual_operand_p (PHI_RESULT (phi)))
1293 continue;
1294
1295 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1296 continue;
1297
1298 if (integer_zerop (iv.step))
1299 continue;
1300
1301 step = iv.step;
1302 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1303 /* Stop expanding the iv base at the first ssa var referred to by the iv
1304 step. Ideally we should stop at any ssa var, but because that is
1305 expensive and unusual to happen, we just do it for the first one.
1306
1307 See PR64705 for the rationale. */
1308 stop = extract_single_var_from_expr (step);
1309 base = expand_simple_operations (base, stop);
1310 if (contains_abnormal_ssa_name_p (base)
1311 || contains_abnormal_ssa_name_p (step))
1312 continue;
1313
1314 type = TREE_TYPE (PHI_RESULT (phi));
1315 base = fold_convert (type, base);
1316 if (step)
1317 {
1318 if (POINTER_TYPE_P (type))
1319 step = convert_to_ptrofftype (step);
1320 else
1321 step = fold_convert (type, step);
1322 }
1323
1324 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1325 found = true;
1326 }
1327
1328 return found;
1329 }
1330
1331 /* Marks basic ivs. */
1332
1333 static void
1334 mark_bivs (struct ivopts_data *data)
1335 {
1336 gphi *phi;
1337 gimple *def;
1338 tree var;
1339 struct iv *iv, *incr_iv;
1340 struct loop *loop = data->current_loop;
1341 basic_block incr_bb;
1342 gphi_iterator psi;
1343
1344 data->bivs_not_used_in_addr = 0;
1345 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1346 {
1347 phi = psi.phi ();
1348
1349 iv = get_iv (data, PHI_RESULT (phi));
1350 if (!iv)
1351 continue;
1352
1353 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1354 def = SSA_NAME_DEF_STMT (var);
1355 /* Don't mark iv peeled from other one as biv. */
1356 if (def
1357 && gimple_code (def) == GIMPLE_PHI
1358 && gimple_bb (def) == loop->header)
1359 continue;
1360
1361 incr_iv = get_iv (data, var);
1362 if (!incr_iv)
1363 continue;
1364
1365 /* If the increment is in the subloop, ignore it. */
1366 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1367 if (incr_bb->loop_father != data->current_loop
1368 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1369 continue;
1370
1371 iv->biv_p = true;
1372 incr_iv->biv_p = true;
1373 if (iv->no_overflow)
1374 data->bivs_not_used_in_addr++;
1375 if (incr_iv->no_overflow)
1376 data->bivs_not_used_in_addr++;
1377 }
1378 }
1379
1380 /* Checks whether STMT defines a linear induction variable and stores its
1381 parameters to IV. */
1382
1383 static bool
1384 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1385 {
1386 tree lhs, stop;
1387 struct loop *loop = data->current_loop;
1388
1389 iv->base = NULL_TREE;
1390 iv->step = NULL_TREE;
1391
1392 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1393 return false;
1394
1395 lhs = gimple_assign_lhs (stmt);
1396 if (TREE_CODE (lhs) != SSA_NAME)
1397 return false;
1398
1399 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1400 return false;
1401
1402 /* Stop expanding the iv base at the first ssa var referred to by the iv
1403 step. Ideally we should stop at any ssa var, but because that is
1404 expensive and unusual to happen, we just do it for the first one.
1405
1406 See PR64705 for the rationale. */
1407 stop = extract_single_var_from_expr (iv->step);
1408 iv->base = expand_simple_operations (iv->base, stop);
1409 if (contains_abnormal_ssa_name_p (iv->base)
1410 || contains_abnormal_ssa_name_p (iv->step))
1411 return false;
1412
1413 /* If STMT could throw, then do not consider STMT as defining a GIV.
1414 While this will suppress optimizations, we cannot safely delete this
1415 GIV and associated statements, even if it appears it is not used. */
1416 if (stmt_could_throw_p (stmt))
1417 return false;
1418
1419 return true;
1420 }
1421
1422 /* Finds general ivs in statement STMT. */
1423
1424 static void
1425 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1426 {
1427 affine_iv iv;
1428
1429 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1430 return;
1431
1432 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1433 }
1434
1435 /* Finds general ivs in basic block BB. */
1436
1437 static void
1438 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1439 {
1440 gimple_stmt_iterator bsi;
1441
1442 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1443 find_givs_in_stmt (data, gsi_stmt (bsi));
1444 }
1445
1446 /* Finds general ivs. */
1447
1448 static void
1449 find_givs (struct ivopts_data *data)
1450 {
1451 struct loop *loop = data->current_loop;
1452 basic_block *body = get_loop_body_in_dom_order (loop);
1453 unsigned i;
1454
1455 for (i = 0; i < loop->num_nodes; i++)
1456 find_givs_in_bb (data, body[i]);
1457 free (body);
1458 }
1459
1460 /* For each ssa name defined in LOOP, determine whether it is an induction
1461 variable and if so, its initial value and step. */
1462
1463 static bool
1464 find_induction_variables (struct ivopts_data *data)
1465 {
1466 unsigned i;
1467 bitmap_iterator bi;
1468
1469 if (!find_bivs (data))
1470 return false;
1471
1472 find_givs (data);
1473 mark_bivs (data);
1474
1475 if (dump_file && (dump_flags & TDF_DETAILS))
1476 {
1477 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1478
1479 if (niter)
1480 {
1481 fprintf (dump_file, " number of iterations ");
1482 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1483 if (!integer_zerop (niter->may_be_zero))
1484 {
1485 fprintf (dump_file, "; zero if ");
1486 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1487 }
1488 fprintf (dump_file, "\n");
1489 };
1490
1491 fprintf (dump_file, "\n<Induction Vars>:\n");
1492 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1493 {
1494 struct version_info *info = ver_info (data, i);
1495 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1496 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1497 }
1498 }
1499
1500 return true;
1501 }
1502
1503 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1504 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1505 is the const offset stripped from IV base; for uses of other types, both
1506 are zero by default. */
1507
1508 static struct iv_use *
1509 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1510 gimple *stmt, enum use_type type, tree addr_base,
1511 poly_uint64 addr_offset)
1512 {
1513 struct iv_use *use = XCNEW (struct iv_use);
1514
1515 use->id = group->vuses.length ();
1516 use->group_id = group->id;
1517 use->type = type;
1518 use->iv = iv;
1519 use->stmt = stmt;
1520 use->op_p = use_p;
1521 use->addr_base = addr_base;
1522 use->addr_offset = addr_offset;
1523
1524 group->vuses.safe_push (use);
1525 return use;
1526 }
1527
1528 /* Checks whether OP is a loop-level invariant and if so, records it.
1529 NONLINEAR_USE is true if the invariant is used in a way we do not
1530 handle specially. */
1531
1532 static void
1533 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1534 {
1535 basic_block bb;
1536 struct version_info *info;
1537
1538 if (TREE_CODE (op) != SSA_NAME
1539 || virtual_operand_p (op))
1540 return;
1541
1542 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1543 if (bb
1544 && flow_bb_inside_loop_p (data->current_loop, bb))
1545 return;
1546
1547 info = name_info (data, op);
1548 info->name = op;
1549 info->has_nonlin_use |= nonlinear_use;
1550 if (!info->inv_id)
1551 info->inv_id = ++data->max_inv_var_id;
1552 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1553 }
1554
1555 /* Record a group of TYPE. */
1556
1557 static struct iv_group *
1558 record_group (struct ivopts_data *data, enum use_type type)
1559 {
1560 struct iv_group *group = XCNEW (struct iv_group);
1561
1562 group->id = data->vgroups.length ();
1563 group->type = type;
1564 group->related_cands = BITMAP_ALLOC (NULL);
1565 group->vuses.create (1);
1566
1567 data->vgroups.safe_push (group);
1568 return group;
1569 }
1570
1571 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1572 New group will be created if there is no existing group for the use. */
1573
1574 static struct iv_use *
1575 record_group_use (struct ivopts_data *data, tree *use_p,
1576 struct iv *iv, gimple *stmt, enum use_type type)
1577 {
1578 tree addr_base = NULL;
1579 struct iv_group *group = NULL;
1580 poly_uint64 addr_offset = 0;
1581
1582 /* An address type use may join an existing group; other uses always get a new group. */
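/* Illustrative example (not from this file): address uses a[i] and
   a[i + 1] with 4-byte elements have iv bases &a[0] and &a[0] + 4; both
   strip to addr_base &a[0] (with constant offsets 0 and 4) and have the
   same step, so the second use joins the group of the first.  */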
1583 if (type == USE_ADDRESS && iv->base_object)
1584 {
1585 unsigned int i;
1586
1587 addr_base = strip_offset (iv->base, &addr_offset);
1588 for (i = 0; i < data->vgroups.length (); i++)
1589 {
1590 struct iv_use *use;
1591
1592 group = data->vgroups[i];
1593 use = group->vuses[0];
1594 if (use->type != USE_ADDRESS || !use->iv->base_object)
1595 continue;
1596
1597 /* Check if it has the same stripped base and step. */
1598 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1599 && operand_equal_p (iv->step, use->iv->step, 0)
1600 && operand_equal_p (addr_base, use->addr_base, 0))
1601 break;
1602 }
1603 if (i == data->vgroups.length ())
1604 group = NULL;
1605 }
1606
1607 if (!group)
1608 group = record_group (data, type);
1609
1610 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1611 }
1612
1613 /* Checks whether the use OP is interesting and if so, records it. */
1614
1615 static struct iv_use *
1616 find_interesting_uses_op (struct ivopts_data *data, tree op)
1617 {
1618 struct iv *iv;
1619 gimple *stmt;
1620 struct iv_use *use;
1621
1622 if (TREE_CODE (op) != SSA_NAME)
1623 return NULL;
1624
1625 iv = get_iv (data, op);
1626 if (!iv)
1627 return NULL;
1628
1629 if (iv->nonlin_use)
1630 {
1631 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1632 return iv->nonlin_use;
1633 }
1634
1635 if (integer_zerop (iv->step))
1636 {
1637 record_invariant (data, op, true);
1638 return NULL;
1639 }
1640
1641 stmt = SSA_NAME_DEF_STMT (op);
1642 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1643
1644 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1645 iv->nonlin_use = use;
1646 return use;
1647 }
1648
1649 /* Indicate how compare type iv_use can be handled. */
1650 enum comp_iv_rewrite
1651 {
1652 COMP_IV_NA,
1653 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1654 COMP_IV_EXPR,
1655 /* We may rewrite compare type iv_uses on both sides of comparison by
1656 expressing value of each iv_use. */
1657 COMP_IV_EXPR_2,
1658 /* We may rewrite compare type iv_use by expressing value of the iv_use
1659 or by eliminating it with other iv_cand. */
1660 COMP_IV_ELIM
1661 };
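
/* Illustrative examples (hypothetical conditions, not from this file):
   "i < n" with i an iv and n loop invariant can be handled as
   COMP_IV_ELIM; "i < j" with both i and j ivs is COMP_IV_EXPR_2;
   "i < x" with x varying in the loop but not an iv is COMP_IV_EXPR.  */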
1662
1663 /* Given a condition in statement STMT, checks whether it is a compare
1664 of an induction variable and an invariant. If this is the case,
1665 CONTROL_VAR is set to location of the iv, BOUND to the location of
1666 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1667 induction variable descriptions, and the appropriate COMP_IV_* value is
1668 returned. If this is not the case, CONTROL_VAR and BOUND are set to the
1669 arguments of the condition and COMP_IV_NA is returned. */
1670
1671 static enum comp_iv_rewrite
1672 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1673 tree **control_var, tree **bound,
1674 struct iv **iv_var, struct iv **iv_bound)
1675 {
1676 /* The objects returned when COND has constant operands. */
1677 static struct iv const_iv;
1678 static tree zero;
1679 tree *op0 = &zero, *op1 = &zero;
1680 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1681 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1682
1683 if (gimple_code (stmt) == GIMPLE_COND)
1684 {
1685 gcond *cond_stmt = as_a <gcond *> (stmt);
1686 op0 = gimple_cond_lhs_ptr (cond_stmt);
1687 op1 = gimple_cond_rhs_ptr (cond_stmt);
1688 }
1689 else
1690 {
1691 op0 = gimple_assign_rhs1_ptr (stmt);
1692 op1 = gimple_assign_rhs2_ptr (stmt);
1693 }
1694
1695 zero = integer_zero_node;
1696 const_iv.step = integer_zero_node;
1697
1698 if (TREE_CODE (*op0) == SSA_NAME)
1699 iv0 = get_iv (data, *op0);
1700 if (TREE_CODE (*op1) == SSA_NAME)
1701 iv1 = get_iv (data, *op1);
1702
1703 /* If both sides of the comparison are IVs, we can express ivs on both ends. */
1704 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1705 {
1706 rewrite_type = COMP_IV_EXPR_2;
1707 goto end;
1708 }
1709
1710 /* If neither side of the comparison is an IV. */
1711 if ((!iv0 || integer_zerop (iv0->step))
1712 && (!iv1 || integer_zerop (iv1->step)))
1713 goto end;
1714
1715 /* Control variable may be on the other side. */
1716 if (!iv0 || integer_zerop (iv0->step))
1717 {
1718 std::swap (op0, op1);
1719 std::swap (iv0, iv1);
1720 }
1721 /* If one side is IV and the other side isn't loop invariant. */
1722 if (!iv1)
1723 rewrite_type = COMP_IV_EXPR;
1724 /* If one side is IV and the other side is loop invariant. */
1725 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1726 rewrite_type = COMP_IV_ELIM;
1727
1728 end:
1729 if (control_var)
1730 *control_var = op0;
1731 if (iv_var)
1732 *iv_var = iv0;
1733 if (bound)
1734 *bound = op1;
1735 if (iv_bound)
1736 *iv_bound = iv1;
1737
1738 return rewrite_type;
1739 }
1740
1741 /* Checks whether the condition in STMT is interesting and if so,
1742 records it. */
1743
1744 static void
1745 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1746 {
1747 tree *var_p, *bound_p;
1748 struct iv *var_iv, *bound_iv;
1749 enum comp_iv_rewrite ret;
1750
1751 ret = extract_cond_operands (data, stmt,
1752 &var_p, &bound_p, &var_iv, &bound_iv);
1753 if (ret == COMP_IV_NA)
1754 {
1755 find_interesting_uses_op (data, *var_p);
1756 find_interesting_uses_op (data, *bound_p);
1757 return;
1758 }
1759
1760 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE);
1761 /* Record compare type iv_use for iv on the other side of comparison. */
1762 if (ret == COMP_IV_EXPR_2)
1763 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE);
1764 }
1765
1766 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
1767 obviously invariant, i.e. in which all its operands are defined
1768 outside of the returned loop. Returns NULL if EXPR is not
1769 even obviously invariant in LOOP. */
1770
1771 struct loop *
1772 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1773 {
1774 basic_block def_bb;
1775 unsigned i, len;
1776
1777 if (is_gimple_min_invariant (expr))
1778 return current_loops->tree_root;
1779
1780 if (TREE_CODE (expr) == SSA_NAME)
1781 {
1782 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1783 if (def_bb)
1784 {
1785 if (flow_bb_inside_loop_p (loop, def_bb))
1786 return NULL;
1787 return superloop_at_depth (loop,
1788 loop_depth (def_bb->loop_father) + 1);
1789 }
1790
1791 return current_loops->tree_root;
1792 }
1793
1794 if (!EXPR_P (expr))
1795 return NULL;
1796
1797 unsigned maxdepth = 0;
1798 len = TREE_OPERAND_LENGTH (expr);
1799 for (i = 0; i < len; i++)
1800 {
1801 struct loop *ivloop;
1802 if (!TREE_OPERAND (expr, i))
1803 continue;
1804
1805 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1806 if (!ivloop)
1807 return NULL;
1808 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1809 }
1810
1811 return superloop_at_depth (loop, maxdepth);
1812 }
1813
1814 /* Returns true if expression EXPR is obviously invariant in LOOP,
1815 i.e. if all its operands are defined outside of the LOOP. LOOP
1816 should not be the function body. */
1817
1818 bool
1819 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1820 {
1821 basic_block def_bb;
1822 unsigned i, len;
1823
1824 gcc_assert (loop_depth (loop) > 0);
1825
1826 if (is_gimple_min_invariant (expr))
1827 return true;
1828
1829 if (TREE_CODE (expr) == SSA_NAME)
1830 {
1831 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1832 if (def_bb
1833 && flow_bb_inside_loop_p (loop, def_bb))
1834 return false;
1835
1836 return true;
1837 }
1838
1839 if (!EXPR_P (expr))
1840 return false;
1841
1842 len = TREE_OPERAND_LENGTH (expr);
1843 for (i = 0; i < len; i++)
1844 if (TREE_OPERAND (expr, i)
1845 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1846 return false;
1847
1848 return true;
1849 }
1850
1851 /* Given expression EXPR which computes inductive values with respect
1852 to loop recorded in DATA, this function returns biv from which EXPR
1853 is derived by tracing definition chains of ssa variables in EXPR. */
1854
1855 static struct iv*
1856 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1857 {
1858 struct iv *iv;
1859 unsigned i, n;
1860 tree e2, e1;
1861 enum tree_code code;
1862 gimple *stmt;
1863
1864 if (expr == NULL_TREE)
1865 return NULL;
1866
1867 if (is_gimple_min_invariant (expr))
1868 return NULL;
1869
1870 code = TREE_CODE (expr);
1871 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1872 {
1873 n = TREE_OPERAND_LENGTH (expr);
1874 for (i = 0; i < n; i++)
1875 {
1876 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1877 if (iv)
1878 return iv;
1879 }
1880 }
1881
1882 /* Stop if it's not ssa name. */
1883 if (code != SSA_NAME)
1884 return NULL;
1885
1886 iv = get_iv (data, expr);
1887 if (!iv || integer_zerop (iv->step))
1888 return NULL;
1889 else if (iv->biv_p)
1890 return iv;
1891
1892 stmt = SSA_NAME_DEF_STMT (expr);
1893 if (gphi *phi = dyn_cast <gphi *> (stmt))
1894 {
1895 ssa_op_iter iter;
1896 use_operand_p use_p;
1897 basic_block phi_bb = gimple_bb (phi);
1898
1899 /* Skip loop header PHI that doesn't define biv. */
1900 if (phi_bb->loop_father == data->current_loop)
1901 return NULL;
1902
1903 if (virtual_operand_p (gimple_phi_result (phi)))
1904 return NULL;
1905
1906 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1907 {
1908 tree use = USE_FROM_PTR (use_p);
1909 iv = find_deriving_biv_for_expr (data, use);
1910 if (iv)
1911 return iv;
1912 }
1913 return NULL;
1914 }
1915 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1916 return NULL;
1917
1918 e1 = gimple_assign_rhs1 (stmt);
1919 code = gimple_assign_rhs_code (stmt);
1920 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1921 return find_deriving_biv_for_expr (data, e1);
1922
1923 switch (code)
1924 {
1925 case MULT_EXPR:
1926 case PLUS_EXPR:
1927 case MINUS_EXPR:
1928 case POINTER_PLUS_EXPR:
1929 /* Increments, decrements and multiplications by a constant
1930 are simple. */
1931 e2 = gimple_assign_rhs2 (stmt);
1932 iv = find_deriving_biv_for_expr (data, e2);
1933 if (iv)
1934 return iv;
1935 gcc_fallthrough ();
1936
1937 CASE_CONVERT:
1938 /* Casts are simple. */
1939 return find_deriving_biv_for_expr (data, e1);
1940
1941 default:
1942 break;
1943 }
1944
1945 return NULL;
1946 }
1947
1948 /* Record that BIV, its predecessor and its successor are all used in
1949 address type uses. */
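/* Informally: if BIV has base B and step S, then a biv with the same step
   and base B + S (BIV's successor) or base B - S (BIV's predecessor)
   computes the same sequence shifted by one iteration, so it is marked
   as having an address type use as well.  */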
1950
1951 static void
1952 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1953 {
1954 unsigned i;
1955 tree type, base_1, base_2;
1956 bitmap_iterator bi;
1957
1958 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1959 || biv->have_address_use || !biv->no_overflow)
1960 return;
1961
1962 type = TREE_TYPE (biv->base);
1963 if (!INTEGRAL_TYPE_P (type))
1964 return;
1965
1966 biv->have_address_use = true;
1967 data->bivs_not_used_in_addr--;
1968 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1969 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1970 {
1971 struct iv *iv = ver_info (data, i)->iv;
1972
1973 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1974 || iv->have_address_use || !iv->no_overflow)
1975 continue;
1976
1977 if (type != TREE_TYPE (iv->base)
1978 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1979 continue;
1980
1981 if (!operand_equal_p (biv->step, iv->step, 0))
1982 continue;
1983
1984 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1985 if (operand_equal_p (base_1, iv->base, 0)
1986 || operand_equal_p (base_2, biv->base, 0))
1987 {
1988 iv->have_address_use = true;
1989 data->bivs_not_used_in_addr--;
1990 }
1991 }
1992 }
1993
1994 /* Accumulates the steps of indices into DATA and replaces their values with
1995    the initial ones.  Returns false when the value of the index cannot be
1996    determined.  Callback for for_each_index. */
1997
1998 struct ifs_ivopts_data
1999 {
2000 struct ivopts_data *ivopts_data;
2001 gimple *stmt;
2002 tree step;
2003 };
2004
2005 static bool
2006 idx_find_step (tree base, tree *idx, void *data)
2007 {
2008 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2009 struct iv *iv;
2010 bool use_overflow_semantics = false;
2011 tree step, iv_base, iv_step, lbound, off;
2012 struct loop *loop = dta->ivopts_data->current_loop;
2013
2014 /* If base is a component ref, require that the offset of the reference
2015 be invariant. */
2016 if (TREE_CODE (base) == COMPONENT_REF)
2017 {
2018 off = component_ref_field_offset (base);
2019 return expr_invariant_in_loop_p (loop, off);
2020 }
2021
2022 /* If base is array, first check whether we will be able to move the
2023 reference out of the loop (in order to take its address in strength
2024 reduction). In order for this to work we need both lower bound
2025 and step to be loop invariants. */
2026 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2027 {
2028 /* Moreover, for a range, the size needs to be invariant as well. */
2029 if (TREE_CODE (base) == ARRAY_RANGE_REF
2030 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2031 return false;
2032
2033 step = array_ref_element_size (base);
2034 lbound = array_ref_low_bound (base);
2035
2036 if (!expr_invariant_in_loop_p (loop, step)
2037 || !expr_invariant_in_loop_p (loop, lbound))
2038 return false;
2039 }
2040
2041 if (TREE_CODE (*idx) != SSA_NAME)
2042 return true;
2043
2044 iv = get_iv (dta->ivopts_data, *idx);
2045 if (!iv)
2046 return false;
2047
2048   /* XXX For a base of *D42 with iv->base being &x[0], we produce
2049      *&x[0], which is not folded and does not trigger the
2050      ARRAY_REF path below. */
2051 *idx = iv->base;
2052
2053 if (integer_zerop (iv->step))
2054 return true;
2055
2056 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2057 {
2058 step = array_ref_element_size (base);
2059
2060 /* We only handle addresses whose step is an integer constant. */
2061 if (TREE_CODE (step) != INTEGER_CST)
2062 return false;
2063 }
2064 else
2065     /* The step for pointer arithmetic is already 1 byte. */
2066 step = size_one_node;
2067
2068 iv_base = iv->base;
2069 iv_step = iv->step;
2070 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2071 use_overflow_semantics = true;
2072
2073 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2074 sizetype, &iv_base, &iv_step, dta->stmt,
2075 use_overflow_semantics))
2076 {
2077 /* The index might wrap. */
2078 return false;
2079 }
2080
2081 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2082 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2083
2084 if (dta->ivopts_data->bivs_not_used_in_addr)
2085 {
2086 if (!iv->biv_p)
2087 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2088
2089 record_biv_for_address_use (dta->ivopts_data, iv);
2090 }
2091 return true;
2092 }
2093
2094 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2095 object is passed to it in DATA. */
2096
2097 static bool
2098 idx_record_use (tree base, tree *idx,
2099 void *vdata)
2100 {
2101 struct ivopts_data *data = (struct ivopts_data *) vdata;
2102 find_interesting_uses_op (data, *idx);
2103 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2104 {
2105 find_interesting_uses_op (data, array_ref_element_size (base));
2106 find_interesting_uses_op (data, array_ref_low_bound (base));
2107 }
2108 return true;
2109 }
2110
2111 /* If we can prove that TOP = cst * BOT for some constant cst,
2112 store cst to MUL and return true. Otherwise return false.
2113 The returned value is always sign-extended, regardless of the
2114 signedness of TOP and BOT. */
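
/* For instance (illustrative values only): with TOP = n_1 * 8 + n_1 * 4 and
   BOT = n_1, this returns true and stores 12 to *MUL; with TOP = 7 and
   BOT = 2 it returns false, since 7 is not an integral multiple of 2.  */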
2115
2116 static bool
2117 constant_multiple_of (tree top, tree bot, widest_int *mul)
2118 {
2119 tree mby;
2120 enum tree_code code;
2121 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2122 widest_int res, p0, p1;
2123
2124 STRIP_NOPS (top);
2125 STRIP_NOPS (bot);
2126
2127 if (operand_equal_p (top, bot, 0))
2128 {
2129 *mul = 1;
2130 return true;
2131 }
2132
2133 code = TREE_CODE (top);
2134 switch (code)
2135 {
2136 case MULT_EXPR:
2137 mby = TREE_OPERAND (top, 1);
2138 if (TREE_CODE (mby) != INTEGER_CST)
2139 return false;
2140
2141 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2142 return false;
2143
2144 *mul = wi::sext (res * wi::to_widest (mby), precision);
2145 return true;
2146
2147 case PLUS_EXPR:
2148 case MINUS_EXPR:
2149 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2150 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2151 return false;
2152
2153 if (code == MINUS_EXPR)
2154 p1 = -p1;
2155 *mul = wi::sext (p0 + p1, precision);
2156 return true;
2157
2158 case INTEGER_CST:
2159 if (TREE_CODE (bot) != INTEGER_CST)
2160 return false;
2161
2162 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2163 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2164 if (p1 == 0)
2165 return false;
2166 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2167 return res == 0;
2168
2169 default:
2170 if (POLY_INT_CST_P (top)
2171 && POLY_INT_CST_P (bot)
2172 && constant_multiple_p (wi::to_poly_widest (top),
2173 wi::to_poly_widest (bot), mul))
2174 return true;
2175
2176 return false;
2177 }
2178 }
2179
2180 /* Return true if memory reference REF with step STEP may be unaligned. */
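
/* For example (a sketch, other checks permitting): on a strict-alignment
   target, a 4-byte aligned access whose address advances by STEP = 2 each
   iteration may become misaligned, so this would return true; with STEP = 4
   the alignment is preserved and it would return false.  */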
2181
2182 static bool
2183 may_be_unaligned_p (tree ref, tree step)
2184 {
2185 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2186 thus they are not misaligned. */
2187 if (TREE_CODE (ref) == TARGET_MEM_REF)
2188 return false;
2189
2190 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2191 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2192 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2193
2194 unsigned HOST_WIDE_INT bitpos;
2195 unsigned int ref_align;
2196 get_object_alignment_1 (ref, &ref_align, &bitpos);
2197 if (ref_align < align
2198 || (bitpos % align) != 0
2199 || (bitpos % BITS_PER_UNIT) != 0)
2200 return true;
2201
2202 unsigned int trailing_zeros = tree_ctz (step);
2203 if (trailing_zeros < HOST_BITS_PER_INT
2204 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2205 return true;
2206
2207 return false;
2208 }
2209
2210 /* Return true if EXPR may be non-addressable. */
2211
2212 bool
2213 may_be_nonaddressable_p (tree expr)
2214 {
2215 switch (TREE_CODE (expr))
2216 {
2217 case TARGET_MEM_REF:
2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2219 target, thus they are always addressable. */
2220 return false;
2221
2222 case MEM_REF:
2223 /* Likewise for MEM_REFs, modulo the storage order. */
2224 return REF_REVERSE_STORAGE_ORDER (expr);
2225
2226 case BIT_FIELD_REF:
2227 if (REF_REVERSE_STORAGE_ORDER (expr))
2228 return true;
2229 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2230
2231 case COMPONENT_REF:
2232 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2233 return true;
2234 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2235 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2236
2237 case ARRAY_REF:
2238 case ARRAY_RANGE_REF:
2239 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2240 return true;
2241 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2242
2243 case VIEW_CONVERT_EXPR:
2244       /* This kind of view-conversion may wrap non-addressable objects
2245 and make them look addressable. After some processing the
2246 non-addressability may be uncovered again, causing ADDR_EXPRs
2247 of inappropriate objects to be built. */
2248 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2249 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2250 return true;
2251 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2252
2253 CASE_CONVERT:
2254 return true;
2255
2256 default:
2257 break;
2258 }
2259
2260 return false;
2261 }
2262
2263 /* Finds addresses in *OP_P inside STMT. */
2264
2265 static void
2266 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2267 tree *op_p)
2268 {
2269 tree base = *op_p, step = size_zero_node;
2270 struct iv *civ;
2271 struct ifs_ivopts_data ifs_ivopts_data;
2272
2273 /* Do not play with volatile memory references. A bit too conservative,
2274 perhaps, but safe. */
2275 if (gimple_has_volatile_ops (stmt))
2276 goto fail;
2277
2278 /* Ignore bitfields for now. Not really something terribly complicated
2279 to handle. TODO. */
2280 if (TREE_CODE (base) == BIT_FIELD_REF)
2281 goto fail;
2282
2283 base = unshare_expr (base);
2284
2285 if (TREE_CODE (base) == TARGET_MEM_REF)
2286 {
2287 tree type = build_pointer_type (TREE_TYPE (base));
2288 tree astep;
2289
2290 if (TMR_BASE (base)
2291 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2292 {
2293 civ = get_iv (data, TMR_BASE (base));
2294 if (!civ)
2295 goto fail;
2296
2297 TMR_BASE (base) = civ->base;
2298 step = civ->step;
2299 }
2300 if (TMR_INDEX2 (base)
2301 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2302 {
2303 civ = get_iv (data, TMR_INDEX2 (base));
2304 if (!civ)
2305 goto fail;
2306
2307 TMR_INDEX2 (base) = civ->base;
2308 step = civ->step;
2309 }
2310 if (TMR_INDEX (base)
2311 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2312 {
2313 civ = get_iv (data, TMR_INDEX (base));
2314 if (!civ)
2315 goto fail;
2316
2317 TMR_INDEX (base) = civ->base;
2318 astep = civ->step;
2319
2320 if (astep)
2321 {
2322 if (TMR_STEP (base))
2323 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2324
2325 step = fold_build2 (PLUS_EXPR, type, step, astep);
2326 }
2327 }
2328
2329 if (integer_zerop (step))
2330 goto fail;
2331 base = tree_mem_ref_addr (type, base);
2332 }
2333 else
2334 {
2335 ifs_ivopts_data.ivopts_data = data;
2336 ifs_ivopts_data.stmt = stmt;
2337 ifs_ivopts_data.step = size_zero_node;
2338 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2339 || integer_zerop (ifs_ivopts_data.step))
2340 goto fail;
2341 step = ifs_ivopts_data.step;
2342
2343 /* Check that the base expression is addressable. This needs
2344 to be done after substituting bases of IVs into it. */
2345 if (may_be_nonaddressable_p (base))
2346 goto fail;
2347
2348 /* Moreover, on strict alignment platforms, check that it is
2349 sufficiently aligned. */
2350 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2351 goto fail;
2352
2353 base = build_fold_addr_expr (base);
2354
2355 /* Substituting bases of IVs into the base expression might
2356 have caused folding opportunities. */
2357 if (TREE_CODE (base) == ADDR_EXPR)
2358 {
2359 tree *ref = &TREE_OPERAND (base, 0);
2360 while (handled_component_p (*ref))
2361 ref = &TREE_OPERAND (*ref, 0);
2362 if (TREE_CODE (*ref) == MEM_REF)
2363 {
2364 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2365 TREE_OPERAND (*ref, 0),
2366 TREE_OPERAND (*ref, 1));
2367 if (tem)
2368 *ref = tem;
2369 }
2370 }
2371 }
2372
2373 civ = alloc_iv (data, base, step);
2374 /* Fail if base object of this memory reference is unknown. */
2375 if (civ->base_object == NULL_TREE)
2376 goto fail;
2377
2378 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2379 return;
2380
2381 fail:
2382 for_each_index (op_p, idx_record_use, data);
2383 }
2384
2385 /* Finds and records invariants used in STMT. */
2386
2387 static void
2388 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2389 {
2390 ssa_op_iter iter;
2391 use_operand_p use_p;
2392 tree op;
2393
2394 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2395 {
2396 op = USE_FROM_PTR (use_p);
2397 record_invariant (data, op, false);
2398 }
2399 }
2400
2401 /* Finds interesting uses of induction variables in the statement STMT. */
2402
2403 static void
2404 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2405 {
2406 struct iv *iv;
2407 tree op, *lhs, *rhs;
2408 ssa_op_iter iter;
2409 use_operand_p use_p;
2410 enum tree_code code;
2411
2412 find_invariants_stmt (data, stmt);
2413
2414 if (gimple_code (stmt) == GIMPLE_COND)
2415 {
2416 find_interesting_uses_cond (data, stmt);
2417 return;
2418 }
2419
2420 if (is_gimple_assign (stmt))
2421 {
2422 lhs = gimple_assign_lhs_ptr (stmt);
2423 rhs = gimple_assign_rhs1_ptr (stmt);
2424
2425 if (TREE_CODE (*lhs) == SSA_NAME)
2426 {
2427 /* If the statement defines an induction variable, the uses are not
2428 interesting by themselves. */
2429
2430 iv = get_iv (data, *lhs);
2431
2432 if (iv && !integer_zerop (iv->step))
2433 return;
2434 }
2435
2436 code = gimple_assign_rhs_code (stmt);
2437 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2438 && (REFERENCE_CLASS_P (*rhs)
2439 || is_gimple_val (*rhs)))
2440 {
2441 if (REFERENCE_CLASS_P (*rhs))
2442 find_interesting_uses_address (data, stmt, rhs);
2443 else
2444 find_interesting_uses_op (data, *rhs);
2445
2446 if (REFERENCE_CLASS_P (*lhs))
2447 find_interesting_uses_address (data, stmt, lhs);
2448 return;
2449 }
2450 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2451 {
2452 find_interesting_uses_cond (data, stmt);
2453 return;
2454 }
2455
2456 /* TODO -- we should also handle address uses of type
2457
2458 memory = call (whatever);
2459
2460 and
2461
2462 call (memory). */
2463 }
2464
2465 if (gimple_code (stmt) == GIMPLE_PHI
2466 && gimple_bb (stmt) == data->current_loop->header)
2467 {
2468 iv = get_iv (data, PHI_RESULT (stmt));
2469
2470 if (iv && !integer_zerop (iv->step))
2471 return;
2472 }
2473
2474 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2475 {
2476 op = USE_FROM_PTR (use_p);
2477
2478 if (TREE_CODE (op) != SSA_NAME)
2479 continue;
2480
2481 iv = get_iv (data, op);
2482 if (!iv)
2483 continue;
2484
2485 find_interesting_uses_op (data, op);
2486 }
2487 }
2488
2489 /* Finds interesting uses of induction variables outside of loops
2490 on loop exit edge EXIT. */
2491
2492 static void
2493 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2494 {
2495 gphi *phi;
2496 gphi_iterator psi;
2497 tree def;
2498
2499 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2500 {
2501 phi = psi.phi ();
2502 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2503 if (!virtual_operand_p (def))
2504 find_interesting_uses_op (data, def);
2505 }
2506 }
2507
2508 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2509 mode for memory reference represented by USE. */
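
/* For example (a hypothetical target): if the [base + offset] addressing
   mode accepts signed 12-bit immediates, an OFFSET of 100 would be accepted
   while an OFFSET of 5000 would be rejected.  */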
2510
2511 static GTY (()) vec<rtx, va_gc> *addr_list;
2512
2513 static bool
2514 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2515 {
2516 rtx reg, addr;
2517 unsigned list_index;
2518 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2519 machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2520
2521 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2522 if (list_index >= vec_safe_length (addr_list))
2523 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2524
2525 addr = (*addr_list)[list_index];
2526 if (!addr)
2527 {
2528 addr_mode = targetm.addr_space.address_mode (as);
2529 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2530 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2531 (*addr_list)[list_index] = addr;
2532 }
2533 else
2534 addr_mode = GET_MODE (addr);
2535
2536 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2537 return (memory_address_addr_space_p (mem_mode, addr, as));
2538 }
2539
2540 /* Comparison function to sort group in ascending order of addr_offset. */
2541
2542 static int
2543 group_compare_offset (const void *a, const void *b)
2544 {
2545 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2546 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2547
2548 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2549 }
2550
2551 /* Check if small groups should be split.  Return true if no group
2552    contains more than two uses with distinct addr_offsets.  Return
2553    false otherwise.  We want to split such groups because:
2554
2555    1) Small groups don't have much benefit and may interfere with
2556       general candidate selection.
2557    2) The problem size with only small groups is usually small, and the
2558       general algorithm can handle it well.
2559
2560    TODO -- The above claim may not hold when we want to merge memory
2561    accesses with consecutive addresses. */
2562
2563 static bool
2564 split_small_address_groups_p (struct ivopts_data *data)
2565 {
2566 unsigned int i, j, distinct = 1;
2567 struct iv_use *pre;
2568 struct iv_group *group;
2569
2570 for (i = 0; i < data->vgroups.length (); i++)
2571 {
2572 group = data->vgroups[i];
2573 if (group->vuses.length () == 1)
2574 continue;
2575
2576 gcc_assert (group->type == USE_ADDRESS);
2577 if (group->vuses.length () == 2)
2578 {
2579 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2580 group->vuses[1]->addr_offset) > 0)
2581 std::swap (group->vuses[0], group->vuses[1]);
2582 }
2583 else
2584 group->vuses.qsort (group_compare_offset);
2585
2586 if (distinct > 2)
2587 continue;
2588
2589 distinct = 1;
2590 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2591 {
2592 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2593 {
2594 pre = group->vuses[j];
2595 distinct++;
2596 }
2597
2598 if (distinct > 2)
2599 break;
2600 }
2601 }
2602
2603 return (distinct <= 2);
2604 }
2605
2606 /* For each group of address type uses, this function further groups
2607 these uses according to the maximum offset supported by target's
2608 [base + offset] addressing mode. */
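
/* For example (a sketch): assuming not all groups are small, uses at
   offsets 0, 8 and 16 from the first use stay in one group when those
   offsets are valid for the addressing mode, while a use at an offset too
   large for the addressing mode is moved into a new group.  */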
2609
2610 static void
2611 split_address_groups (struct ivopts_data *data)
2612 {
2613 unsigned int i, j;
2614   /* Whether to split groups at every distinct offset. */
2615 bool split_p = split_small_address_groups_p (data);
2616
2617 for (i = 0; i < data->vgroups.length (); i++)
2618 {
2619 struct iv_group *new_group = NULL;
2620 struct iv_group *group = data->vgroups[i];
2621 struct iv_use *use = group->vuses[0];
2622
2623 use->id = 0;
2624 use->group_id = group->id;
2625 if (group->vuses.length () == 1)
2626 continue;
2627
2628 gcc_assert (group->type == USE_ADDRESS);
2629
2630 for (j = 1; j < group->vuses.length ();)
2631 {
2632 struct iv_use *next = group->vuses[j];
2633 poly_int64 offset = next->addr_offset - use->addr_offset;
2634
2635 	  /* Split the group if asked to, or if the offset against the first
2636 	     use can't fit in the offset part of the addressing mode.  IV uses
2637 	     having the same offset are still kept in one group. */
2638 if (maybe_ne (offset, 0)
2639 && (split_p || !addr_offset_valid_p (use, offset)))
2640 {
2641 if (!new_group)
2642 new_group = record_group (data, group->type);
2643 group->vuses.ordered_remove (j);
2644 new_group->vuses.safe_push (next);
2645 continue;
2646 }
2647
2648 next->id = j;
2649 next->group_id = group->id;
2650 j++;
2651 }
2652 }
2653 }
2654
2655 /* Finds uses of the induction variables that are interesting. */
2656
2657 static void
2658 find_interesting_uses (struct ivopts_data *data)
2659 {
2660 basic_block bb;
2661 gimple_stmt_iterator bsi;
2662 basic_block *body = get_loop_body (data->current_loop);
2663 unsigned i;
2664 edge e;
2665
2666 for (i = 0; i < data->current_loop->num_nodes; i++)
2667 {
2668 edge_iterator ei;
2669 bb = body[i];
2670
2671 FOR_EACH_EDGE (e, ei, bb->succs)
2672 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2673 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2674 find_interesting_uses_outside (data, e);
2675
2676 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2677 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2678 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2679 if (!is_gimple_debug (gsi_stmt (bsi)))
2680 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2681 }
2682 free (body);
2683
2684 split_address_groups (data);
2685
2686 if (dump_file && (dump_flags & TDF_DETAILS))
2687 {
2688 fprintf (dump_file, "\n<IV Groups>:\n");
2689 dump_groups (dump_file, data);
2690 fprintf (dump_file, "\n");
2691 }
2692 }
2693
2694 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2695 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2696 we are at the top-level of the processed address. */
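
/* For example (illustrative only): for EXPR = p_1 + 24 this returns p_1 and
   stores 24 to *OFFSET; for EXPR = n_2 * 4 + 16 it returns n_2 * 4 and
   stores 16.  */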
2697
2698 static tree
2699 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2700 poly_int64 *offset)
2701 {
2702 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2703 enum tree_code code;
2704 tree type, orig_type = TREE_TYPE (expr);
2705 poly_int64 off0, off1;
2706 HOST_WIDE_INT st;
2707 tree orig_expr = expr;
2708
2709 STRIP_NOPS (expr);
2710
2711 type = TREE_TYPE (expr);
2712 code = TREE_CODE (expr);
2713 *offset = 0;
2714
2715 switch (code)
2716 {
2717 case POINTER_PLUS_EXPR:
2718 case PLUS_EXPR:
2719 case MINUS_EXPR:
2720 op0 = TREE_OPERAND (expr, 0);
2721 op1 = TREE_OPERAND (expr, 1);
2722
2723 op0 = strip_offset_1 (op0, false, false, &off0);
2724 op1 = strip_offset_1 (op1, false, false, &off1);
2725
2726 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2727 if (op0 == TREE_OPERAND (expr, 0)
2728 && op1 == TREE_OPERAND (expr, 1))
2729 return orig_expr;
2730
2731 if (integer_zerop (op1))
2732 expr = op0;
2733 else if (integer_zerop (op0))
2734 {
2735 if (code == MINUS_EXPR)
2736 expr = fold_build1 (NEGATE_EXPR, type, op1);
2737 else
2738 expr = op1;
2739 }
2740 else
2741 expr = fold_build2 (code, type, op0, op1);
2742
2743 return fold_convert (orig_type, expr);
2744
2745 case MULT_EXPR:
2746 op1 = TREE_OPERAND (expr, 1);
2747 if (!cst_and_fits_in_hwi (op1))
2748 return orig_expr;
2749
2750 op0 = TREE_OPERAND (expr, 0);
2751 op0 = strip_offset_1 (op0, false, false, &off0);
2752 if (op0 == TREE_OPERAND (expr, 0))
2753 return orig_expr;
2754
2755 *offset = off0 * int_cst_value (op1);
2756 if (integer_zerop (op0))
2757 expr = op0;
2758 else
2759 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2760
2761 return fold_convert (orig_type, expr);
2762
2763 case ARRAY_REF:
2764 case ARRAY_RANGE_REF:
2765 if (!inside_addr)
2766 return orig_expr;
2767
2768 step = array_ref_element_size (expr);
2769 if (!cst_and_fits_in_hwi (step))
2770 break;
2771
2772 st = int_cst_value (step);
2773 op1 = TREE_OPERAND (expr, 1);
2774 op1 = strip_offset_1 (op1, false, false, &off1);
2775 *offset = off1 * st;
2776
2777 if (top_compref
2778 && integer_zerop (op1))
2779 {
2780 /* Strip the component reference completely. */
2781 op0 = TREE_OPERAND (expr, 0);
2782 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2783 *offset += off0;
2784 return op0;
2785 }
2786 break;
2787
2788 case COMPONENT_REF:
2789 {
2790 tree field;
2791
2792 if (!inside_addr)
2793 return orig_expr;
2794
2795 tmp = component_ref_field_offset (expr);
2796 field = TREE_OPERAND (expr, 1);
2797 if (top_compref
2798 && cst_and_fits_in_hwi (tmp)
2799 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2800 {
2801 HOST_WIDE_INT boffset, abs_off;
2802
2803 /* Strip the component reference completely. */
2804 op0 = TREE_OPERAND (expr, 0);
2805 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2806 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2807 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2808 if (boffset < 0)
2809 abs_off = -abs_off;
2810
2811 *offset = off0 + int_cst_value (tmp) + abs_off;
2812 return op0;
2813 }
2814 }
2815 break;
2816
2817 case ADDR_EXPR:
2818 op0 = TREE_OPERAND (expr, 0);
2819 op0 = strip_offset_1 (op0, true, true, &off0);
2820 *offset += off0;
2821
2822 if (op0 == TREE_OPERAND (expr, 0))
2823 return orig_expr;
2824
2825 expr = build_fold_addr_expr (op0);
2826 return fold_convert (orig_type, expr);
2827
2828 case MEM_REF:
2829 /* ??? Offset operand? */
2830 inside_addr = false;
2831 break;
2832
2833 default:
2834 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2835 return build_int_cst (orig_type, 0);
2836 return orig_expr;
2837 }
2838
2839   /* Default handling of expressions for which we want to recurse into
2840      the first operand. */
2841 op0 = TREE_OPERAND (expr, 0);
2842 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2843 *offset += off0;
2844
2845 if (op0 == TREE_OPERAND (expr, 0)
2846 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2847 return orig_expr;
2848
2849 expr = copy_node (expr);
2850 TREE_OPERAND (expr, 0) = op0;
2851 if (op1)
2852 TREE_OPERAND (expr, 1) = op1;
2853
2854 /* Inside address, we might strip the top level component references,
2855 thus changing type of the expression. Handling of ADDR_EXPR
2856 will fix that. */
2857 expr = fold_convert (orig_type, expr);
2858
2859 return expr;
2860 }
2861
2862 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2863
2864 tree
2865 strip_offset (tree expr, poly_uint64_pod *offset)
2866 {
2867 poly_int64 off;
2868 tree core = strip_offset_1 (expr, false, false, &off);
2869 *offset = off;
2870 return core;
2871 }
2872
2873 /* Returns variant of TYPE that can be used as base for different uses.
2874 We return unsigned type with the same precision, which avoids problems
2875 with overflows. */
2876
2877 static tree
2878 generic_type_for (tree type)
2879 {
2880 if (POINTER_TYPE_P (type))
2881 return unsigned_type_for (type);
2882
2883 if (TYPE_UNSIGNED (type))
2884 return type;
2885
2886 return unsigned_type_for (type);
2887 }
2888
2889 /* Private data for walk_tree. */
2890
2891 struct walk_tree_data
2892 {
2893 bitmap *inv_vars;
2894 struct ivopts_data *idata;
2895 };
2896
2897 /* Callback function for walk_tree; it records invariants and symbol
2898    references in *EXPR_P.  DATA is the structure storing result info. */
2899
2900 static tree
2901 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2902 {
2903 tree op = *expr_p;
2904 struct version_info *info;
2905 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2906
2907 if (TREE_CODE (op) != SSA_NAME)
2908 return NULL_TREE;
2909
2910 info = name_info (wdata->idata, op);
2911   /* Because we expand simple operations when finding IVs, a loop invariant
2912      variable that isn't referred to by the original loop could be used now.
2913      Record such invariant variables here. */
2914 if (!info->iv)
2915 {
2916 struct ivopts_data *idata = wdata->idata;
2917 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2918
2919 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2920 {
2921 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2922 record_invariant (idata, op, false);
2923 }
2924 }
2925 if (!info->inv_id || info->has_nonlin_use)
2926 return NULL_TREE;
2927
2928 if (!*wdata->inv_vars)
2929 *wdata->inv_vars = BITMAP_ALLOC (NULL);
2930 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2931
2932 return NULL_TREE;
2933 }
2934
2935 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
2936    should store them. */
2937
2938 static inline void
2939 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2940 {
2941 struct walk_tree_data wdata;
2942
2943 if (!inv_vars)
2944 return;
2945
2946 wdata.idata = data;
2947 wdata.inv_vars = inv_vars;
2948 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2949 }
2950
2951 /* Get the entry from the invariant expr hash table for INV_EXPR.  A new
2952    entry will be recorded if it doesn't exist yet.  Given the two exprs:
2953      inv_expr + cst1, inv_expr + cst2
2954    it's hard to decide whether the constant part should be stripped or
2955    not.  We choose not to strip it, based on these facts:
2956      1) We need to count the ADD cost for the constant part if it's
2957 	stripped, which isn't always trivial where this function is called.
2958      2) Stripping the constant away may conflict with the following loop
2959 	invariant hoisting pass.
2960      3) Not stripping the constant away results in more invariant exprs,
2961 	which usually leads to decisions preferring lower register pressure. */
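
/* For instance, inv_1 + 4 and inv_1 + 8 are therefore recorded as two
   distinct invariant expressions rather than sharing a single entry
   for inv_1.  */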
2962
2963 static iv_inv_expr_ent *
2964 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2965 {
2966 STRIP_NOPS (inv_expr);
2967
2968 if (poly_int_tree_p (inv_expr)
2969 || TREE_CODE (inv_expr) == SSA_NAME)
2970 return NULL;
2971
2972 /* Don't strip constant part away as we used to. */
2973
2974 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
2975 struct iv_inv_expr_ent ent;
2976 ent.expr = inv_expr;
2977 ent.hash = iterative_hash_expr (inv_expr, 0);
2978 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
2979
2980 if (!*slot)
2981 {
2982 *slot = XNEW (struct iv_inv_expr_ent);
2983 (*slot)->expr = inv_expr;
2984 (*slot)->hash = ent.hash;
2985 (*slot)->id = ++data->max_inv_expr_id;
2986 }
2987
2988 return *slot;
2989 }
2990
2991 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2992 position to POS. If USE is not NULL, the candidate is set as related to
2993 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2994 replacement of the final value of the iv by a direct computation. */
2995
2996 static struct iv_cand *
2997 add_candidate_1 (struct ivopts_data *data,
2998 tree base, tree step, bool important, enum iv_position pos,
2999 struct iv_use *use, gimple *incremented_at,
3000 struct iv *orig_iv = NULL)
3001 {
3002 unsigned i;
3003 struct iv_cand *cand = NULL;
3004 tree type, orig_type;
3005
3006 gcc_assert (base && step);
3007
3008 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3009 live, but the ivopts code may replace a real pointer with one
3010 pointing before or after the memory block that is then adjusted
3011 into the memory block during the loop. FIXME: It would likely be
3012 better to actually force the pointer live and still use ivopts;
3013 for example, it would be enough to write the pointer into memory
3014 and keep it there until after the loop. */
3015 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3016 return NULL;
3017
3018 /* For non-original variables, make sure their values are computed in a type
3019 that does not invoke undefined behavior on overflows (since in general,
3020 we cannot prove that these induction variables are non-wrapping). */
3021 if (pos != IP_ORIGINAL)
3022 {
3023 orig_type = TREE_TYPE (base);
3024 type = generic_type_for (orig_type);
3025 if (type != orig_type)
3026 {
3027 base = fold_convert (type, base);
3028 step = fold_convert (type, step);
3029 }
3030 }
3031
3032 for (i = 0; i < data->vcands.length (); i++)
3033 {
3034 cand = data->vcands[i];
3035
3036 if (cand->pos != pos)
3037 continue;
3038
3039 if (cand->incremented_at != incremented_at
3040 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3041 && cand->ainc_use != use))
3042 continue;
3043
3044 if (operand_equal_p (base, cand->iv->base, 0)
3045 && operand_equal_p (step, cand->iv->step, 0)
3046 && (TYPE_PRECISION (TREE_TYPE (base))
3047 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3048 break;
3049 }
3050
3051 if (i == data->vcands.length ())
3052 {
3053 cand = XCNEW (struct iv_cand);
3054 cand->id = i;
3055 cand->iv = alloc_iv (data, base, step);
3056 cand->pos = pos;
3057 if (pos != IP_ORIGINAL)
3058 {
3059 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3060 cand->var_after = cand->var_before;
3061 }
3062 cand->important = important;
3063 cand->incremented_at = incremented_at;
3064 data->vcands.safe_push (cand);
3065
3066 if (!poly_int_tree_p (step))
3067 {
3068 find_inv_vars (data, &step, &cand->inv_vars);
3069
3070 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3071 /* Share bitmap between inv_vars and inv_exprs for cand. */
3072 if (inv_expr != NULL)
3073 {
3074 cand->inv_exprs = cand->inv_vars;
3075 cand->inv_vars = NULL;
3076 if (cand->inv_exprs)
3077 bitmap_clear (cand->inv_exprs);
3078 else
3079 cand->inv_exprs = BITMAP_ALLOC (NULL);
3080
3081 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3082 }
3083 }
3084
3085 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3086 cand->ainc_use = use;
3087 else
3088 cand->ainc_use = NULL;
3089
3090 cand->orig_iv = orig_iv;
3091 if (dump_file && (dump_flags & TDF_DETAILS))
3092 dump_cand (dump_file, cand);
3093 }
3094
3095 cand->important |= important;
3096
3097 /* Relate candidate to the group for which it is added. */
3098 if (use)
3099 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3100
3101 return cand;
3102 }
3103
3104 /* Returns true if incrementing the induction variable at the end of the LOOP
3105 is allowed.
3106
3107    The purpose is to avoid splitting the latch edge with a biv increment,
3108    thus creating a jump, possibly confusing other optimization passes and
3109    leaving less freedom to the scheduler.  So we allow IP_END only if
3110    IP_NORMAL is not available (so we do not have a better alternative), or
3111    if the latch edge is already nonempty. */
3112
3113 static bool
3114 allow_ip_end_pos_p (struct loop *loop)
3115 {
3116 if (!ip_normal_pos (loop))
3117 return true;
3118
3119 if (!empty_block_p (ip_end_pos (loop)))
3120 return true;
3121
3122 return false;
3123 }
3124
3125 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3126 Important field is set to IMPORTANT. */
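
/* For example (a sketch): on a target with post-increment addressing, a use
   *p_1 whose candidate steps by exactly the access size can be matched by a
   candidate incremented right after the use (IP_AFTER_USE), corresponding
   to a *p++ style access.  */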
3127
3128 static void
3129 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3130 bool important, struct iv_use *use)
3131 {
3132 basic_block use_bb = gimple_bb (use->stmt);
3133 machine_mode mem_mode;
3134 unsigned HOST_WIDE_INT cstepi;
3135
3136 /* If we insert the increment in any position other than the standard
3137 ones, we must ensure that it is incremented once per iteration.
3138 It must not be in an inner nested loop, or one side of an if
3139 statement. */
3140 if (use_bb->loop_father != data->current_loop
3141 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3142 || stmt_can_throw_internal (use->stmt)
3143 || !cst_and_fits_in_hwi (step))
3144 return;
3145
3146 cstepi = int_cst_value (step);
3147
3148 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3149 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3150 || USE_STORE_PRE_INCREMENT (mem_mode))
3151 && GET_MODE_SIZE (mem_mode) == cstepi)
3152 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3153 || USE_STORE_PRE_DECREMENT (mem_mode))
3154 && GET_MODE_SIZE (mem_mode) == -cstepi))
3155 {
3156 enum tree_code code = MINUS_EXPR;
3157 tree new_base;
3158 tree new_step = step;
3159
3160 if (POINTER_TYPE_P (TREE_TYPE (base)))
3161 {
3162 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3163 code = POINTER_PLUS_EXPR;
3164 }
3165 else
3166 new_step = fold_convert (TREE_TYPE (base), new_step);
3167 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3168 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3169 use->stmt);
3170 }
3171 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3172 || USE_STORE_POST_INCREMENT (mem_mode))
3173 && GET_MODE_SIZE (mem_mode) == cstepi)
3174 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3175 || USE_STORE_POST_DECREMENT (mem_mode))
3176 && GET_MODE_SIZE (mem_mode) == -cstepi))
3177 {
3178 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3179 use->stmt);
3180 }
3181 }
3182
3183 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3184 position to POS. If USE is not NULL, the candidate is set as related to
3185 it. The candidate computation is scheduled before exit condition and at
3186 the end of loop. */
3187
3188 static void
3189 add_candidate (struct ivopts_data *data,
3190 tree base, tree step, bool important, struct iv_use *use,
3191 struct iv *orig_iv = NULL)
3192 {
3193 if (ip_normal_pos (data->current_loop))
3194 add_candidate_1 (data, base, step, important,
3195 IP_NORMAL, use, NULL, orig_iv);
3196 if (ip_end_pos (data->current_loop)
3197 && allow_ip_end_pos_p (data->current_loop))
3198 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3199 }
3200
3201 /* Adds standard iv candidates. */
3202
3203 static void
3204 add_standard_iv_candidates (struct ivopts_data *data)
3205 {
3206 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3207
3208 /* The same for a double-integer type if it is still fast enough. */
3209 if (TYPE_PRECISION
3210 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3211 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3212 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3213 build_int_cst (long_integer_type_node, 1), true, NULL);
3214
3215 /* The same for a double-integer type if it is still fast enough. */
3216 if (TYPE_PRECISION
3217 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3218 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3219 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3220 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3221 }
3222
3223
3224 /* Adds candidates based on the old induction variable IV. */
3225
3226 static void
3227 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3228 {
3229 gimple *phi;
3230 tree def;
3231 struct iv_cand *cand;
3232
3233 /* Check if this biv is used in address type use. */
3234 if (iv->no_overflow && iv->have_address_use
3235 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3236 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3237 {
3238 tree base = fold_convert (sizetype, iv->base);
3239 tree step = fold_convert (sizetype, iv->step);
3240
3241 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3242 add_candidate (data, base, step, true, NULL, iv);
3243 /* Add iv cand of the original type only if it has nonlinear use. */
3244 if (iv->nonlin_use)
3245 add_candidate (data, iv->base, iv->step, true, NULL);
3246 }
3247 else
3248 add_candidate (data, iv->base, iv->step, true, NULL);
3249
3250 /* The same, but with initial value zero. */
3251 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3252 add_candidate (data, size_int (0), iv->step, true, NULL);
3253 else
3254 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3255 iv->step, true, NULL);
3256
3257 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3258 if (gimple_code (phi) == GIMPLE_PHI)
3259 {
3260 /* Additionally record the possibility of leaving the original iv
3261 untouched. */
3262 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3263 /* Don't add candidate if it's from another PHI node because
3264 it's an affine iv appearing in the form of PEELED_CHREC. */
3265 phi = SSA_NAME_DEF_STMT (def);
3266 if (gimple_code (phi) != GIMPLE_PHI)
3267 {
3268 cand = add_candidate_1 (data,
3269 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3270 SSA_NAME_DEF_STMT (def));
3271 if (cand)
3272 {
3273 cand->var_before = iv->ssa_name;
3274 cand->var_after = def;
3275 }
3276 }
3277 else
3278 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3279 }
3280 }
3281
3282 /* Adds candidates based on the old induction variables. */
3283
3284 static void
3285 add_iv_candidate_for_bivs (struct ivopts_data *data)
3286 {
3287 unsigned i;
3288 struct iv *iv;
3289 bitmap_iterator bi;
3290
3291 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3292 {
3293 iv = ver_info (data, i)->iv;
3294 if (iv && iv->biv_p && !integer_zerop (iv->step))
3295 add_iv_candidate_for_biv (data, iv);
3296 }
3297 }
3298
3299 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3300
3301 static void
3302 record_common_cand (struct ivopts_data *data, tree base,
3303 tree step, struct iv_use *use)
3304 {
3305 struct iv_common_cand ent;
3306 struct iv_common_cand **slot;
3307
3308 ent.base = base;
3309 ent.step = step;
3310 ent.hash = iterative_hash_expr (base, 0);
3311 ent.hash = iterative_hash_expr (step, ent.hash);
3312
3313 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3314 if (*slot == NULL)
3315 {
3316 *slot = new iv_common_cand ();
3317 (*slot)->base = base;
3318 (*slot)->step = step;
3319 (*slot)->uses.create (8);
3320 (*slot)->hash = ent.hash;
3321 data->iv_common_cands.safe_push ((*slot));
3322 }
3323
3324 gcc_assert (use != NULL);
3325 (*slot)->uses.safe_push (use);
3326 return;
3327 }
3328
3329 /* Comparison function used to sort common candidates. */
3330
3331 static int
3332 common_cand_cmp (const void *p1, const void *p2)
3333 {
3334 unsigned n1, n2;
3335 const struct iv_common_cand *const *const ccand1
3336 = (const struct iv_common_cand *const *)p1;
3337 const struct iv_common_cand *const *const ccand2
3338 = (const struct iv_common_cand *const *)p2;
3339
3340 n1 = (*ccand1)->uses.length ();
3341 n2 = (*ccand2)->uses.length ();
3342 return n2 - n1;
3343 }
3344
3345 /* Adds IV candidates based on the common candidates recorded. */
3346
3347 static void
3348 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3349 {
3350 unsigned i, j;
3351 struct iv_cand *cand_1, *cand_2;
3352
3353 data->iv_common_cands.qsort (common_cand_cmp);
3354 for (i = 0; i < data->iv_common_cands.length (); i++)
3355 {
3356 struct iv_common_cand *ptr = data->iv_common_cands[i];
3357
3358 /* Only add IV candidate if it's derived from multiple uses. */
3359 if (ptr->uses.length () <= 1)
3360 break;
3361
3362 cand_1 = NULL;
3363 cand_2 = NULL;
3364 if (ip_normal_pos (data->current_loop))
3365 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3366 false, IP_NORMAL, NULL, NULL);
3367
3368 if (ip_end_pos (data->current_loop)
3369 && allow_ip_end_pos_p (data->current_loop))
3370 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3371 false, IP_END, NULL, NULL);
3372
3373 /* Bind deriving uses and the new candidates. */
3374 for (j = 0; j < ptr->uses.length (); j++)
3375 {
3376 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3377 if (cand_1)
3378 bitmap_set_bit (group->related_cands, cand_1->id);
3379 if (cand_2)
3380 bitmap_set_bit (group->related_cands, cand_2->id);
3381 }
3382 }
3383
3384 /* Release data since it is useless from this point. */
3385 data->iv_common_cand_tab->empty ();
3386 data->iv_common_cands.truncate (0);
3387 }
3388
3389 /* Adds candidates based on the value of USE's iv. */
3390
3391 static void
3392 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3393 {
3394 poly_uint64 offset;
3395 tree base;
3396 tree basetype;
3397 struct iv *iv = use->iv;
3398
3399 add_candidate (data, iv->base, iv->step, false, use);
3400
3401 /* Record common candidate for use in case it can be shared by others. */
3402 record_common_cand (data, iv->base, iv->step, use);
3403
3404 /* Record common candidate with initial value zero. */
3405 basetype = TREE_TYPE (iv->base);
3406 if (POINTER_TYPE_P (basetype))
3407 basetype = sizetype;
3408 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3409
3410   /* Record common candidate with the constant offset stripped from the base.
3411      As for the use itself, we also add a candidate directly for it. */
3412 base = strip_offset (iv->base, &offset);
3413 if (maybe_ne (offset, 0U) || base != iv->base)
3414 {
3415 record_common_cand (data, base, iv->step, use);
3416 add_candidate (data, base, iv->step, false, use);
3417 }
3418
3419 /* Record common candidate with base_object removed in base. */
3420 base = iv->base;
3421 STRIP_NOPS (base);
3422 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3423 {
3424 tree step = iv->step;
3425
3426 STRIP_NOPS (step);
3427 base = TREE_OPERAND (base, 1);
3428 step = fold_convert (sizetype, step);
3429 record_common_cand (data, base, step, use);
3430 /* Also record common candidate with offset stripped. */
3431 base = strip_offset (base, &offset);
3432 if (maybe_ne (offset, 0U))
3433 record_common_cand (data, base, step, use);
3434 }
3435
3436   /* Finally, add auto-increment candidates.  Make such variables important
3437      since other iv uses with the same base object may be based
3438      on them. */
3439 if (use != NULL && use->type == USE_ADDRESS)
3440 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3441 }
3442
3443 /* Adds candidates based on the uses. */
3444
3445 static void
3446 add_iv_candidate_for_groups (struct ivopts_data *data)
3447 {
3448 unsigned i;
3449
3450 /* Only add candidate for the first use in group. */
3451 for (i = 0; i < data->vgroups.length (); i++)
3452 {
3453 struct iv_group *group = data->vgroups[i];
3454
3455 gcc_assert (group->vuses[0] != NULL);
3456 add_iv_candidate_for_use (data, group->vuses[0]);
3457 }
3458 add_iv_candidate_derived_from_uses (data);
3459 }
3460
3461 /* Record important candidates and add them to related_cands bitmaps. */
3462
3463 static void
3464 record_important_candidates (struct ivopts_data *data)
3465 {
3466 unsigned i;
3467 struct iv_group *group;
3468
3469 for (i = 0; i < data->vcands.length (); i++)
3470 {
3471 struct iv_cand *cand = data->vcands[i];
3472
3473 if (cand->important)
3474 bitmap_set_bit (data->important_candidates, i);
3475 }
3476
3477 data->consider_all_candidates = (data->vcands.length ()
3478 <= CONSIDER_ALL_CANDIDATES_BOUND);
3479
3480 /* Add important candidates to groups' related_cands bitmaps. */
3481 for (i = 0; i < data->vgroups.length (); i++)
3482 {
3483 group = data->vgroups[i];
3484 bitmap_ior_into (group->related_cands, data->important_candidates);
3485 }
3486 }
3487
3488 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3489 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3490 we allocate a simple list to every use. */
3491
3492 static void
3493 alloc_use_cost_map (struct ivopts_data *data)
3494 {
3495 unsigned i, size, s;
3496
3497 for (i = 0; i < data->vgroups.length (); i++)
3498 {
3499 struct iv_group *group = data->vgroups[i];
3500
3501 if (data->consider_all_candidates)
3502 size = data->vcands.length ();
3503 else
3504 {
3505 s = bitmap_count_bits (group->related_cands);
3506
3507 	  /* Round up to a power of two, so that computing the modulo is fast. */
3508 size = s ? (1 << ceil_log2 (s)) : 1;
3509 }
3510
3511 group->n_map_members = size;
3512 group->cost_map = XCNEWVEC (struct cost_pair, size);
3513 }
3514 }
3515
3516 /* Sets cost of (GROUP, CAND) pair to COST and records that it depends
3517 on invariants INV_VARS and that the value used in expressing it is
3518 VALUE, and in case of iv elimination the comparison operator is COMP. */
3519
3520 static void
3521 set_group_iv_cost (struct ivopts_data *data,
3522 struct iv_group *group, struct iv_cand *cand,
3523 comp_cost cost, bitmap inv_vars, tree value,
3524 enum tree_code comp, bitmap inv_exprs)
3525 {
3526 unsigned i, s;
3527
3528 if (cost.infinite_cost_p ())
3529 {
3530 BITMAP_FREE (inv_vars);
3531 BITMAP_FREE (inv_exprs);
3532 return;
3533 }
3534
3535 if (data->consider_all_candidates)
3536 {
3537 group->cost_map[cand->id].cand = cand;
3538 group->cost_map[cand->id].cost = cost;
3539 group->cost_map[cand->id].inv_vars = inv_vars;
3540 group->cost_map[cand->id].inv_exprs = inv_exprs;
3541 group->cost_map[cand->id].value = value;
3542 group->cost_map[cand->id].comp = comp;
3543 return;
3544 }
3545
3546 /* n_map_members is a power of two, so this computes modulo. */
3547 s = cand->id & (group->n_map_members - 1);
3548 for (i = s; i < group->n_map_members; i++)
3549 if (!group->cost_map[i].cand)
3550 goto found;
3551 for (i = 0; i < s; i++)
3552 if (!group->cost_map[i].cand)
3553 goto found;
3554
3555 gcc_unreachable ();
3556
3557 found:
3558 group->cost_map[i].cand = cand;
3559 group->cost_map[i].cost = cost;
3560 group->cost_map[i].inv_vars = inv_vars;
3561 group->cost_map[i].inv_exprs = inv_exprs;
3562 group->cost_map[i].value = value;
3563 group->cost_map[i].comp = comp;
3564 }
3565
3566 /* Gets cost of (GROUP, CAND) pair. */
3567
3568 static struct cost_pair *
3569 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3570 struct iv_cand *cand)
3571 {
3572 unsigned i, s;
3573 struct cost_pair *ret;
3574
3575 if (!cand)
3576 return NULL;
3577
3578 if (data->consider_all_candidates)
3579 {
3580 ret = group->cost_map + cand->id;
3581 if (!ret->cand)
3582 return NULL;
3583
3584 return ret;
3585 }
3586
3587 /* n_map_members is a power of two, so this computes modulo. */
3588 s = cand->id & (group->n_map_members - 1);
3589 for (i = s; i < group->n_map_members; i++)
3590 if (group->cost_map[i].cand == cand)
3591 return group->cost_map + i;
3592 else if (group->cost_map[i].cand == NULL)
3593 return NULL;
3594 for (i = 0; i < s; i++)
3595 if (group->cost_map[i].cand == cand)
3596 return group->cost_map + i;
3597 else if (group->cost_map[i].cand == NULL)
3598 return NULL;
3599
3600 return NULL;
3601 }
3602
3603 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3604 static rtx
3605 produce_memory_decl_rtl (tree obj, int *regno)
3606 {
3607 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3608 machine_mode address_mode = targetm.addr_space.address_mode (as);
3609 rtx x;
3610
3611 gcc_assert (obj);
3612 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3613 {
3614 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3615 x = gen_rtx_SYMBOL_REF (address_mode, name);
3616 SET_SYMBOL_REF_DECL (x, obj);
3617 x = gen_rtx_MEM (DECL_MODE (obj), x);
3618 set_mem_addr_space (x, as);
3619 targetm.encode_section_info (obj, x, true);
3620 }
3621 else
3622 {
3623 x = gen_raw_REG (address_mode, (*regno)++);
3624 x = gen_rtx_MEM (DECL_MODE (obj), x);
3625 set_mem_addr_space (x, as);
3626 }
3627
3628 return x;
3629 }
3630
3631 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3632 walk_tree. DATA contains the actual fake register number. */
3633
3634 static tree
3635 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3636 {
3637 tree obj = NULL_TREE;
3638 rtx x = NULL_RTX;
3639 int *regno = (int *) data;
3640
3641 switch (TREE_CODE (*expr_p))
3642 {
3643 case ADDR_EXPR:
3644 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3645 handled_component_p (*expr_p);
3646 expr_p = &TREE_OPERAND (*expr_p, 0))
3647 continue;
3648 obj = *expr_p;
3649 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3650 x = produce_memory_decl_rtl (obj, regno);
3651 break;
3652
3653 case SSA_NAME:
3654 *ws = 0;
3655 obj = SSA_NAME_VAR (*expr_p);
3656 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3657 if (!obj)
3658 return NULL_TREE;
3659 if (!DECL_RTL_SET_P (obj))
3660 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3661 break;
3662
3663 case VAR_DECL:
3664 case PARM_DECL:
3665 case RESULT_DECL:
3666 *ws = 0;
3667 obj = *expr_p;
3668
3669 if (DECL_RTL_SET_P (obj))
3670 break;
3671
3672 if (DECL_MODE (obj) == BLKmode)
3673 x = produce_memory_decl_rtl (obj, regno);
3674 else
3675 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3676
3677 break;
3678
3679 default:
3680 break;
3681 }
3682
3683 if (x)
3684 {
3685 decl_rtl_to_reset.safe_push (obj);
3686 SET_DECL_RTL (obj, x);
3687 }
3688
3689 return NULL_TREE;
3690 }
3691
3692 /* Determines cost of the computation of EXPR. */
3693
3694 static unsigned
3695 computation_cost (tree expr, bool speed)
3696 {
3697 rtx_insn *seq;
3698 rtx rslt;
3699 tree type = TREE_TYPE (expr);
3700 unsigned cost;
3701 /* Avoid using hard regs in ways which may be unsupported. */
3702 int regno = LAST_VIRTUAL_REGISTER + 1;
3703 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3704 enum node_frequency real_frequency = node->frequency;
3705
3706 node->frequency = NODE_FREQUENCY_NORMAL;
3707 crtl->maybe_hot_insn_p = speed;
3708 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3709 start_sequence ();
3710 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3711 seq = get_insns ();
3712 end_sequence ();
3713 default_rtl_profile ();
3714 node->frequency = real_frequency;
3715
3716 cost = seq_cost (seq, speed);
3717 if (MEM_P (rslt))
3718 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3719 TYPE_ADDR_SPACE (type), speed);
3720 else if (!REG_P (rslt))
3721 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3722
3723 return cost;
3724 }
3725
3726 /* Returns variable containing the value of candidate CAND at statement AT. */
3727
3728 static tree
3729 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3730 {
3731 if (stmt_after_increment (loop, cand, stmt))
3732 return cand->var_after;
3733 else
3734 return cand->var_before;
3735 }
3736
3737 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3738 same precision that is at least as wide as the precision of TYPE, stores
3739 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3740 type of A and B. */
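
/* For instance (illustrative, assuming long is at least as wide as int):
   if *A is (unsigned int) la_1 and *B is (unsigned int) lb_2 with la_1 and
   lb_2 of type long, then la_1 and lb_2 are stored back to *A and *B and
   the type long is returned.  */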
3741
3742 static tree
3743 determine_common_wider_type (tree *a, tree *b)
3744 {
3745 tree wider_type = NULL;
3746 tree suba, subb;
3747 tree atype = TREE_TYPE (*a);
3748
3749 if (CONVERT_EXPR_P (*a))
3750 {
3751 suba = TREE_OPERAND (*a, 0);
3752 wider_type = TREE_TYPE (suba);
3753 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3754 return atype;
3755 }
3756 else
3757 return atype;
3758
3759 if (CONVERT_EXPR_P (*b))
3760 {
3761 subb = TREE_OPERAND (*b, 0);
3762 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3763 return atype;
3764 }
3765 else
3766 return atype;
3767
3768 *a = suba;
3769 *b = subb;
3770 return wider_type;
3771 }
3772
3773 /* Determines the expression by which USE is expressed from induction variable
3774    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3775    decomposed form: the invariant part is stored in AFF_INV, while the variant
3776    part is stored in AFF_VAR.  Stores the ratio of USE's step over CAND's step
3777    in PRAT if it's non-null.  Returns false if USE cannot be expressed using CAND. */
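
/* For example (a sketch): for a use with base 0 and step 4 expressed from a
   candidate with base 0 and step 1, the ratio is 4, AFF_INV is 0 and
   AFF_VAR is 4 * var, i.e. the use's value is simply 4 * cand.  */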
3778
3779 static bool
3780 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3781 struct iv_cand *cand, struct aff_tree *aff_inv,
3782 struct aff_tree *aff_var, widest_int *prat = NULL)
3783 {
3784 tree ubase = use->iv->base, ustep = use->iv->step;
3785 tree cbase = cand->iv->base, cstep = cand->iv->step;
3786 tree common_type, uutype, var, cstep_common;
3787 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3788 aff_tree aff_cbase;
3789 widest_int rat;
3790
3791 /* We must have a precision to express the values of use. */
3792 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3793 return false;
3794
3795 var = var_at_stmt (loop, cand, at);
3796 uutype = unsigned_type_for (utype);
3797
3798 /* If the conversion is not noop, perform it. */
3799 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3800 {
3801 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3802 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3803 {
3804 tree inner_base, inner_step, inner_type;
3805 inner_base = TREE_OPERAND (cbase, 0);
3806 if (CONVERT_EXPR_P (cstep))
3807 inner_step = TREE_OPERAND (cstep, 0);
3808 else
3809 inner_step = cstep;
3810
3811 inner_type = TREE_TYPE (inner_base);
3812 /* If candidate is added from a biv whose type is smaller than
3813 ctype, we know both candidate and the biv won't overflow.
3814 		In this case, it's safe to skip the conversion in the candidate.
3815 		As an example, (unsigned short)((unsigned long)A) equals
3816 		(unsigned short)A, if A has a type no larger than short. */
3817 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3818 {
3819 cbase = inner_base;
3820 cstep = inner_step;
3821 }
3822 }
3823 cbase = fold_convert (uutype, cbase);
3824 cstep = fold_convert (uutype, cstep);
3825 var = fold_convert (uutype, var);
3826 }
3827
3828 /* Ratio is 1 when computing the value of biv cand by itself.
3829 We can't rely on constant_multiple_of in this case because the
3830 use is created after the original biv is selected. The call
3831 could fail because of inconsistent fold behavior. See PR68021
3832 for more information. */
3833 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3834 {
3835 gcc_assert (is_gimple_assign (use->stmt));
3836 gcc_assert (use->iv->ssa_name == cand->var_after);
3837 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3838 rat = 1;
3839 }
3840 else if (!constant_multiple_of (ustep, cstep, &rat))
3841 return false;
3842
3843 if (prat)
3844 *prat = rat;
3845
3846 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3847 type, we achieve better folding by computing their difference in this
3848 wider type, and cast the result to UUTYPE. We do not need to worry about
3849 overflows, as all the arithmetics will in the end be performed in UUTYPE
3850 anyway. */
3851 common_type = determine_common_wider_type (&ubase, &cbase);
3852
3853 /* use = ubase - ratio * cbase + ratio * var. */
3854 tree_to_aff_combination (ubase, common_type, aff_inv);
3855 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3856 tree_to_aff_combination (var, uutype, aff_var);
3857
3858 /* We need to shift the value if we are after the increment. */
3859 if (stmt_after_increment (loop, cand, at))
3860 {
3861 aff_tree cstep_aff;
3862
3863 if (common_type != uutype)
3864 cstep_common = fold_convert (common_type, cstep);
3865 else
3866 cstep_common = cstep;
3867
3868 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3869 aff_combination_add (&aff_cbase, &cstep_aff);
3870 }
3871
3872 aff_combination_scale (&aff_cbase, -rat);
3873 aff_combination_add (aff_inv, &aff_cbase);
3874 if (common_type != uutype)
3875 aff_combination_convert (aff_inv, uutype);
3876
3877 aff_combination_scale (aff_var, rat);
3878 return true;
3879 }
3880
3881 /* Determines the expression by which USE is expressed from induction variable
3882 CAND at statement AT in LOOP. The expression is stored in a decomposed
3883 form into AFF. Returns false if USE cannot be expressed using CAND. */
3884
3885 static bool
3886 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3887 struct iv_cand *cand, struct aff_tree *aff)
3888 {
3889 aff_tree aff_var;
3890
3891 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3892 return false;
3893
3894 aff_combination_add (aff, &aff_var);
3895 return true;
3896 }
3897
3898 /* Return the type of USE. */
3899
3900 static tree
3901 get_use_type (struct iv_use *use)
3902 {
3903 tree base_type = TREE_TYPE (use->iv->base);
3904 tree type;
3905
3906 if (use->type == USE_ADDRESS)
3907 {
3908 /* The base_type may be a void pointer. Create a pointer type based on
3909 the mem_ref instead. */
3910 type = build_pointer_type (TREE_TYPE (*use->op_p));
3911 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3912 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3913 }
3914 else
3915 type = base_type;
3916
3917 return type;
3918 }
3919
3920 /* Determines the expression by which USE is expressed from induction variable
3921 CAND at statement AT in LOOP. The computation is unshared. */
3922
3923 static tree
3924 get_computation_at (struct loop *loop, gimple *at,
3925 struct iv_use *use, struct iv_cand *cand)
3926 {
3927 aff_tree aff;
3928 tree type = get_use_type (use);
3929
3930 if (!get_computation_aff (loop, at, use, cand, &aff))
3931 return NULL_TREE;
3932 unshare_aff_combination (&aff);
3933 return fold_convert (type, aff_combination_to_tree (&aff));
3934 }
3935
3936 /* Adjust the cost COST for being in loop setup rather than loop body.
3937 If we're optimizing for space, the loop setup overhead is constant;
3938 if we're optimizing for speed, amortize it over the per-iteration cost.
3939 If ROUND_UP_P is true, the result is rounded up rather than truncated
3940 toward zero when optimizing for speed.  */
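/* For instance (hypothetical numbers): with COST = 10 and an average of 4
   iterations, the speed-adjusted cost is 10 / 4 = 2, or (10 + 3) / 4 = 3
   when ROUND_UP_P is true; when optimizing for size the cost stays 10.  */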
3941 static unsigned
3942 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
3943 bool round_up_p = false)
3944 {
3945 if (cost == INFTY)
3946 return cost;
3947 else if (optimize_loop_for_speed_p (data->current_loop))
3948 {
3949 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
3950 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
3951 }
3952 else
3953 return cost;
3954 }
3955
3956 /* Calculate the speed or size cost (per SPEED) of shiftadd EXPR in MODE.
3957 MULT is the EXPR operand holding the multiplication that becomes the shift.
3958 COST0 and COST1 are the costs for calculating the operands of EXPR.
3959 Returns true if successful, and returns the cost in COST.  */
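/* A hypothetical example: for EXPR = a + b * 8, MULT is b * 8 and m = 3, so
   the shift-and-add form is (b << 3) + a; the cheaper of a separate shift
   plus add and a dedicated shiftadd instruction is chosen below.  */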
3960
3961 static bool
3962 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
3963 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3964 {
3965 comp_cost res;
3966 tree op1 = TREE_OPERAND (expr, 1);
3967 tree cst = TREE_OPERAND (mult, 1);
3968 tree multop = TREE_OPERAND (mult, 0);
3969 int m = exact_log2 (int_cst_value (cst));
3970 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3971 int as_cost, sa_cost;
3972 bool mult_in_op1;
3973
3974 if (!(m >= 0 && m < maxm))
3975 return false;
3976
3977 STRIP_NOPS (op1);
3978 mult_in_op1 = operand_equal_p (op1, mult, 0);
3979
3980 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3981
3982 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3983 use that in preference to a shift insn followed by an add insn. */
3984 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3985 ? shiftadd_cost (speed, mode, m)
3986 : (mult_in_op1
3987 ? shiftsub1_cost (speed, mode, m)
3988 : shiftsub0_cost (speed, mode, m)));
3989
3990 res = comp_cost (MIN (as_cost, sa_cost), 0);
3991 res += (mult_in_op1 ? cost0 : cost1);
3992
3993 STRIP_NOPS (multop);
3994 if (!is_gimple_val (multop))
3995 res += force_expr_to_var_cost (multop, speed);
3996
3997 *cost = res;
3998 return true;
3999 }
4000
4001 /* Estimates cost of forcing expression EXPR into a variable. */
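/* Roughly, the one-time initialization below measures three baselines:
   forcing the constant 2000 costs integer_cost[speed], forcing &test_var
   costs symbol_cost[speed], and forcing &test_var + 2000 costs
   address_cost[speed]; more complex expressions are priced recursively
   from per-operator costs.  */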
4002
4003 static comp_cost
4004 force_expr_to_var_cost (tree expr, bool speed)
4005 {
4006 static bool costs_initialized = false;
4007 static unsigned integer_cost [2];
4008 static unsigned symbol_cost [2];
4009 static unsigned address_cost [2];
4010 tree op0, op1;
4011 comp_cost cost0, cost1, cost;
4012 machine_mode mode;
4013 scalar_int_mode int_mode;
4014
4015 if (!costs_initialized)
4016 {
4017 tree type = build_pointer_type (integer_type_node);
4018 tree var, addr;
4019 rtx x;
4020 int i;
4021
4022 var = create_tmp_var_raw (integer_type_node, "test_var");
4023 TREE_STATIC (var) = 1;
4024 x = produce_memory_decl_rtl (var, NULL);
4025 SET_DECL_RTL (var, x);
4026
4027 addr = build1 (ADDR_EXPR, type, var);
4028
4029
4030 for (i = 0; i < 2; i++)
4031 {
4032 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4033 2000), i);
4034
4035 symbol_cost[i] = computation_cost (addr, i) + 1;
4036
4037 address_cost[i]
4038 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4039 if (dump_file && (dump_flags & TDF_DETAILS))
4040 {
4041 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4042 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4043 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4044 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4045 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4046 fprintf (dump_file, "\n");
4047 }
4048 }
4049
4050 costs_initialized = true;
4051 }
4052
4053 STRIP_NOPS (expr);
4054
4055 if (SSA_VAR_P (expr))
4056 return no_cost;
4057
4058 if (is_gimple_min_invariant (expr))
4059 {
4060 if (poly_int_tree_p (expr))
4061 return comp_cost (integer_cost [speed], 0);
4062
4063 if (TREE_CODE (expr) == ADDR_EXPR)
4064 {
4065 tree obj = TREE_OPERAND (expr, 0);
4066
4067 if (VAR_P (obj)
4068 || TREE_CODE (obj) == PARM_DECL
4069 || TREE_CODE (obj) == RESULT_DECL)
4070 return comp_cost (symbol_cost [speed], 0);
4071 }
4072
4073 return comp_cost (address_cost [speed], 0);
4074 }
4075
4076 switch (TREE_CODE (expr))
4077 {
4078 case POINTER_PLUS_EXPR:
4079 case PLUS_EXPR:
4080 case MINUS_EXPR:
4081 case MULT_EXPR:
4082 case TRUNC_DIV_EXPR:
4083 case BIT_AND_EXPR:
4084 case BIT_IOR_EXPR:
4085 case LSHIFT_EXPR:
4086 case RSHIFT_EXPR:
4087 op0 = TREE_OPERAND (expr, 0);
4088 op1 = TREE_OPERAND (expr, 1);
4089 STRIP_NOPS (op0);
4090 STRIP_NOPS (op1);
4091 break;
4092
4093 CASE_CONVERT:
4094 case NEGATE_EXPR:
4095 case BIT_NOT_EXPR:
4096 op0 = TREE_OPERAND (expr, 0);
4097 STRIP_NOPS (op0);
4098 op1 = NULL_TREE;
4099 break;
4100
4101 default:
4102 /* Just an arbitrary value, FIXME. */
4103 return comp_cost (target_spill_cost[speed], 0);
4104 }
4105
4106 if (op0 == NULL_TREE
4107 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4108 cost0 = no_cost;
4109 else
4110 cost0 = force_expr_to_var_cost (op0, speed);
4111
4112 if (op1 == NULL_TREE
4113 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4114 cost1 = no_cost;
4115 else
4116 cost1 = force_expr_to_var_cost (op1, speed);
4117
4118 mode = TYPE_MODE (TREE_TYPE (expr));
4119 switch (TREE_CODE (expr))
4120 {
4121 case POINTER_PLUS_EXPR:
4122 case PLUS_EXPR:
4123 case MINUS_EXPR:
4124 case NEGATE_EXPR:
4125 cost = comp_cost (add_cost (speed, mode), 0);
4126 if (TREE_CODE (expr) != NEGATE_EXPR)
4127 {
4128 tree mult = NULL_TREE;
4129 comp_cost sa_cost;
4130 if (TREE_CODE (op1) == MULT_EXPR)
4131 mult = op1;
4132 else if (TREE_CODE (op0) == MULT_EXPR)
4133 mult = op0;
4134
4135 if (mult != NULL_TREE
4136 && is_a <scalar_int_mode> (mode, &int_mode)
4137 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4138 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4139 speed, &sa_cost))
4140 return sa_cost;
4141 }
4142 break;
4143
4144 CASE_CONVERT:
4145 {
4146 tree inner_mode, outer_mode;
4147 outer_mode = TREE_TYPE (expr);
4148 inner_mode = TREE_TYPE (op0);
4149 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4150 TYPE_MODE (inner_mode), speed), 0);
4151 }
4152 break;
4153
4154 case MULT_EXPR:
4155 if (cst_and_fits_in_hwi (op0))
4156 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4157 mode, speed), 0);
4158 else if (cst_and_fits_in_hwi (op1))
4159 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4160 mode, speed), 0);
4161 else
4162 return comp_cost (target_spill_cost [speed], 0);
4163 break;
4164
4165 case TRUNC_DIV_EXPR:
4166 /* Division by a power of two is usually cheap, so we allow it.  Forbid
4167 anything else. */
4168 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4169 cost = comp_cost (add_cost (speed, mode), 0);
4170 else
4171 cost = comp_cost (target_spill_cost[speed], 0);
4172 break;
4173
4174 case BIT_AND_EXPR:
4175 case BIT_IOR_EXPR:
4176 case BIT_NOT_EXPR:
4177 case LSHIFT_EXPR:
4178 case RSHIFT_EXPR:
4179 cost = comp_cost (add_cost (speed, mode), 0);
4180 break;
4181
4182 default:
4183 gcc_unreachable ();
4184 }
4185
4186 cost += cost0;
4187 cost += cost1;
4188 return cost;
4189 }
4190
4191 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4192 invariants the computation depends on. */
4193
4194 static comp_cost
4195 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4196 {
4197 if (!expr)
4198 return no_cost;
4199
4200 find_inv_vars (data, &expr, inv_vars);
4201 return force_expr_to_var_cost (expr, data->speed);
4202 }
4203
4204 /* Returns the cost of an auto-modifying address expression of the form
4205 base + offset.  AINC_STEP is the step size of the address IV.  AINC_OFFSET
4206 is the offset of the address expression.  The address expression has
4207 ADDR_MODE in address space AS.  The memory access has MEM_MODE.  SPEED
4208 says whether we are optimizing for speed or size.  */
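/* For illustration (assuming the target supports these forms): an access
   through *p followed by stepping p by the access size corresponds to
   POST_INC, stepping p first and then accessing corresponds to PRE_INC,
   and the DEC variants cover negative steps.  */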
4209
4210 enum ainc_type
4211 {
4212 AINC_PRE_INC, /* Pre increment. */
4213 AINC_PRE_DEC, /* Pre decrement. */
4214 AINC_POST_INC, /* Post increment. */
4215 AINC_POST_DEC, /* Post decrement. */
4216 AINC_NONE /* Also the number of auto increment types. */
4217 };
4218
4219 struct ainc_cost_data
4220 {
4221 unsigned costs[AINC_NONE];
4222 };
4223
4224 static comp_cost
4225 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4226 machine_mode addr_mode, machine_mode mem_mode,
4227 addr_space_t as, bool speed)
4228 {
4229 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4230 && !USE_STORE_PRE_DECREMENT (mem_mode)
4231 && !USE_LOAD_POST_DECREMENT (mem_mode)
4232 && !USE_STORE_POST_DECREMENT (mem_mode)
4233 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4234 && !USE_STORE_PRE_INCREMENT (mem_mode)
4235 && !USE_LOAD_POST_INCREMENT (mem_mode)
4236 && !USE_STORE_POST_INCREMENT (mem_mode))
4237 return infinite_cost;
4238
4239 static vec<ainc_cost_data *> ainc_cost_data_list;
4240 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4241 if (idx >= ainc_cost_data_list.length ())
4242 {
4243 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4244
4245 gcc_assert (nsize > idx);
4246 ainc_cost_data_list.safe_grow_cleared (nsize);
4247 }
4248
4249 ainc_cost_data *data = ainc_cost_data_list[idx];
4250 if (data == NULL)
4251 {
4252 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4253
4254 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4255 data->costs[AINC_PRE_DEC] = INFTY;
4256 data->costs[AINC_POST_DEC] = INFTY;
4257 data->costs[AINC_PRE_INC] = INFTY;
4258 data->costs[AINC_POST_INC] = INFTY;
4259 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4260 || USE_STORE_PRE_DECREMENT (mem_mode))
4261 {
4262 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4263
4264 if (memory_address_addr_space_p (mem_mode, addr, as))
4265 data->costs[AINC_PRE_DEC]
4266 = address_cost (addr, mem_mode, as, speed);
4267 }
4268 if (USE_LOAD_POST_DECREMENT (mem_mode)
4269 || USE_STORE_POST_DECREMENT (mem_mode))
4270 {
4271 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4272
4273 if (memory_address_addr_space_p (mem_mode, addr, as))
4274 data->costs[AINC_POST_DEC]
4275 = address_cost (addr, mem_mode, as, speed);
4276 }
4277 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4278 || USE_STORE_PRE_INCREMENT (mem_mode))
4279 {
4280 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4281
4282 if (memory_address_addr_space_p (mem_mode, addr, as))
4283 data->costs[AINC_PRE_INC]
4284 = address_cost (addr, mem_mode, as, speed);
4285 }
4286 if (USE_LOAD_POST_INCREMENT (mem_mode)
4287 || USE_STORE_POST_INCREMENT (mem_mode))
4288 {
4289 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4290
4291 if (memory_address_addr_space_p (mem_mode, addr, as))
4292 data->costs[AINC_POST_INC]
4293 = address_cost (addr, mem_mode, as, speed);
4294 }
4295 ainc_cost_data_list[idx] = data;
4296 }
4297
4298 HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
4299 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4300 return comp_cost (data->costs[AINC_POST_INC], 0);
4301 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4302 return comp_cost (data->costs[AINC_POST_DEC], 0);
4303 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4304 return comp_cost (data->costs[AINC_PRE_INC], 0);
4305 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4306 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4307
4308 return infinite_cost;
4309 }
4310
4311 /* Return the cost of computing USE's address expression by using CAND.
4312 AFF_INV and AFF_VAR represent the invariant and variant parts of the
4313 address expression, respectively.  If AFF_INV is simple, store
4314 the loop invariant variables it depends on in INV_VARS;
4315 if AFF_INV is complicated, handle it as a new invariant expression
4316 and record it in INV_EXPR.  RATIO is the constant ratio between the
4317 steps of USE and CAND.  If CAN_AUTOINC is non-NULL, store in it a
4318 boolean indicating whether this is an auto-increment address.  */
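/* A rough sketch of the strategy below: the address is decomposed into
   "symbol + base + index << scale + offset" parts, and progressively
   simpler forms ("base + index", "base + offset", ...) are tried with
   valid_mem_ref_p until the target accepts one; parts that do not fit
   are folded back into the invariant computed outside the address.  */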
4319
4320 static comp_cost
4321 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4322 struct iv_cand *cand, aff_tree *aff_inv,
4323 aff_tree *aff_var, HOST_WIDE_INT ratio,
4324 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4325 bool *can_autoinc, bool speed)
4326 {
4327 rtx addr;
4328 bool simple_inv = true;
4329 tree comp_inv = NULL_TREE, type = aff_var->type;
4330 comp_cost var_cost = no_cost, cost = no_cost;
4331 struct mem_address parts = {NULL_TREE, integer_one_node,
4332 NULL_TREE, NULL_TREE, NULL_TREE};
4333 machine_mode addr_mode = TYPE_MODE (type);
4334 machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
4335 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4336 /* Only true if ratio != 1. */
4337 bool ok_with_ratio_p = false;
4338 bool ok_without_ratio_p = false;
4339
4340 if (!aff_combination_const_p (aff_inv))
4341 {
4342 parts.index = integer_one_node;
4343 /* Addressing mode "base + index". */
4344 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4345 if (ratio != 1)
4346 {
4347 parts.step = wide_int_to_tree (type, ratio);
4348 /* Addressing mode "base + index << scale". */
4349 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4350 if (!ok_with_ratio_p)
4351 parts.step = NULL_TREE;
4352 }
4353 if (ok_with_ratio_p || ok_without_ratio_p)
4354 {
4355 if (maybe_ne (aff_inv->offset, 0))
4356 {
4357 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4358 /* Addressing mode "base + index [<< scale] + offset". */
4359 if (!valid_mem_ref_p (mem_mode, as, &parts))
4360 parts.offset = NULL_TREE;
4361 else
4362 aff_inv->offset = 0;
4363 }
4364
4365 move_fixed_address_to_symbol (&parts, aff_inv);
4366 /* Base is fixed address and is moved to symbol part. */
4367 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4368 parts.base = NULL_TREE;
4369
4370 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4371 if (parts.symbol != NULL_TREE
4372 && !valid_mem_ref_p (mem_mode, as, &parts))
4373 {
4374 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4375 parts.symbol = NULL_TREE;
4376 /* Reset SIMPLE_INV since symbol address needs to be computed
4377 outside of address expression in this case. */
4378 simple_inv = false;
4379 /* Symbol part is moved back to base part, it can't be NULL. */
4380 parts.base = integer_one_node;
4381 }
4382 }
4383 else
4384 parts.index = NULL_TREE;
4385 }
4386 else
4387 {
4388 poly_int64 ainc_step;
4389 if (can_autoinc
4390 && ratio == 1
4391 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4392 {
4393 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4394
4395 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4396 ainc_offset += ainc_step;
4397 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4398 addr_mode, mem_mode, as, speed);
4399 if (!cost.infinite_cost_p ())
4400 {
4401 *can_autoinc = true;
4402 return cost;
4403 }
4404 cost = no_cost;
4405 }
4406 if (!aff_combination_zero_p (aff_inv))
4407 {
4408 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4409 /* Addressing mode "base + offset". */
4410 if (!valid_mem_ref_p (mem_mode, as, &parts))
4411 parts.offset = NULL_TREE;
4412 else
4413 aff_inv->offset = 0;
4414 }
4415 }
4416
4417 if (simple_inv)
4418 simple_inv = (aff_inv == NULL
4419 || aff_combination_const_p (aff_inv)
4420 || aff_combination_singleton_var_p (aff_inv));
4421 if (!aff_combination_zero_p (aff_inv))
4422 comp_inv = aff_combination_to_tree (aff_inv);
4423 if (comp_inv != NULL_TREE)
4424 cost = force_var_cost (data, comp_inv, inv_vars);
4425 if (ratio != 1 && parts.step == NULL_TREE)
4426 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4427 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4428 var_cost += add_cost (speed, addr_mode);
4429
4430 if (comp_inv && inv_expr && !simple_inv)
4431 {
4432 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4433 /* Clear INV_VARS; the dependence is tracked via INV_EXPR instead.  */
4434 if (*inv_expr != NULL && inv_vars && *inv_vars)
4435 bitmap_clear (*inv_vars);
4436
4437 /* The cost of a small invariant expression, once amortized over the loop
4438 iterations, is usually zero, which makes it hard to distinguish from a
4439 candidate based on loop invariant variables.  Secondly, the generated
4440 invariant expression may not be hoisted out of the loop by a following
4441 pass.  We penalize the cost by rounding up in order to neutralize such
4442 effects.  */
4443 cost.cost = adjust_setup_cost (data, cost.cost, true);
4444 cost.scratch = cost.cost;
4445 }
4446
4447 cost += var_cost;
4448 addr = addr_for_mem_ref (&parts, as, false);
4449 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4450 cost += address_cost (addr, mem_mode, as, speed);
4451
4452 if (parts.symbol != NULL_TREE)
4453 cost.complexity += 1;
4454 /* Don't increase the complexity of adding a scaled index if it's
4455 the only kind of index that the target allows. */
4456 if (parts.step != NULL_TREE && ok_without_ratio_p)
4457 cost.complexity += 1;
4458 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4459 cost.complexity += 1;
4460 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4461 cost.complexity += 1;
4462
4463 return cost;
4464 }
4465
4466 /* Scale (multiply) the computed COST (except the scratch part, which should
4467 be hoisted out of the loop) by AT->frequency / header->frequency, which
4468 makes the expected cost more accurate.  */
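/* A worked example with made-up counts: COST = 10 with scratch = 2, for a
   statement whose block runs half as often as the loop header, is scaled
   to 2 + (10 - 2) / 2 = 6.  */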
4469
4470 static comp_cost
4471 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4472 {
4473 int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4474 int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4475 if (loop_freq != 0)
4476 {
4477 gcc_assert (cost.scratch <= cost.cost);
4478 int scaled_cost
4479 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4480
4481 if (dump_file && (dump_flags & TDF_DETAILS))
4482 fprintf (dump_file, "Scaling cost based on bb prob "
4483 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4484 1.0f * bb_freq / loop_freq, cost.cost,
4485 cost.scratch, scaled_cost, bb_freq, loop_freq);
4486
4487 cost.cost = scaled_cost;
4488 }
4489
4490 return cost;
4491 }
4492
4493 /* Determines the cost of the computation by which USE is expressed
4494 from induction variable CAND.  If ADDRESS_P is true, we just need
4495 to create an address from it, otherwise we want to get it into a
4496 register.  A set of invariants we depend on is stored in INV_VARS.
4497 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4498 addressing is likely. If INV_EXPR is nonnull, record invariant
4499 expr entry in it. */
4500
4501 static comp_cost
4502 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4503 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4504 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4505 {
4506 gimple *at = use->stmt;
4507 tree ubase = use->iv->base, cbase = cand->iv->base;
4508 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4509 tree comp_inv = NULL_TREE;
4510 HOST_WIDE_INT ratio, aratio;
4511 comp_cost cost;
4512 widest_int rat;
4513 aff_tree aff_inv, aff_var;
4514 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4515
4516 if (inv_vars)
4517 *inv_vars = NULL;
4518 if (can_autoinc)
4519 *can_autoinc = false;
4520 if (inv_expr)
4521 *inv_expr = NULL;
4522
4523 /* Check if we have enough precision to express the values of use. */
4524 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4525 return infinite_cost;
4526
4527 if (address_p
4528 || (use->iv->base_object
4529 && cand->iv->base_object
4530 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4531 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4532 {
4533 /* Do not try to express the address of an object with a computation based
4534 on the address of a different object.  This may cause problems in RTL
4535 level alias analysis (which does not expect this to happen, as it is
4536 not valid in C), and would be unlikely to be useful
4537 anyway.  */
4538 if (use->iv->base_object
4539 && cand->iv->base_object
4540 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4541 return infinite_cost;
4542 }
4543
4544 if (!get_computation_aff_1 (data->current_loop, at, use,
4545 cand, &aff_inv, &aff_var, &rat)
4546 || !wi::fits_shwi_p (rat))
4547 return infinite_cost;
4548
4549 ratio = rat.to_shwi ();
4550 if (address_p)
4551 {
4552 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4553 inv_vars, inv_expr, can_autoinc, speed);
4554 return get_scaled_computation_cost_at (data, at, cost);
4555 }
4556
4557 bool simple_inv = (aff_combination_const_p (&aff_inv)
4558 || aff_combination_singleton_var_p (&aff_inv));
4559 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4560 aff_combination_convert (&aff_inv, signed_type);
4561 if (!aff_combination_zero_p (&aff_inv))
4562 comp_inv = aff_combination_to_tree (&aff_inv);
4563
4564 cost = force_var_cost (data, comp_inv, inv_vars);
4565 if (comp_inv && inv_expr && !simple_inv)
4566 {
4567 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4568 /* Clear INV_VARS; the dependence is tracked via INV_EXPR instead.  */
4569 if (*inv_expr != NULL && inv_vars && *inv_vars)
4570 bitmap_clear (*inv_vars);
4571
4572 cost.cost = adjust_setup_cost (data, cost.cost);
4573 /* Record setup cost in scratch field. */
4574 cost.scratch = cost.cost;
4575 }
4576 /* The cost of a constant integer can be covered when adding the invariant
4577 part to the variant part.  */
4578 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4579 cost = no_cost;
4580
4581 /* Need type narrowing to represent use with cand. */
4582 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4583 {
4584 machine_mode outer_mode = TYPE_MODE (utype);
4585 machine_mode inner_mode = TYPE_MODE (ctype);
4586 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4587 }
4588
4589 /* Turn a + i * (-c) into a - i * c. */
4590 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4591 aratio = -ratio;
4592 else
4593 aratio = ratio;
4594
4595 if (ratio != 1)
4596 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4597
4598 /* TODO: We may also need to check if we can compute a + i * 4 in one
4599 instruction. */
4600 /* Need to add up the invariant and variant parts. */
4601 if (comp_inv && !integer_zerop (comp_inv))
4602 cost += add_cost (speed, TYPE_MODE (utype));
4603
4604 return get_scaled_computation_cost_at (data, at, cost);
4605 }
4606
4607 /* Determines cost of computing the use in GROUP with CAND in a generic
4608 expression. */
4609
4610 static bool
4611 determine_group_iv_cost_generic (struct ivopts_data *data,
4612 struct iv_group *group, struct iv_cand *cand)
4613 {
4614 comp_cost cost;
4615 iv_inv_expr_ent *inv_expr = NULL;
4616 bitmap inv_vars = NULL, inv_exprs = NULL;
4617 struct iv_use *use = group->vuses[0];
4618
4619 /* The simple case first -- if we need to express value of the preserved
4620 original biv, the cost is 0. This also prevents us from counting the
4621 cost of increment twice -- once at this use and once in the cost of
4622 the candidate. */
4623 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4624 cost = no_cost;
4625 else
4626 cost = get_computation_cost (data, use, cand, false,
4627 &inv_vars, NULL, &inv_expr);
4628
4629 if (inv_expr)
4630 {
4631 inv_exprs = BITMAP_ALLOC (NULL);
4632 bitmap_set_bit (inv_exprs, inv_expr->id);
4633 }
4634 set_group_iv_cost (data, group, cand, cost, inv_vars,
4635 NULL_TREE, ERROR_MARK, inv_exprs);
4636 return !cost.infinite_cost_p ();
4637 }
4638
4639 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4640
4641 static bool
4642 determine_group_iv_cost_address (struct ivopts_data *data,
4643 struct iv_group *group, struct iv_cand *cand)
4644 {
4645 unsigned i;
4646 bitmap inv_vars = NULL, inv_exprs = NULL;
4647 bool can_autoinc;
4648 iv_inv_expr_ent *inv_expr = NULL;
4649 struct iv_use *use = group->vuses[0];
4650 comp_cost sum_cost = no_cost, cost;
4651
4652 cost = get_computation_cost (data, use, cand, true,
4653 &inv_vars, &can_autoinc, &inv_expr);
4654
4655 if (inv_expr)
4656 {
4657 inv_exprs = BITMAP_ALLOC (NULL);
4658 bitmap_set_bit (inv_exprs, inv_expr->id);
4659 }
4660 sum_cost = cost;
4661 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4662 {
4663 if (can_autoinc)
4664 sum_cost -= cand->cost_step;
4665 /* If we generated the candidate solely for exploiting autoincrement
4666 opportunities, and it turns out it can't be used, set the cost to
4667 infinity to make sure we ignore it. */
4668 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4669 sum_cost = infinite_cost;
4670 }
4671
4672 /* Uses in a group can share setup code, so only add setup cost once. */
4673 cost -= cost.scratch;
4674 /* Compute and add the costs for the rest of the uses in this group.  */
4675 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4676 {
4677 struct iv_use *next = group->vuses[i];
4678
4679 /* TODO: We could skip computing cost for sub iv_use when it has the
4680 same cost as the first iv_use, but the cost really depends on the
4681 offset and where the iv_use is. */
4682 cost = get_computation_cost (data, next, cand, true,
4683 NULL, &can_autoinc, &inv_expr);
4684 if (inv_expr)
4685 {
4686 if (!inv_exprs)
4687 inv_exprs = BITMAP_ALLOC (NULL);
4688
4689 bitmap_set_bit (inv_exprs, inv_expr->id);
4690 }
4691 sum_cost += cost;
4692 }
4693 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4694 NULL_TREE, ERROR_MARK, inv_exprs);
4695
4696 return !sum_cost.infinite_cost_p ();
4697 }
4698
4699 /* Computes the value of candidate CAND at position AT in iteration NITER,
4700 and stores it in VAL.  */
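/* In other words (a sketch that ignores the type conversions): VAL is
   base + NITER * step, plus one extra step when AT is after the
   increment of CAND.  */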
4701
4702 static void
4703 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4704 aff_tree *val)
4705 {
4706 aff_tree step, delta, nit;
4707 struct iv *iv = cand->iv;
4708 tree type = TREE_TYPE (iv->base);
4709 tree steptype;
4710 if (POINTER_TYPE_P (type))
4711 steptype = sizetype;
4712 else
4713 steptype = unsigned_type_for (type);
4714
4715 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4716 aff_combination_convert (&step, steptype);
4717 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4718 aff_combination_convert (&nit, steptype);
4719 aff_combination_mult (&nit, &step, &delta);
4720 if (stmt_after_increment (loop, cand, at))
4721 aff_combination_add (&delta, &step);
4722
4723 tree_to_aff_combination (iv->base, type, val);
4724 if (!POINTER_TYPE_P (type))
4725 aff_combination_convert (val, steptype);
4726 aff_combination_add (val, &delta);
4727 }
4728
4729 /* Returns period of induction variable iv. */
4730
4731 static tree
4732 iv_period (struct iv *iv)
4733 {
4734 tree step = iv->step, period, type;
4735 tree pow2div;
4736
4737 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4738
4739 type = unsigned_type_for (TREE_TYPE (step));
4740 /* The period of the iv is lcm (step, type_range)/step - 1,
4741 i.e., N*type_range/step - 1.  Since the type range is a power
4742 of two, N == step >> num_of_ending_zeros_binary (step),
4743 so the final result is
4744
4745 (type_range >> num_of_ending_zeros_binary (step)) - 1
4746
4747 */
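/* A hypothetical instance: for a 32-bit unsigned iv with step 4 (two
   trailing zero bits), the period is (1 << (32 - 2)) - 1 = 0x3fffffff,
   which is what the comparison against the iteration count in
   may_eliminate_iv relies on.  */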
4748 pow2div = num_ending_zeros (step);
4749
4750 period = build_low_bits_mask (type,
4751 (TYPE_PRECISION (type)
4752 - tree_to_uhwi (pow2div)));
4753
4754 return period;
4755 }
4756
4757 /* Returns the comparison operator used when eliminating the iv USE. */
4758
4759 static enum tree_code
4760 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4761 {
4762 struct loop *loop = data->current_loop;
4763 basic_block ex_bb;
4764 edge exit;
4765
4766 ex_bb = gimple_bb (use->stmt);
4767 exit = EDGE_SUCC (ex_bb, 0);
4768 if (flow_bb_inside_loop_p (loop, exit->dest))
4769 exit = EDGE_SUCC (ex_bb, 1);
4770
4771 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4772 }
4773
4774 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4775 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4776 calculation is performed in a non-wrapping type.
4777
4778 TODO: More generally, we could test for the situation that
4779 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4780 This would require knowing the sign of OFFSET. */
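/* For instance (an illustrative case only): if BASE is x + n and OFFSET is n,
   then BASE - OFFSET is just x, so the subtraction cannot overflow provided
   the addition was performed in a non-wrapping type.  */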
4781
4782 static bool
4783 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4784 {
4785 enum tree_code code;
4786 tree e1, e2;
4787 aff_tree aff_e1, aff_e2, aff_offset;
4788
4789 if (!nowrap_type_p (TREE_TYPE (base)))
4790 return false;
4791
4792 base = expand_simple_operations (base);
4793
4794 if (TREE_CODE (base) == SSA_NAME)
4795 {
4796 gimple *stmt = SSA_NAME_DEF_STMT (base);
4797
4798 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4799 return false;
4800
4801 code = gimple_assign_rhs_code (stmt);
4802 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4803 return false;
4804
4805 e1 = gimple_assign_rhs1 (stmt);
4806 e2 = gimple_assign_rhs2 (stmt);
4807 }
4808 else
4809 {
4810 code = TREE_CODE (base);
4811 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4812 return false;
4813 e1 = TREE_OPERAND (base, 0);
4814 e2 = TREE_OPERAND (base, 1);
4815 }
4816
4817 /* Use affine expansion as deeper inspection to prove the equality. */
4818 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4819 &aff_e2, &data->name_expansion_cache);
4820 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4821 &aff_offset, &data->name_expansion_cache);
4822 aff_combination_scale (&aff_offset, -1);
4823 switch (code)
4824 {
4825 case PLUS_EXPR:
4826 aff_combination_add (&aff_e2, &aff_offset);
4827 if (aff_combination_zero_p (&aff_e2))
4828 return true;
4829
4830 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4831 &aff_e1, &data->name_expansion_cache);
4832 aff_combination_add (&aff_e1, &aff_offset);
4833 return aff_combination_zero_p (&aff_e1);
4834
4835 case POINTER_PLUS_EXPR:
4836 aff_combination_add (&aff_e2, &aff_offset);
4837 return aff_combination_zero_p (&aff_e2);
4838
4839 default:
4840 return false;
4841 }
4842 }
4843
4844 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4845 comparison with CAND. NITER describes the number of iterations of
4846 the loop.  If successful, the comparison in COMP_P is altered accordingly.
4847
4848 We aim to handle the following situation:
4849
4850 sometype *base, *p;
4851 int a, b, i;
4852
4853 i = a;
4854 p = p_0 = base + a;
4855
4856 do
4857 {
4858 bla (*p);
4859 p++;
4860 i++;
4861 }
4862 while (i < b);
4863
4864 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4865 We aim to optimize this to
4866
4867 p = p_0 = base + a;
4868 do
4869 {
4870 bla (*p);
4871 p++;
4872 }
4873 while (p < p_0 - a + b);
4874
4875 This preserves correctness, since the pointer arithmetic does not
4876 overflow. More precisely:
4877
4878 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4879 overflow in computing it or the values of p.
4880 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4881 overflow. To prove this, we use the fact that p_0 = base + a. */
4882
4883 static bool
4884 iv_elimination_compare_lt (struct ivopts_data *data,
4885 struct iv_cand *cand, enum tree_code *comp_p,
4886 struct tree_niter_desc *niter)
4887 {
4888 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4889 struct aff_tree nit, tmpa, tmpb;
4890 enum tree_code comp;
4891 HOST_WIDE_INT step;
4892
4893 /* We need to know that the candidate induction variable does not overflow.
4894 While more complex analysis may be used to prove this, for now just
4895 check that the variable appears in the original program and that it
4896 is computed in a type that guarantees no overflows. */
4897 cand_type = TREE_TYPE (cand->iv->base);
4898 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4899 return false;
4900
4901 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4902 the calculation of the BOUND could overflow, making the comparison
4903 invalid. */
4904 if (!data->loop_single_exit_p)
4905 return false;
4906
4907 /* We need to be able to decide whether candidate is increasing or decreasing
4908 in order to choose the right comparison operator. */
4909 if (!cst_and_fits_in_hwi (cand->iv->step))
4910 return false;
4911 step = int_cst_value (cand->iv->step);
4912
4913 /* Check that the number of iterations matches the expected pattern:
4914 a + 1 > b ? 0 : b - a - 1. */
4915 mbz = niter->may_be_zero;
4916 if (TREE_CODE (mbz) == GT_EXPR)
4917 {
4918 /* Handle a + 1 > b. */
4919 tree op0 = TREE_OPERAND (mbz, 0);
4920 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4921 {
4922 a = TREE_OPERAND (op0, 0);
4923 b = TREE_OPERAND (mbz, 1);
4924 }
4925 else
4926 return false;
4927 }
4928 else if (TREE_CODE (mbz) == LT_EXPR)
4929 {
4930 tree op1 = TREE_OPERAND (mbz, 1);
4931
4932 /* Handle b < a + 1. */
4933 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4934 {
4935 a = TREE_OPERAND (op1, 0);
4936 b = TREE_OPERAND (mbz, 0);
4937 }
4938 else
4939 return false;
4940 }
4941 else
4942 return false;
4943
4944 /* Expected number of iterations is B - A - 1. Check that it matches
4945 the actual number, i.e., that B - A - NITER = 1. */
4946 tree_to_aff_combination (niter->niter, nit_type, &nit);
4947 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4948 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4949 aff_combination_scale (&nit, -1);
4950 aff_combination_scale (&tmpa, -1);
4951 aff_combination_add (&tmpb, &tmpa);
4952 aff_combination_add (&tmpb, &nit);
4953 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
4954 return false;
4955
4956 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4957 overflow. */
4958 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4959 cand->iv->step,
4960 fold_convert (TREE_TYPE (cand->iv->step), a));
4961 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4962 return false;
4963
4964 /* Determine the new comparison operator. */
4965 comp = step < 0 ? GT_EXPR : LT_EXPR;
4966 if (*comp_p == NE_EXPR)
4967 *comp_p = comp;
4968 else if (*comp_p == EQ_EXPR)
4969 *comp_p = invert_tree_comparison (comp, false);
4970 else
4971 gcc_unreachable ();
4972
4973 return true;
4974 }
4975
4976 /* Check whether it is possible to express the condition in USE by comparison
4977 of candidate CAND. If so, store the value compared with to BOUND, and the
4978 comparison operator to COMP. */
4979
4980 static bool
4981 may_eliminate_iv (struct ivopts_data *data,
4982 struct iv_use *use, struct iv_cand *cand, tree *bound,
4983 enum tree_code *comp)
4984 {
4985 basic_block ex_bb;
4986 edge exit;
4987 tree period;
4988 struct loop *loop = data->current_loop;
4989 aff_tree bnd;
4990 struct tree_niter_desc *desc = NULL;
4991
4992 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4993 return false;
4994
4995 /* For now works only for exits that dominate the loop latch.
4996 TODO: extend to other conditions inside loop body. */
4997 ex_bb = gimple_bb (use->stmt);
4998 if (use->stmt != last_stmt (ex_bb)
4999 || gimple_code (use->stmt) != GIMPLE_COND
5000 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5001 return false;
5002
5003 exit = EDGE_SUCC (ex_bb, 0);
5004 if (flow_bb_inside_loop_p (loop, exit->dest))
5005 exit = EDGE_SUCC (ex_bb, 1);
5006 if (flow_bb_inside_loop_p (loop, exit->dest))
5007 return false;
5008
5009 desc = niter_for_exit (data, exit);
5010 if (!desc)
5011 return false;
5012
5013 /* Determine whether we can use the variable to test the exit condition.
5014 This is the case iff the period of the induction variable is greater
5015 than the number of iterations for which the exit condition is true. */
5016 period = iv_period (cand->iv);
5017
5018 /* If the number of iterations is constant, compare against it directly. */
5019 if (TREE_CODE (desc->niter) == INTEGER_CST)
5020 {
5021 /* See cand_value_at. */
5022 if (stmt_after_increment (loop, cand, use->stmt))
5023 {
5024 if (!tree_int_cst_lt (desc->niter, period))
5025 return false;
5026 }
5027 else
5028 {
5029 if (tree_int_cst_lt (period, desc->niter))
5030 return false;
5031 }
5032 }
5033
5034 /* If not, and if this is the only possible exit of the loop, see whether
5035 we can get a conservative estimate on the number of iterations of the
5036 entire loop and compare against that instead. */
5037 else
5038 {
5039 widest_int period_value, max_niter;
5040
5041 max_niter = desc->max;
5042 if (stmt_after_increment (loop, cand, use->stmt))
5043 max_niter += 1;
5044 period_value = wi::to_widest (period);
5045 if (wi::gtu_p (max_niter, period_value))
5046 {
5047 /* See if we can take advantage of inferred loop bound
5048 information. */
5049 if (data->loop_single_exit_p)
5050 {
5051 if (!max_loop_iterations (loop, &max_niter))
5052 return false;
5053 /* The loop bound is already adjusted by adding 1. */
5054 if (wi::gtu_p (max_niter, period_value))
5055 return false;
5056 }
5057 else
5058 return false;
5059 }
5060 }
5061
5062 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5063
5064 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5065 aff_combination_to_tree (&bnd));
5066 *comp = iv_elimination_compare (data, use);
5067
5068 /* It is unlikely that computing the number of iterations using division
5069 would be more profitable than keeping the original induction variable. */
5070 if (expression_expensive_p (*bound))
5071 return false;
5072
5073 /* Sometimes, it is possible to handle the situation that the number of
5074 iterations may be zero unless additional assumptions hold, by using <
5075 instead of != in the exit condition.
5076
5077 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5078 base the exit condition on it. However, that is often too
5079 expensive. */
5080 if (!integer_zerop (desc->may_be_zero))
5081 return iv_elimination_compare_lt (data, cand, comp, desc);
5082
5083 return true;
5084 }
5085
5086 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5087 be copied if it is used in the loop body and DATA->body_includes_call.  */
5088
5089 static int
5090 parm_decl_cost (struct ivopts_data *data, tree bound)
5091 {
5092 tree sbound = bound;
5093 STRIP_NOPS (sbound);
5094
5095 if (TREE_CODE (sbound) == SSA_NAME
5096 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5097 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5098 && data->body_includes_call)
5099 return COSTS_N_INSNS (1);
5100
5101 return 0;
5102 }
5103
5104 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5105
5106 static bool
5107 determine_group_iv_cost_cond (struct ivopts_data *data,
5108 struct iv_group *group, struct iv_cand *cand)
5109 {
5110 tree bound = NULL_TREE;
5111 struct iv *cmp_iv;
5112 bitmap inv_exprs = NULL;
5113 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5114 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5115 enum comp_iv_rewrite rewrite_type;
5116 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5117 tree *control_var, *bound_cst;
5118 enum tree_code comp = ERROR_MARK;
5119 struct iv_use *use = group->vuses[0];
5120
5121 /* Extract condition operands. */
5122 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5123 &bound_cst, NULL, &cmp_iv);
5124 gcc_assert (rewrite_type != COMP_IV_NA);
5125
5126 /* Try iv elimination. */
5127 if (rewrite_type == COMP_IV_ELIM
5128 && may_eliminate_iv (data, use, cand, &bound, &comp))
5129 {
5130 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5131 if (elim_cost.cost == 0)
5132 elim_cost.cost = parm_decl_cost (data, bound);
5133 else if (TREE_CODE (bound) == INTEGER_CST)
5134 elim_cost.cost = 0;
5135 /* If we replace a loop condition 'i < n' with 'p < base + n',
5136 inv_vars_elim will have 'base' and 'n' set, which implies that both
5137 'base' and 'n' will be live during the loop. More likely,
5138 'base + n' will be loop invariant, resulting in only one live value
5139 during the loop. So in that case we clear inv_vars_elim and set
5140 inv_expr_elim instead. */
5141 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5142 {
5143 inv_expr_elim = get_loop_invariant_expr (data, bound);
5144 bitmap_clear (inv_vars_elim);
5145 }
5146 /* The bound is a loop invariant, so it will be computed only
5147 once.  */
5148 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5149 }
5150
5151 /* When the condition is a comparison of the candidate IV against
5152 zero, prefer this IV.
5153
5154 TODO: The constant that we're subtracting from the cost should
5155 be target-dependent. This information should be added to the
5156 target costs for each backend. */
5157 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5158 && integer_zerop (*bound_cst)
5159 && (operand_equal_p (*control_var, cand->var_after, 0)
5160 || operand_equal_p (*control_var, cand->var_before, 0)))
5161 elim_cost -= 1;
5162
5163 express_cost = get_computation_cost (data, use, cand, false,
5164 &inv_vars_express, NULL,
5165 &inv_expr_express);
5166 if (cmp_iv != NULL)
5167 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5168
5169 /* Count the cost of the original bound as well. */
5170 bound_cost = force_var_cost (data, *bound_cst, NULL);
5171 if (bound_cost.cost == 0)
5172 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5173 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5174 bound_cost.cost = 0;
5175 express_cost += bound_cost;
5176
5177 /* Choose the better approach, preferring the eliminated IV. */
5178 if (elim_cost <= express_cost)
5179 {
5180 cost = elim_cost;
5181 inv_vars = inv_vars_elim;
5182 inv_vars_elim = NULL;
5183 inv_expr = inv_expr_elim;
5184 }
5185 else
5186 {
5187 cost = express_cost;
5188 inv_vars = inv_vars_express;
5189 inv_vars_express = NULL;
5190 bound = NULL_TREE;
5191 comp = ERROR_MARK;
5192 inv_expr = inv_expr_express;
5193 }
5194
5195 if (inv_expr)
5196 {
5197 inv_exprs = BITMAP_ALLOC (NULL);
5198 bitmap_set_bit (inv_exprs, inv_expr->id);
5199 }
5200 set_group_iv_cost (data, group, cand, cost,
5201 inv_vars, bound, comp, inv_exprs);
5202
5203 if (inv_vars_elim)
5204 BITMAP_FREE (inv_vars_elim);
5205 if (inv_vars_express)
5206 BITMAP_FREE (inv_vars_express);
5207
5208 return !cost.infinite_cost_p ();
5209 }
5210
5211 /* Determines cost of computing uses in GROUP with CAND. Returns false
5212 if the uses in GROUP cannot be represented with CAND.  */
5213
5214 static bool
5215 determine_group_iv_cost (struct ivopts_data *data,
5216 struct iv_group *group, struct iv_cand *cand)
5217 {
5218 switch (group->type)
5219 {
5220 case USE_NONLINEAR_EXPR:
5221 return determine_group_iv_cost_generic (data, group, cand);
5222
5223 case USE_ADDRESS:
5224 return determine_group_iv_cost_address (data, group, cand);
5225
5226 case USE_COMPARE:
5227 return determine_group_iv_cost_cond (data, group, cand);
5228
5229 default:
5230 gcc_unreachable ();
5231 }
5232 }
5233
5234 /* Return true if get_computation_cost indicates that autoincrement is
5235 a possibility for the pair of USE and CAND, false otherwise. */
5236
5237 static bool
5238 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5239 struct iv_cand *cand)
5240 {
5241 if (use->type != USE_ADDRESS)
5242 return false;
5243
5244 bool can_autoinc = false;
5245 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5246 return can_autoinc;
5247 }
5248
5249 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5250 use that allows autoincrement, and set their AINC_USE if possible. */
5251
5252 static void
5253 set_autoinc_for_original_candidates (struct ivopts_data *data)
5254 {
5255 unsigned i, j;
5256
5257 for (i = 0; i < data->vcands.length (); i++)
5258 {
5259 struct iv_cand *cand = data->vcands[i];
5260 struct iv_use *closest_before = NULL;
5261 struct iv_use *closest_after = NULL;
5262 if (cand->pos != IP_ORIGINAL)
5263 continue;
5264
5265 for (j = 0; j < data->vgroups.length (); j++)
5266 {
5267 struct iv_group *group = data->vgroups[j];
5268 struct iv_use *use = group->vuses[0];
5269 unsigned uid = gimple_uid (use->stmt);
5270
5271 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5272 continue;
5273
5274 if (uid < gimple_uid (cand->incremented_at)
5275 && (closest_before == NULL
5276 || uid > gimple_uid (closest_before->stmt)))
5277 closest_before = use;
5278
5279 if (uid > gimple_uid (cand->incremented_at)
5280 && (closest_after == NULL
5281 || uid < gimple_uid (closest_after->stmt)))
5282 closest_after = use;
5283 }
5284
5285 if (closest_before != NULL
5286 && autoinc_possible_for_pair (data, closest_before, cand))
5287 cand->ainc_use = closest_before;
5288 else if (closest_after != NULL
5289 && autoinc_possible_for_pair (data, closest_after, cand))
5290 cand->ainc_use = closest_after;
5291 }
5292 }
5293
5294 /* Relate compare use with all candidates. */
5295
5296 static void
5297 relate_compare_use_with_all_cands (struct ivopts_data *data)
5298 {
5299 unsigned i, count = data->vcands.length ();
5300 for (i = 0; i < data->vgroups.length (); i++)
5301 {
5302 struct iv_group *group = data->vgroups[i];
5303
5304 if (group->type == USE_COMPARE)
5305 bitmap_set_range (group->related_cands, 0, count);
5306 }
5307 }
5308
5309 /* Finds the candidates for the induction variables. */
5310
5311 static void
5312 find_iv_candidates (struct ivopts_data *data)
5313 {
5314 /* Add commonly used ivs. */
5315 add_standard_iv_candidates (data);
5316
5317 /* Add old induction variables. */
5318 add_iv_candidate_for_bivs (data);
5319
5320 /* Add induction variables derived from uses. */
5321 add_iv_candidate_for_groups (data);
5322
5323 set_autoinc_for_original_candidates (data);
5324
5325 /* Record the important candidates. */
5326 record_important_candidates (data);
5327
5328 /* Relate compare iv_use with all candidates. */
5329 if (!data->consider_all_candidates)
5330 relate_compare_use_with_all_cands (data);
5331
5332 if (dump_file && (dump_flags & TDF_DETAILS))
5333 {
5334 unsigned i;
5335
5336 fprintf (dump_file, "\n<Important Candidates>:\t");
5337 for (i = 0; i < data->vcands.length (); i++)
5338 if (data->vcands[i]->important)
5339 fprintf (dump_file, " %d,", data->vcands[i]->id);
5340 fprintf (dump_file, "\n");
5341
5342 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5343 for (i = 0; i < data->vgroups.length (); i++)
5344 {
5345 struct iv_group *group = data->vgroups[i];
5346
5347 if (group->related_cands)
5348 {
5349 fprintf (dump_file, " Group %d:\t", group->id);
5350 dump_bitmap (dump_file, group->related_cands);
5351 }
5352 }
5353 fprintf (dump_file, "\n");
5354 }
5355 }
5356
5357 /* Determines costs of computing use of iv with an iv candidate. */
5358
5359 static void
5360 determine_group_iv_costs (struct ivopts_data *data)
5361 {
5362 unsigned i, j;
5363 struct iv_cand *cand;
5364 struct iv_group *group;
5365 bitmap to_clear = BITMAP_ALLOC (NULL);
5366
5367 alloc_use_cost_map (data);
5368
5369 for (i = 0; i < data->vgroups.length (); i++)
5370 {
5371 group = data->vgroups[i];
5372
5373 if (data->consider_all_candidates)
5374 {
5375 for (j = 0; j < data->vcands.length (); j++)
5376 {
5377 cand = data->vcands[j];
5378 determine_group_iv_cost (data, group, cand);
5379 }
5380 }
5381 else
5382 {
5383 bitmap_iterator bi;
5384
5385 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5386 {
5387 cand = data->vcands[j];
5388 if (!determine_group_iv_cost (data, group, cand))
5389 bitmap_set_bit (to_clear, j);
5390 }
5391
5392 /* Remove the candidates for which the cost is infinite from
5393 the list of related candidates. */
5394 bitmap_and_compl_into (group->related_cands, to_clear);
5395 bitmap_clear (to_clear);
5396 }
5397 }
5398
5399 BITMAP_FREE (to_clear);
5400
5401 if (dump_file && (dump_flags & TDF_DETAILS))
5402 {
5403 bitmap_iterator bi;
5404
5405 /* Dump invariant variables. */
5406 fprintf (dump_file, "\n<Invariant Vars>:\n");
5407 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5408 {
5409 struct version_info *info = ver_info (data, i);
5410 if (info->inv_id)
5411 {
5412 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5413 print_generic_expr (dump_file, info->name, TDF_SLIM);
5414 fprintf (dump_file, "%s\n",
5415 info->has_nonlin_use ? "" : "\t(eliminable)");
5416 }
5417 }
5418
5419 /* Dump invariant expressions. */
5420 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5421 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5422
5423 for (hash_table<iv_inv_expr_hasher>::iterator it
5424 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5425 ++it)
5426 list.safe_push (*it);
5427
5428 list.qsort (sort_iv_inv_expr_ent);
5429
5430 for (i = 0; i < list.length (); ++i)
5431 {
5432 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5433 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5434 fprintf (dump_file, "\n");
5435 }
5436
5437 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5438
5439 for (i = 0; i < data->vgroups.length (); i++)
5440 {
5441 group = data->vgroups[i];
5442
5443 fprintf (dump_file, "Group %d:\n", i);
5444 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5445 for (j = 0; j < group->n_map_members; j++)
5446 {
5447 if (!group->cost_map[j].cand
5448 || group->cost_map[j].cost.infinite_cost_p ())
5449 continue;
5450
5451 fprintf (dump_file, " %d\t%d\t%d\t",
5452 group->cost_map[j].cand->id,
5453 group->cost_map[j].cost.cost,
5454 group->cost_map[j].cost.complexity);
5455 if (!group->cost_map[j].inv_exprs
5456 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5457 fprintf (dump_file, "NIL;\t");
5458 else
5459 bitmap_print (dump_file,
5460 group->cost_map[j].inv_exprs, "", ";\t");
5461 if (!group->cost_map[j].inv_vars
5462 || bitmap_empty_p (group->cost_map[j].inv_vars))
5463 fprintf (dump_file, "NIL;\n");
5464 else
5465 bitmap_print (dump_file,
5466 group->cost_map[j].inv_vars, "", "\n");
5467 }
5468
5469 fprintf (dump_file, "\n");
5470 }
5471 fprintf (dump_file, "\n");
5472 }
5473 }
5474
5475 /* Determines cost of the candidate CAND. */
5476
5477 static void
5478 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5479 {
5480 comp_cost cost_base;
5481 unsigned cost, cost_step;
5482 tree base;
5483
5484 gcc_assert (cand->iv != NULL);
5485
5486 /* There are two costs associated with the candidate -- its increment
5487 and its initialization. The second is almost negligible for any loop
5488 that rolls enough, so we give it only a small weight.  */
5489
5490 base = cand->iv->base;
5491 cost_base = force_var_cost (data, base, NULL);
5492 /* It is exceptional for the iv register to happen to be initialized with
5493 the proper value at no cost. In general, there will at least be a regcopy
5494 or a const set. */
5495 if (cost_base.cost == 0)
5496 cost_base.cost = COSTS_N_INSNS (1);
5497 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5498
5499 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5500
5501 /* Prefer the original ivs unless we may gain something by replacing them.
5502 The reason is to make debugging simpler, so this is not relevant for
5503 artificial ivs created by other optimization passes. */
5504 if (cand->pos != IP_ORIGINAL
5505 || !SSA_NAME_VAR (cand->var_before)
5506 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5507 cost++;
5508
5509 /* Prefer not to insert statements into the latch unless there are some
5510 already (so that we do not create unnecessary jumps). */
5511 if (cand->pos == IP_END
5512 && empty_block_p (ip_end_pos (data->current_loop)))
5513 cost++;
5514
5515 cand->cost = cost;
5516 cand->cost_step = cost_step;
5517 }
5518
5519 /* Determines costs of computation of the candidates. */
5520
5521 static void
5522 determine_iv_costs (struct ivopts_data *data)
5523 {
5524 unsigned i;
5525
5526 if (dump_file && (dump_flags & TDF_DETAILS))
5527 {
5528 fprintf (dump_file, "<Candidate Costs>:\n");
5529 fprintf (dump_file, " cand\tcost\n");
5530 }
5531
5532 for (i = 0; i < data->vcands.length (); i++)
5533 {
5534 struct iv_cand *cand = data->vcands[i];
5535
5536 determine_iv_cost (data, cand);
5537
5538 if (dump_file && (dump_flags & TDF_DETAILS))
5539 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5540 }
5541
5542 if (dump_file && (dump_flags & TDF_DETAILS))
5543 fprintf (dump_file, "\n");
5544 }
5545
5546 /* Estimate register pressure for a loop having N_INVS invariants and N_CANDS
5547 induction variables. Note N_INVS includes both invariant variables and
5548 invariant expressions. */
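/* A sketch with made-up numbers: with 8 available registers, 2 reserved
   (target_res_regs), regs_used = 2, N_INVS = 1 and N_CANDS = 2, we get
   regs_needed = 5 and 5 + 2 < 8, so the estimate is n_new + n_cands
   = 3 + 2 = 5; the spill penalties below apply only once regs_needed
   exceeds the available registers.  */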
5549
5550 static unsigned
5551 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5552 unsigned n_cands)
5553 {
5554 unsigned cost;
5555 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5556 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5557 bool speed = data->speed;
5558
5559 /* If there is a call in the loop body, the call-clobbered registers
5560 are not available for loop invariants. */
5561 if (data->body_includes_call)
5562 available_regs = available_regs - target_clobbered_regs;
5563
5564 /* If we have enough registers. */
5565 if (regs_needed + target_res_regs < available_regs)
5566 cost = n_new;
5567 /* If close to running out of registers, try to preserve them. */
5568 else if (regs_needed <= available_regs)
5569 cost = target_reg_cost [speed] * regs_needed;
5570 /* If the registers needed exceed the available ones but the number of
5571 candidates does not, we penalize the extra registers using target_spill_cost.  */
5572 else if (n_cands <= available_regs)
5573 cost = target_reg_cost [speed] * available_regs
5574 + target_spill_cost [speed] * (regs_needed - available_regs);
5575 /* If the number of candidates alone exceeds the available registers, we
5576 penalize the extra candidate registers using target_spill_cost * 2, because
5577 it is more expensive to spill an induction variable than an invariant.  */
5578 else
5579 cost = target_reg_cost [speed] * available_regs
5580 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5581 + target_spill_cost [speed] * (regs_needed - n_cands);
5582
5583 /* Finally, add the number of candidates, so that we prefer eliminating
5584 induction variables if possible. */
5585 return cost + n_cands;
5586 }
5587
5588 /* For each size of the induction variable set determine the penalty. */
5589
5590 static void
5591 determine_set_costs (struct ivopts_data *data)
5592 {
5593 unsigned j, n;
5594 gphi *phi;
5595 gphi_iterator psi;
5596 tree op;
5597 struct loop *loop = data->current_loop;
5598 bitmap_iterator bi;
5599
5600 if (dump_file && (dump_flags & TDF_DETAILS))
5601 {
5602 fprintf (dump_file, "<Global Costs>:\n");
5603 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5604 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5605 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5606 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5607 }
5608
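/* Count the integer and pointer PHI results in the loop header that are not
   induction variables; such values stay live across the loop and are counted
   towards the register pressure estimate. */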
5609 n = 0;
5610 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5611 {
5612 phi = psi.phi ();
5613 op = PHI_RESULT (phi);
5614
5615 if (virtual_operand_p (op))
5616 continue;
5617
5618 if (get_iv (data, op))
5619 continue;
5620
5621 if (!POINTER_TYPE_P (TREE_TYPE (op))
5622 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5623 continue;
5624
5625 n++;
5626 }
5627
5628 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5629 {
5630 struct version_info *info = ver_info (data, j);
5631
5632 if (info->inv_id && info->has_nonlin_use)
5633 n++;
5634 }
5635
5636 data->regs_used = n;
5637 if (dump_file && (dump_flags & TDF_DETAILS))
5638 fprintf (dump_file, " regs_used %d\n", n);
5639
5640 if (dump_file && (dump_flags & TDF_DETAILS))
5641 {
5642 fprintf (dump_file, " cost for size:\n");
5643 fprintf (dump_file, " ivs\tcost\n");
5644 for (j = 0; j <= 2 * target_avail_regs; j++)
5645 fprintf (dump_file, " %d\t%d\n", j,
5646 ivopts_estimate_reg_pressure (data, 0, j));
5647 fprintf (dump_file, "\n");
5648 }
5649 }
5650
5651 /* Returns true if A is a cheaper cost pair than B. */
5652
5653 static bool
5654 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5655 {
5656 if (!a)
5657 return false;
5658
5659 if (!b)
5660 return true;
5661
5662 if (a->cost < b->cost)
5663 return true;
5664
5665 if (b->cost < a->cost)
5666 return false;
5667
5668 /* In case the costs are the same, prefer the cheaper candidate. */
5669 if (a->cand->cost < b->cand->cost)
5670 return true;
5671
5672 return false;
5673 }
5674
5675 /* Compare cost pairs A and B. Return 1, 0 or -1 if A is more expensive
5676 than, equal to, or cheaper than B, respectively. */
5677
5678 static int
5679 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5680 {
5681 if (cheaper_cost_pair (a, b))
5682 return -1;
5683 if (cheaper_cost_pair (b, a))
5684 return 1;
5685
5686 return 0;
5687 }
5688
5689 /* Returns the candidate by which GROUP is expressed in IVS. */
5690
5691 static struct cost_pair *
5692 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5693 {
5694 return ivs->cand_for_group[group->id];
5695 }
5696
5697 /* Computes the cost field of the IVS structure. */
5698
5699 static void
5700 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5701 {
5702 comp_cost cost = ivs->cand_use_cost;
5703
5704 cost += ivs->cand_cost;
5705 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5706 ivs->cost = cost;
5707 }
5708
5709 /* Removes a use of the invariants in set INVS by decreasing their counters
5710 in N_INV_USES and updating the invariant count in IVS. */
5711
5712 static void
5713 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5714 {
5715 bitmap_iterator bi;
5716 unsigned iid;
5717
5718 if (!invs)
5719 return;
5720
5721 gcc_assert (n_inv_uses != NULL);
5722 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5723 {
5724 n_inv_uses[iid]--;
5725 if (n_inv_uses[iid] == 0)
5726 ivs->n_invs--;
5727 }
5728 }
5729
5730 /* Set GROUP not to be expressed by any candidate in IVS. */
5731
5732 static void
5733 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5734 struct iv_group *group)
5735 {
5736 unsigned gid = group->id, cid;
5737 struct cost_pair *cp;
5738
5739 cp = ivs->cand_for_group[gid];
5740 if (!cp)
5741 return;
5742 cid = cp->cand->id;
5743
5744 ivs->bad_groups++;
5745 ivs->cand_for_group[gid] = NULL;
5746 ivs->n_cand_uses[cid]--;
5747
5748 if (ivs->n_cand_uses[cid] == 0)
5749 {
5750 bitmap_clear_bit (ivs->cands, cid);
5751 ivs->n_cands--;
5752 ivs->cand_cost -= cp->cand->cost;
5753 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5754 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5755 }
5756
5757 ivs->cand_use_cost -= cp->cost;
5758 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5759 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5760 iv_ca_recount_cost (data, ivs);
5761 }
5762
5763 /* Adds a use of the invariants in set INVS by increasing their counters in
5764 N_INV_USES and updating the invariant count in IVS. */
5765
5766 static void
5767 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5768 {
5769 bitmap_iterator bi;
5770 unsigned iid;
5771
5772 if (!invs)
5773 return;
5774
5775 gcc_assert (n_inv_uses != NULL);
5776 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5777 {
5778 n_inv_uses[iid]++;
5779 if (n_inv_uses[iid] == 1)
5780 ivs->n_invs++;
5781 }
5782 }
5783
5784 /* Set cost pair for GROUP in set IVS to CP. */
5785
5786 static void
5787 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5788 struct iv_group *group, struct cost_pair *cp)
5789 {
5790 unsigned gid = group->id, cid;
5791
5792 if (ivs->cand_for_group[gid] == cp)
5793 return;
5794
5795 if (ivs->cand_for_group[gid])
5796 iv_ca_set_no_cp (data, ivs, group);
5797
5798 if (cp)
5799 {
5800 cid = cp->cand->id;
5801
5802 ivs->bad_groups--;
5803 ivs->cand_for_group[gid] = cp;
5804 ivs->n_cand_uses[cid]++;
5805 if (ivs->n_cand_uses[cid] == 1)
5806 {
5807 bitmap_set_bit (ivs->cands, cid);
5808 ivs->n_cands++;
5809 ivs->cand_cost += cp->cand->cost;
5810 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5811 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5812 }
5813
5814 ivs->cand_use_cost += cp->cost;
5815 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5816 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5817 iv_ca_recount_cost (data, ivs);
5818 }
5819 }
5820
5821 /* Extend set IVS by expressing GROUP by some of the candidates in it
5822 if possible. Consider all important candidates if the candidates in
5823 set IVS don't give any result. */
5824
5825 static void
5826 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5827 struct iv_group *group)
5828 {
5829 struct cost_pair *best_cp = NULL, *cp;
5830 bitmap_iterator bi;
5831 unsigned i;
5832 struct iv_cand *cand;
5833
5834 gcc_assert (ivs->upto >= group->id);
5835 ivs->upto++;
5836 ivs->bad_groups++;
5837
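/* First look for the cheapest cost pair among the candidates already used
   in IVS. */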
5838 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5839 {
5840 cand = data->vcands[i];
5841 cp = get_group_iv_cost (data, group, cand);
5842 if (cheaper_cost_pair (cp, best_cp))
5843 best_cp = cp;
5844 }
5845
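/* If no candidate already in IVS can express the group, fall back to the
   important candidates. */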
5846 if (best_cp == NULL)
5847 {
5848 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5849 {
5850 cand = data->vcands[i];
5851 cp = get_group_iv_cost (data, group, cand);
5852 if (cheaper_cost_pair (cp, best_cp))
5853 best_cp = cp;
5854 }
5855 }
5856
5857 iv_ca_set_cp (data, ivs, group, best_cp);
5858 }
5859
5860 /* Get cost for assignment IVS. */
5861
5862 static comp_cost
5863 iv_ca_cost (struct iv_ca *ivs)
5864 {
5865 /* This was a conditional expression but it triggered a bug in
5866 Sun C 5.5. */
5867 if (ivs->bad_groups)
5868 return infinite_cost;
5869 else
5870 return ivs->cost;
5871 }
5872
5873 /* Compare whether applying NEW_CP to GROUP in IVS introduces more invariants
5874 than OLD_CP does. Return 1, 0 or -1 for more, equally many and fewer
5875 invariants respectively. */
5876
5877 static int
5878 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5879 struct iv_group *group, struct cost_pair *old_cp,
5880 struct cost_pair *new_cp)
5881 {
5882 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5883 unsigned old_n_invs = ivs->n_invs;
5884 iv_ca_set_cp (data, ivs, group, new_cp);
5885 unsigned new_n_invs = ivs->n_invs;
5886 iv_ca_set_cp (data, ivs, group, old_cp);
5887
5888 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5889 }
5890
5891 /* Creates a change record expressing GROUP by NEW_CP instead of OLD_CP and
5892 chains it before NEXT. */
5893
5894 static struct iv_ca_delta *
5895 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5896 struct cost_pair *new_cp, struct iv_ca_delta *next)
5897 {
5898 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5899
5900 change->group = group;
5901 change->old_cp = old_cp;
5902 change->new_cp = new_cp;
5903 change->next = next;
5904
5905 return change;
5906 }
5907
5908 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5909 are rewritten. */
5910
5911 static struct iv_ca_delta *
5912 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5913 {
5914 struct iv_ca_delta *last;
5915
5916 if (!l2)
5917 return l1;
5918
5919 if (!l1)
5920 return l2;
5921
5922 for (last = l1; last->next; last = last->next)
5923 continue;
5924 last->next = l2;
5925
5926 return l1;
5927 }
5928
5929 /* Reverse the list of changes DELTA, forming the inverse to it. */
5930
5931 static struct iv_ca_delta *
5932 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5933 {
5934 struct iv_ca_delta *act, *next, *prev = NULL;
5935
5936 for (act = delta; act; act = next)
5937 {
5938 next = act->next;
5939 act->next = prev;
5940 prev = act;
5941
5942 std::swap (act->old_cp, act->new_cp);
5943 }
5944
5945 return prev;
5946 }
5947
5948 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5949 reverted instead. */
5950
5951 static void
5952 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5953 struct iv_ca_delta *delta, bool forward)
5954 {
5955 struct cost_pair *from, *to;
5956 struct iv_ca_delta *act;
5957
5958 if (!forward)
5959 delta = iv_ca_delta_reverse (delta);
5960
5961 for (act = delta; act; act = act->next)
5962 {
5963 from = act->old_cp;
5964 to = act->new_cp;
5965 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
5966 iv_ca_set_cp (data, ivs, act->group, to);
5967 }
5968
5969 if (!forward)
5970 iv_ca_delta_reverse (delta);
5971 }
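/* Note that callers typically use a delta list as an undo log: they commit a
   delta with FORWARD set, query the resulting cost via iv_ca_cost, and then
   commit the same delta with FORWARD clear to restore the previous state
   (see e.g. iv_ca_extend below). */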
5972
5973 /* Returns true if CAND is used in IVS. */
5974
5975 static bool
5976 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5977 {
5978 return ivs->n_cand_uses[cand->id] > 0;
5979 }
5980
5981 /* Returns number of induction variable candidates in the set IVS. */
5982
5983 static unsigned
5984 iv_ca_n_cands (struct iv_ca *ivs)
5985 {
5986 return ivs->n_cands;
5987 }
5988
5989 /* Free the list of changes DELTA. */
5990
5991 static void
5992 iv_ca_delta_free (struct iv_ca_delta **delta)
5993 {
5994 struct iv_ca_delta *act, *next;
5995
5996 for (act = *delta; act; act = next)
5997 {
5998 next = act->next;
5999 free (act);
6000 }
6001
6002 *delta = NULL;
6003 }
6004
6005 /* Allocates a new iv candidate assignment. */
6006
6007 static struct iv_ca *
6008 iv_ca_new (struct ivopts_data *data)
6009 {
6010 struct iv_ca *nw = XNEW (struct iv_ca);
6011
6012 nw->upto = 0;
6013 nw->bad_groups = 0;
6014 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6015 data->vgroups.length ());
6016 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6017 nw->cands = BITMAP_ALLOC (NULL);
6018 nw->n_cands = 0;
6019 nw->n_invs = 0;
6020 nw->cand_use_cost = no_cost;
6021 nw->cand_cost = 0;
6022 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6023 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6024 nw->cost = no_cost;
6025
6026 return nw;
6027 }
6028
6029 /* Free memory occupied by the set IVS. */
6030
6031 static void
6032 iv_ca_free (struct iv_ca **ivs)
6033 {
6034 free ((*ivs)->cand_for_group);
6035 free ((*ivs)->n_cand_uses);
6036 BITMAP_FREE ((*ivs)->cands);
6037 free ((*ivs)->n_inv_var_uses);
6038 free ((*ivs)->n_inv_expr_uses);
6039 free (*ivs);
6040 *ivs = NULL;
6041 }
6042
6043 /* Dumps IVS to FILE. */
6044
6045 static void
6046 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6047 {
6048 unsigned i;
6049 comp_cost cost = iv_ca_cost (ivs);
6050
6051 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6052 cost.complexity);
6053 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6054 ivs->cand_cost, ivs->cand_use_cost.cost,
6055 ivs->cand_use_cost.complexity);
6056 bitmap_print (file, ivs->cands, " candidates: ","\n");
6057
6058 for (i = 0; i < ivs->upto; i++)
6059 {
6060 struct iv_group *group = data->vgroups[i];
6061 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6062 if (cp)
6063 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6064 group->id, cp->cand->id, cp->cost.cost,
6065 cp->cost.complexity);
6066 else
6067 fprintf (file, " group:%d --> ??\n", group->id);
6068 }
6069
6070 const char *pref = "";
6071 fprintf (file, " invariant variables: ");
6072 for (i = 1; i <= data->max_inv_var_id; i++)
6073 if (ivs->n_inv_var_uses[i])
6074 {
6075 fprintf (file, "%s%d", pref, i);
6076 pref = ", ";
6077 }
6078
6079 pref = "";
6080 fprintf (file, "\n invariant expressions: ");
6081 for (i = 1; i <= data->max_inv_expr_id; i++)
6082 if (ivs->n_inv_expr_uses[i])
6083 {
6084 fprintf (file, "%s%d", pref, i);
6085 pref = ", ";
6086 }
6087
6088 fprintf (file, "\n\n");
6089 }
6090
6091 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
6092 the new set, and store the differences in DELTA. The number of induction
6093 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6094 true the function tries to find a solution with a minimal number of iv candidates. */
6095
6096 static comp_cost
6097 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6098 struct iv_cand *cand, struct iv_ca_delta **delta,
6099 unsigned *n_ivs, bool min_ncand)
6100 {
6101 unsigned i;
6102 comp_cost cost;
6103 struct iv_group *group;
6104 struct cost_pair *old_cp, *new_cp;
6105
6106 *delta = NULL;
6107 for (i = 0; i < ivs->upto; i++)
6108 {
6109 group = data->vgroups[i];
6110 old_cp = iv_ca_cand_for_group (ivs, group);
6111
6112 if (old_cp
6113 && old_cp->cand == cand)
6114 continue;
6115
6116 new_cp = get_group_iv_cost (data, group, cand);
6117 if (!new_cp)
6118 continue;
6119
6120 if (!min_ncand)
6121 {
6122 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6123 /* Skip if new_cp depends on more invariants. */
6124 if (cmp_invs > 0)
6125 continue;
6126
6127 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6128 /* Skip if new_cp is not cheaper. */
6129 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6130 continue;
6131 }
6132
6133 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6134 }
6135
6136 iv_ca_delta_commit (data, ivs, *delta, true);
6137 cost = iv_ca_cost (ivs);
6138 if (n_ivs)
6139 *n_ivs = iv_ca_n_cands (ivs);
6140 iv_ca_delta_commit (data, ivs, *delta, false);
6141
6142 return cost;
6143 }
6144
6145 /* Try narrowing set IVS by removing CAND. Return the cost of
6146 the new set and store the differences in DELTA. START is
6147 the candidate with which we start narrowing. */
6148
6149 static comp_cost
6150 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6151 struct iv_cand *cand, struct iv_cand *start,
6152 struct iv_ca_delta **delta)
6153 {
6154 unsigned i, ci;
6155 struct iv_group *group;
6156 struct cost_pair *old_cp, *new_cp, *cp;
6157 bitmap_iterator bi;
6158 struct iv_cand *cnd;
6159 comp_cost cost, best_cost, acost;
6160
6161 *delta = NULL;
6162 for (i = 0; i < data->vgroups.length (); i++)
6163 {
6164 group = data->vgroups[i];
6165
6166 old_cp = iv_ca_cand_for_group (ivs, group);
6167 if (old_cp->cand != cand)
6168 continue;
6169
6170 best_cost = iv_ca_cost (ivs);
6171 /* Start narrowing with START. */
6172 new_cp = get_group_iv_cost (data, group, start);
6173
6174 if (data->consider_all_candidates)
6175 {
6176 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6177 {
6178 if (ci == cand->id || (start && ci == start->id))
6179 continue;
6180
6181 cnd = data->vcands[ci];
6182
6183 cp = get_group_iv_cost (data, group, cnd);
6184 if (!cp)
6185 continue;
6186
6187 iv_ca_set_cp (data, ivs, group, cp);
6188 acost = iv_ca_cost (ivs);
6189
6190 if (acost < best_cost)
6191 {
6192 best_cost = acost;
6193 new_cp = cp;
6194 }
6195 }
6196 }
6197 else
6198 {
6199 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6200 {
6201 if (ci == cand->id || (start && ci == start->id))
6202 continue;
6203
6204 cnd = data->vcands[ci];
6205
6206 cp = get_group_iv_cost (data, group, cnd);
6207 if (!cp)
6208 continue;
6209
6210 iv_ca_set_cp (data, ivs, group, cp);
6211 acost = iv_ca_cost (ivs);
6212
6213 if (acost < best_cost)
6214 {
6215 best_cost = acost;
6216 new_cp = cp;
6217 }
6218 }
6219 }
6220 /* Restore to old cp for use. */
6221 iv_ca_set_cp (data, ivs, group, old_cp);
6222
6223 if (!new_cp)
6224 {
6225 iv_ca_delta_free (delta);
6226 return infinite_cost;
6227 }
6228
6229 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6230 }
6231
6232 iv_ca_delta_commit (data, ivs, *delta, true);
6233 cost = iv_ca_cost (ivs);
6234 iv_ca_delta_commit (data, ivs, *delta, false);
6235
6236 return cost;
6237 }
6238
6239 /* Try optimizing the set of candidates IVS by removing candidates other
6240 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6241 differences in DELTA. */
6242
6243 static comp_cost
6244 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6245 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6246 {
6247 bitmap_iterator bi;
6248 struct iv_ca_delta *act_delta, *best_delta;
6249 unsigned i;
6250 comp_cost best_cost, acost;
6251 struct iv_cand *cand;
6252
6253 best_delta = NULL;
6254 best_cost = iv_ca_cost (ivs);
6255
6256 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6257 {
6258 cand = data->vcands[i];
6259
6260 if (cand == except_cand)
6261 continue;
6262
6263 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6264
6265 if (acost < best_cost)
6266 {
6267 best_cost = acost;
6268 iv_ca_delta_free (&best_delta);
6269 best_delta = act_delta;
6270 }
6271 else
6272 iv_ca_delta_free (&act_delta);
6273 }
6274
6275 if (!best_delta)
6276 {
6277 *delta = NULL;
6278 return best_cost;
6279 }
6280
6281 /* Recurse to possibly remove other unnecessary ivs. */
6282 iv_ca_delta_commit (data, ivs, best_delta, true);
6283 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6284 iv_ca_delta_commit (data, ivs, best_delta, false);
6285 *delta = iv_ca_delta_join (best_delta, *delta);
6286 return best_cost;
6287 }
6288
6289 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6290 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6291 the corresponding cost_pair; otherwise just return BEST_CP. */
6292
6293 static struct cost_pair*
6294 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6295 unsigned int cand_idx, struct iv_cand *old_cand,
6296 struct cost_pair *best_cp)
6297 {
6298 struct iv_cand *cand;
6299 struct cost_pair *cp;
6300
6301 gcc_assert (old_cand != NULL && best_cp != NULL);
6302 if (cand_idx == old_cand->id)
6303 return best_cp;
6304
6305 cand = data->vcands[cand_idx];
6306 cp = get_group_iv_cost (data, group, cand);
6307 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6308 return cp;
6309
6310 return best_cp;
6311 }
6312
6313 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6314 that are used by more than one iv use. For each of those candidates,
6315 this function tries to represent the iv uses under that candidate using
6316 other ones with lower local cost, then tries to prune the new set.
6317 If the new set has a lower cost, it returns the new cost after recording
6318 the candidate replacements in the list DELTA. */
6319
6320 static comp_cost
6321 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6322 struct iv_ca_delta **delta)
6323 {
6324 bitmap_iterator bi, bj;
6325 unsigned int i, j, k;
6326 struct iv_cand *cand;
6327 comp_cost orig_cost, acost;
6328 struct iv_ca_delta *act_delta, *tmp_delta;
6329 struct cost_pair *old_cp, *best_cp = NULL;
6330
6331 *delta = NULL;
6332 orig_cost = iv_ca_cost (ivs);
6333
6334 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6335 {
6336 if (ivs->n_cand_uses[i] == 1
6337 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6338 continue;
6339
6340 cand = data->vcands[i];
6341
6342 act_delta = NULL;
6343 /* Represent uses under current candidate using other ones with
6344 lower local cost. */
6345 for (j = 0; j < ivs->upto; j++)
6346 {
6347 struct iv_group *group = data->vgroups[j];
6348 old_cp = iv_ca_cand_for_group (ivs, group);
6349
6350 if (old_cp->cand != cand)
6351 continue;
6352
6353 best_cp = old_cp;
6354 if (data->consider_all_candidates)
6355 for (k = 0; k < data->vcands.length (); k++)
6356 best_cp = cheaper_cost_with_cand (data, group, k,
6357 old_cp->cand, best_cp);
6358 else
6359 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6360 best_cp = cheaper_cost_with_cand (data, group, k,
6361 old_cp->cand, best_cp);
6362
6363 if (best_cp == old_cp)
6364 continue;
6365
6366 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6367 }
6368 /* No need for further prune. */
6369 if (!act_delta)
6370 continue;
6371
6372 /* Prune the new candidate set. */
6373 iv_ca_delta_commit (data, ivs, act_delta, true);
6374 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6375 iv_ca_delta_commit (data, ivs, act_delta, false);
6376 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6377
6378 if (acost < orig_cost)
6379 {
6380 *delta = act_delta;
6381 return acost;
6382 }
6383 else
6384 iv_ca_delta_free (&act_delta);
6385 }
6386
6387 return orig_cost;
6388 }
6389
6390 /* Tries to extend the set IVS in the best possible way in order to
6391 express GROUP. If ORIGINALP is true, prefer candidates from
6392 the original set of IVs, otherwise favor important candidates not
6393 based on any memory object. */
6394
6395 static bool
6396 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6397 struct iv_group *group, bool originalp)
6398 {
6399 comp_cost best_cost, act_cost;
6400 unsigned i;
6401 bitmap_iterator bi;
6402 struct iv_cand *cand;
6403 struct iv_ca_delta *best_delta = NULL, *act_delta;
6404 struct cost_pair *cp;
6405
6406 iv_ca_add_group (data, ivs, group);
6407 best_cost = iv_ca_cost (ivs);
6408 cp = iv_ca_cand_for_group (ivs, group);
6409 if (cp)
6410 {
6411 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6412 iv_ca_set_no_cp (data, ivs, group);
6413 }
6414
6415 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6416 first try important candidates not based on any memory object. Only if
6417 this fails, try the specific ones. Rationale -- in loops with many
6418 variables the best choice often is to use just one generic biv. If we
6419 added here many ivs specific to the uses, the optimization algorithm later
6420 would be likely to get stuck in a local minimum, thus causing us to create
6421 too many ivs. The approach from few ivs to more seems more likely to be
6422 successful -- starting from few ivs, replacing an expensive use by a
6423 specific iv should always be a win. */
6424 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6425 {
6426 cand = data->vcands[i];
6427
6428 if (originalp && cand->pos != IP_ORIGINAL)
6429 continue;
6430
6431 if (!originalp && cand->iv->base_object != NULL_TREE)
6432 continue;
6433
6434 if (iv_ca_cand_used_p (ivs, cand))
6435 continue;
6436
6437 cp = get_group_iv_cost (data, group, cand);
6438 if (!cp)
6439 continue;
6440
6441 iv_ca_set_cp (data, ivs, group, cp);
6442 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6443 true);
6444 iv_ca_set_no_cp (data, ivs, group);
6445 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6446
6447 if (act_cost < best_cost)
6448 {
6449 best_cost = act_cost;
6450
6451 iv_ca_delta_free (&best_delta);
6452 best_delta = act_delta;
6453 }
6454 else
6455 iv_ca_delta_free (&act_delta);
6456 }
6457
6458 if (best_cost.infinite_cost_p ())
6459 {
6460 for (i = 0; i < group->n_map_members; i++)
6461 {
6462 cp = group->cost_map + i;
6463 cand = cp->cand;
6464 if (!cand)
6465 continue;
6466
6467 /* Already tried this. */
6468 if (cand->important)
6469 {
6470 if (originalp && cand->pos == IP_ORIGINAL)
6471 continue;
6472 if (!originalp && cand->iv->base_object == NULL_TREE)
6473 continue;
6474 }
6475
6476 if (iv_ca_cand_used_p (ivs, cand))
6477 continue;
6478
6479 act_delta = NULL;
6480 iv_ca_set_cp (data, ivs, group, cp);
6481 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6482 iv_ca_set_no_cp (data, ivs, group);
6483 act_delta = iv_ca_delta_add (group,
6484 iv_ca_cand_for_group (ivs, group),
6485 cp, act_delta);
6486
6487 if (act_cost < best_cost)
6488 {
6489 best_cost = act_cost;
6490
6491 if (best_delta)
6492 iv_ca_delta_free (&best_delta);
6493 best_delta = act_delta;
6494 }
6495 else
6496 iv_ca_delta_free (&act_delta);
6497 }
6498 }
6499
6500 iv_ca_delta_commit (data, ivs, best_delta, true);
6501 iv_ca_delta_free (&best_delta);
6502
6503 return !best_cost.infinite_cost_p ();
6504 }
6505
6506 /* Finds an initial assignment of candidates to the groups of uses. */
6507
6508 static struct iv_ca *
6509 get_initial_solution (struct ivopts_data *data, bool originalp)
6510 {
6511 unsigned i;
6512 struct iv_ca *ivs = iv_ca_new (data);
6513
6514 for (i = 0; i < data->vgroups.length (); i++)
6515 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6516 {
6517 iv_ca_free (&ivs);
6518 return NULL;
6519 }
6520
6521 return ivs;
6522 }
6523
6524 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6525 points to a bool variable; if it is true, this function tries to break
6526 the locally optimal fixed point by replacing candidates in IVS. */
6527
6528 static bool
6529 try_improve_iv_set (struct ivopts_data *data,
6530 struct iv_ca *ivs, bool *try_replace_p)
6531 {
6532 unsigned i, n_ivs;
6533 comp_cost acost, best_cost = iv_ca_cost (ivs);
6534 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6535 struct iv_cand *cand;
6536
6537 /* Try extending the set of induction variables by one. */
6538 for (i = 0; i < data->vcands.length (); i++)
6539 {
6540 cand = data->vcands[i];
6541
6542 if (iv_ca_cand_used_p (ivs, cand))
6543 continue;
6544
6545 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6546 if (!act_delta)
6547 continue;
6548
6549 /* If we successfully added the candidate and the set is small enough,
6550 try optimizing it by removing other candidates. */
6551 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6552 {
6553 iv_ca_delta_commit (data, ivs, act_delta, true);
6554 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6555 iv_ca_delta_commit (data, ivs, act_delta, false);
6556 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6557 }
6558
6559 if (acost < best_cost)
6560 {
6561 best_cost = acost;
6562 iv_ca_delta_free (&best_delta);
6563 best_delta = act_delta;
6564 }
6565 else
6566 iv_ca_delta_free (&act_delta);
6567 }
6568
6569 if (!best_delta)
6570 {
6571 /* Try removing the candidates from the set instead. */
6572 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6573
6574 if (!best_delta && *try_replace_p)
6575 {
6576 *try_replace_p = false;
6577 /* So far the candidate selection algorithm tends to choose fewer IVs
6578 so that it can handle cases in which loops have many variables
6579 but the best choice is often to use only one general biv. One
6580 weakness is that it can't handle the opposite cases, in which different
6581 candidates should be chosen with respect to each use. To solve
6582 this problem, we replace candidates in the manner described in the
6583 comments of iv_ca_replace, thus giving the general algorithm a chance
6584 to break the locally optimal fixed point in these cases. */
6585 best_cost = iv_ca_replace (data, ivs, &best_delta);
6586 }
6587
6588 if (!best_delta)
6589 return false;
6590 }
6591
6592 iv_ca_delta_commit (data, ivs, best_delta, true);
6593 gcc_assert (best_cost == iv_ca_cost (ivs));
6594 iv_ca_delta_free (&best_delta);
6595 return true;
6596 }
6597
6598 /* Attempts to find the optimal set of induction variables. We use a simple
6599 greedy heuristic -- we try to replace at most one candidate in the selected
6600 solution and remove the unused ivs while this improves the cost. */
6601
6602 static struct iv_ca *
6603 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6604 {
6605 struct iv_ca *set;
6606 bool try_replace_p = true;
6607
6608 /* Get the initial solution. */
6609 set = get_initial_solution (data, originalp);
6610 if (!set)
6611 {
6612 if (dump_file && (dump_flags & TDF_DETAILS))
6613 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6614 return NULL;
6615 }
6616
6617 if (dump_file && (dump_flags & TDF_DETAILS))
6618 {
6619 fprintf (dump_file, "Initial set of candidates:\n");
6620 iv_ca_dump (data, dump_file, set);
6621 }
6622
6623 while (try_improve_iv_set (data, set, &try_replace_p))
6624 {
6625 if (dump_file && (dump_flags & TDF_DETAILS))
6626 {
6627 fprintf (dump_file, "Improved to:\n");
6628 iv_ca_dump (data, dump_file, set);
6629 }
6630 }
6631
6632 return set;
6633 }
6634
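/* Finds the optimal set of induction variables for the current loop. Both
   strategies of find_optimal_iv_set_1 are tried -- preferring the original
   ivs and preferring candidates not based on memory objects -- the cheaper
   of the two resulting sets is returned and the selected candidate is
   recorded for each group. */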
6635 static struct iv_ca *
6636 find_optimal_iv_set (struct ivopts_data *data)
6637 {
6638 unsigned i;
6639 comp_cost cost, origcost;
6640 struct iv_ca *set, *origset;
6641
6642 /* Determine the cost based on a strategy that starts with the original IVs,
6643 and try again using a strategy that prefers important candidates not
6644 based on any memory object. */
6645 origset = find_optimal_iv_set_1 (data, true);
6646 set = find_optimal_iv_set_1 (data, false);
6647
6648 if (!origset && !set)
6649 return NULL;
6650
6651 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6652 cost = set ? iv_ca_cost (set) : infinite_cost;
6653
6654 if (dump_file && (dump_flags & TDF_DETAILS))
6655 {
6656 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6657 origcost.cost, origcost.complexity);
6658 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6659 cost.cost, cost.complexity);
6660 }
6661
6662 /* Choose the one with the best cost. */
6663 if (origcost <= cost)
6664 {
6665 if (set)
6666 iv_ca_free (&set);
6667 set = origset;
6668 }
6669 else if (origset)
6670 iv_ca_free (&origset);
6671
6672 for (i = 0; i < data->vgroups.length (); i++)
6673 {
6674 struct iv_group *group = data->vgroups[i];
6675 group->selected = iv_ca_cand_for_group (set, group)->cand;
6676 }
6677
6678 return set;
6679 }
6680
6681 /* Creates a new induction variable corresponding to CAND. */
6682
6683 static void
6684 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6685 {
6686 gimple_stmt_iterator incr_pos;
6687 tree base;
6688 struct iv_use *use;
6689 struct iv_group *group;
6690 bool after = false;
6691
6692 gcc_assert (cand->iv != NULL);
6693
6694 switch (cand->pos)
6695 {
6696 case IP_NORMAL:
6697 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6698 break;
6699
6700 case IP_END:
6701 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6702 after = true;
6703 break;
6704
6705 case IP_AFTER_USE:
6706 after = true;
6707 /* fall through */
6708 case IP_BEFORE_USE:
6709 incr_pos = gsi_for_stmt (cand->incremented_at);
6710 break;
6711
6712 case IP_ORIGINAL:
6713 /* Mark that the iv is preserved. */
6714 name_info (data, cand->var_before)->preserve_biv = true;
6715 name_info (data, cand->var_after)->preserve_biv = true;
6716
6717 /* Rewrite the increment so that it uses var_before directly. */
6718 use = find_interesting_uses_op (data, cand->var_after);
6719 group = data->vgroups[use->group_id];
6720 group->selected = cand;
6721 return;
6722 }
6723
6724 gimple_add_tmp_var (cand->var_before);
6725
6726 base = unshare_expr (cand->iv->base);
6727
6728 create_iv (base, unshare_expr (cand->iv->step),
6729 cand->var_before, data->current_loop,
6730 &incr_pos, after, &cand->var_before, &cand->var_after);
6731 }
6732
6733 /* Creates new induction variables described in SET. */
6734
6735 static void
6736 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6737 {
6738 unsigned i;
6739 struct iv_cand *cand;
6740 bitmap_iterator bi;
6741
6742 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6743 {
6744 cand = data->vcands[i];
6745 create_new_iv (data, cand);
6746 }
6747
6748 if (dump_file && (dump_flags & TDF_DETAILS))
6749 {
6750 fprintf (dump_file, "Selected IV set for loop %d",
6751 data->current_loop->num);
6752 if (data->loop_loc != UNKNOWN_LOCATION)
6753 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6754 LOCATION_LINE (data->loop_loc));
6755 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6756 avg_loop_niter (data->current_loop));
6757 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6758 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6759 {
6760 cand = data->vcands[i];
6761 dump_cand (dump_file, cand);
6762 }
6763 fprintf (dump_file, "\n");
6764 }
6765 }
6766
6767 /* Rewrites USE (definition of iv used in a nonlinear expression)
6768 using candidate CAND. */
6769
6770 static void
6771 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6772 struct iv_use *use, struct iv_cand *cand)
6773 {
6774 gassign *ass;
6775 gimple_stmt_iterator bsi;
6776 tree comp, type = get_use_type (use), tgt;
6777
6778 /* An important special case -- if we are asked to express the value of
6779 the original iv by itself, just exit; there is no need to
6780 introduce a new computation (that might also need casting the
6781 variable to unsigned and back). */
6782 if (cand->pos == IP_ORIGINAL
6783 && cand->incremented_at == use->stmt)
6784 {
6785 tree op = NULL_TREE;
6786 enum tree_code stmt_code;
6787
6788 gcc_assert (is_gimple_assign (use->stmt));
6789 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6790
6791 /* Check whether we may leave the computation unchanged.
6792 This is the case only if it does not rely on other
6793 computations in the loop -- otherwise, the computation
6794 we rely upon may be removed in remove_unused_ivs,
6795 thus leading to ICE. */
6796 stmt_code = gimple_assign_rhs_code (use->stmt);
6797 if (stmt_code == PLUS_EXPR
6798 || stmt_code == MINUS_EXPR
6799 || stmt_code == POINTER_PLUS_EXPR)
6800 {
6801 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6802 op = gimple_assign_rhs2 (use->stmt);
6803 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6804 op = gimple_assign_rhs1 (use->stmt);
6805 }
6806
6807 if (op != NULL_TREE)
6808 {
6809 if (expr_invariant_in_loop_p (data->current_loop, op))
6810 return;
6811 if (TREE_CODE (op) == SSA_NAME)
6812 {
6813 struct iv *iv = get_iv (data, op);
6814 if (iv != NULL && integer_zerop (iv->step))
6815 return;
6816 }
6817 }
6818 }
6819
6820 switch (gimple_code (use->stmt))
6821 {
6822 case GIMPLE_PHI:
6823 tgt = PHI_RESULT (use->stmt);
6824
6825 /* If we should keep the biv, do not replace it. */
6826 if (name_info (data, tgt)->preserve_biv)
6827 return;
6828
6829 bsi = gsi_after_labels (gimple_bb (use->stmt));
6830 break;
6831
6832 case GIMPLE_ASSIGN:
6833 tgt = gimple_assign_lhs (use->stmt);
6834 bsi = gsi_for_stmt (use->stmt);
6835 break;
6836
6837 default:
6838 gcc_unreachable ();
6839 }
6840
6841 aff_tree aff_inv, aff_var;
6842 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6843 use, cand, &aff_inv, &aff_var))
6844 gcc_unreachable ();
6845
6846 unshare_aff_combination (&aff_inv);
6847 unshare_aff_combination (&aff_var);
6848 /* Prefer the CSE opportunity over the loop invariant by adding the offset
6849 last, so that iv_uses that differ only in their offsets can be CSEed. */
6850 poly_widest_int offset = aff_inv.offset;
6851 aff_inv.offset = 0;
6852
6853 gimple_seq stmt_list = NULL, seq = NULL;
6854 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6855 tree comp_op2 = aff_combination_to_tree (&aff_var);
6856 gcc_assert (comp_op1 && comp_op2);
6857
6858 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6859 gimple_seq_add_seq (&stmt_list, seq);
6860 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6861 gimple_seq_add_seq (&stmt_list, seq);
6862
6863 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6864 std::swap (comp_op1, comp_op2);
6865
6866 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6867 {
6868 comp = fold_build_pointer_plus (comp_op1,
6869 fold_convert (sizetype, comp_op2));
6870 comp = fold_build_pointer_plus (comp,
6871 wide_int_to_tree (sizetype, offset));
6872 }
6873 else
6874 {
6875 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6876 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6877 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6878 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6879 }
6880
6881 comp = fold_convert (type, comp);
6882 if (!valid_gimple_rhs_p (comp)
6883 || (gimple_code (use->stmt) != GIMPLE_PHI
6884 /* We can't allow re-allocating the stmt as it might be pointed
6885 to still. */
6886 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6887 >= gimple_num_ops (gsi_stmt (bsi)))))
6888 {
6889 comp = force_gimple_operand (comp, &seq, true, NULL);
6890 gimple_seq_add_seq (&stmt_list, seq);
6891 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6892 {
6893 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6894 /* As this isn't a plain copy we have to reset alignment
6895 information. */
6896 if (SSA_NAME_PTR_INFO (comp))
6897 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6898 }
6899 }
6900
6901 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6902 if (gimple_code (use->stmt) == GIMPLE_PHI)
6903 {
6904 ass = gimple_build_assign (tgt, comp);
6905 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6906
6907 bsi = gsi_for_stmt (use->stmt);
6908 remove_phi_node (&bsi, false);
6909 }
6910 else
6911 {
6912 gimple_assign_set_rhs_from_tree (&bsi, comp);
6913 use->stmt = gsi_stmt (bsi);
6914 }
6915 }
6916
6917 /* Performs a peephole optimization to reorder the iv update statement with
6918 a mem ref to enable instruction combining in later phases. The mem ref uses
6919 the iv value before the update, so the reordering transformation requires
6920 adjustment of the offset. CAND is the selected IV_CAND.
6921
6922 Example:
6923
6924 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6925 iv2 = iv1 + 1;
6926
6927 if (t < val) (1)
6928 goto L;
6929 goto Head;
6930
6931
6932 directly propagating t over to (1) will introduce an overlapping live range,
6933 thus increasing register pressure. This peephole transforms it into:
6934
6935
6936 iv2 = iv1 + 1;
6937 t = MEM_REF (base, iv2, 8, 8);
6938 if (t < val)
6939 goto L;
6940 goto Head;
6941 */
6942
6943 static void
6944 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6945 {
6946 tree var_after;
6947 gimple *iv_update, *stmt;
6948 basic_block bb;
6949 gimple_stmt_iterator gsi, gsi_iv;
6950
6951 if (cand->pos != IP_NORMAL)
6952 return;
6953
6954 var_after = cand->var_after;
6955 iv_update = SSA_NAME_DEF_STMT (var_after);
6956
6957 bb = gimple_bb (iv_update);
6958 gsi = gsi_last_nondebug_bb (bb);
6959 stmt = gsi_stmt (gsi);
6960
6961 /* Only handle conditional statements for now. */
6962 if (gimple_code (stmt) != GIMPLE_COND)
6963 return;
6964
6965 gsi_prev_nondebug (&gsi);
6966 stmt = gsi_stmt (gsi);
6967 if (stmt != iv_update)
6968 return;
6969
6970 gsi_prev_nondebug (&gsi);
6971 if (gsi_end_p (gsi))
6972 return;
6973
6974 stmt = gsi_stmt (gsi);
6975 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6976 return;
6977
6978 if (stmt != use->stmt)
6979 return;
6980
6981 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6982 return;
6983
6984 if (dump_file && (dump_flags & TDF_DETAILS))
6985 {
6986 fprintf (dump_file, "Reordering \n");
6987 print_gimple_stmt (dump_file, iv_update, 0);
6988 print_gimple_stmt (dump_file, use->stmt, 0);
6989 fprintf (dump_file, "\n");
6990 }
6991
6992 gsi = gsi_for_stmt (use->stmt);
6993 gsi_iv = gsi_for_stmt (iv_update);
6994 gsi_move_before (&gsi_iv, &gsi);
6995
6996 cand->pos = IP_BEFORE_USE;
6997 cand->incremented_at = use->stmt;
6998 }
6999
7000 /* Rewrites USE (address that is an iv) using candidate CAND. */
7001
7002 static void
7003 rewrite_use_address (struct ivopts_data *data,
7004 struct iv_use *use, struct iv_cand *cand)
7005 {
7006 aff_tree aff;
7007 bool ok;
7008
7009 adjust_iv_update_pos (cand, use);
7010 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7011 gcc_assert (ok);
7012 unshare_aff_combination (&aff);
7013
7014 /* To avoid undefined overflow problems, all IV candidates use unsigned
7015 integer types. The drawback is that this makes it impossible for
7016 create_mem_ref to distinguish an IV that is based on a memory object
7017 from one that represents simply an offset.
7018
7019 To work around this problem, we pass a hint to create_mem_ref that
7020 indicates which variable (if any) in aff is an IV based on a memory
7021 object. Note that we only consider the candidate. If this is not
7022 based on an object, the base of the reference is in some subexpression
7023 of the use -- but these will use pointer types, so they are recognized
7024 by the create_mem_ref heuristics anyway. */
7025 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7026 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7027 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7028 tree type = TREE_TYPE (*use->op_p);
7029 unsigned int align = get_object_alignment (*use->op_p);
7030 if (align != TYPE_ALIGN (type))
7031 type = build_aligned_type (type, align);
7032
7033 tree ref = create_mem_ref (&bsi, type, &aff,
7034 reference_alias_ptr_type (*use->op_p),
7035 iv, base_hint, data->speed);
7036
7037 copy_ref_info (ref, *use->op_p);
7038 *use->op_p = ref;
7039 }
7040
7041 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7042 candidate CAND. */
7043
7044 static void
7045 rewrite_use_compare (struct ivopts_data *data,
7046 struct iv_use *use, struct iv_cand *cand)
7047 {
7048 tree comp, op, bound;
7049 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7050 enum tree_code compare;
7051 struct iv_group *group = data->vgroups[use->group_id];
7052 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7053
7054 bound = cp->value;
7055 if (bound)
7056 {
7057 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7058 tree var_type = TREE_TYPE (var);
7059 gimple_seq stmts;
7060
7061 if (dump_file && (dump_flags & TDF_DETAILS))
7062 {
7063 fprintf (dump_file, "Replacing exit test: ");
7064 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7065 }
7066 compare = cp->comp;
7067 bound = unshare_expr (fold_convert (var_type, bound));
7068 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7069 if (stmts)
7070 gsi_insert_seq_on_edge_immediate (
7071 loop_preheader_edge (data->current_loop),
7072 stmts);
7073
7074 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7075 gimple_cond_set_lhs (cond_stmt, var);
7076 gimple_cond_set_code (cond_stmt, compare);
7077 gimple_cond_set_rhs (cond_stmt, op);
7078 return;
7079 }
7080
7081 /* The induction variable elimination failed; just express the original
7082 giv. */
7083 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7084 gcc_assert (comp != NULL_TREE);
7085 gcc_assert (use->op_p != NULL);
7086 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7087 SSA_NAME_VAR (*use->op_p),
7088 true, GSI_SAME_STMT);
7089 }
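/* For illustration (hypothetical SSA names): when the elimination above
   succeeds, an exit test such as "if (i_1 < n_2)" can become
   "if (ivtmp_3 != bound_4)", where the comparison code comes from the cost
   pair and bound_4 is computed once on the preheader edge. */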
7090
7091 /* Rewrite the groups using the selected induction variables. */
7092
7093 static void
7094 rewrite_groups (struct ivopts_data *data)
7095 {
7096 unsigned i, j;
7097
7098 for (i = 0; i < data->vgroups.length (); i++)
7099 {
7100 struct iv_group *group = data->vgroups[i];
7101 struct iv_cand *cand = group->selected;
7102
7103 gcc_assert (cand);
7104
7105 if (group->type == USE_NONLINEAR_EXPR)
7106 {
7107 for (j = 0; j < group->vuses.length (); j++)
7108 {
7109 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7110 update_stmt (group->vuses[j]->stmt);
7111 }
7112 }
7113 else if (group->type == USE_ADDRESS)
7114 {
7115 for (j = 0; j < group->vuses.length (); j++)
7116 {
7117 rewrite_use_address (data, group->vuses[j], cand);
7118 update_stmt (group->vuses[j]->stmt);
7119 }
7120 }
7121 else
7122 {
7123 gcc_assert (group->type == USE_COMPARE);
7124
7125 for (j = 0; j < group->vuses.length (); j++)
7126 {
7127 rewrite_use_compare (data, group->vuses[j], cand);
7128 update_stmt (group->vuses[j]->stmt);
7129 }
7130 }
7131 }
7132 }
7133
7134 /* Removes the ivs that are not used after rewriting. */
7135
7136 static void
7137 remove_unused_ivs (struct ivopts_data *data)
7138 {
7139 unsigned j;
7140 bitmap_iterator bi;
7141 bitmap toremove = BITMAP_ALLOC (NULL);
7142
7143 /* Figure out an order in which to release SSA DEFs so that we don't
7144 release something that we'd have to propagate into a debug stmt
7145 afterwards. */
7146 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7147 {
7148 struct version_info *info;
7149
7150 info = ver_info (data, j);
7151 if (info->iv
7152 && !integer_zerop (info->iv->step)
7153 && !info->inv_id
7154 && !info->iv->nonlin_use
7155 && !info->preserve_biv)
7156 {
7157 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7158
7159 tree def = info->iv->ssa_name;
7160
7161 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7162 {
7163 imm_use_iterator imm_iter;
7164 use_operand_p use_p;
7165 gimple *stmt;
7166 int count = 0;
7167
7168 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7169 {
7170 if (!gimple_debug_bind_p (stmt))
7171 continue;
7172
7173 /* We just want to determine whether to do nothing
7174 (count == 0), to substitute the computed
7175 expression into a single use of the SSA DEF by
7176 itself (count == 1), or to use a debug temp
7177 because the SSA DEF is used multiple times or as
7178 part of a larger expression (count > 1). */
7179 count++;
7180 if (gimple_debug_bind_get_value (stmt) != def)
7181 count++;
7182
7183 if (count > 1)
7184 BREAK_FROM_IMM_USE_STMT (imm_iter);
7185 }
7186
7187 if (!count)
7188 continue;
7189
7190 struct iv_use dummy_use;
7191 struct iv_cand *best_cand = NULL, *cand;
7192 unsigned i, best_pref = 0, cand_pref;
7193
7194 memset (&dummy_use, 0, sizeof (dummy_use));
7195 dummy_use.iv = info->iv;
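/* Choose the selected candidate that most resembles the iv being removed:
   an equal step scores 4, a matching mode of the base 2, and a constant
   base 1. Only the first 64 groups are scanned to bound compile time. */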
7196 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7197 {
7198 cand = data->vgroups[i]->selected;
7199 if (cand == best_cand)
7200 continue;
7201 cand_pref = operand_equal_p (cand->iv->step,
7202 info->iv->step, 0)
7203 ? 4 : 0;
7204 cand_pref
7205 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7206 == TYPE_MODE (TREE_TYPE (info->iv->base))
7207 ? 2 : 0;
7208 cand_pref
7209 += TREE_CODE (cand->iv->base) == INTEGER_CST
7210 ? 1 : 0;
7211 if (best_cand == NULL || best_pref < cand_pref)
7212 {
7213 best_cand = cand;
7214 best_pref = cand_pref;
7215 }
7216 }
7217
7218 if (!best_cand)
7219 continue;
7220
7221 tree comp = get_computation_at (data->current_loop,
7222 SSA_NAME_DEF_STMT (def),
7223 &dummy_use, best_cand);
7224 if (!comp)
7225 continue;
7226
7227 if (count > 1)
7228 {
7229 tree vexpr = make_node (DEBUG_EXPR_DECL);
7230 DECL_ARTIFICIAL (vexpr) = 1;
7231 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7232 if (SSA_NAME_VAR (def))
7233 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7234 else
7235 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7236 gdebug *def_temp
7237 = gimple_build_debug_bind (vexpr, comp, NULL);
7238 gimple_stmt_iterator gsi;
7239
7240 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7241 gsi = gsi_after_labels (gimple_bb
7242 (SSA_NAME_DEF_STMT (def)));
7243 else
7244 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7245
7246 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7247 comp = vexpr;
7248 }
7249
7250 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7251 {
7252 if (!gimple_debug_bind_p (stmt))
7253 continue;
7254
7255 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7256 SET_USE (use_p, comp);
7257
7258 update_stmt (stmt);
7259 }
7260 }
7261 }
7262 }
7263
7264 release_defs_bitset (toremove);
7265
7266 BITMAP_FREE (toremove);
7267 }
7268
7269 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7270 for hash_map::traverse. */
7271
7272 bool
7273 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7274 {
7275 free (value);
7276 return true;
7277 }
7278
7279 /* Frees data allocated by the optimization of a single loop. */
7280
7281 static void
7282 free_loop_data (struct ivopts_data *data)
7283 {
7284 unsigned i, j;
7285 bitmap_iterator bi;
7286 tree obj;
7287
7288 if (data->niters)
7289 {
7290 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7291 delete data->niters;
7292 data->niters = NULL;
7293 }
7294
7295 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7296 {
7297 struct version_info *info;
7298
7299 info = ver_info (data, i);
7300 info->iv = NULL;
7301 info->has_nonlin_use = false;
7302 info->preserve_biv = false;
7303 info->inv_id = 0;
7304 }
7305 bitmap_clear (data->relevant);
7306 bitmap_clear (data->important_candidates);
7307
7308 for (i = 0; i < data->vgroups.length (); i++)
7309 {
7310 struct iv_group *group = data->vgroups[i];
7311
7312 for (j = 0; j < group->vuses.length (); j++)
7313 free (group->vuses[j]);
7314 group->vuses.release ();
7315
7316 BITMAP_FREE (group->related_cands);
7317 for (j = 0; j < group->n_map_members; j++)
7318 {
7319 if (group->cost_map[j].inv_vars)
7320 BITMAP_FREE (group->cost_map[j].inv_vars);
7321 if (group->cost_map[j].inv_exprs)
7322 BITMAP_FREE (group->cost_map[j].inv_exprs);
7323 }
7324
7325 free (group->cost_map);
7326 free (group);
7327 }
7328 data->vgroups.truncate (0);
7329
7330 for (i = 0; i < data->vcands.length (); i++)
7331 {
7332 struct iv_cand *cand = data->vcands[i];
7333
7334 if (cand->inv_vars)
7335 BITMAP_FREE (cand->inv_vars);
7336 if (cand->inv_exprs)
7337 BITMAP_FREE (cand->inv_exprs);
7338 free (cand);
7339 }
7340 data->vcands.truncate (0);
7341
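/* If optimizing this loop created new SSA names, grow version_info so that
   it stays large enough for the next loop. */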
7342 if (data->version_info_size < num_ssa_names)
7343 {
7344 data->version_info_size = 2 * num_ssa_names;
7345 free (data->version_info);
7346 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7347 }
7348
7349 data->max_inv_var_id = 0;
7350 data->max_inv_expr_id = 0;
7351
7352 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7353 SET_DECL_RTL (obj, NULL_RTX);
7354
7355 decl_rtl_to_reset.truncate (0);
7356
7357 data->inv_expr_tab->empty ();
7358
7359 data->iv_common_cand_tab->empty ();
7360 data->iv_common_cands.truncate (0);
7361 }
7362
7363 /* Finalizes the data structures used by the iv optimization pass, freeing
7364 everything referenced from DATA. */
7365
7366 static void
7367 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7368 {
7369 free_loop_data (data);
7370 free (data->version_info);
7371 BITMAP_FREE (data->relevant);
7372 BITMAP_FREE (data->important_candidates);
7373
7374 decl_rtl_to_reset.release ();
7375 data->vgroups.release ();
7376 data->vcands.release ();
7377 delete data->inv_expr_tab;
7378 data->inv_expr_tab = NULL;
7379 free_affine_expand_cache (&data->name_expansion_cache);
7380 delete data->iv_common_cand_tab;
7381 data->iv_common_cand_tab = NULL;
7382 data->iv_common_cands.release ();
7383 obstack_free (&data->iv_obstack, NULL);
7384 }
7385
7386 /* Returns true if the loop body BODY includes any function calls. */
7387
7388 static bool
7389 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7390 {
7391 gimple_stmt_iterator gsi;
7392 unsigned i;
7393
7394 for (i = 0; i < num_nodes; i++)
7395 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7396 {
7397 gimple *stmt = gsi_stmt (gsi);
7398 if (is_gimple_call (stmt)
7399 && !gimple_call_internal_p (stmt)
7400 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7401 return true;
7402 }
7403 return false;
7404 }
7405
7406 /* Optimizes the LOOP. Returns true if anything changed. */
7407
7408 static bool
7409 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7410 {
7411 bool changed = false;
7412 struct iv_ca *iv_ca;
7413 edge exit = single_dom_exit (loop);
7414 basic_block *body;
7415
7416 gcc_assert (!data->niters);
7417 data->current_loop = loop;
7418 data->loop_loc = find_loop_location (loop);
7419 data->speed = optimize_loop_for_speed_p (loop);
7420
7421 if (dump_file && (dump_flags & TDF_DETAILS))
7422 {
7423 fprintf (dump_file, "Processing loop %d", loop->num);
7424 if (data->loop_loc != UNKNOWN_LOCATION)
7425 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7426 LOCATION_LINE (data->loop_loc));
7427 fprintf (dump_file, "\n");
7428
7429 if (exit)
7430 {
7431 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7432 exit->src->index, exit->dest->index);
7433 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7434 fprintf (dump_file, "\n");
7435 }
7436
7437 fprintf (dump_file, "\n");
7438 }
7439
7440 body = get_loop_body (loop);
7441 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7442 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7443 free (body);
7444
7445 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7446
7447 /* For each ssa name determines whether it behaves as an induction variable
7448 in some loop. */
7449 if (!find_induction_variables (data))
7450 goto finish;
7451
7452 /* Finds interesting uses (item 1). */
7453 find_interesting_uses (data);
7454 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7455 goto finish;
7456
7457 /* Finds candidates for the induction variables (item 2). */
7458 find_iv_candidates (data);
7459
7460 /* Calculates the costs (item 3, part 1). */
7461 determine_iv_costs (data);
7462 determine_group_iv_costs (data);
7463 determine_set_costs (data);
7464
7465 /* Find the optimal set of induction variables (item 3, part 2). */
7466 iv_ca = find_optimal_iv_set (data);
7467 if (!iv_ca)
7468 goto finish;
7469 changed = true;
7470
7471 /* Create the new induction variables (item 4, part 1). */
7472 create_new_ivs (data, iv_ca);
7473 iv_ca_free (&iv_ca);
7474
7475 /* Rewrite the uses (item 4, part 2). */
7476 rewrite_groups (data);
7477
7478 /* Remove the ivs that are unused after rewriting. */
7479 remove_unused_ivs (data);
7480
7481 /* We have changed the structure of induction variables; it might happen
7482 that definitions in the scev database refer to some of them that were
7483 eliminated. */
7484 scev_reset ();
7485
7486 finish:
7487 free_loop_data (data);
7488
7489 return changed;
7490 }
7491
7492 /* Main entry point. Optimizes induction variables in loops. */
7493
7494 void
7495 tree_ssa_iv_optimize (void)
7496 {
7497 struct loop *loop;
7498 struct ivopts_data data;
7499
7500 tree_ssa_iv_optimize_init (&data);
7501
7502 /* Optimize the loops starting with the innermost ones. */
7503 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7504 {
7505 if (dump_file && (dump_flags & TDF_DETAILS))
7506 flow_loop_dump (loop, dump_file, NULL, 1);
7507
7508 tree_ssa_iv_optimize_loop (&data, loop);
7509 }
7510
7511 tree_ssa_iv_optimize_finalize (&data);
7512 }
7513
7514 #include "gt-tree-ssa-loop-ivopts.h"