gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "backend.h"
  68 #include "cfghooks.h"
  69 #include "tree.h"
  70 #include "gimple.h"
  71 #include "rtl.h"
  72 #include "ssa.h"
  73 #include "alias.h"
  74 #include "fold-const.h"
  75 #include "stor-layout.h"
  76 #include "tm_p.h"
  77 #include "gimple-pretty-print.h"
  78 #include "internal-fn.h"
  79 #include "tree-eh.h"
  80 #include "gimplify.h"
  81 #include "gimple-iterator.h"
  82 #include "gimplify-me.h"
  83 #include "cgraph.h"
  84 #include "tree-cfg.h"
  85 #include "tree-ssa-loop-ivopts.h"
  86 #include "tree-ssa-loop-manip.h"
  87 #include "tree-ssa-loop-niter.h"
  88 #include "tree-ssa-loop.h"
  89 #include "flags.h"
  90 #include "insn-config.h"
  91 #include "expmed.h"
  92 #include "dojump.h"
  93 #include "explow.h"
  94 #include "calls.h"
  95 #include "emit-rtl.h"
  96 #include "varasm.h"
  97 #include "stmt.h"
  98 #include "expr.h"
  99 #include "tree-dfa.h"
 100 #include "tree-ssa.h"
 101 #include "cfgloop.h"
 102 #include "tree-pass.h"
 103 #include "tree-chrec.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "langhooks.h"
 107 #include "tree-affine.h"
 108 #include "target.h"
 109 #include "tree-inline.h"
 110 #include "tree-ssa-propagate.h"
 111 #include "tree-ssa-address.h"
 112 #include "builtins.h"
 113 #include "tree-vectorizer.h"
 114
 115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 116    cost of different addressing modes.  This should be moved to a TBD
 117    interface between the GIMPLE and RTL worlds.  */
 118 #include "recog.h"
 119
 120 /* The infinite cost.  */
 121 #define INFTY 10000000
 122
 123 #define AVG_LOOP_NITER(LOOP) 5
 124
 125 /* Returns the expected number of loop iterations for LOOP.
 126    The average trip count is computed from profile data if it
 127    exists. */
 128
 129 static inline HOST_WIDE_INT
 130 avg_loop_niter (struct loop *loop)
 131 {
 132   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 133   if (niter == -1)
 134     return AVG_LOOP_NITER (loop);
 135
 136   return niter;
 137 }
 138
 139 /* Representation of the induction variable.  */
 140 struct iv
 141 {
 142   tree base;            /* Initial value of the iv.  */
 143   tree base_object;     /* A memory object to that the induction variable points.  */
 144   tree step;            /* Step of the iv (constant only).  */
 145   tree ssa_name;        /* The ssa name with the value.  */
 146   unsigned use_id;      /* The identifier in the use if it is the case.  */
 147   bool biv_p;           /* Is it a biv?  */
 148   bool have_use_for;    /* Do we already have a use for it?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_ADDRESS,          /* Use in an address.  */
 170   USE_COMPARE           /* Use is a compare.  */
 171 };
 172
 173 /* Cost of a computation.  */
 174 struct comp_cost
 175 {
 176   int cost;             /* The runtime cost.  */
 177   unsigned complexity;  /* The estimate of the complexity of the code for
 178                            the computation (in no concrete units --
 179                            complexity field should be larger for more
 180                            complex expressions and addressing modes).  */
 181 };
 182
 183 static const comp_cost no_cost = {0, 0};
 184 static const comp_cost infinite_cost = {INFTY, INFTY};
 185
 186 /* The candidate - cost pair.  */
 187 struct cost_pair
 188 {
 189   struct iv_cand *cand; /* The candidate.  */
 190   comp_cost cost;       /* The cost.  */
 191   bitmap depends_on;    /* The list of invariants that have to be
 192                            preserved.  */
 193   tree value;           /* For final value elimination, the expression for
 194                            the final value of the iv.  For iv elimination,
 195                            the new bound to compare with.  */
 196   enum tree_code comp;  /* For iv elimination, the comparison.  */
 197   int inv_expr_id;      /* Loop invariant expression id.  */
 198 };
 199
 200 /* Use.  */
 201 struct iv_use
 202 {
 203   unsigned id;          /* The id of the use.  */
 204   unsigned sub_id;      /* The id of the sub use.  */
 205   enum use_type type;   /* Type of the use.  */
 206   struct iv *iv;        /* The induction variable it is based on.  */
 207   gimple *stmt;         /* Statement in that it occurs.  */
 208   tree *op_p;           /* The place where it occurs.  */
 209   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 210                            important ones.  */
 211
 212   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 213   struct cost_pair *cost_map;
 214                         /* The costs wrto the iv candidates.  */
 215
 216   struct iv_cand *selected;
 217                         /* The selected candidate.  */
 218
 219   struct iv_use *next;  /* The next sub use.  */
 220   tree addr_base;       /* Base address with const offset stripped.  */
 221   unsigned HOST_WIDE_INT addr_offset;
 222                         /* Const offset stripped from base address.  */
 223 };
 224
 225 /* The position where the iv is computed.  */
 226 enum iv_position
 227 {
 228   IP_NORMAL,            /* At the end, just before the exit condition.  */
 229   IP_END,               /* At the end of the latch block.  */
 230   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 231   IP_AFTER_USE,         /* Immediately after a specific use.  */
 232   IP_ORIGINAL           /* The original biv.  */
 233 };
 234
 235 /* The induction variable candidate.  */
 236 struct iv_cand
 237 {
 238   unsigned id;          /* The number of the candidate.  */
 239   bool important;       /* Whether this is an "important" candidate, i.e. such
 240                            that it should be considered by all uses.  */
 241   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 242   gimple *incremented_at;/* For original biv, the statement where it is
 243                            incremented.  */
 244   tree var_before;      /* The variable used for it before increment.  */
 245   tree var_after;       /* The variable used for it after increment.  */
 246   struct iv *iv;        /* The value of the candidate.  NULL for
 247                            "pseudocandidate" used to indicate the possibility
 248                            to replace the final value of an iv by direct
 249                            computation of the value.  */
 250   unsigned cost;        /* Cost of the candidate.  */
 251   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 252   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 253                               where it is incremented.  */
 254   bitmap depends_on;    /* The list of invariants that are used in step of the
 255                            biv.  */
 256   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 257                            smaller type.  */
 258 };
 259
 260 /* Loop invariant expression hashtable entry.  */
 261 struct iv_inv_expr_ent
 262 {
 263   tree expr;
 264   int id;
 265   hashval_t hash;
 266 };
 267
 268 /* The data used by the induction variable optimizations.  */
 269
 270 /* Hashtable helpers.  */
 271
 272 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 273 {
 274   static inline hashval_t hash (const iv_inv_expr_ent *);
 275   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 276 };
 277
 278 /* Hash function for loop invariant expressions.  */
 279
 280 inline hashval_t
 281 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 282 {
 283   return expr->hash;
 284 }
 285
 286 /* Hash table equality function for expressions.  */
 287
 288 inline bool
 289 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 290                            const iv_inv_expr_ent *expr2)
 291 {
 292   return expr1->hash == expr2->hash
 293          && operand_equal_p (expr1->expr, expr2->expr, 0);
 294 }
 295
 296 struct ivopts_data
 297 {
 298   /* The currently optimized loop.  */
 299   struct loop *current_loop;
 300   source_location loop_loc;
 301
 302   /* Numbers of iterations for all exits of the current loop.  */
 303   hash_map<edge, tree_niter_desc *> *niters;
 304
 305   /* Number of registers used in it.  */
 306   unsigned regs_used;
 307
 308   /* The size of version_info array allocated.  */
 309   unsigned version_info_size;
 310
 311   /* The array of information for the ssa names.  */
 312   struct version_info *version_info;
 313
 314   /* The hashtable of loop invariant expressions created
 315      by ivopt.  */
 316   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 317
 318   /* Loop invariant expression id.  */
 319   int inv_expr_id;
 320
 321   /* The bitmap of indices in version_info whose value was changed.  */
 322   bitmap relevant;
 323
 324   /* The uses of induction variables.  */
 325   vec<iv_use *> iv_uses;
 326
 327   /* The candidates.  */
 328   vec<iv_cand *> iv_candidates;
 329
 330   /* A bitmap of important candidates.  */
 331   bitmap important_candidates;
 332
 333   /* Cache used by tree_to_aff_combination_expand.  */
 334   hash_map<tree, name_expansion *> *name_expansion_cache;
 335
 336   /* The maximum invariant id.  */
 337   unsigned max_inv_id;
 338
 339   /* Number of no_overflow BIVs which are not used in memory address.  */
 340   unsigned bivs_not_used_in_addr;
 341
 342   /* Obstack for iv structure.  */
 343   struct obstack iv_obstack;
 344
 345   /* Whether to consider just related and important candidates when replacing a
 346      use.  */
 347   bool consider_all_candidates;
 348
 349   /* Are we optimizing for speed?  */
 350   bool speed;
 351
 352   /* Whether the loop body includes any function calls.  */
 353   bool body_includes_call;
 354
 355   /* Whether the loop body can only be exited via single exit.  */
 356   bool loop_single_exit_p;
 357 };
 358
 359 /* An assignment of iv candidates to uses.  */
 360
 361 struct iv_ca
 362 {
 363   /* The number of uses covered by the assignment.  */
 364   unsigned upto;
 365
 366   /* Number of uses that cannot be expressed by the candidates in the set.  */
 367   unsigned bad_uses;
 368
 369   /* Candidate assigned to a use, together with the related costs.  */
 370   struct cost_pair **cand_for_use;
 371
 372   /* Number of times each candidate is used.  */
 373   unsigned *n_cand_uses;
 374
 375   /* The candidates used.  */
 376   bitmap cands;
 377
 378   /* The number of candidates in the set.  */
 379   unsigned n_cands;
 380
 381   /* Total number of registers needed.  */
 382   unsigned n_regs;
 383
 384   /* Total cost of expressing uses.  */
 385   comp_cost cand_use_cost;
 386
 387   /* Total cost of candidates.  */
 388   unsigned cand_cost;
 389
 390   /* Number of times each invariant is used.  */
 391   unsigned *n_invariant_uses;
 392
 393   /* The array holding the number of uses of each loop
 394      invariant expressions created by ivopt.  */
 395   unsigned *used_inv_expr;
 396
 397   /* The number of created loop invariants.  */
 398   unsigned num_used_inv_expr;
 399
 400   /* Total cost of the assignment.  */
 401   comp_cost cost;
 402 };
 403
 404 /* Difference of two iv candidate assignments.  */
 405
 406 struct iv_ca_delta
 407 {
 408   /* Changed use.  */
 409   struct iv_use *use;
 410
 411   /* An old assignment (for rollback purposes).  */
 412   struct cost_pair *old_cp;
 413
 414   /* A new assignment.  */
 415   struct cost_pair *new_cp;
 416
 417   /* Next change in the list.  */
 418   struct iv_ca_delta *next_change;
 419 };
 420
 421 /* Bound on number of candidates below that all candidates are considered.  */
 422
 423 #define CONSIDER_ALL_CANDIDATES_BOUND \
 424   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 425
 426 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 427    optimizing such a loop would help, and it would take ages).  */
 428
 429 #define MAX_CONSIDERED_USES \
 430   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 431
 432 /* If there are at most this number of ivs in the set, try removing unnecessary
 433    ivs from the set always.  */
 434
 435 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 436   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 437
 438 /* The list of trees for that the decl_rtl field must be reset is stored
 439    here.  */
 440
 441 static vec<tree> decl_rtl_to_reset;
 442
 443 static comp_cost force_expr_to_var_cost (tree, bool);
 444
 445 /* Number of uses recorded in DATA.  */
 446
 447 static inline unsigned
 448 n_iv_uses (struct ivopts_data *data)
 449 {
 450   return data->iv_uses.length ();
 451 }
 452
 453 /* Ith use recorded in DATA.  */
 454
 455 static inline struct iv_use *
 456 iv_use (struct ivopts_data *data, unsigned i)
 457 {
 458   return data->iv_uses[i];
 459 }
 460
 461 /* Number of candidates recorded in DATA.  */
 462
 463 static inline unsigned
 464 n_iv_cands (struct ivopts_data *data)
 465 {
 466   return data->iv_candidates.length ();
 467 }
 468
 469 /* Ith candidate recorded in DATA.  */
 470
 471 static inline struct iv_cand *
 472 iv_cand (struct ivopts_data *data, unsigned i)
 473 {
 474   return data->iv_candidates[i];
 475 }
 476
 477 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 478
 479 edge
 480 single_dom_exit (struct loop *loop)
 481 {
 482   edge exit = single_exit (loop);
 483
 484   if (!exit)
 485     return NULL;
 486
 487   if (!just_once_each_iteration_p (loop, exit->src))
 488     return NULL;
 489
 490   return exit;
 491 }
 492
 493 /* Dumps information about the induction variable IV to FILE.  */
 494
 495 void
 496 dump_iv (FILE *file, struct iv *iv, bool dump_name)
 497 {
 498   if (iv->ssa_name && dump_name)
 499     {
 500       fprintf (file, "ssa name ");
 501       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 502       fprintf (file, "\n");
 503     }
 504
 505   fprintf (file, "  type ");
 506   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 507   fprintf (file, "\n");
 508
 509   if (iv->step)
 510     {
 511       fprintf (file, "  base ");
 512       print_generic_expr (file, iv->base, TDF_SLIM);
 513       fprintf (file, "\n");
 514
 515       fprintf (file, "  step ");
 516       print_generic_expr (file, iv->step, TDF_SLIM);
 517       fprintf (file, "\n");
 518     }
 519   else
 520     {
 521       fprintf (file, "  invariant ");
 522       print_generic_expr (file, iv->base, TDF_SLIM);
 523       fprintf (file, "\n");
 524     }
 525
 526   if (iv->base_object)
 527     {
 528       fprintf (file, "  base object ");
 529       print_generic_expr (file, iv->base_object, TDF_SLIM);
 530       fprintf (file, "\n");
 531     }
 532
 533   if (iv->biv_p)
 534     fprintf (file, "  is a biv\n");
 535
 536   if (iv->no_overflow)
 537     fprintf (file, "  iv doesn't overflow wrto loop niter\n");
 538 }
 539
 540 /* Dumps information about the USE to FILE.  */
 541
 542 void
 543 dump_use (FILE *file, struct iv_use *use)
 544 {
 545   fprintf (file, "use %d", use->id);
 546   if (use->sub_id)
 547     fprintf (file, ".%d", use->sub_id);
 548
 549   fprintf (file, "\n");
 550
 551   switch (use->type)
 552     {
 553     case USE_NONLINEAR_EXPR:
 554       fprintf (file, "  generic\n");
 555       break;
 556
 557     case USE_ADDRESS:
 558       fprintf (file, "  address\n");
 559       break;
 560
 561     case USE_COMPARE:
 562       fprintf (file, "  compare\n");
 563       break;
 564
 565     default:
 566       gcc_unreachable ();
 567     }
 568
 569   fprintf (file, "  in statement ");
 570   print_gimple_stmt (file, use->stmt, 0, 0);
 571   fprintf (file, "\n");
 572
 573   fprintf (file, "  at position ");
 574   if (use->op_p)
 575     print_generic_expr (file, *use->op_p, TDF_SLIM);
 576   fprintf (file, "\n");
 577
 578   dump_iv (file, use->iv, false);
 579
 580   if (use->related_cands)
 581     {
 582       fprintf (file, "  related candidates ");
 583       dump_bitmap (file, use->related_cands);
 584     }
 585 }
 586
 587 /* Dumps information about the uses to FILE.  */
 588
 589 void
 590 dump_uses (FILE *file, struct ivopts_data *data)
 591 {
 592   unsigned i;
 593   struct iv_use *use;
 594
 595   for (i = 0; i < n_iv_uses (data); i++)
 596     {
 597       use = iv_use (data, i);
 598       do
 599         {
 600           dump_use (file, use);
 601           use = use->next;
 602         }
 603       while (use);
 604       fprintf (file, "\n");
 605     }
 606 }
 607
 608 /* Dumps information about induction variable candidate CAND to FILE.  */
 609
 610 void
 611 dump_cand (FILE *file, struct iv_cand *cand)
 612 {
 613   struct iv *iv = cand->iv;
 614
 615   fprintf (file, "candidate %d%s\n",
 616            cand->id, cand->important ? " (important)" : "");
 617
 618   if (cand->depends_on)
 619     {
 620       fprintf (file, "  depends on ");
 621       dump_bitmap (file, cand->depends_on);
 622     }
 623
 624   if (!iv)
 625     {
 626       fprintf (file, "  final value replacement\n");
 627       return;
 628     }
 629
 630   if (cand->var_before)
 631     {
 632       fprintf (file, "  var_before ");
 633       print_generic_expr (file, cand->var_before, TDF_SLIM);
 634       fprintf (file, "\n");
 635     }
 636   if (cand->var_after)
 637     {
 638       fprintf (file, "  var_after ");
 639       print_generic_expr (file, cand->var_after, TDF_SLIM);
 640       fprintf (file, "\n");
 641     }
 642
 643   switch (cand->pos)
 644     {
 645     case IP_NORMAL:
 646       fprintf (file, "  incremented before exit test\n");
 647       break;
 648
 649     case IP_BEFORE_USE:
 650       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 651       break;
 652
 653     case IP_AFTER_USE:
 654       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 655       break;
 656
 657     case IP_END:
 658       fprintf (file, "  incremented at end\n");
 659       break;
 660
 661     case IP_ORIGINAL:
 662       fprintf (file, "  original biv\n");
 663       break;
 664     }
 665
 666   dump_iv (file, iv, false);
 667 }
 668
 669 /* Returns the info for ssa version VER.  */
 670
 671 static inline struct version_info *
 672 ver_info (struct ivopts_data *data, unsigned ver)
 673 {
 674   return data->version_info + ver;
 675 }
 676
 677 /* Returns the info for ssa name NAME.  */
 678
 679 static inline struct version_info *
 680 name_info (struct ivopts_data *data, tree name)
 681 {
 682   return ver_info (data, SSA_NAME_VERSION (name));
 683 }
 684
 685 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 686    emitted in LOOP.  */
 687
 688 static bool
 689 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 690 {
 691   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 692
 693   gcc_assert (bb);
 694
 695   if (sbb == loop->latch)
 696     return true;
 697
 698   if (sbb != bb)
 699     return false;
 700
 701   return stmt == last_stmt (bb);
 702 }
 703
 704 /* Returns true if STMT if after the place where the original induction
 705    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 706    if the positions are identical.  */
 707
 708 static bool
 709 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 710 {
 711   basic_block cand_bb = gimple_bb (cand->incremented_at);
 712   basic_block stmt_bb = gimple_bb (stmt);
 713
 714   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 715     return false;
 716
 717   if (stmt_bb != cand_bb)
 718     return true;
 719
 720   if (true_if_equal
 721       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 722     return true;
 723   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 724 }
 725
 726 /* Returns true if STMT if after the place where the induction variable
 727    CAND is incremented in LOOP.  */
 728
 729 static bool
 730 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 731 {
 732   switch (cand->pos)
 733     {
 734     case IP_END:
 735       return false;
 736
 737     case IP_NORMAL:
 738       return stmt_after_ip_normal_pos (loop, stmt);
 739
 740     case IP_ORIGINAL:
 741     case IP_AFTER_USE:
 742       return stmt_after_inc_pos (cand, stmt, false);
 743
 744     case IP_BEFORE_USE:
 745       return stmt_after_inc_pos (cand, stmt, true);
 746
 747     default:
 748       gcc_unreachable ();
 749     }
 750 }
 751
 752 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 753
 754 static bool
 755 abnormal_ssa_name_p (tree exp)
 756 {
 757   if (!exp)
 758     return false;
 759
 760   if (TREE_CODE (exp) != SSA_NAME)
 761     return false;
 762
 763   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 764 }
 765
 766 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 767    abnormal phi node.  Callback for for_each_index.  */
 768
 769 static bool
 770 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 771                                   void *data ATTRIBUTE_UNUSED)
 772 {
 773   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 774     {
 775       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 776         return false;
 777       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 778         return false;
 779     }
 780
 781   return !abnormal_ssa_name_p (*index);
 782 }
 783
 784 /* Returns true if EXPR contains a ssa name that occurs in an
 785    abnormal phi node.  */
 786
 787 bool
 788 contains_abnormal_ssa_name_p (tree expr)
 789 {
 790   enum tree_code code;
 791   enum tree_code_class codeclass;
 792
 793   if (!expr)
 794     return false;
 795
 796   code = TREE_CODE (expr);
 797   codeclass = TREE_CODE_CLASS (code);
 798
 799   if (code == SSA_NAME)
 800     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 801
 802   if (code == INTEGER_CST
 803       || is_gimple_min_invariant (expr))
 804     return false;
 805
 806   if (code == ADDR_EXPR)
 807     return !for_each_index (&TREE_OPERAND (expr, 0),
 808                             idx_contains_abnormal_ssa_name_p,
 809                             NULL);
 810
 811   if (code == COND_EXPR)
 812     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 813       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 814       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 815
 816   switch (codeclass)
 817     {
 818     case tcc_binary:
 819     case tcc_comparison:
 820       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 821         return true;
 822
 823       /* Fallthru.  */
 824     case tcc_unary:
 825       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 826         return true;
 827
 828       break;
 829
 830     default:
 831       gcc_unreachable ();
 832     }
 833
 834   return false;
 835 }
 836
 837 /*  Returns the structure describing number of iterations determined from
 838     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 839
 840 static struct tree_niter_desc *
 841 niter_for_exit (struct ivopts_data *data, edge exit)
 842 {
 843   struct tree_niter_desc *desc;
 844   tree_niter_desc **slot;
 845
 846   if (!data->niters)
 847     {
 848       data->niters = new hash_map<edge, tree_niter_desc *>;
 849       slot = NULL;
 850     }
 851   else
 852     slot = data->niters->get (exit);
 853
 854   if (!slot)
 855     {
 856       /* Try to determine number of iterations.  We cannot safely work with ssa
 857          names that appear in phi nodes on abnormal edges, so that we do not
 858          create overlapping life ranges for them (PR 27283).  */
 859       desc = XNEW (struct tree_niter_desc);
 860       if (!number_of_iterations_exit (data->current_loop,
 861                                       exit, desc, true)
 862           || contains_abnormal_ssa_name_p (desc->niter))
 863         {
 864           XDELETE (desc);
 865           desc = NULL;
 866         }
 867       data->niters->put (exit, desc);
 868     }
 869   else
 870     desc = *slot;
 871
 872   return desc;
 873 }
 874
 875 /* Returns the structure describing number of iterations determined from
 876    single dominating exit of DATA->current_loop, or NULL if something
 877    goes wrong.  */
 878
 879 static struct tree_niter_desc *
 880 niter_for_single_dom_exit (struct ivopts_data *data)
 881 {
 882   edge exit = single_dom_exit (data->current_loop);
 883
 884   if (!exit)
 885     return NULL;
 886
 887   return niter_for_exit (data, exit);
 888 }
 889
 890 /* Initializes data structures used by the iv optimization pass, stored
 891    in DATA.  */
 892
 893 static void
 894 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 895 {
 896   data->version_info_size = 2 * num_ssa_names;
 897   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 898   data->relevant = BITMAP_ALLOC (NULL);
 899   data->important_candidates = BITMAP_ALLOC (NULL);
 900   data->max_inv_id = 0;
 901   data->niters = NULL;
 902   data->iv_uses.create (20);
 903   data->iv_candidates.create (20);
 904   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 905   data->inv_expr_id = 0;
 906   data->name_expansion_cache = NULL;
 907   decl_rtl_to_reset.create (20);
 908   gcc_obstack_init (&data->iv_obstack);
 909 }
 910
 911 /* Returns a memory object to that EXPR points.  In case we are able to
 912    determine that it does not point to any such object, NULL is returned.  */
 913
 914 static tree
 915 determine_base_object (tree expr)
 916 {
 917   enum tree_code code = TREE_CODE (expr);
 918   tree base, obj;
 919
 920   /* If this is a pointer casted to any type, we need to determine
 921      the base object for the pointer; so handle conversions before
 922      throwing away non-pointer expressions.  */
 923   if (CONVERT_EXPR_P (expr))
 924     return determine_base_object (TREE_OPERAND (expr, 0));
 925
 926   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 927     return NULL_TREE;
 928
 929   switch (code)
 930     {
 931     case INTEGER_CST:
 932       return NULL_TREE;
 933
 934     case ADDR_EXPR:
 935       obj = TREE_OPERAND (expr, 0);
 936       base = get_base_address (obj);
 937
 938       if (!base)
 939         return expr;
 940
 941       if (TREE_CODE (base) == MEM_REF)
 942         return determine_base_object (TREE_OPERAND (base, 0));
 943
 944       return fold_convert (ptr_type_node,
 945                            build_fold_addr_expr (base));
 946
 947     case POINTER_PLUS_EXPR:
 948       return determine_base_object (TREE_OPERAND (expr, 0));
 949
 950     case PLUS_EXPR:
 951     case MINUS_EXPR:
 952       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 953       gcc_unreachable ();
 954
 955     default:
 956       return fold_convert (ptr_type_node, expr);
 957     }
 958 }
 959
 960 /* Return true if address expression with non-DECL_P operand appears
 961    in EXPR.  */
 962
 963 static bool
 964 contain_complex_addr_expr (tree expr)
 965 {
 966   bool res = false;
 967
 968   STRIP_NOPS (expr);
 969   switch (TREE_CODE (expr))
 970     {
 971     case POINTER_PLUS_EXPR:
 972     case PLUS_EXPR:
 973     case MINUS_EXPR:
 974       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 975       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 976       break;
 977
 978     case ADDR_EXPR:
 979       return (!DECL_P (TREE_OPERAND (expr, 0)));
 980
 981     default:
 982       return false;
 983     }
 984
 985   return res;
 986 }
 987
 988 /* Allocates an induction variable with given initial value BASE and step STEP
 989    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
 990
 991 static struct iv *
 992 alloc_iv (struct ivopts_data *data, tree base, tree step,
 993           bool no_overflow = false)
 994 {
 995   tree expr = base;
 996   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
 997                                               sizeof (struct iv));
 998   gcc_assert (step != NULL_TREE);
 999
1000   /* Lower address expression in base except ones with DECL_P as operand.
1001      By doing this:
1002        1) More accurate cost can be computed for address expressions;
1003        2) Duplicate candidates won't be created for bases in different
1004           forms, like &a[0] and &a.  */
1005   STRIP_NOPS (expr);
1006   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1007       || contain_complex_addr_expr (expr))
1008     {
1009       aff_tree comb;
1010       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1011       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1012     }
1013
1014   iv->base = base;
1015   iv->base_object = determine_base_object (base);
1016   iv->step = step;
1017   iv->biv_p = false;
1018   iv->have_use_for = false;
1019   iv->use_id = 0;
1020   iv->ssa_name = NULL_TREE;
1021   iv->no_overflow = no_overflow;
1022   iv->have_address_use = false;
1023
1024   return iv;
1025 }
1026
1027 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1028    doesn't overflow.  */
1029
1030 static void
1031 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1032         bool no_overflow)
1033 {
1034   struct version_info *info = name_info (data, iv);
1035
1036   gcc_assert (!info->iv);
1037
1038   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1039   info->iv = alloc_iv (data, base, step, no_overflow);
1040   info->iv->ssa_name = iv;
1041 }
1042
1043 /* Finds induction variable declaration for VAR.  */
1044
1045 static struct iv *
1046 get_iv (struct ivopts_data *data, tree var)
1047 {
1048   basic_block bb;
1049   tree type = TREE_TYPE (var);
1050
1051   if (!POINTER_TYPE_P (type)
1052       && !INTEGRAL_TYPE_P (type))
1053     return NULL;
1054
1055   if (!name_info (data, var)->iv)
1056     {
1057       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1058
1059       if (!bb
1060           || !flow_bb_inside_loop_p (data->current_loop, bb))
1061         set_iv (data, var, var, build_int_cst (type, 0), true);
1062     }
1063
1064   return name_info (data, var)->iv;
1065 }
1066
1067 /* Return the first non-invariant ssa var found in EXPR.  */
1068
1069 static tree
1070 extract_single_var_from_expr (tree expr)
1071 {
1072   int i, n;
1073   tree tmp;
1074   enum tree_code code;
1075
1076   if (!expr || is_gimple_min_invariant (expr))
1077     return NULL;
1078
1079   code = TREE_CODE (expr);
1080   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1081     {
1082       n = TREE_OPERAND_LENGTH (expr);
1083       for (i = 0; i < n; i++)
1084         {
1085           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1086
1087           if (tmp)
1088             return tmp;
1089         }
1090     }
1091   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1092 }
1093
1094 /* Finds basic ivs.  */
1095
1096 static bool
1097 find_bivs (struct ivopts_data *data)
1098 {
1099   gphi *phi;
1100   affine_iv iv;
1101   tree step, type, base, stop;
1102   bool found = false;
1103   struct loop *loop = data->current_loop;
1104   gphi_iterator psi;
1105
1106   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1107     {
1108       phi = psi.phi ();
1109
1110       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1111         continue;
1112
1113       if (virtual_operand_p (PHI_RESULT (phi)))
1114         continue;
1115
1116       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1117         continue;
1118
1119       if (integer_zerop (iv.step))
1120         continue;
1121
1122       step = iv.step;
1123       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1124       /* Stop expanding iv base at the first ssa var referred by iv step.
1125          Ideally we should stop at any ssa var, because that's expensive
1126          and unusual to happen, we just do it on the first one.
1127
1128          See PR64705 for the rationale.  */
1129       stop = extract_single_var_from_expr (step);
1130       base = expand_simple_operations (base, stop);
1131       if (contains_abnormal_ssa_name_p (base)
1132           || contains_abnormal_ssa_name_p (step))
1133         continue;
1134
1135       type = TREE_TYPE (PHI_RESULT (phi));
1136       base = fold_convert (type, base);
1137       if (step)
1138         {
1139           if (POINTER_TYPE_P (type))
1140             step = convert_to_ptrofftype (step);
1141           else
1142             step = fold_convert (type, step);
1143         }
1144
1145       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1146       found = true;
1147     }
1148
1149   return found;
1150 }
1151
1152 /* Marks basic ivs.  */
1153
1154 static void
1155 mark_bivs (struct ivopts_data *data)
1156 {
1157   gphi *phi;
1158   gimple *def;
1159   tree var;
1160   struct iv *iv, *incr_iv;
1161   struct loop *loop = data->current_loop;
1162   basic_block incr_bb;
1163   gphi_iterator psi;
1164
1165   data->bivs_not_used_in_addr = 0;
1166   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1167     {
1168       phi = psi.phi ();
1169
1170       iv = get_iv (data, PHI_RESULT (phi));
1171       if (!iv)
1172         continue;
1173
1174       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1175       def = SSA_NAME_DEF_STMT (var);
1176       /* Don't mark iv peeled from other one as biv.  */
1177       if (def
1178           && gimple_code (def) == GIMPLE_PHI
1179           && gimple_bb (def) == loop->header)
1180         continue;
1181
1182       incr_iv = get_iv (data, var);
1183       if (!incr_iv)
1184         continue;
1185
1186       /* If the increment is in the subloop, ignore it.  */
1187       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1188       if (incr_bb->loop_father != data->current_loop
1189           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1190         continue;
1191
1192       iv->biv_p = true;
1193       incr_iv->biv_p = true;
1194       if (iv->no_overflow)
1195         data->bivs_not_used_in_addr++;
1196       if (incr_iv->no_overflow)
1197         data->bivs_not_used_in_addr++;
1198     }
1199 }
1200
1201 /* Checks whether STMT defines a linear induction variable and stores its
1202    parameters to IV.  */
1203
1204 static bool
1205 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1206 {
1207   tree lhs, stop;
1208   struct loop *loop = data->current_loop;
1209
1210   iv->base = NULL_TREE;
1211   iv->step = NULL_TREE;
1212
1213   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1214     return false;
1215
1216   lhs = gimple_assign_lhs (stmt);
1217   if (TREE_CODE (lhs) != SSA_NAME)
1218     return false;
1219
1220   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1221     return false;
1222
1223   /* Stop expanding iv base at the first ssa var referred by iv step.
1224      Ideally we should stop at any ssa var, because that's expensive
1225      and unusual to happen, we just do it on the first one.
1226
1227      See PR64705 for the rationale.  */
1228   stop = extract_single_var_from_expr (iv->step);
1229   iv->base = expand_simple_operations (iv->base, stop);
1230   if (contains_abnormal_ssa_name_p (iv->base)
1231       || contains_abnormal_ssa_name_p (iv->step))
1232     return false;
1233
1234   /* If STMT could throw, then do not consider STMT as defining a GIV.
1235      While this will suppress optimizations, we can not safely delete this
1236      GIV and associated statements, even if it appears it is not used.  */
1237   if (stmt_could_throw_p (stmt))
1238     return false;
1239
1240   return true;
1241 }
1242
1243 /* Finds general ivs in statement STMT.  */
1244
1245 static void
1246 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1247 {
1248   affine_iv iv;
1249
1250   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1251     return;
1252
1253   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1254 }
1255
1256 /* Finds general ivs in basic block BB.  */
1257
1258 static void
1259 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1260 {
1261   gimple_stmt_iterator bsi;
1262
1263   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1264     find_givs_in_stmt (data, gsi_stmt (bsi));
1265 }
1266
1267 /* Finds general ivs.  */
1268
1269 static void
1270 find_givs (struct ivopts_data *data)
1271 {
1272   struct loop *loop = data->current_loop;
1273   basic_block *body = get_loop_body_in_dom_order (loop);
1274   unsigned i;
1275
1276   for (i = 0; i < loop->num_nodes; i++)
1277     find_givs_in_bb (data, body[i]);
1278   free (body);
1279 }
1280
1281 /* For each ssa name defined in LOOP determines whether it is an induction
1282    variable and if so, its initial value and step.  */
1283
1284 static bool
1285 find_induction_variables (struct ivopts_data *data)
1286 {
1287   unsigned i;
1288   bitmap_iterator bi;
1289
1290   if (!find_bivs (data))
1291     return false;
1292
1293   find_givs (data);
1294   mark_bivs (data);
1295
1296   if (dump_file && (dump_flags & TDF_DETAILS))
1297     {
1298       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1299
1300       if (niter)
1301         {
1302           fprintf (dump_file, "  number of iterations ");
1303           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1304           if (!integer_zerop (niter->may_be_zero))
1305             {
1306               fprintf (dump_file, "; zero if ");
1307               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1308             }
1309           fprintf (dump_file, "\n\n");
1310         };
1311
1312       fprintf (dump_file, "Induction variables:\n\n");
1313
1314       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1315         {
1316           if (ver_info (data, i)->iv)
1317             dump_iv (dump_file, ver_info (data, i)->iv, true);
1318         }
1319     }
1320
1321   return true;
1322 }
1323
1324 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1325    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1326    is the const offset stripped from IV base.  For uses of other types,
1327    ADDR_BASE and ADDR_OFFSET are zero by default.  */
1328
1329 static struct iv_use *
1330 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1331             gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1332             unsigned HOST_WIDE_INT addr_offset = 0)
1333 {
1334   struct iv_use *use = XCNEW (struct iv_use);
1335
1336   use->id = n_iv_uses (data);
1337   use->sub_id = 0;
1338   use->type = use_type;
1339   use->iv = iv;
1340   use->stmt = stmt;
1341   use->op_p = use_p;
1342   use->related_cands = BITMAP_ALLOC (NULL);
1343   use->next = NULL;
1344   use->addr_base = addr_base;
1345   use->addr_offset = addr_offset;
1346
1347   data->iv_uses.safe_push (use);
1348
1349   return use;
1350 }
1351
1352 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1353    The sub use is recorded under the one whose use id is ID_GROUP.  */
1354
1355 static struct iv_use *
1356 record_sub_use (struct ivopts_data *data, tree *use_p,
1357                     struct iv *iv, gimple *stmt, enum use_type use_type,
1358                     tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1359                     unsigned int id_group)
1360 {
1361   struct iv_use *use = XCNEW (struct iv_use);
1362   struct iv_use *group = iv_use (data, id_group);
1363
1364   use->id = group->id;
1365   use->sub_id = 0;
1366   use->type = use_type;
1367   use->iv = iv;
1368   use->stmt = stmt;
1369   use->op_p = use_p;
1370   use->related_cands = NULL;
1371   use->addr_base = addr_base;
1372   use->addr_offset = addr_offset;
1373
1374   /* Sub use list is maintained in offset ascending order.  */
1375   if (addr_offset <= group->addr_offset)
1376     {
1377       use->related_cands = group->related_cands;
1378       group->related_cands = NULL;
1379       use->next = group;
1380       data->iv_uses[id_group] = use;
1381     }
1382   else
1383     {
1384       struct iv_use *pre;
1385       do
1386         {
1387           pre = group;
1388           group = group->next;
1389         }
1390       while (group && addr_offset > group->addr_offset);
1391       use->next = pre->next;
1392       pre->next = use;
1393     }
1394
1395   return use;
1396 }
1397
1398 /* Checks whether OP is a loop-level invariant and if so, records it.
1399    NONLINEAR_USE is true if the invariant is used in a way we do not
1400    handle specially.  */
1401
1402 static void
1403 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1404 {
1405   basic_block bb;
1406   struct version_info *info;
1407
1408   if (TREE_CODE (op) != SSA_NAME
1409       || virtual_operand_p (op))
1410     return;
1411
1412   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1413   if (bb
1414       && flow_bb_inside_loop_p (data->current_loop, bb))
1415     return;
1416
1417   info = name_info (data, op);
1418   info->name = op;
1419   info->has_nonlin_use |= nonlinear_use;
1420   if (!info->inv_id)
1421     info->inv_id = ++data->max_inv_id;
1422   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1423 }
1424
1425 /* Checks whether the use OP is interesting and if so, records it.  */
1426
1427 static struct iv_use *
1428 find_interesting_uses_op (struct ivopts_data *data, tree op)
1429 {
1430   struct iv *iv;
1431   gimple *stmt;
1432   struct iv_use *use;
1433
1434   if (TREE_CODE (op) != SSA_NAME)
1435     return NULL;
1436
1437   iv = get_iv (data, op);
1438   if (!iv)
1439     return NULL;
1440
1441   if (iv->have_use_for)
1442     {
1443       use = iv_use (data, iv->use_id);
1444
1445       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1446       return use;
1447     }
1448
1449   if (integer_zerop (iv->step))
1450     {
1451       record_invariant (data, op, true);
1452       return NULL;
1453     }
1454   iv->have_use_for = true;
1455
1456   stmt = SSA_NAME_DEF_STMT (op);
1457   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1458               || is_gimple_assign (stmt));
1459
1460   use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1461   iv->use_id = use->id;
1462
1463   return use;
1464 }
1465
1466 /* Given a condition in statement STMT, checks whether it is a compare
1467    of an induction variable and an invariant.  If this is the case,
1468    CONTROL_VAR is set to location of the iv, BOUND to the location of
1469    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1470    induction variable descriptions, and true is returned.  If this is not
1471    the case, CONTROL_VAR and BOUND are set to the arguments of the
1472    condition and false is returned.  */
1473
1474 static bool
1475 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1476                        tree **control_var, tree **bound,
1477                        struct iv **iv_var, struct iv **iv_bound)
1478 {
1479   /* The objects returned when COND has constant operands.  */
1480   static struct iv const_iv;
1481   static tree zero;
1482   tree *op0 = &zero, *op1 = &zero;
1483   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1484   bool ret = false;
1485
1486   if (gimple_code (stmt) == GIMPLE_COND)
1487     {
1488       gcond *cond_stmt = as_a <gcond *> (stmt);
1489       op0 = gimple_cond_lhs_ptr (cond_stmt);
1490       op1 = gimple_cond_rhs_ptr (cond_stmt);
1491     }
1492   else
1493     {
1494       op0 = gimple_assign_rhs1_ptr (stmt);
1495       op1 = gimple_assign_rhs2_ptr (stmt);
1496     }
1497
1498   zero = integer_zero_node;
1499   const_iv.step = integer_zero_node;
1500
1501   if (TREE_CODE (*op0) == SSA_NAME)
1502     iv0 = get_iv (data, *op0);
1503   if (TREE_CODE (*op1) == SSA_NAME)
1504     iv1 = get_iv (data, *op1);
1505
1506   /* Exactly one of the compared values must be an iv, and the other one must
1507      be an invariant.  */
1508   if (!iv0 || !iv1)
1509     goto end;
1510
1511   if (integer_zerop (iv0->step))
1512     {
1513       /* Control variable may be on the other side.  */
1514       std::swap (op0, op1);
1515       std::swap (iv0, iv1);
1516     }
1517   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1518
1519 end:
1520   if (control_var)
1521     *control_var = op0;
1522   if (iv_var)
1523     *iv_var = iv0;
1524   if (bound)
1525     *bound = op1;
1526   if (iv_bound)
1527     *iv_bound = iv1;
1528
1529   return ret;
1530 }
1531
1532 /* Checks whether the condition in STMT is interesting and if so,
1533    records it.  */
1534
1535 static void
1536 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1537 {
1538   tree *var_p, *bound_p;
1539   struct iv *var_iv;
1540
1541   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1542     {
1543       find_interesting_uses_op (data, *var_p);
1544       find_interesting_uses_op (data, *bound_p);
1545       return;
1546     }
1547
1548   record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1549 }
1550
1551 /* Returns the outermost loop EXPR is obviously invariant in
1552    relative to the loop LOOP, i.e. if all its operands are defined
1553    outside of the returned loop.  Returns NULL if EXPR is not
1554    even obviously invariant in LOOP.  */
1555
1556 struct loop *
1557 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1558 {
1559   basic_block def_bb;
1560   unsigned i, len;
1561
1562   if (is_gimple_min_invariant (expr))
1563     return current_loops->tree_root;
1564
1565   if (TREE_CODE (expr) == SSA_NAME)
1566     {
1567       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1568       if (def_bb)
1569         {
1570           if (flow_bb_inside_loop_p (loop, def_bb))
1571             return NULL;
1572           return superloop_at_depth (loop,
1573                                      loop_depth (def_bb->loop_father) + 1);
1574         }
1575
1576       return current_loops->tree_root;
1577     }
1578
1579   if (!EXPR_P (expr))
1580     return NULL;
1581
1582   unsigned maxdepth = 0;
1583   len = TREE_OPERAND_LENGTH (expr);
1584   for (i = 0; i < len; i++)
1585     {
1586       struct loop *ivloop;
1587       if (!TREE_OPERAND (expr, i))
1588         continue;
1589
1590       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1591       if (!ivloop)
1592         return NULL;
1593       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1594     }
1595
1596   return superloop_at_depth (loop, maxdepth);
1597 }
1598
1599 /* Returns true if expression EXPR is obviously invariant in LOOP,
1600    i.e. if all its operands are defined outside of the LOOP.  LOOP
1601    should not be the function body.  */
1602
1603 bool
1604 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1605 {
1606   basic_block def_bb;
1607   unsigned i, len;
1608
1609   gcc_assert (loop_depth (loop) > 0);
1610
1611   if (is_gimple_min_invariant (expr))
1612     return true;
1613
1614   if (TREE_CODE (expr) == SSA_NAME)
1615     {
1616       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1617       if (def_bb
1618           && flow_bb_inside_loop_p (loop, def_bb))
1619         return false;
1620
1621       return true;
1622     }
1623
1624   if (!EXPR_P (expr))
1625     return false;
1626
1627   len = TREE_OPERAND_LENGTH (expr);
1628   for (i = 0; i < len; i++)
1629     if (TREE_OPERAND (expr, i)
1630         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1631       return false;
1632
1633   return true;
1634 }
1635
1636 /* Given expression EXPR which computes inductive values with respect
1637    to loop recorded in DATA, this function returns biv from which EXPR
1638    is derived by tracing definition chains of ssa variables in EXPR.  */
1639
1640 static struct iv*
1641 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1642 {
1643   struct iv *iv;
1644   unsigned i, n;
1645   tree e2, e1;
1646   enum tree_code code;
1647   gimple *stmt;
1648
1649   if (expr == NULL_TREE)
1650     return NULL;
1651
1652   if (is_gimple_min_invariant (expr))
1653     return NULL;
1654
1655   code = TREE_CODE (expr);
1656   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1657     {
1658       n = TREE_OPERAND_LENGTH (expr);
1659       for (i = 0; i < n; i++)
1660         {
1661           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1662           if (iv)
1663             return iv;
1664         }
1665     }
1666
1667   /* Stop if it's not ssa name.  */
1668   if (code != SSA_NAME)
1669     return NULL;
1670
1671   iv = get_iv (data, expr);
1672   if (!iv || integer_zerop (iv->step))
1673     return NULL;
1674   else if (iv->biv_p)
1675     return iv;
1676
1677   stmt = SSA_NAME_DEF_STMT (expr);
1678   if (gphi *phi = dyn_cast <gphi *> (stmt))
1679     {
1680       ssa_op_iter iter;
1681       use_operand_p use_p;
1682
1683       if (virtual_operand_p (gimple_phi_result (phi)))
1684         return NULL;
1685
1686       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1687         {
1688           tree use = USE_FROM_PTR (use_p);
1689           iv = find_deriving_biv_for_expr (data, use);
1690           if (iv)
1691             return iv;
1692         }
1693       return NULL;
1694     }
1695   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1696     return NULL;
1697
1698   e1 = gimple_assign_rhs1 (stmt);
1699   code = gimple_assign_rhs_code (stmt);
1700   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1701     return find_deriving_biv_for_expr (data, e1);
1702
1703   switch (code)
1704     {
1705     case MULT_EXPR:
1706     case PLUS_EXPR:
1707     case MINUS_EXPR:
1708     case POINTER_PLUS_EXPR:
1709       /* Increments, decrements and multiplications by a constant
1710          are simple.  */
1711       e2 = gimple_assign_rhs2 (stmt);
1712       iv = find_deriving_biv_for_expr (data, e2);
1713       if (iv)
1714         return iv;
1715
1716       /* Fallthru.  */
1717     CASE_CONVERT:
1718       /* Casts are simple.  */
1719       return find_deriving_biv_for_expr (data, e1);
1720
1721     default:
1722       break;
1723     }
1724
1725   return NULL;
1726 }
1727
1728 /* Record BIV, its predecessor and successor that they are used in
1729    address type uses.  */
1730
1731 static void
1732 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1733 {
1734   unsigned i;
1735   tree type, base_1, base_2;
1736   bitmap_iterator bi;
1737
1738   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1739       || biv->have_address_use || !biv->no_overflow)
1740     return;
1741
1742   type = TREE_TYPE (biv->base);
1743   if (!INTEGRAL_TYPE_P (type))
1744     return;
1745
1746   biv->have_address_use = true;
1747   data->bivs_not_used_in_addr--;
1748   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1749   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1750     {
1751       struct iv *iv = ver_info (data, i)->iv;
1752
1753       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1754           || iv->have_address_use || !iv->no_overflow)
1755         continue;
1756
1757       if (type != TREE_TYPE (iv->base)
1758           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1759         continue;
1760
1761       if (!operand_equal_p (biv->step, iv->step, 0))
1762         continue;
1763
1764       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1765       if (operand_equal_p (base_1, iv->base, 0)
1766           || operand_equal_p (base_2, biv->base, 0))
1767         {
1768           iv->have_address_use = true;
1769           data->bivs_not_used_in_addr--;
1770         }
1771     }
1772 }
1773
1774 /* Cumulates the steps of indices into DATA and replaces their values with the
1775    initial ones.  Returns false when the value of the index cannot be determined.
1776    Callback for for_each_index.  */
1777
1778 struct ifs_ivopts_data
1779 {
1780   struct ivopts_data *ivopts_data;
1781   gimple *stmt;
1782   tree step;
1783 };
1784
1785 static bool
1786 idx_find_step (tree base, tree *idx, void *data)
1787 {
1788   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1789   struct iv *iv;
1790   bool use_overflow_semantics = false;
1791   tree step, iv_base, iv_step, lbound, off;
1792   struct loop *loop = dta->ivopts_data->current_loop;
1793
1794   /* If base is a component ref, require that the offset of the reference
1795      be invariant.  */
1796   if (TREE_CODE (base) == COMPONENT_REF)
1797     {
1798       off = component_ref_field_offset (base);
1799       return expr_invariant_in_loop_p (loop, off);
1800     }
1801
1802   /* If base is array, first check whether we will be able to move the
1803      reference out of the loop (in order to take its address in strength
1804      reduction).  In order for this to work we need both lower bound
1805      and step to be loop invariants.  */
1806   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1807     {
1808       /* Moreover, for a range, the size needs to be invariant as well.  */
1809       if (TREE_CODE (base) == ARRAY_RANGE_REF
1810           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1811         return false;
1812
1813       step = array_ref_element_size (base);
1814       lbound = array_ref_low_bound (base);
1815
1816       if (!expr_invariant_in_loop_p (loop, step)
1817           || !expr_invariant_in_loop_p (loop, lbound))
1818         return false;
1819     }
1820
1821   if (TREE_CODE (*idx) != SSA_NAME)
1822     return true;
1823
1824   iv = get_iv (dta->ivopts_data, *idx);
1825   if (!iv)
1826     return false;
1827
1828   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1829           *&x[0], which is not folded and does not trigger the
1830           ARRAY_REF path below.  */
1831   *idx = iv->base;
1832
1833   if (integer_zerop (iv->step))
1834     return true;
1835
1836   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1837     {
1838       step = array_ref_element_size (base);
1839
1840       /* We only handle addresses whose step is an integer constant.  */
1841       if (TREE_CODE (step) != INTEGER_CST)
1842         return false;
1843     }
1844   else
1845     /* The step for pointer arithmetics already is 1 byte.  */
1846     step = size_one_node;
1847
1848   iv_base = iv->base;
1849   iv_step = iv->step;
1850   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1851     use_overflow_semantics = true;
1852
1853   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1854                             sizetype, &iv_base, &iv_step, dta->stmt,
1855                             use_overflow_semantics))
1856     {
1857       /* The index might wrap.  */
1858       return false;
1859     }
1860
1861   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1862   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1863
1864   if (dta->ivopts_data->bivs_not_used_in_addr)
1865     {
1866       if (!iv->biv_p)
1867         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1868
1869       record_biv_for_address_use (dta->ivopts_data, iv);
1870     }
1871   return true;
1872 }
1873
1874 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1875    object is passed to it in DATA.  */
1876
1877 static bool
1878 idx_record_use (tree base, tree *idx,
1879                 void *vdata)
1880 {
1881   struct ivopts_data *data = (struct ivopts_data *) vdata;
1882   find_interesting_uses_op (data, *idx);
1883   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1884     {
1885       find_interesting_uses_op (data, array_ref_element_size (base));
1886       find_interesting_uses_op (data, array_ref_low_bound (base));
1887     }
1888   return true;
1889 }
1890
1891 /* If we can prove that TOP = cst * BOT for some constant cst,
1892    store cst to MUL and return true.  Otherwise return false.
1893    The returned value is always sign-extended, regardless of the
1894    signedness of TOP and BOT.  */
1895
1896 static bool
1897 constant_multiple_of (tree top, tree bot, widest_int *mul)
1898 {
1899   tree mby;
1900   enum tree_code code;
1901   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1902   widest_int res, p0, p1;
1903
1904   STRIP_NOPS (top);
1905   STRIP_NOPS (bot);
1906
1907   if (operand_equal_p (top, bot, 0))
1908     {
1909       *mul = 1;
1910       return true;
1911     }
1912
1913   code = TREE_CODE (top);
1914   switch (code)
1915     {
1916     case MULT_EXPR:
1917       mby = TREE_OPERAND (top, 1);
1918       if (TREE_CODE (mby) != INTEGER_CST)
1919         return false;
1920
1921       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1922         return false;
1923
1924       *mul = wi::sext (res * wi::to_widest (mby), precision);
1925       return true;
1926
1927     case PLUS_EXPR:
1928     case MINUS_EXPR:
1929       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1930           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1931         return false;
1932
1933       if (code == MINUS_EXPR)
1934         p1 = -p1;
1935       *mul = wi::sext (p0 + p1, precision);
1936       return true;
1937
1938     case INTEGER_CST:
1939       if (TREE_CODE (bot) != INTEGER_CST)
1940         return false;
1941
1942       p0 = widest_int::from (top, SIGNED);
1943       p1 = widest_int::from (bot, SIGNED);
1944       if (p1 == 0)
1945         return false;
1946       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1947       return res == 0;
1948
1949     default:
1950       return false;
1951     }
1952 }
1953
1954 /* Return true if memory reference REF with step STEP may be unaligned.  */
1955
1956 static bool
1957 may_be_unaligned_p (tree ref, tree step)
1958 {
1959   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1960      thus they are not misaligned.  */
1961   if (TREE_CODE (ref) == TARGET_MEM_REF)
1962     return false;
1963
1964   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1965   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1966     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1967
1968   unsigned HOST_WIDE_INT bitpos;
1969   unsigned int ref_align;
1970   get_object_alignment_1 (ref, &ref_align, &bitpos);
1971   if (ref_align < align
1972       || (bitpos % align) != 0
1973       || (bitpos % BITS_PER_UNIT) != 0)
1974     return true;
1975
1976   unsigned int trailing_zeros = tree_ctz (step);
1977   if (trailing_zeros < HOST_BITS_PER_INT
1978       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1979     return true;
1980
1981   return false;
1982 }
1983
1984 /* Return true if EXPR may be non-addressable.   */
1985
1986 bool
1987 may_be_nonaddressable_p (tree expr)
1988 {
1989   switch (TREE_CODE (expr))
1990     {
1991     case TARGET_MEM_REF:
1992       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1993          target, thus they are always addressable.  */
1994       return false;
1995
1996     case COMPONENT_REF:
1997       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1998              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1999
2000     case VIEW_CONVERT_EXPR:
2001       /* This kind of view-conversions may wrap non-addressable objects
2002          and make them look addressable.  After some processing the
2003          non-addressability may be uncovered again, causing ADDR_EXPRs
2004          of inappropriate objects to be built.  */
2005       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2006           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2007         return true;
2008
2009       /* ... fall through ... */
2010
2011     case ARRAY_REF:
2012     case ARRAY_RANGE_REF:
2013       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2014
2015     CASE_CONVERT:
2016       return true;
2017
2018     default:
2019       break;
2020     }
2021
2022   return false;
2023 }
2024
2025 static tree
2026 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2027
2028 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2029    If there is an existing use which has same stripped iv base and step,
2030    this function records this one as a sub use to that; otherwise records
2031    it as a normal one.  */
2032
2033 static struct iv_use *
2034 record_group_use (struct ivopts_data *data, tree *use_p,
2035                   struct iv *iv, gimple *stmt, enum use_type use_type)
2036 {
2037   unsigned int i;
2038   struct iv_use *use;
2039   tree addr_base;
2040   unsigned HOST_WIDE_INT addr_offset;
2041
2042   /* Only support sub use for address type uses, that is, with base
2043      object.  */
2044   if (!iv->base_object)
2045     return record_use (data, use_p, iv, stmt, use_type);
2046
2047   addr_base = strip_offset (iv->base, &addr_offset);
2048   for (i = 0; i < n_iv_uses (data); i++)
2049     {
2050       use = iv_use (data, i);
2051       if (use->type != USE_ADDRESS || !use->iv->base_object)
2052         continue;
2053
2054       /* Check if it has the same stripped base and step.  */
2055       if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2056           && operand_equal_p (iv->step, use->iv->step, 0)
2057           && operand_equal_p (addr_base, use->addr_base, 0))
2058         break;
2059     }
2060
2061   if (i == n_iv_uses (data))
2062     return record_use (data, use_p, iv, stmt,
2063                        use_type, addr_base, addr_offset);
2064   else
2065     return record_sub_use (data, use_p, iv, stmt,
2066                            use_type, addr_base, addr_offset, i);
2067 }
2068
2069 /* Finds addresses in *OP_P inside STMT.  */
2070
2071 static void
2072 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2073                                tree *op_p)
2074 {
2075   tree base = *op_p, step = size_zero_node;
2076   struct iv *civ;
2077   struct ifs_ivopts_data ifs_ivopts_data;
2078
2079   /* Do not play with volatile memory references.  A bit too conservative,
2080      perhaps, but safe.  */
2081   if (gimple_has_volatile_ops (stmt))
2082     goto fail;
2083
2084   /* Ignore bitfields for now.  Not really something terribly complicated
2085      to handle.  TODO.  */
2086   if (TREE_CODE (base) == BIT_FIELD_REF)
2087     goto fail;
2088
2089   base = unshare_expr (base);
2090
2091   if (TREE_CODE (base) == TARGET_MEM_REF)
2092     {
2093       tree type = build_pointer_type (TREE_TYPE (base));
2094       tree astep;
2095
2096       if (TMR_BASE (base)
2097           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2098         {
2099           civ = get_iv (data, TMR_BASE (base));
2100           if (!civ)
2101             goto fail;
2102
2103           TMR_BASE (base) = civ->base;
2104           step = civ->step;
2105         }
2106       if (TMR_INDEX2 (base)
2107           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2108         {
2109           civ = get_iv (data, TMR_INDEX2 (base));
2110           if (!civ)
2111             goto fail;
2112
2113           TMR_INDEX2 (base) = civ->base;
2114           step = civ->step;
2115         }
2116       if (TMR_INDEX (base)
2117           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2118         {
2119           civ = get_iv (data, TMR_INDEX (base));
2120           if (!civ)
2121             goto fail;
2122
2123           TMR_INDEX (base) = civ->base;
2124           astep = civ->step;
2125
2126           if (astep)
2127             {
2128               if (TMR_STEP (base))
2129                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2130
2131               step = fold_build2 (PLUS_EXPR, type, step, astep);
2132             }
2133         }
2134
2135       if (integer_zerop (step))
2136         goto fail;
2137       base = tree_mem_ref_addr (type, base);
2138     }
2139   else
2140     {
2141       ifs_ivopts_data.ivopts_data = data;
2142       ifs_ivopts_data.stmt = stmt;
2143       ifs_ivopts_data.step = size_zero_node;
2144       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2145           || integer_zerop (ifs_ivopts_data.step))
2146         goto fail;
2147       step = ifs_ivopts_data.step;
2148
2149       /* Check that the base expression is addressable.  This needs
2150          to be done after substituting bases of IVs into it.  */
2151       if (may_be_nonaddressable_p (base))
2152         goto fail;
2153
2154       /* Moreover, on strict alignment platforms, check that it is
2155          sufficiently aligned.  */
2156       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2157         goto fail;
2158
2159       base = build_fold_addr_expr (base);
2160
2161       /* Substituting bases of IVs into the base expression might
2162          have caused folding opportunities.  */
2163       if (TREE_CODE (base) == ADDR_EXPR)
2164         {
2165           tree *ref = &TREE_OPERAND (base, 0);
2166           while (handled_component_p (*ref))
2167             ref = &TREE_OPERAND (*ref, 0);
2168           if (TREE_CODE (*ref) == MEM_REF)
2169             {
2170               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2171                                       TREE_OPERAND (*ref, 0),
2172                                       TREE_OPERAND (*ref, 1));
2173               if (tem)
2174                 *ref = tem;
2175             }
2176         }
2177     }
2178
2179   civ = alloc_iv (data, base, step);
2180   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2181   return;
2182
2183 fail:
2184   for_each_index (op_p, idx_record_use, data);
2185 }
2186
2187 /* Finds and records invariants used in STMT.  */
2188
2189 static void
2190 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2191 {
2192   ssa_op_iter iter;
2193   use_operand_p use_p;
2194   tree op;
2195
2196   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2197     {
2198       op = USE_FROM_PTR (use_p);
2199       record_invariant (data, op, false);
2200     }
2201 }
2202
2203 /* Finds interesting uses of induction variables in the statement STMT.  */
2204
2205 static void
2206 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2207 {
2208   struct iv *iv;
2209   tree op, *lhs, *rhs;
2210   ssa_op_iter iter;
2211   use_operand_p use_p;
2212   enum tree_code code;
2213
2214   find_invariants_stmt (data, stmt);
2215
2216   if (gimple_code (stmt) == GIMPLE_COND)
2217     {
2218       find_interesting_uses_cond (data, stmt);
2219       return;
2220     }
2221
2222   if (is_gimple_assign (stmt))
2223     {
2224       lhs = gimple_assign_lhs_ptr (stmt);
2225       rhs = gimple_assign_rhs1_ptr (stmt);
2226
2227       if (TREE_CODE (*lhs) == SSA_NAME)
2228         {
2229           /* If the statement defines an induction variable, the uses are not
2230              interesting by themselves.  */
2231
2232           iv = get_iv (data, *lhs);
2233
2234           if (iv && !integer_zerop (iv->step))
2235             return;
2236         }
2237
2238       code = gimple_assign_rhs_code (stmt);
2239       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2240           && (REFERENCE_CLASS_P (*rhs)
2241               || is_gimple_val (*rhs)))
2242         {
2243           if (REFERENCE_CLASS_P (*rhs))
2244             find_interesting_uses_address (data, stmt, rhs);
2245           else
2246             find_interesting_uses_op (data, *rhs);
2247
2248           if (REFERENCE_CLASS_P (*lhs))
2249             find_interesting_uses_address (data, stmt, lhs);
2250           return;
2251         }
2252       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2253         {
2254           find_interesting_uses_cond (data, stmt);
2255           return;
2256         }
2257
2258       /* TODO -- we should also handle address uses of type
2259
2260          memory = call (whatever);
2261
2262          and
2263
2264          call (memory).  */
2265     }
2266
2267   if (gimple_code (stmt) == GIMPLE_PHI
2268       && gimple_bb (stmt) == data->current_loop->header)
2269     {
2270       iv = get_iv (data, PHI_RESULT (stmt));
2271
2272       if (iv && !integer_zerop (iv->step))
2273         return;
2274     }
2275
2276   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2277     {
2278       op = USE_FROM_PTR (use_p);
2279
2280       if (TREE_CODE (op) != SSA_NAME)
2281         continue;
2282
2283       iv = get_iv (data, op);
2284       if (!iv)
2285         continue;
2286
2287       find_interesting_uses_op (data, op);
2288     }
2289 }
2290
2291 /* Finds interesting uses of induction variables outside of loops
2292    on loop exit edge EXIT.  */
2293
2294 static void
2295 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2296 {
2297   gphi *phi;
2298   gphi_iterator psi;
2299   tree def;
2300
2301   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2302     {
2303       phi = psi.phi ();
2304       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2305       if (!virtual_operand_p (def))
2306         find_interesting_uses_op (data, def);
2307     }
2308 }
2309
2310 /* Finds uses of the induction variables that are interesting.  */
2311
2312 static void
2313 find_interesting_uses (struct ivopts_data *data)
2314 {
2315   basic_block bb;
2316   gimple_stmt_iterator bsi;
2317   basic_block *body = get_loop_body (data->current_loop);
2318   unsigned i;
2319   struct version_info *info;
2320   edge e;
2321
2322   if (dump_file && (dump_flags & TDF_DETAILS))
2323     fprintf (dump_file, "Uses:\n\n");
2324
2325   for (i = 0; i < data->current_loop->num_nodes; i++)
2326     {
2327       edge_iterator ei;
2328       bb = body[i];
2329
2330       FOR_EACH_EDGE (e, ei, bb->succs)
2331         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2332             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2333           find_interesting_uses_outside (data, e);
2334
2335       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2336         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2337       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2338         if (!is_gimple_debug (gsi_stmt (bsi)))
2339           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2340     }
2341
2342   if (dump_file && (dump_flags & TDF_DETAILS))
2343     {
2344       bitmap_iterator bi;
2345
2346       fprintf (dump_file, "\n");
2347
2348       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2349         {
2350           info = ver_info (data, i);
2351           if (info->inv_id)
2352             {
2353               fprintf (dump_file, "  ");
2354               print_generic_expr (dump_file, info->name, TDF_SLIM);
2355               fprintf (dump_file, " is invariant (%d)%s\n",
2356                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2357             }
2358         }
2359
2360       fprintf (dump_file, "\n");
2361     }
2362
2363   free (body);
2364 }
2365
2366 /* Compute maximum offset of [base + offset] addressing mode
2367    for memory reference represented by USE.  */
2368
2369 static HOST_WIDE_INT
2370 compute_max_addr_offset (struct iv_use *use)
2371 {
2372   int width;
2373   rtx reg, addr;
2374   HOST_WIDE_INT i, off;
2375   unsigned list_index, num;
2376   addr_space_t as;
2377   machine_mode mem_mode, addr_mode;
2378   static vec<HOST_WIDE_INT> max_offset_list;
2379
2380   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2381   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2382
2383   num = max_offset_list.length ();
2384   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2385   if (list_index >= num)
2386     {
2387       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2388       for (; num < max_offset_list.length (); num++)
2389         max_offset_list[num] = -1;
2390     }
2391
2392   off = max_offset_list[list_index];
2393   if (off != -1)
2394     return off;
2395
2396   addr_mode = targetm.addr_space.address_mode (as);
2397   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2398   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2399
2400   width = GET_MODE_BITSIZE (addr_mode) - 1;
2401   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2402     width = HOST_BITS_PER_WIDE_INT - 1;
2403
2404   for (i = width; i > 0; i--)
2405     {
2406       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2407       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2408       if (memory_address_addr_space_p (mem_mode, addr, as))
2409         break;
2410
2411       /* For some strict-alignment targets, the offset must be naturally
2412          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2413       off = ((unsigned HOST_WIDE_INT) 1 << i);
2414       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2415         {
2416           off -= GET_MODE_SIZE (mem_mode);
2417           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2418           if (memory_address_addr_space_p (mem_mode, addr, as))
2419             break;
2420         }
2421     }
2422   if (i == 0)
2423     off = 0;
2424
2425   max_offset_list[list_index] = off;
2426   return off;
2427 }
2428
2429 /* Check if all small groups should be split.  Return true if and
2430    only if:
2431
2432      1) At least one groups contain two uses with different offsets.
2433      2) No group contains more than two uses with different offsets.
2434
2435    Return false otherwise.  We want to split such groups because:
2436
2437      1) Small groups don't have much benefit and may interfer with
2438         general candidate selection.
2439      2) Size for problem with only small groups is usually small and
2440         general algorithm can handle it well.
2441
2442    TODO -- Above claim may not hold when auto increment is supported.  */
2443
2444 static bool
2445 split_all_small_groups (struct ivopts_data *data)
2446 {
2447   bool split_p = false;
2448   unsigned int i, n, distinct;
2449   struct iv_use *pre, *use;
2450
2451   n = n_iv_uses (data);
2452   for (i = 0; i < n; i++)
2453     {
2454       use = iv_use (data, i);
2455       if (!use->next)
2456         continue;
2457
2458       distinct = 1;
2459       gcc_assert (use->type == USE_ADDRESS);
2460       for (pre = use, use = use->next; use; pre = use, use = use->next)
2461         {
2462           if (pre->addr_offset != use->addr_offset)
2463             distinct++;
2464
2465           if (distinct > 2)
2466             return false;
2467         }
2468       if (distinct == 2)
2469         split_p = true;
2470     }
2471
2472   return split_p;
2473 }
2474
2475 /* For each group of address type uses, this function further groups
2476    these uses according to the maximum offset supported by target's
2477    [base + offset] addressing mode.  */
2478
2479 static void
2480 group_address_uses (struct ivopts_data *data)
2481 {
2482   HOST_WIDE_INT max_offset = -1;
2483   unsigned int i, n, sub_id;
2484   struct iv_use *pre, *use;
2485   unsigned HOST_WIDE_INT addr_offset_first;
2486
2487   /* Reset max offset to split all small groups.  */
2488   if (split_all_small_groups (data))
2489     max_offset = 0;
2490
2491   n = n_iv_uses (data);
2492   for (i = 0; i < n; i++)
2493     {
2494       use = iv_use (data, i);
2495       if (!use->next)
2496         continue;
2497
2498       gcc_assert (use->type == USE_ADDRESS);
2499       if (max_offset != 0)
2500         max_offset = compute_max_addr_offset (use);
2501
2502       while (use)
2503         {
2504           sub_id = 0;
2505           addr_offset_first = use->addr_offset;
2506           /* Only uses with offset that can fit in offset part against
2507              the first use can be grouped together.  */
2508           for (pre = use, use = use->next;
2509                use && (use->addr_offset - addr_offset_first
2510                        <= (unsigned HOST_WIDE_INT) max_offset);
2511                pre = use, use = use->next)
2512             {
2513               use->id = pre->id;
2514               use->sub_id = ++sub_id;
2515             }
2516
2517           /* Break the list and create new group.  */
2518           if (use)
2519             {
2520               pre->next = NULL;
2521               use->id = n_iv_uses (data);
2522               use->related_cands = BITMAP_ALLOC (NULL);
2523               data->iv_uses.safe_push (use);
2524             }
2525         }
2526     }
2527
2528   if (dump_file && (dump_flags & TDF_DETAILS))
2529     dump_uses (dump_file, data);
2530 }
2531
2532 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2533    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2534    we are at the top-level of the processed address.  */
2535
2536 static tree
2537 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2538                 HOST_WIDE_INT *offset)
2539 {
2540   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2541   enum tree_code code;
2542   tree type, orig_type = TREE_TYPE (expr);
2543   HOST_WIDE_INT off0, off1, st;
2544   tree orig_expr = expr;
2545
2546   STRIP_NOPS (expr);
2547
2548   type = TREE_TYPE (expr);
2549   code = TREE_CODE (expr);
2550   *offset = 0;
2551
2552   switch (code)
2553     {
2554     case INTEGER_CST:
2555       if (!cst_and_fits_in_hwi (expr)
2556           || integer_zerop (expr))
2557         return orig_expr;
2558
2559       *offset = int_cst_value (expr);
2560       return build_int_cst (orig_type, 0);
2561
2562     case POINTER_PLUS_EXPR:
2563     case PLUS_EXPR:
2564     case MINUS_EXPR:
2565       op0 = TREE_OPERAND (expr, 0);
2566       op1 = TREE_OPERAND (expr, 1);
2567
2568       op0 = strip_offset_1 (op0, false, false, &off0);
2569       op1 = strip_offset_1 (op1, false, false, &off1);
2570
2571       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2572       if (op0 == TREE_OPERAND (expr, 0)
2573           && op1 == TREE_OPERAND (expr, 1))
2574         return orig_expr;
2575
2576       if (integer_zerop (op1))
2577         expr = op0;
2578       else if (integer_zerop (op0))
2579         {
2580           if (code == MINUS_EXPR)
2581             expr = fold_build1 (NEGATE_EXPR, type, op1);
2582           else
2583             expr = op1;
2584         }
2585       else
2586         expr = fold_build2 (code, type, op0, op1);
2587
2588       return fold_convert (orig_type, expr);
2589
2590     case MULT_EXPR:
2591       op1 = TREE_OPERAND (expr, 1);
2592       if (!cst_and_fits_in_hwi (op1))
2593         return orig_expr;
2594
2595       op0 = TREE_OPERAND (expr, 0);
2596       op0 = strip_offset_1 (op0, false, false, &off0);
2597       if (op0 == TREE_OPERAND (expr, 0))
2598         return orig_expr;
2599
2600       *offset = off0 * int_cst_value (op1);
2601       if (integer_zerop (op0))
2602         expr = op0;
2603       else
2604         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2605
2606       return fold_convert (orig_type, expr);
2607
2608     case ARRAY_REF:
2609     case ARRAY_RANGE_REF:
2610       if (!inside_addr)
2611         return orig_expr;
2612
2613       step = array_ref_element_size (expr);
2614       if (!cst_and_fits_in_hwi (step))
2615         break;
2616
2617       st = int_cst_value (step);
2618       op1 = TREE_OPERAND (expr, 1);
2619       op1 = strip_offset_1 (op1, false, false, &off1);
2620       *offset = off1 * st;
2621
2622       if (top_compref
2623           && integer_zerop (op1))
2624         {
2625           /* Strip the component reference completely.  */
2626           op0 = TREE_OPERAND (expr, 0);
2627           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2628           *offset += off0;
2629           return op0;
2630         }
2631       break;
2632
2633     case COMPONENT_REF:
2634       {
2635         tree field;
2636
2637         if (!inside_addr)
2638           return orig_expr;
2639
2640         tmp = component_ref_field_offset (expr);
2641         field = TREE_OPERAND (expr, 1);
2642         if (top_compref
2643             && cst_and_fits_in_hwi (tmp)
2644             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2645           {
2646             HOST_WIDE_INT boffset, abs_off;
2647
2648             /* Strip the component reference completely.  */
2649             op0 = TREE_OPERAND (expr, 0);
2650             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2651             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2652             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2653             if (boffset < 0)
2654               abs_off = -abs_off;
2655
2656             *offset = off0 + int_cst_value (tmp) + abs_off;
2657             return op0;
2658           }
2659       }
2660       break;
2661
2662     case ADDR_EXPR:
2663       op0 = TREE_OPERAND (expr, 0);
2664       op0 = strip_offset_1 (op0, true, true, &off0);
2665       *offset += off0;
2666
2667       if (op0 == TREE_OPERAND (expr, 0))
2668         return orig_expr;
2669
2670       expr = build_fold_addr_expr (op0);
2671       return fold_convert (orig_type, expr);
2672
2673     case MEM_REF:
2674       /* ???  Offset operand?  */
2675       inside_addr = false;
2676       break;
2677
2678     default:
2679       return orig_expr;
2680     }
2681
2682   /* Default handling of expressions for that we want to recurse into
2683      the first operand.  */
2684   op0 = TREE_OPERAND (expr, 0);
2685   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2686   *offset += off0;
2687
2688   if (op0 == TREE_OPERAND (expr, 0)
2689       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2690     return orig_expr;
2691
2692   expr = copy_node (expr);
2693   TREE_OPERAND (expr, 0) = op0;
2694   if (op1)
2695     TREE_OPERAND (expr, 1) = op1;
2696
2697   /* Inside address, we might strip the top level component references,
2698      thus changing type of the expression.  Handling of ADDR_EXPR
2699      will fix that.  */
2700   expr = fold_convert (orig_type, expr);
2701
2702   return expr;
2703 }
2704
2705 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2706
2707 static tree
2708 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2709 {
2710   HOST_WIDE_INT off;
2711   tree core = strip_offset_1 (expr, false, false, &off);
2712   *offset = off;
2713   return core;
2714 }
2715
2716 /* Returns variant of TYPE that can be used as base for different uses.
2717    We return unsigned type with the same precision, which avoids problems
2718    with overflows.  */
2719
2720 static tree
2721 generic_type_for (tree type)
2722 {
2723   if (POINTER_TYPE_P (type))
2724     return unsigned_type_for (type);
2725
2726   if (TYPE_UNSIGNED (type))
2727     return type;
2728
2729   return unsigned_type_for (type);
2730 }
2731
2732 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2733    the bitmap to that we should store it.  */
2734
2735 static struct ivopts_data *fd_ivopts_data;
2736 static tree
2737 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2738 {
2739   bitmap *depends_on = (bitmap *) data;
2740   struct version_info *info;
2741
2742   if (TREE_CODE (*expr_p) != SSA_NAME)
2743     return NULL_TREE;
2744   info = name_info (fd_ivopts_data, *expr_p);
2745
2746   if (!info->inv_id || info->has_nonlin_use)
2747     return NULL_TREE;
2748
2749   if (!*depends_on)
2750     *depends_on = BITMAP_ALLOC (NULL);
2751   bitmap_set_bit (*depends_on, info->inv_id);
2752
2753   return NULL_TREE;
2754 }
2755
2756 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2757    position to POS.  If USE is not NULL, the candidate is set as related to
2758    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2759    replacement of the final value of the iv by a direct computation.  */
2760
2761 static struct iv_cand *
2762 add_candidate_1 (struct ivopts_data *data,
2763                  tree base, tree step, bool important, enum iv_position pos,
2764                  struct iv_use *use, gimple *incremented_at,
2765                  struct iv *orig_iv = NULL)
2766 {
2767   unsigned i;
2768   struct iv_cand *cand = NULL;
2769   tree type, orig_type;
2770
2771   /* For non-original variables, make sure their values are computed in a type
2772      that does not invoke undefined behavior on overflows (since in general,
2773      we cannot prove that these induction variables are non-wrapping).  */
2774   if (pos != IP_ORIGINAL)
2775     {
2776       orig_type = TREE_TYPE (base);
2777       type = generic_type_for (orig_type);
2778       if (type != orig_type)
2779         {
2780           base = fold_convert (type, base);
2781           step = fold_convert (type, step);
2782         }
2783     }
2784
2785   for (i = 0; i < n_iv_cands (data); i++)
2786     {
2787       cand = iv_cand (data, i);
2788
2789       if (cand->pos != pos)
2790         continue;
2791
2792       if (cand->incremented_at != incremented_at
2793           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2794               && cand->ainc_use != use))
2795         continue;
2796
2797       if (!cand->iv)
2798         {
2799           if (!base && !step)
2800             break;
2801
2802           continue;
2803         }
2804
2805       if (!base && !step)
2806         continue;
2807
2808       if (operand_equal_p (base, cand->iv->base, 0)
2809           && operand_equal_p (step, cand->iv->step, 0)
2810           && (TYPE_PRECISION (TREE_TYPE (base))
2811               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2812         break;
2813     }
2814
2815   if (i == n_iv_cands (data))
2816     {
2817       cand = XCNEW (struct iv_cand);
2818       cand->id = i;
2819
2820       if (!base && !step)
2821         cand->iv = NULL;
2822       else
2823         cand->iv = alloc_iv (data, base, step);
2824
2825       cand->pos = pos;
2826       if (pos != IP_ORIGINAL && cand->iv)
2827         {
2828           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2829           cand->var_after = cand->var_before;
2830         }
2831       cand->important = important;
2832       cand->incremented_at = incremented_at;
2833       data->iv_candidates.safe_push (cand);
2834
2835       if (step
2836           && TREE_CODE (step) != INTEGER_CST)
2837         {
2838           fd_ivopts_data = data;
2839           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2840         }
2841
2842       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2843         cand->ainc_use = use;
2844       else
2845         cand->ainc_use = NULL;
2846
2847       cand->orig_iv = orig_iv;
2848       if (dump_file && (dump_flags & TDF_DETAILS))
2849         dump_cand (dump_file, cand);
2850     }
2851
2852   if (important && !cand->important)
2853     {
2854       cand->important = true;
2855       if (dump_file && (dump_flags & TDF_DETAILS))
2856         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2857     }
2858
2859   if (use)
2860     {
2861       bitmap_set_bit (use->related_cands, i);
2862       if (dump_file && (dump_flags & TDF_DETAILS))
2863         fprintf (dump_file, "Candidate %d is related to use %d\n",
2864                  cand->id, use->id);
2865     }
2866
2867   return cand;
2868 }
2869
2870 /* Returns true if incrementing the induction variable at the end of the LOOP
2871    is allowed.
2872
2873    The purpose is to avoid splitting latch edge with a biv increment, thus
2874    creating a jump, possibly confusing other optimization passes and leaving
2875    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2876    is not available (so we do not have a better alternative), or if the latch
2877    edge is already nonempty.  */
2878
2879 static bool
2880 allow_ip_end_pos_p (struct loop *loop)
2881 {
2882   if (!ip_normal_pos (loop))
2883     return true;
2884
2885   if (!empty_block_p (ip_end_pos (loop)))
2886     return true;
2887
2888   return false;
2889 }
2890
2891 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2892    Important field is set to IMPORTANT.  */
2893
2894 static void
2895 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2896                         bool important, struct iv_use *use)
2897 {
2898   basic_block use_bb = gimple_bb (use->stmt);
2899   machine_mode mem_mode;
2900   unsigned HOST_WIDE_INT cstepi;
2901
2902   /* If we insert the increment in any position other than the standard
2903      ones, we must ensure that it is incremented once per iteration.
2904      It must not be in an inner nested loop, or one side of an if
2905      statement.  */
2906   if (use_bb->loop_father != data->current_loop
2907       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2908       || stmt_could_throw_p (use->stmt)
2909       || !cst_and_fits_in_hwi (step))
2910     return;
2911
2912   cstepi = int_cst_value (step);
2913
2914   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2915   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2916         || USE_STORE_PRE_INCREMENT (mem_mode))
2917        && GET_MODE_SIZE (mem_mode) == cstepi)
2918       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2919            || USE_STORE_PRE_DECREMENT (mem_mode))
2920           && GET_MODE_SIZE (mem_mode) == -cstepi))
2921     {
2922       enum tree_code code = MINUS_EXPR;
2923       tree new_base;
2924       tree new_step = step;
2925
2926       if (POINTER_TYPE_P (TREE_TYPE (base)))
2927         {
2928           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2929           code = POINTER_PLUS_EXPR;
2930         }
2931       else
2932         new_step = fold_convert (TREE_TYPE (base), new_step);
2933       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2934       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2935                        use->stmt);
2936     }
2937   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2938         || USE_STORE_POST_INCREMENT (mem_mode))
2939        && GET_MODE_SIZE (mem_mode) == cstepi)
2940       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2941            || USE_STORE_POST_DECREMENT (mem_mode))
2942           && GET_MODE_SIZE (mem_mode) == -cstepi))
2943     {
2944       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2945                        use->stmt);
2946     }
2947 }
2948
2949 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2950    position to POS.  If USE is not NULL, the candidate is set as related to
2951    it.  The candidate computation is scheduled before exit condition and at
2952    the end of loop.  */
2953
2954 static void
2955 add_candidate (struct ivopts_data *data,
2956                tree base, tree step, bool important, struct iv_use *use,
2957                struct iv *orig_iv = NULL)
2958 {
2959   gcc_assert (use == NULL || use->sub_id == 0);
2960
2961   if (ip_normal_pos (data->current_loop))
2962     add_candidate_1 (data, base, step, important,
2963                      IP_NORMAL, use, NULL, orig_iv);
2964   if (ip_end_pos (data->current_loop)
2965       && allow_ip_end_pos_p (data->current_loop))
2966     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2967 }
2968
2969 /* Adds standard iv candidates.  */
2970
2971 static void
2972 add_standard_iv_candidates (struct ivopts_data *data)
2973 {
2974   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2975
2976   /* The same for a double-integer type if it is still fast enough.  */
2977   if (TYPE_PRECISION
2978         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2979       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2980     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2981                    build_int_cst (long_integer_type_node, 1), true, NULL);
2982
2983   /* The same for a double-integer type if it is still fast enough.  */
2984   if (TYPE_PRECISION
2985         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2986       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2987     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2988                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2989 }
2990
2991
2992 /* Adds candidates bases on the old induction variable IV.  */
2993
2994 static void
2995 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2996 {
2997   gimple *phi;
2998   tree def;
2999   struct iv_cand *cand;
3000
3001   /* Check if this biv is used in address type use.  */
3002   if (iv->no_overflow  && iv->have_address_use
3003       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3004       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3005     {
3006       tree base = fold_convert (sizetype, iv->base);
3007       tree step = fold_convert (sizetype, iv->step);
3008
3009       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3010       add_candidate (data, base, step, true, NULL, iv);
3011       /* Add iv cand of the original type only if it has nonlinear use.  */
3012       if (iv->have_use_for)
3013         add_candidate (data, iv->base, iv->step, true, NULL);
3014     }
3015   else
3016     add_candidate (data, iv->base, iv->step, true, NULL);
3017
3018   /* The same, but with initial value zero.  */
3019   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3020     add_candidate (data, size_int (0), iv->step, true, NULL);
3021   else
3022     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3023                    iv->step, true, NULL);
3024
3025   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3026   if (gimple_code (phi) == GIMPLE_PHI)
3027     {
3028       /* Additionally record the possibility of leaving the original iv
3029          untouched.  */
3030       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3031       /* Don't add candidate if it's from another PHI node because
3032          it's an affine iv appearing in the form of PEELED_CHREC.  */
3033       phi = SSA_NAME_DEF_STMT (def);
3034       if (gimple_code (phi) != GIMPLE_PHI)
3035         {
3036           cand = add_candidate_1 (data,
3037                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3038                                   SSA_NAME_DEF_STMT (def));
3039           cand->var_before = iv->ssa_name;
3040           cand->var_after = def;
3041         }
3042       else
3043         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3044     }
3045 }
3046
3047 /* Adds candidates based on the old induction variables.  */
3048
3049 static void
3050 add_iv_candidate_for_bivs (struct ivopts_data *data)
3051 {
3052   unsigned i;
3053   struct iv *iv;
3054   bitmap_iterator bi;
3055
3056   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3057     {
3058       iv = ver_info (data, i)->iv;
3059       if (iv && iv->biv_p && !integer_zerop (iv->step))
3060         add_iv_candidate_for_biv (data, iv);
3061     }
3062 }
3063
3064 /* Adds candidates based on the value of USE's iv.  */
3065
3066 static void
3067 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3068 {
3069   unsigned HOST_WIDE_INT offset;
3070   tree base;
3071   tree basetype;
3072   struct iv *iv = use->iv;
3073
3074   add_candidate (data, iv->base, iv->step, false, use);
3075
3076   /* The same, but with initial value zero.  Make such variable important,
3077      since it is generic enough so that possibly many uses may be based
3078      on it.  */
3079   basetype = TREE_TYPE (iv->base);
3080   if (POINTER_TYPE_P (basetype))
3081     basetype = sizetype;
3082   add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
3083
3084   /* Third, try removing the constant offset.  Make sure to even
3085      add a candidate for &a[0] vs. (T *)&a.  */
3086   base = strip_offset (iv->base, &offset);
3087   if (offset || base != iv->base)
3088     add_candidate (data, base, iv->step, false, use);
3089
3090   /* At last, add auto-incremental candidates.  Make such variables
3091      important since other iv uses with same base object may be based
3092      on it.  */
3093   if (use != NULL && use->type == USE_ADDRESS)
3094     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3095 }
3096
3097 /* Adds candidates based on the uses.  */
3098
3099 static void
3100 add_iv_candidate_for_uses (struct ivopts_data *data)
3101 {
3102   unsigned i;
3103
3104   for (i = 0; i < n_iv_uses (data); i++)
3105     {
3106       struct iv_use *use = iv_use (data, i);
3107
3108       if (!use)
3109         continue;
3110
3111       switch (use->type)
3112         {
3113         case USE_NONLINEAR_EXPR:
3114         case USE_COMPARE:
3115         case USE_ADDRESS:
3116           /* Just add the ivs based on the value of the iv used here.  */
3117           add_iv_candidate_for_use (data, use);
3118           break;
3119
3120         default:
3121           gcc_unreachable ();
3122         }
3123     }
3124 }
3125
3126 /* Record important candidates and add them to related_cands bitmaps
3127    if needed.  */
3128
3129 static void
3130 record_important_candidates (struct ivopts_data *data)
3131 {
3132   unsigned i;
3133   struct iv_use *use;
3134
3135   for (i = 0; i < n_iv_cands (data); i++)
3136     {
3137       struct iv_cand *cand = iv_cand (data, i);
3138
3139       if (cand->important)
3140         bitmap_set_bit (data->important_candidates, i);
3141     }
3142
3143   data->consider_all_candidates = (n_iv_cands (data)
3144                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3145
3146   if (data->consider_all_candidates)
3147     {
3148       /* We will not need "related_cands" bitmaps in this case,
3149          so release them to decrease peak memory consumption.  */
3150       for (i = 0; i < n_iv_uses (data); i++)
3151         {
3152           use = iv_use (data, i);
3153           BITMAP_FREE (use->related_cands);
3154         }
3155     }
3156   else
3157     {
3158       /* Add important candidates to the related_cands bitmaps.  */
3159       for (i = 0; i < n_iv_uses (data); i++)
3160         bitmap_ior_into (iv_use (data, i)->related_cands,
3161                          data->important_candidates);
3162     }
3163 }
3164
3165 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3166    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3167    we allocate a simple list to every use.  */
3168
3169 static void
3170 alloc_use_cost_map (struct ivopts_data *data)
3171 {
3172   unsigned i, size, s;
3173
3174   for (i = 0; i < n_iv_uses (data); i++)
3175     {
3176       struct iv_use *use = iv_use (data, i);
3177
3178       if (data->consider_all_candidates)
3179         size = n_iv_cands (data);
3180       else
3181         {
3182           s = bitmap_count_bits (use->related_cands);
3183
3184           /* Round up to the power of two, so that moduling by it is fast.  */
3185           size = s ? (1 << ceil_log2 (s)) : 1;
3186         }
3187
3188       use->n_map_members = size;
3189       use->cost_map = XCNEWVEC (struct cost_pair, size);
3190     }
3191 }
3192
3193 /* Returns description of computation cost of expression whose runtime
3194    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
3195
3196 static comp_cost
3197 new_cost (unsigned runtime, unsigned complexity)
3198 {
3199   comp_cost cost;
3200
3201   cost.cost = runtime;
3202   cost.complexity = complexity;
3203
3204   return cost;
3205 }
3206
3207 /* Returns true if COST is infinite.  */
3208
3209 static bool
3210 infinite_cost_p (comp_cost cost)
3211 {
3212   return cost.cost == INFTY;
3213 }
3214
3215 /* Adds costs COST1 and COST2.  */
3216
3217 static comp_cost
3218 add_costs (comp_cost cost1, comp_cost cost2)
3219 {
3220   if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3221     return infinite_cost;
3222
3223   cost1.cost += cost2.cost;
3224   cost1.complexity += cost2.complexity;
3225
3226   return cost1;
3227 }
3228 /* Subtracts costs COST1 and COST2.  */
3229
3230 static comp_cost
3231 sub_costs (comp_cost cost1, comp_cost cost2)
3232 {
3233   cost1.cost -= cost2.cost;
3234   cost1.complexity -= cost2.complexity;
3235
3236   return cost1;
3237 }
3238
3239 /* Returns a negative number if COST1 < COST2, a positive number if
3240    COST1 > COST2, and 0 if COST1 = COST2.  */
3241
3242 static int
3243 compare_costs (comp_cost cost1, comp_cost cost2)
3244 {
3245   if (cost1.cost == cost2.cost)
3246     return cost1.complexity - cost2.complexity;
3247
3248   return cost1.cost - cost2.cost;
3249 }
3250
3251 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3252    on invariants DEPENDS_ON and that the value used in expressing it
3253    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3254
3255 static void
3256 set_use_iv_cost (struct ivopts_data *data,
3257                  struct iv_use *use, struct iv_cand *cand,
3258                  comp_cost cost, bitmap depends_on, tree value,
3259                  enum tree_code comp, int inv_expr_id)
3260 {
3261   unsigned i, s;
3262
3263   if (infinite_cost_p (cost))
3264     {
3265       BITMAP_FREE (depends_on);
3266       return;
3267     }
3268
3269   if (data->consider_all_candidates)
3270     {
3271       use->cost_map[cand->id].cand = cand;
3272       use->cost_map[cand->id].cost = cost;
3273       use->cost_map[cand->id].depends_on = depends_on;
3274       use->cost_map[cand->id].value = value;
3275       use->cost_map[cand->id].comp = comp;
3276       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3277       return;
3278     }
3279
3280   /* n_map_members is a power of two, so this computes modulo.  */
3281   s = cand->id & (use->n_map_members - 1);
3282   for (i = s; i < use->n_map_members; i++)
3283     if (!use->cost_map[i].cand)
3284       goto found;
3285   for (i = 0; i < s; i++)
3286     if (!use->cost_map[i].cand)
3287       goto found;
3288
3289   gcc_unreachable ();
3290
3291 found:
3292   use->cost_map[i].cand = cand;
3293   use->cost_map[i].cost = cost;
3294   use->cost_map[i].depends_on = depends_on;
3295   use->cost_map[i].value = value;
3296   use->cost_map[i].comp = comp;
3297   use->cost_map[i].inv_expr_id = inv_expr_id;
3298 }
3299
3300 /* Gets cost of (USE, CANDIDATE) pair.  */
3301
3302 static struct cost_pair *
3303 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3304                  struct iv_cand *cand)
3305 {
3306   unsigned i, s;
3307   struct cost_pair *ret;
3308
3309   if (!cand)
3310     return NULL;
3311
3312   if (data->consider_all_candidates)
3313     {
3314       ret = use->cost_map + cand->id;
3315       if (!ret->cand)
3316         return NULL;
3317
3318       return ret;
3319     }
3320
3321   /* n_map_members is a power of two, so this computes modulo.  */
3322   s = cand->id & (use->n_map_members - 1);
3323   for (i = s; i < use->n_map_members; i++)
3324     if (use->cost_map[i].cand == cand)
3325       return use->cost_map + i;
3326     else if (use->cost_map[i].cand == NULL)
3327       return NULL;
3328   for (i = 0; i < s; i++)
3329     if (use->cost_map[i].cand == cand)
3330       return use->cost_map + i;
3331     else if (use->cost_map[i].cand == NULL)
3332       return NULL;
3333
3334   return NULL;
3335 }
3336
3337 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3338 static rtx
3339 produce_memory_decl_rtl (tree obj, int *regno)
3340 {
3341   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3342   machine_mode address_mode = targetm.addr_space.address_mode (as);
3343   rtx x;
3344
3345   gcc_assert (obj);
3346   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3347     {
3348       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3349       x = gen_rtx_SYMBOL_REF (address_mode, name);
3350       SET_SYMBOL_REF_DECL (x, obj);
3351       x = gen_rtx_MEM (DECL_MODE (obj), x);
3352       set_mem_addr_space (x, as);
3353       targetm.encode_section_info (obj, x, true);
3354     }
3355   else
3356     {
3357       x = gen_raw_REG (address_mode, (*regno)++);
3358       x = gen_rtx_MEM (DECL_MODE (obj), x);
3359       set_mem_addr_space (x, as);
3360     }
3361
3362   return x;
3363 }
3364
3365 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3366    walk_tree.  DATA contains the actual fake register number.  */
3367
3368 static tree
3369 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3370 {
3371   tree obj = NULL_TREE;
3372   rtx x = NULL_RTX;
3373   int *regno = (int *) data;
3374
3375   switch (TREE_CODE (*expr_p))
3376     {
3377     case ADDR_EXPR:
3378       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3379            handled_component_p (*expr_p);
3380            expr_p = &TREE_OPERAND (*expr_p, 0))
3381         continue;
3382       obj = *expr_p;
3383       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3384         x = produce_memory_decl_rtl (obj, regno);
3385       break;
3386
3387     case SSA_NAME:
3388       *ws = 0;
3389       obj = SSA_NAME_VAR (*expr_p);
3390       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3391       if (!obj)
3392         return NULL_TREE;
3393       if (!DECL_RTL_SET_P (obj))
3394         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3395       break;
3396
3397     case VAR_DECL:
3398     case PARM_DECL:
3399     case RESULT_DECL:
3400       *ws = 0;
3401       obj = *expr_p;
3402
3403       if (DECL_RTL_SET_P (obj))
3404         break;
3405
3406       if (DECL_MODE (obj) == BLKmode)
3407         x = produce_memory_decl_rtl (obj, regno);
3408       else
3409         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3410
3411       break;
3412
3413     default:
3414       break;
3415     }
3416
3417   if (x)
3418     {
3419       decl_rtl_to_reset.safe_push (obj);
3420       SET_DECL_RTL (obj, x);
3421     }
3422
3423   return NULL_TREE;
3424 }
3425
3426 /* Determines cost of the computation of EXPR.  */
3427
3428 static unsigned
3429 computation_cost (tree expr, bool speed)
3430 {
3431   rtx_insn *seq;
3432   rtx rslt;
3433   tree type = TREE_TYPE (expr);
3434   unsigned cost;
3435   /* Avoid using hard regs in ways which may be unsupported.  */
3436   int regno = LAST_VIRTUAL_REGISTER + 1;
3437   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3438   enum node_frequency real_frequency = node->frequency;
3439
3440   node->frequency = NODE_FREQUENCY_NORMAL;
3441   crtl->maybe_hot_insn_p = speed;
3442   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3443   start_sequence ();
3444   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3445   seq = get_insns ();
3446   end_sequence ();
3447   default_rtl_profile ();
3448   node->frequency = real_frequency;
3449
3450   cost = seq_cost (seq, speed);
3451   if (MEM_P (rslt))
3452     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3453                           TYPE_ADDR_SPACE (type), speed);
3454   else if (!REG_P (rslt))
3455     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3456
3457   return cost;
3458 }
3459
3460 /* Returns variable containing the value of candidate CAND at statement AT.  */
3461
3462 static tree
3463 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3464 {
3465   if (stmt_after_increment (loop, cand, stmt))
3466     return cand->var_after;
3467   else
3468     return cand->var_before;
3469 }
3470
3471 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3472    same precision that is at least as wide as the precision of TYPE, stores
3473    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3474    type of A and B.  */
3475
3476 static tree
3477 determine_common_wider_type (tree *a, tree *b)
3478 {
3479   tree wider_type = NULL;
3480   tree suba, subb;
3481   tree atype = TREE_TYPE (*a);
3482
3483   if (CONVERT_EXPR_P (*a))
3484     {
3485       suba = TREE_OPERAND (*a, 0);
3486       wider_type = TREE_TYPE (suba);
3487       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3488         return atype;
3489     }
3490   else
3491     return atype;
3492
3493   if (CONVERT_EXPR_P (*b))
3494     {
3495       subb = TREE_OPERAND (*b, 0);
3496       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3497         return atype;
3498     }
3499   else
3500     return atype;
3501
3502   *a = suba;
3503   *b = subb;
3504   return wider_type;
3505 }
3506
3507 /* Determines the expression by that USE is expressed from induction variable
3508    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3509    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3510
3511 static bool
3512 get_computation_aff (struct loop *loop,
3513                      struct iv_use *use, struct iv_cand *cand, gimple *at,
3514                      struct aff_tree *aff)
3515 {
3516   tree ubase = use->iv->base;
3517   tree ustep = use->iv->step;
3518   tree cbase = cand->iv->base;
3519   tree cstep = cand->iv->step, cstep_common;
3520   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3521   tree common_type, var;
3522   tree uutype;
3523   aff_tree cbase_aff, var_aff;
3524   widest_int rat;
3525
3526   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3527     {
3528       /* We do not have a precision to express the values of use.  */
3529       return false;
3530     }
3531
3532   var = var_at_stmt (loop, cand, at);
3533   uutype = unsigned_type_for (utype);
3534
3535   /* If the conversion is not noop, perform it.  */
3536   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3537     {
3538       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3539           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3540         {
3541           tree inner_base, inner_step, inner_type;
3542           inner_base = TREE_OPERAND (cbase, 0);
3543           if (CONVERT_EXPR_P (cstep))
3544             inner_step = TREE_OPERAND (cstep, 0);
3545           else
3546             inner_step = cstep;
3547
3548           inner_type = TREE_TYPE (inner_base);
3549           /* If candidate is added from a biv whose type is smaller than
3550              ctype, we know both candidate and the biv won't overflow.
3551              In this case, it's safe to skip the convertion in candidate.
3552              As an example, (unsigned short)((unsigned long)A) equals to
3553              (unsigned short)A, if A has a type no larger than short.  */
3554           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3555             {
3556               cbase = inner_base;
3557               cstep = inner_step;
3558             }
3559         }
3560       cstep = fold_convert (uutype, cstep);
3561       cbase = fold_convert (uutype, cbase);
3562       var = fold_convert (uutype, var);
3563     }
3564
3565   if (!constant_multiple_of (ustep, cstep, &rat))
3566     return false;
3567
3568   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3569      type, we achieve better folding by computing their difference in this
3570      wider type, and cast the result to UUTYPE.  We do not need to worry about
3571      overflows, as all the arithmetics will in the end be performed in UUTYPE
3572      anyway.  */
3573   common_type = determine_common_wider_type (&ubase, &cbase);
3574
3575   /* use = ubase - ratio * cbase + ratio * var.  */
3576   tree_to_aff_combination (ubase, common_type, aff);
3577   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3578   tree_to_aff_combination (var, uutype, &var_aff);
3579
3580   /* We need to shift the value if we are after the increment.  */
3581   if (stmt_after_increment (loop, cand, at))
3582     {
3583       aff_tree cstep_aff;
3584
3585       if (common_type != uutype)
3586         cstep_common = fold_convert (common_type, cstep);
3587       else
3588         cstep_common = cstep;
3589
3590       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3591       aff_combination_add (&cbase_aff, &cstep_aff);
3592     }
3593
3594   aff_combination_scale (&cbase_aff, -rat);
3595   aff_combination_add (aff, &cbase_aff);
3596   if (common_type != uutype)
3597     aff_combination_convert (aff, uutype);
3598
3599   aff_combination_scale (&var_aff, rat);
3600   aff_combination_add (aff, &var_aff);
3601
3602   return true;
3603 }
3604
3605 /* Return the type of USE.  */
3606
3607 static tree
3608 get_use_type (struct iv_use *use)
3609 {
3610   tree base_type = TREE_TYPE (use->iv->base);
3611   tree type;
3612
3613   if (use->type == USE_ADDRESS)
3614     {
3615       /* The base_type may be a void pointer.  Create a pointer type based on
3616          the mem_ref instead.  */
3617       type = build_pointer_type (TREE_TYPE (*use->op_p));
3618       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3619                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3620     }
3621   else
3622     type = base_type;
3623
3624   return type;
3625 }
3626
3627 /* Determines the expression by that USE is expressed from induction variable
3628    CAND at statement AT in LOOP.  The computation is unshared.  */
3629
3630 static tree
3631 get_computation_at (struct loop *loop,
3632                     struct iv_use *use, struct iv_cand *cand, gimple *at)
3633 {
3634   aff_tree aff;
3635   tree type = get_use_type (use);
3636
3637   if (!get_computation_aff (loop, use, cand, at, &aff))
3638     return NULL_TREE;
3639   unshare_aff_combination (&aff);
3640   return fold_convert (type, aff_combination_to_tree (&aff));
3641 }
3642
3643 /* Determines the expression by that USE is expressed from induction variable
3644    CAND in LOOP.  The computation is unshared.  */
3645
3646 static tree
3647 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3648 {
3649   return get_computation_at (loop, use, cand, use->stmt);
3650 }
3651
3652 /* Adjust the cost COST for being in loop setup rather than loop body.
3653    If we're optimizing for space, the loop setup overhead is constant;
3654    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3655 static unsigned
3656 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3657 {
3658   if (cost == INFTY)
3659     return cost;
3660   else if (optimize_loop_for_speed_p (data->current_loop))
3661     return cost / avg_loop_niter (data->current_loop);
3662   else
3663     return cost;
3664 }
3665
3666 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3667    validity for a memory reference accessing memory of mode MODE in
3668    address space AS.  */
3669
3670
3671 bool
3672 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3673                                  addr_space_t as)
3674 {
3675 #define MAX_RATIO 128
3676   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3677   static vec<sbitmap> valid_mult_list;
3678   sbitmap valid_mult;
3679
3680   if (data_index >= valid_mult_list.length ())
3681     valid_mult_list.safe_grow_cleared (data_index + 1);
3682
3683   valid_mult = valid_mult_list[data_index];
3684   if (!valid_mult)
3685     {
3686       machine_mode address_mode = targetm.addr_space.address_mode (as);
3687       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3688       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3689       rtx addr, scaled;
3690       HOST_WIDE_INT i;
3691
3692       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3693       bitmap_clear (valid_mult);
3694       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3695       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3696       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3697         {
3698           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3699           if (memory_address_addr_space_p (mode, addr, as)
3700               || memory_address_addr_space_p (mode, scaled, as))
3701             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3702         }
3703
3704       if (dump_file && (dump_flags & TDF_DETAILS))
3705         {
3706           fprintf (dump_file, "  allowed multipliers:");
3707           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3708             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3709               fprintf (dump_file, " %d", (int) i);
3710           fprintf (dump_file, "\n");
3711           fprintf (dump_file, "\n");
3712         }
3713
3714       valid_mult_list[data_index] = valid_mult;
3715     }
3716
3717   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3718     return false;
3719
3720   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3721 }
3722
3723 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3724    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3725    variable is omitted.  Compute the cost for a memory reference that accesses
3726    a memory location of mode MEM_MODE in address space AS.
3727
3728    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3729    size of MEM_MODE / RATIO) is available.  To make this determination, we
3730    look at the size of the increment to be made, which is given in CSTEP.
3731    CSTEP may be zero if the step is unknown.
3732    STMT_AFTER_INC is true iff the statement we're looking at is after the
3733    increment of the original biv.
3734
3735    TODO -- there must be some better way.  This all is quite crude.  */
3736
3737 enum ainc_type
3738 {
3739   AINC_PRE_INC,         /* Pre increment.  */
3740   AINC_PRE_DEC,         /* Pre decrement.  */
3741   AINC_POST_INC,        /* Post increment.  */
3742   AINC_POST_DEC,        /* Post decrement.  */
3743   AINC_NONE             /* Also the number of auto increment types.  */
3744 };
3745
3746 struct address_cost_data
3747 {
3748   HOST_WIDE_INT min_offset, max_offset;
3749   unsigned costs[2][2][2][2];
3750   unsigned ainc_costs[AINC_NONE];
3751 };
3752
3753
3754 static comp_cost
3755 get_address_cost (bool symbol_present, bool var_present,
3756                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3757                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3758                   addr_space_t as, bool speed,
3759                   bool stmt_after_inc, bool *may_autoinc)
3760 {
3761   machine_mode address_mode = targetm.addr_space.address_mode (as);
3762   static vec<address_cost_data *> address_cost_data_list;
3763   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3764   address_cost_data *data;
3765   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3766   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3767   unsigned cost, acost, complexity;
3768   enum ainc_type autoinc_type;
3769   bool offset_p, ratio_p, autoinc;
3770   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3771   unsigned HOST_WIDE_INT mask;
3772   unsigned bits;
3773
3774   if (data_index >= address_cost_data_list.length ())
3775     address_cost_data_list.safe_grow_cleared (data_index + 1);
3776
3777   data = address_cost_data_list[data_index];
3778   if (!data)
3779     {
3780       HOST_WIDE_INT i;
3781       HOST_WIDE_INT rat, off = 0;
3782       int old_cse_not_expected, width;
3783       unsigned sym_p, var_p, off_p, rat_p, add_c;
3784       rtx_insn *seq;
3785       rtx addr, base;
3786       rtx reg0, reg1;
3787
3788       data = (address_cost_data *) xcalloc (1, sizeof (*data));
3789
3790       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3791
3792       width = GET_MODE_BITSIZE (address_mode) - 1;
3793       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3794         width = HOST_BITS_PER_WIDE_INT - 1;
3795       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3796
3797       for (i = width; i >= 0; i--)
3798         {
3799           off = -((unsigned HOST_WIDE_INT) 1 << i);
3800           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3801           if (memory_address_addr_space_p (mem_mode, addr, as))
3802             break;
3803         }
3804       data->min_offset = (i == -1? 0 : off);
3805
3806       for (i = width; i >= 0; i--)
3807         {
3808           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3809           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3810           if (memory_address_addr_space_p (mem_mode, addr, as))
3811             break;
3812           /* For some strict-alignment targets, the offset must be naturally
3813              aligned.  Try an aligned offset if mem_mode is not QImode.  */
3814           off = mem_mode != QImode
3815                 ? ((unsigned HOST_WIDE_INT) 1 << i)
3816                     - GET_MODE_SIZE (mem_mode)
3817                 : 0;
3818           if (off > 0)
3819             {
3820               XEXP (addr, 1) = gen_int_mode (off, address_mode);
3821               if (memory_address_addr_space_p (mem_mode, addr, as))
3822                 break;
3823             }
3824         }
3825       if (i == -1)
3826         off = 0;
3827       data->max_offset = off;
3828
3829       if (dump_file && (dump_flags & TDF_DETAILS))
3830         {
3831           fprintf (dump_file, "get_address_cost:\n");
3832           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3833                    GET_MODE_NAME (mem_mode),
3834                    data->min_offset);
3835           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3836                    GET_MODE_NAME (mem_mode),
3837                    data->max_offset);
3838         }
3839
3840       rat = 1;
3841       for (i = 2; i <= MAX_RATIO; i++)
3842         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3843           {
3844             rat = i;
3845             break;
3846           }
3847
3848       /* Compute the cost of various addressing modes.  */
3849       acost = 0;
3850       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3851       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3852
3853       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3854           || USE_STORE_PRE_DECREMENT (mem_mode))
3855         {
3856           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3857           has_predec[mem_mode]
3858             = memory_address_addr_space_p (mem_mode, addr, as);
3859
3860           if (has_predec[mem_mode])
3861             data->ainc_costs[AINC_PRE_DEC]
3862               = address_cost (addr, mem_mode, as, speed);
3863         }
3864       if (USE_LOAD_POST_DECREMENT (mem_mode)
3865           || USE_STORE_POST_DECREMENT (mem_mode))
3866         {
3867           addr = gen_rtx_POST_DEC (address_mode, reg0);
3868           has_postdec[mem_mode]
3869             = memory_address_addr_space_p (mem_mode, addr, as);
3870
3871           if (has_postdec[mem_mode])
3872             data->ainc_costs[AINC_POST_DEC]
3873               = address_cost (addr, mem_mode, as, speed);
3874         }
3875       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3876           || USE_STORE_PRE_DECREMENT (mem_mode))
3877         {
3878           addr = gen_rtx_PRE_INC (address_mode, reg0);
3879           has_preinc[mem_mode]
3880             = memory_address_addr_space_p (mem_mode, addr, as);
3881
3882           if (has_preinc[mem_mode])
3883             data->ainc_costs[AINC_PRE_INC]
3884               = address_cost (addr, mem_mode, as, speed);
3885         }
3886       if (USE_LOAD_POST_INCREMENT (mem_mode)
3887           || USE_STORE_POST_INCREMENT (mem_mode))
3888         {
3889           addr = gen_rtx_POST_INC (address_mode, reg0);
3890           has_postinc[mem_mode]
3891             = memory_address_addr_space_p (mem_mode, addr, as);
3892
3893           if (has_postinc[mem_mode])
3894             data->ainc_costs[AINC_POST_INC]
3895               = address_cost (addr, mem_mode, as, speed);
3896         }
3897       for (i = 0; i < 16; i++)
3898         {
3899           sym_p = i & 1;
3900           var_p = (i >> 1) & 1;
3901           off_p = (i >> 2) & 1;
3902           rat_p = (i >> 3) & 1;
3903
3904           addr = reg0;
3905           if (rat_p)
3906             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3907                                    gen_int_mode (rat, address_mode));
3908
3909           if (var_p)
3910             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3911
3912           if (sym_p)
3913             {
3914               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3915               /* ??? We can run into trouble with some backends by presenting
3916                  it with symbols which haven't been properly passed through
3917                  targetm.encode_section_info.  By setting the local bit, we
3918                  enhance the probability of things working.  */
3919               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3920
3921               if (off_p)
3922                 base = gen_rtx_fmt_e (CONST, address_mode,
3923                                       gen_rtx_fmt_ee
3924                                         (PLUS, address_mode, base,
3925                                          gen_int_mode (off, address_mode)));
3926             }
3927           else if (off_p)
3928             base = gen_int_mode (off, address_mode);
3929           else
3930             base = NULL_RTX;
3931
3932           if (base)
3933             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3934
3935           start_sequence ();
3936           /* To avoid splitting addressing modes, pretend that no cse will
3937              follow.  */
3938           old_cse_not_expected = cse_not_expected;
3939           cse_not_expected = true;
3940           addr = memory_address_addr_space (mem_mode, addr, as);
3941           cse_not_expected = old_cse_not_expected;
3942           seq = get_insns ();
3943           end_sequence ();
3944
3945           acost = seq_cost (seq, speed);
3946           acost += address_cost (addr, mem_mode, as, speed);
3947
3948           if (!acost)
3949             acost = 1;
3950           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3951         }
3952
3953       /* On some targets, it is quite expensive to load symbol to a register,
3954          which makes addresses that contain symbols look much more expensive.
3955          However, the symbol will have to be loaded in any case before the
3956          loop (and quite likely we have it in register already), so it does not
3957          make much sense to penalize them too heavily.  So make some final
3958          tweaks for the SYMBOL_PRESENT modes:
3959
3960          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3961          var is cheaper, use this mode with small penalty.
3962          If VAR_PRESENT is true, try whether the mode with
3963          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3964          if this is the case, use it.  */
3965       add_c = add_cost (speed, address_mode);
3966       for (i = 0; i < 8; i++)
3967         {
3968           var_p = i & 1;
3969           off_p = (i >> 1) & 1;
3970           rat_p = (i >> 2) & 1;
3971
3972           acost = data->costs[0][1][off_p][rat_p] + 1;
3973           if (var_p)
3974             acost += add_c;
3975
3976           if (acost < data->costs[1][var_p][off_p][rat_p])
3977             data->costs[1][var_p][off_p][rat_p] = acost;
3978         }
3979
3980       if (dump_file && (dump_flags & TDF_DETAILS))
3981         {
3982           fprintf (dump_file, "Address costs:\n");
3983
3984           for (i = 0; i < 16; i++)
3985             {
3986               sym_p = i & 1;
3987               var_p = (i >> 1) & 1;
3988               off_p = (i >> 2) & 1;
3989               rat_p = (i >> 3) & 1;
3990
3991               fprintf (dump_file, "  ");
3992               if (sym_p)
3993                 fprintf (dump_file, "sym + ");
3994               if (var_p)
3995                 fprintf (dump_file, "var + ");
3996               if (off_p)
3997                 fprintf (dump_file, "cst + ");
3998               if (rat_p)
3999                 fprintf (dump_file, "rat * ");
4000
4001               acost = data->costs[sym_p][var_p][off_p][rat_p];
4002               fprintf (dump_file, "index costs %d\n", acost);
4003             }
4004           if (has_predec[mem_mode] || has_postdec[mem_mode]
4005               || has_preinc[mem_mode] || has_postinc[mem_mode])
4006             fprintf (dump_file, "  May include autoinc/dec\n");
4007           fprintf (dump_file, "\n");
4008         }
4009
4010       address_cost_data_list[data_index] = data;
4011     }
4012
4013   bits = GET_MODE_BITSIZE (address_mode);
4014   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4015   offset &= mask;
4016   if ((offset >> (bits - 1) & 1))
4017     offset |= ~mask;
4018   s_offset = offset;
4019
4020   autoinc = false;
4021   autoinc_type = AINC_NONE;
4022   msize = GET_MODE_SIZE (mem_mode);
4023   autoinc_offset = offset;
4024   if (stmt_after_inc)
4025     autoinc_offset += ratio * cstep;
4026   if (symbol_present || var_present || ratio != 1)
4027     autoinc = false;
4028   else
4029     {
4030       if (has_postinc[mem_mode] && autoinc_offset == 0
4031           && msize == cstep)
4032         autoinc_type = AINC_POST_INC;
4033       else if (has_postdec[mem_mode] && autoinc_offset == 0
4034                && msize == -cstep)
4035         autoinc_type = AINC_POST_DEC;
4036       else if (has_preinc[mem_mode] && autoinc_offset == msize
4037                && msize == cstep)
4038         autoinc_type = AINC_PRE_INC;
4039       else if (has_predec[mem_mode] && autoinc_offset == -msize
4040                && msize == -cstep)
4041         autoinc_type = AINC_PRE_DEC;
4042
4043       if (autoinc_type != AINC_NONE)
4044         autoinc = true;
4045     }
4046
4047   cost = 0;
4048   offset_p = (s_offset != 0
4049               && data->min_offset <= s_offset
4050               && s_offset <= data->max_offset);
4051   ratio_p = (ratio != 1
4052              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4053
4054   if (ratio != 1 && !ratio_p)
4055     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4056
4057   if (s_offset && !offset_p && !symbol_present)
4058     cost += add_cost (speed, address_mode);
4059
4060   if (may_autoinc)
4061     *may_autoinc = autoinc;
4062   if (autoinc)
4063     acost = data->ainc_costs[autoinc_type];
4064   else
4065     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4066   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4067   return new_cost (cost + acost, complexity);
4068 }
4069
4070  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4071     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
4072     calculating the operands of EXPR.  Returns true if successful, and returns
4073     the cost in COST.  */
4074
4075 static bool
4076 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4077                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4078 {
4079   comp_cost res;
4080   tree op1 = TREE_OPERAND (expr, 1);
4081   tree cst = TREE_OPERAND (mult, 1);
4082   tree multop = TREE_OPERAND (mult, 0);
4083   int m = exact_log2 (int_cst_value (cst));
4084   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4085   int as_cost, sa_cost;
4086   bool mult_in_op1;
4087
4088   if (!(m >= 0 && m < maxm))
4089     return false;
4090
4091   STRIP_NOPS (op1);
4092   mult_in_op1 = operand_equal_p (op1, mult, 0);
4093
4094   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4095
4096   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4097      use that in preference to a shift insn followed by an add insn.  */
4098   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4099              ? shiftadd_cost (speed, mode, m)
4100              : (mult_in_op1
4101                 ? shiftsub1_cost (speed, mode, m)
4102                 : shiftsub0_cost (speed, mode, m)));
4103
4104   res = new_cost (MIN (as_cost, sa_cost), 0);
4105   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4106
4107   STRIP_NOPS (multop);
4108   if (!is_gimple_val (multop))
4109     res = add_costs (res, force_expr_to_var_cost (multop, speed));
4110
4111   *cost = res;
4112   return true;
4113 }
4114
4115 /* Estimates cost of forcing expression EXPR into a variable.  */
4116
4117 static comp_cost
4118 force_expr_to_var_cost (tree expr, bool speed)
4119 {
4120   static bool costs_initialized = false;
4121   static unsigned integer_cost [2];
4122   static unsigned symbol_cost [2];
4123   static unsigned address_cost [2];
4124   tree op0, op1;
4125   comp_cost cost0, cost1, cost;
4126   machine_mode mode;
4127
4128   if (!costs_initialized)
4129     {
4130       tree type = build_pointer_type (integer_type_node);
4131       tree var, addr;
4132       rtx x;
4133       int i;
4134
4135       var = create_tmp_var_raw (integer_type_node, "test_var");
4136       TREE_STATIC (var) = 1;
4137       x = produce_memory_decl_rtl (var, NULL);
4138       SET_DECL_RTL (var, x);
4139
4140       addr = build1 (ADDR_EXPR, type, var);
4141
4142
4143       for (i = 0; i < 2; i++)
4144         {
4145           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4146                                                              2000), i);
4147
4148           symbol_cost[i] = computation_cost (addr, i) + 1;
4149
4150           address_cost[i]
4151             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4152           if (dump_file && (dump_flags & TDF_DETAILS))
4153             {
4154               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4155               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4156               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4157               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4158               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4159               fprintf (dump_file, "\n");
4160             }
4161         }
4162
4163       costs_initialized = true;
4164     }
4165
4166   STRIP_NOPS (expr);
4167
4168   if (SSA_VAR_P (expr))
4169     return no_cost;
4170
4171   if (is_gimple_min_invariant (expr))
4172     {
4173       if (TREE_CODE (expr) == INTEGER_CST)
4174         return new_cost (integer_cost [speed], 0);
4175
4176       if (TREE_CODE (expr) == ADDR_EXPR)
4177         {
4178           tree obj = TREE_OPERAND (expr, 0);
4179
4180           if (TREE_CODE (obj) == VAR_DECL
4181               || TREE_CODE (obj) == PARM_DECL
4182               || TREE_CODE (obj) == RESULT_DECL)
4183             return new_cost (symbol_cost [speed], 0);
4184         }
4185
4186       return new_cost (address_cost [speed], 0);
4187     }
4188
4189   switch (TREE_CODE (expr))
4190     {
4191     case POINTER_PLUS_EXPR:
4192     case PLUS_EXPR:
4193     case MINUS_EXPR:
4194     case MULT_EXPR:
4195       op0 = TREE_OPERAND (expr, 0);
4196       op1 = TREE_OPERAND (expr, 1);
4197       STRIP_NOPS (op0);
4198       STRIP_NOPS (op1);
4199       break;
4200
4201     CASE_CONVERT:
4202     case NEGATE_EXPR:
4203       op0 = TREE_OPERAND (expr, 0);
4204       STRIP_NOPS (op0);
4205       op1 = NULL_TREE;
4206       break;
4207
4208     default:
4209       /* Just an arbitrary value, FIXME.  */
4210       return new_cost (target_spill_cost[speed], 0);
4211     }
4212
4213   if (op0 == NULL_TREE
4214       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4215     cost0 = no_cost;
4216   else
4217     cost0 = force_expr_to_var_cost (op0, speed);
4218
4219   if (op1 == NULL_TREE
4220       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4221     cost1 = no_cost;
4222   else
4223     cost1 = force_expr_to_var_cost (op1, speed);
4224
4225   mode = TYPE_MODE (TREE_TYPE (expr));
4226   switch (TREE_CODE (expr))
4227     {
4228     case POINTER_PLUS_EXPR:
4229     case PLUS_EXPR:
4230     case MINUS_EXPR:
4231     case NEGATE_EXPR:
4232       cost = new_cost (add_cost (speed, mode), 0);
4233       if (TREE_CODE (expr) != NEGATE_EXPR)
4234         {
4235           tree mult = NULL_TREE;
4236           comp_cost sa_cost;
4237           if (TREE_CODE (op1) == MULT_EXPR)
4238             mult = op1;
4239           else if (TREE_CODE (op0) == MULT_EXPR)
4240             mult = op0;
4241
4242           if (mult != NULL_TREE
4243               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4244               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4245                                     speed, &sa_cost))
4246             return sa_cost;
4247         }
4248       break;
4249
4250     CASE_CONVERT:
4251       {
4252         tree inner_mode, outer_mode;
4253         outer_mode = TREE_TYPE (expr);
4254         inner_mode = TREE_TYPE (op0);
4255         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4256                                        TYPE_MODE (inner_mode), speed), 0);
4257       }
4258       break;
4259
4260     case MULT_EXPR:
4261       if (cst_and_fits_in_hwi (op0))
4262         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4263                                              mode, speed), 0);
4264       else if (cst_and_fits_in_hwi (op1))
4265         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4266                                              mode, speed), 0);
4267       else
4268         return new_cost (target_spill_cost [speed], 0);
4269       break;
4270
4271     default:
4272       gcc_unreachable ();
4273     }
4274
4275   cost = add_costs (cost, cost0);
4276   cost = add_costs (cost, cost1);
4277
4278   /* Bound the cost by target_spill_cost.  The parts of complicated
4279      computations often are either loop invariant or at least can
4280      be shared between several iv uses, so letting this grow without
4281      limits would not give reasonable results.  */
4282   if (cost.cost > (int) target_spill_cost [speed])
4283     cost.cost = target_spill_cost [speed];
4284
4285   return cost;
4286 }
4287
4288 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4289    invariants the computation depends on.  */
4290
4291 static comp_cost
4292 force_var_cost (struct ivopts_data *data,
4293                 tree expr, bitmap *depends_on)
4294 {
4295   if (depends_on)
4296     {
4297       fd_ivopts_data = data;
4298       walk_tree (&expr, find_depends, depends_on, NULL);
4299     }
4300
4301   return force_expr_to_var_cost (expr, data->speed);
4302 }
4303
4304 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4305    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4306    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4307    invariants the computation depends on.  */
4308
4309 static comp_cost
4310 split_address_cost (struct ivopts_data *data,
4311                     tree addr, bool *symbol_present, bool *var_present,
4312                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4313 {
4314   tree core;
4315   HOST_WIDE_INT bitsize;
4316   HOST_WIDE_INT bitpos;
4317   tree toffset;
4318   machine_mode mode;
4319   int unsignedp, volatilep;
4320
4321   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4322                               &unsignedp, &volatilep, false);
4323
4324   if (toffset != 0
4325       || bitpos % BITS_PER_UNIT != 0
4326       || TREE_CODE (core) != VAR_DECL)
4327     {
4328       *symbol_present = false;
4329       *var_present = true;
4330       fd_ivopts_data = data;
4331       if (depends_on)
4332         walk_tree (&addr, find_depends, depends_on, NULL);
4333
4334       return new_cost (target_spill_cost[data->speed], 0);
4335     }
4336
4337   *offset += bitpos / BITS_PER_UNIT;
4338   if (TREE_STATIC (core)
4339       || DECL_EXTERNAL (core))
4340     {
4341       *symbol_present = true;
4342       *var_present = false;
4343       return no_cost;
4344     }
4345
4346   *symbol_present = false;
4347   *var_present = true;
4348   return no_cost;
4349 }
4350
4351 /* Estimates cost of expressing difference of addresses E1 - E2 as
4352    var + symbol + offset.  The value of offset is added to OFFSET,
4353    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4354    part is missing.  DEPENDS_ON is a set of the invariants the computation
4355    depends on.  */
4356
4357 static comp_cost
4358 ptr_difference_cost (struct ivopts_data *data,
4359                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4360                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4361 {
4362   HOST_WIDE_INT diff = 0;
4363   aff_tree aff_e1, aff_e2;
4364   tree type;
4365
4366   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4367
4368   if (ptr_difference_const (e1, e2, &diff))
4369     {
4370       *offset += diff;
4371       *symbol_present = false;
4372       *var_present = false;
4373       return no_cost;
4374     }
4375
4376   if (integer_zerop (e2))
4377     return split_address_cost (data, TREE_OPERAND (e1, 0),
4378                                symbol_present, var_present, offset, depends_on);
4379
4380   *symbol_present = false;
4381   *var_present = true;
4382
4383   type = signed_type_for (TREE_TYPE (e1));
4384   tree_to_aff_combination (e1, type, &aff_e1);
4385   tree_to_aff_combination (e2, type, &aff_e2);
4386   aff_combination_scale (&aff_e2, -1);
4387   aff_combination_add (&aff_e1, &aff_e2);
4388
4389   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4390 }
4391
4392 /* Estimates cost of expressing difference E1 - E2 as
4393    var + symbol + offset.  The value of offset is added to OFFSET,
4394    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4395    part is missing.  DEPENDS_ON is a set of the invariants the computation
4396    depends on.  */
4397
4398 static comp_cost
4399 difference_cost (struct ivopts_data *data,
4400                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4401                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4402 {
4403   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4404   unsigned HOST_WIDE_INT off1, off2;
4405   aff_tree aff_e1, aff_e2;
4406   tree type;
4407
4408   e1 = strip_offset (e1, &off1);
4409   e2 = strip_offset (e2, &off2);
4410   *offset += off1 - off2;
4411
4412   STRIP_NOPS (e1);
4413   STRIP_NOPS (e2);
4414
4415   if (TREE_CODE (e1) == ADDR_EXPR)
4416     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4417                                 offset, depends_on);
4418   *symbol_present = false;
4419
4420   if (operand_equal_p (e1, e2, 0))
4421     {
4422       *var_present = false;
4423       return no_cost;
4424     }
4425
4426   *var_present = true;
4427
4428   if (integer_zerop (e2))
4429     return force_var_cost (data, e1, depends_on);
4430
4431   if (integer_zerop (e1))
4432     {
4433       comp_cost cost = force_var_cost (data, e2, depends_on);
4434       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4435       return cost;
4436     }
4437
4438   type = signed_type_for (TREE_TYPE (e1));
4439   tree_to_aff_combination (e1, type, &aff_e1);
4440   tree_to_aff_combination (e2, type, &aff_e2);
4441   aff_combination_scale (&aff_e2, -1);
4442   aff_combination_add (&aff_e1, &aff_e2);
4443
4444   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4445 }
4446
4447 /* Returns true if AFF1 and AFF2 are identical.  */
4448
4449 static bool
4450 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4451 {
4452   unsigned i;
4453
4454   if (aff1->n != aff2->n)
4455     return false;
4456
4457   for (i = 0; i < aff1->n; i++)
4458     {
4459       if (aff1->elts[i].coef != aff2->elts[i].coef)
4460         return false;
4461
4462       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4463         return false;
4464     }
4465   return true;
4466 }
4467
4468 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4469
4470 static int
4471 get_expr_id (struct ivopts_data *data, tree expr)
4472 {
4473   struct iv_inv_expr_ent ent;
4474   struct iv_inv_expr_ent **slot;
4475
4476   ent.expr = expr;
4477   ent.hash = iterative_hash_expr (expr, 0);
4478   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4479   if (*slot)
4480     return (*slot)->id;
4481
4482   *slot = XNEW (struct iv_inv_expr_ent);
4483   (*slot)->expr = expr;
4484   (*slot)->hash = ent.hash;
4485   (*slot)->id = data->inv_expr_id++;
4486   return (*slot)->id;
4487 }
4488
4489 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4490    requires a new compiler generated temporary.  Returns -1 otherwise.
4491    ADDRESS_P is a flag indicating if the expression is for address
4492    computation.  */
4493
4494 static int
4495 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4496                             tree cbase, HOST_WIDE_INT ratio,
4497                             bool address_p)
4498 {
4499   aff_tree ubase_aff, cbase_aff;
4500   tree expr, ub, cb;
4501
4502   STRIP_NOPS (ubase);
4503   STRIP_NOPS (cbase);
4504   ub = ubase;
4505   cb = cbase;
4506
4507   if ((TREE_CODE (ubase) == INTEGER_CST)
4508       && (TREE_CODE (cbase) == INTEGER_CST))
4509     return -1;
4510
4511   /* Strips the constant part. */
4512   if (TREE_CODE (ubase) == PLUS_EXPR
4513       || TREE_CODE (ubase) == MINUS_EXPR
4514       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4515     {
4516       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4517         ubase = TREE_OPERAND (ubase, 0);
4518     }
4519
4520   /* Strips the constant part. */
4521   if (TREE_CODE (cbase) == PLUS_EXPR
4522       || TREE_CODE (cbase) == MINUS_EXPR
4523       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4524     {
4525       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4526         cbase = TREE_OPERAND (cbase, 0);
4527     }
4528
4529   if (address_p)
4530     {
4531       if (((TREE_CODE (ubase) == SSA_NAME)
4532            || (TREE_CODE (ubase) == ADDR_EXPR
4533                && is_gimple_min_invariant (ubase)))
4534           && (TREE_CODE (cbase) == INTEGER_CST))
4535         return -1;
4536
4537       if (((TREE_CODE (cbase) == SSA_NAME)
4538            || (TREE_CODE (cbase) == ADDR_EXPR
4539                && is_gimple_min_invariant (cbase)))
4540           && (TREE_CODE (ubase) == INTEGER_CST))
4541         return -1;
4542     }
4543
4544   if (ratio == 1)
4545     {
4546       if (operand_equal_p (ubase, cbase, 0))
4547         return -1;
4548
4549       if (TREE_CODE (ubase) == ADDR_EXPR
4550           && TREE_CODE (cbase) == ADDR_EXPR)
4551         {
4552           tree usym, csym;
4553
4554           usym = TREE_OPERAND (ubase, 0);
4555           csym = TREE_OPERAND (cbase, 0);
4556           if (TREE_CODE (usym) == ARRAY_REF)
4557             {
4558               tree ind = TREE_OPERAND (usym, 1);
4559               if (TREE_CODE (ind) == INTEGER_CST
4560                   && tree_fits_shwi_p (ind)
4561                   && tree_to_shwi (ind) == 0)
4562                 usym = TREE_OPERAND (usym, 0);
4563             }
4564           if (TREE_CODE (csym) == ARRAY_REF)
4565             {
4566               tree ind = TREE_OPERAND (csym, 1);
4567               if (TREE_CODE (ind) == INTEGER_CST
4568                   && tree_fits_shwi_p (ind)
4569                   && tree_to_shwi (ind) == 0)
4570                 csym = TREE_OPERAND (csym, 0);
4571             }
4572           if (operand_equal_p (usym, csym, 0))
4573             return -1;
4574         }
4575       /* Now do more complex comparison  */
4576       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4577       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4578       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4579         return -1;
4580     }
4581
4582   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4583   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4584
4585   aff_combination_scale (&cbase_aff, -1 * ratio);
4586   aff_combination_add (&ubase_aff, &cbase_aff);
4587   expr = aff_combination_to_tree (&ubase_aff);
4588   return get_expr_id (data, expr);
4589 }
4590
4591
4592
4593 /* Determines the cost of the computation by that USE is expressed
4594    from induction variable CAND.  If ADDRESS_P is true, we just need
4595    to create an address from it, otherwise we want to get it into
4596    register.  A set of invariants we depend on is stored in
4597    DEPENDS_ON.  AT is the statement at that the value is computed.
4598    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4599    addressing is likely.  */
4600
4601 static comp_cost
4602 get_computation_cost_at (struct ivopts_data *data,
4603                          struct iv_use *use, struct iv_cand *cand,
4604                          bool address_p, bitmap *depends_on, gimple *at,
4605                          bool *can_autoinc,
4606                          int *inv_expr_id)
4607 {
4608   tree ubase = use->iv->base, ustep = use->iv->step;
4609   tree cbase, cstep;
4610   tree utype = TREE_TYPE (ubase), ctype;
4611   unsigned HOST_WIDE_INT cstepi, offset = 0;
4612   HOST_WIDE_INT ratio, aratio;
4613   bool var_present, symbol_present, stmt_is_after_inc;
4614   comp_cost cost;
4615   widest_int rat;
4616   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4617   machine_mode mem_mode = (address_p
4618                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4619                                 : VOIDmode);
4620
4621   if (depends_on)
4622     *depends_on = NULL;
4623
4624   /* Only consider real candidates.  */
4625   if (!cand->iv)
4626     return infinite_cost;
4627
4628   cbase = cand->iv->base;
4629   cstep = cand->iv->step;
4630   ctype = TREE_TYPE (cbase);
4631
4632   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4633     {
4634       /* We do not have a precision to express the values of use.  */
4635       return infinite_cost;
4636     }
4637
4638   if (address_p
4639       || (use->iv->base_object
4640           && cand->iv->base_object
4641           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4642           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4643     {
4644       /* Do not try to express address of an object with computation based
4645          on address of a different object.  This may cause problems in rtl
4646          level alias analysis (that does not expect this to be happening,
4647          as this is illegal in C), and would be unlikely to be useful
4648          anyway.  */
4649       if (use->iv->base_object
4650           && cand->iv->base_object
4651           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4652         return infinite_cost;
4653     }
4654
4655   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4656     {
4657       /* TODO -- add direct handling of this case.  */
4658       goto fallback;
4659     }
4660
4661   /* CSTEPI is removed from the offset in case statement is after the
4662      increment.  If the step is not constant, we use zero instead.
4663      This is a bit imprecise (there is the extra addition), but
4664      redundancy elimination is likely to transform the code so that
4665      it uses value of the variable before increment anyway,
4666      so it is not that much unrealistic.  */
4667   if (cst_and_fits_in_hwi (cstep))
4668     cstepi = int_cst_value (cstep);
4669   else
4670     cstepi = 0;
4671
4672   if (!constant_multiple_of (ustep, cstep, &rat))
4673     return infinite_cost;
4674
4675   if (wi::fits_shwi_p (rat))
4676     ratio = rat.to_shwi ();
4677   else
4678     return infinite_cost;
4679
4680   STRIP_NOPS (cbase);
4681   ctype = TREE_TYPE (cbase);
4682
4683   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4684
4685   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4686      or ratio == 1, it is better to handle this like
4687
4688      ubase - ratio * cbase + ratio * var
4689
4690      (also holds in the case ratio == -1, TODO.  */
4691
4692   if (cst_and_fits_in_hwi (cbase))
4693     {
4694       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4695       cost = difference_cost (data,
4696                               ubase, build_int_cst (utype, 0),
4697                               &symbol_present, &var_present, &offset,
4698                               depends_on);
4699       cost.cost /= avg_loop_niter (data->current_loop);
4700     }
4701   else if (ratio == 1)
4702     {
4703       tree real_cbase = cbase;
4704
4705       /* Check to see if any adjustment is needed.  */
4706       if (cstepi == 0 && stmt_is_after_inc)
4707         {
4708           aff_tree real_cbase_aff;
4709           aff_tree cstep_aff;
4710
4711           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4712                                    &real_cbase_aff);
4713           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4714
4715           aff_combination_add (&real_cbase_aff, &cstep_aff);
4716           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4717         }
4718
4719       cost = difference_cost (data,
4720                               ubase, real_cbase,
4721                               &symbol_present, &var_present, &offset,
4722                               depends_on);
4723       cost.cost /= avg_loop_niter (data->current_loop);
4724     }
4725   else if (address_p
4726            && !POINTER_TYPE_P (ctype)
4727            && multiplier_allowed_in_address_p
4728                 (ratio, mem_mode,
4729                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4730     {
4731       if (cstepi == 0 && stmt_is_after_inc)
4732         {
4733           if (POINTER_TYPE_P (ctype))
4734             cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4735           else
4736             cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4737         }
4738       cbase
4739         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4740       cost = difference_cost (data,
4741                               ubase, cbase,
4742                               &symbol_present, &var_present, &offset,
4743                               depends_on);
4744       cost.cost /= avg_loop_niter (data->current_loop);
4745     }
4746   else
4747     {
4748       cost = force_var_cost (data, cbase, depends_on);
4749       cost = add_costs (cost,
4750                         difference_cost (data,
4751                                          ubase, build_int_cst (utype, 0),
4752                                          &symbol_present, &var_present,
4753                                          &offset, depends_on));
4754       cost.cost /= avg_loop_niter (data->current_loop);
4755       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4756     }
4757
4758   /* Set of invariants depended on by sub use has already been computed
4759      for the first use in the group.  */
4760   if (use->sub_id)
4761     {
4762       cost.cost = 0;
4763       if (depends_on && *depends_on)
4764         bitmap_clear (*depends_on);
4765     }
4766   else if (inv_expr_id)
4767     {
4768       *inv_expr_id =
4769           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4770       /* Clear depends on.  */
4771       if (*inv_expr_id != -1 && depends_on && *depends_on)
4772         bitmap_clear (*depends_on);
4773     }
4774
4775   /* If we are after the increment, the value of the candidate is higher by
4776      one iteration.  */
4777   if (stmt_is_after_inc)
4778     offset -= ratio * cstepi;
4779
4780   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4781      (symbol/var1/const parts may be omitted).  If we are looking for an
4782      address, find the cost of addressing this.  */
4783   if (address_p)
4784     return add_costs (cost,
4785                       get_address_cost (symbol_present, var_present,
4786                                         offset, ratio, cstepi,
4787                                         mem_mode,
4788                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4789                                         speed, stmt_is_after_inc,
4790                                         can_autoinc));
4791
4792   /* Otherwise estimate the costs for computing the expression.  */
4793   if (!symbol_present && !var_present && !offset)
4794     {
4795       if (ratio != 1)
4796         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4797       return cost;
4798     }
4799
4800   /* Symbol + offset should be compile-time computable so consider that they
4801       are added once to the variable, if present.  */
4802   if (var_present && (symbol_present || offset))
4803     cost.cost += adjust_setup_cost (data,
4804                                     add_cost (speed, TYPE_MODE (ctype)));
4805
4806   /* Having offset does not affect runtime cost in case it is added to
4807      symbol, but it increases complexity.  */
4808   if (offset)
4809     cost.complexity++;
4810
4811   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4812
4813   aratio = ratio > 0 ? ratio : -ratio;
4814   if (aratio != 1)
4815     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4816   return cost;
4817
4818 fallback:
4819   if (can_autoinc)
4820     *can_autoinc = false;
4821
4822   {
4823     /* Just get the expression, expand it and measure the cost.  */
4824     tree comp = get_computation_at (data->current_loop, use, cand, at);
4825
4826     if (!comp)
4827       return infinite_cost;
4828
4829     if (address_p)
4830       comp = build_simple_mem_ref (comp);
4831
4832     return new_cost (computation_cost (comp, speed), 0);
4833   }
4834 }
4835
4836 /* Determines the cost of the computation by that USE is expressed
4837    from induction variable CAND.  If ADDRESS_P is true, we just need
4838    to create an address from it, otherwise we want to get it into
4839    register.  A set of invariants we depend on is stored in
4840    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4841    autoinc addressing is likely.  */
4842
4843 static comp_cost
4844 get_computation_cost (struct ivopts_data *data,
4845                       struct iv_use *use, struct iv_cand *cand,
4846                       bool address_p, bitmap *depends_on,
4847                       bool *can_autoinc, int *inv_expr_id)
4848 {
4849   return get_computation_cost_at (data,
4850                                   use, cand, address_p, depends_on, use->stmt,
4851                                   can_autoinc, inv_expr_id);
4852 }
4853
4854 /* Determines cost of basing replacement of USE on CAND in a generic
4855    expression.  */
4856
4857 static bool
4858 determine_use_iv_cost_generic (struct ivopts_data *data,
4859                                struct iv_use *use, struct iv_cand *cand)
4860 {
4861   bitmap depends_on;
4862   comp_cost cost;
4863   int inv_expr_id = -1;
4864
4865   /* The simple case first -- if we need to express value of the preserved
4866      original biv, the cost is 0.  This also prevents us from counting the
4867      cost of increment twice -- once at this use and once in the cost of
4868      the candidate.  */
4869   if (cand->pos == IP_ORIGINAL
4870       && cand->incremented_at == use->stmt)
4871     {
4872       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4873                        ERROR_MARK, -1);
4874       return true;
4875     }
4876
4877   cost = get_computation_cost (data, use, cand, false, &depends_on,
4878                                NULL, &inv_expr_id);
4879
4880   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4881                    inv_expr_id);
4882
4883   return !infinite_cost_p (cost);
4884 }
4885
4886 /* Determines cost of basing replacement of USE on CAND in an address.  */
4887
4888 static bool
4889 determine_use_iv_cost_address (struct ivopts_data *data,
4890                                struct iv_use *use, struct iv_cand *cand)
4891 {
4892   bitmap depends_on;
4893   bool can_autoinc;
4894   int inv_expr_id = -1;
4895   struct iv_use *sub_use;
4896   comp_cost sub_cost;
4897   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4898                                          &can_autoinc, &inv_expr_id);
4899
4900   if (cand->ainc_use == use)
4901     {
4902       if (can_autoinc)
4903         cost.cost -= cand->cost_step;
4904       /* If we generated the candidate solely for exploiting autoincrement
4905          opportunities, and it turns out it can't be used, set the cost to
4906          infinity to make sure we ignore it.  */
4907       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4908         cost = infinite_cost;
4909     }
4910   for (sub_use = use->next;
4911        sub_use && !infinite_cost_p (cost);
4912        sub_use = sub_use->next)
4913     {
4914       sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
4915                                        &can_autoinc, NULL);
4916       cost = add_costs (cost, sub_cost);
4917     }
4918
4919   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4920                    inv_expr_id);
4921
4922   return !infinite_cost_p (cost);
4923 }
4924
4925 /* Computes value of candidate CAND at position AT in iteration NITER, and
4926    stores it to VAL.  */
4927
4928 static void
4929 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4930                aff_tree *val)
4931 {
4932   aff_tree step, delta, nit;
4933   struct iv *iv = cand->iv;
4934   tree type = TREE_TYPE (iv->base);
4935   tree steptype = type;
4936   if (POINTER_TYPE_P (type))
4937     steptype = sizetype;
4938   steptype = unsigned_type_for (type);
4939
4940   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4941   aff_combination_convert (&step, steptype);
4942   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4943   aff_combination_convert (&nit, steptype);
4944   aff_combination_mult (&nit, &step, &delta);
4945   if (stmt_after_increment (loop, cand, at))
4946     aff_combination_add (&delta, &step);
4947
4948   tree_to_aff_combination (iv->base, type, val);
4949   if (!POINTER_TYPE_P (type))
4950     aff_combination_convert (val, steptype);
4951   aff_combination_add (val, &delta);
4952 }
4953
4954 /* Returns period of induction variable iv.  */
4955
4956 static tree
4957 iv_period (struct iv *iv)
4958 {
4959   tree step = iv->step, period, type;
4960   tree pow2div;
4961
4962   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4963
4964   type = unsigned_type_for (TREE_TYPE (step));
4965   /* Period of the iv is lcm (step, type_range)/step -1,
4966      i.e., N*type_range/step - 1. Since type range is power
4967      of two, N == (step >> num_of_ending_zeros_binary (step),
4968      so the final result is
4969
4970        (type_range >> num_of_ending_zeros_binary (step)) - 1
4971
4972   */
4973   pow2div = num_ending_zeros (step);
4974
4975   period = build_low_bits_mask (type,
4976                                 (TYPE_PRECISION (type)
4977                                  - tree_to_uhwi (pow2div)));
4978
4979   return period;
4980 }
4981
4982 /* Returns the comparison operator used when eliminating the iv USE.  */
4983
4984 static enum tree_code
4985 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4986 {
4987   struct loop *loop = data->current_loop;
4988   basic_block ex_bb;
4989   edge exit;
4990
4991   ex_bb = gimple_bb (use->stmt);
4992   exit = EDGE_SUCC (ex_bb, 0);
4993   if (flow_bb_inside_loop_p (loop, exit->dest))
4994     exit = EDGE_SUCC (ex_bb, 1);
4995
4996   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4997 }
4998
4999 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5000    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5001    calculation is performed in non-wrapping type.
5002
5003    TODO: More generally, we could test for the situation that
5004          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5005          This would require knowing the sign of OFFSET.  */
5006
5007 static bool
5008 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5009 {
5010   enum tree_code code;
5011   tree e1, e2;
5012   aff_tree aff_e1, aff_e2, aff_offset;
5013
5014   if (!nowrap_type_p (TREE_TYPE (base)))
5015     return false;
5016
5017   base = expand_simple_operations (base);
5018
5019   if (TREE_CODE (base) == SSA_NAME)
5020     {
5021       gimple *stmt = SSA_NAME_DEF_STMT (base);
5022
5023       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5024         return false;
5025
5026       code = gimple_assign_rhs_code (stmt);
5027       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5028         return false;
5029
5030       e1 = gimple_assign_rhs1 (stmt);
5031       e2 = gimple_assign_rhs2 (stmt);
5032     }
5033   else
5034     {
5035       code = TREE_CODE (base);
5036       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5037         return false;
5038       e1 = TREE_OPERAND (base, 0);
5039       e2 = TREE_OPERAND (base, 1);
5040     }
5041
5042   /* Use affine expansion as deeper inspection to prove the equality.  */
5043   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5044                                   &aff_e2, &data->name_expansion_cache);
5045   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5046                                   &aff_offset, &data->name_expansion_cache);
5047   aff_combination_scale (&aff_offset, -1);
5048   switch (code)
5049     {
5050     case PLUS_EXPR:
5051       aff_combination_add (&aff_e2, &aff_offset);
5052       if (aff_combination_zero_p (&aff_e2))
5053         return true;
5054
5055       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5056                                       &aff_e1, &data->name_expansion_cache);
5057       aff_combination_add (&aff_e1, &aff_offset);
5058       return aff_combination_zero_p (&aff_e1);
5059
5060     case POINTER_PLUS_EXPR:
5061       aff_combination_add (&aff_e2, &aff_offset);
5062       return aff_combination_zero_p (&aff_e2);
5063
5064     default:
5065       return false;
5066     }
5067 }
5068
5069 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5070    comparison with CAND.  NITER describes the number of iterations of
5071    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5072
5073    We aim to handle the following situation:
5074
5075    sometype *base, *p;
5076    int a, b, i;
5077
5078    i = a;
5079    p = p_0 = base + a;
5080
5081    do
5082      {
5083        bla (*p);
5084        p++;
5085        i++;
5086      }
5087    while (i < b);
5088
5089    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5090    We aim to optimize this to
5091
5092    p = p_0 = base + a;
5093    do
5094      {
5095        bla (*p);
5096        p++;
5097      }
5098    while (p < p_0 - a + b);
5099
5100    This preserves the correctness, since the pointer arithmetics does not
5101    overflow.  More precisely:
5102
5103    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5104       overflow in computing it or the values of p.
5105    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5106       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5107
5108 static bool
5109 iv_elimination_compare_lt (struct ivopts_data *data,
5110                            struct iv_cand *cand, enum tree_code *comp_p,
5111                            struct tree_niter_desc *niter)
5112 {
5113   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5114   struct aff_tree nit, tmpa, tmpb;
5115   enum tree_code comp;
5116   HOST_WIDE_INT step;
5117
5118   /* We need to know that the candidate induction variable does not overflow.
5119      While more complex analysis may be used to prove this, for now just
5120      check that the variable appears in the original program and that it
5121      is computed in a type that guarantees no overflows.  */
5122   cand_type = TREE_TYPE (cand->iv->base);
5123   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5124     return false;
5125
5126   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5127      the calculation of the BOUND could overflow, making the comparison
5128      invalid.  */
5129   if (!data->loop_single_exit_p)
5130     return false;
5131
5132   /* We need to be able to decide whether candidate is increasing or decreasing
5133      in order to choose the right comparison operator.  */
5134   if (!cst_and_fits_in_hwi (cand->iv->step))
5135     return false;
5136   step = int_cst_value (cand->iv->step);
5137
5138   /* Check that the number of iterations matches the expected pattern:
5139      a + 1 > b ? 0 : b - a - 1.  */
5140   mbz = niter->may_be_zero;
5141   if (TREE_CODE (mbz) == GT_EXPR)
5142     {
5143       /* Handle a + 1 > b.  */
5144       tree op0 = TREE_OPERAND (mbz, 0);
5145       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5146         {
5147           a = TREE_OPERAND (op0, 0);
5148           b = TREE_OPERAND (mbz, 1);
5149         }
5150       else
5151         return false;
5152     }
5153   else if (TREE_CODE (mbz) == LT_EXPR)
5154     {
5155       tree op1 = TREE_OPERAND (mbz, 1);
5156
5157       /* Handle b < a + 1.  */
5158       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5159         {
5160           a = TREE_OPERAND (op1, 0);
5161           b = TREE_OPERAND (mbz, 0);
5162         }
5163       else
5164         return false;
5165     }
5166   else
5167     return false;
5168
5169   /* Expected number of iterations is B - A - 1.  Check that it matches
5170      the actual number, i.e., that B - A - NITER = 1.  */
5171   tree_to_aff_combination (niter->niter, nit_type, &nit);
5172   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5173   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5174   aff_combination_scale (&nit, -1);
5175   aff_combination_scale (&tmpa, -1);
5176   aff_combination_add (&tmpb, &tmpa);
5177   aff_combination_add (&tmpb, &nit);
5178   if (tmpb.n != 0 || tmpb.offset != 1)
5179     return false;
5180
5181   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5182      overflow.  */
5183   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5184                         cand->iv->step,
5185                         fold_convert (TREE_TYPE (cand->iv->step), a));
5186   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5187     return false;
5188
5189   /* Determine the new comparison operator.  */
5190   comp = step < 0 ? GT_EXPR : LT_EXPR;
5191   if (*comp_p == NE_EXPR)
5192     *comp_p = comp;
5193   else if (*comp_p == EQ_EXPR)
5194     *comp_p = invert_tree_comparison (comp, false);
5195   else
5196     gcc_unreachable ();
5197
5198   return true;
5199 }
5200
5201 /* Check whether it is possible to express the condition in USE by comparison
5202    of candidate CAND.  If so, store the value compared with to BOUND, and the
5203    comparison operator to COMP.  */
5204
5205 static bool
5206 may_eliminate_iv (struct ivopts_data *data,
5207                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5208                   enum tree_code *comp)
5209 {
5210   basic_block ex_bb;
5211   edge exit;
5212   tree period;
5213   struct loop *loop = data->current_loop;
5214   aff_tree bnd;
5215   struct tree_niter_desc *desc = NULL;
5216
5217   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5218     return false;
5219
5220   /* For now works only for exits that dominate the loop latch.
5221      TODO: extend to other conditions inside loop body.  */
5222   ex_bb = gimple_bb (use->stmt);
5223   if (use->stmt != last_stmt (ex_bb)
5224       || gimple_code (use->stmt) != GIMPLE_COND
5225       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5226     return false;
5227
5228   exit = EDGE_SUCC (ex_bb, 0);
5229   if (flow_bb_inside_loop_p (loop, exit->dest))
5230     exit = EDGE_SUCC (ex_bb, 1);
5231   if (flow_bb_inside_loop_p (loop, exit->dest))
5232     return false;
5233
5234   desc = niter_for_exit (data, exit);
5235   if (!desc)
5236     return false;
5237
5238   /* Determine whether we can use the variable to test the exit condition.
5239      This is the case iff the period of the induction variable is greater
5240      than the number of iterations for which the exit condition is true.  */
5241   period = iv_period (cand->iv);
5242
5243   /* If the number of iterations is constant, compare against it directly.  */
5244   if (TREE_CODE (desc->niter) == INTEGER_CST)
5245     {
5246       /* See cand_value_at.  */
5247       if (stmt_after_increment (loop, cand, use->stmt))
5248         {
5249           if (!tree_int_cst_lt (desc->niter, period))
5250             return false;
5251         }
5252       else
5253         {
5254           if (tree_int_cst_lt (period, desc->niter))
5255             return false;
5256         }
5257     }
5258
5259   /* If not, and if this is the only possible exit of the loop, see whether
5260      we can get a conservative estimate on the number of iterations of the
5261      entire loop and compare against that instead.  */
5262   else
5263     {
5264       widest_int period_value, max_niter;
5265
5266       max_niter = desc->max;
5267       if (stmt_after_increment (loop, cand, use->stmt))
5268         max_niter += 1;
5269       period_value = wi::to_widest (period);
5270       if (wi::gtu_p (max_niter, period_value))
5271         {
5272           /* See if we can take advantage of inferred loop bound information.  */
5273           if (data->loop_single_exit_p)
5274             {
5275               if (!max_loop_iterations (loop, &max_niter))
5276                 return false;
5277               /* The loop bound is already adjusted by adding 1.  */
5278               if (wi::gtu_p (max_niter, period_value))
5279                 return false;
5280             }
5281           else
5282             return false;
5283         }
5284     }
5285
5286   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5287
5288   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5289                          aff_combination_to_tree (&bnd));
5290   *comp = iv_elimination_compare (data, use);
5291
5292   /* It is unlikely that computing the number of iterations using division
5293      would be more profitable than keeping the original induction variable.  */
5294   if (expression_expensive_p (*bound))
5295     return false;
5296
5297   /* Sometimes, it is possible to handle the situation that the number of
5298      iterations may be zero unless additional assumtions by using <
5299      instead of != in the exit condition.
5300
5301      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5302            base the exit condition on it.  However, that is often too
5303            expensive.  */
5304   if (!integer_zerop (desc->may_be_zero))
5305     return iv_elimination_compare_lt (data, cand, comp, desc);
5306
5307   return true;
5308 }
5309
5310  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5311     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5312
5313 static int
5314 parm_decl_cost (struct ivopts_data *data, tree bound)
5315 {
5316   tree sbound = bound;
5317   STRIP_NOPS (sbound);
5318
5319   if (TREE_CODE (sbound) == SSA_NAME
5320       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5321       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5322       && data->body_includes_call)
5323     return COSTS_N_INSNS (1);
5324
5325   return 0;
5326 }
5327
5328 /* Determines cost of basing replacement of USE on CAND in a condition.  */
5329
5330 static bool
5331 determine_use_iv_cost_condition (struct ivopts_data *data,
5332                                  struct iv_use *use, struct iv_cand *cand)
5333 {
5334   tree bound = NULL_TREE;
5335   struct iv *cmp_iv;
5336   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5337   comp_cost elim_cost, express_cost, cost, bound_cost;
5338   bool ok;
5339   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5340   tree *control_var, *bound_cst;
5341   enum tree_code comp = ERROR_MARK;
5342
5343   /* Only consider real candidates.  */
5344   if (!cand->iv)
5345     {
5346       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5347                        ERROR_MARK, -1);
5348       return false;
5349     }
5350
5351   /* Try iv elimination.  */
5352   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5353     {
5354       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5355       if (elim_cost.cost == 0)
5356         elim_cost.cost = parm_decl_cost (data, bound);
5357       else if (TREE_CODE (bound) == INTEGER_CST)
5358         elim_cost.cost = 0;
5359       /* If we replace a loop condition 'i < n' with 'p < base + n',
5360          depends_on_elim will have 'base' and 'n' set, which implies
5361          that both 'base' and 'n' will be live during the loop.  More likely,
5362          'base + n' will be loop invariant, resulting in only one live value
5363          during the loop.  So in that case we clear depends_on_elim and set
5364         elim_inv_expr_id instead.  */
5365       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5366         {
5367           elim_inv_expr_id = get_expr_id (data, bound);
5368           bitmap_clear (depends_on_elim);
5369         }
5370       /* The bound is a loop invariant, so it will be only computed
5371          once.  */
5372       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5373     }
5374   else
5375     elim_cost = infinite_cost;
5376
5377   /* Try expressing the original giv.  If it is compared with an invariant,
5378      note that we cannot get rid of it.  */
5379   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5380                               NULL, &cmp_iv);
5381   gcc_assert (ok);
5382
5383   /* When the condition is a comparison of the candidate IV against
5384      zero, prefer this IV.
5385
5386      TODO: The constant that we're subtracting from the cost should
5387      be target-dependent.  This information should be added to the
5388      target costs for each backend.  */
5389   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5390       && integer_zerop (*bound_cst)
5391       && (operand_equal_p (*control_var, cand->var_after, 0)
5392           || operand_equal_p (*control_var, cand->var_before, 0)))
5393     elim_cost.cost -= 1;
5394
5395   express_cost = get_computation_cost (data, use, cand, false,
5396                                        &depends_on_express, NULL,
5397                                        &express_inv_expr_id);
5398   fd_ivopts_data = data;
5399   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5400
5401   /* Count the cost of the original bound as well.  */
5402   bound_cost = force_var_cost (data, *bound_cst, NULL);
5403   if (bound_cost.cost == 0)
5404     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5405   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5406     bound_cost.cost = 0;
5407   express_cost.cost += bound_cost.cost;
5408
5409   /* Choose the better approach, preferring the eliminated IV. */
5410   if (compare_costs (elim_cost, express_cost) <= 0)
5411     {
5412       cost = elim_cost;
5413       depends_on = depends_on_elim;
5414       depends_on_elim = NULL;
5415       inv_expr_id = elim_inv_expr_id;
5416     }
5417   else
5418     {
5419       cost = express_cost;
5420       depends_on = depends_on_express;
5421       depends_on_express = NULL;
5422       bound = NULL_TREE;
5423       comp = ERROR_MARK;
5424       inv_expr_id = express_inv_expr_id;
5425     }
5426
5427   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5428
5429   if (depends_on_elim)
5430     BITMAP_FREE (depends_on_elim);
5431   if (depends_on_express)
5432     BITMAP_FREE (depends_on_express);
5433
5434   return !infinite_cost_p (cost);
5435 }
5436
5437 /* Determines cost of basing replacement of USE on CAND.  Returns false
5438    if USE cannot be based on CAND.  */
5439
5440 static bool
5441 determine_use_iv_cost (struct ivopts_data *data,
5442                        struct iv_use *use, struct iv_cand *cand)
5443 {
5444   switch (use->type)
5445     {
5446     case USE_NONLINEAR_EXPR:
5447       return determine_use_iv_cost_generic (data, use, cand);
5448
5449     case USE_ADDRESS:
5450       return determine_use_iv_cost_address (data, use, cand);
5451
5452     case USE_COMPARE:
5453       return determine_use_iv_cost_condition (data, use, cand);
5454
5455     default:
5456       gcc_unreachable ();
5457     }
5458 }
5459
5460 /* Return true if get_computation_cost indicates that autoincrement is
5461    a possibility for the pair of USE and CAND, false otherwise.  */
5462
5463 static bool
5464 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5465                            struct iv_cand *cand)
5466 {
5467   bitmap depends_on;
5468   bool can_autoinc;
5469   comp_cost cost;
5470
5471   if (use->type != USE_ADDRESS)
5472     return false;
5473
5474   cost = get_computation_cost (data, use, cand, true, &depends_on,
5475                                &can_autoinc, NULL);
5476
5477   BITMAP_FREE (depends_on);
5478
5479   return !infinite_cost_p (cost) && can_autoinc;
5480 }
5481
5482 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5483    use that allows autoincrement, and set their AINC_USE if possible.  */
5484
5485 static void
5486 set_autoinc_for_original_candidates (struct ivopts_data *data)
5487 {
5488   unsigned i, j;
5489
5490   for (i = 0; i < n_iv_cands (data); i++)
5491     {
5492       struct iv_cand *cand = iv_cand (data, i);
5493       struct iv_use *closest_before = NULL;
5494       struct iv_use *closest_after = NULL;
5495       if (cand->pos != IP_ORIGINAL)
5496         continue;
5497
5498       for (j = 0; j < n_iv_uses (data); j++)
5499         {
5500           struct iv_use *use = iv_use (data, j);
5501           unsigned uid = gimple_uid (use->stmt);
5502
5503           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5504             continue;
5505
5506           if (uid < gimple_uid (cand->incremented_at)
5507               && (closest_before == NULL
5508                   || uid > gimple_uid (closest_before->stmt)))
5509             closest_before = use;
5510
5511           if (uid > gimple_uid (cand->incremented_at)
5512               && (closest_after == NULL
5513                   || uid < gimple_uid (closest_after->stmt)))
5514             closest_after = use;
5515         }
5516
5517       if (closest_before != NULL
5518           && autoinc_possible_for_pair (data, closest_before, cand))
5519         cand->ainc_use = closest_before;
5520       else if (closest_after != NULL
5521                && autoinc_possible_for_pair (data, closest_after, cand))
5522         cand->ainc_use = closest_after;
5523     }
5524 }
5525
5526 /* Finds the candidates for the induction variables.  */
5527
5528 static void
5529 find_iv_candidates (struct ivopts_data *data)
5530 {
5531   /* Add commonly used ivs.  */
5532   add_standard_iv_candidates (data);
5533
5534   /* Add old induction variables.  */
5535   add_iv_candidate_for_bivs (data);
5536
5537   /* Add induction variables derived from uses.  */
5538   add_iv_candidate_for_uses (data);
5539
5540   set_autoinc_for_original_candidates (data);
5541
5542   /* Record the important candidates.  */
5543   record_important_candidates (data);
5544 }
5545
5546 /* Determines costs of basing the use of the iv on an iv candidate.  */
5547
5548 static void
5549 determine_use_iv_costs (struct ivopts_data *data)
5550 {
5551   unsigned i, j;
5552   struct iv_use *use;
5553   struct iv_cand *cand;
5554   bitmap to_clear = BITMAP_ALLOC (NULL);
5555
5556   alloc_use_cost_map (data);
5557
5558   for (i = 0; i < n_iv_uses (data); i++)
5559     {
5560       use = iv_use (data, i);
5561
5562       if (data->consider_all_candidates)
5563         {
5564           for (j = 0; j < n_iv_cands (data); j++)
5565             {
5566               cand = iv_cand (data, j);
5567               determine_use_iv_cost (data, use, cand);
5568             }
5569         }
5570       else
5571         {
5572           bitmap_iterator bi;
5573
5574           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5575             {
5576               cand = iv_cand (data, j);
5577               if (!determine_use_iv_cost (data, use, cand))
5578                 bitmap_set_bit (to_clear, j);
5579             }
5580
5581           /* Remove the candidates for that the cost is infinite from
5582              the list of related candidates.  */
5583           bitmap_and_compl_into (use->related_cands, to_clear);
5584           bitmap_clear (to_clear);
5585         }
5586     }
5587
5588   BITMAP_FREE (to_clear);
5589
5590   if (dump_file && (dump_flags & TDF_DETAILS))
5591     {
5592       fprintf (dump_file, "Use-candidate costs:\n");
5593
5594       for (i = 0; i < n_iv_uses (data); i++)
5595         {
5596           use = iv_use (data, i);
5597
5598           fprintf (dump_file, "Use %d:\n", i);
5599           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5600           for (j = 0; j < use->n_map_members; j++)
5601             {
5602               if (!use->cost_map[j].cand
5603                   || infinite_cost_p (use->cost_map[j].cost))
5604                 continue;
5605
5606               fprintf (dump_file, "  %d\t%d\t%d\t",
5607                        use->cost_map[j].cand->id,
5608                        use->cost_map[j].cost.cost,
5609                        use->cost_map[j].cost.complexity);
5610               if (use->cost_map[j].depends_on)
5611                 bitmap_print (dump_file,
5612                               use->cost_map[j].depends_on, "","");
5613               if (use->cost_map[j].inv_expr_id != -1)
5614                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5615               fprintf (dump_file, "\n");
5616             }
5617
5618           fprintf (dump_file, "\n");
5619         }
5620       fprintf (dump_file, "\n");
5621     }
5622 }
5623
5624 /* Determines cost of the candidate CAND.  */
5625
5626 static void
5627 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5628 {
5629   comp_cost cost_base;
5630   unsigned cost, cost_step;
5631   tree base;
5632
5633   if (!cand->iv)
5634     {
5635       cand->cost = 0;
5636       return;
5637     }
5638
5639   /* There are two costs associated with the candidate -- its increment
5640      and its initialization.  The second is almost negligible for any loop
5641      that rolls enough, so we take it just very little into account.  */
5642
5643   base = cand->iv->base;
5644   cost_base = force_var_cost (data, base, NULL);
5645   /* It will be exceptional that the iv register happens to be initialized with
5646      the proper value at no cost.  In general, there will at least be a regcopy
5647      or a const set.  */
5648   if (cost_base.cost == 0)
5649     cost_base.cost = COSTS_N_INSNS (1);
5650   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5651
5652   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5653
5654   /* Prefer the original ivs unless we may gain something by replacing it.
5655      The reason is to make debugging simpler; so this is not relevant for
5656      artificial ivs created by other optimization passes.  */
5657   if (cand->pos != IP_ORIGINAL
5658       || !SSA_NAME_VAR (cand->var_before)
5659       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5660     cost++;
5661
5662   /* Prefer not to insert statements into latch unless there are some
5663      already (so that we do not create unnecessary jumps).  */
5664   if (cand->pos == IP_END
5665       && empty_block_p (ip_end_pos (data->current_loop)))
5666     cost++;
5667
5668   cand->cost = cost;
5669   cand->cost_step = cost_step;
5670 }
5671
5672 /* Determines costs of computation of the candidates.  */
5673
5674 static void
5675 determine_iv_costs (struct ivopts_data *data)
5676 {
5677   unsigned i;
5678
5679   if (dump_file && (dump_flags & TDF_DETAILS))
5680     {
5681       fprintf (dump_file, "Candidate costs:\n");
5682       fprintf (dump_file, "  cand\tcost\n");
5683     }
5684
5685   for (i = 0; i < n_iv_cands (data); i++)
5686     {
5687       struct iv_cand *cand = iv_cand (data, i);
5688
5689       determine_iv_cost (data, cand);
5690
5691       if (dump_file && (dump_flags & TDF_DETAILS))
5692         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5693     }
5694
5695   if (dump_file && (dump_flags & TDF_DETAILS))
5696     fprintf (dump_file, "\n");
5697 }
5698
5699 /* Calculates cost for having SIZE induction variables.  */
5700
5701 static unsigned
5702 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5703 {
5704   /* We add size to the cost, so that we prefer eliminating ivs
5705      if possible.  */
5706   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5707                                             data->body_includes_call);
5708 }
5709
5710 /* For each size of the induction variable set determine the penalty.  */
5711
5712 static void
5713 determine_set_costs (struct ivopts_data *data)
5714 {
5715   unsigned j, n;
5716   gphi *phi;
5717   gphi_iterator psi;
5718   tree op;
5719   struct loop *loop = data->current_loop;
5720   bitmap_iterator bi;
5721
5722   if (dump_file && (dump_flags & TDF_DETAILS))
5723     {
5724       fprintf (dump_file, "Global costs:\n");
5725       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5726       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5727       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5728       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5729     }
5730
5731   n = 0;
5732   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5733     {
5734       phi = psi.phi ();
5735       op = PHI_RESULT (phi);
5736
5737       if (virtual_operand_p (op))
5738         continue;
5739
5740       if (get_iv (data, op))
5741         continue;
5742
5743       n++;
5744     }
5745
5746   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5747     {
5748       struct version_info *info = ver_info (data, j);
5749
5750       if (info->inv_id && info->has_nonlin_use)
5751         n++;
5752     }
5753
5754   data->regs_used = n;
5755   if (dump_file && (dump_flags & TDF_DETAILS))
5756     fprintf (dump_file, "  regs_used %d\n", n);
5757
5758   if (dump_file && (dump_flags & TDF_DETAILS))
5759     {
5760       fprintf (dump_file, "  cost for size:\n");
5761       fprintf (dump_file, "  ivs\tcost\n");
5762       for (j = 0; j <= 2 * target_avail_regs; j++)
5763         fprintf (dump_file, "  %d\t%d\n", j,
5764                  ivopts_global_cost_for_size (data, j));
5765       fprintf (dump_file, "\n");
5766     }
5767 }
5768
5769 /* Returns true if A is a cheaper cost pair than B.  */
5770
5771 static bool
5772 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5773 {
5774   int cmp;
5775
5776   if (!a)
5777     return false;
5778
5779   if (!b)
5780     return true;
5781
5782   cmp = compare_costs (a->cost, b->cost);
5783   if (cmp < 0)
5784     return true;
5785
5786   if (cmp > 0)
5787     return false;
5788
5789   /* In case the costs are the same, prefer the cheaper candidate.  */
5790   if (a->cand->cost < b->cand->cost)
5791     return true;
5792
5793   return false;
5794 }
5795
5796
5797 /* Returns candidate by that USE is expressed in IVS.  */
5798
5799 static struct cost_pair *
5800 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5801 {
5802   return ivs->cand_for_use[use->id];
5803 }
5804
5805 /* Computes the cost field of IVS structure.  */
5806
5807 static void
5808 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5809 {
5810   comp_cost cost = ivs->cand_use_cost;
5811
5812   cost.cost += ivs->cand_cost;
5813
5814   cost.cost += ivopts_global_cost_for_size (data,
5815                                             ivs->n_regs + ivs->num_used_inv_expr);
5816
5817   ivs->cost = cost;
5818 }
5819
5820 /* Remove invariants in set INVS to set IVS.  */
5821
5822 static void
5823 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5824 {
5825   bitmap_iterator bi;
5826   unsigned iid;
5827
5828   if (!invs)
5829     return;
5830
5831   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5832     {
5833       ivs->n_invariant_uses[iid]--;
5834       if (ivs->n_invariant_uses[iid] == 0)
5835         ivs->n_regs--;
5836     }
5837 }
5838
5839 /* Set USE not to be expressed by any candidate in IVS.  */
5840
5841 static void
5842 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5843                  struct iv_use *use)
5844 {
5845   unsigned uid = use->id, cid;
5846   struct cost_pair *cp;
5847
5848   cp = ivs->cand_for_use[uid];
5849   if (!cp)
5850     return;
5851   cid = cp->cand->id;
5852
5853   ivs->bad_uses++;
5854   ivs->cand_for_use[uid] = NULL;
5855   ivs->n_cand_uses[cid]--;
5856
5857   if (ivs->n_cand_uses[cid] == 0)
5858     {
5859       bitmap_clear_bit (ivs->cands, cid);
5860       /* Do not count the pseudocandidates.  */
5861       if (cp->cand->iv)
5862         ivs->n_regs--;
5863       ivs->n_cands--;
5864       ivs->cand_cost -= cp->cand->cost;
5865
5866       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5867     }
5868
5869   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5870
5871   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5872
5873   if (cp->inv_expr_id != -1)
5874     {
5875       ivs->used_inv_expr[cp->inv_expr_id]--;
5876       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5877         ivs->num_used_inv_expr--;
5878     }
5879   iv_ca_recount_cost (data, ivs);
5880 }
5881
5882 /* Add invariants in set INVS to set IVS.  */
5883
5884 static void
5885 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5886 {
5887   bitmap_iterator bi;
5888   unsigned iid;
5889
5890   if (!invs)
5891     return;
5892
5893   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5894     {
5895       ivs->n_invariant_uses[iid]++;
5896       if (ivs->n_invariant_uses[iid] == 1)
5897         ivs->n_regs++;
5898     }
5899 }
5900
5901 /* Set cost pair for USE in set IVS to CP.  */
5902
5903 static void
5904 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5905               struct iv_use *use, struct cost_pair *cp)
5906 {
5907   unsigned uid = use->id, cid;
5908
5909   if (ivs->cand_for_use[uid] == cp)
5910     return;
5911
5912   if (ivs->cand_for_use[uid])
5913     iv_ca_set_no_cp (data, ivs, use);
5914
5915   if (cp)
5916     {
5917       cid = cp->cand->id;
5918
5919       ivs->bad_uses--;
5920       ivs->cand_for_use[uid] = cp;
5921       ivs->n_cand_uses[cid]++;
5922       if (ivs->n_cand_uses[cid] == 1)
5923         {
5924           bitmap_set_bit (ivs->cands, cid);
5925           /* Do not count the pseudocandidates.  */
5926           if (cp->cand->iv)
5927             ivs->n_regs++;
5928           ivs->n_cands++;
5929           ivs->cand_cost += cp->cand->cost;
5930
5931           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5932         }
5933
5934       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5935       iv_ca_set_add_invariants (ivs, cp->depends_on);
5936
5937       if (cp->inv_expr_id != -1)
5938         {
5939           ivs->used_inv_expr[cp->inv_expr_id]++;
5940           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5941             ivs->num_used_inv_expr++;
5942         }
5943       iv_ca_recount_cost (data, ivs);
5944     }
5945 }
5946
5947 /* Extend set IVS by expressing USE by some of the candidates in it
5948    if possible.  Consider all important candidates if candidates in
5949    set IVS don't give any result.  */
5950
5951 static void
5952 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5953                struct iv_use *use)
5954 {
5955   struct cost_pair *best_cp = NULL, *cp;
5956   bitmap_iterator bi;
5957   unsigned i;
5958   struct iv_cand *cand;
5959
5960   gcc_assert (ivs->upto >= use->id);
5961   ivs->upto++;
5962   ivs->bad_uses++;
5963
5964   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5965     {
5966       cand = iv_cand (data, i);
5967       cp = get_use_iv_cost (data, use, cand);
5968       if (cheaper_cost_pair (cp, best_cp))
5969         best_cp = cp;
5970     }
5971
5972   if (best_cp == NULL)
5973     {
5974       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5975         {
5976           cand = iv_cand (data, i);
5977           cp = get_use_iv_cost (data, use, cand);
5978           if (cheaper_cost_pair (cp, best_cp))
5979             best_cp = cp;
5980         }
5981     }
5982
5983   iv_ca_set_cp (data, ivs, use, best_cp);
5984 }
5985
5986 /* Get cost for assignment IVS.  */
5987
5988 static comp_cost
5989 iv_ca_cost (struct iv_ca *ivs)
5990 {
5991   /* This was a conditional expression but it triggered a bug in
5992      Sun C 5.5.  */
5993   if (ivs->bad_uses)
5994     return infinite_cost;
5995   else
5996     return ivs->cost;
5997 }
5998
5999 /* Returns true if all dependences of CP are among invariants in IVS.  */
6000
6001 static bool
6002 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6003 {
6004   unsigned i;
6005   bitmap_iterator bi;
6006
6007   if (!cp->depends_on)
6008     return true;
6009
6010   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6011     {
6012       if (ivs->n_invariant_uses[i] == 0)
6013         return false;
6014     }
6015
6016   return true;
6017 }
6018
6019 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6020    it before NEXT_CHANGE.  */
6021
6022 static struct iv_ca_delta *
6023 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6024                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6025 {
6026   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6027
6028   change->use = use;
6029   change->old_cp = old_cp;
6030   change->new_cp = new_cp;
6031   change->next_change = next_change;
6032
6033   return change;
6034 }
6035
6036 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6037    are rewritten.  */
6038
6039 static struct iv_ca_delta *
6040 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6041 {
6042   struct iv_ca_delta *last;
6043
6044   if (!l2)
6045     return l1;
6046
6047   if (!l1)
6048     return l2;
6049
6050   for (last = l1; last->next_change; last = last->next_change)
6051     continue;
6052   last->next_change = l2;
6053
6054   return l1;
6055 }
6056
6057 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6058
6059 static struct iv_ca_delta *
6060 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6061 {
6062   struct iv_ca_delta *act, *next, *prev = NULL;
6063
6064   for (act = delta; act; act = next)
6065     {
6066       next = act->next_change;
6067       act->next_change = prev;
6068       prev = act;
6069
6070       std::swap (act->old_cp, act->new_cp);
6071     }
6072
6073   return prev;
6074 }
6075
6076 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6077    reverted instead.  */
6078
6079 static void
6080 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6081                     struct iv_ca_delta *delta, bool forward)
6082 {
6083   struct cost_pair *from, *to;
6084   struct iv_ca_delta *act;
6085
6086   if (!forward)
6087     delta = iv_ca_delta_reverse (delta);
6088
6089   for (act = delta; act; act = act->next_change)
6090     {
6091       from = act->old_cp;
6092       to = act->new_cp;
6093       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6094       iv_ca_set_cp (data, ivs, act->use, to);
6095     }
6096
6097   if (!forward)
6098     iv_ca_delta_reverse (delta);
6099 }
6100
6101 /* Returns true if CAND is used in IVS.  */
6102
6103 static bool
6104 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6105 {
6106   return ivs->n_cand_uses[cand->id] > 0;
6107 }
6108
6109 /* Returns number of induction variable candidates in the set IVS.  */
6110
6111 static unsigned
6112 iv_ca_n_cands (struct iv_ca *ivs)
6113 {
6114   return ivs->n_cands;
6115 }
6116
6117 /* Free the list of changes DELTA.  */
6118
6119 static void
6120 iv_ca_delta_free (struct iv_ca_delta **delta)
6121 {
6122   struct iv_ca_delta *act, *next;
6123
6124   for (act = *delta; act; act = next)
6125     {
6126       next = act->next_change;
6127       free (act);
6128     }
6129
6130   *delta = NULL;
6131 }
6132
6133 /* Allocates new iv candidates assignment.  */
6134
6135 static struct iv_ca *
6136 iv_ca_new (struct ivopts_data *data)
6137 {
6138   struct iv_ca *nw = XNEW (struct iv_ca);
6139
6140   nw->upto = 0;
6141   nw->bad_uses = 0;
6142   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6143   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6144   nw->cands = BITMAP_ALLOC (NULL);
6145   nw->n_cands = 0;
6146   nw->n_regs = 0;
6147   nw->cand_use_cost = no_cost;
6148   nw->cand_cost = 0;
6149   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6150   nw->cost = no_cost;
6151   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6152   nw->num_used_inv_expr = 0;
6153
6154   return nw;
6155 }
6156
6157 /* Free memory occupied by the set IVS.  */
6158
6159 static void
6160 iv_ca_free (struct iv_ca **ivs)
6161 {
6162   free ((*ivs)->cand_for_use);
6163   free ((*ivs)->n_cand_uses);
6164   BITMAP_FREE ((*ivs)->cands);
6165   free ((*ivs)->n_invariant_uses);
6166   free ((*ivs)->used_inv_expr);
6167   free (*ivs);
6168   *ivs = NULL;
6169 }
6170
6171 /* Dumps IVS to FILE.  */
6172
6173 static void
6174 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6175 {
6176   const char *pref = "  invariants ";
6177   unsigned i;
6178   comp_cost cost = iv_ca_cost (ivs);
6179
6180   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6181   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
6182            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6183   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6184
6185    for (i = 0; i < ivs->upto; i++)
6186     {
6187       struct iv_use *use = iv_use (data, i);
6188       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6189       if (cp)
6190         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6191                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6192       else
6193         fprintf (file, "   use:%d --> ??\n", use->id);
6194     }
6195
6196   for (i = 1; i <= data->max_inv_id; i++)
6197     if (ivs->n_invariant_uses[i])
6198       {
6199         fprintf (file, "%s%d", pref, i);
6200         pref = ", ";
6201       }
6202   fprintf (file, "\n\n");
6203 }
6204
6205 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6206    new set, and store differences in DELTA.  Number of induction variables
6207    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6208    the function will try to find a solution with mimimal iv candidates.  */
6209
6210 static comp_cost
6211 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6212               struct iv_cand *cand, struct iv_ca_delta **delta,
6213               unsigned *n_ivs, bool min_ncand)
6214 {
6215   unsigned i;
6216   comp_cost cost;
6217   struct iv_use *use;
6218   struct cost_pair *old_cp, *new_cp;
6219
6220   *delta = NULL;
6221   for (i = 0; i < ivs->upto; i++)
6222     {
6223       use = iv_use (data, i);
6224       old_cp = iv_ca_cand_for_use (ivs, use);
6225
6226       if (old_cp
6227           && old_cp->cand == cand)
6228         continue;
6229
6230       new_cp = get_use_iv_cost (data, use, cand);
6231       if (!new_cp)
6232         continue;
6233
6234       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6235         continue;
6236
6237       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6238         continue;
6239
6240       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6241     }
6242
6243   iv_ca_delta_commit (data, ivs, *delta, true);
6244   cost = iv_ca_cost (ivs);
6245   if (n_ivs)
6246     *n_ivs = iv_ca_n_cands (ivs);
6247   iv_ca_delta_commit (data, ivs, *delta, false);
6248
6249   return cost;
6250 }
6251
6252 /* Try narrowing set IVS by removing CAND.  Return the cost of
6253    the new set and store the differences in DELTA.  START is
6254    the candidate with which we start narrowing.  */
6255
6256 static comp_cost
6257 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6258               struct iv_cand *cand, struct iv_cand *start,
6259               struct iv_ca_delta **delta)
6260 {
6261   unsigned i, ci;
6262   struct iv_use *use;
6263   struct cost_pair *old_cp, *new_cp, *cp;
6264   bitmap_iterator bi;
6265   struct iv_cand *cnd;
6266   comp_cost cost, best_cost, acost;
6267
6268   *delta = NULL;
6269   for (i = 0; i < n_iv_uses (data); i++)
6270     {
6271       use = iv_use (data, i);
6272
6273       old_cp = iv_ca_cand_for_use (ivs, use);
6274       if (old_cp->cand != cand)
6275         continue;
6276
6277       best_cost = iv_ca_cost (ivs);
6278       /* Start narrowing with START.  */
6279       new_cp = get_use_iv_cost (data, use, start);
6280
6281       if (data->consider_all_candidates)
6282         {
6283           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6284             {
6285               if (ci == cand->id || (start && ci == start->id))
6286                 continue;
6287
6288               cnd = iv_cand (data, ci);
6289
6290               cp = get_use_iv_cost (data, use, cnd);
6291               if (!cp)
6292                 continue;
6293
6294               iv_ca_set_cp (data, ivs, use, cp);
6295               acost = iv_ca_cost (ivs);
6296
6297               if (compare_costs (acost, best_cost) < 0)
6298                 {
6299                   best_cost = acost;
6300                   new_cp = cp;
6301                 }
6302             }
6303         }
6304       else
6305         {
6306           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6307             {
6308               if (ci == cand->id || (start && ci == start->id))
6309                 continue;
6310
6311               cnd = iv_cand (data, ci);
6312
6313               cp = get_use_iv_cost (data, use, cnd);
6314               if (!cp)
6315                 continue;
6316
6317               iv_ca_set_cp (data, ivs, use, cp);
6318               acost = iv_ca_cost (ivs);
6319
6320               if (compare_costs (acost, best_cost) < 0)
6321                 {
6322                   best_cost = acost;
6323                   new_cp = cp;
6324                 }
6325             }
6326         }
6327       /* Restore to old cp for use.  */
6328       iv_ca_set_cp (data, ivs, use, old_cp);
6329
6330       if (!new_cp)
6331         {
6332           iv_ca_delta_free (delta);
6333           return infinite_cost;
6334         }
6335
6336       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6337     }
6338
6339   iv_ca_delta_commit (data, ivs, *delta, true);
6340   cost = iv_ca_cost (ivs);
6341   iv_ca_delta_commit (data, ivs, *delta, false);
6342
6343   return cost;
6344 }
6345
6346 /* Try optimizing the set of candidates IVS by removing candidates different
6347    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6348    differences in DELTA.  */
6349
6350 static comp_cost
6351 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6352              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6353 {
6354   bitmap_iterator bi;
6355   struct iv_ca_delta *act_delta, *best_delta;
6356   unsigned i;
6357   comp_cost best_cost, acost;
6358   struct iv_cand *cand;
6359
6360   best_delta = NULL;
6361   best_cost = iv_ca_cost (ivs);
6362
6363   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6364     {
6365       cand = iv_cand (data, i);
6366
6367       if (cand == except_cand)
6368         continue;
6369
6370       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6371
6372       if (compare_costs (acost, best_cost) < 0)
6373         {
6374           best_cost = acost;
6375           iv_ca_delta_free (&best_delta);
6376           best_delta = act_delta;
6377         }
6378       else
6379         iv_ca_delta_free (&act_delta);
6380     }
6381
6382   if (!best_delta)
6383     {
6384       *delta = NULL;
6385       return best_cost;
6386     }
6387
6388   /* Recurse to possibly remove other unnecessary ivs.  */
6389   iv_ca_delta_commit (data, ivs, best_delta, true);
6390   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6391   iv_ca_delta_commit (data, ivs, best_delta, false);
6392   *delta = iv_ca_delta_join (best_delta, *delta);
6393   return best_cost;
6394 }
6395
6396 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6397    cheaper local cost for USE than BEST_CP.  Return pointer to
6398    the corresponding cost_pair, otherwise just return BEST_CP.  */
6399
6400 static struct cost_pair*
6401 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6402                         unsigned int cand_idx, struct iv_cand *old_cand,
6403                         struct cost_pair *best_cp)
6404 {
6405   struct iv_cand *cand;
6406   struct cost_pair *cp;
6407
6408   gcc_assert (old_cand != NULL && best_cp != NULL);
6409   if (cand_idx == old_cand->id)
6410     return best_cp;
6411
6412   cand = iv_cand (data, cand_idx);
6413   cp = get_use_iv_cost (data, use, cand);
6414   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6415     return cp;
6416
6417   return best_cp;
6418 }
6419
6420 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6421    which are used by more than one iv uses.  For each of those candidates,
6422    this function tries to represent iv uses under that candidate using
6423    other ones with lower local cost, then tries to prune the new set.
6424    If the new set has lower cost, It returns the new cost after recording
6425    candidate replacement in list DELTA.  */
6426
6427 static comp_cost
6428 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6429                struct iv_ca_delta **delta)
6430 {
6431   bitmap_iterator bi, bj;
6432   unsigned int i, j, k;
6433   struct iv_use *use;
6434   struct iv_cand *cand;
6435   comp_cost orig_cost, acost;
6436   struct iv_ca_delta *act_delta, *tmp_delta;
6437   struct cost_pair *old_cp, *best_cp = NULL;
6438
6439   *delta = NULL;
6440   orig_cost = iv_ca_cost (ivs);
6441
6442   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6443     {
6444       if (ivs->n_cand_uses[i] == 1
6445           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6446         continue;
6447
6448       cand = iv_cand (data, i);
6449
6450       act_delta = NULL;
6451       /*  Represent uses under current candidate using other ones with
6452           lower local cost.  */
6453       for (j = 0; j < ivs->upto; j++)
6454         {
6455           use = iv_use (data, j);
6456           old_cp = iv_ca_cand_for_use (ivs, use);
6457
6458           if (old_cp->cand != cand)
6459             continue;
6460
6461           best_cp = old_cp;
6462           if (data->consider_all_candidates)
6463             for (k = 0; k < n_iv_cands (data); k++)
6464               best_cp = cheaper_cost_with_cand (data, use, k,
6465                                                 old_cp->cand, best_cp);
6466           else
6467             EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6468               best_cp = cheaper_cost_with_cand (data, use, k,
6469                                                 old_cp->cand, best_cp);
6470
6471           if (best_cp == old_cp)
6472             continue;
6473
6474           act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6475         }
6476       /* No need for further prune.  */
6477       if (!act_delta)
6478         continue;
6479
6480       /* Prune the new candidate set.  */
6481       iv_ca_delta_commit (data, ivs, act_delta, true);
6482       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6483       iv_ca_delta_commit (data, ivs, act_delta, false);
6484       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6485
6486       if (compare_costs (acost, orig_cost) < 0)
6487         {
6488           *delta = act_delta;
6489           return acost;
6490         }
6491       else
6492         iv_ca_delta_free (&act_delta);
6493     }
6494
6495   return orig_cost;
6496 }
6497
6498 /* Tries to extend the sets IVS in the best possible way in order
6499    to express the USE.  If ORIGINALP is true, prefer candidates from
6500    the original set of IVs, otherwise favor important candidates not
6501    based on any memory object.  */
6502
6503 static bool
6504 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6505                   struct iv_use *use, bool originalp)
6506 {
6507   comp_cost best_cost, act_cost;
6508   unsigned i;
6509   bitmap_iterator bi;
6510   struct iv_cand *cand;
6511   struct iv_ca_delta *best_delta = NULL, *act_delta;
6512   struct cost_pair *cp;
6513
6514   iv_ca_add_use (data, ivs, use);
6515   best_cost = iv_ca_cost (ivs);
6516   cp = iv_ca_cand_for_use (ivs, use);
6517   if (cp)
6518     {
6519       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6520       iv_ca_set_no_cp (data, ivs, use);
6521     }
6522
6523   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6524      first try important candidates not based on any memory object.  Only if
6525      this fails, try the specific ones.  Rationale -- in loops with many
6526      variables the best choice often is to use just one generic biv.  If we
6527      added here many ivs specific to the uses, the optimization algorithm later
6528      would be likely to get stuck in a local minimum, thus causing us to create
6529      too many ivs.  The approach from few ivs to more seems more likely to be
6530      successful -- starting from few ivs, replacing an expensive use by a
6531      specific iv should always be a win.  */
6532   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6533     {
6534       cand = iv_cand (data, i);
6535
6536       if (originalp && cand->pos !=IP_ORIGINAL)
6537         continue;
6538
6539       if (!originalp && cand->iv->base_object != NULL_TREE)
6540         continue;
6541
6542       if (iv_ca_cand_used_p (ivs, cand))
6543         continue;
6544
6545       cp = get_use_iv_cost (data, use, cand);
6546       if (!cp)
6547         continue;
6548
6549       iv_ca_set_cp (data, ivs, use, cp);
6550       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6551                                true);
6552       iv_ca_set_no_cp (data, ivs, use);
6553       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6554
6555       if (compare_costs (act_cost, best_cost) < 0)
6556         {
6557           best_cost = act_cost;
6558
6559           iv_ca_delta_free (&best_delta);
6560           best_delta = act_delta;
6561         }
6562       else
6563         iv_ca_delta_free (&act_delta);
6564     }
6565
6566   if (infinite_cost_p (best_cost))
6567     {
6568       for (i = 0; i < use->n_map_members; i++)
6569         {
6570           cp = use->cost_map + i;
6571           cand = cp->cand;
6572           if (!cand)
6573             continue;
6574
6575           /* Already tried this.  */
6576           if (cand->important)
6577             {
6578               if (originalp && cand->pos == IP_ORIGINAL)
6579                 continue;
6580               if (!originalp && cand->iv->base_object == NULL_TREE)
6581                 continue;
6582             }
6583
6584           if (iv_ca_cand_used_p (ivs, cand))
6585             continue;
6586
6587           act_delta = NULL;
6588           iv_ca_set_cp (data, ivs, use, cp);
6589           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6590           iv_ca_set_no_cp (data, ivs, use);
6591           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6592                                        cp, act_delta);
6593
6594           if (compare_costs (act_cost, best_cost) < 0)
6595             {
6596               best_cost = act_cost;
6597
6598               if (best_delta)
6599                 iv_ca_delta_free (&best_delta);
6600               best_delta = act_delta;
6601             }
6602           else
6603             iv_ca_delta_free (&act_delta);
6604         }
6605     }
6606
6607   iv_ca_delta_commit (data, ivs, best_delta, true);
6608   iv_ca_delta_free (&best_delta);
6609
6610   return !infinite_cost_p (best_cost);
6611 }
6612
6613 /* Finds an initial assignment of candidates to uses.  */
6614
6615 static struct iv_ca *
6616 get_initial_solution (struct ivopts_data *data, bool originalp)
6617 {
6618   struct iv_ca *ivs = iv_ca_new (data);
6619   unsigned i;
6620
6621   for (i = 0; i < n_iv_uses (data); i++)
6622     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6623       {
6624         iv_ca_free (&ivs);
6625         return NULL;
6626       }
6627
6628   return ivs;
6629 }
6630
6631 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6632    points to a bool variable, this function tries to break local
6633    optimal fixed-point by replacing candidates in IVS if it's true.  */
6634
6635 static bool
6636 try_improve_iv_set (struct ivopts_data *data,
6637                     struct iv_ca *ivs, bool *try_replace_p)
6638 {
6639   unsigned i, n_ivs;
6640   comp_cost acost, best_cost = iv_ca_cost (ivs);
6641   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6642   struct iv_cand *cand;
6643
6644   /* Try extending the set of induction variables by one.  */
6645   for (i = 0; i < n_iv_cands (data); i++)
6646     {
6647       cand = iv_cand (data, i);
6648
6649       if (iv_ca_cand_used_p (ivs, cand))
6650         continue;
6651
6652       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6653       if (!act_delta)
6654         continue;
6655
6656       /* If we successfully added the candidate and the set is small enough,
6657          try optimizing it by removing other candidates.  */
6658       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6659         {
6660           iv_ca_delta_commit (data, ivs, act_delta, true);
6661           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6662           iv_ca_delta_commit (data, ivs, act_delta, false);
6663           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6664         }
6665
6666       if (compare_costs (acost, best_cost) < 0)
6667         {
6668           best_cost = acost;
6669           iv_ca_delta_free (&best_delta);
6670           best_delta = act_delta;
6671         }
6672       else
6673         iv_ca_delta_free (&act_delta);
6674     }
6675
6676   if (!best_delta)
6677     {
6678       /* Try removing the candidates from the set instead.  */
6679       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6680
6681       if (!best_delta && *try_replace_p)
6682         {
6683           *try_replace_p = false;
6684           /* So far candidate selecting algorithm tends to choose fewer IVs
6685              so that it can handle cases in which loops have many variables
6686              but the best choice is often to use only one general biv.  One
6687              weakness is it can't handle opposite cases, in which different
6688              candidates should be chosen with respect to each use.  To solve
6689              the problem, we replace candidates in a manner described by the
6690              comments of iv_ca_replace, thus give general algorithm a chance
6691              to break local optimal fixed-point in these cases.  */
6692           best_cost = iv_ca_replace (data, ivs, &best_delta);
6693         }
6694
6695       if (!best_delta)
6696         return false;
6697     }
6698
6699   iv_ca_delta_commit (data, ivs, best_delta, true);
6700   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6701   iv_ca_delta_free (&best_delta);
6702   return true;
6703 }
6704
6705 /* Attempts to find the optimal set of induction variables.  We do simple
6706    greedy heuristic -- we try to replace at most one candidate in the selected
6707    solution and remove the unused ivs while this improves the cost.  */
6708
6709 static struct iv_ca *
6710 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6711 {
6712   struct iv_ca *set;
6713   bool try_replace_p = true;
6714
6715   /* Get the initial solution.  */
6716   set = get_initial_solution (data, originalp);
6717   if (!set)
6718     {
6719       if (dump_file && (dump_flags & TDF_DETAILS))
6720         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6721       return NULL;
6722     }
6723
6724   if (dump_file && (dump_flags & TDF_DETAILS))
6725     {
6726       fprintf (dump_file, "Initial set of candidates:\n");
6727       iv_ca_dump (data, dump_file, set);
6728     }
6729
6730   while (try_improve_iv_set (data, set, &try_replace_p))
6731     {
6732       if (dump_file && (dump_flags & TDF_DETAILS))
6733         {
6734           fprintf (dump_file, "Improved to:\n");
6735           iv_ca_dump (data, dump_file, set);
6736         }
6737     }
6738
6739   return set;
6740 }
6741
6742 static struct iv_ca *
6743 find_optimal_iv_set (struct ivopts_data *data)
6744 {
6745   unsigned i;
6746   struct iv_ca *set, *origset;
6747   struct iv_use *use;
6748   comp_cost cost, origcost;
6749
6750   /* Determine the cost based on a strategy that starts with original IVs,
6751      and try again using a strategy that prefers candidates not based
6752      on any IVs.  */
6753   origset = find_optimal_iv_set_1 (data, true);
6754   set = find_optimal_iv_set_1 (data, false);
6755
6756   if (!origset && !set)
6757     return NULL;
6758
6759   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6760   cost = set ? iv_ca_cost (set) : infinite_cost;
6761
6762   if (dump_file && (dump_flags & TDF_DETAILS))
6763     {
6764       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6765                origcost.cost, origcost.complexity);
6766       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6767                cost.cost, cost.complexity);
6768     }
6769
6770   /* Choose the one with the best cost.  */
6771   if (compare_costs (origcost, cost) <= 0)
6772     {
6773       if (set)
6774         iv_ca_free (&set);
6775       set = origset;
6776     }
6777   else if (origset)
6778     iv_ca_free (&origset);
6779
6780   for (i = 0; i < n_iv_uses (data); i++)
6781     {
6782       use = iv_use (data, i);
6783       use->selected = iv_ca_cand_for_use (set, use)->cand;
6784     }
6785
6786   return set;
6787 }
6788
6789 /* Creates a new induction variable corresponding to CAND.  */
6790
6791 static void
6792 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6793 {
6794   gimple_stmt_iterator incr_pos;
6795   tree base;
6796   bool after = false;
6797
6798   if (!cand->iv)
6799     return;
6800
6801   switch (cand->pos)
6802     {
6803     case IP_NORMAL:
6804       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6805       break;
6806
6807     case IP_END:
6808       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6809       after = true;
6810       break;
6811
6812     case IP_AFTER_USE:
6813       after = true;
6814       /* fall through */
6815     case IP_BEFORE_USE:
6816       incr_pos = gsi_for_stmt (cand->incremented_at);
6817       break;
6818
6819     case IP_ORIGINAL:
6820       /* Mark that the iv is preserved.  */
6821       name_info (data, cand->var_before)->preserve_biv = true;
6822       name_info (data, cand->var_after)->preserve_biv = true;
6823
6824       /* Rewrite the increment so that it uses var_before directly.  */
6825       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6826       return;
6827     }
6828
6829   gimple_add_tmp_var (cand->var_before);
6830
6831   base = unshare_expr (cand->iv->base);
6832
6833   create_iv (base, unshare_expr (cand->iv->step),
6834              cand->var_before, data->current_loop,
6835              &incr_pos, after, &cand->var_before, &cand->var_after);
6836 }
6837
6838 /* Creates new induction variables described in SET.  */
6839
6840 static void
6841 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6842 {
6843   unsigned i;
6844   struct iv_cand *cand;
6845   bitmap_iterator bi;
6846
6847   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6848     {
6849       cand = iv_cand (data, i);
6850       create_new_iv (data, cand);
6851     }
6852
6853   if (dump_file && (dump_flags & TDF_DETAILS))
6854     {
6855       fprintf (dump_file, "Selected IV set for loop %d",
6856                data->current_loop->num);
6857       if (data->loop_loc != UNKNOWN_LOCATION)
6858         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6859                  LOCATION_LINE (data->loop_loc));
6860       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6861       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6862         {
6863           cand = iv_cand (data, i);
6864           dump_cand (dump_file, cand);
6865         }
6866       fprintf (dump_file, "\n");
6867     }
6868 }
6869
6870 /* Rewrites USE (definition of iv used in a nonlinear expression)
6871    using candidate CAND.  */
6872
6873 static void
6874 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6875                             struct iv_use *use, struct iv_cand *cand)
6876 {
6877   tree comp;
6878   tree op, tgt;
6879   gassign *ass;
6880   gimple_stmt_iterator bsi;
6881
6882   /* An important special case -- if we are asked to express value of
6883      the original iv by itself, just exit; there is no need to
6884      introduce a new computation (that might also need casting the
6885      variable to unsigned and back).  */
6886   if (cand->pos == IP_ORIGINAL
6887       && cand->incremented_at == use->stmt)
6888     {
6889       enum tree_code stmt_code;
6890
6891       gcc_assert (is_gimple_assign (use->stmt));
6892       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6893
6894       /* Check whether we may leave the computation unchanged.
6895          This is the case only if it does not rely on other
6896          computations in the loop -- otherwise, the computation
6897          we rely upon may be removed in remove_unused_ivs,
6898          thus leading to ICE.  */
6899       stmt_code = gimple_assign_rhs_code (use->stmt);
6900       if (stmt_code == PLUS_EXPR
6901           || stmt_code == MINUS_EXPR
6902           || stmt_code == POINTER_PLUS_EXPR)
6903         {
6904           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6905             op = gimple_assign_rhs2 (use->stmt);
6906           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6907             op = gimple_assign_rhs1 (use->stmt);
6908           else
6909             op = NULL_TREE;
6910         }
6911       else
6912         op = NULL_TREE;
6913
6914       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6915         return;
6916     }
6917
6918   comp = get_computation (data->current_loop, use, cand);
6919   gcc_assert (comp != NULL_TREE);
6920
6921   switch (gimple_code (use->stmt))
6922     {
6923     case GIMPLE_PHI:
6924       tgt = PHI_RESULT (use->stmt);
6925
6926       /* If we should keep the biv, do not replace it.  */
6927       if (name_info (data, tgt)->preserve_biv)
6928         return;
6929
6930       bsi = gsi_after_labels (gimple_bb (use->stmt));
6931       break;
6932
6933     case GIMPLE_ASSIGN:
6934       tgt = gimple_assign_lhs (use->stmt);
6935       bsi = gsi_for_stmt (use->stmt);
6936       break;
6937
6938     default:
6939       gcc_unreachable ();
6940     }
6941
6942   if (!valid_gimple_rhs_p (comp)
6943       || (gimple_code (use->stmt) != GIMPLE_PHI
6944           /* We can't allow re-allocating the stmt as it might be pointed
6945              to still.  */
6946           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6947               >= gimple_num_ops (gsi_stmt (bsi)))))
6948     {
6949       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6950                                        true, GSI_SAME_STMT);
6951       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6952         {
6953           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6954           /* As this isn't a plain copy we have to reset alignment
6955              information.  */
6956           if (SSA_NAME_PTR_INFO (comp))
6957             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6958         }
6959     }
6960
6961   if (gimple_code (use->stmt) == GIMPLE_PHI)
6962     {
6963       ass = gimple_build_assign (tgt, comp);
6964       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6965
6966       bsi = gsi_for_stmt (use->stmt);
6967       remove_phi_node (&bsi, false);
6968     }
6969   else
6970     {
6971       gimple_assign_set_rhs_from_tree (&bsi, comp);
6972       use->stmt = gsi_stmt (bsi);
6973     }
6974 }
6975
6976 /* Performs a peephole optimization to reorder the iv update statement with
6977    a mem ref to enable instruction combining in later phases. The mem ref uses
6978    the iv value before the update, so the reordering transformation requires
6979    adjustment of the offset. CAND is the selected IV_CAND.
6980
6981    Example:
6982
6983    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6984    iv2 = iv1 + 1;
6985
6986    if (t < val)      (1)
6987      goto L;
6988    goto Head;
6989
6990
6991    directly propagating t over to (1) will introduce overlapping live range
6992    thus increase register pressure. This peephole transform it into:
6993
6994
6995    iv2 = iv1 + 1;
6996    t = MEM_REF (base, iv2, 8, 8);
6997    if (t < val)
6998      goto L;
6999    goto Head;
7000 */
7001
7002 static void
7003 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7004 {
7005   tree var_after;
7006   gimple *iv_update, *stmt;
7007   basic_block bb;
7008   gimple_stmt_iterator gsi, gsi_iv;
7009
7010   if (cand->pos != IP_NORMAL)
7011     return;
7012
7013   var_after = cand->var_after;
7014   iv_update = SSA_NAME_DEF_STMT (var_after);
7015
7016   bb = gimple_bb (iv_update);
7017   gsi = gsi_last_nondebug_bb (bb);
7018   stmt = gsi_stmt (gsi);
7019
7020   /* Only handle conditional statement for now.  */
7021   if (gimple_code (stmt) != GIMPLE_COND)
7022     return;
7023
7024   gsi_prev_nondebug (&gsi);
7025   stmt = gsi_stmt (gsi);
7026   if (stmt != iv_update)
7027     return;
7028
7029   gsi_prev_nondebug (&gsi);
7030   if (gsi_end_p (gsi))
7031     return;
7032
7033   stmt = gsi_stmt (gsi);
7034   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7035     return;
7036
7037   if (stmt != use->stmt)
7038     return;
7039
7040   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7041     return;
7042
7043   if (dump_file && (dump_flags & TDF_DETAILS))
7044     {
7045       fprintf (dump_file, "Reordering \n");
7046       print_gimple_stmt (dump_file, iv_update, 0, 0);
7047       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7048       fprintf (dump_file, "\n");
7049     }
7050
7051   gsi = gsi_for_stmt (use->stmt);
7052   gsi_iv = gsi_for_stmt (iv_update);
7053   gsi_move_before (&gsi_iv, &gsi);
7054
7055   cand->pos = IP_BEFORE_USE;
7056   cand->incremented_at = use->stmt;
7057 }
7058
7059 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7060
7061 static void
7062 rewrite_use_address_1 (struct ivopts_data *data,
7063                        struct iv_use *use, struct iv_cand *cand)
7064 {
7065   aff_tree aff;
7066   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7067   tree base_hint = NULL_TREE;
7068   tree ref, iv;
7069   bool ok;
7070
7071   adjust_iv_update_pos (cand, use);
7072   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7073   gcc_assert (ok);
7074   unshare_aff_combination (&aff);
7075
7076   /* To avoid undefined overflow problems, all IV candidates use unsigned
7077      integer types.  The drawback is that this makes it impossible for
7078      create_mem_ref to distinguish an IV that is based on a memory object
7079      from one that represents simply an offset.
7080
7081      To work around this problem, we pass a hint to create_mem_ref that
7082      indicates which variable (if any) in aff is an IV based on a memory
7083      object.  Note that we only consider the candidate.  If this is not
7084      based on an object, the base of the reference is in some subexpression
7085      of the use -- but these will use pointer types, so they are recognized
7086      by the create_mem_ref heuristics anyway.  */
7087   if (cand->iv->base_object)
7088     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7089
7090   iv = var_at_stmt (data->current_loop, cand, use->stmt);
7091   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7092                         reference_alias_ptr_type (*use->op_p),
7093                         iv, base_hint, data->speed);
7094   copy_ref_info (ref, *use->op_p);
7095   *use->op_p = ref;
7096 }
7097
7098 /* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
7099    first use of a group, rewrites sub uses in the group too.  */
7100
7101 static void
7102 rewrite_use_address (struct ivopts_data *data,
7103                       struct iv_use *use, struct iv_cand *cand)
7104 {
7105   struct iv_use *next;
7106
7107   gcc_assert (use->sub_id == 0);
7108   rewrite_use_address_1 (data, use, cand);
7109   update_stmt (use->stmt);
7110
7111   for (next = use->next; next != NULL; next = next->next)
7112     {
7113       rewrite_use_address_1 (data, next, cand);
7114       update_stmt (next->stmt);
7115     }
7116
7117   return;
7118 }
7119
7120 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7121    candidate CAND.  */
7122
7123 static void
7124 rewrite_use_compare (struct ivopts_data *data,
7125                      struct iv_use *use, struct iv_cand *cand)
7126 {
7127   tree comp, *var_p, op, bound;
7128   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7129   enum tree_code compare;
7130   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7131   bool ok;
7132
7133   bound = cp->value;
7134   if (bound)
7135     {
7136       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7137       tree var_type = TREE_TYPE (var);
7138       gimple_seq stmts;
7139
7140       if (dump_file && (dump_flags & TDF_DETAILS))
7141         {
7142           fprintf (dump_file, "Replacing exit test: ");
7143           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7144         }
7145       compare = cp->comp;
7146       bound = unshare_expr (fold_convert (var_type, bound));
7147       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7148       if (stmts)
7149         gsi_insert_seq_on_edge_immediate (
7150                 loop_preheader_edge (data->current_loop),
7151                 stmts);
7152
7153       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7154       gimple_cond_set_lhs (cond_stmt, var);
7155       gimple_cond_set_code (cond_stmt, compare);
7156       gimple_cond_set_rhs (cond_stmt, op);
7157       return;
7158     }
7159
7160   /* The induction variable elimination failed; just express the original
7161      giv.  */
7162   comp = get_computation (data->current_loop, use, cand);
7163   gcc_assert (comp != NULL_TREE);
7164
7165   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7166   gcc_assert (ok);
7167
7168   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7169                                      true, GSI_SAME_STMT);
7170 }
7171
7172 /* Rewrites USE using candidate CAND.  */
7173
7174 static void
7175 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7176 {
7177   switch (use->type)
7178     {
7179       case USE_NONLINEAR_EXPR:
7180         rewrite_use_nonlinear_expr (data, use, cand);
7181         break;
7182
7183       case USE_ADDRESS:
7184         rewrite_use_address (data, use, cand);
7185         break;
7186
7187       case USE_COMPARE:
7188         rewrite_use_compare (data, use, cand);
7189         break;
7190
7191       default:
7192         gcc_unreachable ();
7193     }
7194
7195   update_stmt (use->stmt);
7196 }
7197
7198 /* Rewrite the uses using the selected induction variables.  */
7199
7200 static void
7201 rewrite_uses (struct ivopts_data *data)
7202 {
7203   unsigned i;
7204   struct iv_cand *cand;
7205   struct iv_use *use;
7206
7207   for (i = 0; i < n_iv_uses (data); i++)
7208     {
7209       use = iv_use (data, i);
7210       cand = use->selected;
7211       gcc_assert (cand);
7212
7213       rewrite_use (data, use, cand);
7214     }
7215 }
7216
7217 /* Removes the ivs that are not used after rewriting.  */
7218
7219 static void
7220 remove_unused_ivs (struct ivopts_data *data)
7221 {
7222   unsigned j;
7223   bitmap_iterator bi;
7224   bitmap toremove = BITMAP_ALLOC (NULL);
7225
7226   /* Figure out an order in which to release SSA DEFs so that we don't
7227      release something that we'd have to propagate into a debug stmt
7228      afterwards.  */
7229   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7230     {
7231       struct version_info *info;
7232
7233       info = ver_info (data, j);
7234       if (info->iv
7235           && !integer_zerop (info->iv->step)
7236           && !info->inv_id
7237           && !info->iv->have_use_for
7238           && !info->preserve_biv)
7239         {
7240           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7241
7242           tree def = info->iv->ssa_name;
7243
7244           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7245             {
7246               imm_use_iterator imm_iter;
7247               use_operand_p use_p;
7248               gimple *stmt;
7249               int count = 0;
7250
7251               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7252                 {
7253                   if (!gimple_debug_bind_p (stmt))
7254                     continue;
7255
7256                   /* We just want to determine whether to do nothing
7257                      (count == 0), to substitute the computed
7258                      expression into a single use of the SSA DEF by
7259                      itself (count == 1), or to use a debug temp
7260                      because the SSA DEF is used multiple times or as
7261                      part of a larger expression (count > 1). */
7262                   count++;
7263                   if (gimple_debug_bind_get_value (stmt) != def)
7264                     count++;
7265
7266                   if (count > 1)
7267                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7268                 }
7269
7270               if (!count)
7271                 continue;
7272
7273               struct iv_use dummy_use;
7274               struct iv_cand *best_cand = NULL, *cand;
7275               unsigned i, best_pref = 0, cand_pref;
7276
7277               memset (&dummy_use, 0, sizeof (dummy_use));
7278               dummy_use.iv = info->iv;
7279               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7280                 {
7281                   cand = iv_use (data, i)->selected;
7282                   if (cand == best_cand)
7283                     continue;
7284                   cand_pref = operand_equal_p (cand->iv->step,
7285                                                info->iv->step, 0)
7286                     ? 4 : 0;
7287                   cand_pref
7288                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7289                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7290                     ? 2 : 0;
7291                   cand_pref
7292                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7293                     ? 1 : 0;
7294                   if (best_cand == NULL || best_pref < cand_pref)
7295                     {
7296                       best_cand = cand;
7297                       best_pref = cand_pref;
7298                     }
7299                 }
7300
7301               if (!best_cand)
7302                 continue;
7303
7304               tree comp = get_computation_at (data->current_loop,
7305                                               &dummy_use, best_cand,
7306                                               SSA_NAME_DEF_STMT (def));
7307               if (!comp)
7308                 continue;
7309
7310               if (count > 1)
7311                 {
7312                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7313                   DECL_ARTIFICIAL (vexpr) = 1;
7314                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7315                   if (SSA_NAME_VAR (def))
7316                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7317                   else
7318                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7319                   gdebug *def_temp
7320                     = gimple_build_debug_bind (vexpr, comp, NULL);
7321                   gimple_stmt_iterator gsi;
7322
7323                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7324                     gsi = gsi_after_labels (gimple_bb
7325                                             (SSA_NAME_DEF_STMT (def)));
7326                   else
7327                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7328
7329                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7330                   comp = vexpr;
7331                 }
7332
7333               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7334                 {
7335                   if (!gimple_debug_bind_p (stmt))
7336                     continue;
7337
7338                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7339                     SET_USE (use_p, comp);
7340
7341                   update_stmt (stmt);
7342                 }
7343             }
7344         }
7345     }
7346
7347   release_defs_bitset (toremove);
7348
7349   BITMAP_FREE (toremove);
7350 }
7351
7352 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7353    for hash_map::traverse.  */
7354
7355 bool
7356 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7357 {
7358   free (value);
7359   return true;
7360 }
7361
7362 /* Frees data allocated by the optimization of a single loop.  */
7363
7364 static void
7365 free_loop_data (struct ivopts_data *data)
7366 {
7367   unsigned i, j;
7368   bitmap_iterator bi;
7369   tree obj;
7370
7371   if (data->niters)
7372     {
7373       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7374       delete data->niters;
7375       data->niters = NULL;
7376     }
7377
7378   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7379     {
7380       struct version_info *info;
7381
7382       info = ver_info (data, i);
7383       info->iv = NULL;
7384       info->has_nonlin_use = false;
7385       info->preserve_biv = false;
7386       info->inv_id = 0;
7387     }
7388   bitmap_clear (data->relevant);
7389   bitmap_clear (data->important_candidates);
7390
7391   for (i = 0; i < n_iv_uses (data); i++)
7392     {
7393       struct iv_use *use = iv_use (data, i);
7394       struct iv_use *pre = use, *sub = use->next;
7395
7396       while (sub)
7397         {
7398           gcc_assert (sub->related_cands == NULL);
7399           gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7400
7401           pre = sub;
7402           sub = sub->next;
7403           free (pre);
7404         }
7405
7406       BITMAP_FREE (use->related_cands);
7407       for (j = 0; j < use->n_map_members; j++)
7408         if (use->cost_map[j].depends_on)
7409           BITMAP_FREE (use->cost_map[j].depends_on);
7410       free (use->cost_map);
7411       free (use);
7412     }
7413   data->iv_uses.truncate (0);
7414
7415   for (i = 0; i < n_iv_cands (data); i++)
7416     {
7417       struct iv_cand *cand = iv_cand (data, i);
7418
7419       if (cand->depends_on)
7420         BITMAP_FREE (cand->depends_on);
7421       free (cand);
7422     }
7423   data->iv_candidates.truncate (0);
7424
7425   if (data->version_info_size < num_ssa_names)
7426     {
7427       data->version_info_size = 2 * num_ssa_names;
7428       free (data->version_info);
7429       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7430     }
7431
7432   data->max_inv_id = 0;
7433
7434   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7435     SET_DECL_RTL (obj, NULL_RTX);
7436
7437   decl_rtl_to_reset.truncate (0);
7438
7439   data->inv_expr_tab->empty ();
7440   data->inv_expr_id = 0;
7441 }
7442
7443 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7444    loop tree.  */
7445
7446 static void
7447 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7448 {
7449   free_loop_data (data);
7450   free (data->version_info);
7451   BITMAP_FREE (data->relevant);
7452   BITMAP_FREE (data->important_candidates);
7453
7454   decl_rtl_to_reset.release ();
7455   data->iv_uses.release ();
7456   data->iv_candidates.release ();
7457   delete data->inv_expr_tab;
7458   data->inv_expr_tab = NULL;
7459   free_affine_expand_cache (&data->name_expansion_cache);
7460   obstack_free (&data->iv_obstack, NULL);
7461 }
7462
7463 /* Returns true if the loop body BODY includes any function calls.  */
7464
7465 static bool
7466 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7467 {
7468   gimple_stmt_iterator gsi;
7469   unsigned i;
7470
7471   for (i = 0; i < num_nodes; i++)
7472     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7473       {
7474         gimple *stmt = gsi_stmt (gsi);
7475         if (is_gimple_call (stmt)
7476             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7477           return true;
7478       }
7479   return false;
7480 }
7481
7482 /* Optimizes the LOOP.  Returns true if anything changed.  */
7483
7484 static bool
7485 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7486 {
7487   bool changed = false;
7488   struct iv_ca *iv_ca;
7489   edge exit = single_dom_exit (loop);
7490   basic_block *body;
7491
7492   gcc_assert (!data->niters);
7493   data->current_loop = loop;
7494   data->loop_loc = find_loop_location (loop);
7495   data->speed = optimize_loop_for_speed_p (loop);
7496
7497   if (dump_file && (dump_flags & TDF_DETAILS))
7498     {
7499       fprintf (dump_file, "Processing loop %d", loop->num);
7500       if (data->loop_loc != UNKNOWN_LOCATION)
7501         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7502                  LOCATION_LINE (data->loop_loc));
7503       fprintf (dump_file, "\n");
7504
7505       if (exit)
7506         {
7507           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7508                    exit->src->index, exit->dest->index);
7509           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7510           fprintf (dump_file, "\n");
7511         }
7512
7513       fprintf (dump_file, "\n");
7514     }
7515
7516   body = get_loop_body (loop);
7517   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7518   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7519   free (body);
7520
7521   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7522
7523   /* For each ssa name determines whether it behaves as an induction variable
7524      in some loop.  */
7525   if (!find_induction_variables (data))
7526     goto finish;
7527
7528   /* Finds interesting uses (item 1).  */
7529   find_interesting_uses (data);
7530   group_address_uses (data);
7531   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7532     goto finish;
7533
7534   /* Finds candidates for the induction variables (item 2).  */
7535   find_iv_candidates (data);
7536
7537   /* Calculates the costs (item 3, part 1).  */
7538   determine_iv_costs (data);
7539   determine_use_iv_costs (data);
7540   determine_set_costs (data);
7541
7542   /* Find the optimal set of induction variables (item 3, part 2).  */
7543   iv_ca = find_optimal_iv_set (data);
7544   if (!iv_ca)
7545     goto finish;
7546   changed = true;
7547
7548   /* Create the new induction variables (item 4, part 1).  */
7549   create_new_ivs (data, iv_ca);
7550   iv_ca_free (&iv_ca);
7551
7552   /* Rewrite the uses (item 4, part 2).  */
7553   rewrite_uses (data);
7554
7555   /* Remove the ivs that are unused after rewriting.  */
7556   remove_unused_ivs (data);
7557
7558   /* We have changed the structure of induction variables; it might happen
7559      that definitions in the scev database refer to some of them that were
7560      eliminated.  */
7561   scev_reset ();
7562
7563 finish:
7564   free_loop_data (data);
7565
7566   return changed;
7567 }
7568
7569 /* Main entry point.  Optimizes induction variables in loops.  */
7570
7571 void
7572 tree_ssa_iv_optimize (void)
7573 {
7574   struct loop *loop;
7575   struct ivopts_data data;
7576
7577   tree_ssa_iv_optimize_init (&data);
7578
7579   /* Optimize the loops starting with the innermost ones.  */
7580   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7581     {
7582       if (dump_file && (dump_flags & TDF_DETAILS))
7583         flow_loop_dump (loop, dump_file, NULL, 1);
7584
7585       tree_ssa_iv_optimize_loop (&data, loop);
7586     }
7587
7588   tree_ssa_iv_optimize_finalize (&data);
7589 }