gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "stor-layout.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "gimple-pretty-print.h"
  73 #include "hash-map.h"
  74 #include "hash-table.h"
  75 #include "tree-ssa-alias.h"
  76 #include "internal-fn.h"
  77 #include "tree-eh.h"
  78 #include "gimple-expr.h"
  79 #include "is-a.h"
  80 #include "gimple.h"
  81 #include "gimplify.h"
  82 #include "gimple-iterator.h"
  83 #include "gimplify-me.h"
  84 #include "gimple-ssa.h"
  85 #include "cgraph.h"
  86 #include "tree-cfg.h"
  87 #include "tree-phinodes.h"
  88 #include "ssa-iterators.h"
  89 #include "stringpool.h"
  90 #include "tree-ssanames.h"
  91 #include "tree-ssa-loop-ivopts.h"
  92 #include "tree-ssa-loop-manip.h"
  93 #include "tree-ssa-loop-niter.h"
  94 #include "tree-ssa-loop.h"
  95 #include "expr.h"
  96 #include "tree-dfa.h"
  97 #include "tree-ssa.h"
  98 #include "cfgloop.h"
  99 #include "tree-pass.h"
 100 #include "insn-config.h"
 101 #include "tree-chrec.h"
 102 #include "tree-scalar-evolution.h"
 103 #include "cfgloop.h"
 104 #include "params.h"
 105 #include "langhooks.h"
 106 #include "tree-affine.h"
 107 #include "target.h"
 108 #include "tree-inline.h"
 109 #include "tree-ssa-propagate.h"
 110 #include "expmed.h"
 111 #include "tree-ssa-address.h"
 112 #include "builtins.h"
 113
 114 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 115    cost of different addressing modes.  This should be moved to a TBD
 116    interface between the GIMPLE and RTL worlds.  */
 117 #include "expr.h"
 118 #include "recog.h"
 119
 120 /* The infinite cost.  */
 121 #define INFTY 10000000
 122
 123 #define AVG_LOOP_NITER(LOOP) 5
 124
 125 /* Returns the expected number of loop iterations for LOOP.
 126    The average trip count is computed from profile data if it
 127    exists. */
 128
 129 static inline HOST_WIDE_INT
 130 avg_loop_niter (struct loop *loop)
 131 {
 132   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 133   if (niter == -1)
 134     return AVG_LOOP_NITER (loop);
 135
 136   return niter;
 137 }
 138
 139 /* Representation of the induction variable.  */
 140 struct iv
 141 {
 142   tree base;            /* Initial value of the iv.  */
 143   tree base_object;     /* A memory object to that the induction variable points.  */
 144   tree step;            /* Step of the iv (constant only).  */
 145   tree ssa_name;        /* The ssa name with the value.  */
 146   bool biv_p;           /* Is it a biv?  */
 147   bool have_use_for;    /* Do we already have a use for it?  */
 148   unsigned use_id;      /* The identifier in the use if it is the case.  */
 149 };
 150
 151 /* Per-ssa version information (induction variable descriptions, etc.).  */
 152 struct version_info
 153 {
 154   tree name;            /* The ssa name.  */
 155   struct iv *iv;        /* Induction variable description.  */
 156   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 157                            an expression that is not an induction variable.  */
 158   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 159   unsigned inv_id;      /* Id of an invariant.  */
 160 };
 161
 162 /* Types of uses.  */
 163 enum use_type
 164 {
 165   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 166   USE_ADDRESS,          /* Use in an address.  */
 167   USE_COMPARE           /* Use is a compare.  */
 168 };
 169
 170 /* Cost of a computation.  */
 171 typedef struct
 172 {
 173   int cost;             /* The runtime cost.  */
 174   unsigned complexity;  /* The estimate of the complexity of the code for
 175                            the computation (in no concrete units --
 176                            complexity field should be larger for more
 177                            complex expressions and addressing modes).  */
 178 } comp_cost;
 179
 180 static const comp_cost no_cost = {0, 0};
 181 static const comp_cost infinite_cost = {INFTY, INFTY};
 182
 183 /* The candidate - cost pair.  */
 184 struct cost_pair
 185 {
 186   struct iv_cand *cand; /* The candidate.  */
 187   comp_cost cost;       /* The cost.  */
 188   bitmap depends_on;    /* The list of invariants that have to be
 189                            preserved.  */
 190   tree value;           /* For final value elimination, the expression for
 191                            the final value of the iv.  For iv elimination,
 192                            the new bound to compare with.  */
 193   enum tree_code comp;  /* For iv elimination, the comparison.  */
 194   int inv_expr_id;      /* Loop invariant expression id.  */
 195 };
 196
 197 /* Use.  */
 198 struct iv_use
 199 {
 200   unsigned id;          /* The id of the use.  */
 201   enum use_type type;   /* Type of the use.  */
 202   struct iv *iv;        /* The induction variable it is based on.  */
 203   gimple stmt;          /* Statement in that it occurs.  */
 204   tree *op_p;           /* The place where it occurs.  */
 205   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 206                            important ones.  */
 207
 208   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 209   struct cost_pair *cost_map;
 210                         /* The costs wrto the iv candidates.  */
 211
 212   struct iv_cand *selected;
 213                         /* The selected candidate.  */
 214 };
 215
 216 /* The position where the iv is computed.  */
 217 enum iv_position
 218 {
 219   IP_NORMAL,            /* At the end, just before the exit condition.  */
 220   IP_END,               /* At the end of the latch block.  */
 221   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 222   IP_AFTER_USE,         /* Immediately after a specific use.  */
 223   IP_ORIGINAL           /* The original biv.  */
 224 };
 225
 226 /* The induction variable candidate.  */
 227 struct iv_cand
 228 {
 229   unsigned id;          /* The number of the candidate.  */
 230   bool important;       /* Whether this is an "important" candidate, i.e. such
 231                            that it should be considered by all uses.  */
 232   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 233   gimple incremented_at;/* For original biv, the statement where it is
 234                            incremented.  */
 235   tree var_before;      /* The variable used for it before increment.  */
 236   tree var_after;       /* The variable used for it after increment.  */
 237   struct iv *iv;        /* The value of the candidate.  NULL for
 238                            "pseudocandidate" used to indicate the possibility
 239                            to replace the final value of an iv by direct
 240                            computation of the value.  */
 241   unsigned cost;        /* Cost of the candidate.  */
 242   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 243   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 244                               where it is incremented.  */
 245   bitmap depends_on;    /* The list of invariants that are used in step of the
 246                            biv.  */
 247 };
 248
 249 /* Loop invariant expression hashtable entry.  */
 250 struct iv_inv_expr_ent
 251 {
 252   tree expr;
 253   int id;
 254   hashval_t hash;
 255 };
 256
 257 /* The data used by the induction variable optimizations.  */
 258
 259 typedef struct iv_use *iv_use_p;
 260
 261 typedef struct iv_cand *iv_cand_p;
 262
 263 /* Hashtable helpers.  */
 264
 265 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 266 {
 267   typedef iv_inv_expr_ent value_type;
 268   typedef iv_inv_expr_ent compare_type;
 269   static inline hashval_t hash (const value_type *);
 270   static inline bool equal (const value_type *, const compare_type *);
 271 };
 272
 273 /* Hash function for loop invariant expressions.  */
 274
 275 inline hashval_t
 276 iv_inv_expr_hasher::hash (const value_type *expr)
 277 {
 278   return expr->hash;
 279 }
 280
 281 /* Hash table equality function for expressions.  */
 282
 283 inline bool
 284 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 285 {
 286   return expr1->hash == expr2->hash
 287          && operand_equal_p (expr1->expr, expr2->expr, 0);
 288 }
 289
 290 struct ivopts_data
 291 {
 292   /* The currently optimized loop.  */
 293   struct loop *current_loop;
 294
 295   /* Numbers of iterations for all exits of the current loop.  */
 296   hash_map<edge, tree_niter_desc *> *niters;
 297
 298   /* Number of registers used in it.  */
 299   unsigned regs_used;
 300
 301   /* The size of version_info array allocated.  */
 302   unsigned version_info_size;
 303
 304   /* The array of information for the ssa names.  */
 305   struct version_info *version_info;
 306
 307   /* The hashtable of loop invariant expressions created
 308      by ivopt.  */
 309   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 310
 311   /* Loop invariant expression id.  */
 312   int inv_expr_id;
 313
 314   /* The bitmap of indices in version_info whose value was changed.  */
 315   bitmap relevant;
 316
 317   /* The uses of induction variables.  */
 318   vec<iv_use_p> iv_uses;
 319
 320   /* The candidates.  */
 321   vec<iv_cand_p> iv_candidates;
 322
 323   /* A bitmap of important candidates.  */
 324   bitmap important_candidates;
 325
 326   /* Cache used by tree_to_aff_combination_expand.  */
 327   hash_map<tree, name_expansion *> *name_expansion_cache;
 328
 329   /* The maximum invariant id.  */
 330   unsigned max_inv_id;
 331
 332   /* Whether to consider just related and important candidates when replacing a
 333      use.  */
 334   bool consider_all_candidates;
 335
 336   /* Are we optimizing for speed?  */
 337   bool speed;
 338
 339   /* Whether the loop body includes any function calls.  */
 340   bool body_includes_call;
 341
 342   /* Whether the loop body can only be exited via single exit.  */
 343   bool loop_single_exit_p;
 344 };
 345
 346 /* An assignment of iv candidates to uses.  */
 347
 348 struct iv_ca
 349 {
 350   /* The number of uses covered by the assignment.  */
 351   unsigned upto;
 352
 353   /* Number of uses that cannot be expressed by the candidates in the set.  */
 354   unsigned bad_uses;
 355
 356   /* Candidate assigned to a use, together with the related costs.  */
 357   struct cost_pair **cand_for_use;
 358
 359   /* Number of times each candidate is used.  */
 360   unsigned *n_cand_uses;
 361
 362   /* The candidates used.  */
 363   bitmap cands;
 364
 365   /* The number of candidates in the set.  */
 366   unsigned n_cands;
 367
 368   /* Total number of registers needed.  */
 369   unsigned n_regs;
 370
 371   /* Total cost of expressing uses.  */
 372   comp_cost cand_use_cost;
 373
 374   /* Total cost of candidates.  */
 375   unsigned cand_cost;
 376
 377   /* Number of times each invariant is used.  */
 378   unsigned *n_invariant_uses;
 379
 380   /* The array holding the number of uses of each loop
 381      invariant expressions created by ivopt.  */
 382   unsigned *used_inv_expr;
 383
 384   /* The number of created loop invariants.  */
 385   unsigned num_used_inv_expr;
 386
 387   /* Total cost of the assignment.  */
 388   comp_cost cost;
 389 };
 390
 391 /* Difference of two iv candidate assignments.  */
 392
 393 struct iv_ca_delta
 394 {
 395   /* Changed use.  */
 396   struct iv_use *use;
 397
 398   /* An old assignment (for rollback purposes).  */
 399   struct cost_pair *old_cp;
 400
 401   /* A new assignment.  */
 402   struct cost_pair *new_cp;
 403
 404   /* Next change in the list.  */
 405   struct iv_ca_delta *next_change;
 406 };
 407
 408 /* Bound on number of candidates below that all candidates are considered.  */
 409
 410 #define CONSIDER_ALL_CANDIDATES_BOUND \
 411   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 412
 413 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 414    optimizing such a loop would help, and it would take ages).  */
 415
 416 #define MAX_CONSIDERED_USES \
 417   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 418
 419 /* If there are at most this number of ivs in the set, try removing unnecessary
 420    ivs from the set always.  */
 421
 422 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 423   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 424
 425 /* The list of trees for that the decl_rtl field must be reset is stored
 426    here.  */
 427
 428 static vec<tree> decl_rtl_to_reset;
 429
 430 static comp_cost force_expr_to_var_cost (tree, bool);
 431
 432 /* Number of uses recorded in DATA.  */
 433
 434 static inline unsigned
 435 n_iv_uses (struct ivopts_data *data)
 436 {
 437   return data->iv_uses.length ();
 438 }
 439
 440 /* Ith use recorded in DATA.  */
 441
 442 static inline struct iv_use *
 443 iv_use (struct ivopts_data *data, unsigned i)
 444 {
 445   return data->iv_uses[i];
 446 }
 447
 448 /* Number of candidates recorded in DATA.  */
 449
 450 static inline unsigned
 451 n_iv_cands (struct ivopts_data *data)
 452 {
 453   return data->iv_candidates.length ();
 454 }
 455
 456 /* Ith candidate recorded in DATA.  */
 457
 458 static inline struct iv_cand *
 459 iv_cand (struct ivopts_data *data, unsigned i)
 460 {
 461   return data->iv_candidates[i];
 462 }
 463
 464 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 465
 466 edge
 467 single_dom_exit (struct loop *loop)
 468 {
 469   edge exit = single_exit (loop);
 470
 471   if (!exit)
 472     return NULL;
 473
 474   if (!just_once_each_iteration_p (loop, exit->src))
 475     return NULL;
 476
 477   return exit;
 478 }
 479
 480 /* Dumps information about the induction variable IV to FILE.  */
 481
 482 void
 483 dump_iv (FILE *file, struct iv *iv)
 484 {
 485   if (iv->ssa_name)
 486     {
 487       fprintf (file, "ssa name ");
 488       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 489       fprintf (file, "\n");
 490     }
 491
 492   fprintf (file, "  type ");
 493   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 494   fprintf (file, "\n");
 495
 496   if (iv->step)
 497     {
 498       fprintf (file, "  base ");
 499       print_generic_expr (file, iv->base, TDF_SLIM);
 500       fprintf (file, "\n");
 501
 502       fprintf (file, "  step ");
 503       print_generic_expr (file, iv->step, TDF_SLIM);
 504       fprintf (file, "\n");
 505     }
 506   else
 507     {
 508       fprintf (file, "  invariant ");
 509       print_generic_expr (file, iv->base, TDF_SLIM);
 510       fprintf (file, "\n");
 511     }
 512
 513   if (iv->base_object)
 514     {
 515       fprintf (file, "  base object ");
 516       print_generic_expr (file, iv->base_object, TDF_SLIM);
 517       fprintf (file, "\n");
 518     }
 519
 520   if (iv->biv_p)
 521     fprintf (file, "  is a biv\n");
 522 }
 523
 524 /* Dumps information about the USE to FILE.  */
 525
 526 void
 527 dump_use (FILE *file, struct iv_use *use)
 528 {
 529   fprintf (file, "use %d\n", use->id);
 530
 531   switch (use->type)
 532     {
 533     case USE_NONLINEAR_EXPR:
 534       fprintf (file, "  generic\n");
 535       break;
 536
 537     case USE_ADDRESS:
 538       fprintf (file, "  address\n");
 539       break;
 540
 541     case USE_COMPARE:
 542       fprintf (file, "  compare\n");
 543       break;
 544
 545     default:
 546       gcc_unreachable ();
 547     }
 548
 549   fprintf (file, "  in statement ");
 550   print_gimple_stmt (file, use->stmt, 0, 0);
 551   fprintf (file, "\n");
 552
 553   fprintf (file, "  at position ");
 554   if (use->op_p)
 555     print_generic_expr (file, *use->op_p, TDF_SLIM);
 556   fprintf (file, "\n");
 557
 558   dump_iv (file, use->iv);
 559
 560   if (use->related_cands)
 561     {
 562       fprintf (file, "  related candidates ");
 563       dump_bitmap (file, use->related_cands);
 564     }
 565 }
 566
 567 /* Dumps information about the uses to FILE.  */
 568
 569 void
 570 dump_uses (FILE *file, struct ivopts_data *data)
 571 {
 572   unsigned i;
 573   struct iv_use *use;
 574
 575   for (i = 0; i < n_iv_uses (data); i++)
 576     {
 577       use = iv_use (data, i);
 578
 579       dump_use (file, use);
 580       fprintf (file, "\n");
 581     }
 582 }
 583
 584 /* Dumps information about induction variable candidate CAND to FILE.  */
 585
 586 void
 587 dump_cand (FILE *file, struct iv_cand *cand)
 588 {
 589   struct iv *iv = cand->iv;
 590
 591   fprintf (file, "candidate %d%s\n",
 592            cand->id, cand->important ? " (important)" : "");
 593
 594   if (cand->depends_on)
 595     {
 596       fprintf (file, "  depends on ");
 597       dump_bitmap (file, cand->depends_on);
 598     }
 599
 600   if (!iv)
 601     {
 602       fprintf (file, "  final value replacement\n");
 603       return;
 604     }
 605
 606   if (cand->var_before)
 607     {
 608       fprintf (file, "  var_before ");
 609       print_generic_expr (file, cand->var_before, TDF_SLIM);
 610       fprintf (file, "\n");
 611     }
 612   if (cand->var_after)
 613     {
 614       fprintf (file, "  var_after ");
 615       print_generic_expr (file, cand->var_after, TDF_SLIM);
 616       fprintf (file, "\n");
 617     }
 618
 619   switch (cand->pos)
 620     {
 621     case IP_NORMAL:
 622       fprintf (file, "  incremented before exit test\n");
 623       break;
 624
 625     case IP_BEFORE_USE:
 626       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 627       break;
 628
 629     case IP_AFTER_USE:
 630       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 631       break;
 632
 633     case IP_END:
 634       fprintf (file, "  incremented at end\n");
 635       break;
 636
 637     case IP_ORIGINAL:
 638       fprintf (file, "  original biv\n");
 639       break;
 640     }
 641
 642   dump_iv (file, iv);
 643 }
 644
 645 /* Returns the info for ssa version VER.  */
 646
 647 static inline struct version_info *
 648 ver_info (struct ivopts_data *data, unsigned ver)
 649 {
 650   return data->version_info + ver;
 651 }
 652
 653 /* Returns the info for ssa name NAME.  */
 654
 655 static inline struct version_info *
 656 name_info (struct ivopts_data *data, tree name)
 657 {
 658   return ver_info (data, SSA_NAME_VERSION (name));
 659 }
 660
 661 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 662    emitted in LOOP.  */
 663
 664 static bool
 665 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 666 {
 667   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 668
 669   gcc_assert (bb);
 670
 671   if (sbb == loop->latch)
 672     return true;
 673
 674   if (sbb != bb)
 675     return false;
 676
 677   return stmt == last_stmt (bb);
 678 }
 679
 680 /* Returns true if STMT if after the place where the original induction
 681    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 682    if the positions are identical.  */
 683
 684 static bool
 685 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 686 {
 687   basic_block cand_bb = gimple_bb (cand->incremented_at);
 688   basic_block stmt_bb = gimple_bb (stmt);
 689
 690   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 691     return false;
 692
 693   if (stmt_bb != cand_bb)
 694     return true;
 695
 696   if (true_if_equal
 697       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 698     return true;
 699   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 700 }
 701
 702 /* Returns true if STMT if after the place where the induction variable
 703    CAND is incremented in LOOP.  */
 704
 705 static bool
 706 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 707 {
 708   switch (cand->pos)
 709     {
 710     case IP_END:
 711       return false;
 712
 713     case IP_NORMAL:
 714       return stmt_after_ip_normal_pos (loop, stmt);
 715
 716     case IP_ORIGINAL:
 717     case IP_AFTER_USE:
 718       return stmt_after_inc_pos (cand, stmt, false);
 719
 720     case IP_BEFORE_USE:
 721       return stmt_after_inc_pos (cand, stmt, true);
 722
 723     default:
 724       gcc_unreachable ();
 725     }
 726 }
 727
 728 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 729
 730 static bool
 731 abnormal_ssa_name_p (tree exp)
 732 {
 733   if (!exp)
 734     return false;
 735
 736   if (TREE_CODE (exp) != SSA_NAME)
 737     return false;
 738
 739   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 740 }
 741
 742 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 743    abnormal phi node.  Callback for for_each_index.  */
 744
 745 static bool
 746 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 747                                   void *data ATTRIBUTE_UNUSED)
 748 {
 749   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 750     {
 751       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 752         return false;
 753       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 754         return false;
 755     }
 756
 757   return !abnormal_ssa_name_p (*index);
 758 }
 759
 760 /* Returns true if EXPR contains a ssa name that occurs in an
 761    abnormal phi node.  */
 762
 763 bool
 764 contains_abnormal_ssa_name_p (tree expr)
 765 {
 766   enum tree_code code;
 767   enum tree_code_class codeclass;
 768
 769   if (!expr)
 770     return false;
 771
 772   code = TREE_CODE (expr);
 773   codeclass = TREE_CODE_CLASS (code);
 774
 775   if (code == SSA_NAME)
 776     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 777
 778   if (code == INTEGER_CST
 779       || is_gimple_min_invariant (expr))
 780     return false;
 781
 782   if (code == ADDR_EXPR)
 783     return !for_each_index (&TREE_OPERAND (expr, 0),
 784                             idx_contains_abnormal_ssa_name_p,
 785                             NULL);
 786
 787   if (code == COND_EXPR)
 788     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 789       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 790       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 791
 792   switch (codeclass)
 793     {
 794     case tcc_binary:
 795     case tcc_comparison:
 796       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 797         return true;
 798
 799       /* Fallthru.  */
 800     case tcc_unary:
 801       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 802         return true;
 803
 804       break;
 805
 806     default:
 807       gcc_unreachable ();
 808     }
 809
 810   return false;
 811 }
 812
 813 /*  Returns the structure describing number of iterations determined from
 814     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 815
 816 static struct tree_niter_desc *
 817 niter_for_exit (struct ivopts_data *data, edge exit)
 818 {
 819   struct tree_niter_desc *desc;
 820   tree_niter_desc **slot;
 821
 822   if (!data->niters)
 823     {
 824       data->niters = new hash_map<edge, tree_niter_desc *>;
 825       slot = NULL;
 826     }
 827   else
 828     slot = data->niters->get (exit);
 829
 830   if (!slot)
 831     {
 832       /* Try to determine number of iterations.  We cannot safely work with ssa
 833          names that appear in phi nodes on abnormal edges, so that we do not
 834          create overlapping life ranges for them (PR 27283).  */
 835       desc = XNEW (struct tree_niter_desc);
 836       if (!number_of_iterations_exit (data->current_loop,
 837                                       exit, desc, true)
 838           || contains_abnormal_ssa_name_p (desc->niter))
 839         {
 840           XDELETE (desc);
 841           desc = NULL;
 842         }
 843       data->niters->put (exit, desc);
 844     }
 845   else
 846     desc = *slot;
 847
 848   return desc;
 849 }
 850
 851 /* Returns the structure describing number of iterations determined from
 852    single dominating exit of DATA->current_loop, or NULL if something
 853    goes wrong.  */
 854
 855 static struct tree_niter_desc *
 856 niter_for_single_dom_exit (struct ivopts_data *data)
 857 {
 858   edge exit = single_dom_exit (data->current_loop);
 859
 860   if (!exit)
 861     return NULL;
 862
 863   return niter_for_exit (data, exit);
 864 }
 865
 866 /* Initializes data structures used by the iv optimization pass, stored
 867    in DATA.  */
 868
 869 static void
 870 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 871 {
 872   data->version_info_size = 2 * num_ssa_names;
 873   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 874   data->relevant = BITMAP_ALLOC (NULL);
 875   data->important_candidates = BITMAP_ALLOC (NULL);
 876   data->max_inv_id = 0;
 877   data->niters = NULL;
 878   data->iv_uses.create (20);
 879   data->iv_candidates.create (20);
 880   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 881   data->inv_expr_id = 0;
 882   data->name_expansion_cache = NULL;
 883   decl_rtl_to_reset.create (20);
 884 }
 885
 886 /* Returns a memory object to that EXPR points.  In case we are able to
 887    determine that it does not point to any such object, NULL is returned.  */
 888
 889 static tree
 890 determine_base_object (tree expr)
 891 {
 892   enum tree_code code = TREE_CODE (expr);
 893   tree base, obj;
 894
 895   /* If this is a pointer casted to any type, we need to determine
 896      the base object for the pointer; so handle conversions before
 897      throwing away non-pointer expressions.  */
 898   if (CONVERT_EXPR_P (expr))
 899     return determine_base_object (TREE_OPERAND (expr, 0));
 900
 901   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 902     return NULL_TREE;
 903
 904   switch (code)
 905     {
 906     case INTEGER_CST:
 907       return NULL_TREE;
 908
 909     case ADDR_EXPR:
 910       obj = TREE_OPERAND (expr, 0);
 911       base = get_base_address (obj);
 912
 913       if (!base)
 914         return expr;
 915
 916       if (TREE_CODE (base) == MEM_REF)
 917         return determine_base_object (TREE_OPERAND (base, 0));
 918
 919       return fold_convert (ptr_type_node,
 920                            build_fold_addr_expr (base));
 921
 922     case POINTER_PLUS_EXPR:
 923       return determine_base_object (TREE_OPERAND (expr, 0));
 924
 925     case PLUS_EXPR:
 926     case MINUS_EXPR:
 927       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 928       gcc_unreachable ();
 929
 930     default:
 931       return fold_convert (ptr_type_node, expr);
 932     }
 933 }
 934
 935 /* Return true if address expression with non-DECL_P operand appears
 936    in EXPR.  */
 937
 938 static bool
 939 contain_complex_addr_expr (tree expr)
 940 {
 941   bool res = false;
 942
 943   STRIP_NOPS (expr);
 944   switch (TREE_CODE (expr))
 945     {
 946     case POINTER_PLUS_EXPR:
 947     case PLUS_EXPR:
 948     case MINUS_EXPR:
 949       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 950       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 951       break;
 952
 953     case ADDR_EXPR:
 954       return (!DECL_P (TREE_OPERAND (expr, 0)));
 955
 956     default:
 957       return false;
 958     }
 959
 960   return res;
 961 }
 962
 963 /* Allocates an induction variable with given initial value BASE and step STEP
 964    for loop LOOP.  */
 965
 966 static struct iv *
 967 alloc_iv (tree base, tree step)
 968 {
 969   tree expr = base;
 970   struct iv *iv = XCNEW (struct iv);
 971   gcc_assert (step != NULL_TREE);
 972
 973   /* Lower address expression in base except ones with DECL_P as operand.
 974      By doing this:
 975        1) More accurate cost can be computed for address expressions;
 976        2) Duplicate candidates won't be created for bases in different
 977           forms, like &a[0] and &a.  */
 978   STRIP_NOPS (expr);
 979   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
 980       || contain_complex_addr_expr (expr))
 981     {
 982       aff_tree comb;
 983       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
 984       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
 985     }
 986
 987   iv->base = base;
 988   iv->base_object = determine_base_object (base);
 989   iv->step = step;
 990   iv->biv_p = false;
 991   iv->have_use_for = false;
 992   iv->use_id = 0;
 993   iv->ssa_name = NULL_TREE;
 994
 995   return iv;
 996 }
 997
 998 /* Sets STEP and BASE for induction variable IV.  */
 999
1000 static void
1001 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
1002 {
1003   struct version_info *info = name_info (data, iv);
1004
1005   gcc_assert (!info->iv);
1006
1007   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1008   info->iv = alloc_iv (base, step);
1009   info->iv->ssa_name = iv;
1010 }
1011
1012 /* Finds induction variable declaration for VAR.  */
1013
1014 static struct iv *
1015 get_iv (struct ivopts_data *data, tree var)
1016 {
1017   basic_block bb;
1018   tree type = TREE_TYPE (var);
1019
1020   if (!POINTER_TYPE_P (type)
1021       && !INTEGRAL_TYPE_P (type))
1022     return NULL;
1023
1024   if (!name_info (data, var)->iv)
1025     {
1026       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1027
1028       if (!bb
1029           || !flow_bb_inside_loop_p (data->current_loop, bb))
1030         set_iv (data, var, var, build_int_cst (type, 0));
1031     }
1032
1033   return name_info (data, var)->iv;
1034 }
1035
1036 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
1037    not define a simple affine biv with nonzero step.  */
1038
1039 static tree
1040 determine_biv_step (gimple phi)
1041 {
1042   struct loop *loop = gimple_bb (phi)->loop_father;
1043   tree name = PHI_RESULT (phi);
1044   affine_iv iv;
1045
1046   if (virtual_operand_p (name))
1047     return NULL_TREE;
1048
1049   if (!simple_iv (loop, loop, name, &iv, true))
1050     return NULL_TREE;
1051
1052   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1053 }
1054
1055 /* Finds basic ivs.  */
1056
1057 static bool
1058 find_bivs (struct ivopts_data *data)
1059 {
1060   gimple phi;
1061   tree step, type, base;
1062   bool found = false;
1063   struct loop *loop = data->current_loop;
1064   gimple_stmt_iterator psi;
1065
1066   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1067     {
1068       phi = gsi_stmt (psi);
1069
1070       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1071         continue;
1072
1073       step = determine_biv_step (phi);
1074       if (!step)
1075         continue;
1076
1077       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1078       base = expand_simple_operations (base);
1079       if (contains_abnormal_ssa_name_p (base)
1080           || contains_abnormal_ssa_name_p (step))
1081         continue;
1082
1083       type = TREE_TYPE (PHI_RESULT (phi));
1084       base = fold_convert (type, base);
1085       if (step)
1086         {
1087           if (POINTER_TYPE_P (type))
1088             step = convert_to_ptrofftype (step);
1089           else
1090             step = fold_convert (type, step);
1091         }
1092
1093       set_iv (data, PHI_RESULT (phi), base, step);
1094       found = true;
1095     }
1096
1097   return found;
1098 }
1099
1100 /* Marks basic ivs.  */
1101
1102 static void
1103 mark_bivs (struct ivopts_data *data)
1104 {
1105   gimple phi, def;
1106   tree var;
1107   struct iv *iv, *incr_iv;
1108   struct loop *loop = data->current_loop;
1109   basic_block incr_bb;
1110   gimple_stmt_iterator psi;
1111
1112   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1113     {
1114       phi = gsi_stmt (psi);
1115
1116       iv = get_iv (data, PHI_RESULT (phi));
1117       if (!iv)
1118         continue;
1119
1120       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1121       def = SSA_NAME_DEF_STMT (var);
1122       /* Don't mark iv peeled from other one as biv.  */
1123       if (def
1124           && gimple_code (def) == GIMPLE_PHI
1125           && gimple_bb (def) == loop->header)
1126         continue;
1127
1128       incr_iv = get_iv (data, var);
1129       if (!incr_iv)
1130         continue;
1131
1132       /* If the increment is in the subloop, ignore it.  */
1133       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1134       if (incr_bb->loop_father != data->current_loop
1135           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1136         continue;
1137
1138       iv->biv_p = true;
1139       incr_iv->biv_p = true;
1140     }
1141 }
1142
1143 /* Checks whether STMT defines a linear induction variable and stores its
1144    parameters to IV.  */
1145
1146 static bool
1147 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1148 {
1149   tree lhs;
1150   struct loop *loop = data->current_loop;
1151
1152   iv->base = NULL_TREE;
1153   iv->step = NULL_TREE;
1154
1155   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1156     return false;
1157
1158   lhs = gimple_assign_lhs (stmt);
1159   if (TREE_CODE (lhs) != SSA_NAME)
1160     return false;
1161
1162   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1163     return false;
1164   iv->base = expand_simple_operations (iv->base);
1165
1166   if (contains_abnormal_ssa_name_p (iv->base)
1167       || contains_abnormal_ssa_name_p (iv->step))
1168     return false;
1169
1170   /* If STMT could throw, then do not consider STMT as defining a GIV.
1171      While this will suppress optimizations, we can not safely delete this
1172      GIV and associated statements, even if it appears it is not used.  */
1173   if (stmt_could_throw_p (stmt))
1174     return false;
1175
1176   return true;
1177 }
1178
1179 /* Finds general ivs in statement STMT.  */
1180
1181 static void
1182 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1183 {
1184   affine_iv iv;
1185
1186   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1187     return;
1188
1189   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1190 }
1191
1192 /* Finds general ivs in basic block BB.  */
1193
1194 static void
1195 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1196 {
1197   gimple_stmt_iterator bsi;
1198
1199   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1200     find_givs_in_stmt (data, gsi_stmt (bsi));
1201 }
1202
1203 /* Finds general ivs.  */
1204
1205 static void
1206 find_givs (struct ivopts_data *data)
1207 {
1208   struct loop *loop = data->current_loop;
1209   basic_block *body = get_loop_body_in_dom_order (loop);
1210   unsigned i;
1211
1212   for (i = 0; i < loop->num_nodes; i++)
1213     find_givs_in_bb (data, body[i]);
1214   free (body);
1215 }
1216
1217 /* For each ssa name defined in LOOP determines whether it is an induction
1218    variable and if so, its initial value and step.  */
1219
1220 static bool
1221 find_induction_variables (struct ivopts_data *data)
1222 {
1223   unsigned i;
1224   bitmap_iterator bi;
1225
1226   if (!find_bivs (data))
1227     return false;
1228
1229   find_givs (data);
1230   mark_bivs (data);
1231
1232   if (dump_file && (dump_flags & TDF_DETAILS))
1233     {
1234       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1235
1236       if (niter)
1237         {
1238           fprintf (dump_file, "  number of iterations ");
1239           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1240           if (!integer_zerop (niter->may_be_zero))
1241             {
1242               fprintf (dump_file, "; zero if ");
1243               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1244             }
1245           fprintf (dump_file, "\n\n");
1246         };
1247
1248       fprintf (dump_file, "Induction variables:\n\n");
1249
1250       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1251         {
1252           if (ver_info (data, i)->iv)
1253             dump_iv (dump_file, ver_info (data, i)->iv);
1254         }
1255     }
1256
1257   return true;
1258 }
1259
1260 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1261
1262 static struct iv_use *
1263 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1264             gimple stmt, enum use_type use_type)
1265 {
1266   struct iv_use *use = XCNEW (struct iv_use);
1267
1268   use->id = n_iv_uses (data);
1269   use->type = use_type;
1270   use->iv = iv;
1271   use->stmt = stmt;
1272   use->op_p = use_p;
1273   use->related_cands = BITMAP_ALLOC (NULL);
1274
1275   /* To avoid showing ssa name in the dumps, if it was not reset by the
1276      caller.  */
1277   iv->ssa_name = NULL_TREE;
1278
1279   if (dump_file && (dump_flags & TDF_DETAILS))
1280     dump_use (dump_file, use);
1281
1282   data->iv_uses.safe_push (use);
1283
1284   return use;
1285 }
1286
1287 /* Checks whether OP is a loop-level invariant and if so, records it.
1288    NONLINEAR_USE is true if the invariant is used in a way we do not
1289    handle specially.  */
1290
1291 static void
1292 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1293 {
1294   basic_block bb;
1295   struct version_info *info;
1296
1297   if (TREE_CODE (op) != SSA_NAME
1298       || virtual_operand_p (op))
1299     return;
1300
1301   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1302   if (bb
1303       && flow_bb_inside_loop_p (data->current_loop, bb))
1304     return;
1305
1306   info = name_info (data, op);
1307   info->name = op;
1308   info->has_nonlin_use |= nonlinear_use;
1309   if (!info->inv_id)
1310     info->inv_id = ++data->max_inv_id;
1311   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1312 }
1313
1314 /* Checks whether the use OP is interesting and if so, records it.  */
1315
1316 static struct iv_use *
1317 find_interesting_uses_op (struct ivopts_data *data, tree op)
1318 {
1319   struct iv *iv;
1320   struct iv *civ;
1321   gimple stmt;
1322   struct iv_use *use;
1323
1324   if (TREE_CODE (op) != SSA_NAME)
1325     return NULL;
1326
1327   iv = get_iv (data, op);
1328   if (!iv)
1329     return NULL;
1330
1331   if (iv->have_use_for)
1332     {
1333       use = iv_use (data, iv->use_id);
1334
1335       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1336       return use;
1337     }
1338
1339   if (integer_zerop (iv->step))
1340     {
1341       record_invariant (data, op, true);
1342       return NULL;
1343     }
1344   iv->have_use_for = true;
1345
1346   civ = XNEW (struct iv);
1347   *civ = *iv;
1348
1349   stmt = SSA_NAME_DEF_STMT (op);
1350   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1351               || is_gimple_assign (stmt));
1352
1353   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1354   iv->use_id = use->id;
1355
1356   return use;
1357 }
1358
1359 /* Given a condition in statement STMT, checks whether it is a compare
1360    of an induction variable and an invariant.  If this is the case,
1361    CONTROL_VAR is set to location of the iv, BOUND to the location of
1362    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1363    induction variable descriptions, and true is returned.  If this is not
1364    the case, CONTROL_VAR and BOUND are set to the arguments of the
1365    condition and false is returned.  */
1366
1367 static bool
1368 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1369                        tree **control_var, tree **bound,
1370                        struct iv **iv_var, struct iv **iv_bound)
1371 {
1372   /* The objects returned when COND has constant operands.  */
1373   static struct iv const_iv;
1374   static tree zero;
1375   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1376   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1377   bool ret = false;
1378
1379   if (gimple_code (stmt) == GIMPLE_COND)
1380     {
1381       op0 = gimple_cond_lhs_ptr (stmt);
1382       op1 = gimple_cond_rhs_ptr (stmt);
1383     }
1384   else
1385     {
1386       op0 = gimple_assign_rhs1_ptr (stmt);
1387       op1 = gimple_assign_rhs2_ptr (stmt);
1388     }
1389
1390   zero = integer_zero_node;
1391   const_iv.step = integer_zero_node;
1392
1393   if (TREE_CODE (*op0) == SSA_NAME)
1394     iv0 = get_iv (data, *op0);
1395   if (TREE_CODE (*op1) == SSA_NAME)
1396     iv1 = get_iv (data, *op1);
1397
1398   /* Exactly one of the compared values must be an iv, and the other one must
1399      be an invariant.  */
1400   if (!iv0 || !iv1)
1401     goto end;
1402
1403   if (integer_zerop (iv0->step))
1404     {
1405       /* Control variable may be on the other side.  */
1406       tmp_op = op0; op0 = op1; op1 = tmp_op;
1407       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1408     }
1409   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1410
1411 end:
1412   if (control_var)
1413     *control_var = op0;;
1414   if (iv_var)
1415     *iv_var = iv0;;
1416   if (bound)
1417     *bound = op1;
1418   if (iv_bound)
1419     *iv_bound = iv1;
1420
1421   return ret;
1422 }
1423
1424 /* Checks whether the condition in STMT is interesting and if so,
1425    records it.  */
1426
1427 static void
1428 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1429 {
1430   tree *var_p, *bound_p;
1431   struct iv *var_iv, *civ;
1432
1433   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1434     {
1435       find_interesting_uses_op (data, *var_p);
1436       find_interesting_uses_op (data, *bound_p);
1437       return;
1438     }
1439
1440   civ = XNEW (struct iv);
1441   *civ = *var_iv;
1442   record_use (data, NULL, civ, stmt, USE_COMPARE);
1443 }
1444
1445 /* Returns the outermost loop EXPR is obviously invariant in
1446    relative to the loop LOOP, i.e. if all its operands are defined
1447    outside of the returned loop.  Returns NULL if EXPR is not
1448    even obviously invariant in LOOP.  */
1449
1450 struct loop *
1451 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1452 {
1453   basic_block def_bb;
1454   unsigned i, len;
1455
1456   if (is_gimple_min_invariant (expr))
1457     return current_loops->tree_root;
1458
1459   if (TREE_CODE (expr) == SSA_NAME)
1460     {
1461       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1462       if (def_bb)
1463         {
1464           if (flow_bb_inside_loop_p (loop, def_bb))
1465             return NULL;
1466           return superloop_at_depth (loop,
1467                                      loop_depth (def_bb->loop_father) + 1);
1468         }
1469
1470       return current_loops->tree_root;
1471     }
1472
1473   if (!EXPR_P (expr))
1474     return NULL;
1475
1476   unsigned maxdepth = 0;
1477   len = TREE_OPERAND_LENGTH (expr);
1478   for (i = 0; i < len; i++)
1479     {
1480       struct loop *ivloop;
1481       if (!TREE_OPERAND (expr, i))
1482         continue;
1483
1484       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1485       if (!ivloop)
1486         return NULL;
1487       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1488     }
1489
1490   return superloop_at_depth (loop, maxdepth);
1491 }
1492
1493 /* Returns true if expression EXPR is obviously invariant in LOOP,
1494    i.e. if all its operands are defined outside of the LOOP.  LOOP
1495    should not be the function body.  */
1496
1497 bool
1498 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1499 {
1500   basic_block def_bb;
1501   unsigned i, len;
1502
1503   gcc_assert (loop_depth (loop) > 0);
1504
1505   if (is_gimple_min_invariant (expr))
1506     return true;
1507
1508   if (TREE_CODE (expr) == SSA_NAME)
1509     {
1510       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1511       if (def_bb
1512           && flow_bb_inside_loop_p (loop, def_bb))
1513         return false;
1514
1515       return true;
1516     }
1517
1518   if (!EXPR_P (expr))
1519     return false;
1520
1521   len = TREE_OPERAND_LENGTH (expr);
1522   for (i = 0; i < len; i++)
1523     if (TREE_OPERAND (expr, i)
1524         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1525       return false;
1526
1527   return true;
1528 }
1529
1530 /* Cumulates the steps of indices into DATA and replaces their values with the
1531    initial ones.  Returns false when the value of the index cannot be determined.
1532    Callback for for_each_index.  */
1533
1534 struct ifs_ivopts_data
1535 {
1536   struct ivopts_data *ivopts_data;
1537   gimple stmt;
1538   tree step;
1539 };
1540
1541 static bool
1542 idx_find_step (tree base, tree *idx, void *data)
1543 {
1544   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1545   struct iv *iv;
1546   tree step, iv_base, iv_step, lbound, off;
1547   struct loop *loop = dta->ivopts_data->current_loop;
1548
1549   /* If base is a component ref, require that the offset of the reference
1550      be invariant.  */
1551   if (TREE_CODE (base) == COMPONENT_REF)
1552     {
1553       off = component_ref_field_offset (base);
1554       return expr_invariant_in_loop_p (loop, off);
1555     }
1556
1557   /* If base is array, first check whether we will be able to move the
1558      reference out of the loop (in order to take its address in strength
1559      reduction).  In order for this to work we need both lower bound
1560      and step to be loop invariants.  */
1561   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1562     {
1563       /* Moreover, for a range, the size needs to be invariant as well.  */
1564       if (TREE_CODE (base) == ARRAY_RANGE_REF
1565           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1566         return false;
1567
1568       step = array_ref_element_size (base);
1569       lbound = array_ref_low_bound (base);
1570
1571       if (!expr_invariant_in_loop_p (loop, step)
1572           || !expr_invariant_in_loop_p (loop, lbound))
1573         return false;
1574     }
1575
1576   if (TREE_CODE (*idx) != SSA_NAME)
1577     return true;
1578
1579   iv = get_iv (dta->ivopts_data, *idx);
1580   if (!iv)
1581     return false;
1582
1583   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1584           *&x[0], which is not folded and does not trigger the
1585           ARRAY_REF path below.  */
1586   *idx = iv->base;
1587
1588   if (integer_zerop (iv->step))
1589     return true;
1590
1591   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1592     {
1593       step = array_ref_element_size (base);
1594
1595       /* We only handle addresses whose step is an integer constant.  */
1596       if (TREE_CODE (step) != INTEGER_CST)
1597         return false;
1598     }
1599   else
1600     /* The step for pointer arithmetics already is 1 byte.  */
1601     step = size_one_node;
1602
1603   iv_base = iv->base;
1604   iv_step = iv->step;
1605   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1606                             sizetype, &iv_base, &iv_step, dta->stmt,
1607                             false))
1608     {
1609       /* The index might wrap.  */
1610       return false;
1611     }
1612
1613   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1614   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1615
1616   return true;
1617 }
1618
1619 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1620    object is passed to it in DATA.  */
1621
1622 static bool
1623 idx_record_use (tree base, tree *idx,
1624                 void *vdata)
1625 {
1626   struct ivopts_data *data = (struct ivopts_data *) vdata;
1627   find_interesting_uses_op (data, *idx);
1628   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1629     {
1630       find_interesting_uses_op (data, array_ref_element_size (base));
1631       find_interesting_uses_op (data, array_ref_low_bound (base));
1632     }
1633   return true;
1634 }
1635
1636 /* If we can prove that TOP = cst * BOT for some constant cst,
1637    store cst to MUL and return true.  Otherwise return false.
1638    The returned value is always sign-extended, regardless of the
1639    signedness of TOP and BOT.  */
1640
1641 static bool
1642 constant_multiple_of (tree top, tree bot, widest_int *mul)
1643 {
1644   tree mby;
1645   enum tree_code code;
1646   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1647   widest_int res, p0, p1;
1648
1649   STRIP_NOPS (top);
1650   STRIP_NOPS (bot);
1651
1652   if (operand_equal_p (top, bot, 0))
1653     {
1654       *mul = 1;
1655       return true;
1656     }
1657
1658   code = TREE_CODE (top);
1659   switch (code)
1660     {
1661     case MULT_EXPR:
1662       mby = TREE_OPERAND (top, 1);
1663       if (TREE_CODE (mby) != INTEGER_CST)
1664         return false;
1665
1666       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1667         return false;
1668
1669       *mul = wi::sext (res * wi::to_widest (mby), precision);
1670       return true;
1671
1672     case PLUS_EXPR:
1673     case MINUS_EXPR:
1674       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1675           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1676         return false;
1677
1678       if (code == MINUS_EXPR)
1679         p1 = -p1;
1680       *mul = wi::sext (p0 + p1, precision);
1681       return true;
1682
1683     case INTEGER_CST:
1684       if (TREE_CODE (bot) != INTEGER_CST)
1685         return false;
1686
1687       p0 = widest_int::from (top, SIGNED);
1688       p1 = widest_int::from (bot, SIGNED);
1689       if (p1 == 0)
1690         return false;
1691       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1692       return res == 0;
1693
1694     default:
1695       return false;
1696     }
1697 }
1698
1699 /* Return true if memory reference REF with step STEP may be unaligned.  */
1700
1701 static bool
1702 may_be_unaligned_p (tree ref, tree step)
1703 {
1704   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1705      thus they are not misaligned.  */
1706   if (TREE_CODE (ref) == TARGET_MEM_REF)
1707     return false;
1708
1709   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1710   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1711     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1712
1713   unsigned HOST_WIDE_INT bitpos;
1714   unsigned int ref_align;
1715   get_object_alignment_1 (ref, &ref_align, &bitpos);
1716   if (ref_align < align
1717       || (bitpos % align) != 0
1718       || (bitpos % BITS_PER_UNIT) != 0)
1719     return true;
1720
1721   unsigned int trailing_zeros = tree_ctz (step);
1722   if (trailing_zeros < HOST_BITS_PER_INT
1723       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1724     return true;
1725
1726   return false;
1727 }
1728
1729 /* Return true if EXPR may be non-addressable.   */
1730
1731 bool
1732 may_be_nonaddressable_p (tree expr)
1733 {
1734   switch (TREE_CODE (expr))
1735     {
1736     case TARGET_MEM_REF:
1737       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1738          target, thus they are always addressable.  */
1739       return false;
1740
1741     case COMPONENT_REF:
1742       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1743              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1744
1745     case VIEW_CONVERT_EXPR:
1746       /* This kind of view-conversions may wrap non-addressable objects
1747          and make them look addressable.  After some processing the
1748          non-addressability may be uncovered again, causing ADDR_EXPRs
1749          of inappropriate objects to be built.  */
1750       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1751           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1752         return true;
1753
1754       /* ... fall through ... */
1755
1756     case ARRAY_REF:
1757     case ARRAY_RANGE_REF:
1758       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1759
1760     CASE_CONVERT:
1761       return true;
1762
1763     default:
1764       break;
1765     }
1766
1767   return false;
1768 }
1769
1770 /* Finds addresses in *OP_P inside STMT.  */
1771
1772 static void
1773 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1774 {
1775   tree base = *op_p, step = size_zero_node;
1776   struct iv *civ;
1777   struct ifs_ivopts_data ifs_ivopts_data;
1778
1779   /* Do not play with volatile memory references.  A bit too conservative,
1780      perhaps, but safe.  */
1781   if (gimple_has_volatile_ops (stmt))
1782     goto fail;
1783
1784   /* Ignore bitfields for now.  Not really something terribly complicated
1785      to handle.  TODO.  */
1786   if (TREE_CODE (base) == BIT_FIELD_REF)
1787     goto fail;
1788
1789   base = unshare_expr (base);
1790
1791   if (TREE_CODE (base) == TARGET_MEM_REF)
1792     {
1793       tree type = build_pointer_type (TREE_TYPE (base));
1794       tree astep;
1795
1796       if (TMR_BASE (base)
1797           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1798         {
1799           civ = get_iv (data, TMR_BASE (base));
1800           if (!civ)
1801             goto fail;
1802
1803           TMR_BASE (base) = civ->base;
1804           step = civ->step;
1805         }
1806       if (TMR_INDEX2 (base)
1807           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1808         {
1809           civ = get_iv (data, TMR_INDEX2 (base));
1810           if (!civ)
1811             goto fail;
1812
1813           TMR_INDEX2 (base) = civ->base;
1814           step = civ->step;
1815         }
1816       if (TMR_INDEX (base)
1817           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1818         {
1819           civ = get_iv (data, TMR_INDEX (base));
1820           if (!civ)
1821             goto fail;
1822
1823           TMR_INDEX (base) = civ->base;
1824           astep = civ->step;
1825
1826           if (astep)
1827             {
1828               if (TMR_STEP (base))
1829                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1830
1831               step = fold_build2 (PLUS_EXPR, type, step, astep);
1832             }
1833         }
1834
1835       if (integer_zerop (step))
1836         goto fail;
1837       base = tree_mem_ref_addr (type, base);
1838     }
1839   else
1840     {
1841       ifs_ivopts_data.ivopts_data = data;
1842       ifs_ivopts_data.stmt = stmt;
1843       ifs_ivopts_data.step = size_zero_node;
1844       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1845           || integer_zerop (ifs_ivopts_data.step))
1846         goto fail;
1847       step = ifs_ivopts_data.step;
1848
1849       /* Check that the base expression is addressable.  This needs
1850          to be done after substituting bases of IVs into it.  */
1851       if (may_be_nonaddressable_p (base))
1852         goto fail;
1853
1854       /* Moreover, on strict alignment platforms, check that it is
1855          sufficiently aligned.  */
1856       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1857         goto fail;
1858
1859       base = build_fold_addr_expr (base);
1860
1861       /* Substituting bases of IVs into the base expression might
1862          have caused folding opportunities.  */
1863       if (TREE_CODE (base) == ADDR_EXPR)
1864         {
1865           tree *ref = &TREE_OPERAND (base, 0);
1866           while (handled_component_p (*ref))
1867             ref = &TREE_OPERAND (*ref, 0);
1868           if (TREE_CODE (*ref) == MEM_REF)
1869             {
1870               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1871                                       TREE_OPERAND (*ref, 0),
1872                                       TREE_OPERAND (*ref, 1));
1873               if (tem)
1874                 *ref = tem;
1875             }
1876         }
1877     }
1878
1879   civ = alloc_iv (base, step);
1880   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1881   return;
1882
1883 fail:
1884   for_each_index (op_p, idx_record_use, data);
1885 }
1886
1887 /* Finds and records invariants used in STMT.  */
1888
1889 static void
1890 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1891 {
1892   ssa_op_iter iter;
1893   use_operand_p use_p;
1894   tree op;
1895
1896   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1897     {
1898       op = USE_FROM_PTR (use_p);
1899       record_invariant (data, op, false);
1900     }
1901 }
1902
1903 /* Finds interesting uses of induction variables in the statement STMT.  */
1904
1905 static void
1906 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1907 {
1908   struct iv *iv;
1909   tree op, *lhs, *rhs;
1910   ssa_op_iter iter;
1911   use_operand_p use_p;
1912   enum tree_code code;
1913
1914   find_invariants_stmt (data, stmt);
1915
1916   if (gimple_code (stmt) == GIMPLE_COND)
1917     {
1918       find_interesting_uses_cond (data, stmt);
1919       return;
1920     }
1921
1922   if (is_gimple_assign (stmt))
1923     {
1924       lhs = gimple_assign_lhs_ptr (stmt);
1925       rhs = gimple_assign_rhs1_ptr (stmt);
1926
1927       if (TREE_CODE (*lhs) == SSA_NAME)
1928         {
1929           /* If the statement defines an induction variable, the uses are not
1930              interesting by themselves.  */
1931
1932           iv = get_iv (data, *lhs);
1933
1934           if (iv && !integer_zerop (iv->step))
1935             return;
1936         }
1937
1938       code = gimple_assign_rhs_code (stmt);
1939       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1940           && (REFERENCE_CLASS_P (*rhs)
1941               || is_gimple_val (*rhs)))
1942         {
1943           if (REFERENCE_CLASS_P (*rhs))
1944             find_interesting_uses_address (data, stmt, rhs);
1945           else
1946             find_interesting_uses_op (data, *rhs);
1947
1948           if (REFERENCE_CLASS_P (*lhs))
1949             find_interesting_uses_address (data, stmt, lhs);
1950           return;
1951         }
1952       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1953         {
1954           find_interesting_uses_cond (data, stmt);
1955           return;
1956         }
1957
1958       /* TODO -- we should also handle address uses of type
1959
1960          memory = call (whatever);
1961
1962          and
1963
1964          call (memory).  */
1965     }
1966
1967   if (gimple_code (stmt) == GIMPLE_PHI
1968       && gimple_bb (stmt) == data->current_loop->header)
1969     {
1970       iv = get_iv (data, PHI_RESULT (stmt));
1971
1972       if (iv && !integer_zerop (iv->step))
1973         return;
1974     }
1975
1976   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1977     {
1978       op = USE_FROM_PTR (use_p);
1979
1980       if (TREE_CODE (op) != SSA_NAME)
1981         continue;
1982
1983       iv = get_iv (data, op);
1984       if (!iv)
1985         continue;
1986
1987       find_interesting_uses_op (data, op);
1988     }
1989 }
1990
1991 /* Finds interesting uses of induction variables outside of loops
1992    on loop exit edge EXIT.  */
1993
1994 static void
1995 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1996 {
1997   gimple phi;
1998   gimple_stmt_iterator psi;
1999   tree def;
2000
2001   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2002     {
2003       phi = gsi_stmt (psi);
2004       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2005       if (!virtual_operand_p (def))
2006         find_interesting_uses_op (data, def);
2007     }
2008 }
2009
2010 /* Finds uses of the induction variables that are interesting.  */
2011
2012 static void
2013 find_interesting_uses (struct ivopts_data *data)
2014 {
2015   basic_block bb;
2016   gimple_stmt_iterator bsi;
2017   basic_block *body = get_loop_body (data->current_loop);
2018   unsigned i;
2019   struct version_info *info;
2020   edge e;
2021
2022   if (dump_file && (dump_flags & TDF_DETAILS))
2023     fprintf (dump_file, "Uses:\n\n");
2024
2025   for (i = 0; i < data->current_loop->num_nodes; i++)
2026     {
2027       edge_iterator ei;
2028       bb = body[i];
2029
2030       FOR_EACH_EDGE (e, ei, bb->succs)
2031         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2032             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2033           find_interesting_uses_outside (data, e);
2034
2035       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2036         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2037       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2038         if (!is_gimple_debug (gsi_stmt (bsi)))
2039           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2040     }
2041
2042   if (dump_file && (dump_flags & TDF_DETAILS))
2043     {
2044       bitmap_iterator bi;
2045
2046       fprintf (dump_file, "\n");
2047
2048       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2049         {
2050           info = ver_info (data, i);
2051           if (info->inv_id)
2052             {
2053               fprintf (dump_file, "  ");
2054               print_generic_expr (dump_file, info->name, TDF_SLIM);
2055               fprintf (dump_file, " is invariant (%d)%s\n",
2056                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2057             }
2058         }
2059
2060       fprintf (dump_file, "\n");
2061     }
2062
2063   free (body);
2064 }
2065
2066 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2067    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2068    we are at the top-level of the processed address.  */
2069
2070 static tree
2071 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2072                 HOST_WIDE_INT *offset)
2073 {
2074   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2075   enum tree_code code;
2076   tree type, orig_type = TREE_TYPE (expr);
2077   HOST_WIDE_INT off0, off1, st;
2078   tree orig_expr = expr;
2079
2080   STRIP_NOPS (expr);
2081
2082   type = TREE_TYPE (expr);
2083   code = TREE_CODE (expr);
2084   *offset = 0;
2085
2086   switch (code)
2087     {
2088     case INTEGER_CST:
2089       if (!cst_and_fits_in_hwi (expr)
2090           || integer_zerop (expr))
2091         return orig_expr;
2092
2093       *offset = int_cst_value (expr);
2094       return build_int_cst (orig_type, 0);
2095
2096     case POINTER_PLUS_EXPR:
2097     case PLUS_EXPR:
2098     case MINUS_EXPR:
2099       op0 = TREE_OPERAND (expr, 0);
2100       op1 = TREE_OPERAND (expr, 1);
2101
2102       op0 = strip_offset_1 (op0, false, false, &off0);
2103       op1 = strip_offset_1 (op1, false, false, &off1);
2104
2105       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2106       if (op0 == TREE_OPERAND (expr, 0)
2107           && op1 == TREE_OPERAND (expr, 1))
2108         return orig_expr;
2109
2110       if (integer_zerop (op1))
2111         expr = op0;
2112       else if (integer_zerop (op0))
2113         {
2114           if (code == MINUS_EXPR)
2115             expr = fold_build1 (NEGATE_EXPR, type, op1);
2116           else
2117             expr = op1;
2118         }
2119       else
2120         expr = fold_build2 (code, type, op0, op1);
2121
2122       return fold_convert (orig_type, expr);
2123
2124     case MULT_EXPR:
2125       op1 = TREE_OPERAND (expr, 1);
2126       if (!cst_and_fits_in_hwi (op1))
2127         return orig_expr;
2128
2129       op0 = TREE_OPERAND (expr, 0);
2130       op0 = strip_offset_1 (op0, false, false, &off0);
2131       if (op0 == TREE_OPERAND (expr, 0))
2132         return orig_expr;
2133
2134       *offset = off0 * int_cst_value (op1);
2135       if (integer_zerop (op0))
2136         expr = op0;
2137       else
2138         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2139
2140       return fold_convert (orig_type, expr);
2141
2142     case ARRAY_REF:
2143     case ARRAY_RANGE_REF:
2144       if (!inside_addr)
2145         return orig_expr;
2146
2147       step = array_ref_element_size (expr);
2148       if (!cst_and_fits_in_hwi (step))
2149         break;
2150
2151       st = int_cst_value (step);
2152       op1 = TREE_OPERAND (expr, 1);
2153       op1 = strip_offset_1 (op1, false, false, &off1);
2154       *offset = off1 * st;
2155
2156       if (top_compref
2157           && integer_zerop (op1))
2158         {
2159           /* Strip the component reference completely.  */
2160           op0 = TREE_OPERAND (expr, 0);
2161           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2162           *offset += off0;
2163           return op0;
2164         }
2165       break;
2166
2167     case COMPONENT_REF:
2168       {
2169         tree field;
2170
2171         if (!inside_addr)
2172           return orig_expr;
2173
2174         tmp = component_ref_field_offset (expr);
2175         field = TREE_OPERAND (expr, 1);
2176         if (top_compref
2177             && cst_and_fits_in_hwi (tmp)
2178             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2179           {
2180             HOST_WIDE_INT boffset, abs_off;
2181
2182             /* Strip the component reference completely.  */
2183             op0 = TREE_OPERAND (expr, 0);
2184             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2185             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2186             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2187             if (boffset < 0)
2188               abs_off = -abs_off;
2189
2190             *offset = off0 + int_cst_value (tmp) + abs_off;
2191             return op0;
2192           }
2193       }
2194       break;
2195
2196     case ADDR_EXPR:
2197       op0 = TREE_OPERAND (expr, 0);
2198       op0 = strip_offset_1 (op0, true, true, &off0);
2199       *offset += off0;
2200
2201       if (op0 == TREE_OPERAND (expr, 0))
2202         return orig_expr;
2203
2204       expr = build_fold_addr_expr (op0);
2205       return fold_convert (orig_type, expr);
2206
2207     case MEM_REF:
2208       /* ???  Offset operand?  */
2209       inside_addr = false;
2210       break;
2211
2212     default:
2213       return orig_expr;
2214     }
2215
2216   /* Default handling of expressions for that we want to recurse into
2217      the first operand.  */
2218   op0 = TREE_OPERAND (expr, 0);
2219   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2220   *offset += off0;
2221
2222   if (op0 == TREE_OPERAND (expr, 0)
2223       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2224     return orig_expr;
2225
2226   expr = copy_node (expr);
2227   TREE_OPERAND (expr, 0) = op0;
2228   if (op1)
2229     TREE_OPERAND (expr, 1) = op1;
2230
2231   /* Inside address, we might strip the top level component references,
2232      thus changing type of the expression.  Handling of ADDR_EXPR
2233      will fix that.  */
2234   expr = fold_convert (orig_type, expr);
2235
2236   return expr;
2237 }
2238
2239 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2240
2241 static tree
2242 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2243 {
2244   HOST_WIDE_INT off;
2245   tree core = strip_offset_1 (expr, false, false, &off);
2246   *offset = off;
2247   return core;
2248 }
2249
2250 /* Returns variant of TYPE that can be used as base for different uses.
2251    We return unsigned type with the same precision, which avoids problems
2252    with overflows.  */
2253
2254 static tree
2255 generic_type_for (tree type)
2256 {
2257   if (POINTER_TYPE_P (type))
2258     return unsigned_type_for (type);
2259
2260   if (TYPE_UNSIGNED (type))
2261     return type;
2262
2263   return unsigned_type_for (type);
2264 }
2265
2266 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2267    the bitmap to that we should store it.  */
2268
2269 static struct ivopts_data *fd_ivopts_data;
2270 static tree
2271 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2272 {
2273   bitmap *depends_on = (bitmap *) data;
2274   struct version_info *info;
2275
2276   if (TREE_CODE (*expr_p) != SSA_NAME)
2277     return NULL_TREE;
2278   info = name_info (fd_ivopts_data, *expr_p);
2279
2280   if (!info->inv_id || info->has_nonlin_use)
2281     return NULL_TREE;
2282
2283   if (!*depends_on)
2284     *depends_on = BITMAP_ALLOC (NULL);
2285   bitmap_set_bit (*depends_on, info->inv_id);
2286
2287   return NULL_TREE;
2288 }
2289
2290 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2291    position to POS.  If USE is not NULL, the candidate is set as related to
2292    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2293    replacement of the final value of the iv by a direct computation.  */
2294
2295 static struct iv_cand *
2296 add_candidate_1 (struct ivopts_data *data,
2297                  tree base, tree step, bool important, enum iv_position pos,
2298                  struct iv_use *use, gimple incremented_at)
2299 {
2300   unsigned i;
2301   struct iv_cand *cand = NULL;
2302   tree type, orig_type;
2303
2304   /* For non-original variables, make sure their values are computed in a type
2305      that does not invoke undefined behavior on overflows (since in general,
2306      we cannot prove that these induction variables are non-wrapping).  */
2307   if (pos != IP_ORIGINAL)
2308     {
2309       orig_type = TREE_TYPE (base);
2310       type = generic_type_for (orig_type);
2311       if (type != orig_type)
2312         {
2313           base = fold_convert (type, base);
2314           step = fold_convert (type, step);
2315         }
2316     }
2317
2318   for (i = 0; i < n_iv_cands (data); i++)
2319     {
2320       cand = iv_cand (data, i);
2321
2322       if (cand->pos != pos)
2323         continue;
2324
2325       if (cand->incremented_at != incremented_at
2326           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2327               && cand->ainc_use != use))
2328         continue;
2329
2330       if (!cand->iv)
2331         {
2332           if (!base && !step)
2333             break;
2334
2335           continue;
2336         }
2337
2338       if (!base && !step)
2339         continue;
2340
2341       if (operand_equal_p (base, cand->iv->base, 0)
2342           && operand_equal_p (step, cand->iv->step, 0)
2343           && (TYPE_PRECISION (TREE_TYPE (base))
2344               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2345         break;
2346     }
2347
2348   if (i == n_iv_cands (data))
2349     {
2350       cand = XCNEW (struct iv_cand);
2351       cand->id = i;
2352
2353       if (!base && !step)
2354         cand->iv = NULL;
2355       else
2356         cand->iv = alloc_iv (base, step);
2357
2358       cand->pos = pos;
2359       if (pos != IP_ORIGINAL && cand->iv)
2360         {
2361           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2362           cand->var_after = cand->var_before;
2363         }
2364       cand->important = important;
2365       cand->incremented_at = incremented_at;
2366       data->iv_candidates.safe_push (cand);
2367
2368       if (step
2369           && TREE_CODE (step) != INTEGER_CST)
2370         {
2371           fd_ivopts_data = data;
2372           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2373         }
2374
2375       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2376         cand->ainc_use = use;
2377       else
2378         cand->ainc_use = NULL;
2379
2380       if (dump_file && (dump_flags & TDF_DETAILS))
2381         dump_cand (dump_file, cand);
2382     }
2383
2384   if (important && !cand->important)
2385     {
2386       cand->important = true;
2387       if (dump_file && (dump_flags & TDF_DETAILS))
2388         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2389     }
2390
2391   if (use)
2392     {
2393       bitmap_set_bit (use->related_cands, i);
2394       if (dump_file && (dump_flags & TDF_DETAILS))
2395         fprintf (dump_file, "Candidate %d is related to use %d\n",
2396                  cand->id, use->id);
2397     }
2398
2399   return cand;
2400 }
2401
2402 /* Returns true if incrementing the induction variable at the end of the LOOP
2403    is allowed.
2404
2405    The purpose is to avoid splitting latch edge with a biv increment, thus
2406    creating a jump, possibly confusing other optimization passes and leaving
2407    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2408    is not available (so we do not have a better alternative), or if the latch
2409    edge is already nonempty.  */
2410
2411 static bool
2412 allow_ip_end_pos_p (struct loop *loop)
2413 {
2414   if (!ip_normal_pos (loop))
2415     return true;
2416
2417   if (!empty_block_p (ip_end_pos (loop)))
2418     return true;
2419
2420   return false;
2421 }
2422
2423 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2424    Important field is set to IMPORTANT.  */
2425
2426 static void
2427 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2428                         bool important, struct iv_use *use)
2429 {
2430   basic_block use_bb = gimple_bb (use->stmt);
2431   enum machine_mode mem_mode;
2432   unsigned HOST_WIDE_INT cstepi;
2433
2434   /* If we insert the increment in any position other than the standard
2435      ones, we must ensure that it is incremented once per iteration.
2436      It must not be in an inner nested loop, or one side of an if
2437      statement.  */
2438   if (use_bb->loop_father != data->current_loop
2439       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2440       || stmt_could_throw_p (use->stmt)
2441       || !cst_and_fits_in_hwi (step))
2442     return;
2443
2444   cstepi = int_cst_value (step);
2445
2446   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2447   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2448         || USE_STORE_PRE_INCREMENT (mem_mode))
2449        && GET_MODE_SIZE (mem_mode) == cstepi)
2450       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2451            || USE_STORE_PRE_DECREMENT (mem_mode))
2452           && GET_MODE_SIZE (mem_mode) == -cstepi))
2453     {
2454       enum tree_code code = MINUS_EXPR;
2455       tree new_base;
2456       tree new_step = step;
2457
2458       if (POINTER_TYPE_P (TREE_TYPE (base)))
2459         {
2460           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2461           code = POINTER_PLUS_EXPR;
2462         }
2463       else
2464         new_step = fold_convert (TREE_TYPE (base), new_step);
2465       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2466       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2467                        use->stmt);
2468     }
2469   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2470         || USE_STORE_POST_INCREMENT (mem_mode))
2471        && GET_MODE_SIZE (mem_mode) == cstepi)
2472       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2473            || USE_STORE_POST_DECREMENT (mem_mode))
2474           && GET_MODE_SIZE (mem_mode) == -cstepi))
2475     {
2476       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2477                        use->stmt);
2478     }
2479 }
2480
2481 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2482    position to POS.  If USE is not NULL, the candidate is set as related to
2483    it.  The candidate computation is scheduled on all available positions.  */
2484
2485 static void
2486 add_candidate (struct ivopts_data *data,
2487                tree base, tree step, bool important, struct iv_use *use)
2488 {
2489   if (ip_normal_pos (data->current_loop))
2490     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2491   if (ip_end_pos (data->current_loop)
2492       && allow_ip_end_pos_p (data->current_loop))
2493     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2494
2495   if (use != NULL && use->type == USE_ADDRESS)
2496     add_autoinc_candidates (data, base, step, important, use);
2497 }
2498
2499 /* Adds standard iv candidates.  */
2500
2501 static void
2502 add_standard_iv_candidates (struct ivopts_data *data)
2503 {
2504   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2505
2506   /* The same for a double-integer type if it is still fast enough.  */
2507   if (TYPE_PRECISION
2508         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2509       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2510     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2511                    build_int_cst (long_integer_type_node, 1), true, NULL);
2512
2513   /* The same for a double-integer type if it is still fast enough.  */
2514   if (TYPE_PRECISION
2515         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2516       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2517     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2518                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2519 }
2520
2521
2522 /* Adds candidates bases on the old induction variable IV.  */
2523
2524 static void
2525 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2526 {
2527   gimple phi;
2528   tree def;
2529   struct iv_cand *cand;
2530
2531   add_candidate (data, iv->base, iv->step, true, NULL);
2532
2533   /* The same, but with initial value zero.  */
2534   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2535     add_candidate (data, size_int (0), iv->step, true, NULL);
2536   else
2537     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2538                    iv->step, true, NULL);
2539
2540   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2541   if (gimple_code (phi) == GIMPLE_PHI)
2542     {
2543       /* Additionally record the possibility of leaving the original iv
2544          untouched.  */
2545       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2546       /* Don't add candidate if it's from another PHI node because
2547          it's an affine iv appearing in the form of PEELED_CHREC.  */
2548       phi = SSA_NAME_DEF_STMT (def);
2549       if (gimple_code (phi) != GIMPLE_PHI)
2550         {
2551           cand = add_candidate_1 (data,
2552                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2553                                   SSA_NAME_DEF_STMT (def));
2554           cand->var_before = iv->ssa_name;
2555           cand->var_after = def;
2556         }
2557       else
2558         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2559     }
2560 }
2561
2562 /* Adds candidates based on the old induction variables.  */
2563
2564 static void
2565 add_old_ivs_candidates (struct ivopts_data *data)
2566 {
2567   unsigned i;
2568   struct iv *iv;
2569   bitmap_iterator bi;
2570
2571   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2572     {
2573       iv = ver_info (data, i)->iv;
2574       if (iv && iv->biv_p && !integer_zerop (iv->step))
2575         add_old_iv_candidates (data, iv);
2576     }
2577 }
2578
2579 /* Adds candidates based on the value of the induction variable IV and USE.  */
2580
2581 static void
2582 add_iv_value_candidates (struct ivopts_data *data,
2583                          struct iv *iv, struct iv_use *use)
2584 {
2585   unsigned HOST_WIDE_INT offset;
2586   tree base;
2587   tree basetype;
2588
2589   add_candidate (data, iv->base, iv->step, false, use);
2590
2591   /* The same, but with initial value zero.  Make such variable important,
2592      since it is generic enough so that possibly many uses may be based
2593      on it.  */
2594   basetype = TREE_TYPE (iv->base);
2595   if (POINTER_TYPE_P (basetype))
2596     basetype = sizetype;
2597   add_candidate (data, build_int_cst (basetype, 0),
2598                  iv->step, true, use);
2599
2600   /* Third, try removing the constant offset.  Make sure to even
2601      add a candidate for &a[0] vs. (T *)&a.  */
2602   base = strip_offset (iv->base, &offset);
2603   if (offset
2604       || base != iv->base)
2605     add_candidate (data, base, iv->step, false, use);
2606 }
2607
2608 /* Adds candidates based on the uses.  */
2609
2610 static void
2611 add_derived_ivs_candidates (struct ivopts_data *data)
2612 {
2613   unsigned i;
2614
2615   for (i = 0; i < n_iv_uses (data); i++)
2616     {
2617       struct iv_use *use = iv_use (data, i);
2618
2619       if (!use)
2620         continue;
2621
2622       switch (use->type)
2623         {
2624         case USE_NONLINEAR_EXPR:
2625         case USE_COMPARE:
2626         case USE_ADDRESS:
2627           /* Just add the ivs based on the value of the iv used here.  */
2628           add_iv_value_candidates (data, use->iv, use);
2629           break;
2630
2631         default:
2632           gcc_unreachable ();
2633         }
2634     }
2635 }
2636
2637 /* Record important candidates and add them to related_cands bitmaps
2638    if needed.  */
2639
2640 static void
2641 record_important_candidates (struct ivopts_data *data)
2642 {
2643   unsigned i;
2644   struct iv_use *use;
2645
2646   for (i = 0; i < n_iv_cands (data); i++)
2647     {
2648       struct iv_cand *cand = iv_cand (data, i);
2649
2650       if (cand->important)
2651         bitmap_set_bit (data->important_candidates, i);
2652     }
2653
2654   data->consider_all_candidates = (n_iv_cands (data)
2655                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2656
2657   if (data->consider_all_candidates)
2658     {
2659       /* We will not need "related_cands" bitmaps in this case,
2660          so release them to decrease peak memory consumption.  */
2661       for (i = 0; i < n_iv_uses (data); i++)
2662         {
2663           use = iv_use (data, i);
2664           BITMAP_FREE (use->related_cands);
2665         }
2666     }
2667   else
2668     {
2669       /* Add important candidates to the related_cands bitmaps.  */
2670       for (i = 0; i < n_iv_uses (data); i++)
2671         bitmap_ior_into (iv_use (data, i)->related_cands,
2672                          data->important_candidates);
2673     }
2674 }
2675
2676 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2677    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2678    we allocate a simple list to every use.  */
2679
2680 static void
2681 alloc_use_cost_map (struct ivopts_data *data)
2682 {
2683   unsigned i, size, s;
2684
2685   for (i = 0; i < n_iv_uses (data); i++)
2686     {
2687       struct iv_use *use = iv_use (data, i);
2688
2689       if (data->consider_all_candidates)
2690         size = n_iv_cands (data);
2691       else
2692         {
2693           s = bitmap_count_bits (use->related_cands);
2694
2695           /* Round up to the power of two, so that moduling by it is fast.  */
2696           size = s ? (1 << ceil_log2 (s)) : 1;
2697         }
2698
2699       use->n_map_members = size;
2700       use->cost_map = XCNEWVEC (struct cost_pair, size);
2701     }
2702 }
2703
2704 /* Returns description of computation cost of expression whose runtime
2705    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2706
2707 static comp_cost
2708 new_cost (unsigned runtime, unsigned complexity)
2709 {
2710   comp_cost cost;
2711
2712   cost.cost = runtime;
2713   cost.complexity = complexity;
2714
2715   return cost;
2716 }
2717
2718 /* Adds costs COST1 and COST2.  */
2719
2720 static comp_cost
2721 add_costs (comp_cost cost1, comp_cost cost2)
2722 {
2723   cost1.cost += cost2.cost;
2724   cost1.complexity += cost2.complexity;
2725
2726   return cost1;
2727 }
2728 /* Subtracts costs COST1 and COST2.  */
2729
2730 static comp_cost
2731 sub_costs (comp_cost cost1, comp_cost cost2)
2732 {
2733   cost1.cost -= cost2.cost;
2734   cost1.complexity -= cost2.complexity;
2735
2736   return cost1;
2737 }
2738
2739 /* Returns a negative number if COST1 < COST2, a positive number if
2740    COST1 > COST2, and 0 if COST1 = COST2.  */
2741
2742 static int
2743 compare_costs (comp_cost cost1, comp_cost cost2)
2744 {
2745   if (cost1.cost == cost2.cost)
2746     return cost1.complexity - cost2.complexity;
2747
2748   return cost1.cost - cost2.cost;
2749 }
2750
2751 /* Returns true if COST is infinite.  */
2752
2753 static bool
2754 infinite_cost_p (comp_cost cost)
2755 {
2756   return cost.cost == INFTY;
2757 }
2758
2759 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2760    on invariants DEPENDS_ON and that the value used in expressing it
2761    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2762
2763 static void
2764 set_use_iv_cost (struct ivopts_data *data,
2765                  struct iv_use *use, struct iv_cand *cand,
2766                  comp_cost cost, bitmap depends_on, tree value,
2767                  enum tree_code comp, int inv_expr_id)
2768 {
2769   unsigned i, s;
2770
2771   if (infinite_cost_p (cost))
2772     {
2773       BITMAP_FREE (depends_on);
2774       return;
2775     }
2776
2777   if (data->consider_all_candidates)
2778     {
2779       use->cost_map[cand->id].cand = cand;
2780       use->cost_map[cand->id].cost = cost;
2781       use->cost_map[cand->id].depends_on = depends_on;
2782       use->cost_map[cand->id].value = value;
2783       use->cost_map[cand->id].comp = comp;
2784       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2785       return;
2786     }
2787
2788   /* n_map_members is a power of two, so this computes modulo.  */
2789   s = cand->id & (use->n_map_members - 1);
2790   for (i = s; i < use->n_map_members; i++)
2791     if (!use->cost_map[i].cand)
2792       goto found;
2793   for (i = 0; i < s; i++)
2794     if (!use->cost_map[i].cand)
2795       goto found;
2796
2797   gcc_unreachable ();
2798
2799 found:
2800   use->cost_map[i].cand = cand;
2801   use->cost_map[i].cost = cost;
2802   use->cost_map[i].depends_on = depends_on;
2803   use->cost_map[i].value = value;
2804   use->cost_map[i].comp = comp;
2805   use->cost_map[i].inv_expr_id = inv_expr_id;
2806 }
2807
2808 /* Gets cost of (USE, CANDIDATE) pair.  */
2809
2810 static struct cost_pair *
2811 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2812                  struct iv_cand *cand)
2813 {
2814   unsigned i, s;
2815   struct cost_pair *ret;
2816
2817   if (!cand)
2818     return NULL;
2819
2820   if (data->consider_all_candidates)
2821     {
2822       ret = use->cost_map + cand->id;
2823       if (!ret->cand)
2824         return NULL;
2825
2826       return ret;
2827     }
2828
2829   /* n_map_members is a power of two, so this computes modulo.  */
2830   s = cand->id & (use->n_map_members - 1);
2831   for (i = s; i < use->n_map_members; i++)
2832     if (use->cost_map[i].cand == cand)
2833       return use->cost_map + i;
2834     else if (use->cost_map[i].cand == NULL)
2835       return NULL;
2836   for (i = 0; i < s; i++)
2837     if (use->cost_map[i].cand == cand)
2838       return use->cost_map + i;
2839     else if (use->cost_map[i].cand == NULL)
2840       return NULL;
2841
2842   return NULL;
2843 }
2844
2845 /* Returns estimate on cost of computing SEQ.  */
2846
2847 static unsigned
2848 seq_cost (rtx seq, bool speed)
2849 {
2850   unsigned cost = 0;
2851   rtx set;
2852
2853   for (; seq; seq = NEXT_INSN (seq))
2854     {
2855       set = single_set (seq);
2856       if (set)
2857         cost += set_src_cost (SET_SRC (set), speed);
2858       else
2859         cost++;
2860     }
2861
2862   return cost;
2863 }
2864
2865 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2866 static rtx
2867 produce_memory_decl_rtl (tree obj, int *regno)
2868 {
2869   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2870   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2871   rtx x;
2872
2873   gcc_assert (obj);
2874   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2875     {
2876       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2877       x = gen_rtx_SYMBOL_REF (address_mode, name);
2878       SET_SYMBOL_REF_DECL (x, obj);
2879       x = gen_rtx_MEM (DECL_MODE (obj), x);
2880       set_mem_addr_space (x, as);
2881       targetm.encode_section_info (obj, x, true);
2882     }
2883   else
2884     {
2885       x = gen_raw_REG (address_mode, (*regno)++);
2886       x = gen_rtx_MEM (DECL_MODE (obj), x);
2887       set_mem_addr_space (x, as);
2888     }
2889
2890   return x;
2891 }
2892
2893 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2894    walk_tree.  DATA contains the actual fake register number.  */
2895
2896 static tree
2897 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2898 {
2899   tree obj = NULL_TREE;
2900   rtx x = NULL_RTX;
2901   int *regno = (int *) data;
2902
2903   switch (TREE_CODE (*expr_p))
2904     {
2905     case ADDR_EXPR:
2906       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2907            handled_component_p (*expr_p);
2908            expr_p = &TREE_OPERAND (*expr_p, 0))
2909         continue;
2910       obj = *expr_p;
2911       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2912         x = produce_memory_decl_rtl (obj, regno);
2913       break;
2914
2915     case SSA_NAME:
2916       *ws = 0;
2917       obj = SSA_NAME_VAR (*expr_p);
2918       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2919       if (!obj)
2920         return NULL_TREE;
2921       if (!DECL_RTL_SET_P (obj))
2922         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2923       break;
2924
2925     case VAR_DECL:
2926     case PARM_DECL:
2927     case RESULT_DECL:
2928       *ws = 0;
2929       obj = *expr_p;
2930
2931       if (DECL_RTL_SET_P (obj))
2932         break;
2933
2934       if (DECL_MODE (obj) == BLKmode)
2935         x = produce_memory_decl_rtl (obj, regno);
2936       else
2937         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2938
2939       break;
2940
2941     default:
2942       break;
2943     }
2944
2945   if (x)
2946     {
2947       decl_rtl_to_reset.safe_push (obj);
2948       SET_DECL_RTL (obj, x);
2949     }
2950
2951   return NULL_TREE;
2952 }
2953
2954 /* Determines cost of the computation of EXPR.  */
2955
2956 static unsigned
2957 computation_cost (tree expr, bool speed)
2958 {
2959   rtx seq, rslt;
2960   tree type = TREE_TYPE (expr);
2961   unsigned cost;
2962   /* Avoid using hard regs in ways which may be unsupported.  */
2963   int regno = LAST_VIRTUAL_REGISTER + 1;
2964   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2965   enum node_frequency real_frequency = node->frequency;
2966
2967   node->frequency = NODE_FREQUENCY_NORMAL;
2968   crtl->maybe_hot_insn_p = speed;
2969   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2970   start_sequence ();
2971   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2972   seq = get_insns ();
2973   end_sequence ();
2974   default_rtl_profile ();
2975   node->frequency = real_frequency;
2976
2977   cost = seq_cost (seq, speed);
2978   if (MEM_P (rslt))
2979     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2980                           TYPE_ADDR_SPACE (type), speed);
2981   else if (!REG_P (rslt))
2982     cost += set_src_cost (rslt, speed);
2983
2984   return cost;
2985 }
2986
2987 /* Returns variable containing the value of candidate CAND at statement AT.  */
2988
2989 static tree
2990 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2991 {
2992   if (stmt_after_increment (loop, cand, stmt))
2993     return cand->var_after;
2994   else
2995     return cand->var_before;
2996 }
2997
2998 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2999    same precision that is at least as wide as the precision of TYPE, stores
3000    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3001    type of A and B.  */
3002
3003 static tree
3004 determine_common_wider_type (tree *a, tree *b)
3005 {
3006   tree wider_type = NULL;
3007   tree suba, subb;
3008   tree atype = TREE_TYPE (*a);
3009
3010   if (CONVERT_EXPR_P (*a))
3011     {
3012       suba = TREE_OPERAND (*a, 0);
3013       wider_type = TREE_TYPE (suba);
3014       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3015         return atype;
3016     }
3017   else
3018     return atype;
3019
3020   if (CONVERT_EXPR_P (*b))
3021     {
3022       subb = TREE_OPERAND (*b, 0);
3023       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3024         return atype;
3025     }
3026   else
3027     return atype;
3028
3029   *a = suba;
3030   *b = subb;
3031   return wider_type;
3032 }
3033
3034 /* Determines the expression by that USE is expressed from induction variable
3035    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3036    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3037
3038 static bool
3039 get_computation_aff (struct loop *loop,
3040                      struct iv_use *use, struct iv_cand *cand, gimple at,
3041                      struct aff_tree *aff)
3042 {
3043   tree ubase = use->iv->base;
3044   tree ustep = use->iv->step;
3045   tree cbase = cand->iv->base;
3046   tree cstep = cand->iv->step, cstep_common;
3047   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3048   tree common_type, var;
3049   tree uutype;
3050   aff_tree cbase_aff, var_aff;
3051   widest_int rat;
3052
3053   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3054     {
3055       /* We do not have a precision to express the values of use.  */
3056       return false;
3057     }
3058
3059   var = var_at_stmt (loop, cand, at);
3060   uutype = unsigned_type_for (utype);
3061
3062   /* If the conversion is not noop, perform it.  */
3063   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3064     {
3065       cstep = fold_convert (uutype, cstep);
3066       cbase = fold_convert (uutype, cbase);
3067       var = fold_convert (uutype, var);
3068     }
3069
3070   if (!constant_multiple_of (ustep, cstep, &rat))
3071     return false;
3072
3073   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3074      type, we achieve better folding by computing their difference in this
3075      wider type, and cast the result to UUTYPE.  We do not need to worry about
3076      overflows, as all the arithmetics will in the end be performed in UUTYPE
3077      anyway.  */
3078   common_type = determine_common_wider_type (&ubase, &cbase);
3079
3080   /* use = ubase - ratio * cbase + ratio * var.  */
3081   tree_to_aff_combination (ubase, common_type, aff);
3082   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3083   tree_to_aff_combination (var, uutype, &var_aff);
3084
3085   /* We need to shift the value if we are after the increment.  */
3086   if (stmt_after_increment (loop, cand, at))
3087     {
3088       aff_tree cstep_aff;
3089
3090       if (common_type != uutype)
3091         cstep_common = fold_convert (common_type, cstep);
3092       else
3093         cstep_common = cstep;
3094
3095       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3096       aff_combination_add (&cbase_aff, &cstep_aff);
3097     }
3098
3099   aff_combination_scale (&cbase_aff, -rat);
3100   aff_combination_add (aff, &cbase_aff);
3101   if (common_type != uutype)
3102     aff_combination_convert (aff, uutype);
3103
3104   aff_combination_scale (&var_aff, rat);
3105   aff_combination_add (aff, &var_aff);
3106
3107   return true;
3108 }
3109
3110 /* Return the type of USE.  */
3111
3112 static tree
3113 get_use_type (struct iv_use *use)
3114 {
3115   tree base_type = TREE_TYPE (use->iv->base);
3116   tree type;
3117
3118   if (use->type == USE_ADDRESS)
3119     {
3120       /* The base_type may be a void pointer.  Create a pointer type based on
3121          the mem_ref instead.  */
3122       type = build_pointer_type (TREE_TYPE (*use->op_p));
3123       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3124                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3125     }
3126   else
3127     type = base_type;
3128
3129   return type;
3130 }
3131
3132 /* Determines the expression by that USE is expressed from induction variable
3133    CAND at statement AT in LOOP.  The computation is unshared.  */
3134
3135 static tree
3136 get_computation_at (struct loop *loop,
3137                     struct iv_use *use, struct iv_cand *cand, gimple at)
3138 {
3139   aff_tree aff;
3140   tree type = get_use_type (use);
3141
3142   if (!get_computation_aff (loop, use, cand, at, &aff))
3143     return NULL_TREE;
3144   unshare_aff_combination (&aff);
3145   return fold_convert (type, aff_combination_to_tree (&aff));
3146 }
3147
3148 /* Determines the expression by that USE is expressed from induction variable
3149    CAND in LOOP.  The computation is unshared.  */
3150
3151 static tree
3152 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3153 {
3154   return get_computation_at (loop, use, cand, use->stmt);
3155 }
3156
3157 /* Adjust the cost COST for being in loop setup rather than loop body.
3158    If we're optimizing for space, the loop setup overhead is constant;
3159    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3160 static unsigned
3161 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3162 {
3163   if (cost == INFTY)
3164     return cost;
3165   else if (optimize_loop_for_speed_p (data->current_loop))
3166     return cost / avg_loop_niter (data->current_loop);
3167   else
3168     return cost;
3169 }
3170
3171 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3172    validity for a memory reference accessing memory of mode MODE in
3173    address space AS.  */
3174
3175
3176 bool
3177 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3178                                  addr_space_t as)
3179 {
3180 #define MAX_RATIO 128
3181   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3182   static vec<sbitmap> valid_mult_list;
3183   sbitmap valid_mult;
3184
3185   if (data_index >= valid_mult_list.length ())
3186     valid_mult_list.safe_grow_cleared (data_index + 1);
3187
3188   valid_mult = valid_mult_list[data_index];
3189   if (!valid_mult)
3190     {
3191       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3192       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3193       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3194       rtx addr, scaled;
3195       HOST_WIDE_INT i;
3196
3197       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3198       bitmap_clear (valid_mult);
3199       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3200       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3201       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3202         {
3203           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3204           if (memory_address_addr_space_p (mode, addr, as)
3205               || memory_address_addr_space_p (mode, scaled, as))
3206             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3207         }
3208
3209       if (dump_file && (dump_flags & TDF_DETAILS))
3210         {
3211           fprintf (dump_file, "  allowed multipliers:");
3212           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3213             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3214               fprintf (dump_file, " %d", (int) i);
3215           fprintf (dump_file, "\n");
3216           fprintf (dump_file, "\n");
3217         }
3218
3219       valid_mult_list[data_index] = valid_mult;
3220     }
3221
3222   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3223     return false;
3224
3225   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3226 }
3227
3228 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3229    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3230    variable is omitted.  Compute the cost for a memory reference that accesses
3231    a memory location of mode MEM_MODE in address space AS.
3232
3233    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3234    size of MEM_MODE / RATIO) is available.  To make this determination, we
3235    look at the size of the increment to be made, which is given in CSTEP.
3236    CSTEP may be zero if the step is unknown.
3237    STMT_AFTER_INC is true iff the statement we're looking at is after the
3238    increment of the original biv.
3239
3240    TODO -- there must be some better way.  This all is quite crude.  */
3241
3242 enum ainc_type
3243 {
3244   AINC_PRE_INC,         /* Pre increment.  */
3245   AINC_PRE_DEC,         /* Pre decrement.  */
3246   AINC_POST_INC,        /* Post increment.  */
3247   AINC_POST_DEC,        /* Post decrement.  */
3248   AINC_NONE             /* Also the number of auto increment types.  */
3249 };
3250
3251 typedef struct address_cost_data_s
3252 {
3253   HOST_WIDE_INT min_offset, max_offset;
3254   unsigned costs[2][2][2][2];
3255   unsigned ainc_costs[AINC_NONE];
3256 } *address_cost_data;
3257
3258
3259 static comp_cost
3260 get_address_cost (bool symbol_present, bool var_present,
3261                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3262                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3263                   addr_space_t as, bool speed,
3264                   bool stmt_after_inc, bool *may_autoinc)
3265 {
3266   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3267   static vec<address_cost_data> address_cost_data_list;
3268   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3269   address_cost_data data;
3270   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3271   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3272   unsigned cost, acost, complexity;
3273   enum ainc_type autoinc_type;
3274   bool offset_p, ratio_p, autoinc;
3275   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3276   unsigned HOST_WIDE_INT mask;
3277   unsigned bits;
3278
3279   if (data_index >= address_cost_data_list.length ())
3280     address_cost_data_list.safe_grow_cleared (data_index + 1);
3281
3282   data = address_cost_data_list[data_index];
3283   if (!data)
3284     {
3285       HOST_WIDE_INT i;
3286       HOST_WIDE_INT rat, off = 0;
3287       int old_cse_not_expected, width;
3288       unsigned sym_p, var_p, off_p, rat_p, add_c;
3289       rtx seq, addr, base;
3290       rtx reg0, reg1;
3291
3292       data = (address_cost_data) xcalloc (1, sizeof (*data));
3293
3294       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3295
3296       width = GET_MODE_BITSIZE (address_mode) - 1;
3297       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3298         width = HOST_BITS_PER_WIDE_INT - 1;
3299       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3300
3301       for (i = width; i >= 0; i--)
3302         {
3303           off = -((unsigned HOST_WIDE_INT) 1 << i);
3304           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3305           if (memory_address_addr_space_p (mem_mode, addr, as))
3306             break;
3307         }
3308       data->min_offset = (i == -1? 0 : off);
3309
3310       for (i = width; i >= 0; i--)
3311         {
3312           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3313           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3314           if (memory_address_addr_space_p (mem_mode, addr, as))
3315             break;
3316           /* For some TARGET, like ARM THUMB1, the offset should be nature
3317              aligned.  Try an aligned offset if address_mode is not QImode.  */
3318           off = (address_mode == QImode)
3319                 ? 0
3320                 : ((unsigned HOST_WIDE_INT) 1 << i)
3321                     - GET_MODE_SIZE (address_mode);
3322           if (off > 0)
3323             {
3324               XEXP (addr, 1) = gen_int_mode (off, address_mode);
3325               if (memory_address_addr_space_p (mem_mode, addr, as))
3326                 break;
3327             }
3328         }
3329       if (i == -1)
3330         off = 0;
3331       data->max_offset = off;
3332
3333       if (dump_file && (dump_flags & TDF_DETAILS))
3334         {
3335           fprintf (dump_file, "get_address_cost:\n");
3336           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3337                    GET_MODE_NAME (mem_mode),
3338                    data->min_offset);
3339           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3340                    GET_MODE_NAME (mem_mode),
3341                    data->max_offset);
3342         }
3343
3344       rat = 1;
3345       for (i = 2; i <= MAX_RATIO; i++)
3346         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3347           {
3348             rat = i;
3349             break;
3350           }
3351
3352       /* Compute the cost of various addressing modes.  */
3353       acost = 0;
3354       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3355       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3356
3357       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3358           || USE_STORE_PRE_DECREMENT (mem_mode))
3359         {
3360           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3361           has_predec[mem_mode]
3362             = memory_address_addr_space_p (mem_mode, addr, as);
3363
3364           if (has_predec[mem_mode])
3365             data->ainc_costs[AINC_PRE_DEC]
3366               = address_cost (addr, mem_mode, as, speed);
3367         }
3368       if (USE_LOAD_POST_DECREMENT (mem_mode)
3369           || USE_STORE_POST_DECREMENT (mem_mode))
3370         {
3371           addr = gen_rtx_POST_DEC (address_mode, reg0);
3372           has_postdec[mem_mode]
3373             = memory_address_addr_space_p (mem_mode, addr, as);
3374
3375           if (has_postdec[mem_mode])
3376             data->ainc_costs[AINC_POST_DEC]
3377               = address_cost (addr, mem_mode, as, speed);
3378         }
3379       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3380           || USE_STORE_PRE_DECREMENT (mem_mode))
3381         {
3382           addr = gen_rtx_PRE_INC (address_mode, reg0);
3383           has_preinc[mem_mode]
3384             = memory_address_addr_space_p (mem_mode, addr, as);
3385
3386           if (has_preinc[mem_mode])
3387             data->ainc_costs[AINC_PRE_INC]
3388               = address_cost (addr, mem_mode, as, speed);
3389         }
3390       if (USE_LOAD_POST_INCREMENT (mem_mode)
3391           || USE_STORE_POST_INCREMENT (mem_mode))
3392         {
3393           addr = gen_rtx_POST_INC (address_mode, reg0);
3394           has_postinc[mem_mode]
3395             = memory_address_addr_space_p (mem_mode, addr, as);
3396
3397           if (has_postinc[mem_mode])
3398             data->ainc_costs[AINC_POST_INC]
3399               = address_cost (addr, mem_mode, as, speed);
3400         }
3401       for (i = 0; i < 16; i++)
3402         {
3403           sym_p = i & 1;
3404           var_p = (i >> 1) & 1;
3405           off_p = (i >> 2) & 1;
3406           rat_p = (i >> 3) & 1;
3407
3408           addr = reg0;
3409           if (rat_p)
3410             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3411                                    gen_int_mode (rat, address_mode));
3412
3413           if (var_p)
3414             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3415
3416           if (sym_p)
3417             {
3418               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3419               /* ??? We can run into trouble with some backends by presenting
3420                  it with symbols which haven't been properly passed through
3421                  targetm.encode_section_info.  By setting the local bit, we
3422                  enhance the probability of things working.  */
3423               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3424
3425               if (off_p)
3426                 base = gen_rtx_fmt_e (CONST, address_mode,
3427                                       gen_rtx_fmt_ee
3428                                         (PLUS, address_mode, base,
3429                                          gen_int_mode (off, address_mode)));
3430             }
3431           else if (off_p)
3432             base = gen_int_mode (off, address_mode);
3433           else
3434             base = NULL_RTX;
3435
3436           if (base)
3437             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3438
3439           start_sequence ();
3440           /* To avoid splitting addressing modes, pretend that no cse will
3441              follow.  */
3442           old_cse_not_expected = cse_not_expected;
3443           cse_not_expected = true;
3444           addr = memory_address_addr_space (mem_mode, addr, as);
3445           cse_not_expected = old_cse_not_expected;
3446           seq = get_insns ();
3447           end_sequence ();
3448
3449           acost = seq_cost (seq, speed);
3450           acost += address_cost (addr, mem_mode, as, speed);
3451
3452           if (!acost)
3453             acost = 1;
3454           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3455         }
3456
3457       /* On some targets, it is quite expensive to load symbol to a register,
3458          which makes addresses that contain symbols look much more expensive.
3459          However, the symbol will have to be loaded in any case before the
3460          loop (and quite likely we have it in register already), so it does not
3461          make much sense to penalize them too heavily.  So make some final
3462          tweaks for the SYMBOL_PRESENT modes:
3463
3464          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3465          var is cheaper, use this mode with small penalty.
3466          If VAR_PRESENT is true, try whether the mode with
3467          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3468          if this is the case, use it.  */
3469       add_c = add_cost (speed, address_mode);
3470       for (i = 0; i < 8; i++)
3471         {
3472           var_p = i & 1;
3473           off_p = (i >> 1) & 1;
3474           rat_p = (i >> 2) & 1;
3475
3476           acost = data->costs[0][1][off_p][rat_p] + 1;
3477           if (var_p)
3478             acost += add_c;
3479
3480           if (acost < data->costs[1][var_p][off_p][rat_p])
3481             data->costs[1][var_p][off_p][rat_p] = acost;
3482         }
3483
3484       if (dump_file && (dump_flags & TDF_DETAILS))
3485         {
3486           fprintf (dump_file, "Address costs:\n");
3487
3488           for (i = 0; i < 16; i++)
3489             {
3490               sym_p = i & 1;
3491               var_p = (i >> 1) & 1;
3492               off_p = (i >> 2) & 1;
3493               rat_p = (i >> 3) & 1;
3494
3495               fprintf (dump_file, "  ");
3496               if (sym_p)
3497                 fprintf (dump_file, "sym + ");
3498               if (var_p)
3499                 fprintf (dump_file, "var + ");
3500               if (off_p)
3501                 fprintf (dump_file, "cst + ");
3502               if (rat_p)
3503                 fprintf (dump_file, "rat * ");
3504
3505               acost = data->costs[sym_p][var_p][off_p][rat_p];
3506               fprintf (dump_file, "index costs %d\n", acost);
3507             }
3508           if (has_predec[mem_mode] || has_postdec[mem_mode]
3509               || has_preinc[mem_mode] || has_postinc[mem_mode])
3510             fprintf (dump_file, "  May include autoinc/dec\n");
3511           fprintf (dump_file, "\n");
3512         }
3513
3514       address_cost_data_list[data_index] = data;
3515     }
3516
3517   bits = GET_MODE_BITSIZE (address_mode);
3518   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3519   offset &= mask;
3520   if ((offset >> (bits - 1) & 1))
3521     offset |= ~mask;
3522   s_offset = offset;
3523
3524   autoinc = false;
3525   autoinc_type = AINC_NONE;
3526   msize = GET_MODE_SIZE (mem_mode);
3527   autoinc_offset = offset;
3528   if (stmt_after_inc)
3529     autoinc_offset += ratio * cstep;
3530   if (symbol_present || var_present || ratio != 1)
3531     autoinc = false;
3532   else
3533     {
3534       if (has_postinc[mem_mode] && autoinc_offset == 0
3535           && msize == cstep)
3536         autoinc_type = AINC_POST_INC;
3537       else if (has_postdec[mem_mode] && autoinc_offset == 0
3538                && msize == -cstep)
3539         autoinc_type = AINC_POST_DEC;
3540       else if (has_preinc[mem_mode] && autoinc_offset == msize
3541                && msize == cstep)
3542         autoinc_type = AINC_PRE_INC;
3543       else if (has_predec[mem_mode] && autoinc_offset == -msize
3544                && msize == -cstep)
3545         autoinc_type = AINC_PRE_DEC;
3546
3547       if (autoinc_type != AINC_NONE)
3548         autoinc = true;
3549     }
3550
3551   cost = 0;
3552   offset_p = (s_offset != 0
3553               && data->min_offset <= s_offset
3554               && s_offset <= data->max_offset);
3555   ratio_p = (ratio != 1
3556              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3557
3558   if (ratio != 1 && !ratio_p)
3559     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3560
3561   if (s_offset && !offset_p && !symbol_present)
3562     cost += add_cost (speed, address_mode);
3563
3564   if (may_autoinc)
3565     *may_autoinc = autoinc;
3566   if (autoinc)
3567     acost = data->ainc_costs[autoinc_type];
3568   else
3569     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3570   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3571   return new_cost (cost + acost, complexity);
3572 }
3573
3574  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3575     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3576     calculating the operands of EXPR.  Returns true if successful, and returns
3577     the cost in COST.  */
3578
3579 static bool
3580 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3581                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3582 {
3583   comp_cost res;
3584   tree op1 = TREE_OPERAND (expr, 1);
3585   tree cst = TREE_OPERAND (mult, 1);
3586   tree multop = TREE_OPERAND (mult, 0);
3587   int m = exact_log2 (int_cst_value (cst));
3588   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3589   int sa_cost;
3590   bool equal_p = false;
3591
3592   if (!(m >= 0 && m < maxm))
3593     return false;
3594
3595   if (operand_equal_p (op1, mult, 0))
3596     equal_p = true;
3597
3598   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3599              ? shiftadd_cost (speed, mode, m)
3600              : (equal_p
3601                 ? shiftsub1_cost (speed, mode, m)
3602                 : shiftsub0_cost (speed, mode, m)));
3603   res = new_cost (sa_cost, 0);
3604   res = add_costs (res, equal_p ? cost0 : cost1);
3605
3606   STRIP_NOPS (multop);
3607   if (!is_gimple_val (multop))
3608     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3609
3610   *cost = res;
3611   return true;
3612 }
3613
3614 /* Estimates cost of forcing expression EXPR into a variable.  */
3615
3616 static comp_cost
3617 force_expr_to_var_cost (tree expr, bool speed)
3618 {
3619   static bool costs_initialized = false;
3620   static unsigned integer_cost [2];
3621   static unsigned symbol_cost [2];
3622   static unsigned address_cost [2];
3623   tree op0, op1;
3624   comp_cost cost0, cost1, cost;
3625   enum machine_mode mode;
3626
3627   if (!costs_initialized)
3628     {
3629       tree type = build_pointer_type (integer_type_node);
3630       tree var, addr;
3631       rtx x;
3632       int i;
3633
3634       var = create_tmp_var_raw (integer_type_node, "test_var");
3635       TREE_STATIC (var) = 1;
3636       x = produce_memory_decl_rtl (var, NULL);
3637       SET_DECL_RTL (var, x);
3638
3639       addr = build1 (ADDR_EXPR, type, var);
3640
3641
3642       for (i = 0; i < 2; i++)
3643         {
3644           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3645                                                              2000), i);
3646
3647           symbol_cost[i] = computation_cost (addr, i) + 1;
3648
3649           address_cost[i]
3650             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3651           if (dump_file && (dump_flags & TDF_DETAILS))
3652             {
3653               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3654               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3655               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3656               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3657               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3658               fprintf (dump_file, "\n");
3659             }
3660         }
3661
3662       costs_initialized = true;
3663     }
3664
3665   STRIP_NOPS (expr);
3666
3667   if (SSA_VAR_P (expr))
3668     return no_cost;
3669
3670   if (is_gimple_min_invariant (expr))
3671     {
3672       if (TREE_CODE (expr) == INTEGER_CST)
3673         return new_cost (integer_cost [speed], 0);
3674
3675       if (TREE_CODE (expr) == ADDR_EXPR)
3676         {
3677           tree obj = TREE_OPERAND (expr, 0);
3678
3679           if (TREE_CODE (obj) == VAR_DECL
3680               || TREE_CODE (obj) == PARM_DECL
3681               || TREE_CODE (obj) == RESULT_DECL)
3682             return new_cost (symbol_cost [speed], 0);
3683         }
3684
3685       return new_cost (address_cost [speed], 0);
3686     }
3687
3688   switch (TREE_CODE (expr))
3689     {
3690     case POINTER_PLUS_EXPR:
3691     case PLUS_EXPR:
3692     case MINUS_EXPR:
3693     case MULT_EXPR:
3694       op0 = TREE_OPERAND (expr, 0);
3695       op1 = TREE_OPERAND (expr, 1);
3696       STRIP_NOPS (op0);
3697       STRIP_NOPS (op1);
3698       break;
3699
3700     CASE_CONVERT:
3701     case NEGATE_EXPR:
3702       op0 = TREE_OPERAND (expr, 0);
3703       STRIP_NOPS (op0);
3704       op1 = NULL_TREE;
3705       break;
3706
3707     default:
3708       /* Just an arbitrary value, FIXME.  */
3709       return new_cost (target_spill_cost[speed], 0);
3710     }
3711
3712   if (op0 == NULL_TREE
3713       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3714     cost0 = no_cost;
3715   else
3716     cost0 = force_expr_to_var_cost (op0, speed);
3717
3718   if (op1 == NULL_TREE
3719       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3720     cost1 = no_cost;
3721   else
3722     cost1 = force_expr_to_var_cost (op1, speed);
3723
3724   mode = TYPE_MODE (TREE_TYPE (expr));
3725   switch (TREE_CODE (expr))
3726     {
3727     case POINTER_PLUS_EXPR:
3728     case PLUS_EXPR:
3729     case MINUS_EXPR:
3730     case NEGATE_EXPR:
3731       cost = new_cost (add_cost (speed, mode), 0);
3732       if (TREE_CODE (expr) != NEGATE_EXPR)
3733         {
3734           tree mult = NULL_TREE;
3735           comp_cost sa_cost;
3736           if (TREE_CODE (op1) == MULT_EXPR)
3737             mult = op1;
3738           else if (TREE_CODE (op0) == MULT_EXPR)
3739             mult = op0;
3740
3741           if (mult != NULL_TREE
3742               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3743               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3744                                     speed, &sa_cost))
3745             return sa_cost;
3746         }
3747       break;
3748
3749     CASE_CONVERT:
3750       {
3751         tree inner_mode, outer_mode;
3752         outer_mode = TREE_TYPE (expr);
3753         inner_mode = TREE_TYPE (op0);
3754         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3755                                        TYPE_MODE (inner_mode), speed), 0);
3756       }
3757       break;
3758
3759     case MULT_EXPR:
3760       if (cst_and_fits_in_hwi (op0))
3761         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3762                                              mode, speed), 0);
3763       else if (cst_and_fits_in_hwi (op1))
3764         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3765                                              mode, speed), 0);
3766       else
3767         return new_cost (target_spill_cost [speed], 0);
3768       break;
3769
3770     default:
3771       gcc_unreachable ();
3772     }
3773
3774   cost = add_costs (cost, cost0);
3775   cost = add_costs (cost, cost1);
3776
3777   /* Bound the cost by target_spill_cost.  The parts of complicated
3778      computations often are either loop invariant or at least can
3779      be shared between several iv uses, so letting this grow without
3780      limits would not give reasonable results.  */
3781   if (cost.cost > (int) target_spill_cost [speed])
3782     cost.cost = target_spill_cost [speed];
3783
3784   return cost;
3785 }
3786
3787 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3788    invariants the computation depends on.  */
3789
3790 static comp_cost
3791 force_var_cost (struct ivopts_data *data,
3792                 tree expr, bitmap *depends_on)
3793 {
3794   if (depends_on)
3795     {
3796       fd_ivopts_data = data;
3797       walk_tree (&expr, find_depends, depends_on, NULL);
3798     }
3799
3800   return force_expr_to_var_cost (expr, data->speed);
3801 }
3802
3803 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3804    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3805    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3806    invariants the computation depends on.  */
3807
3808 static comp_cost
3809 split_address_cost (struct ivopts_data *data,
3810                     tree addr, bool *symbol_present, bool *var_present,
3811                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3812 {
3813   tree core;
3814   HOST_WIDE_INT bitsize;
3815   HOST_WIDE_INT bitpos;
3816   tree toffset;
3817   enum machine_mode mode;
3818   int unsignedp, volatilep;
3819
3820   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3821                               &unsignedp, &volatilep, false);
3822
3823   if (toffset != 0
3824       || bitpos % BITS_PER_UNIT != 0
3825       || TREE_CODE (core) != VAR_DECL)
3826     {
3827       *symbol_present = false;
3828       *var_present = true;
3829       fd_ivopts_data = data;
3830       walk_tree (&addr, find_depends, depends_on, NULL);
3831       return new_cost (target_spill_cost[data->speed], 0);
3832     }
3833
3834   *offset += bitpos / BITS_PER_UNIT;
3835   if (TREE_STATIC (core)
3836       || DECL_EXTERNAL (core))
3837     {
3838       *symbol_present = true;
3839       *var_present = false;
3840       return no_cost;
3841     }
3842
3843   *symbol_present = false;
3844   *var_present = true;
3845   return no_cost;
3846 }
3847
3848 /* Estimates cost of expressing difference of addresses E1 - E2 as
3849    var + symbol + offset.  The value of offset is added to OFFSET,
3850    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3851    part is missing.  DEPENDS_ON is a set of the invariants the computation
3852    depends on.  */
3853
3854 static comp_cost
3855 ptr_difference_cost (struct ivopts_data *data,
3856                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3857                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3858 {
3859   HOST_WIDE_INT diff = 0;
3860   aff_tree aff_e1, aff_e2;
3861   tree type;
3862
3863   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3864
3865   if (ptr_difference_const (e1, e2, &diff))
3866     {
3867       *offset += diff;
3868       *symbol_present = false;
3869       *var_present = false;
3870       return no_cost;
3871     }
3872
3873   if (integer_zerop (e2))
3874     return split_address_cost (data, TREE_OPERAND (e1, 0),
3875                                symbol_present, var_present, offset, depends_on);
3876
3877   *symbol_present = false;
3878   *var_present = true;
3879
3880   type = signed_type_for (TREE_TYPE (e1));
3881   tree_to_aff_combination (e1, type, &aff_e1);
3882   tree_to_aff_combination (e2, type, &aff_e2);
3883   aff_combination_scale (&aff_e2, -1);
3884   aff_combination_add (&aff_e1, &aff_e2);
3885
3886   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3887 }
3888
3889 /* Estimates cost of expressing difference E1 - E2 as
3890    var + symbol + offset.  The value of offset is added to OFFSET,
3891    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3892    part is missing.  DEPENDS_ON is a set of the invariants the computation
3893    depends on.  */
3894
3895 static comp_cost
3896 difference_cost (struct ivopts_data *data,
3897                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3898                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3899 {
3900   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3901   unsigned HOST_WIDE_INT off1, off2;
3902   aff_tree aff_e1, aff_e2;
3903   tree type;
3904
3905   e1 = strip_offset (e1, &off1);
3906   e2 = strip_offset (e2, &off2);
3907   *offset += off1 - off2;
3908
3909   STRIP_NOPS (e1);
3910   STRIP_NOPS (e2);
3911
3912   if (TREE_CODE (e1) == ADDR_EXPR)
3913     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3914                                 offset, depends_on);
3915   *symbol_present = false;
3916
3917   if (operand_equal_p (e1, e2, 0))
3918     {
3919       *var_present = false;
3920       return no_cost;
3921     }
3922
3923   *var_present = true;
3924
3925   if (integer_zerop (e2))
3926     return force_var_cost (data, e1, depends_on);
3927
3928   if (integer_zerop (e1))
3929     {
3930       comp_cost cost = force_var_cost (data, e2, depends_on);
3931       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3932       return cost;
3933     }
3934
3935   type = signed_type_for (TREE_TYPE (e1));
3936   tree_to_aff_combination (e1, type, &aff_e1);
3937   tree_to_aff_combination (e2, type, &aff_e2);
3938   aff_combination_scale (&aff_e2, -1);
3939   aff_combination_add (&aff_e1, &aff_e2);
3940
3941   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3942 }
3943
3944 /* Returns true if AFF1 and AFF2 are identical.  */
3945
3946 static bool
3947 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3948 {
3949   unsigned i;
3950
3951   if (aff1->n != aff2->n)
3952     return false;
3953
3954   for (i = 0; i < aff1->n; i++)
3955     {
3956       if (aff1->elts[i].coef != aff2->elts[i].coef)
3957         return false;
3958
3959       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3960         return false;
3961     }
3962   return true;
3963 }
3964
3965 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3966
3967 static int
3968 get_expr_id (struct ivopts_data *data, tree expr)
3969 {
3970   struct iv_inv_expr_ent ent;
3971   struct iv_inv_expr_ent **slot;
3972
3973   ent.expr = expr;
3974   ent.hash = iterative_hash_expr (expr, 0);
3975   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3976   if (*slot)
3977     return (*slot)->id;
3978
3979   *slot = XNEW (struct iv_inv_expr_ent);
3980   (*slot)->expr = expr;
3981   (*slot)->hash = ent.hash;
3982   (*slot)->id = data->inv_expr_id++;
3983   return (*slot)->id;
3984 }
3985
3986 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3987    requires a new compiler generated temporary.  Returns -1 otherwise.
3988    ADDRESS_P is a flag indicating if the expression is for address
3989    computation.  */
3990
3991 static int
3992 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3993                             tree cbase, HOST_WIDE_INT ratio,
3994                             bool address_p)
3995 {
3996   aff_tree ubase_aff, cbase_aff;
3997   tree expr, ub, cb;
3998
3999   STRIP_NOPS (ubase);
4000   STRIP_NOPS (cbase);
4001   ub = ubase;
4002   cb = cbase;
4003
4004   if ((TREE_CODE (ubase) == INTEGER_CST)
4005       && (TREE_CODE (cbase) == INTEGER_CST))
4006     return -1;
4007
4008   /* Strips the constant part. */
4009   if (TREE_CODE (ubase) == PLUS_EXPR
4010       || TREE_CODE (ubase) == MINUS_EXPR
4011       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4012     {
4013       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4014         ubase = TREE_OPERAND (ubase, 0);
4015     }
4016
4017   /* Strips the constant part. */
4018   if (TREE_CODE (cbase) == PLUS_EXPR
4019       || TREE_CODE (cbase) == MINUS_EXPR
4020       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4021     {
4022       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4023         cbase = TREE_OPERAND (cbase, 0);
4024     }
4025
4026   if (address_p)
4027     {
4028       if (((TREE_CODE (ubase) == SSA_NAME)
4029            || (TREE_CODE (ubase) == ADDR_EXPR
4030                && is_gimple_min_invariant (ubase)))
4031           && (TREE_CODE (cbase) == INTEGER_CST))
4032         return -1;
4033
4034       if (((TREE_CODE (cbase) == SSA_NAME)
4035            || (TREE_CODE (cbase) == ADDR_EXPR
4036                && is_gimple_min_invariant (cbase)))
4037           && (TREE_CODE (ubase) == INTEGER_CST))
4038         return -1;
4039     }
4040
4041   if (ratio == 1)
4042     {
4043       if (operand_equal_p (ubase, cbase, 0))
4044         return -1;
4045
4046       if (TREE_CODE (ubase) == ADDR_EXPR
4047           && TREE_CODE (cbase) == ADDR_EXPR)
4048         {
4049           tree usym, csym;
4050
4051           usym = TREE_OPERAND (ubase, 0);
4052           csym = TREE_OPERAND (cbase, 0);
4053           if (TREE_CODE (usym) == ARRAY_REF)
4054             {
4055               tree ind = TREE_OPERAND (usym, 1);
4056               if (TREE_CODE (ind) == INTEGER_CST
4057                   && tree_fits_shwi_p (ind)
4058                   && tree_to_shwi (ind) == 0)
4059                 usym = TREE_OPERAND (usym, 0);
4060             }
4061           if (TREE_CODE (csym) == ARRAY_REF)
4062             {
4063               tree ind = TREE_OPERAND (csym, 1);
4064               if (TREE_CODE (ind) == INTEGER_CST
4065                   && tree_fits_shwi_p (ind)
4066                   && tree_to_shwi (ind) == 0)
4067                 csym = TREE_OPERAND (csym, 0);
4068             }
4069           if (operand_equal_p (usym, csym, 0))
4070             return -1;
4071         }
4072       /* Now do more complex comparison  */
4073       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4074       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4075       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4076         return -1;
4077     }
4078
4079   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4080   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4081
4082   aff_combination_scale (&cbase_aff, -1 * ratio);
4083   aff_combination_add (&ubase_aff, &cbase_aff);
4084   expr = aff_combination_to_tree (&ubase_aff);
4085   return get_expr_id (data, expr);
4086 }
4087
4088
4089
4090 /* Determines the cost of the computation by that USE is expressed
4091    from induction variable CAND.  If ADDRESS_P is true, we just need
4092    to create an address from it, otherwise we want to get it into
4093    register.  A set of invariants we depend on is stored in
4094    DEPENDS_ON.  AT is the statement at that the value is computed.
4095    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4096    addressing is likely.  */
4097
4098 static comp_cost
4099 get_computation_cost_at (struct ivopts_data *data,
4100                          struct iv_use *use, struct iv_cand *cand,
4101                          bool address_p, bitmap *depends_on, gimple at,
4102                          bool *can_autoinc,
4103                          int *inv_expr_id)
4104 {
4105   tree ubase = use->iv->base, ustep = use->iv->step;
4106   tree cbase, cstep;
4107   tree utype = TREE_TYPE (ubase), ctype;
4108   unsigned HOST_WIDE_INT cstepi, offset = 0;
4109   HOST_WIDE_INT ratio, aratio;
4110   bool var_present, symbol_present, stmt_is_after_inc;
4111   comp_cost cost;
4112   widest_int rat;
4113   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4114   enum machine_mode mem_mode = (address_p
4115                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4116                                 : VOIDmode);
4117
4118   *depends_on = NULL;
4119
4120   /* Only consider real candidates.  */
4121   if (!cand->iv)
4122     return infinite_cost;
4123
4124   cbase = cand->iv->base;
4125   cstep = cand->iv->step;
4126   ctype = TREE_TYPE (cbase);
4127
4128   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4129     {
4130       /* We do not have a precision to express the values of use.  */
4131       return infinite_cost;
4132     }
4133
4134   if (address_p
4135       || (use->iv->base_object
4136           && cand->iv->base_object
4137           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4138           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4139     {
4140       /* Do not try to express address of an object with computation based
4141          on address of a different object.  This may cause problems in rtl
4142          level alias analysis (that does not expect this to be happening,
4143          as this is illegal in C), and would be unlikely to be useful
4144          anyway.  */
4145       if (use->iv->base_object
4146           && cand->iv->base_object
4147           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4148         return infinite_cost;
4149     }
4150
4151   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4152     {
4153       /* TODO -- add direct handling of this case.  */
4154       goto fallback;
4155     }
4156
4157   /* CSTEPI is removed from the offset in case statement is after the
4158      increment.  If the step is not constant, we use zero instead.
4159      This is a bit imprecise (there is the extra addition), but
4160      redundancy elimination is likely to transform the code so that
4161      it uses value of the variable before increment anyway,
4162      so it is not that much unrealistic.  */
4163   if (cst_and_fits_in_hwi (cstep))
4164     cstepi = int_cst_value (cstep);
4165   else
4166     cstepi = 0;
4167
4168   if (!constant_multiple_of (ustep, cstep, &rat))
4169     return infinite_cost;
4170
4171   if (wi::fits_shwi_p (rat))
4172     ratio = rat.to_shwi ();
4173   else
4174     return infinite_cost;
4175
4176   STRIP_NOPS (cbase);
4177   ctype = TREE_TYPE (cbase);
4178
4179   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4180
4181   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4182      or ratio == 1, it is better to handle this like
4183
4184      ubase - ratio * cbase + ratio * var
4185
4186      (also holds in the case ratio == -1, TODO.  */
4187
4188   if (cst_and_fits_in_hwi (cbase))
4189     {
4190       offset = - ratio * int_cst_value (cbase);
4191       cost = difference_cost (data,
4192                               ubase, build_int_cst (utype, 0),
4193                               &symbol_present, &var_present, &offset,
4194                               depends_on);
4195       cost.cost /= avg_loop_niter (data->current_loop);
4196     }
4197   else if (ratio == 1)
4198     {
4199       tree real_cbase = cbase;
4200
4201       /* Check to see if any adjustment is needed.  */
4202       if (cstepi == 0 && stmt_is_after_inc)
4203         {
4204           aff_tree real_cbase_aff;
4205           aff_tree cstep_aff;
4206
4207           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4208                                    &real_cbase_aff);
4209           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4210
4211           aff_combination_add (&real_cbase_aff, &cstep_aff);
4212           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4213         }
4214
4215       cost = difference_cost (data,
4216                               ubase, real_cbase,
4217                               &symbol_present, &var_present, &offset,
4218                               depends_on);
4219       cost.cost /= avg_loop_niter (data->current_loop);
4220     }
4221   else if (address_p
4222            && !POINTER_TYPE_P (ctype)
4223            && multiplier_allowed_in_address_p
4224                 (ratio, mem_mode,
4225                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4226     {
4227       cbase
4228         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4229       cost = difference_cost (data,
4230                               ubase, cbase,
4231                               &symbol_present, &var_present, &offset,
4232                               depends_on);
4233       cost.cost /= avg_loop_niter (data->current_loop);
4234     }
4235   else
4236     {
4237       cost = force_var_cost (data, cbase, depends_on);
4238       cost = add_costs (cost,
4239                         difference_cost (data,
4240                                          ubase, build_int_cst (utype, 0),
4241                                          &symbol_present, &var_present,
4242                                          &offset, depends_on));
4243       cost.cost /= avg_loop_niter (data->current_loop);
4244       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4245     }
4246
4247   if (inv_expr_id)
4248     {
4249       *inv_expr_id =
4250           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4251       /* Clear depends on.  */
4252       if (*inv_expr_id != -1 && depends_on && *depends_on)
4253         bitmap_clear (*depends_on);
4254     }
4255
4256   /* If we are after the increment, the value of the candidate is higher by
4257      one iteration.  */
4258   if (stmt_is_after_inc)
4259     offset -= ratio * cstepi;
4260
4261   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4262      (symbol/var1/const parts may be omitted).  If we are looking for an
4263      address, find the cost of addressing this.  */
4264   if (address_p)
4265     return add_costs (cost,
4266                       get_address_cost (symbol_present, var_present,
4267                                         offset, ratio, cstepi,
4268                                         mem_mode,
4269                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4270                                         speed, stmt_is_after_inc,
4271                                         can_autoinc));
4272
4273   /* Otherwise estimate the costs for computing the expression.  */
4274   if (!symbol_present && !var_present && !offset)
4275     {
4276       if (ratio != 1)
4277         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4278       return cost;
4279     }
4280
4281   /* Symbol + offset should be compile-time computable so consider that they
4282       are added once to the variable, if present.  */
4283   if (var_present && (symbol_present || offset))
4284     cost.cost += adjust_setup_cost (data,
4285                                     add_cost (speed, TYPE_MODE (ctype)));
4286
4287   /* Having offset does not affect runtime cost in case it is added to
4288      symbol, but it increases complexity.  */
4289   if (offset)
4290     cost.complexity++;
4291
4292   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4293
4294   aratio = ratio > 0 ? ratio : -ratio;
4295   if (aratio != 1)
4296     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4297   return cost;
4298
4299 fallback:
4300   if (can_autoinc)
4301     *can_autoinc = false;
4302
4303   {
4304     /* Just get the expression, expand it and measure the cost.  */
4305     tree comp = get_computation_at (data->current_loop, use, cand, at);
4306
4307     if (!comp)
4308       return infinite_cost;
4309
4310     if (address_p)
4311       comp = build_simple_mem_ref (comp);
4312
4313     return new_cost (computation_cost (comp, speed), 0);
4314   }
4315 }
4316
4317 /* Determines the cost of the computation by that USE is expressed
4318    from induction variable CAND.  If ADDRESS_P is true, we just need
4319    to create an address from it, otherwise we want to get it into
4320    register.  A set of invariants we depend on is stored in
4321    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4322    autoinc addressing is likely.  */
4323
4324 static comp_cost
4325 get_computation_cost (struct ivopts_data *data,
4326                       struct iv_use *use, struct iv_cand *cand,
4327                       bool address_p, bitmap *depends_on,
4328                       bool *can_autoinc, int *inv_expr_id)
4329 {
4330   return get_computation_cost_at (data,
4331                                   use, cand, address_p, depends_on, use->stmt,
4332                                   can_autoinc, inv_expr_id);
4333 }
4334
4335 /* Determines cost of basing replacement of USE on CAND in a generic
4336    expression.  */
4337
4338 static bool
4339 determine_use_iv_cost_generic (struct ivopts_data *data,
4340                                struct iv_use *use, struct iv_cand *cand)
4341 {
4342   bitmap depends_on;
4343   comp_cost cost;
4344   int inv_expr_id = -1;
4345
4346   /* The simple case first -- if we need to express value of the preserved
4347      original biv, the cost is 0.  This also prevents us from counting the
4348      cost of increment twice -- once at this use and once in the cost of
4349      the candidate.  */
4350   if (cand->pos == IP_ORIGINAL
4351       && cand->incremented_at == use->stmt)
4352     {
4353       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4354                        ERROR_MARK, -1);
4355       return true;
4356     }
4357
4358   cost = get_computation_cost (data, use, cand, false, &depends_on,
4359                                NULL, &inv_expr_id);
4360
4361   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4362                    inv_expr_id);
4363
4364   return !infinite_cost_p (cost);
4365 }
4366
4367 /* Determines cost of basing replacement of USE on CAND in an address.  */
4368
4369 static bool
4370 determine_use_iv_cost_address (struct ivopts_data *data,
4371                                struct iv_use *use, struct iv_cand *cand)
4372 {
4373   bitmap depends_on;
4374   bool can_autoinc;
4375   int inv_expr_id = -1;
4376   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4377                                          &can_autoinc, &inv_expr_id);
4378
4379   if (cand->ainc_use == use)
4380     {
4381       if (can_autoinc)
4382         cost.cost -= cand->cost_step;
4383       /* If we generated the candidate solely for exploiting autoincrement
4384          opportunities, and it turns out it can't be used, set the cost to
4385          infinity to make sure we ignore it.  */
4386       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4387         cost = infinite_cost;
4388     }
4389   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4390                    inv_expr_id);
4391
4392   return !infinite_cost_p (cost);
4393 }
4394
4395 /* Computes value of candidate CAND at position AT in iteration NITER, and
4396    stores it to VAL.  */
4397
4398 static void
4399 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4400                aff_tree *val)
4401 {
4402   aff_tree step, delta, nit;
4403   struct iv *iv = cand->iv;
4404   tree type = TREE_TYPE (iv->base);
4405   tree steptype = type;
4406   if (POINTER_TYPE_P (type))
4407     steptype = sizetype;
4408   steptype = unsigned_type_for (type);
4409
4410   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4411   aff_combination_convert (&step, steptype);
4412   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4413   aff_combination_convert (&nit, steptype);
4414   aff_combination_mult (&nit, &step, &delta);
4415   if (stmt_after_increment (loop, cand, at))
4416     aff_combination_add (&delta, &step);
4417
4418   tree_to_aff_combination (iv->base, type, val);
4419   if (!POINTER_TYPE_P (type))
4420     aff_combination_convert (val, steptype);
4421   aff_combination_add (val, &delta);
4422 }
4423
4424 /* Returns period of induction variable iv.  */
4425
4426 static tree
4427 iv_period (struct iv *iv)
4428 {
4429   tree step = iv->step, period, type;
4430   tree pow2div;
4431
4432   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4433
4434   type = unsigned_type_for (TREE_TYPE (step));
4435   /* Period of the iv is lcm (step, type_range)/step -1,
4436      i.e., N*type_range/step - 1. Since type range is power
4437      of two, N == (step >> num_of_ending_zeros_binary (step),
4438      so the final result is
4439
4440        (type_range >> num_of_ending_zeros_binary (step)) - 1
4441
4442   */
4443   pow2div = num_ending_zeros (step);
4444
4445   period = build_low_bits_mask (type,
4446                                 (TYPE_PRECISION (type)
4447                                  - tree_to_uhwi (pow2div)));
4448
4449   return period;
4450 }
4451
4452 /* Returns the comparison operator used when eliminating the iv USE.  */
4453
4454 static enum tree_code
4455 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4456 {
4457   struct loop *loop = data->current_loop;
4458   basic_block ex_bb;
4459   edge exit;
4460
4461   ex_bb = gimple_bb (use->stmt);
4462   exit = EDGE_SUCC (ex_bb, 0);
4463   if (flow_bb_inside_loop_p (loop, exit->dest))
4464     exit = EDGE_SUCC (ex_bb, 1);
4465
4466   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4467 }
4468
4469 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4470    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4471    calculation is performed in non-wrapping type.
4472
4473    TODO: More generally, we could test for the situation that
4474          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4475          This would require knowing the sign of OFFSET.  */
4476
4477 static bool
4478 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4479 {
4480   enum tree_code code;
4481   tree e1, e2;
4482   aff_tree aff_e1, aff_e2, aff_offset;
4483
4484   if (!nowrap_type_p (TREE_TYPE (base)))
4485     return false;
4486
4487   base = expand_simple_operations (base);
4488
4489   if (TREE_CODE (base) == SSA_NAME)
4490     {
4491       gimple stmt = SSA_NAME_DEF_STMT (base);
4492
4493       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4494         return false;
4495
4496       code = gimple_assign_rhs_code (stmt);
4497       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4498         return false;
4499
4500       e1 = gimple_assign_rhs1 (stmt);
4501       e2 = gimple_assign_rhs2 (stmt);
4502     }
4503   else
4504     {
4505       code = TREE_CODE (base);
4506       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4507         return false;
4508       e1 = TREE_OPERAND (base, 0);
4509       e2 = TREE_OPERAND (base, 1);
4510     }
4511
4512   /* Use affine expansion as deeper inspection to prove the equality.  */
4513   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4514                                   &aff_e2, &data->name_expansion_cache);
4515   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4516                                   &aff_offset, &data->name_expansion_cache);
4517   aff_combination_scale (&aff_offset, -1);
4518   switch (code)
4519     {
4520     case PLUS_EXPR:
4521       aff_combination_add (&aff_e2, &aff_offset);
4522       if (aff_combination_zero_p (&aff_e2))
4523         return true;
4524
4525       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4526                                       &aff_e1, &data->name_expansion_cache);
4527       aff_combination_add (&aff_e1, &aff_offset);
4528       return aff_combination_zero_p (&aff_e1);
4529
4530     case POINTER_PLUS_EXPR:
4531       aff_combination_add (&aff_e2, &aff_offset);
4532       return aff_combination_zero_p (&aff_e2);
4533
4534     default:
4535       return false;
4536     }
4537 }
4538
4539 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4540    comparison with CAND.  NITER describes the number of iterations of
4541    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4542
4543    We aim to handle the following situation:
4544
4545    sometype *base, *p;
4546    int a, b, i;
4547
4548    i = a;
4549    p = p_0 = base + a;
4550
4551    do
4552      {
4553        bla (*p);
4554        p++;
4555        i++;
4556      }
4557    while (i < b);
4558
4559    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4560    We aim to optimize this to
4561
4562    p = p_0 = base + a;
4563    do
4564      {
4565        bla (*p);
4566        p++;
4567      }
4568    while (p < p_0 - a + b);
4569
4570    This preserves the correctness, since the pointer arithmetics does not
4571    overflow.  More precisely:
4572
4573    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4574       overflow in computing it or the values of p.
4575    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4576       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4577
4578 static bool
4579 iv_elimination_compare_lt (struct ivopts_data *data,
4580                            struct iv_cand *cand, enum tree_code *comp_p,
4581                            struct tree_niter_desc *niter)
4582 {
4583   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4584   struct aff_tree nit, tmpa, tmpb;
4585   enum tree_code comp;
4586   HOST_WIDE_INT step;
4587
4588   /* We need to know that the candidate induction variable does not overflow.
4589      While more complex analysis may be used to prove this, for now just
4590      check that the variable appears in the original program and that it
4591      is computed in a type that guarantees no overflows.  */
4592   cand_type = TREE_TYPE (cand->iv->base);
4593   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4594     return false;
4595
4596   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4597      the calculation of the BOUND could overflow, making the comparison
4598      invalid.  */
4599   if (!data->loop_single_exit_p)
4600     return false;
4601
4602   /* We need to be able to decide whether candidate is increasing or decreasing
4603      in order to choose the right comparison operator.  */
4604   if (!cst_and_fits_in_hwi (cand->iv->step))
4605     return false;
4606   step = int_cst_value (cand->iv->step);
4607
4608   /* Check that the number of iterations matches the expected pattern:
4609      a + 1 > b ? 0 : b - a - 1.  */
4610   mbz = niter->may_be_zero;
4611   if (TREE_CODE (mbz) == GT_EXPR)
4612     {
4613       /* Handle a + 1 > b.  */
4614       tree op0 = TREE_OPERAND (mbz, 0);
4615       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4616         {
4617           a = TREE_OPERAND (op0, 0);
4618           b = TREE_OPERAND (mbz, 1);
4619         }
4620       else
4621         return false;
4622     }
4623   else if (TREE_CODE (mbz) == LT_EXPR)
4624     {
4625       tree op1 = TREE_OPERAND (mbz, 1);
4626
4627       /* Handle b < a + 1.  */
4628       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4629         {
4630           a = TREE_OPERAND (op1, 0);
4631           b = TREE_OPERAND (mbz, 0);
4632         }
4633       else
4634         return false;
4635     }
4636   else
4637     return false;
4638
4639   /* Expected number of iterations is B - A - 1.  Check that it matches
4640      the actual number, i.e., that B - A - NITER = 1.  */
4641   tree_to_aff_combination (niter->niter, nit_type, &nit);
4642   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4643   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4644   aff_combination_scale (&nit, -1);
4645   aff_combination_scale (&tmpa, -1);
4646   aff_combination_add (&tmpb, &tmpa);
4647   aff_combination_add (&tmpb, &nit);
4648   if (tmpb.n != 0 || tmpb.offset != 1)
4649     return false;
4650
4651   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4652      overflow.  */
4653   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4654                         cand->iv->step,
4655                         fold_convert (TREE_TYPE (cand->iv->step), a));
4656   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4657     return false;
4658
4659   /* Determine the new comparison operator.  */
4660   comp = step < 0 ? GT_EXPR : LT_EXPR;
4661   if (*comp_p == NE_EXPR)
4662     *comp_p = comp;
4663   else if (*comp_p == EQ_EXPR)
4664     *comp_p = invert_tree_comparison (comp, false);
4665   else
4666     gcc_unreachable ();
4667
4668   return true;
4669 }
4670
4671 /* Check whether it is possible to express the condition in USE by comparison
4672    of candidate CAND.  If so, store the value compared with to BOUND, and the
4673    comparison operator to COMP.  */
4674
4675 static bool
4676 may_eliminate_iv (struct ivopts_data *data,
4677                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4678                   enum tree_code *comp)
4679 {
4680   basic_block ex_bb;
4681   edge exit;
4682   tree period;
4683   struct loop *loop = data->current_loop;
4684   aff_tree bnd;
4685   struct tree_niter_desc *desc = NULL;
4686
4687   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4688     return false;
4689
4690   /* For now works only for exits that dominate the loop latch.
4691      TODO: extend to other conditions inside loop body.  */
4692   ex_bb = gimple_bb (use->stmt);
4693   if (use->stmt != last_stmt (ex_bb)
4694       || gimple_code (use->stmt) != GIMPLE_COND
4695       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4696     return false;
4697
4698   exit = EDGE_SUCC (ex_bb, 0);
4699   if (flow_bb_inside_loop_p (loop, exit->dest))
4700     exit = EDGE_SUCC (ex_bb, 1);
4701   if (flow_bb_inside_loop_p (loop, exit->dest))
4702     return false;
4703
4704   desc = niter_for_exit (data, exit);
4705   if (!desc)
4706     return false;
4707
4708   /* Determine whether we can use the variable to test the exit condition.
4709      This is the case iff the period of the induction variable is greater
4710      than the number of iterations for which the exit condition is true.  */
4711   period = iv_period (cand->iv);
4712
4713   /* If the number of iterations is constant, compare against it directly.  */
4714   if (TREE_CODE (desc->niter) == INTEGER_CST)
4715     {
4716       /* See cand_value_at.  */
4717       if (stmt_after_increment (loop, cand, use->stmt))
4718         {
4719           if (!tree_int_cst_lt (desc->niter, period))
4720             return false;
4721         }
4722       else
4723         {
4724           if (tree_int_cst_lt (period, desc->niter))
4725             return false;
4726         }
4727     }
4728
4729   /* If not, and if this is the only possible exit of the loop, see whether
4730      we can get a conservative estimate on the number of iterations of the
4731      entire loop and compare against that instead.  */
4732   else
4733     {
4734       widest_int period_value, max_niter;
4735
4736       max_niter = desc->max;
4737       if (stmt_after_increment (loop, cand, use->stmt))
4738         max_niter += 1;
4739       period_value = wi::to_widest (period);
4740       if (wi::gtu_p (max_niter, period_value))
4741         {
4742           /* See if we can take advantage of inferred loop bound information.  */
4743           if (data->loop_single_exit_p)
4744             {
4745               if (!max_loop_iterations (loop, &max_niter))
4746                 return false;
4747               /* The loop bound is already adjusted by adding 1.  */
4748               if (wi::gtu_p (max_niter, period_value))
4749                 return false;
4750             }
4751           else
4752             return false;
4753         }
4754     }
4755
4756   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4757
4758   *bound = fold_convert (TREE_TYPE (cand->iv->base),
4759                          aff_combination_to_tree (&bnd));
4760   *comp = iv_elimination_compare (data, use);
4761
4762   /* It is unlikely that computing the number of iterations using division
4763      would be more profitable than keeping the original induction variable.  */
4764   if (expression_expensive_p (*bound))
4765     return false;
4766
4767   /* Sometimes, it is possible to handle the situation that the number of
4768      iterations may be zero unless additional assumtions by using <
4769      instead of != in the exit condition.
4770
4771      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4772            base the exit condition on it.  However, that is often too
4773            expensive.  */
4774   if (!integer_zerop (desc->may_be_zero))
4775     return iv_elimination_compare_lt (data, cand, comp, desc);
4776
4777   return true;
4778 }
4779
4780  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4781     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4782
4783 static int
4784 parm_decl_cost (struct ivopts_data *data, tree bound)
4785 {
4786   tree sbound = bound;
4787   STRIP_NOPS (sbound);
4788
4789   if (TREE_CODE (sbound) == SSA_NAME
4790       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4791       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4792       && data->body_includes_call)
4793     return COSTS_N_INSNS (1);
4794
4795   return 0;
4796 }
4797
4798 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4799
4800 static bool
4801 determine_use_iv_cost_condition (struct ivopts_data *data,
4802                                  struct iv_use *use, struct iv_cand *cand)
4803 {
4804   tree bound = NULL_TREE;
4805   struct iv *cmp_iv;
4806   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4807   comp_cost elim_cost, express_cost, cost, bound_cost;
4808   bool ok;
4809   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4810   tree *control_var, *bound_cst;
4811   enum tree_code comp = ERROR_MARK;
4812
4813   /* Only consider real candidates.  */
4814   if (!cand->iv)
4815     {
4816       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4817                        ERROR_MARK, -1);
4818       return false;
4819     }
4820
4821   /* Try iv elimination.  */
4822   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4823     {
4824       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4825       if (elim_cost.cost == 0)
4826         elim_cost.cost = parm_decl_cost (data, bound);
4827       else if (TREE_CODE (bound) == INTEGER_CST)
4828         elim_cost.cost = 0;
4829       /* If we replace a loop condition 'i < n' with 'p < base + n',
4830          depends_on_elim will have 'base' and 'n' set, which implies
4831          that both 'base' and 'n' will be live during the loop.  More likely,
4832          'base + n' will be loop invariant, resulting in only one live value
4833          during the loop.  So in that case we clear depends_on_elim and set
4834         elim_inv_expr_id instead.  */
4835       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4836         {
4837           elim_inv_expr_id = get_expr_id (data, bound);
4838           bitmap_clear (depends_on_elim);
4839         }
4840       /* The bound is a loop invariant, so it will be only computed
4841          once.  */
4842       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4843     }
4844   else
4845     elim_cost = infinite_cost;
4846
4847   /* Try expressing the original giv.  If it is compared with an invariant,
4848      note that we cannot get rid of it.  */
4849   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4850                               NULL, &cmp_iv);
4851   gcc_assert (ok);
4852
4853   /* When the condition is a comparison of the candidate IV against
4854      zero, prefer this IV.
4855
4856      TODO: The constant that we're subtracting from the cost should
4857      be target-dependent.  This information should be added to the
4858      target costs for each backend.  */
4859   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4860       && integer_zerop (*bound_cst)
4861       && (operand_equal_p (*control_var, cand->var_after, 0)
4862           || operand_equal_p (*control_var, cand->var_before, 0)))
4863     elim_cost.cost -= 1;
4864
4865   express_cost = get_computation_cost (data, use, cand, false,
4866                                        &depends_on_express, NULL,
4867                                        &express_inv_expr_id);
4868   fd_ivopts_data = data;
4869   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4870
4871   /* Count the cost of the original bound as well.  */
4872   bound_cost = force_var_cost (data, *bound_cst, NULL);
4873   if (bound_cost.cost == 0)
4874     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4875   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4876     bound_cost.cost = 0;
4877   express_cost.cost += bound_cost.cost;
4878
4879   /* Choose the better approach, preferring the eliminated IV. */
4880   if (compare_costs (elim_cost, express_cost) <= 0)
4881     {
4882       cost = elim_cost;
4883       depends_on = depends_on_elim;
4884       depends_on_elim = NULL;
4885       inv_expr_id = elim_inv_expr_id;
4886     }
4887   else
4888     {
4889       cost = express_cost;
4890       depends_on = depends_on_express;
4891       depends_on_express = NULL;
4892       bound = NULL_TREE;
4893       comp = ERROR_MARK;
4894       inv_expr_id = express_inv_expr_id;
4895     }
4896
4897   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4898
4899   if (depends_on_elim)
4900     BITMAP_FREE (depends_on_elim);
4901   if (depends_on_express)
4902     BITMAP_FREE (depends_on_express);
4903
4904   return !infinite_cost_p (cost);
4905 }
4906
4907 /* Determines cost of basing replacement of USE on CAND.  Returns false
4908    if USE cannot be based on CAND.  */
4909
4910 static bool
4911 determine_use_iv_cost (struct ivopts_data *data,
4912                        struct iv_use *use, struct iv_cand *cand)
4913 {
4914   switch (use->type)
4915     {
4916     case USE_NONLINEAR_EXPR:
4917       return determine_use_iv_cost_generic (data, use, cand);
4918
4919     case USE_ADDRESS:
4920       return determine_use_iv_cost_address (data, use, cand);
4921
4922     case USE_COMPARE:
4923       return determine_use_iv_cost_condition (data, use, cand);
4924
4925     default:
4926       gcc_unreachable ();
4927     }
4928 }
4929
4930 /* Return true if get_computation_cost indicates that autoincrement is
4931    a possibility for the pair of USE and CAND, false otherwise.  */
4932
4933 static bool
4934 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4935                            struct iv_cand *cand)
4936 {
4937   bitmap depends_on;
4938   bool can_autoinc;
4939   comp_cost cost;
4940
4941   if (use->type != USE_ADDRESS)
4942     return false;
4943
4944   cost = get_computation_cost (data, use, cand, true, &depends_on,
4945                                &can_autoinc, NULL);
4946
4947   BITMAP_FREE (depends_on);
4948
4949   return !infinite_cost_p (cost) && can_autoinc;
4950 }
4951
4952 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4953    use that allows autoincrement, and set their AINC_USE if possible.  */
4954
4955 static void
4956 set_autoinc_for_original_candidates (struct ivopts_data *data)
4957 {
4958   unsigned i, j;
4959
4960   for (i = 0; i < n_iv_cands (data); i++)
4961     {
4962       struct iv_cand *cand = iv_cand (data, i);
4963       struct iv_use *closest_before = NULL;
4964       struct iv_use *closest_after = NULL;
4965       if (cand->pos != IP_ORIGINAL)
4966         continue;
4967
4968       for (j = 0; j < n_iv_uses (data); j++)
4969         {
4970           struct iv_use *use = iv_use (data, j);
4971           unsigned uid = gimple_uid (use->stmt);
4972
4973           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4974             continue;
4975
4976           if (uid < gimple_uid (cand->incremented_at)
4977               && (closest_before == NULL
4978                   || uid > gimple_uid (closest_before->stmt)))
4979             closest_before = use;
4980
4981           if (uid > gimple_uid (cand->incremented_at)
4982               && (closest_after == NULL
4983                   || uid < gimple_uid (closest_after->stmt)))
4984             closest_after = use;
4985         }
4986
4987       if (closest_before != NULL
4988           && autoinc_possible_for_pair (data, closest_before, cand))
4989         cand->ainc_use = closest_before;
4990       else if (closest_after != NULL
4991                && autoinc_possible_for_pair (data, closest_after, cand))
4992         cand->ainc_use = closest_after;
4993     }
4994 }
4995
4996 /* Finds the candidates for the induction variables.  */
4997
4998 static void
4999 find_iv_candidates (struct ivopts_data *data)
5000 {
5001   /* Add commonly used ivs.  */
5002   add_standard_iv_candidates (data);
5003
5004   /* Add old induction variables.  */
5005   add_old_ivs_candidates (data);
5006
5007   /* Add induction variables derived from uses.  */
5008   add_derived_ivs_candidates (data);
5009
5010   set_autoinc_for_original_candidates (data);
5011
5012   /* Record the important candidates.  */
5013   record_important_candidates (data);
5014 }
5015
5016 /* Determines costs of basing the use of the iv on an iv candidate.  */
5017
5018 static void
5019 determine_use_iv_costs (struct ivopts_data *data)
5020 {
5021   unsigned i, j;
5022   struct iv_use *use;
5023   struct iv_cand *cand;
5024   bitmap to_clear = BITMAP_ALLOC (NULL);
5025
5026   alloc_use_cost_map (data);
5027
5028   for (i = 0; i < n_iv_uses (data); i++)
5029     {
5030       use = iv_use (data, i);
5031
5032       if (data->consider_all_candidates)
5033         {
5034           for (j = 0; j < n_iv_cands (data); j++)
5035             {
5036               cand = iv_cand (data, j);
5037               determine_use_iv_cost (data, use, cand);
5038             }
5039         }
5040       else
5041         {
5042           bitmap_iterator bi;
5043
5044           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5045             {
5046               cand = iv_cand (data, j);
5047               if (!determine_use_iv_cost (data, use, cand))
5048                 bitmap_set_bit (to_clear, j);
5049             }
5050
5051           /* Remove the candidates for that the cost is infinite from
5052              the list of related candidates.  */
5053           bitmap_and_compl_into (use->related_cands, to_clear);
5054           bitmap_clear (to_clear);
5055         }
5056     }
5057
5058   BITMAP_FREE (to_clear);
5059
5060   if (dump_file && (dump_flags & TDF_DETAILS))
5061     {
5062       fprintf (dump_file, "Use-candidate costs:\n");
5063
5064       for (i = 0; i < n_iv_uses (data); i++)
5065         {
5066           use = iv_use (data, i);
5067
5068           fprintf (dump_file, "Use %d:\n", i);
5069           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5070           for (j = 0; j < use->n_map_members; j++)
5071             {
5072               if (!use->cost_map[j].cand
5073                   || infinite_cost_p (use->cost_map[j].cost))
5074                 continue;
5075
5076               fprintf (dump_file, "  %d\t%d\t%d\t",
5077                        use->cost_map[j].cand->id,
5078                        use->cost_map[j].cost.cost,
5079                        use->cost_map[j].cost.complexity);
5080               if (use->cost_map[j].depends_on)
5081                 bitmap_print (dump_file,
5082                               use->cost_map[j].depends_on, "","");
5083               if (use->cost_map[j].inv_expr_id != -1)
5084                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5085               fprintf (dump_file, "\n");
5086             }
5087
5088           fprintf (dump_file, "\n");
5089         }
5090       fprintf (dump_file, "\n");
5091     }
5092 }
5093
5094 /* Determines cost of the candidate CAND.  */
5095
5096 static void
5097 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5098 {
5099   comp_cost cost_base;
5100   unsigned cost, cost_step;
5101   tree base;
5102
5103   if (!cand->iv)
5104     {
5105       cand->cost = 0;
5106       return;
5107     }
5108
5109   /* There are two costs associated with the candidate -- its increment
5110      and its initialization.  The second is almost negligible for any loop
5111      that rolls enough, so we take it just very little into account.  */
5112
5113   base = cand->iv->base;
5114   cost_base = force_var_cost (data, base, NULL);
5115   /* It will be exceptional that the iv register happens to be initialized with
5116      the proper value at no cost.  In general, there will at least be a regcopy
5117      or a const set.  */
5118   if (cost_base.cost == 0)
5119     cost_base.cost = COSTS_N_INSNS (1);
5120   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5121
5122   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5123
5124   /* Prefer the original ivs unless we may gain something by replacing it.
5125      The reason is to make debugging simpler; so this is not relevant for
5126      artificial ivs created by other optimization passes.  */
5127   if (cand->pos != IP_ORIGINAL
5128       || !SSA_NAME_VAR (cand->var_before)
5129       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5130     cost++;
5131
5132   /* Prefer not to insert statements into latch unless there are some
5133      already (so that we do not create unnecessary jumps).  */
5134   if (cand->pos == IP_END
5135       && empty_block_p (ip_end_pos (data->current_loop)))
5136     cost++;
5137
5138   cand->cost = cost;
5139   cand->cost_step = cost_step;
5140 }
5141
5142 /* Determines costs of computation of the candidates.  */
5143
5144 static void
5145 determine_iv_costs (struct ivopts_data *data)
5146 {
5147   unsigned i;
5148
5149   if (dump_file && (dump_flags & TDF_DETAILS))
5150     {
5151       fprintf (dump_file, "Candidate costs:\n");
5152       fprintf (dump_file, "  cand\tcost\n");
5153     }
5154
5155   for (i = 0; i < n_iv_cands (data); i++)
5156     {
5157       struct iv_cand *cand = iv_cand (data, i);
5158
5159       determine_iv_cost (data, cand);
5160
5161       if (dump_file && (dump_flags & TDF_DETAILS))
5162         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5163     }
5164
5165   if (dump_file && (dump_flags & TDF_DETAILS))
5166     fprintf (dump_file, "\n");
5167 }
5168
5169 /* Calculates cost for having SIZE induction variables.  */
5170
5171 static unsigned
5172 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5173 {
5174   /* We add size to the cost, so that we prefer eliminating ivs
5175      if possible.  */
5176   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5177                                             data->body_includes_call);
5178 }
5179
5180 /* For each size of the induction variable set determine the penalty.  */
5181
5182 static void
5183 determine_set_costs (struct ivopts_data *data)
5184 {
5185   unsigned j, n;
5186   gimple phi;
5187   gimple_stmt_iterator psi;
5188   tree op;
5189   struct loop *loop = data->current_loop;
5190   bitmap_iterator bi;
5191
5192   if (dump_file && (dump_flags & TDF_DETAILS))
5193     {
5194       fprintf (dump_file, "Global costs:\n");
5195       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5196       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5197       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5198       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5199     }
5200
5201   n = 0;
5202   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5203     {
5204       phi = gsi_stmt (psi);
5205       op = PHI_RESULT (phi);
5206
5207       if (virtual_operand_p (op))
5208         continue;
5209
5210       if (get_iv (data, op))
5211         continue;
5212
5213       n++;
5214     }
5215
5216   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5217     {
5218       struct version_info *info = ver_info (data, j);
5219
5220       if (info->inv_id && info->has_nonlin_use)
5221         n++;
5222     }
5223
5224   data->regs_used = n;
5225   if (dump_file && (dump_flags & TDF_DETAILS))
5226     fprintf (dump_file, "  regs_used %d\n", n);
5227
5228   if (dump_file && (dump_flags & TDF_DETAILS))
5229     {
5230       fprintf (dump_file, "  cost for size:\n");
5231       fprintf (dump_file, "  ivs\tcost\n");
5232       for (j = 0; j <= 2 * target_avail_regs; j++)
5233         fprintf (dump_file, "  %d\t%d\n", j,
5234                  ivopts_global_cost_for_size (data, j));
5235       fprintf (dump_file, "\n");
5236     }
5237 }
5238
5239 /* Returns true if A is a cheaper cost pair than B.  */
5240
5241 static bool
5242 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5243 {
5244   int cmp;
5245
5246   if (!a)
5247     return false;
5248
5249   if (!b)
5250     return true;
5251
5252   cmp = compare_costs (a->cost, b->cost);
5253   if (cmp < 0)
5254     return true;
5255
5256   if (cmp > 0)
5257     return false;
5258
5259   /* In case the costs are the same, prefer the cheaper candidate.  */
5260   if (a->cand->cost < b->cand->cost)
5261     return true;
5262
5263   return false;
5264 }
5265
5266
5267 /* Returns candidate by that USE is expressed in IVS.  */
5268
5269 static struct cost_pair *
5270 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5271 {
5272   return ivs->cand_for_use[use->id];
5273 }
5274
5275 /* Computes the cost field of IVS structure.  */
5276
5277 static void
5278 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5279 {
5280   comp_cost cost = ivs->cand_use_cost;
5281
5282   cost.cost += ivs->cand_cost;
5283
5284   cost.cost += ivopts_global_cost_for_size (data,
5285                                             ivs->n_regs + ivs->num_used_inv_expr);
5286
5287   ivs->cost = cost;
5288 }
5289
5290 /* Remove invariants in set INVS to set IVS.  */
5291
5292 static void
5293 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5294 {
5295   bitmap_iterator bi;
5296   unsigned iid;
5297
5298   if (!invs)
5299     return;
5300
5301   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5302     {
5303       ivs->n_invariant_uses[iid]--;
5304       if (ivs->n_invariant_uses[iid] == 0)
5305         ivs->n_regs--;
5306     }
5307 }
5308
5309 /* Set USE not to be expressed by any candidate in IVS.  */
5310
5311 static void
5312 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5313                  struct iv_use *use)
5314 {
5315   unsigned uid = use->id, cid;
5316   struct cost_pair *cp;
5317
5318   cp = ivs->cand_for_use[uid];
5319   if (!cp)
5320     return;
5321   cid = cp->cand->id;
5322
5323   ivs->bad_uses++;
5324   ivs->cand_for_use[uid] = NULL;
5325   ivs->n_cand_uses[cid]--;
5326
5327   if (ivs->n_cand_uses[cid] == 0)
5328     {
5329       bitmap_clear_bit (ivs->cands, cid);
5330       /* Do not count the pseudocandidates.  */
5331       if (cp->cand->iv)
5332         ivs->n_regs--;
5333       ivs->n_cands--;
5334       ivs->cand_cost -= cp->cand->cost;
5335
5336       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5337     }
5338
5339   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5340
5341   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5342
5343   if (cp->inv_expr_id != -1)
5344     {
5345       ivs->used_inv_expr[cp->inv_expr_id]--;
5346       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5347         ivs->num_used_inv_expr--;
5348     }
5349   iv_ca_recount_cost (data, ivs);
5350 }
5351
5352 /* Add invariants in set INVS to set IVS.  */
5353
5354 static void
5355 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5356 {
5357   bitmap_iterator bi;
5358   unsigned iid;
5359
5360   if (!invs)
5361     return;
5362
5363   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5364     {
5365       ivs->n_invariant_uses[iid]++;
5366       if (ivs->n_invariant_uses[iid] == 1)
5367         ivs->n_regs++;
5368     }
5369 }
5370
5371 /* Set cost pair for USE in set IVS to CP.  */
5372
5373 static void
5374 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5375               struct iv_use *use, struct cost_pair *cp)
5376 {
5377   unsigned uid = use->id, cid;
5378
5379   if (ivs->cand_for_use[uid] == cp)
5380     return;
5381
5382   if (ivs->cand_for_use[uid])
5383     iv_ca_set_no_cp (data, ivs, use);
5384
5385   if (cp)
5386     {
5387       cid = cp->cand->id;
5388
5389       ivs->bad_uses--;
5390       ivs->cand_for_use[uid] = cp;
5391       ivs->n_cand_uses[cid]++;
5392       if (ivs->n_cand_uses[cid] == 1)
5393         {
5394           bitmap_set_bit (ivs->cands, cid);
5395           /* Do not count the pseudocandidates.  */
5396           if (cp->cand->iv)
5397             ivs->n_regs++;
5398           ivs->n_cands++;
5399           ivs->cand_cost += cp->cand->cost;
5400
5401           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5402         }
5403
5404       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5405       iv_ca_set_add_invariants (ivs, cp->depends_on);
5406
5407       if (cp->inv_expr_id != -1)
5408         {
5409           ivs->used_inv_expr[cp->inv_expr_id]++;
5410           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5411             ivs->num_used_inv_expr++;
5412         }
5413       iv_ca_recount_cost (data, ivs);
5414     }
5415 }
5416
5417 /* Extend set IVS by expressing USE by some of the candidates in it
5418    if possible.  Consider all important candidates if candidates in
5419    set IVS don't give any result.  */
5420
5421 static void
5422 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5423                struct iv_use *use)
5424 {
5425   struct cost_pair *best_cp = NULL, *cp;
5426   bitmap_iterator bi;
5427   unsigned i;
5428   struct iv_cand *cand;
5429
5430   gcc_assert (ivs->upto >= use->id);
5431   ivs->upto++;
5432   ivs->bad_uses++;
5433
5434   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5435     {
5436       cand = iv_cand (data, i);
5437       cp = get_use_iv_cost (data, use, cand);
5438       if (cheaper_cost_pair (cp, best_cp))
5439         best_cp = cp;
5440     }
5441
5442   if (best_cp == NULL)
5443     {
5444       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5445         {
5446           cand = iv_cand (data, i);
5447           cp = get_use_iv_cost (data, use, cand);
5448           if (cheaper_cost_pair (cp, best_cp))
5449             best_cp = cp;
5450         }
5451     }
5452
5453   iv_ca_set_cp (data, ivs, use, best_cp);
5454 }
5455
5456 /* Get cost for assignment IVS.  */
5457
5458 static comp_cost
5459 iv_ca_cost (struct iv_ca *ivs)
5460 {
5461   /* This was a conditional expression but it triggered a bug in
5462      Sun C 5.5.  */
5463   if (ivs->bad_uses)
5464     return infinite_cost;
5465   else
5466     return ivs->cost;
5467 }
5468
5469 /* Returns true if all dependences of CP are among invariants in IVS.  */
5470
5471 static bool
5472 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5473 {
5474   unsigned i;
5475   bitmap_iterator bi;
5476
5477   if (!cp->depends_on)
5478     return true;
5479
5480   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5481     {
5482       if (ivs->n_invariant_uses[i] == 0)
5483         return false;
5484     }
5485
5486   return true;
5487 }
5488
5489 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5490    it before NEXT_CHANGE.  */
5491
5492 static struct iv_ca_delta *
5493 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5494                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5495 {
5496   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5497
5498   change->use = use;
5499   change->old_cp = old_cp;
5500   change->new_cp = new_cp;
5501   change->next_change = next_change;
5502
5503   return change;
5504 }
5505
5506 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5507    are rewritten.  */
5508
5509 static struct iv_ca_delta *
5510 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5511 {
5512   struct iv_ca_delta *last;
5513
5514   if (!l2)
5515     return l1;
5516
5517   if (!l1)
5518     return l2;
5519
5520   for (last = l1; last->next_change; last = last->next_change)
5521     continue;
5522   last->next_change = l2;
5523
5524   return l1;
5525 }
5526
5527 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5528
5529 static struct iv_ca_delta *
5530 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5531 {
5532   struct iv_ca_delta *act, *next, *prev = NULL;
5533   struct cost_pair *tmp;
5534
5535   for (act = delta; act; act = next)
5536     {
5537       next = act->next_change;
5538       act->next_change = prev;
5539       prev = act;
5540
5541       tmp = act->old_cp;
5542       act->old_cp = act->new_cp;
5543       act->new_cp = tmp;
5544     }
5545
5546   return prev;
5547 }
5548
5549 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5550    reverted instead.  */
5551
5552 static void
5553 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5554                     struct iv_ca_delta *delta, bool forward)
5555 {
5556   struct cost_pair *from, *to;
5557   struct iv_ca_delta *act;
5558
5559   if (!forward)
5560     delta = iv_ca_delta_reverse (delta);
5561
5562   for (act = delta; act; act = act->next_change)
5563     {
5564       from = act->old_cp;
5565       to = act->new_cp;
5566       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5567       iv_ca_set_cp (data, ivs, act->use, to);
5568     }
5569
5570   if (!forward)
5571     iv_ca_delta_reverse (delta);
5572 }
5573
5574 /* Returns true if CAND is used in IVS.  */
5575
5576 static bool
5577 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5578 {
5579   return ivs->n_cand_uses[cand->id] > 0;
5580 }
5581
5582 /* Returns number of induction variable candidates in the set IVS.  */
5583
5584 static unsigned
5585 iv_ca_n_cands (struct iv_ca *ivs)
5586 {
5587   return ivs->n_cands;
5588 }
5589
5590 /* Free the list of changes DELTA.  */
5591
5592 static void
5593 iv_ca_delta_free (struct iv_ca_delta **delta)
5594 {
5595   struct iv_ca_delta *act, *next;
5596
5597   for (act = *delta; act; act = next)
5598     {
5599       next = act->next_change;
5600       free (act);
5601     }
5602
5603   *delta = NULL;
5604 }
5605
5606 /* Allocates new iv candidates assignment.  */
5607
5608 static struct iv_ca *
5609 iv_ca_new (struct ivopts_data *data)
5610 {
5611   struct iv_ca *nw = XNEW (struct iv_ca);
5612
5613   nw->upto = 0;
5614   nw->bad_uses = 0;
5615   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5616   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5617   nw->cands = BITMAP_ALLOC (NULL);
5618   nw->n_cands = 0;
5619   nw->n_regs = 0;
5620   nw->cand_use_cost = no_cost;
5621   nw->cand_cost = 0;
5622   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5623   nw->cost = no_cost;
5624   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5625   nw->num_used_inv_expr = 0;
5626
5627   return nw;
5628 }
5629
5630 /* Free memory occupied by the set IVS.  */
5631
5632 static void
5633 iv_ca_free (struct iv_ca **ivs)
5634 {
5635   free ((*ivs)->cand_for_use);
5636   free ((*ivs)->n_cand_uses);
5637   BITMAP_FREE ((*ivs)->cands);
5638   free ((*ivs)->n_invariant_uses);
5639   free ((*ivs)->used_inv_expr);
5640   free (*ivs);
5641   *ivs = NULL;
5642 }
5643
5644 /* Dumps IVS to FILE.  */
5645
5646 static void
5647 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5648 {
5649   const char *pref = "  invariants ";
5650   unsigned i;
5651   comp_cost cost = iv_ca_cost (ivs);
5652
5653   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5654   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5655            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5656   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5657
5658    for (i = 0; i < ivs->upto; i++)
5659     {
5660       struct iv_use *use = iv_use (data, i);
5661       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5662       if (cp)
5663         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5664                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5665       else
5666         fprintf (file, "   use:%d --> ??\n", use->id);
5667     }
5668
5669   for (i = 1; i <= data->max_inv_id; i++)
5670     if (ivs->n_invariant_uses[i])
5671       {
5672         fprintf (file, "%s%d", pref, i);
5673         pref = ", ";
5674       }
5675   fprintf (file, "\n\n");
5676 }
5677
5678 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5679    new set, and store differences in DELTA.  Number of induction variables
5680    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5681    the function will try to find a solution with mimimal iv candidates.  */
5682
5683 static comp_cost
5684 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5685               struct iv_cand *cand, struct iv_ca_delta **delta,
5686               unsigned *n_ivs, bool min_ncand)
5687 {
5688   unsigned i;
5689   comp_cost cost;
5690   struct iv_use *use;
5691   struct cost_pair *old_cp, *new_cp;
5692
5693   *delta = NULL;
5694   for (i = 0; i < ivs->upto; i++)
5695     {
5696       use = iv_use (data, i);
5697       old_cp = iv_ca_cand_for_use (ivs, use);
5698
5699       if (old_cp
5700           && old_cp->cand == cand)
5701         continue;
5702
5703       new_cp = get_use_iv_cost (data, use, cand);
5704       if (!new_cp)
5705         continue;
5706
5707       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5708         continue;
5709
5710       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5711         continue;
5712
5713       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5714     }
5715
5716   iv_ca_delta_commit (data, ivs, *delta, true);
5717   cost = iv_ca_cost (ivs);
5718   if (n_ivs)
5719     *n_ivs = iv_ca_n_cands (ivs);
5720   iv_ca_delta_commit (data, ivs, *delta, false);
5721
5722   return cost;
5723 }
5724
5725 /* Try narrowing set IVS by removing CAND.  Return the cost of
5726    the new set and store the differences in DELTA.  START is
5727    the candidate with which we start narrowing.  */
5728
5729 static comp_cost
5730 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5731               struct iv_cand *cand, struct iv_cand *start,
5732               struct iv_ca_delta **delta)
5733 {
5734   unsigned i, ci;
5735   struct iv_use *use;
5736   struct cost_pair *old_cp, *new_cp, *cp;
5737   bitmap_iterator bi;
5738   struct iv_cand *cnd;
5739   comp_cost cost, best_cost, acost;
5740
5741   *delta = NULL;
5742   for (i = 0; i < n_iv_uses (data); i++)
5743     {
5744       use = iv_use (data, i);
5745
5746       old_cp = iv_ca_cand_for_use (ivs, use);
5747       if (old_cp->cand != cand)
5748         continue;
5749
5750       best_cost = iv_ca_cost (ivs);
5751       /* Start narrowing with START.  */
5752       new_cp = get_use_iv_cost (data, use, start);
5753
5754       if (data->consider_all_candidates)
5755         {
5756           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5757             {
5758               if (ci == cand->id || (start && ci == start->id))
5759                 continue;
5760
5761               cnd = iv_cand (data, ci);
5762
5763               cp = get_use_iv_cost (data, use, cnd);
5764               if (!cp)
5765                 continue;
5766
5767               iv_ca_set_cp (data, ivs, use, cp);
5768               acost = iv_ca_cost (ivs);
5769
5770               if (compare_costs (acost, best_cost) < 0)
5771                 {
5772                   best_cost = acost;
5773                   new_cp = cp;
5774                 }
5775             }
5776         }
5777       else
5778         {
5779           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5780             {
5781               if (ci == cand->id || (start && ci == start->id))
5782                 continue;
5783
5784               cnd = iv_cand (data, ci);
5785
5786               cp = get_use_iv_cost (data, use, cnd);
5787               if (!cp)
5788                 continue;
5789
5790               iv_ca_set_cp (data, ivs, use, cp);
5791               acost = iv_ca_cost (ivs);
5792
5793               if (compare_costs (acost, best_cost) < 0)
5794                 {
5795                   best_cost = acost;
5796                   new_cp = cp;
5797                 }
5798             }
5799         }
5800       /* Restore to old cp for use.  */
5801       iv_ca_set_cp (data, ivs, use, old_cp);
5802
5803       if (!new_cp)
5804         {
5805           iv_ca_delta_free (delta);
5806           return infinite_cost;
5807         }
5808
5809       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5810     }
5811
5812   iv_ca_delta_commit (data, ivs, *delta, true);
5813   cost = iv_ca_cost (ivs);
5814   iv_ca_delta_commit (data, ivs, *delta, false);
5815
5816   return cost;
5817 }
5818
5819 /* Try optimizing the set of candidates IVS by removing candidates different
5820    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5821    differences in DELTA.  */
5822
5823 static comp_cost
5824 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5825              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5826 {
5827   bitmap_iterator bi;
5828   struct iv_ca_delta *act_delta, *best_delta;
5829   unsigned i;
5830   comp_cost best_cost, acost;
5831   struct iv_cand *cand;
5832
5833   best_delta = NULL;
5834   best_cost = iv_ca_cost (ivs);
5835
5836   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5837     {
5838       cand = iv_cand (data, i);
5839
5840       if (cand == except_cand)
5841         continue;
5842
5843       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5844
5845       if (compare_costs (acost, best_cost) < 0)
5846         {
5847           best_cost = acost;
5848           iv_ca_delta_free (&best_delta);
5849           best_delta = act_delta;
5850         }
5851       else
5852         iv_ca_delta_free (&act_delta);
5853     }
5854
5855   if (!best_delta)
5856     {
5857       *delta = NULL;
5858       return best_cost;
5859     }
5860
5861   /* Recurse to possibly remove other unnecessary ivs.  */
5862   iv_ca_delta_commit (data, ivs, best_delta, true);
5863   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5864   iv_ca_delta_commit (data, ivs, best_delta, false);
5865   *delta = iv_ca_delta_join (best_delta, *delta);
5866   return best_cost;
5867 }
5868
5869 /* Tries to extend the sets IVS in the best possible way in order
5870    to express the USE.  If ORIGINALP is true, prefer candidates from
5871    the original set of IVs, otherwise favor important candidates not
5872    based on any memory object.  */
5873
5874 static bool
5875 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5876                   struct iv_use *use, bool originalp)
5877 {
5878   comp_cost best_cost, act_cost;
5879   unsigned i;
5880   bitmap_iterator bi;
5881   struct iv_cand *cand;
5882   struct iv_ca_delta *best_delta = NULL, *act_delta;
5883   struct cost_pair *cp;
5884
5885   iv_ca_add_use (data, ivs, use);
5886   best_cost = iv_ca_cost (ivs);
5887   cp = iv_ca_cand_for_use (ivs, use);
5888   if (cp)
5889     {
5890       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5891       iv_ca_set_no_cp (data, ivs, use);
5892     }
5893
5894   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5895      first try important candidates not based on any memory object.  Only if
5896      this fails, try the specific ones.  Rationale -- in loops with many
5897      variables the best choice often is to use just one generic biv.  If we
5898      added here many ivs specific to the uses, the optimization algorithm later
5899      would be likely to get stuck in a local minimum, thus causing us to create
5900      too many ivs.  The approach from few ivs to more seems more likely to be
5901      successful -- starting from few ivs, replacing an expensive use by a
5902      specific iv should always be a win.  */
5903   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5904     {
5905       cand = iv_cand (data, i);
5906
5907       if (originalp && cand->pos !=IP_ORIGINAL)
5908         continue;
5909
5910       if (!originalp && cand->iv->base_object != NULL_TREE)
5911         continue;
5912
5913       if (iv_ca_cand_used_p (ivs, cand))
5914         continue;
5915
5916       cp = get_use_iv_cost (data, use, cand);
5917       if (!cp)
5918         continue;
5919
5920       iv_ca_set_cp (data, ivs, use, cp);
5921       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5922                                true);
5923       iv_ca_set_no_cp (data, ivs, use);
5924       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5925
5926       if (compare_costs (act_cost, best_cost) < 0)
5927         {
5928           best_cost = act_cost;
5929
5930           iv_ca_delta_free (&best_delta);
5931           best_delta = act_delta;
5932         }
5933       else
5934         iv_ca_delta_free (&act_delta);
5935     }
5936
5937   if (infinite_cost_p (best_cost))
5938     {
5939       for (i = 0; i < use->n_map_members; i++)
5940         {
5941           cp = use->cost_map + i;
5942           cand = cp->cand;
5943           if (!cand)
5944             continue;
5945
5946           /* Already tried this.  */
5947           if (cand->important)
5948             {
5949               if (originalp && cand->pos == IP_ORIGINAL)
5950                 continue;
5951               if (!originalp && cand->iv->base_object == NULL_TREE)
5952                 continue;
5953             }
5954
5955           if (iv_ca_cand_used_p (ivs, cand))
5956             continue;
5957
5958           act_delta = NULL;
5959           iv_ca_set_cp (data, ivs, use, cp);
5960           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5961           iv_ca_set_no_cp (data, ivs, use);
5962           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5963                                        cp, act_delta);
5964
5965           if (compare_costs (act_cost, best_cost) < 0)
5966             {
5967               best_cost = act_cost;
5968
5969               if (best_delta)
5970                 iv_ca_delta_free (&best_delta);
5971               best_delta = act_delta;
5972             }
5973           else
5974             iv_ca_delta_free (&act_delta);
5975         }
5976     }
5977
5978   iv_ca_delta_commit (data, ivs, best_delta, true);
5979   iv_ca_delta_free (&best_delta);
5980
5981   return !infinite_cost_p (best_cost);
5982 }
5983
5984 /* Finds an initial assignment of candidates to uses.  */
5985
5986 static struct iv_ca *
5987 get_initial_solution (struct ivopts_data *data, bool originalp)
5988 {
5989   struct iv_ca *ivs = iv_ca_new (data);
5990   unsigned i;
5991
5992   for (i = 0; i < n_iv_uses (data); i++)
5993     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5994       {
5995         iv_ca_free (&ivs);
5996         return NULL;
5997       }
5998
5999   return ivs;
6000 }
6001
6002 /* Tries to improve set of induction variables IVS.  */
6003
6004 static bool
6005 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6006 {
6007   unsigned i, n_ivs;
6008   comp_cost acost, best_cost = iv_ca_cost (ivs);
6009   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6010   struct iv_cand *cand;
6011
6012   /* Try extending the set of induction variables by one.  */
6013   for (i = 0; i < n_iv_cands (data); i++)
6014     {
6015       cand = iv_cand (data, i);
6016
6017       if (iv_ca_cand_used_p (ivs, cand))
6018         continue;
6019
6020       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6021       if (!act_delta)
6022         continue;
6023
6024       /* If we successfully added the candidate and the set is small enough,
6025          try optimizing it by removing other candidates.  */
6026       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6027         {
6028           iv_ca_delta_commit (data, ivs, act_delta, true);
6029           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6030           iv_ca_delta_commit (data, ivs, act_delta, false);
6031           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6032         }
6033
6034       if (compare_costs (acost, best_cost) < 0)
6035         {
6036           best_cost = acost;
6037           iv_ca_delta_free (&best_delta);
6038           best_delta = act_delta;
6039         }
6040       else
6041         iv_ca_delta_free (&act_delta);
6042     }
6043
6044   if (!best_delta)
6045     {
6046       /* Try removing the candidates from the set instead.  */
6047       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6048
6049       /* Nothing more we can do.  */
6050       if (!best_delta)
6051         return false;
6052     }
6053
6054   iv_ca_delta_commit (data, ivs, best_delta, true);
6055   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6056   iv_ca_delta_free (&best_delta);
6057   return true;
6058 }
6059
6060 /* Attempts to find the optimal set of induction variables.  We do simple
6061    greedy heuristic -- we try to replace at most one candidate in the selected
6062    solution and remove the unused ivs while this improves the cost.  */
6063
6064 static struct iv_ca *
6065 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6066 {
6067   struct iv_ca *set;
6068
6069   /* Get the initial solution.  */
6070   set = get_initial_solution (data, originalp);
6071   if (!set)
6072     {
6073       if (dump_file && (dump_flags & TDF_DETAILS))
6074         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6075       return NULL;
6076     }
6077
6078   if (dump_file && (dump_flags & TDF_DETAILS))
6079     {
6080       fprintf (dump_file, "Initial set of candidates:\n");
6081       iv_ca_dump (data, dump_file, set);
6082     }
6083
6084   while (try_improve_iv_set (data, set))
6085     {
6086       if (dump_file && (dump_flags & TDF_DETAILS))
6087         {
6088           fprintf (dump_file, "Improved to:\n");
6089           iv_ca_dump (data, dump_file, set);
6090         }
6091     }
6092
6093   return set;
6094 }
6095
6096 static struct iv_ca *
6097 find_optimal_iv_set (struct ivopts_data *data)
6098 {
6099   unsigned i;
6100   struct iv_ca *set, *origset;
6101   struct iv_use *use;
6102   comp_cost cost, origcost;
6103
6104   /* Determine the cost based on a strategy that starts with original IVs,
6105      and try again using a strategy that prefers candidates not based
6106      on any IVs.  */
6107   origset = find_optimal_iv_set_1 (data, true);
6108   set = find_optimal_iv_set_1 (data, false);
6109
6110   if (!origset && !set)
6111     return NULL;
6112
6113   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6114   cost = set ? iv_ca_cost (set) : infinite_cost;
6115
6116   if (dump_file && (dump_flags & TDF_DETAILS))
6117     {
6118       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6119                origcost.cost, origcost.complexity);
6120       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6121                cost.cost, cost.complexity);
6122     }
6123
6124   /* Choose the one with the best cost.  */
6125   if (compare_costs (origcost, cost) <= 0)
6126     {
6127       if (set)
6128         iv_ca_free (&set);
6129       set = origset;
6130     }
6131   else if (origset)
6132     iv_ca_free (&origset);
6133
6134   for (i = 0; i < n_iv_uses (data); i++)
6135     {
6136       use = iv_use (data, i);
6137       use->selected = iv_ca_cand_for_use (set, use)->cand;
6138     }
6139
6140   return set;
6141 }
6142
6143 /* Creates a new induction variable corresponding to CAND.  */
6144
6145 static void
6146 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6147 {
6148   gimple_stmt_iterator incr_pos;
6149   tree base;
6150   bool after = false;
6151
6152   if (!cand->iv)
6153     return;
6154
6155   switch (cand->pos)
6156     {
6157     case IP_NORMAL:
6158       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6159       break;
6160
6161     case IP_END:
6162       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6163       after = true;
6164       break;
6165
6166     case IP_AFTER_USE:
6167       after = true;
6168       /* fall through */
6169     case IP_BEFORE_USE:
6170       incr_pos = gsi_for_stmt (cand->incremented_at);
6171       break;
6172
6173     case IP_ORIGINAL:
6174       /* Mark that the iv is preserved.  */
6175       name_info (data, cand->var_before)->preserve_biv = true;
6176       name_info (data, cand->var_after)->preserve_biv = true;
6177
6178       /* Rewrite the increment so that it uses var_before directly.  */
6179       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6180       return;
6181     }
6182
6183   gimple_add_tmp_var (cand->var_before);
6184
6185   base = unshare_expr (cand->iv->base);
6186
6187   create_iv (base, unshare_expr (cand->iv->step),
6188              cand->var_before, data->current_loop,
6189              &incr_pos, after, &cand->var_before, &cand->var_after);
6190 }
6191
6192 /* Creates new induction variables described in SET.  */
6193
6194 static void
6195 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6196 {
6197   unsigned i;
6198   struct iv_cand *cand;
6199   bitmap_iterator bi;
6200
6201   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6202     {
6203       cand = iv_cand (data, i);
6204       create_new_iv (data, cand);
6205     }
6206
6207   if (dump_file && (dump_flags & TDF_DETAILS))
6208     {
6209       fprintf (dump_file, "\nSelected IV set: \n");
6210       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6211         {
6212           cand = iv_cand (data, i);
6213           dump_cand (dump_file, cand);
6214         }
6215       fprintf (dump_file, "\n");
6216     }
6217 }
6218
6219 /* Rewrites USE (definition of iv used in a nonlinear expression)
6220    using candidate CAND.  */
6221
6222 static void
6223 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6224                             struct iv_use *use, struct iv_cand *cand)
6225 {
6226   tree comp;
6227   tree op, tgt;
6228   gimple ass;
6229   gimple_stmt_iterator bsi;
6230
6231   /* An important special case -- if we are asked to express value of
6232      the original iv by itself, just exit; there is no need to
6233      introduce a new computation (that might also need casting the
6234      variable to unsigned and back).  */
6235   if (cand->pos == IP_ORIGINAL
6236       && cand->incremented_at == use->stmt)
6237     {
6238       enum tree_code stmt_code;
6239
6240       gcc_assert (is_gimple_assign (use->stmt));
6241       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6242
6243       /* Check whether we may leave the computation unchanged.
6244          This is the case only if it does not rely on other
6245          computations in the loop -- otherwise, the computation
6246          we rely upon may be removed in remove_unused_ivs,
6247          thus leading to ICE.  */
6248       stmt_code = gimple_assign_rhs_code (use->stmt);
6249       if (stmt_code == PLUS_EXPR
6250           || stmt_code == MINUS_EXPR
6251           || stmt_code == POINTER_PLUS_EXPR)
6252         {
6253           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6254             op = gimple_assign_rhs2 (use->stmt);
6255           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6256             op = gimple_assign_rhs1 (use->stmt);
6257           else
6258             op = NULL_TREE;
6259         }
6260       else
6261         op = NULL_TREE;
6262
6263       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6264         return;
6265     }
6266
6267   comp = get_computation (data->current_loop, use, cand);
6268   gcc_assert (comp != NULL_TREE);
6269
6270   switch (gimple_code (use->stmt))
6271     {
6272     case GIMPLE_PHI:
6273       tgt = PHI_RESULT (use->stmt);
6274
6275       /* If we should keep the biv, do not replace it.  */
6276       if (name_info (data, tgt)->preserve_biv)
6277         return;
6278
6279       bsi = gsi_after_labels (gimple_bb (use->stmt));
6280       break;
6281
6282     case GIMPLE_ASSIGN:
6283       tgt = gimple_assign_lhs (use->stmt);
6284       bsi = gsi_for_stmt (use->stmt);
6285       break;
6286
6287     default:
6288       gcc_unreachable ();
6289     }
6290
6291   if (!valid_gimple_rhs_p (comp)
6292       || (gimple_code (use->stmt) != GIMPLE_PHI
6293           /* We can't allow re-allocating the stmt as it might be pointed
6294              to still.  */
6295           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6296               >= gimple_num_ops (gsi_stmt (bsi)))))
6297     {
6298       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6299                                        true, GSI_SAME_STMT);
6300       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6301         {
6302           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6303           /* As this isn't a plain copy we have to reset alignment
6304              information.  */
6305           if (SSA_NAME_PTR_INFO (comp))
6306             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6307         }
6308     }
6309
6310   if (gimple_code (use->stmt) == GIMPLE_PHI)
6311     {
6312       ass = gimple_build_assign (tgt, comp);
6313       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6314
6315       bsi = gsi_for_stmt (use->stmt);
6316       remove_phi_node (&bsi, false);
6317     }
6318   else
6319     {
6320       gimple_assign_set_rhs_from_tree (&bsi, comp);
6321       use->stmt = gsi_stmt (bsi);
6322     }
6323 }
6324
6325 /* Performs a peephole optimization to reorder the iv update statement with
6326    a mem ref to enable instruction combining in later phases. The mem ref uses
6327    the iv value before the update, so the reordering transformation requires
6328    adjustment of the offset. CAND is the selected IV_CAND.
6329
6330    Example:
6331
6332    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6333    iv2 = iv1 + 1;
6334
6335    if (t < val)      (1)
6336      goto L;
6337    goto Head;
6338
6339
6340    directly propagating t over to (1) will introduce overlapping live range
6341    thus increase register pressure. This peephole transform it into:
6342
6343
6344    iv2 = iv1 + 1;
6345    t = MEM_REF (base, iv2, 8, 8);
6346    if (t < val)
6347      goto L;
6348    goto Head;
6349 */
6350
6351 static void
6352 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6353 {
6354   tree var_after;
6355   gimple iv_update, stmt;
6356   basic_block bb;
6357   gimple_stmt_iterator gsi, gsi_iv;
6358
6359   if (cand->pos != IP_NORMAL)
6360     return;
6361
6362   var_after = cand->var_after;
6363   iv_update = SSA_NAME_DEF_STMT (var_after);
6364
6365   bb = gimple_bb (iv_update);
6366   gsi = gsi_last_nondebug_bb (bb);
6367   stmt = gsi_stmt (gsi);
6368
6369   /* Only handle conditional statement for now.  */
6370   if (gimple_code (stmt) != GIMPLE_COND)
6371     return;
6372
6373   gsi_prev_nondebug (&gsi);
6374   stmt = gsi_stmt (gsi);
6375   if (stmt != iv_update)
6376     return;
6377
6378   gsi_prev_nondebug (&gsi);
6379   if (gsi_end_p (gsi))
6380     return;
6381
6382   stmt = gsi_stmt (gsi);
6383   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6384     return;
6385
6386   if (stmt != use->stmt)
6387     return;
6388
6389   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6390     return;
6391
6392   if (dump_file && (dump_flags & TDF_DETAILS))
6393     {
6394       fprintf (dump_file, "Reordering \n");
6395       print_gimple_stmt (dump_file, iv_update, 0, 0);
6396       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6397       fprintf (dump_file, "\n");
6398     }
6399
6400   gsi = gsi_for_stmt (use->stmt);
6401   gsi_iv = gsi_for_stmt (iv_update);
6402   gsi_move_before (&gsi_iv, &gsi);
6403
6404   cand->pos = IP_BEFORE_USE;
6405   cand->incremented_at = use->stmt;
6406 }
6407
6408 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6409
6410 static void
6411 rewrite_use_address (struct ivopts_data *data,
6412                      struct iv_use *use, struct iv_cand *cand)
6413 {
6414   aff_tree aff;
6415   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6416   tree base_hint = NULL_TREE;
6417   tree ref, iv;
6418   bool ok;
6419
6420   adjust_iv_update_pos (cand, use);
6421   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6422   gcc_assert (ok);
6423   unshare_aff_combination (&aff);
6424
6425   /* To avoid undefined overflow problems, all IV candidates use unsigned
6426      integer types.  The drawback is that this makes it impossible for
6427      create_mem_ref to distinguish an IV that is based on a memory object
6428      from one that represents simply an offset.
6429
6430      To work around this problem, we pass a hint to create_mem_ref that
6431      indicates which variable (if any) in aff is an IV based on a memory
6432      object.  Note that we only consider the candidate.  If this is not
6433      based on an object, the base of the reference is in some subexpression
6434      of the use -- but these will use pointer types, so they are recognized
6435      by the create_mem_ref heuristics anyway.  */
6436   if (cand->iv->base_object)
6437     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6438
6439   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6440   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6441                         reference_alias_ptr_type (*use->op_p),
6442                         iv, base_hint, data->speed);
6443   copy_ref_info (ref, *use->op_p);
6444   *use->op_p = ref;
6445 }
6446
6447 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6448    candidate CAND.  */
6449
6450 static void
6451 rewrite_use_compare (struct ivopts_data *data,
6452                      struct iv_use *use, struct iv_cand *cand)
6453 {
6454   tree comp, *var_p, op, bound;
6455   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6456   enum tree_code compare;
6457   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6458   bool ok;
6459
6460   bound = cp->value;
6461   if (bound)
6462     {
6463       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6464       tree var_type = TREE_TYPE (var);
6465       gimple_seq stmts;
6466
6467       if (dump_file && (dump_flags & TDF_DETAILS))
6468         {
6469           fprintf (dump_file, "Replacing exit test: ");
6470           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6471         }
6472       compare = cp->comp;
6473       bound = unshare_expr (fold_convert (var_type, bound));
6474       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6475       if (stmts)
6476         gsi_insert_seq_on_edge_immediate (
6477                 loop_preheader_edge (data->current_loop),
6478                 stmts);
6479
6480       gimple_cond_set_lhs (use->stmt, var);
6481       gimple_cond_set_code (use->stmt, compare);
6482       gimple_cond_set_rhs (use->stmt, op);
6483       return;
6484     }
6485
6486   /* The induction variable elimination failed; just express the original
6487      giv.  */
6488   comp = get_computation (data->current_loop, use, cand);
6489   gcc_assert (comp != NULL_TREE);
6490
6491   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6492   gcc_assert (ok);
6493
6494   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6495                                      true, GSI_SAME_STMT);
6496 }
6497
6498 /* Rewrites USE using candidate CAND.  */
6499
6500 static void
6501 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6502 {
6503   switch (use->type)
6504     {
6505       case USE_NONLINEAR_EXPR:
6506         rewrite_use_nonlinear_expr (data, use, cand);
6507         break;
6508
6509       case USE_ADDRESS:
6510         rewrite_use_address (data, use, cand);
6511         break;
6512
6513       case USE_COMPARE:
6514         rewrite_use_compare (data, use, cand);
6515         break;
6516
6517       default:
6518         gcc_unreachable ();
6519     }
6520
6521   update_stmt (use->stmt);
6522 }
6523
6524 /* Rewrite the uses using the selected induction variables.  */
6525
6526 static void
6527 rewrite_uses (struct ivopts_data *data)
6528 {
6529   unsigned i;
6530   struct iv_cand *cand;
6531   struct iv_use *use;
6532
6533   for (i = 0; i < n_iv_uses (data); i++)
6534     {
6535       use = iv_use (data, i);
6536       cand = use->selected;
6537       gcc_assert (cand);
6538
6539       rewrite_use (data, use, cand);
6540     }
6541 }
6542
6543 /* Removes the ivs that are not used after rewriting.  */
6544
6545 static void
6546 remove_unused_ivs (struct ivopts_data *data)
6547 {
6548   unsigned j;
6549   bitmap_iterator bi;
6550   bitmap toremove = BITMAP_ALLOC (NULL);
6551
6552   /* Figure out an order in which to release SSA DEFs so that we don't
6553      release something that we'd have to propagate into a debug stmt
6554      afterwards.  */
6555   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6556     {
6557       struct version_info *info;
6558
6559       info = ver_info (data, j);
6560       if (info->iv
6561           && !integer_zerop (info->iv->step)
6562           && !info->inv_id
6563           && !info->iv->have_use_for
6564           && !info->preserve_biv)
6565         {
6566           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6567
6568           tree def = info->iv->ssa_name;
6569
6570           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6571             {
6572               imm_use_iterator imm_iter;
6573               use_operand_p use_p;
6574               gimple stmt;
6575               int count = 0;
6576
6577               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6578                 {
6579                   if (!gimple_debug_bind_p (stmt))
6580                     continue;
6581
6582                   /* We just want to determine whether to do nothing
6583                      (count == 0), to substitute the computed
6584                      expression into a single use of the SSA DEF by
6585                      itself (count == 1), or to use a debug temp
6586                      because the SSA DEF is used multiple times or as
6587                      part of a larger expression (count > 1). */
6588                   count++;
6589                   if (gimple_debug_bind_get_value (stmt) != def)
6590                     count++;
6591
6592                   if (count > 1)
6593                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6594                 }
6595
6596               if (!count)
6597                 continue;
6598
6599               struct iv_use dummy_use;
6600               struct iv_cand *best_cand = NULL, *cand;
6601               unsigned i, best_pref = 0, cand_pref;
6602
6603               memset (&dummy_use, 0, sizeof (dummy_use));
6604               dummy_use.iv = info->iv;
6605               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6606                 {
6607                   cand = iv_use (data, i)->selected;
6608                   if (cand == best_cand)
6609                     continue;
6610                   cand_pref = operand_equal_p (cand->iv->step,
6611                                                info->iv->step, 0)
6612                     ? 4 : 0;
6613                   cand_pref
6614                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6615                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6616                     ? 2 : 0;
6617                   cand_pref
6618                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6619                     ? 1 : 0;
6620                   if (best_cand == NULL || best_pref < cand_pref)
6621                     {
6622                       best_cand = cand;
6623                       best_pref = cand_pref;
6624                     }
6625                 }
6626
6627               if (!best_cand)
6628                 continue;
6629
6630               tree comp = get_computation_at (data->current_loop,
6631                                               &dummy_use, best_cand,
6632                                               SSA_NAME_DEF_STMT (def));
6633               if (!comp)
6634                 continue;
6635
6636               if (count > 1)
6637                 {
6638                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6639                   DECL_ARTIFICIAL (vexpr) = 1;
6640                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6641                   if (SSA_NAME_VAR (def))
6642                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6643                   else
6644                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6645                   gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6646                   gimple_stmt_iterator gsi;
6647
6648                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6649                     gsi = gsi_after_labels (gimple_bb
6650                                             (SSA_NAME_DEF_STMT (def)));
6651                   else
6652                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6653
6654                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6655                   comp = vexpr;
6656                 }
6657
6658               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6659                 {
6660                   if (!gimple_debug_bind_p (stmt))
6661                     continue;
6662
6663                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6664                     SET_USE (use_p, comp);
6665
6666                   update_stmt (stmt);
6667                 }
6668             }
6669         }
6670     }
6671
6672   release_defs_bitset (toremove);
6673
6674   BITMAP_FREE (toremove);
6675 }
6676
6677 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6678    for hash_map::traverse.  */
6679
6680 bool
6681 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6682 {
6683   free (value);
6684   return true;
6685 }
6686
6687 /* Frees data allocated by the optimization of a single loop.  */
6688
6689 static void
6690 free_loop_data (struct ivopts_data *data)
6691 {
6692   unsigned i, j;
6693   bitmap_iterator bi;
6694   tree obj;
6695
6696   if (data->niters)
6697     {
6698       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6699       delete data->niters;
6700       data->niters = NULL;
6701     }
6702
6703   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6704     {
6705       struct version_info *info;
6706
6707       info = ver_info (data, i);
6708       free (info->iv);
6709       info->iv = NULL;
6710       info->has_nonlin_use = false;
6711       info->preserve_biv = false;
6712       info->inv_id = 0;
6713     }
6714   bitmap_clear (data->relevant);
6715   bitmap_clear (data->important_candidates);
6716
6717   for (i = 0; i < n_iv_uses (data); i++)
6718     {
6719       struct iv_use *use = iv_use (data, i);
6720
6721       free (use->iv);
6722       BITMAP_FREE (use->related_cands);
6723       for (j = 0; j < use->n_map_members; j++)
6724         if (use->cost_map[j].depends_on)
6725           BITMAP_FREE (use->cost_map[j].depends_on);
6726       free (use->cost_map);
6727       free (use);
6728     }
6729   data->iv_uses.truncate (0);
6730
6731   for (i = 0; i < n_iv_cands (data); i++)
6732     {
6733       struct iv_cand *cand = iv_cand (data, i);
6734
6735       free (cand->iv);
6736       if (cand->depends_on)
6737         BITMAP_FREE (cand->depends_on);
6738       free (cand);
6739     }
6740   data->iv_candidates.truncate (0);
6741
6742   if (data->version_info_size < num_ssa_names)
6743     {
6744       data->version_info_size = 2 * num_ssa_names;
6745       free (data->version_info);
6746       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6747     }
6748
6749   data->max_inv_id = 0;
6750
6751   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6752     SET_DECL_RTL (obj, NULL_RTX);
6753
6754   decl_rtl_to_reset.truncate (0);
6755
6756   data->inv_expr_tab->empty ();
6757   data->inv_expr_id = 0;
6758 }
6759
6760 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6761    loop tree.  */
6762
6763 static void
6764 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6765 {
6766   free_loop_data (data);
6767   free (data->version_info);
6768   BITMAP_FREE (data->relevant);
6769   BITMAP_FREE (data->important_candidates);
6770
6771   decl_rtl_to_reset.release ();
6772   data->iv_uses.release ();
6773   data->iv_candidates.release ();
6774   delete data->inv_expr_tab;
6775   data->inv_expr_tab = NULL;
6776   free_affine_expand_cache (&data->name_expansion_cache);
6777 }
6778
6779 /* Returns true if the loop body BODY includes any function calls.  */
6780
6781 static bool
6782 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6783 {
6784   gimple_stmt_iterator gsi;
6785   unsigned i;
6786
6787   for (i = 0; i < num_nodes; i++)
6788     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6789       {
6790         gimple stmt = gsi_stmt (gsi);
6791         if (is_gimple_call (stmt)
6792             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6793           return true;
6794       }
6795   return false;
6796 }
6797
6798 /* Optimizes the LOOP.  Returns true if anything changed.  */
6799
6800 static bool
6801 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6802 {
6803   bool changed = false;
6804   struct iv_ca *iv_ca;
6805   edge exit = single_dom_exit (loop);
6806   basic_block *body;
6807
6808   gcc_assert (!data->niters);
6809   data->current_loop = loop;
6810   data->speed = optimize_loop_for_speed_p (loop);
6811
6812   if (dump_file && (dump_flags & TDF_DETAILS))
6813     {
6814       fprintf (dump_file, "Processing loop %d\n", loop->num);
6815
6816       if (exit)
6817         {
6818           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6819                    exit->src->index, exit->dest->index);
6820           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6821           fprintf (dump_file, "\n");
6822         }
6823
6824       fprintf (dump_file, "\n");
6825     }
6826
6827   body = get_loop_body (loop);
6828   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6829   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6830   free (body);
6831
6832   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6833
6834   /* For each ssa name determines whether it behaves as an induction variable
6835      in some loop.  */
6836   if (!find_induction_variables (data))
6837     goto finish;
6838
6839   /* Finds interesting uses (item 1).  */
6840   find_interesting_uses (data);
6841   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6842     goto finish;
6843
6844   /* Finds candidates for the induction variables (item 2).  */
6845   find_iv_candidates (data);
6846
6847   /* Calculates the costs (item 3, part 1).  */
6848   determine_iv_costs (data);
6849   determine_use_iv_costs (data);
6850   determine_set_costs (data);
6851
6852   /* Find the optimal set of induction variables (item 3, part 2).  */
6853   iv_ca = find_optimal_iv_set (data);
6854   if (!iv_ca)
6855     goto finish;
6856   changed = true;
6857
6858   /* Create the new induction variables (item 4, part 1).  */
6859   create_new_ivs (data, iv_ca);
6860   iv_ca_free (&iv_ca);
6861
6862   /* Rewrite the uses (item 4, part 2).  */
6863   rewrite_uses (data);
6864
6865   /* Remove the ivs that are unused after rewriting.  */
6866   remove_unused_ivs (data);
6867
6868   /* We have changed the structure of induction variables; it might happen
6869      that definitions in the scev database refer to some of them that were
6870      eliminated.  */
6871   scev_reset ();
6872
6873 finish:
6874   free_loop_data (data);
6875
6876   return changed;
6877 }
6878
6879 /* Main entry point.  Optimizes induction variables in loops.  */
6880
6881 void
6882 tree_ssa_iv_optimize (void)
6883 {
6884   struct loop *loop;
6885   struct ivopts_data data;
6886
6887   tree_ssa_iv_optimize_init (&data);
6888
6889   /* Optimize the loops starting with the innermost ones.  */
6890   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6891     {
6892       if (dump_file && (dump_flags & TDF_DETAILS))
6893         flow_loop_dump (loop, dump_file, NULL, 1);
6894
6895       tree_ssa_iv_optimize_loop (&data, loop);
6896     }
6897
6898   tree_ssa_iv_optimize_finalize (&data);
6899 }