/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
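
/* An illustrative sketch of the transformation (invented variables, not
   code from this pass): a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   contains an address use a[i].  Choosing a pointer candidate with base
   &a[0] and step sizeof (*a) allows the loop to be rewritten roughly as

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   replacing the multiply hidden in the a[i] addressing by a cheap pointer
   increment; the original counter i may then become dead and be removed.  */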

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
	 with ssa names that appear in phi nodes on abnormal edges, lest we
	 create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
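
/* Example (a sketch, not taken from the sources): for EXPR == &a[i] the
   ADDR_EXPR case finds the base address of the reference, so the function
   returns the address of the declaration "a" converted to ptr_type_node;
   for an integer constant such as (char *) 0, NULL_TREE is returned, as
   no object is pointed to.  */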

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
	forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
					     &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
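
/* A small illustration (ssa names invented for this sketch): for a loop
   header phi

     i_1 = PHI <0(preheader), i_2(latch)>

   where the latch contains i_2 = i_1 + 4, determine_biv_step returns 4.
   If the phi result does not evolve as a simple affine iv, or its step
   is zero, NULL_TREE is returned instead.  */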

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not reset by the caller, so that it is
     not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
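
/* Sketch of the expected behavior (invented ssa names): for an exit test

     if (i_3 < n_5)

   where i_3 is an iv with nonzero step and n_5 is loop invariant, true is
   returned with *CONTROL_VAR pointing at i_3 and *BOUND at n_5.  If the
   iv appears on the right-hand side instead, the operands are swapped
   first, so the iv always ends up in *CONTROL_VAR.  */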

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
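
/* A hedged example: for a reference a[i] whose index i is an iv with step 1
   and whose element size is 4 bytes, the index is replaced by its initial
   value and 1 * 4 = 4 is accumulated into DTA->step, so the surrounding
   address is later treated as an iv stepping by 4 bytes per iteration.  */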

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
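
/* For example (a sketch; n stands for an arbitrary expression): with
   TOP = n * 6 and BOT = n, the MULT_EXPR case recurses on n, which matches
   BOT with constant 1, and multiplies by 6, storing 6 to *MUL.  With
   TOP = 12 and BOT = 8, the INTEGER_CST case fails because the floor
   division leaves a nonzero remainder.  */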

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */
2048
2049 static tree
2050 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2051 HOST_WIDE_INT *offset)
2052 {
2053 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2054 enum tree_code code;
2055 tree type, orig_type = TREE_TYPE (expr);
2056 HOST_WIDE_INT off0, off1, st;
2057 tree orig_expr = expr;
2058
2059 STRIP_NOPS (expr);
2060
2061 type = TREE_TYPE (expr);
2062 code = TREE_CODE (expr);
2063 *offset = 0;
2064
2065 switch (code)
2066 {
2067 case INTEGER_CST:
2068 if (!cst_and_fits_in_hwi (expr)
2069 || integer_zerop (expr))
2070 return orig_expr;
2071
2072 *offset = int_cst_value (expr);
2073 return build_int_cst (orig_type, 0);
2074
2075 case POINTER_PLUS_EXPR:
2076 case PLUS_EXPR:
2077 case MINUS_EXPR:
2078 op0 = TREE_OPERAND (expr, 0);
2079 op1 = TREE_OPERAND (expr, 1);
2080
2081 op0 = strip_offset_1 (op0, false, false, &off0);
2082 op1 = strip_offset_1 (op1, false, false, &off1);
2083
2084 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2085 if (op0 == TREE_OPERAND (expr, 0)
2086 && op1 == TREE_OPERAND (expr, 1))
2087 return orig_expr;
2088
2089 if (integer_zerop (op1))
2090 expr = op0;
2091 else if (integer_zerop (op0))
2092 {
2093 if (code == MINUS_EXPR)
2094 expr = fold_build1 (NEGATE_EXPR, type, op1);
2095 else
2096 expr = op1;
2097 }
2098 else
2099 expr = fold_build2 (code, type, op0, op1);
2100
2101 return fold_convert (orig_type, expr);
2102
2103 case MULT_EXPR:
2104 op1 = TREE_OPERAND (expr, 1);
2105 if (!cst_and_fits_in_hwi (op1))
2106 return orig_expr;
2107
2108 op0 = TREE_OPERAND (expr, 0);
2109 op0 = strip_offset_1 (op0, false, false, &off0);
2110 if (op0 == TREE_OPERAND (expr, 0))
2111 return orig_expr;
2112
2113 *offset = off0 * int_cst_value (op1);
2114 if (integer_zerop (op0))
2115 expr = op0;
2116 else
2117 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2118
2119 return fold_convert (orig_type, expr);
2120
2121 case ARRAY_REF:
2122 case ARRAY_RANGE_REF:
2123 if (!inside_addr)
2124 return orig_expr;
2125
2126 step = array_ref_element_size (expr);
2127 if (!cst_and_fits_in_hwi (step))
2128 break;
2129
2130 st = int_cst_value (step);
2131 op1 = TREE_OPERAND (expr, 1);
2132 op1 = strip_offset_1 (op1, false, false, &off1);
2133 *offset = off1 * st;
2134
2135 if (top_compref
2136 && integer_zerop (op1))
2137 {
2138 /* Strip the component reference completely. */
2139 op0 = TREE_OPERAND (expr, 0);
2140 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2141 *offset += off0;
2142 return op0;
2143 }
2144 break;
2145
2146 case COMPONENT_REF:
2147 {
2148 tree field;
2149
2150 if (!inside_addr)
2151 return orig_expr;
2152
2153 tmp = component_ref_field_offset (expr);
2154 field = TREE_OPERAND (expr, 1);
2155 if (top_compref
2156 && cst_and_fits_in_hwi (tmp)
2157 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2158 {
2159 HOST_WIDE_INT boffset, abs_off;
2160
2161 /* Strip the component reference completely. */
2162 op0 = TREE_OPERAND (expr, 0);
2163 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2164 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2165 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2166 if (boffset < 0)
2167 abs_off = -abs_off;
2168
2169 *offset = off0 + int_cst_value (tmp) + abs_off;
2170 return op0;
2171 }
2172 }
2173 break;
2174
2175 case ADDR_EXPR:
2176 op0 = TREE_OPERAND (expr, 0);
2177 op0 = strip_offset_1 (op0, true, true, &off0);
2178 *offset += off0;
2179
2180 if (op0 == TREE_OPERAND (expr, 0))
2181 return orig_expr;
2182
2183 expr = build_fold_addr_expr (op0);
2184 return fold_convert (orig_type, expr);
2185
2186 case MEM_REF:
2187 /* ??? Offset operand? */
2188 inside_addr = false;
2189 break;
2190
2191 default:
2192 return orig_expr;
2193 }
2194
2195 /* Default handling of expressions for which we want to recurse into
2196 the first operand. */
2197 op0 = TREE_OPERAND (expr, 0);
2198 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2199 *offset += off0;
2200
2201 if (op0 == TREE_OPERAND (expr, 0)
2202 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2203 return orig_expr;
2204
2205 expr = copy_node (expr);
2206 TREE_OPERAND (expr, 0) = op0;
2207 if (op1)
2208 TREE_OPERAND (expr, 1) = op1;
2209
2210 /* Inside an address, we might strip top-level component references,
2211 thus changing the type of the expression. Handling of ADDR_EXPR
2212 will fix that. */
2213 expr = fold_convert (orig_type, expr);
2214
2215 return expr;
2216 }
2217
2218 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2219
2220 static tree
2221 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2222 {
2223 HOST_WIDE_INT off;
2224 tree core = strip_offset_1 (expr, false, false, &off);
2225 *offset = off;
2226 return core;
2227 }
2228
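/* As an illustrative example (hypothetical names, not taken from this
   file): if A is an array of 4-byte integers, then for EXPR = &a[i + 3]
   the recursion proceeds through the ADDR_EXPR into the ARRAY_REF, the
   element size 4 multiplies the constant part of the index, and the call
   returns &a[i] with *OFFSET set to 12.  A non-constant index such as i
   stays in the returned core expression.  */
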
2229 /* Returns a variant of TYPE that can be used as a base for different uses.
2230 We return an unsigned type with the same precision, which avoids problems
2231 with overflows. */
2232
2233 static tree
2234 generic_type_for (tree type)
2235 {
2236 if (POINTER_TYPE_P (type))
2237 return unsigned_type_for (type);
2238
2239 if (TYPE_UNSIGNED (type))
2240 return type;
2241
2242 return unsigned_type_for (type);
2243 }
2244
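/* A sketch of the intended behavior: for a 32-bit "int" this returns
   "unsigned int", and for a pointer type it returns an unsigned integer
   type of the same precision, so that arithmetic on the candidate wraps
   instead of invoking undefined behavior on overflow.  */
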
2245 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2246 the bitmap in which we should store them. */
2247
2248 static struct ivopts_data *fd_ivopts_data;
2249 static tree
2250 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2251 {
2252 bitmap *depends_on = (bitmap *) data;
2253 struct version_info *info;
2254
2255 if (TREE_CODE (*expr_p) != SSA_NAME)
2256 return NULL_TREE;
2257 info = name_info (fd_ivopts_data, *expr_p);
2258
2259 if (!info->inv_id || info->has_nonlin_use)
2260 return NULL_TREE;
2261
2262 if (!*depends_on)
2263 *depends_on = BITMAP_ALLOC (NULL);
2264 bitmap_set_bit (*depends_on, info->inv_id);
2265
2266 return NULL_TREE;
2267 }
2268
2269 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2270 and the position to POS. If USE is not NULL, the candidate is set as related to
2271 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2272 replacement of the final value of the iv by a direct computation. */
2273
2274 static struct iv_cand *
2275 add_candidate_1 (struct ivopts_data *data,
2276 tree base, tree step, bool important, enum iv_position pos,
2277 struct iv_use *use, gimple incremented_at)
2278 {
2279 unsigned i;
2280 struct iv_cand *cand = NULL;
2281 tree type, orig_type;
2282
2283 /* For non-original variables, make sure their values are computed in a type
2284 that does not invoke undefined behavior on overflows (since in general,
2285 we cannot prove that these induction variables are non-wrapping). */
2286 if (pos != IP_ORIGINAL)
2287 {
2288 orig_type = TREE_TYPE (base);
2289 type = generic_type_for (orig_type);
2290 if (type != orig_type)
2291 {
2292 base = fold_convert (type, base);
2293 step = fold_convert (type, step);
2294 }
2295 }
2296
2297 for (i = 0; i < n_iv_cands (data); i++)
2298 {
2299 cand = iv_cand (data, i);
2300
2301 if (cand->pos != pos)
2302 continue;
2303
2304 if (cand->incremented_at != incremented_at
2305 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2306 && cand->ainc_use != use))
2307 continue;
2308
2309 if (!cand->iv)
2310 {
2311 if (!base && !step)
2312 break;
2313
2314 continue;
2315 }
2316
2317 if (!base && !step)
2318 continue;
2319
2320 if (operand_equal_p (base, cand->iv->base, 0)
2321 && operand_equal_p (step, cand->iv->step, 0)
2322 && (TYPE_PRECISION (TREE_TYPE (base))
2323 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2324 break;
2325 }
2326
2327 if (i == n_iv_cands (data))
2328 {
2329 cand = XCNEW (struct iv_cand);
2330 cand->id = i;
2331
2332 if (!base && !step)
2333 cand->iv = NULL;
2334 else
2335 cand->iv = alloc_iv (base, step);
2336
2337 cand->pos = pos;
2338 if (pos != IP_ORIGINAL && cand->iv)
2339 {
2340 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2341 cand->var_after = cand->var_before;
2342 }
2343 cand->important = important;
2344 cand->incremented_at = incremented_at;
2345 data->iv_candidates.safe_push (cand);
2346
2347 if (step
2348 && TREE_CODE (step) != INTEGER_CST)
2349 {
2350 fd_ivopts_data = data;
2351 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2352 }
2353
2354 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2355 cand->ainc_use = use;
2356 else
2357 cand->ainc_use = NULL;
2358
2359 if (dump_file && (dump_flags & TDF_DETAILS))
2360 dump_cand (dump_file, cand);
2361 }
2362
2363 if (important && !cand->important)
2364 {
2365 cand->important = true;
2366 if (dump_file && (dump_flags & TDF_DETAILS))
2367 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2368 }
2369
2370 if (use)
2371 {
2372 bitmap_set_bit (use->related_cands, i);
2373 if (dump_file && (dump_flags & TDF_DETAILS))
2374 fprintf (dump_file, "Candidate %d is related to use %d\n",
2375 cand->id, use->id);
2376 }
2377
2378 return cand;
2379 }
2380
2381 /* Returns true if incrementing the induction variable at the end of the LOOP
2382 is allowed.
2383
2384 The purpose is to avoid splitting the latch edge with a biv increment, thus
2385 creating a jump, possibly confusing other optimization passes and leaving
2386 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2387 is not available (so we do not have a better alternative), or if the latch
2388 edge is already nonempty. */
2389
2390 static bool
2391 allow_ip_end_pos_p (struct loop *loop)
2392 {
2393 if (!ip_normal_pos (loop))
2394 return true;
2395
2396 if (!empty_block_p (ip_end_pos (loop)))
2397 return true;
2398
2399 return false;
2400 }
2401
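/* For example: if the latch block already contains statements, placing
   the increment there creates no new jump, so IP_END is allowed even when
   IP_NORMAL_POS is also available; with an empty latch and an available
   IP_NORMAL_POS, the end position is rejected.  */
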
2402 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2403 The important field is set to IMPORTANT. */
2404
2405 static void
2406 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2407 bool important, struct iv_use *use)
2408 {
2409 basic_block use_bb = gimple_bb (use->stmt);
2410 enum machine_mode mem_mode;
2411 unsigned HOST_WIDE_INT cstepi;
2412
2413 /* If we insert the increment in any position other than the standard
2414 ones, we must ensure that it is incremented once per iteration.
2415 It must not be in an inner nested loop, or on one side of an if
2416 statement. */
2417 if (use_bb->loop_father != data->current_loop
2418 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2419 || stmt_could_throw_p (use->stmt)
2420 || !cst_and_fits_in_hwi (step))
2421 return;
2422
2423 cstepi = int_cst_value (step);
2424
2425 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2426 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2427 || USE_STORE_PRE_INCREMENT (mem_mode))
2428 && GET_MODE_SIZE (mem_mode) == cstepi)
2429 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2430 || USE_STORE_PRE_DECREMENT (mem_mode))
2431 && GET_MODE_SIZE (mem_mode) == -cstepi))
2432 {
2433 enum tree_code code = MINUS_EXPR;
2434 tree new_base;
2435 tree new_step = step;
2436
2437 if (POINTER_TYPE_P (TREE_TYPE (base)))
2438 {
2439 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2440 code = POINTER_PLUS_EXPR;
2441 }
2442 else
2443 new_step = fold_convert (TREE_TYPE (base), new_step);
2444 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2445 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2446 use->stmt);
2447 }
2448 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2449 || USE_STORE_POST_INCREMENT (mem_mode))
2450 && GET_MODE_SIZE (mem_mode) == cstepi)
2451 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2452 || USE_STORE_POST_DECREMENT (mem_mode))
2453 && GET_MODE_SIZE (mem_mode) == -cstepi))
2454 {
2455 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2456 use->stmt);
2457 }
2458 }
2459
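/* A hypothetical example: on a target with 4-byte pre-increment
   addressing, a use *p with a pointer iv of step 4 yields an
   IP_BEFORE_USE candidate with base P - 4 (so that the pre-incremented
   value equals P), and, when post-increment is supported, an
   IP_AFTER_USE candidate with base P.  */
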
2460 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2461 position to POS. If USE is not NULL, the candidate is set as related to
2462 it. The candidate computation is scheduled on all available positions. */
2463
2464 static void
2465 add_candidate (struct ivopts_data *data,
2466 tree base, tree step, bool important, struct iv_use *use)
2467 {
2468 if (ip_normal_pos (data->current_loop))
2469 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2470 if (ip_end_pos (data->current_loop)
2471 && allow_ip_end_pos_p (data->current_loop))
2472 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2473
2474 if (use != NULL && use->type == USE_ADDRESS)
2475 add_autoinc_candidates (data, base, step, important, use);
2476 }
2477
2478 /* Adds standard iv candidates. */
2479
2480 static void
2481 add_standard_iv_candidates (struct ivopts_data *data)
2482 {
2483 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2484
2485 /* The same for the long integer type if it is still fast enough. */
2486 if (TYPE_PRECISION
2487 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2488 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2489 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2490 build_int_cst (long_integer_type_node, 1), true, NULL);
2491
2492 /* The same for the long long integer type if it is still fast enough. */
2493 if (TYPE_PRECISION
2494 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2495 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2496 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2497 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2498 }
2499
2500
2501 /* Adds candidates based on the old induction variable IV. */
2502
2503 static void
2504 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2505 {
2506 gimple phi;
2507 tree def;
2508 struct iv_cand *cand;
2509
2510 add_candidate (data, iv->base, iv->step, true, NULL);
2511
2512 /* The same, but with initial value zero. */
2513 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2514 add_candidate (data, size_int (0), iv->step, true, NULL);
2515 else
2516 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2517 iv->step, true, NULL);
2518
2519 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2520 if (gimple_code (phi) == GIMPLE_PHI)
2521 {
2522 /* Additionally record the possibility of leaving the original iv
2523 untouched. */
2524 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2525 cand = add_candidate_1 (data,
2526 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2527 SSA_NAME_DEF_STMT (def));
2528 cand->var_before = iv->ssa_name;
2529 cand->var_after = def;
2530 }
2531 }
2532
2533 /* Adds candidates based on the old induction variables. */
2534
2535 static void
2536 add_old_ivs_candidates (struct ivopts_data *data)
2537 {
2538 unsigned i;
2539 struct iv *iv;
2540 bitmap_iterator bi;
2541
2542 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2543 {
2544 iv = ver_info (data, i)->iv;
2545 if (iv && iv->biv_p && !integer_zerop (iv->step))
2546 add_old_iv_candidates (data, iv);
2547 }
2548 }
2549
2550 /* Adds candidates based on the value of the induction variable IV and USE. */
2551
2552 static void
2553 add_iv_value_candidates (struct ivopts_data *data,
2554 struct iv *iv, struct iv_use *use)
2555 {
2556 unsigned HOST_WIDE_INT offset;
2557 tree base;
2558 tree basetype;
2559
2560 add_candidate (data, iv->base, iv->step, false, use);
2561
2562 /* The same, but with initial value zero. Make such a variable important,
2563 since it is generic enough that possibly many uses may be based
2564 on it. */
2565 basetype = TREE_TYPE (iv->base);
2566 if (POINTER_TYPE_P (basetype))
2567 basetype = sizetype;
2568 add_candidate (data, build_int_cst (basetype, 0),
2569 iv->step, true, use);
2570
2571 /* Third, try removing the constant offset. Make sure to even
2572 add a candidate for &a[0] vs. (T *)&a. */
2573 base = strip_offset (iv->base, &offset);
2574 if (offset
2575 || base != iv->base)
2576 add_candidate (data, base, iv->step, false, use);
2577 }
2578
2579 /* Adds candidates based on the uses. */
2580
2581 static void
2582 add_derived_ivs_candidates (struct ivopts_data *data)
2583 {
2584 unsigned i;
2585
2586 for (i = 0; i < n_iv_uses (data); i++)
2587 {
2588 struct iv_use *use = iv_use (data, i);
2589
2590 if (!use)
2591 continue;
2592
2593 switch (use->type)
2594 {
2595 case USE_NONLINEAR_EXPR:
2596 case USE_COMPARE:
2597 case USE_ADDRESS:
2598 /* Just add the ivs based on the value of the iv used here. */
2599 add_iv_value_candidates (data, use->iv, use);
2600 break;
2601
2602 default:
2603 gcc_unreachable ();
2604 }
2605 }
2606 }
2607
2608 /* Record important candidates and add them to related_cands bitmaps
2609 if needed. */
2610
2611 static void
2612 record_important_candidates (struct ivopts_data *data)
2613 {
2614 unsigned i;
2615 struct iv_use *use;
2616
2617 for (i = 0; i < n_iv_cands (data); i++)
2618 {
2619 struct iv_cand *cand = iv_cand (data, i);
2620
2621 if (cand->important)
2622 bitmap_set_bit (data->important_candidates, i);
2623 }
2624
2625 data->consider_all_candidates = (n_iv_cands (data)
2626 <= CONSIDER_ALL_CANDIDATES_BOUND);
2627
2628 if (data->consider_all_candidates)
2629 {
2630 /* We will not need "related_cands" bitmaps in this case,
2631 so release them to decrease peak memory consumption. */
2632 for (i = 0; i < n_iv_uses (data); i++)
2633 {
2634 use = iv_use (data, i);
2635 BITMAP_FREE (use->related_cands);
2636 }
2637 }
2638 else
2639 {
2640 /* Add important candidates to the related_cands bitmaps. */
2641 for (i = 0; i < n_iv_uses (data); i++)
2642 bitmap_ior_into (iv_use (data, i)->related_cands,
2643 data->important_candidates);
2644 }
2645 }
2646
2647 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2648 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2649 we allocate a simple list for every use. */
2650
2651 static void
2652 alloc_use_cost_map (struct ivopts_data *data)
2653 {
2654 unsigned i, size, s;
2655
2656 for (i = 0; i < n_iv_uses (data); i++)
2657 {
2658 struct iv_use *use = iv_use (data, i);
2659
2660 if (data->consider_all_candidates)
2661 size = n_iv_cands (data);
2662 else
2663 {
2664 s = bitmap_count_bits (use->related_cands);
2665
2666 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
2667 size = s ? (1 << ceil_log2 (s)) : 1;
2668 }
2669
2670 use->n_map_members = size;
2671 use->cost_map = XCNEWVEC (struct cost_pair, size);
2672 }
2673 }
2674
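/* For instance, with 5 related candidates the map is sized to 8 entries,
   so that set_use_iv_cost and get_use_iv_cost below can reduce a
   candidate id modulo the size with a simple mask (cand->id & 7) and then
   probe linearly, wrapping around once, for a free or matching slot.  */
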
2675 /* Returns a description of the computation cost of an expression whose runtime
2676 cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2677
2678 static comp_cost
2679 new_cost (unsigned runtime, unsigned complexity)
2680 {
2681 comp_cost cost;
2682
2683 cost.cost = runtime;
2684 cost.complexity = complexity;
2685
2686 return cost;
2687 }
2688
2689 /* Adds costs COST1 and COST2. */
2690
2691 static comp_cost
2692 add_costs (comp_cost cost1, comp_cost cost2)
2693 {
2694 cost1.cost += cost2.cost;
2695 cost1.complexity += cost2.complexity;
2696
2697 return cost1;
2698 }

2699 /* Subtracts cost COST2 from COST1. */
2700
2701 static comp_cost
2702 sub_costs (comp_cost cost1, comp_cost cost2)
2703 {
2704 cost1.cost -= cost2.cost;
2705 cost1.complexity -= cost2.complexity;
2706
2707 return cost1;
2708 }
2709
2710 /* Returns a negative number if COST1 < COST2, a positive number if
2711 COST1 > COST2, and 0 if COST1 = COST2. */
2712
2713 static int
2714 compare_costs (comp_cost cost1, comp_cost cost2)
2715 {
2716 if (cost1.cost == cost2.cost)
2717 return cost1.complexity - cost2.complexity;
2718
2719 return cost1.cost - cost2.cost;
2720 }
2721
2722 /* Returns true if COST is infinite. */
2723
2724 static bool
2725 infinite_cost_p (comp_cost cost)
2726 {
2727 return cost.cost == INFTY;
2728 }
2729
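/* The ordering these helpers induce can be illustrated as follows:
   comparing new_cost (4, 1) with new_cost (4, 2) favors the former, since
   equal runtimes are broken by the lower complexity; new_cost (3, 9)
   still beats new_cost (4, 0), because runtime dominates the
   comparison.  */
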
2730 /* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
2731 depends on the invariants DEPENDS_ON, that the value used in expressing it
2732 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
2733
2734 static void
2735 set_use_iv_cost (struct ivopts_data *data,
2736 struct iv_use *use, struct iv_cand *cand,
2737 comp_cost cost, bitmap depends_on, tree value,
2738 enum tree_code comp, int inv_expr_id)
2739 {
2740 unsigned i, s;
2741
2742 if (infinite_cost_p (cost))
2743 {
2744 BITMAP_FREE (depends_on);
2745 return;
2746 }
2747
2748 if (data->consider_all_candidates)
2749 {
2750 use->cost_map[cand->id].cand = cand;
2751 use->cost_map[cand->id].cost = cost;
2752 use->cost_map[cand->id].depends_on = depends_on;
2753 use->cost_map[cand->id].value = value;
2754 use->cost_map[cand->id].comp = comp;
2755 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2756 return;
2757 }
2758
2759 /* n_map_members is a power of two, so this computes modulo. */
2760 s = cand->id & (use->n_map_members - 1);
2761 for (i = s; i < use->n_map_members; i++)
2762 if (!use->cost_map[i].cand)
2763 goto found;
2764 for (i = 0; i < s; i++)
2765 if (!use->cost_map[i].cand)
2766 goto found;
2767
2768 gcc_unreachable ();
2769
2770 found:
2771 use->cost_map[i].cand = cand;
2772 use->cost_map[i].cost = cost;
2773 use->cost_map[i].depends_on = depends_on;
2774 use->cost_map[i].value = value;
2775 use->cost_map[i].comp = comp;
2776 use->cost_map[i].inv_expr_id = inv_expr_id;
2777 }
2778
2779 /* Gets cost of (USE, CANDIDATE) pair. */
2780
2781 static struct cost_pair *
2782 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2783 struct iv_cand *cand)
2784 {
2785 unsigned i, s;
2786 struct cost_pair *ret;
2787
2788 if (!cand)
2789 return NULL;
2790
2791 if (data->consider_all_candidates)
2792 {
2793 ret = use->cost_map + cand->id;
2794 if (!ret->cand)
2795 return NULL;
2796
2797 return ret;
2798 }
2799
2800 /* n_map_members is a power of two, so this computes modulo. */
2801 s = cand->id & (use->n_map_members - 1);
2802 for (i = s; i < use->n_map_members; i++)
2803 if (use->cost_map[i].cand == cand)
2804 return use->cost_map + i;
2805 else if (use->cost_map[i].cand == NULL)
2806 return NULL;
2807 for (i = 0; i < s; i++)
2808 if (use->cost_map[i].cand == cand)
2809 return use->cost_map + i;
2810 else if (use->cost_map[i].cand == NULL)
2811 return NULL;
2812
2813 return NULL;
2814 }
2815
2816 /* Returns an estimate of the cost of computing SEQ. */
2817
2818 static unsigned
2819 seq_cost (rtx seq, bool speed)
2820 {
2821 unsigned cost = 0;
2822 rtx set;
2823
2824 for (; seq; seq = NEXT_INSN (seq))
2825 {
2826 set = single_set (seq);
2827 if (set)
2828 cost += set_src_cost (SET_SRC (set), speed);
2829 else
2830 cost++;
2831 }
2832
2833 return cost;
2834 }
2835
2836 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2837 static rtx
2838 produce_memory_decl_rtl (tree obj, int *regno)
2839 {
2840 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2841 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2842 rtx x;
2843
2844 gcc_assert (obj);
2845 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2846 {
2847 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2848 x = gen_rtx_SYMBOL_REF (address_mode, name);
2849 SET_SYMBOL_REF_DECL (x, obj);
2850 x = gen_rtx_MEM (DECL_MODE (obj), x);
2851 set_mem_addr_space (x, as);
2852 targetm.encode_section_info (obj, x, true);
2853 }
2854 else
2855 {
2856 x = gen_raw_REG (address_mode, (*regno)++);
2857 x = gen_rtx_MEM (DECL_MODE (obj), x);
2858 set_mem_addr_space (x, as);
2859 }
2860
2861 return x;
2862 }
2863
2864 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2865 walk_tree. DATA contains the actual fake register number. */
2866
2867 static tree
2868 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2869 {
2870 tree obj = NULL_TREE;
2871 rtx x = NULL_RTX;
2872 int *regno = (int *) data;
2873
2874 switch (TREE_CODE (*expr_p))
2875 {
2876 case ADDR_EXPR:
2877 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2878 handled_component_p (*expr_p);
2879 expr_p = &TREE_OPERAND (*expr_p, 0))
2880 continue;
2881 obj = *expr_p;
2882 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2883 x = produce_memory_decl_rtl (obj, regno);
2884 break;
2885
2886 case SSA_NAME:
2887 *ws = 0;
2888 obj = SSA_NAME_VAR (*expr_p);
2889 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2890 if (!obj)
2891 return NULL_TREE;
2892 if (!DECL_RTL_SET_P (obj))
2893 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2894 break;
2895
2896 case VAR_DECL:
2897 case PARM_DECL:
2898 case RESULT_DECL:
2899 *ws = 0;
2900 obj = *expr_p;
2901
2902 if (DECL_RTL_SET_P (obj))
2903 break;
2904
2905 if (DECL_MODE (obj) == BLKmode)
2906 x = produce_memory_decl_rtl (obj, regno);
2907 else
2908 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2909
2910 break;
2911
2912 default:
2913 break;
2914 }
2915
2916 if (x)
2917 {
2918 decl_rtl_to_reset.safe_push (obj);
2919 SET_DECL_RTL (obj, x);
2920 }
2921
2922 return NULL_TREE;
2923 }
2924
2925 /* Determines the cost of computing EXPR. */
2926
2927 static unsigned
2928 computation_cost (tree expr, bool speed)
2929 {
2930 rtx seq, rslt;
2931 tree type = TREE_TYPE (expr);
2932 unsigned cost;
2933 /* Avoid using hard regs in ways which may be unsupported. */
2934 int regno = LAST_VIRTUAL_REGISTER + 1;
2935 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2936 enum node_frequency real_frequency = node->frequency;
2937
2938 node->frequency = NODE_FREQUENCY_NORMAL;
2939 crtl->maybe_hot_insn_p = speed;
2940 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2941 start_sequence ();
2942 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2943 seq = get_insns ();
2944 end_sequence ();
2945 default_rtl_profile ();
2946 node->frequency = real_frequency;
2947
2948 cost = seq_cost (seq, speed);
2949 if (MEM_P (rslt))
2950 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2951 TYPE_ADDR_SPACE (type), speed);
2952 else if (!REG_P (rslt))
2953 cost += set_src_cost (rslt, speed);
2954
2955 return cost;
2956 }
2957
2958 /* Returns variable containing the value of candidate CAND at statement AT. */
2959
2960 static tree
2961 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2962 {
2963 if (stmt_after_increment (loop, cand, stmt))
2964 return cand->var_after;
2965 else
2966 return cand->var_before;
2967 }
2968
2969 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2970 same precision, at least as wide as the precision of TYPE, stores
2971 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2972 type of A and B. */
2973
2974 static tree
2975 determine_common_wider_type (tree *a, tree *b)
2976 {
2977 tree wider_type = NULL;
2978 tree suba, subb;
2979 tree atype = TREE_TYPE (*a);
2980
2981 if (CONVERT_EXPR_P (*a))
2982 {
2983 suba = TREE_OPERAND (*a, 0);
2984 wider_type = TREE_TYPE (suba);
2985 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2986 return atype;
2987 }
2988 else
2989 return atype;
2990
2991 if (CONVERT_EXPR_P (*b))
2992 {
2993 subb = TREE_OPERAND (*b, 0);
2994 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2995 return atype;
2996 }
2997 else
2998 return atype;
2999
3000 *a = suba;
3001 *b = subb;
3002 return wider_type;
3003 }
3004
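/* A sketch of the intent: if *A is (unsigned int) X and *B is
   (unsigned int) Y, where X and Y are of a wider type such as unsigned
   long, the casts are stripped, *A and *B are replaced by X and Y, and
   the wider type is returned, so that the difference UBASE - CBASE
   computed in get_computation_aff below folds in that type.  */
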
3005 /* Determines the expression by which USE is expressed from induction variable
3006 CAND at statement AT in LOOP. The expression is stored in a decomposed
3007 form into AFF. Returns false if USE cannot be expressed using CAND. */
3008
3009 static bool
3010 get_computation_aff (struct loop *loop,
3011 struct iv_use *use, struct iv_cand *cand, gimple at,
3012 struct affine_tree_combination *aff)
3013 {
3014 tree ubase = use->iv->base;
3015 tree ustep = use->iv->step;
3016 tree cbase = cand->iv->base;
3017 tree cstep = cand->iv->step, cstep_common;
3018 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3019 tree common_type, var;
3020 tree uutype;
3021 aff_tree cbase_aff, var_aff;
3022 double_int rat;
3023
3024 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3025 {
3026 /* We do not have enough precision to express the values of USE. */
3027 return false;
3028 }
3029
3030 var = var_at_stmt (loop, cand, at);
3031 uutype = unsigned_type_for (utype);
3032
3033 /* If the conversion is not noop, perform it. */
3034 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3035 {
3036 cstep = fold_convert (uutype, cstep);
3037 cbase = fold_convert (uutype, cbase);
3038 var = fold_convert (uutype, var);
3039 }
3040
3041 if (!constant_multiple_of (ustep, cstep, &rat))
3042 return false;
3043
3044 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3045 type, we achieve better folding by computing their difference in this
3046 wider type, and cast the result to UUTYPE. We do not need to worry about
3047 overflows, as all the arithmetic will in the end be performed in UUTYPE
3048 anyway. */
3049 common_type = determine_common_wider_type (&ubase, &cbase);
3050
3051 /* use = ubase - ratio * cbase + ratio * var. */
3052 tree_to_aff_combination (ubase, common_type, aff);
3053 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3054 tree_to_aff_combination (var, uutype, &var_aff);
3055
3056 /* We need to shift the value if we are after the increment. */
3057 if (stmt_after_increment (loop, cand, at))
3058 {
3059 aff_tree cstep_aff;
3060
3061 if (common_type != uutype)
3062 cstep_common = fold_convert (common_type, cstep);
3063 else
3064 cstep_common = cstep;
3065
3066 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3067 aff_combination_add (&cbase_aff, &cstep_aff);
3068 }
3069
3070 aff_combination_scale (&cbase_aff, -rat);
3071 aff_combination_add (aff, &cbase_aff);
3072 if (common_type != uutype)
3073 aff_combination_convert (aff, uutype);
3074
3075 aff_combination_scale (&var_aff, rat);
3076 aff_combination_add (aff, &var_aff);
3077
3078 return true;
3079 }
3080
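/* As a concrete (hypothetical) instance of the formula above: for a use
   with base 0 and step 4 expressed from a candidate with base 0 and
   step 1, constant_multiple_of yields RAT = 4 and the combination
   reduces to use = 4 * var; if AT is after the increment, one step of
   the candidate is first added to CBASE to compensate.  */
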
3081 /* Return the type of USE. */
3082
3083 static tree
3084 get_use_type (struct iv_use *use)
3085 {
3086 tree base_type = TREE_TYPE (use->iv->base);
3087 tree type;
3088
3089 if (use->type == USE_ADDRESS)
3090 {
3091 /* The base_type may be a void pointer. Create a pointer type based on
3092 the mem_ref instead. */
3093 type = build_pointer_type (TREE_TYPE (*use->op_p));
3094 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3095 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3096 }
3097 else
3098 type = base_type;
3099
3100 return type;
3101 }
3102
3103 /* Determines the expression by which USE is expressed from induction variable
3104 CAND at statement AT in LOOP. The computation is unshared. */
3105
3106 static tree
3107 get_computation_at (struct loop *loop,
3108 struct iv_use *use, struct iv_cand *cand, gimple at)
3109 {
3110 aff_tree aff;
3111 tree type = get_use_type (use);
3112
3113 if (!get_computation_aff (loop, use, cand, at, &aff))
3114 return NULL_TREE;
3115 unshare_aff_combination (&aff);
3116 return fold_convert (type, aff_combination_to_tree (&aff));
3117 }
3118
3119 /* Determines the expression by which USE is expressed from induction variable
3120 CAND in LOOP. The computation is unshared. */
3121
3122 static tree
3123 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3124 {
3125 return get_computation_at (loop, use, cand, use->stmt);
3126 }
3127
3128 /* Adjust the cost COST for being in loop setup rather than loop body.
3129 If we're optimizing for space, the loop setup overhead is constant;
3130 if we're optimizing for speed, amortize it over the per-iteration cost. */
3131 static unsigned
3132 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3133 {
3134 if (cost == INFTY)
3135 return cost;
3136 else if (optimize_loop_for_speed_p (data->current_loop))
3137 return cost / avg_loop_niter (data->current_loop);
3138 else
3139 return cost;
3140 }
3141
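/* For example, a setup cost of 40 in a loop with an average of 10
   iterations is accounted as 4 per iteration when optimizing for speed,
   but remains 40 when optimizing for size.  */
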
3142 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3143 validity for a memory reference accessing memory of mode MODE in
3144 address space AS. */
3145
3147 bool
3148 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3149 addr_space_t as)
3150 {
3151 #define MAX_RATIO 128
3152 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3153 static vec<sbitmap> valid_mult_list;
3154 sbitmap valid_mult;
3155
3156 if (data_index >= valid_mult_list.length ())
3157 valid_mult_list.safe_grow_cleared (data_index + 1);
3158
3159 valid_mult = valid_mult_list[data_index];
3160 if (!valid_mult)
3161 {
3162 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3163 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3164 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3165 rtx addr, scaled;
3166 HOST_WIDE_INT i;
3167
3168 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3169 bitmap_clear (valid_mult);
3170 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3171 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3172 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3173 {
3174 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3175 if (memory_address_addr_space_p (mode, addr, as)
3176 || memory_address_addr_space_p (mode, scaled, as))
3177 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3178 }
3179
3180 if (dump_file && (dump_flags & TDF_DETAILS))
3181 {
3182 fprintf (dump_file, " allowed multipliers:");
3183 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3184 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3185 fprintf (dump_file, " %d", (int) i);
3186 fprintf (dump_file, "\n");
3187 fprintf (dump_file, "\n");
3188 }
3189
3190 valid_mult_list[data_index] = valid_mult;
3191 }
3192
3193 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3194 return false;
3195
3196 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3197 }
3198
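/* For instance, on a target whose addressing modes scale the index only
   by 1, 2, 4 or 8 (x86-like; purely illustrative here), the cached
   bitmap answers true for ratio 4 and false for ratio 3, and any ratio
   outside [-MAX_RATIO, MAX_RATIO] is rejected without consulting the
   cache.  */
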
3199 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3200 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3201 variable is omitted. Compute the cost for a memory reference that accesses
3202 a memory location of mode MEM_MODE in address space AS.
3203
3204 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3205 size of MEM_MODE / RATIO) is available. To make this determination, we
3206 look at the size of the increment to be made, which is given in CSTEP.
3207 CSTEP may be zero if the step is unknown.
3208 STMT_AFTER_INC is true iff the statement we're looking at is after the
3209 increment of the original biv.
3210
3211 TODO -- there must be some better way. This all is quite crude. */
3212
3213 enum ainc_type
3214 {
3215 AINC_PRE_INC, /* Pre increment. */
3216 AINC_PRE_DEC, /* Pre decrement. */
3217 AINC_POST_INC, /* Post increment. */
3218 AINC_POST_DEC, /* Post decrement. */
3219 AINC_NONE /* Also the number of auto increment types. */
3220 };
3221
3222 typedef struct address_cost_data_s
3223 {
3224 HOST_WIDE_INT min_offset, max_offset;
3225 unsigned costs[2][2][2][2];
3226 unsigned ainc_costs[AINC_NONE];
3227 } *address_cost_data;
3228
3229
3230 static comp_cost
3231 get_address_cost (bool symbol_present, bool var_present,
3232 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3233 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3234 addr_space_t as, bool speed,
3235 bool stmt_after_inc, bool *may_autoinc)
3236 {
3237 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3238 static vec<address_cost_data> address_cost_data_list;
3239 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3240 address_cost_data data;
3241 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3242 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3243 unsigned cost, acost, complexity;
3244 enum ainc_type autoinc_type;
3245 bool offset_p, ratio_p, autoinc;
3246 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3247 unsigned HOST_WIDE_INT mask;
3248 unsigned bits;
3249
3250 if (data_index >= address_cost_data_list.length ())
3251 address_cost_data_list.safe_grow_cleared (data_index + 1);
3252
3253 data = address_cost_data_list[data_index];
3254 if (!data)
3255 {
3256 HOST_WIDE_INT i;
3257 HOST_WIDE_INT rat, off = 0;
3258 int old_cse_not_expected, width;
3259 unsigned sym_p, var_p, off_p, rat_p, add_c;
3260 rtx seq, addr, base;
3261 rtx reg0, reg1;
3262
3263 data = (address_cost_data) xcalloc (1, sizeof (*data));
3264
3265 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3266
3267 width = GET_MODE_BITSIZE (address_mode) - 1;
3268 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3269 width = HOST_BITS_PER_WIDE_INT - 1;
3270 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3271
3272 for (i = width; i >= 0; i--)
3273 {
3274 off = -((unsigned HOST_WIDE_INT) 1 << i);
3275 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3276 if (memory_address_addr_space_p (mem_mode, addr, as))
3277 break;
3278 }
3279 data->min_offset = (i == -1? 0 : off);
3280
3281 for (i = width; i >= 0; i--)
3282 {
3283 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3284 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3285 if (memory_address_addr_space_p (mem_mode, addr, as))
3286 break;
3287 }
3288 if (i == -1)
3289 off = 0;
3290 data->max_offset = off;
3291
3292 if (dump_file && (dump_flags & TDF_DETAILS))
3293 {
3294 fprintf (dump_file, "get_address_cost:\n");
3295 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3296 GET_MODE_NAME (mem_mode),
3297 data->min_offset);
3298 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3299 GET_MODE_NAME (mem_mode),
3300 data->max_offset);
3301 }
3302
3303 rat = 1;
3304 for (i = 2; i <= MAX_RATIO; i++)
3305 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3306 {
3307 rat = i;
3308 break;
3309 }
3310
3311 /* Compute the cost of various addressing modes. */
3312 acost = 0;
3313 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3314 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3315
3316 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3317 || USE_STORE_PRE_DECREMENT (mem_mode))
3318 {
3319 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3320 has_predec[mem_mode]
3321 = memory_address_addr_space_p (mem_mode, addr, as);
3322
3323 if (has_predec[mem_mode])
3324 data->ainc_costs[AINC_PRE_DEC]
3325 = address_cost (addr, mem_mode, as, speed);
3326 }
3327 if (USE_LOAD_POST_DECREMENT (mem_mode)
3328 || USE_STORE_POST_DECREMENT (mem_mode))
3329 {
3330 addr = gen_rtx_POST_DEC (address_mode, reg0);
3331 has_postdec[mem_mode]
3332 = memory_address_addr_space_p (mem_mode, addr, as);
3333
3334 if (has_postdec[mem_mode])
3335 data->ainc_costs[AINC_POST_DEC]
3336 = address_cost (addr, mem_mode, as, speed);
3337 }
3338 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3339 || USE_STORE_PRE_INCREMENT (mem_mode))
3340 {
3341 addr = gen_rtx_PRE_INC (address_mode, reg0);
3342 has_preinc[mem_mode]
3343 = memory_address_addr_space_p (mem_mode, addr, as);
3344
3345 if (has_preinc[mem_mode])
3346 data->ainc_costs[AINC_PRE_INC]
3347 = address_cost (addr, mem_mode, as, speed);
3348 }
3349 if (USE_LOAD_POST_INCREMENT (mem_mode)
3350 || USE_STORE_POST_INCREMENT (mem_mode))
3351 {
3352 addr = gen_rtx_POST_INC (address_mode, reg0);
3353 has_postinc[mem_mode]
3354 = memory_address_addr_space_p (mem_mode, addr, as);
3355
3356 if (has_postinc[mem_mode])
3357 data->ainc_costs[AINC_POST_INC]
3358 = address_cost (addr, mem_mode, as, speed);
3359 }
3360 for (i = 0; i < 16; i++)
3361 {
3362 sym_p = i & 1;
3363 var_p = (i >> 1) & 1;
3364 off_p = (i >> 2) & 1;
3365 rat_p = (i >> 3) & 1;
3366
3367 addr = reg0;
3368 if (rat_p)
3369 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3370 gen_int_mode (rat, address_mode));
3371
3372 if (var_p)
3373 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3374
3375 if (sym_p)
3376 {
3377 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3378 /* ??? We can run into trouble with some backends by presenting
3379 it with symbols which haven't been properly passed through
3380 targetm.encode_section_info. By setting the local bit, we
3381 enhance the probability of things working. */
3382 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3383
3384 if (off_p)
3385 base = gen_rtx_fmt_e (CONST, address_mode,
3386 gen_rtx_fmt_ee
3387 (PLUS, address_mode, base,
3388 gen_int_mode (off, address_mode)));
3389 }
3390 else if (off_p)
3391 base = gen_int_mode (off, address_mode);
3392 else
3393 base = NULL_RTX;
3394
3395 if (base)
3396 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3397
3398 start_sequence ();
3399 /* To avoid splitting addressing modes, pretend that no cse will
3400 follow. */
3401 old_cse_not_expected = cse_not_expected;
3402 cse_not_expected = true;
3403 addr = memory_address_addr_space (mem_mode, addr, as);
3404 cse_not_expected = old_cse_not_expected;
3405 seq = get_insns ();
3406 end_sequence ();
3407
3408 acost = seq_cost (seq, speed);
3409 acost += address_cost (addr, mem_mode, as, speed);
3410
3411 if (!acost)
3412 acost = 1;
3413 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3414 }
3415
3416 /* On some targets, it is quite expensive to load a symbol into a register,
3417 which makes addresses that contain symbols look much more expensive.
3418 However, the symbol will have to be loaded in any case before the
3419 loop (and quite likely we have it in a register already), so it does not
3420 make much sense to penalize them too heavily. So make some final
3421 tweaks for the SYMBOL_PRESENT modes:
3422
3423 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3424 var is cheaper, use this mode with small penalty.
3425 If VAR_PRESENT is true, try whether the mode with
3426 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3427 if this is the case, use it. */
3428 add_c = add_cost (speed, address_mode);
3429 for (i = 0; i < 8; i++)
3430 {
3431 var_p = i & 1;
3432 off_p = (i >> 1) & 1;
3433 rat_p = (i >> 2) & 1;
3434
3435 acost = data->costs[0][1][off_p][rat_p] + 1;
3436 if (var_p)
3437 acost += add_c;
3438
3439 if (acost < data->costs[1][var_p][off_p][rat_p])
3440 data->costs[1][var_p][off_p][rat_p] = acost;
3441 }
3442
3443 if (dump_file && (dump_flags & TDF_DETAILS))
3444 {
3445 fprintf (dump_file, "Address costs:\n");
3446
3447 for (i = 0; i < 16; i++)
3448 {
3449 sym_p = i & 1;
3450 var_p = (i >> 1) & 1;
3451 off_p = (i >> 2) & 1;
3452 rat_p = (i >> 3) & 1;
3453
3454 fprintf (dump_file, " ");
3455 if (sym_p)
3456 fprintf (dump_file, "sym + ");
3457 if (var_p)
3458 fprintf (dump_file, "var + ");
3459 if (off_p)
3460 fprintf (dump_file, "cst + ");
3461 if (rat_p)
3462 fprintf (dump_file, "rat * ");
3463
3464 acost = data->costs[sym_p][var_p][off_p][rat_p];
3465 fprintf (dump_file, "index costs %d\n", acost);
3466 }
3467 if (has_predec[mem_mode] || has_postdec[mem_mode]
3468 || has_preinc[mem_mode] || has_postinc[mem_mode])
3469 fprintf (dump_file, " May include autoinc/dec\n");
3470 fprintf (dump_file, "\n");
3471 }
3472
3473 address_cost_data_list[data_index] = data;
3474 }
3475
3476 bits = GET_MODE_BITSIZE (address_mode);
3477 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3478 offset &= mask;
3479 if ((offset >> (bits - 1) & 1))
3480 offset |= ~mask;
3481 s_offset = offset;
3482
3483 autoinc = false;
3484 autoinc_type = AINC_NONE;
3485 msize = GET_MODE_SIZE (mem_mode);
3486 autoinc_offset = offset;
3487 if (stmt_after_inc)
3488 autoinc_offset += ratio * cstep;
3489 if (symbol_present || var_present || ratio != 1)
3490 autoinc = false;
3491 else
3492 {
3493 if (has_postinc[mem_mode] && autoinc_offset == 0
3494 && msize == cstep)
3495 autoinc_type = AINC_POST_INC;
3496 else if (has_postdec[mem_mode] && autoinc_offset == 0
3497 && msize == -cstep)
3498 autoinc_type = AINC_POST_DEC;
3499 else if (has_preinc[mem_mode] && autoinc_offset == msize
3500 && msize == cstep)
3501 autoinc_type = AINC_PRE_INC;
3502 else if (has_predec[mem_mode] && autoinc_offset == -msize
3503 && msize == -cstep)
3504 autoinc_type = AINC_PRE_DEC;
3505
3506 if (autoinc_type != AINC_NONE)
3507 autoinc = true;
3508 }
3509
3510 cost = 0;
3511 offset_p = (s_offset != 0
3512 && data->min_offset <= s_offset
3513 && s_offset <= data->max_offset);
3514 ratio_p = (ratio != 1
3515 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3516
3517 if (ratio != 1 && !ratio_p)
3518 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3519
3520 if (s_offset && !offset_p && !symbol_present)
3521 cost += add_cost (speed, address_mode);
3522
3523 if (may_autoinc)
3524 *may_autoinc = autoinc;
3525 if (autoinc)
3526 acost = data->ainc_costs[autoinc_type];
3527 else
3528 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3529 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3530 return new_cost (cost + acost, complexity);
3531 }
3532
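/* A brief sketch of the autoinc classification above: for a memory
   access of mode size 4 with CSTEP 4, RATIO 1, no symbol or invariant
   part, and a resulting offset of 0, the post-increment form is chosen
   when the target supports it; an offset equal to the mode size selects
   the pre-increment form instead.  */
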
3533 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3534 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3535 calculating the operands of EXPR. Returns true if successful, and returns
3536 the cost in COST. */
3537
3538 static bool
3539 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3540 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3541 {
3542 comp_cost res;
3543 tree op1 = TREE_OPERAND (expr, 1);
3544 tree cst = TREE_OPERAND (mult, 1);
3545 tree multop = TREE_OPERAND (mult, 0);
3546 int m = exact_log2 (int_cst_value (cst));
3547 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3548 int sa_cost;
3549 bool equal_p = false;
3550
3551 if (!(m >= 0 && m < maxm))
3552 return false;
3553
3554 if (operand_equal_p (op1, mult, 0))
3555 equal_p = true;
3556
3557 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3558 ? shiftadd_cost (speed, mode, m)
3559 : (equal_p
3560 ? shiftsub1_cost (speed, mode, m)
3561 : shiftsub0_cost (speed, mode, m)));
3562 res = new_cost (sa_cost, 0);
3563 res = add_costs (res, equal_p ? cost0 : cost1);
3564
3565 STRIP_NOPS (multop);
3566 if (!is_gimple_val (multop))
3567 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3568
3569 *cost = res;
3570 return true;
3571 }
3572
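/* For example, an expression of the form A + B * 4 is costed as a
   shift-and-add with shift count M = 2, and A - B * 4 as the
   corresponding shift-and-subtract; a multiplier that is not a power of
   two makes exact_log2 fail and the function returns false.  */
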
3573 /* Estimates cost of forcing expression EXPR into a variable. */
3574
3575 static comp_cost
3576 force_expr_to_var_cost (tree expr, bool speed)
3577 {
3578 static bool costs_initialized = false;
3579 static unsigned integer_cost [2];
3580 static unsigned symbol_cost [2];
3581 static unsigned address_cost [2];
3582 tree op0, op1;
3583 comp_cost cost0, cost1, cost;
3584 enum machine_mode mode;
3585
3586 if (!costs_initialized)
3587 {
3588 tree type = build_pointer_type (integer_type_node);
3589 tree var, addr;
3590 rtx x;
3591 int i;
3592
3593 var = create_tmp_var_raw (integer_type_node, "test_var");
3594 TREE_STATIC (var) = 1;
3595 x = produce_memory_decl_rtl (var, NULL);
3596 SET_DECL_RTL (var, x);
3597
3598 addr = build1 (ADDR_EXPR, type, var);
3599
3601 for (i = 0; i < 2; i++)
3602 {
3603 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3604 2000), i);
3605
3606 symbol_cost[i] = computation_cost (addr, i) + 1;
3607
3608 address_cost[i]
3609 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3610 if (dump_file && (dump_flags & TDF_DETAILS))
3611 {
3612 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3613 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3614 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3615 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3616 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3617 fprintf (dump_file, "\n");
3618 }
3619 }
3620
3621 costs_initialized = true;
3622 }
3623
3624 STRIP_NOPS (expr);
3625
3626 if (SSA_VAR_P (expr))
3627 return no_cost;
3628
3629 if (is_gimple_min_invariant (expr))
3630 {
3631 if (TREE_CODE (expr) == INTEGER_CST)
3632 return new_cost (integer_cost [speed], 0);
3633
3634 if (TREE_CODE (expr) == ADDR_EXPR)
3635 {
3636 tree obj = TREE_OPERAND (expr, 0);
3637
3638 if (TREE_CODE (obj) == VAR_DECL
3639 || TREE_CODE (obj) == PARM_DECL
3640 || TREE_CODE (obj) == RESULT_DECL)
3641 return new_cost (symbol_cost [speed], 0);
3642 }
3643
3644 return new_cost (address_cost [speed], 0);
3645 }
3646
3647 switch (TREE_CODE (expr))
3648 {
3649 case POINTER_PLUS_EXPR:
3650 case PLUS_EXPR:
3651 case MINUS_EXPR:
3652 case MULT_EXPR:
3653 op0 = TREE_OPERAND (expr, 0);
3654 op1 = TREE_OPERAND (expr, 1);
3655 STRIP_NOPS (op0);
3656 STRIP_NOPS (op1);
3657 break;
3658
3659 CASE_CONVERT:
3660 case NEGATE_EXPR:
3661 op0 = TREE_OPERAND (expr, 0);
3662 STRIP_NOPS (op0);
3663 op1 = NULL_TREE;
3664 break;
3665
3666 default:
3667 /* Just an arbitrary value, FIXME. */
3668 return new_cost (target_spill_cost[speed], 0);
3669 }
3670
3671 if (op0 == NULL_TREE
3672 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3673 cost0 = no_cost;
3674 else
3675 cost0 = force_expr_to_var_cost (op0, speed);
3676
3677 if (op1 == NULL_TREE
3678 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3679 cost1 = no_cost;
3680 else
3681 cost1 = force_expr_to_var_cost (op1, speed);
3682
3683 mode = TYPE_MODE (TREE_TYPE (expr));
3684 switch (TREE_CODE (expr))
3685 {
3686 case POINTER_PLUS_EXPR:
3687 case PLUS_EXPR:
3688 case MINUS_EXPR:
3689 case NEGATE_EXPR:
3690 cost = new_cost (add_cost (speed, mode), 0);
3691 if (TREE_CODE (expr) != NEGATE_EXPR)
3692 {
3693 tree mult = NULL_TREE;
3694 comp_cost sa_cost;
3695 if (TREE_CODE (op1) == MULT_EXPR)
3696 mult = op1;
3697 else if (TREE_CODE (op0) == MULT_EXPR)
3698 mult = op0;
3699
3700 if (mult != NULL_TREE
3701 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3702 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3703 speed, &sa_cost))
3704 return sa_cost;
3705 }
3706 break;
3707
3708 CASE_CONVERT:
3709 {
3710 tree inner_mode, outer_mode;
3711 outer_mode = TREE_TYPE (expr);
3712 inner_mode = TREE_TYPE (op0);
3713 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3714 TYPE_MODE (inner_mode), speed), 0);
3715 }
3716 break;
3717
3718 case MULT_EXPR:
3719 if (cst_and_fits_in_hwi (op0))
3720 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3721 mode, speed), 0);
3722 else if (cst_and_fits_in_hwi (op1))
3723 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3724 mode, speed), 0);
3725 else
3726 return new_cost (target_spill_cost [speed], 0);
3727 break;
3728
3729 default:
3730 gcc_unreachable ();
3731 }
3732
3733 cost = add_costs (cost, cost0);
3734 cost = add_costs (cost, cost1);
3735
3736 /* Bound the cost by target_spill_cost. The parts of complicated
3737 computations often are either loop invariant or at least can
3738 be shared between several iv uses, so letting this grow without
3739 limits would not give reasonable results. */
3740 if (cost.cost > (int) target_spill_cost [speed])
3741 cost.cost = target_spill_cost [speed];
3742
3743 return cost;
3744 }
3745
3746 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3747 invariants the computation depends on. */
3748
3749 static comp_cost
3750 force_var_cost (struct ivopts_data *data,
3751 tree expr, bitmap *depends_on)
3752 {
3753 if (depends_on)
3754 {
3755 fd_ivopts_data = data;
3756 walk_tree (&expr, find_depends, depends_on, NULL);
3757 }
3758
3759 return force_expr_to_var_cost (expr, data->speed);
3760 }
3761
3762 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3763 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3764 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3765 invariants the computation depends on. */
3766
3767 static comp_cost
3768 split_address_cost (struct ivopts_data *data,
3769 tree addr, bool *symbol_present, bool *var_present,
3770 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3771 {
3772 tree core;
3773 HOST_WIDE_INT bitsize;
3774 HOST_WIDE_INT bitpos;
3775 tree toffset;
3776 enum machine_mode mode;
3777 int unsignedp, volatilep;
3778
3779 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3780 &unsignedp, &volatilep, false);
3781
3782 if (toffset != 0
3783 || bitpos % BITS_PER_UNIT != 0
3784 || TREE_CODE (core) != VAR_DECL)
3785 {
3786 *symbol_present = false;
3787 *var_present = true;
3788 fd_ivopts_data = data;
3789 walk_tree (&addr, find_depends, depends_on, NULL);
3790 return new_cost (target_spill_cost[data->speed], 0);
3791 }
3792
3793 *offset += bitpos / BITS_PER_UNIT;
3794 if (TREE_STATIC (core)
3795 || DECL_EXTERNAL (core))
3796 {
3797 *symbol_present = true;
3798 *var_present = false;
3799 return no_cost;
3800 }
3801
3802 *symbol_present = false;
3803 *var_present = true;
3804 return no_cost;
3805 }
3806
3807 /* Estimates cost of expressing difference of addresses E1 - E2 as
3808 var + symbol + offset. The value of offset is added to OFFSET,
3809 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3810 part is missing. DEPENDS_ON is a set of the invariants the computation
3811 depends on. */
3812
3813 static comp_cost
3814 ptr_difference_cost (struct ivopts_data *data,
3815 tree e1, tree e2, bool *symbol_present, bool *var_present,
3816 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3817 {
3818 HOST_WIDE_INT diff = 0;
3819 aff_tree aff_e1, aff_e2;
3820 tree type;
3821
3822 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3823
3824 if (ptr_difference_const (e1, e2, &diff))
3825 {
3826 *offset += diff;
3827 *symbol_present = false;
3828 *var_present = false;
3829 return no_cost;
3830 }
3831
3832 if (integer_zerop (e2))
3833 return split_address_cost (data, TREE_OPERAND (e1, 0),
3834 symbol_present, var_present, offset, depends_on);
3835
3836 *symbol_present = false;
3837 *var_present = true;
3838
3839 type = signed_type_for (TREE_TYPE (e1));
3840 tree_to_aff_combination (e1, type, &aff_e1);
3841 tree_to_aff_combination (e2, type, &aff_e2);
3842 aff_combination_scale (&aff_e2, double_int_minus_one);
3843 aff_combination_add (&aff_e1, &aff_e2);
3844
3845 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3846 }
3847
3848 /* Estimates cost of expressing difference E1 - E2 as
3849 var + symbol + offset. The value of offset is added to OFFSET,
3850 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3851 part is missing. DEPENDS_ON is a set of the invariants the computation
3852 depends on. */
3853
3854 static comp_cost
3855 difference_cost (struct ivopts_data *data,
3856 tree e1, tree e2, bool *symbol_present, bool *var_present,
3857 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3858 {
3859 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3860 unsigned HOST_WIDE_INT off1, off2;
3861 aff_tree aff_e1, aff_e2;
3862 tree type;
3863
3864 e1 = strip_offset (e1, &off1);
3865 e2 = strip_offset (e2, &off2);
3866 *offset += off1 - off2;
3867
3868 STRIP_NOPS (e1);
3869 STRIP_NOPS (e2);
3870
3871 if (TREE_CODE (e1) == ADDR_EXPR)
3872 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3873 offset, depends_on);
3874 *symbol_present = false;
3875
3876 if (operand_equal_p (e1, e2, 0))
3877 {
3878 *var_present = false;
3879 return no_cost;
3880 }
3881
3882 *var_present = true;
3883
3884 if (integer_zerop (e2))
3885 return force_var_cost (data, e1, depends_on);
3886
3887 if (integer_zerop (e1))
3888 {
3889 comp_cost cost = force_var_cost (data, e2, depends_on);
3890 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3891 return cost;
3892 }
3893
3894 type = signed_type_for (TREE_TYPE (e1));
3895 tree_to_aff_combination (e1, type, &aff_e1);
3896 tree_to_aff_combination (e2, type, &aff_e2);
3897 aff_combination_scale (&aff_e2, double_int_minus_one);
3898 aff_combination_add (&aff_e1, &aff_e2);
3899
3900 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3901 }
3902
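/* Some illustrative outcomes: for (P + 8) - P the stripped cores are
   identical, so *OFFSET is merely adjusted by 8 at no cost; for
   &a[4] - &a[1] with 4-byte elements everything reduces to a constant
   offset of 12; a genuinely symbolic difference is forced into a
   variable via force_var_cost.  */
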
3903 /* Returns true if AFF1 and AFF2 are identical. */
3904
3905 static bool
3906 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3907 {
3908 unsigned i;
3909
3910 if (aff1->n != aff2->n)
3911 return false;
3912
3913 for (i = 0; i < aff1->n; i++)
3914 {
3915 if (aff1->elts[i].coef != aff2->elts[i].coef)
3916 return false;
3917
3918 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3919 return false;
3920 }
3921 return true;
3922 }
3923
3924 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3925
3926 static int
3927 get_expr_id (struct ivopts_data *data, tree expr)
3928 {
3929 struct iv_inv_expr_ent ent;
3930 struct iv_inv_expr_ent **slot;
3931
3932 ent.expr = expr;
3933 ent.hash = iterative_hash_expr (expr, 0);
3934 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3935 if (*slot)
3936 return (*slot)->id;
3937
3938 *slot = XNEW (struct iv_inv_expr_ent);
3939 (*slot)->expr = expr;
3940 (*slot)->hash = ent.hash;
3941 (*slot)->id = data->inv_expr_id++;
3942 return (*slot)->id;
3943 }
3944
3945 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3946 requires a new compiler-generated temporary. Returns -1 otherwise.
3947 ADDRESS_P is a flag indicating whether the expression is for address
3948 computation. */
3949
3950 static int
3951 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3952 tree cbase, HOST_WIDE_INT ratio,
3953 bool address_p)
3954 {
3955 aff_tree ubase_aff, cbase_aff;
3956 tree expr, ub, cb;
3957
3958 STRIP_NOPS (ubase);
3959 STRIP_NOPS (cbase);
3960 ub = ubase;
3961 cb = cbase;
3962
3963 if ((TREE_CODE (ubase) == INTEGER_CST)
3964 && (TREE_CODE (cbase) == INTEGER_CST))
3965 return -1;
3966
3967 /* Strips the constant part. */
3968 if (TREE_CODE (ubase) == PLUS_EXPR
3969 || TREE_CODE (ubase) == MINUS_EXPR
3970 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3971 {
3972 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3973 ubase = TREE_OPERAND (ubase, 0);
3974 }
3975
3976 /* Strips the constant part. */
3977 if (TREE_CODE (cbase) == PLUS_EXPR
3978 || TREE_CODE (cbase) == MINUS_EXPR
3979 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3980 {
3981 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3982 cbase = TREE_OPERAND (cbase, 0);
3983 }
3984
3985 if (address_p)
3986 {
3987 if (((TREE_CODE (ubase) == SSA_NAME)
3988 || (TREE_CODE (ubase) == ADDR_EXPR
3989 && is_gimple_min_invariant (ubase)))
3990 && (TREE_CODE (cbase) == INTEGER_CST))
3991 return -1;
3992
3993 if (((TREE_CODE (cbase) == SSA_NAME)
3994 || (TREE_CODE (cbase) == ADDR_EXPR
3995 && is_gimple_min_invariant (cbase)))
3996 && (TREE_CODE (ubase) == INTEGER_CST))
3997 return -1;
3998 }
3999
4000 if (ratio == 1)
4001 {
4002 if (operand_equal_p (ubase, cbase, 0))
4003 return -1;
4004
4005 if (TREE_CODE (ubase) == ADDR_EXPR
4006 && TREE_CODE (cbase) == ADDR_EXPR)
4007 {
4008 tree usym, csym;
4009
4010 usym = TREE_OPERAND (ubase, 0);
4011 csym = TREE_OPERAND (cbase, 0);
4012 if (TREE_CODE (usym) == ARRAY_REF)
4013 {
4014 tree ind = TREE_OPERAND (usym, 1);
4015 if (TREE_CODE (ind) == INTEGER_CST
4016 && tree_fits_shwi_p (ind)
4017 && TREE_INT_CST_LOW (ind) == 0)
4018 usym = TREE_OPERAND (usym, 0);
4019 }
4020 if (TREE_CODE (csym) == ARRAY_REF)
4021 {
4022 tree ind = TREE_OPERAND (csym, 1);
4023 if (TREE_CODE (ind) == INTEGER_CST
4024 && tree_fits_shwi_p (ind)
4025 && TREE_INT_CST_LOW (ind) == 0)
4026 csym = TREE_OPERAND (csym, 0);
4027 }
4028 if (operand_equal_p (usym, csym, 0))
4029 return -1;
4030 }
4031 /* Now do a more complex comparison. */
4032 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4033 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4034 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4035 return -1;
4036 }
4037
4038 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4039 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4040
4041 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4042 aff_combination_add (&ubase_aff, &cbase_aff);
4043 expr = aff_combination_to_tree (&ubase_aff);
4044 return get_expr_id (data, expr);
4045 }
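/* An example of the fallthrough case (hypothetical trees): for
   UBASE = &a[0] + n, CBASE = &a[0] and RATIO = 1, none of the early
   exits match, so the affine difference UBASE - RATIO * CBASE folds
   to the invariant n, and get_expr_id hands out one shared id for it;
   candidates whose difference folds to the same n thus share a single
   compiler-generated temporary.  */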
4046
4047
4048
4049 /* Determines the cost of the computation by which USE is expressed
4050 from induction variable CAND. If ADDRESS_P is true, we just need
4051 to create an address from it, otherwise we want to get it into a
4052 register. A set of invariants we depend on is stored in
4053 DEPENDS_ON. AT is the statement at which the value is computed.
4054 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4055 addressing is likely. */
4056
4057 static comp_cost
4058 get_computation_cost_at (struct ivopts_data *data,
4059 struct iv_use *use, struct iv_cand *cand,
4060 bool address_p, bitmap *depends_on, gimple at,
4061 bool *can_autoinc,
4062 int *inv_expr_id)
4063 {
4064 tree ubase = use->iv->base, ustep = use->iv->step;
4065 tree cbase, cstep;
4066 tree utype = TREE_TYPE (ubase), ctype;
4067 unsigned HOST_WIDE_INT cstepi, offset = 0;
4068 HOST_WIDE_INT ratio, aratio;
4069 bool var_present, symbol_present, stmt_is_after_inc;
4070 comp_cost cost;
4071 double_int rat;
4072 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4073 enum machine_mode mem_mode = (address_p
4074 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4075 : VOIDmode);
4076
4077 *depends_on = NULL;
4078
4079 /* Only consider real candidates. */
4080 if (!cand->iv)
4081 return infinite_cost;
4082
4083 cbase = cand->iv->base;
4084 cstep = cand->iv->step;
4085 ctype = TREE_TYPE (cbase);
4086
4087 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4088 {
4089 /* We do not have a precision to express the values of use. */
4090 return infinite_cost;
4091 }
4092
4093 if (address_p
4094 || (use->iv->base_object
4095 && cand->iv->base_object
4096 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4097 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4098 {
4099 /* Do not try to express the address of an object with a computation
4100 based on the address of a different object. This may cause problems
4101 in RTL-level alias analysis (which does not expect this to happen,
4102 as it is illegal in C), and would be unlikely to be useful
4103 anyway. */
4104 if (use->iv->base_object
4105 && cand->iv->base_object
4106 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4107 return infinite_cost;
4108 }
4109
4110 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4111 {
4112 /* TODO -- add direct handling of this case. */
4113 goto fallback;
4114 }
4115
4116 /* CSTEPI is removed from the offset in case the statement is after the
4117 increment. If the step is not constant, we use zero instead.
4118 This is a bit imprecise (there is the extra addition), but
4119 redundancy elimination is likely to transform the code so that
4120 it uses the value of the variable before the increment anyway,
4121 so this is not too unrealistic. */
4122 if (cst_and_fits_in_hwi (cstep))
4123 cstepi = int_cst_value (cstep);
4124 else
4125 cstepi = 0;
4126
4127 if (!constant_multiple_of (ustep, cstep, &rat))
4128 return infinite_cost;
4129
4130 if (rat.fits_shwi ())
4131 ratio = rat.to_shwi ();
4132 else
4133 return infinite_cost;
4134
4135 STRIP_NOPS (cbase);
4136 ctype = TREE_TYPE (cbase);
4137
4138 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4139
4140 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4141 or ratio == 1, it is better to handle this like
4142
4143 ubase - ratio * cbase + ratio * var
4144
4145 (this also holds in the case ratio == -1, TODO). */
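/* An illustrative sketch (hypothetical values, not from the sources):
   with ubase = b + 8, cbase = b and ratio = 2,

     use = (b + 8) + 2 * (var - b)
         = ((b + 8) - 2 * b) + 2 * var

   so the invariant part (b + 8) - 2 * b is costed once (its cost is
   divided by the average iteration count below), and only
   ratio * var remains to be computed in each iteration.  */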
4146
4147 if (cst_and_fits_in_hwi (cbase))
4148 {
4149 offset = - ratio * int_cst_value (cbase);
4150 cost = difference_cost (data,
4151 ubase, build_int_cst (utype, 0),
4152 &symbol_present, &var_present, &offset,
4153 depends_on);
4154 cost.cost /= avg_loop_niter (data->current_loop);
4155 }
4156 else if (ratio == 1)
4157 {
4158 tree real_cbase = cbase;
4159
4160 /* Check to see if any adjustment is needed. */
4161 if (cstepi == 0 && stmt_is_after_inc)
4162 {
4163 aff_tree real_cbase_aff;
4164 aff_tree cstep_aff;
4165
4166 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4167 &real_cbase_aff);
4168 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4169
4170 aff_combination_add (&real_cbase_aff, &cstep_aff);
4171 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4172 }
4173
4174 cost = difference_cost (data,
4175 ubase, real_cbase,
4176 &symbol_present, &var_present, &offset,
4177 depends_on);
4178 cost.cost /= avg_loop_niter (data->current_loop);
4179 }
4180 else if (address_p
4181 && !POINTER_TYPE_P (ctype)
4182 && multiplier_allowed_in_address_p
4183 (ratio, mem_mode,
4184 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4185 {
4186 cbase
4187 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4188 cost = difference_cost (data,
4189 ubase, cbase,
4190 &symbol_present, &var_present, &offset,
4191 depends_on);
4192 cost.cost /= avg_loop_niter (data->current_loop);
4193 }
4194 else
4195 {
4196 cost = force_var_cost (data, cbase, depends_on);
4197 cost = add_costs (cost,
4198 difference_cost (data,
4199 ubase, build_int_cst (utype, 0),
4200 &symbol_present, &var_present,
4201 &offset, depends_on));
4202 cost.cost /= avg_loop_niter (data->current_loop);
4203 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4204 }
4205
4206 if (inv_expr_id)
4207 {
4208 *inv_expr_id =
4209 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4210 /* Clear depends on. */
4211 if (*inv_expr_id != -1 && depends_on && *depends_on)
4212 bitmap_clear (*depends_on);
4213 }
4214
4215 /* If we are after the increment, the value of the candidate is higher by
4216 one iteration. */
4217 if (stmt_is_after_inc)
4218 offset -= ratio * cstepi;
4219
4220 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4221 (symbol/var1/const parts may be omitted). If we are looking for an
4222 address, find the cost of addressing this. */
4223 if (address_p)
4224 return add_costs (cost,
4225 get_address_cost (symbol_present, var_present,
4226 offset, ratio, cstepi,
4227 mem_mode,
4228 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4229 speed, stmt_is_after_inc,
4230 can_autoinc));
4231
4232 /* Otherwise estimate the costs for computing the expression. */
4233 if (!symbol_present && !var_present && !offset)
4234 {
4235 if (ratio != 1)
4236 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4237 return cost;
4238 }
4239
4240 /* Symbol + offset should be compile-time computable, so consider that they
4241 are added once to the variable, if present. */
4242 if (var_present && (symbol_present || offset))
4243 cost.cost += adjust_setup_cost (data,
4244 add_cost (speed, TYPE_MODE (ctype)));
4245
4246 /* Having offset does not affect runtime cost in case it is added to
4247 symbol, but it increases complexity. */
4248 if (offset)
4249 cost.complexity++;
4250
4251 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4252
4253 aratio = ratio > 0 ? ratio : -ratio;
4254 if (aratio != 1)
4255 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4256 return cost;
4257
4258 fallback:
4259 if (can_autoinc)
4260 *can_autoinc = false;
4261
4262 {
4263 /* Just get the expression, expand it and measure the cost. */
4264 tree comp = get_computation_at (data->current_loop, use, cand, at);
4265
4266 if (!comp)
4267 return infinite_cost;
4268
4269 if (address_p)
4270 comp = build_simple_mem_ref (comp);
4271
4272 return new_cost (computation_cost (comp, speed), 0);
4273 }
4274 }
4275
4276 /* Determines the cost of the computation by which USE is expressed
4277 from induction variable CAND. If ADDRESS_P is true, we just need
4278 to create an address from it, otherwise we want to get it into a
4279 register. A set of invariants we depend on is stored in
4280 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4281 autoinc addressing is likely. */
4282
4283 static comp_cost
4284 get_computation_cost (struct ivopts_data *data,
4285 struct iv_use *use, struct iv_cand *cand,
4286 bool address_p, bitmap *depends_on,
4287 bool *can_autoinc, int *inv_expr_id)
4288 {
4289 return get_computation_cost_at (data,
4290 use, cand, address_p, depends_on, use->stmt,
4291 can_autoinc, inv_expr_id);
4292 }
4293
4294 /* Determines cost of basing replacement of USE on CAND in a generic
4295 expression. */
4296
4297 static bool
4298 determine_use_iv_cost_generic (struct ivopts_data *data,
4299 struct iv_use *use, struct iv_cand *cand)
4300 {
4301 bitmap depends_on;
4302 comp_cost cost;
4303 int inv_expr_id = -1;
4304
4305 /* The simple case first -- if we need to express value of the preserved
4306 original biv, the cost is 0. This also prevents us from counting the
4307 cost of increment twice -- once at this use and once in the cost of
4308 the candidate. */
4309 if (cand->pos == IP_ORIGINAL
4310 && cand->incremented_at == use->stmt)
4311 {
4312 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4313 ERROR_MARK, -1);
4314 return true;
4315 }
4316
4317 cost = get_computation_cost (data, use, cand, false, &depends_on,
4318 NULL, &inv_expr_id);
4319
4320 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4321 inv_expr_id);
4322
4323 return !infinite_cost_p (cost);
4324 }
4325
4326 /* Determines cost of basing replacement of USE on CAND in an address. */
4327
4328 static bool
4329 determine_use_iv_cost_address (struct ivopts_data *data,
4330 struct iv_use *use, struct iv_cand *cand)
4331 {
4332 bitmap depends_on;
4333 bool can_autoinc;
4334 int inv_expr_id = -1;
4335 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4336 &can_autoinc, &inv_expr_id);
4337
4338 if (cand->ainc_use == use)
4339 {
4340 if (can_autoinc)
4341 cost.cost -= cand->cost_step;
4342 /* If we generated the candidate solely for exploiting autoincrement
4343 opportunities, and it turns out it can't be used, set the cost to
4344 infinity to make sure we ignore it. */
4345 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4346 cost = infinite_cost;
4347 }
4348 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4349 inv_expr_id);
4350
4351 return !infinite_cost_p (cost);
4352 }
4353
4354 /* Computes value of candidate CAND at position AT in iteration NITER, and
4355 stores it to VAL. */
4356
4357 static void
4358 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4359 aff_tree *val)
4360 {
4361 aff_tree step, delta, nit;
4362 struct iv *iv = cand->iv;
4363 tree type = TREE_TYPE (iv->base);
4364 tree steptype = type;
4365 if (POINTER_TYPE_P (type))
4366 steptype = sizetype;
4367
4368 tree_to_aff_combination (iv->step, steptype, &step);
4369 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4370 aff_combination_convert (&nit, steptype);
4371 aff_combination_mult (&nit, &step, &delta);
4372 if (stmt_after_increment (loop, cand, at))
4373 aff_combination_add (&delta, &step);
4374
4375 tree_to_aff_combination (iv->base, type, val);
4376 aff_combination_add (val, &delta);
4377 }
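/* In other words (a restatement of the code above, not new logic):

     *VAL = base + step * NITER        if AT is before the increment,
     *VAL = base + step * (NITER + 1)  if AT is after the increment,

   computed in the affine combination representation so that pointer
   bases and symbolic steps are handled uniformly.  */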
4378
4379 /* Returns period of induction variable iv. */
4380
4381 static tree
4382 iv_period (struct iv *iv)
4383 {
4384 tree step = iv->step, period, type;
4385 tree pow2div;
4386
4387 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4388
4389 type = unsigned_type_for (TREE_TYPE (step));
4390 /* The period of the iv is lcm (step, type_range) / step - 1,
4391 i.e., N * type_range / step - 1. Since the type range is a power
4392 of two, N == step >> num_of_ending_zeros_binary (step),
4393 so the final result is
4394
4395 (type_range >> num_of_ending_zeros_binary (step)) - 1
4396
4397 */
4398 pow2div = num_ending_zeros (step);
4399
4400 period = build_low_bits_mask (type,
4401 (TYPE_PRECISION (type)
4402 - tree_to_uhwi (pow2div)));
4403
4404 return period;
4405 }
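/* A worked example with hypothetical numbers: for an 8-bit unsigned
   type and step == 4, num_ending_zeros (step) == 2, so the period is
   a mask of 8 - 2 == 6 low bits, i.e. 63. Indeed, the iv can be
   stepped 63 times before revisiting a value, which matches
   lcm (4, 256) / 4 - 1 == 63.  */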
4406
4407 /* Returns the comparison operator used when eliminating the iv USE. */
4408
4409 static enum tree_code
4410 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4411 {
4412 struct loop *loop = data->current_loop;
4413 basic_block ex_bb;
4414 edge exit;
4415
4416 ex_bb = gimple_bb (use->stmt);
4417 exit = EDGE_SUCC (ex_bb, 0);
4418 if (flow_bb_inside_loop_p (loop, exit->dest))
4419 exit = EDGE_SUCC (ex_bb, 1);
4420
4421 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4422 }
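/* For instance, if the exit test is "if (i == n) break;" -- i.e. the
   edge leaving the loop is the true edge of the condition -- EQ_EXPR
   is returned and the eliminated test keeps the form cand == bound;
   when the loop exits on the false edge instead, cand != bound
   (NE_EXPR) is used.  */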
4423
4424 static tree
4425 strip_wrap_conserving_type_conversions (tree exp)
4426 {
4427 while (tree_ssa_useless_type_conversion (exp)
4428 && (nowrap_type_p (TREE_TYPE (exp))
4429 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4430 exp = TREE_OPERAND (exp, 0);
4431 return exp;
4432 }
4433
4434 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic: we
4435 check for an exact match. */
4436
4437 static bool
4438 expr_equal_p (tree e, tree what)
4439 {
4440 gimple stmt;
4441 enum tree_code code;
4442
4443 e = strip_wrap_conserving_type_conversions (e);
4444 what = strip_wrap_conserving_type_conversions (what);
4445
4446 code = TREE_CODE (what);
4447 if (TREE_TYPE (e) != TREE_TYPE (what))
4448 return false;
4449
4450 if (operand_equal_p (e, what, 0))
4451 return true;
4452
4453 if (TREE_CODE (e) != SSA_NAME)
4454 return false;
4455
4456 stmt = SSA_NAME_DEF_STMT (e);
4457 if (gimple_code (stmt) != GIMPLE_ASSIGN
4458 || gimple_assign_rhs_code (stmt) != code)
4459 return false;
4460
4461 switch (get_gimple_rhs_class (code))
4462 {
4463 case GIMPLE_BINARY_RHS:
4464 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4465 return false;
4466 /* Fallthru. */
4467
4468 case GIMPLE_UNARY_RHS:
4469 case GIMPLE_SINGLE_RHS:
4470 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4471 default:
4472 return false;
4473 }
4474 }
4475
4476 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4477 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4478 calculation is performed in a non-wrapping type.
4479
4480 TODO: More generally, we could test for the situation that
4481 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4482 This would require knowing the sign of OFFSET.
4483
4484 Also, we only look for the first addition in the computation of BASE.
4485 More complex analysis would be better, but introducing it just for
4486 this optimization seems like overkill. */
4487
4488 static bool
4489 difference_cannot_overflow_p (tree base, tree offset)
4490 {
4491 enum tree_code code;
4492 tree e1, e2;
4493
4494 if (!nowrap_type_p (TREE_TYPE (base)))
4495 return false;
4496
4497 base = expand_simple_operations (base);
4498
4499 if (TREE_CODE (base) == SSA_NAME)
4500 {
4501 gimple stmt = SSA_NAME_DEF_STMT (base);
4502
4503 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4504 return false;
4505
4506 code = gimple_assign_rhs_code (stmt);
4507 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4508 return false;
4509
4510 e1 = gimple_assign_rhs1 (stmt);
4511 e2 = gimple_assign_rhs2 (stmt);
4512 }
4513 else
4514 {
4515 code = TREE_CODE (base);
4516 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4517 return false;
4518 e1 = TREE_OPERAND (base, 0);
4519 e2 = TREE_OPERAND (base, 1);
4520 }
4521
4522 /* TODO: deeper inspection may be necessary to prove the equality. */
4523 switch (code)
4524 {
4525 case PLUS_EXPR:
4526 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4527 case POINTER_PLUS_EXPR:
4528 return expr_equal_p (e2, offset);
4529
4530 default:
4531 return false;
4532 }
4533 }
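/* An illustrative case (hypothetical SSA names): if BASE is defined by
   "p_1 = q_2 + off_3" (a POINTER_PLUS_EXPR in a non-wrapping pointer
   type) and OFFSET equals off_3, then BASE - OFFSET is exactly q_2,
   so the subtraction cannot wrap and true is returned.  */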
4534
4535 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4536 comparison with CAND. NITER describes the number of iterations of
4537 the loops. If successful, the comparison in COMP_P is altered accordingly.
4538
4539 We aim to handle the following situation:
4540
4541 sometype *base, *p;
4542 int a, b, i;
4543
4544 i = a;
4545 p = p_0 = base + a;
4546
4547 do
4548 {
4549 bla (*p);
4550 p++;
4551 i++;
4552 }
4553 while (i < b);
4554
4555 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4556 We aim to optimize this to
4557
4558 p = p_0 = base + a;
4559 do
4560 {
4561 bla (*p);
4562 p++;
4563 }
4564 while (p < p_0 - a + b);
4565
4566 This preserves correctness, since the pointer arithmetic does not
4567 overflow. More precisely:
4568
4569 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4570 overflow in computing it or the values of p.
4571 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4572 overflow. To prove this, we use the fact that p_0 = base + a. */
4573
4574 static bool
4575 iv_elimination_compare_lt (struct ivopts_data *data,
4576 struct iv_cand *cand, enum tree_code *comp_p,
4577 struct tree_niter_desc *niter)
4578 {
4579 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4580 struct affine_tree_combination nit, tmpa, tmpb;
4581 enum tree_code comp;
4582 HOST_WIDE_INT step;
4583
4584 /* We need to know that the candidate induction variable does not overflow.
4585 While more complex analysis may be used to prove this, for now just
4586 check that the variable appears in the original program and that it
4587 is computed in a type that guarantees no overflows. */
4588 cand_type = TREE_TYPE (cand->iv->base);
4589 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4590 return false;
4591
4592 /* Make sure that the loop iterates until the loop bound is hit, as otherwise
4593 the calculation of the BOUND could overflow, making the comparison
4594 invalid. */
4595 if (!data->loop_single_exit_p)
4596 return false;
4597
4598 /* We need to be able to decide whether candidate is increasing or decreasing
4599 in order to choose the right comparison operator. */
4600 if (!cst_and_fits_in_hwi (cand->iv->step))
4601 return false;
4602 step = int_cst_value (cand->iv->step);
4603
4604 /* Check that the number of iterations matches the expected pattern:
4605 a + 1 > b ? 0 : b - a - 1. */
4606 mbz = niter->may_be_zero;
4607 if (TREE_CODE (mbz) == GT_EXPR)
4608 {
4609 /* Handle a + 1 > b. */
4610 tree op0 = TREE_OPERAND (mbz, 0);
4611 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4612 {
4613 a = TREE_OPERAND (op0, 0);
4614 b = TREE_OPERAND (mbz, 1);
4615 }
4616 else
4617 return false;
4618 }
4619 else if (TREE_CODE (mbz) == LT_EXPR)
4620 {
4621 tree op1 = TREE_OPERAND (mbz, 1);
4622
4623 /* Handle b < a + 1. */
4624 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4625 {
4626 a = TREE_OPERAND (op1, 0);
4627 b = TREE_OPERAND (mbz, 0);
4628 }
4629 else
4630 return false;
4631 }
4632 else
4633 return false;
4634
4635 /* Expected number of iterations is B - A - 1. Check that it matches
4636 the actual number, i.e., that B - A - NITER = 1. */
4637 tree_to_aff_combination (niter->niter, nit_type, &nit);
4638 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4639 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4640 aff_combination_scale (&nit, double_int_minus_one);
4641 aff_combination_scale (&tmpa, double_int_minus_one);
4642 aff_combination_add (&tmpb, &tmpa);
4643 aff_combination_add (&tmpb, &nit);
4644 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4645 return false;
4646
4647 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4648 overflow. */
4649 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4650 cand->iv->step,
4651 fold_convert (TREE_TYPE (cand->iv->step), a));
4652 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4653 return false;
4654
4655 /* Determine the new comparison operator. */
4656 comp = step < 0 ? GT_EXPR : LT_EXPR;
4657 if (*comp_p == NE_EXPR)
4658 *comp_p = comp;
4659 else if (*comp_p == EQ_EXPR)
4660 *comp_p = invert_tree_comparison (comp, false);
4661 else
4662 gcc_unreachable ();
4663
4664 return true;
4665 }
4666
4667 /* Check whether it is possible to express the condition in USE by comparison
4668 of candidate CAND. If so, store the value compared with to BOUND, and the
4669 comparison operator to COMP. */
4670
4671 static bool
4672 may_eliminate_iv (struct ivopts_data *data,
4673 struct iv_use *use, struct iv_cand *cand, tree *bound,
4674 enum tree_code *comp)
4675 {
4676 basic_block ex_bb;
4677 edge exit;
4678 tree period;
4679 struct loop *loop = data->current_loop;
4680 aff_tree bnd;
4681 struct tree_niter_desc *desc = NULL;
4682
4683 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4684 return false;
4685
4686 /* For now this works only for exits that dominate the loop latch.
4687 TODO: extend to other conditions inside loop body. */
4688 ex_bb = gimple_bb (use->stmt);
4689 if (use->stmt != last_stmt (ex_bb)
4690 || gimple_code (use->stmt) != GIMPLE_COND
4691 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4692 return false;
4693
4694 exit = EDGE_SUCC (ex_bb, 0);
4695 if (flow_bb_inside_loop_p (loop, exit->dest))
4696 exit = EDGE_SUCC (ex_bb, 1);
4697 if (flow_bb_inside_loop_p (loop, exit->dest))
4698 return false;
4699
4700 desc = niter_for_exit (data, exit);
4701 if (!desc)
4702 return false;
4703
4704 /* Determine whether we can use the variable to test the exit condition.
4705 This is the case iff the period of the induction variable is greater
4706 than the number of iterations for which the exit condition is true. */
4707 period = iv_period (cand->iv);
4708
4709 /* If the number of iterations is constant, compare against it directly. */
4710 if (TREE_CODE (desc->niter) == INTEGER_CST)
4711 {
4712 /* See cand_value_at. */
4713 if (stmt_after_increment (loop, cand, use->stmt))
4714 {
4715 if (!tree_int_cst_lt (desc->niter, period))
4716 return false;
4717 }
4718 else
4719 {
4720 if (tree_int_cst_lt (period, desc->niter))
4721 return false;
4722 }
4723 }
4724
4725 /* If not, and if this is the only possible exit of the loop, see whether
4726 we can get a conservative estimate on the number of iterations of the
4727 entire loop and compare against that instead. */
4728 else
4729 {
4730 double_int period_value, max_niter;
4731
4732 max_niter = desc->max;
4733 if (stmt_after_increment (loop, cand, use->stmt))
4734 max_niter += double_int_one;
4735 period_value = tree_to_double_int (period);
4736 if (max_niter.ugt (period_value))
4737 {
4738 /* See if we can take advantage of inferred loop bound information. */
4739 if (data->loop_single_exit_p)
4740 {
4741 if (!max_loop_iterations (loop, &max_niter))
4742 return false;
4743 /* The loop bound is already adjusted by adding 1. */
4744 if (max_niter.ugt (period_value))
4745 return false;
4746 }
4747 else
4748 return false;
4749 }
4750 }
4751
4752 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4753
4754 *bound = aff_combination_to_tree (&bnd);
4755 *comp = iv_elimination_compare (data, use);
4756
4757 /* It is unlikely that computing the number of iterations using division
4758 would be more profitable than keeping the original induction variable. */
4759 if (expression_expensive_p (*bound))
4760 return false;
4761
4762 /* Sometimes it is possible to handle the situation that the number of
4763 iterations may be zero unless additional assumptions hold, by using <
4764 instead of != in the exit condition.
4765
4766 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4767 base the exit condition on it. However, that is often too
4768 expensive. */
4769 if (!integer_zerop (desc->may_be_zero))
4770 return iv_elimination_compare_lt (data, cand, comp, desc);
4771
4772 return true;
4773 }
4774
4775 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4776 be copied, if it is used in the loop body and DATA->body_includes_call. */
4777
4778 static int
4779 parm_decl_cost (struct ivopts_data *data, tree bound)
4780 {
4781 tree sbound = bound;
4782 STRIP_NOPS (sbound);
4783
4784 if (TREE_CODE (sbound) == SSA_NAME
4785 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4786 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4787 && data->body_includes_call)
4788 return COSTS_N_INSNS (1);
4789
4790 return 0;
4791 }
4792
4793 /* Determines cost of basing replacement of USE on CAND in a condition. */
4794
4795 static bool
4796 determine_use_iv_cost_condition (struct ivopts_data *data,
4797 struct iv_use *use, struct iv_cand *cand)
4798 {
4799 tree bound = NULL_TREE;
4800 struct iv *cmp_iv;
4801 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4802 comp_cost elim_cost, express_cost, cost, bound_cost;
4803 bool ok;
4804 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4805 tree *control_var, *bound_cst;
4806 enum tree_code comp = ERROR_MARK;
4807
4808 /* Only consider real candidates. */
4809 if (!cand->iv)
4810 {
4811 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4812 ERROR_MARK, -1);
4813 return false;
4814 }
4815
4816 /* Try iv elimination. */
4817 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4818 {
4819 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4820 if (elim_cost.cost == 0)
4821 elim_cost.cost = parm_decl_cost (data, bound);
4822 else if (TREE_CODE (bound) == INTEGER_CST)
4823 elim_cost.cost = 0;
4824 /* If we replace a loop condition 'i < n' with 'p < base + n',
4825 depends_on_elim will have 'base' and 'n' set, which implies
4826 that both 'base' and 'n' will be live during the loop. More likely,
4827 'base + n' will be loop invariant, resulting in only one live value
4828 during the loop. So in that case we clear depends_on_elim and set
4829 elim_inv_expr_id instead. */
4830 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4831 {
4832 elim_inv_expr_id = get_expr_id (data, bound);
4833 bitmap_clear (depends_on_elim);
4834 }
4835 /* The bound is a loop invariant, so it will be computed only
4836 once. */
4837 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4838 }
4839 else
4840 elim_cost = infinite_cost;
4841
4842 /* Try expressing the original giv. If it is compared with an invariant,
4843 note that we cannot get rid of it. */
4844 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4845 NULL, &cmp_iv);
4846 gcc_assert (ok);
4847
4848 /* When the condition is a comparison of the candidate IV against
4849 zero, prefer this IV.
4850
4851 TODO: The constant that we're subtracting from the cost should
4852 be target-dependent. This information should be added to the
4853 target costs for each backend. */
4854 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4855 && integer_zerop (*bound_cst)
4856 && (operand_equal_p (*control_var, cand->var_after, 0)
4857 || operand_equal_p (*control_var, cand->var_before, 0)))
4858 elim_cost.cost -= 1;
4859
4860 express_cost = get_computation_cost (data, use, cand, false,
4861 &depends_on_express, NULL,
4862 &express_inv_expr_id);
4863 fd_ivopts_data = data;
4864 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4865
4866 /* Count the cost of the original bound as well. */
4867 bound_cost = force_var_cost (data, *bound_cst, NULL);
4868 if (bound_cost.cost == 0)
4869 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4870 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4871 bound_cost.cost = 0;
4872 express_cost.cost += bound_cost.cost;
4873
4874 /* Choose the better approach, preferring the eliminated IV. */
4875 if (compare_costs (elim_cost, express_cost) <= 0)
4876 {
4877 cost = elim_cost;
4878 depends_on = depends_on_elim;
4879 depends_on_elim = NULL;
4880 inv_expr_id = elim_inv_expr_id;
4881 }
4882 else
4883 {
4884 cost = express_cost;
4885 depends_on = depends_on_express;
4886 depends_on_express = NULL;
4887 bound = NULL_TREE;
4888 comp = ERROR_MARK;
4889 inv_expr_id = express_inv_expr_id;
4890 }
4891
4892 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4893
4894 if (depends_on_elim)
4895 BITMAP_FREE (depends_on_elim);
4896 if (depends_on_express)
4897 BITMAP_FREE (depends_on_express);
4898
4899 return !infinite_cost_p (cost);
4900 }
4901
4902 /* Determines cost of basing replacement of USE on CAND. Returns false
4903 if USE cannot be based on CAND. */
4904
4905 static bool
4906 determine_use_iv_cost (struct ivopts_data *data,
4907 struct iv_use *use, struct iv_cand *cand)
4908 {
4909 switch (use->type)
4910 {
4911 case USE_NONLINEAR_EXPR:
4912 return determine_use_iv_cost_generic (data, use, cand);
4913
4914 case USE_ADDRESS:
4915 return determine_use_iv_cost_address (data, use, cand);
4916
4917 case USE_COMPARE:
4918 return determine_use_iv_cost_condition (data, use, cand);
4919
4920 default:
4921 gcc_unreachable ();
4922 }
4923 }
4924
4925 /* Return true if get_computation_cost indicates that autoincrement is
4926 a possibility for the pair of USE and CAND, false otherwise. */
4927
4928 static bool
4929 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4930 struct iv_cand *cand)
4931 {
4932 bitmap depends_on;
4933 bool can_autoinc;
4934 comp_cost cost;
4935
4936 if (use->type != USE_ADDRESS)
4937 return false;
4938
4939 cost = get_computation_cost (data, use, cand, true, &depends_on,
4940 &can_autoinc, NULL);
4941
4942 BITMAP_FREE (depends_on);
4943
4944 return !infinite_cost_p (cost) && can_autoinc;
4945 }
4946
4947 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4948 use that allows autoincrement, and set their AINC_USE if possible. */
4949
4950 static void
4951 set_autoinc_for_original_candidates (struct ivopts_data *data)
4952 {
4953 unsigned i, j;
4954
4955 for (i = 0; i < n_iv_cands (data); i++)
4956 {
4957 struct iv_cand *cand = iv_cand (data, i);
4958 struct iv_use *closest_before = NULL;
4959 struct iv_use *closest_after = NULL;
4960 if (cand->pos != IP_ORIGINAL)
4961 continue;
4962
4963 for (j = 0; j < n_iv_uses (data); j++)
4964 {
4965 struct iv_use *use = iv_use (data, j);
4966 unsigned uid = gimple_uid (use->stmt);
4967
4968 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4969 continue;
4970
4971 if (uid < gimple_uid (cand->incremented_at)
4972 && (closest_before == NULL
4973 || uid > gimple_uid (closest_before->stmt)))
4974 closest_before = use;
4975
4976 if (uid > gimple_uid (cand->incremented_at)
4977 && (closest_after == NULL
4978 || uid < gimple_uid (closest_after->stmt)))
4979 closest_after = use;
4980 }
4981
4982 if (closest_before != NULL
4983 && autoinc_possible_for_pair (data, closest_before, cand))
4984 cand->ainc_use = closest_before;
4985 else if (closest_after != NULL
4986 && autoinc_possible_for_pair (data, closest_after, cand))
4987 cand->ainc_use = closest_after;
4988 }
4989 }
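/* For example (an illustrative pattern, not from the sources): in

     use:        ... = *p;
     increment:  p = p + 4;

   within one basic block, the use is the closest one before the
   increment, so it becomes AINC_USE and may later be rewritten using
   a post-increment address if the target supports it.  */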
4990
4991 /* Finds the candidates for the induction variables. */
4992
4993 static void
4994 find_iv_candidates (struct ivopts_data *data)
4995 {
4996 /* Add commonly used ivs. */
4997 add_standard_iv_candidates (data);
4998
4999 /* Add old induction variables. */
5000 add_old_ivs_candidates (data);
5001
5002 /* Add induction variables derived from uses. */
5003 add_derived_ivs_candidates (data);
5004
5005 set_autoinc_for_original_candidates (data);
5006
5007 /* Record the important candidates. */
5008 record_important_candidates (data);
5009 }
5010
5011 /* Determines costs of basing the use of the iv on an iv candidate. */
5012
5013 static void
5014 determine_use_iv_costs (struct ivopts_data *data)
5015 {
5016 unsigned i, j;
5017 struct iv_use *use;
5018 struct iv_cand *cand;
5019 bitmap to_clear = BITMAP_ALLOC (NULL);
5020
5021 alloc_use_cost_map (data);
5022
5023 for (i = 0; i < n_iv_uses (data); i++)
5024 {
5025 use = iv_use (data, i);
5026
5027 if (data->consider_all_candidates)
5028 {
5029 for (j = 0; j < n_iv_cands (data); j++)
5030 {
5031 cand = iv_cand (data, j);
5032 determine_use_iv_cost (data, use, cand);
5033 }
5034 }
5035 else
5036 {
5037 bitmap_iterator bi;
5038
5039 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5040 {
5041 cand = iv_cand (data, j);
5042 if (!determine_use_iv_cost (data, use, cand))
5043 bitmap_set_bit (to_clear, j);
5044 }
5045
5046 /* Remove the candidates for which the cost is infinite from
5047 the list of related candidates. */
5048 bitmap_and_compl_into (use->related_cands, to_clear);
5049 bitmap_clear (to_clear);
5050 }
5051 }
5052
5053 BITMAP_FREE (to_clear);
5054
5055 if (dump_file && (dump_flags & TDF_DETAILS))
5056 {
5057 fprintf (dump_file, "Use-candidate costs:\n");
5058
5059 for (i = 0; i < n_iv_uses (data); i++)
5060 {
5061 use = iv_use (data, i);
5062
5063 fprintf (dump_file, "Use %d:\n", i);
5064 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5065 for (j = 0; j < use->n_map_members; j++)
5066 {
5067 if (!use->cost_map[j].cand
5068 || infinite_cost_p (use->cost_map[j].cost))
5069 continue;
5070
5071 fprintf (dump_file, " %d\t%d\t%d\t",
5072 use->cost_map[j].cand->id,
5073 use->cost_map[j].cost.cost,
5074 use->cost_map[j].cost.complexity);
5075 if (use->cost_map[j].depends_on)
5076 bitmap_print (dump_file,
5077 use->cost_map[j].depends_on, "","");
5078 if (use->cost_map[j].inv_expr_id != -1)
5079 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5080 fprintf (dump_file, "\n");
5081 }
5082
5083 fprintf (dump_file, "\n");
5084 }
5085 fprintf (dump_file, "\n");
5086 }
5087 }
5088
5089 /* Determines cost of the candidate CAND. */
5090
5091 static void
5092 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5093 {
5094 comp_cost cost_base;
5095 unsigned cost, cost_step;
5096 tree base;
5097
5098 if (!cand->iv)
5099 {
5100 cand->cost = 0;
5101 return;
5102 }
5103
5104 /* There are two costs associated with the candidate -- its increment
5105 and its initialization. The second is almost negligible for any loop
5106 that rolls enough, so we take it into account only a little. */
5107
5108 base = cand->iv->base;
5109 cost_base = force_var_cost (data, base, NULL);
5110 /* It will be exceptional that the iv register happens to be initialized with
5111 the proper value at no cost. In general, there will at least be a regcopy
5112 or a const set. */
5113 if (cost_base.cost == 0)
5114 cost_base.cost = COSTS_N_INSNS (1);
5115 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5116
5117 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5118
5119 /* Prefer the original ivs unless we may gain something by replacing them.
5120 The reason is to make debugging simpler; so this is not relevant for
5121 artificial ivs created by other optimization passes. */
5122 if (cand->pos != IP_ORIGINAL
5123 || !SSA_NAME_VAR (cand->var_before)
5124 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5125 cost++;
5126
5127 /* Prefer not to insert statements into latch unless there are some
5128 already (so that we do not create unnecessary jumps). */
5129 if (cand->pos == IP_END
5130 && empty_block_p (ip_end_pos (data->current_loop)))
5131 cost++;
5132
5133 cand->cost = cost;
5134 cand->cost_step = cost_step;
5135 }
5136
5137 /* Determines costs of computation of the candidates. */
5138
5139 static void
5140 determine_iv_costs (struct ivopts_data *data)
5141 {
5142 unsigned i;
5143
5144 if (dump_file && (dump_flags & TDF_DETAILS))
5145 {
5146 fprintf (dump_file, "Candidate costs:\n");
5147 fprintf (dump_file, " cand\tcost\n");
5148 }
5149
5150 for (i = 0; i < n_iv_cands (data); i++)
5151 {
5152 struct iv_cand *cand = iv_cand (data, i);
5153
5154 determine_iv_cost (data, cand);
5155
5156 if (dump_file && (dump_flags & TDF_DETAILS))
5157 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5158 }
5159
5160 if (dump_file && (dump_flags & TDF_DETAILS))
5161 fprintf (dump_file, "\n");
5162 }
5163
5164 /* Calculates cost for having SIZE induction variables. */
5165
5166 static unsigned
5167 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5168 {
5169 /* We add size to the cost, so that we prefer eliminating ivs
5170 if possible. */
5171 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5172 data->body_includes_call);
5173 }
5174
5175 /* For each size of the induction variable set determine the penalty. */
5176
5177 static void
5178 determine_set_costs (struct ivopts_data *data)
5179 {
5180 unsigned j, n;
5181 gimple phi;
5182 gimple_stmt_iterator psi;
5183 tree op;
5184 struct loop *loop = data->current_loop;
5185 bitmap_iterator bi;
5186
5187 if (dump_file && (dump_flags & TDF_DETAILS))
5188 {
5189 fprintf (dump_file, "Global costs:\n");
5190 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5191 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5192 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5193 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5194 }
5195
5196 n = 0;
5197 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5198 {
5199 phi = gsi_stmt (psi);
5200 op = PHI_RESULT (phi);
5201
5202 if (virtual_operand_p (op))
5203 continue;
5204
5205 if (get_iv (data, op))
5206 continue;
5207
5208 n++;
5209 }
5210
5211 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5212 {
5213 struct version_info *info = ver_info (data, j);
5214
5215 if (info->inv_id && info->has_nonlin_use)
5216 n++;
5217 }
5218
5219 data->regs_used = n;
5220 if (dump_file && (dump_flags & TDF_DETAILS))
5221 fprintf (dump_file, " regs_used %d\n", n);
5222
5223 if (dump_file && (dump_flags & TDF_DETAILS))
5224 {
5225 fprintf (dump_file, " cost for size:\n");
5226 fprintf (dump_file, " ivs\tcost\n");
5227 for (j = 0; j <= 2 * target_avail_regs; j++)
5228 fprintf (dump_file, " %d\t%d\n", j,
5229 ivopts_global_cost_for_size (data, j));
5230 fprintf (dump_file, "\n");
5231 }
5232 }
5233
5234 /* Returns true if A is a cheaper cost pair than B. */
5235
5236 static bool
5237 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5238 {
5239 int cmp;
5240
5241 if (!a)
5242 return false;
5243
5244 if (!b)
5245 return true;
5246
5247 cmp = compare_costs (a->cost, b->cost);
5248 if (cmp < 0)
5249 return true;
5250
5251 if (cmp > 0)
5252 return false;
5253
5254 /* In case the costs are the same, prefer the cheaper candidate. */
5255 if (a->cand->cost < b->cand->cost)
5256 return true;
5257
5258 return false;
5259 }
5260
5261
5262 /* Returns the candidate by which USE is expressed in IVS. */
5263
5264 static struct cost_pair *
5265 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5266 {
5267 return ivs->cand_for_use[use->id];
5268 }
5269
5270 /* Computes the cost field of IVS structure. */
5271
5272 static void
5273 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5274 {
5275 comp_cost cost = ivs->cand_use_cost;
5276
5277 cost.cost += ivs->cand_cost;
5278
5279 cost.cost += ivopts_global_cost_for_size (data,
5280 ivs->n_regs + ivs->num_used_inv_expr);
5281
5282 ivs->cost = cost;
5283 }
5284
5285 /* Remove invariants in set INVS from set IVS. */
5286
5287 static void
5288 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5289 {
5290 bitmap_iterator bi;
5291 unsigned iid;
5292
5293 if (!invs)
5294 return;
5295
5296 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5297 {
5298 ivs->n_invariant_uses[iid]--;
5299 if (ivs->n_invariant_uses[iid] == 0)
5300 ivs->n_regs--;
5301 }
5302 }
5303
5304 /* Set USE not to be expressed by any candidate in IVS. */
5305
5306 static void
5307 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5308 struct iv_use *use)
5309 {
5310 unsigned uid = use->id, cid;
5311 struct cost_pair *cp;
5312
5313 cp = ivs->cand_for_use[uid];
5314 if (!cp)
5315 return;
5316 cid = cp->cand->id;
5317
5318 ivs->bad_uses++;
5319 ivs->cand_for_use[uid] = NULL;
5320 ivs->n_cand_uses[cid]--;
5321
5322 if (ivs->n_cand_uses[cid] == 0)
5323 {
5324 bitmap_clear_bit (ivs->cands, cid);
5325 /* Do not count the pseudocandidates. */
5326 if (cp->cand->iv)
5327 ivs->n_regs--;
5328 ivs->n_cands--;
5329 ivs->cand_cost -= cp->cand->cost;
5330
5331 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5332 }
5333
5334 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5335
5336 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5337
5338 if (cp->inv_expr_id != -1)
5339 {
5340 ivs->used_inv_expr[cp->inv_expr_id]--;
5341 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5342 ivs->num_used_inv_expr--;
5343 }
5344 iv_ca_recount_cost (data, ivs);
5345 }
5346
5347 /* Add invariants in set INVS to set IVS. */
5348
5349 static void
5350 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5351 {
5352 bitmap_iterator bi;
5353 unsigned iid;
5354
5355 if (!invs)
5356 return;
5357
5358 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5359 {
5360 ivs->n_invariant_uses[iid]++;
5361 if (ivs->n_invariant_uses[iid] == 1)
5362 ivs->n_regs++;
5363 }
5364 }
5365
5366 /* Set cost pair for USE in set IVS to CP. */
5367
5368 static void
5369 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5370 struct iv_use *use, struct cost_pair *cp)
5371 {
5372 unsigned uid = use->id, cid;
5373
5374 if (ivs->cand_for_use[uid] == cp)
5375 return;
5376
5377 if (ivs->cand_for_use[uid])
5378 iv_ca_set_no_cp (data, ivs, use);
5379
5380 if (cp)
5381 {
5382 cid = cp->cand->id;
5383
5384 ivs->bad_uses--;
5385 ivs->cand_for_use[uid] = cp;
5386 ivs->n_cand_uses[cid]++;
5387 if (ivs->n_cand_uses[cid] == 1)
5388 {
5389 bitmap_set_bit (ivs->cands, cid);
5390 /* Do not count the pseudocandidates. */
5391 if (cp->cand->iv)
5392 ivs->n_regs++;
5393 ivs->n_cands++;
5394 ivs->cand_cost += cp->cand->cost;
5395
5396 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5397 }
5398
5399 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5400 iv_ca_set_add_invariants (ivs, cp->depends_on);
5401
5402 if (cp->inv_expr_id != -1)
5403 {
5404 ivs->used_inv_expr[cp->inv_expr_id]++;
5405 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5406 ivs->num_used_inv_expr++;
5407 }
5408 iv_ca_recount_cost (data, ivs);
5409 }
5410 }
5411
5412 /* Extend set IVS by expressing USE by some of the candidates in it
5413 if possible. All important candidates will be considered
5414 if IMPORTANT_CANDIDATES is true. */
5415
5416 static void
5417 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5418 struct iv_use *use, bool important_candidates)
5419 {
5420 struct cost_pair *best_cp = NULL, *cp;
5421 bitmap_iterator bi;
5422 bitmap cands;
5423 unsigned i;
5424
5425 gcc_assert (ivs->upto >= use->id);
5426
5427 if (ivs->upto == use->id)
5428 {
5429 ivs->upto++;
5430 ivs->bad_uses++;
5431 }
5432
5433 cands = (important_candidates ? data->important_candidates : ivs->cands);
5434 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5435 {
5436 struct iv_cand *cand = iv_cand (data, i);
5437
5438 cp = get_use_iv_cost (data, use, cand);
5439
5440 if (cheaper_cost_pair (cp, best_cp))
5441 best_cp = cp;
5442 }
5443
5444 iv_ca_set_cp (data, ivs, use, best_cp);
5445 }
5446
5447 /* Get cost for assignment IVS. */
5448
5449 static comp_cost
5450 iv_ca_cost (struct iv_ca *ivs)
5451 {
5452 /* This was a conditional expression but it triggered a bug in
5453 Sun C 5.5. */
5454 if (ivs->bad_uses)
5455 return infinite_cost;
5456 else
5457 return ivs->cost;
5458 }
5459
5460 /* Returns true if all dependences of CP are among invariants in IVS. */
5461
5462 static bool
5463 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5464 {
5465 unsigned i;
5466 bitmap_iterator bi;
5467
5468 if (!cp->depends_on)
5469 return true;
5470
5471 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5472 {
5473 if (ivs->n_invariant_uses[i] == 0)
5474 return false;
5475 }
5476
5477 return true;
5478 }
5479
5480 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5481 it before NEXT_CHANGE. */
5482
5483 static struct iv_ca_delta *
5484 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5485 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5486 {
5487 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5488
5489 change->use = use;
5490 change->old_cp = old_cp;
5491 change->new_cp = new_cp;
5492 change->next_change = next_change;
5493
5494 return change;
5495 }
5496
5497 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5498 are rewritten. */
5499
5500 static struct iv_ca_delta *
5501 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5502 {
5503 struct iv_ca_delta *last;
5504
5505 if (!l2)
5506 return l1;
5507
5508 if (!l1)
5509 return l2;
5510
5511 for (last = l1; last->next_change; last = last->next_change)
5512 continue;
5513 last->next_change = l2;
5514
5515 return l1;
5516 }
5517
5518 /* Reverse the list of changes DELTA, forming the inverse to it. */
5519
5520 static struct iv_ca_delta *
5521 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5522 {
5523 struct iv_ca_delta *act, *next, *prev = NULL;
5524 struct cost_pair *tmp;
5525
5526 for (act = delta; act; act = next)
5527 {
5528 next = act->next_change;
5529 act->next_change = prev;
5530 prev = act;
5531
5532 tmp = act->old_cp;
5533 act->old_cp = act->new_cp;
5534 act->new_cp = tmp;
5535 }
5536
5537 return prev;
5538 }
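/* Reversing is used by iv_ca_delta_commit below: applying the reversed
   list with old_cp and new_cp swapped is exactly the inverse of
   applying the original list, which is how a tentatively committed
   change is rolled back.  */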
5539
5540 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5541 reverted instead. */
5542
5543 static void
5544 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5545 struct iv_ca_delta *delta, bool forward)
5546 {
5547 struct cost_pair *from, *to;
5548 struct iv_ca_delta *act;
5549
5550 if (!forward)
5551 delta = iv_ca_delta_reverse (delta);
5552
5553 for (act = delta; act; act = act->next_change)
5554 {
5555 from = act->old_cp;
5556 to = act->new_cp;
5557 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5558 iv_ca_set_cp (data, ivs, act->use, to);
5559 }
5560
5561 if (!forward)
5562 iv_ca_delta_reverse (delta);
5563 }
5564
5565 /* Returns true if CAND is used in IVS. */
5566
5567 static bool
5568 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5569 {
5570 return ivs->n_cand_uses[cand->id] > 0;
5571 }
5572
5573 /* Returns number of induction variable candidates in the set IVS. */
5574
5575 static unsigned
5576 iv_ca_n_cands (struct iv_ca *ivs)
5577 {
5578 return ivs->n_cands;
5579 }
5580
5581 /* Free the list of changes DELTA. */
5582
5583 static void
5584 iv_ca_delta_free (struct iv_ca_delta **delta)
5585 {
5586 struct iv_ca_delta *act, *next;
5587
5588 for (act = *delta; act; act = next)
5589 {
5590 next = act->next_change;
5591 free (act);
5592 }
5593
5594 *delta = NULL;
5595 }
5596
5597 /* Allocates new iv candidates assignment. */
5598
5599 static struct iv_ca *
5600 iv_ca_new (struct ivopts_data *data)
5601 {
5602 struct iv_ca *nw = XNEW (struct iv_ca);
5603
5604 nw->upto = 0;
5605 nw->bad_uses = 0;
5606 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5607 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5608 nw->cands = BITMAP_ALLOC (NULL);
5609 nw->n_cands = 0;
5610 nw->n_regs = 0;
5611 nw->cand_use_cost = no_cost;
5612 nw->cand_cost = 0;
5613 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5614 nw->cost = no_cost;
5615 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5616 nw->num_used_inv_expr = 0;
5617
5618 return nw;
5619 }
5620
5621 /* Free memory occupied by the set IVS. */
5622
5623 static void
5624 iv_ca_free (struct iv_ca **ivs)
5625 {
5626 free ((*ivs)->cand_for_use);
5627 free ((*ivs)->n_cand_uses);
5628 BITMAP_FREE ((*ivs)->cands);
5629 free ((*ivs)->n_invariant_uses);
5630 free ((*ivs)->used_inv_expr);
5631 free (*ivs);
5632 *ivs = NULL;
5633 }
5634
5635 /* Dumps IVS to FILE. */
5636
5637 static void
5638 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5639 {
5640 const char *pref = " invariants ";
5641 unsigned i;
5642 comp_cost cost = iv_ca_cost (ivs);
5643
5644 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5645 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5646 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5647 bitmap_print (file, ivs->cands, " candidates: ","\n");
5648
5649 for (i = 0; i < ivs->upto; i++)
5650 {
5651 struct iv_use *use = iv_use (data, i);
5652 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5653 if (cp)
5654 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5655 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5656 else
5657 fprintf (file, " use:%d --> ??\n", use->id);
5658 }
5659
5660 for (i = 1; i <= data->max_inv_id; i++)
5661 if (ivs->n_invariant_uses[i])
5662 {
5663 fprintf (file, "%s%d", pref, i);
5664 pref = ", ";
5665 }
5666 fprintf (file, "\n\n");
5667 }
5668
5669 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5670 of the new set, and store the differences in DELTA. The number of ivs in
5671 the new set is stored in N_IVS. When MIN_NCAND is true, the function
5672 will try to find a solution with a minimal number of iv candidates. */
5673
5674 static comp_cost
5675 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5676 struct iv_cand *cand, struct iv_ca_delta **delta,
5677 unsigned *n_ivs, bool min_ncand)
5678 {
5679 unsigned i;
5680 comp_cost cost;
5681 struct iv_use *use;
5682 struct cost_pair *old_cp, *new_cp;
5683
5684 *delta = NULL;
5685 for (i = 0; i < ivs->upto; i++)
5686 {
5687 use = iv_use (data, i);
5688 old_cp = iv_ca_cand_for_use (ivs, use);
5689
5690 if (old_cp
5691 && old_cp->cand == cand)
5692 continue;
5693
5694 new_cp = get_use_iv_cost (data, use, cand);
5695 if (!new_cp)
5696 continue;
5697
5698 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5699 continue;
5700
5701 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5702 continue;
5703
5704 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5705 }
5706
5707 iv_ca_delta_commit (data, ivs, *delta, true);
5708 cost = iv_ca_cost (ivs);
5709 if (n_ivs)
5710 *n_ivs = iv_ca_n_cands (ivs);
5711 iv_ca_delta_commit (data, ivs, *delta, false);
5712
5713 return cost;
5714 }
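/* Note on the pattern above (an observation, not extra machinery):
   iv_ca_delta_commit (..., true) tentatively applies *DELTA so that
   iv_ca_cost reads the cost of the modified set, and the immediate
   commit with false undoes it again; IVS is therefore unchanged on
   return, and the caller applies *DELTA for real only if the returned
   cost is an improvement.  */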
5715
5716 /* Try narrowing set IVS by removing CAND. Return the cost of
5717 the new set and store the differences in DELTA. */
5718
5719 static comp_cost
5720 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5721 struct iv_cand *cand, struct iv_ca_delta **delta)
5722 {
5723 unsigned i, ci;
5724 struct iv_use *use;
5725 struct cost_pair *old_cp, *new_cp, *cp;
5726 bitmap_iterator bi;
5727 struct iv_cand *cnd;
5728 comp_cost cost;
5729
5730 *delta = NULL;
5731 for (i = 0; i < n_iv_uses (data); i++)
5732 {
5733 use = iv_use (data, i);
5734
5735 old_cp = iv_ca_cand_for_use (ivs, use);
5736 if (old_cp->cand != cand)
5737 continue;
5738
5739 new_cp = NULL;
5740
5741 if (data->consider_all_candidates)
5742 {
5743 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5744 {
5745 if (ci == cand->id)
5746 continue;
5747
5748 cnd = iv_cand (data, ci);
5749
5750 cp = get_use_iv_cost (data, use, cnd);
5751 if (!cp)
5752 continue;
5753
5754 if (!iv_ca_has_deps (ivs, cp))
5755 continue;
5756
5757 if (!cheaper_cost_pair (cp, new_cp))
5758 continue;
5759
5760 new_cp = cp;
5761 }
5762 }
5763 else
5764 {
5765 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5766 {
5767 if (ci == cand->id)
5768 continue;
5769
5770 cnd = iv_cand (data, ci);
5771
5772 cp = get_use_iv_cost (data, use, cnd);
5773 if (!cp)
5774 continue;
5775 if (!iv_ca_has_deps (ivs, cp))
5776 continue;
5777
5778 if (!cheaper_cost_pair (cp, new_cp))
5779 continue;
5780
5781 new_cp = cp;
5782 }
5783 }
5784
5785 if (!new_cp)
5786 {
5787 iv_ca_delta_free (delta);
5788 return infinite_cost;
5789 }
5790
5791 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5792 }
5793
5794 iv_ca_delta_commit (data, ivs, *delta, true);
5795 cost = iv_ca_cost (ivs);
5796 iv_ca_delta_commit (data, ivs, *delta, false);
5797
5798 return cost;
5799 }
5800
5801 /* Try optimizing the set of candidates IVS by removing candidates other
5802 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5803 differences in DELTA. */
5804
5805 static comp_cost
5806 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5807 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5808 {
5809 bitmap_iterator bi;
5810 struct iv_ca_delta *act_delta, *best_delta;
5811 unsigned i;
5812 comp_cost best_cost, acost;
5813 struct iv_cand *cand;
5814
5815 best_delta = NULL;
5816 best_cost = iv_ca_cost (ivs);
5817
5818 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5819 {
5820 cand = iv_cand (data, i);
5821
5822 if (cand == except_cand)
5823 continue;
5824
5825 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5826
5827 if (compare_costs (acost, best_cost) < 0)
5828 {
5829 best_cost = acost;
5830 iv_ca_delta_free (&best_delta);
5831 best_delta = act_delta;
5832 }
5833 else
5834 iv_ca_delta_free (&act_delta);
5835 }
5836
5837 if (!best_delta)
5838 {
5839 *delta = NULL;
5840 return best_cost;
5841 }
5842
5843 /* Recurse to possibly remove other unnecessary ivs. */
5844 iv_ca_delta_commit (data, ivs, best_delta, true);
5845 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5846 iv_ca_delta_commit (data, ivs, best_delta, false);
5847 *delta = iv_ca_delta_join (best_delta, *delta);
5848 return best_cost;
5849 }
5850
5851 /* Tries to extend the set IVS in the best possible way in order
5852 to express USE. If ORIGINALP is true, prefer candidates from
5853 the original set of IVs, otherwise favor important candidates not
5854 based on any memory object. */
5855
5856 static bool
5857 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5858 struct iv_use *use, bool originalp)
5859 {
5860 comp_cost best_cost, act_cost;
5861 unsigned i;
5862 bitmap_iterator bi;
5863 struct iv_cand *cand;
5864 struct iv_ca_delta *best_delta = NULL, *act_delta;
5865 struct cost_pair *cp;
5866
5867 iv_ca_add_use (data, ivs, use, false);
5868 best_cost = iv_ca_cost (ivs);
5869
5870 cp = iv_ca_cand_for_use (ivs, use);
5871 if (!cp)
5872 {
5873 ivs->upto--;
5874 ivs->bad_uses--;
5875 iv_ca_add_use (data, ivs, use, true);
5876 best_cost = iv_ca_cost (ivs);
5877 cp = iv_ca_cand_for_use (ivs, use);
5878 }
5879 if (cp)
5880 {
5881 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5882 iv_ca_set_no_cp (data, ivs, use);
5883 }
5884
5885 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5886 first try important candidates not based on any memory object. Only if
5887 this fails, try the specific ones. Rationale -- in loops with many
5888 variables the best choice often is to use just one generic biv. If we
5889 added many ivs specific to the uses here, the optimization algorithm later
5890 would be likely to get stuck in a local minimum, thus causing us to create
5891 too many ivs. Growing the set from a few ivs to more seems more likely to be
5892 successful -- starting from few ivs, replacing an expensive use by a
5893 specific iv should always be a win. */
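  /* For instance (an illustrative sketch only, not taken from any
     particular testcase), in a loop such as

	for (i = 0; i < n; i++)
	  {
	    a[i] = f (i);
	    b[i] = g (i);
	  }

     a single generic biv can express the addresses &a[i] and &b[i] as
     well as the exit test, which is usually cheaper than introducing a
     separate pointer iv for each array; hence the generic candidates
     are tried first.  */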
5894 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5895 {
5896 cand = iv_cand (data, i);
5897
5898 if (originalp && cand->pos != IP_ORIGINAL)
5899 continue;
5900
5901 if (!originalp && cand->iv->base_object != NULL_TREE)
5902 continue;
5903
5904 if (iv_ca_cand_used_p (ivs, cand))
5905 continue;
5906
5907 cp = get_use_iv_cost (data, use, cand);
5908 if (!cp)
5909 continue;
5910
5911 iv_ca_set_cp (data, ivs, use, cp);
5912 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5913 true);
5914 iv_ca_set_no_cp (data, ivs, use);
5915 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5916
5917 if (compare_costs (act_cost, best_cost) < 0)
5918 {
5919 best_cost = act_cost;
5920
5921 iv_ca_delta_free (&best_delta);
5922 best_delta = act_delta;
5923 }
5924 else
5925 iv_ca_delta_free (&act_delta);
5926 }
5927
5928 if (infinite_cost_p (best_cost))
5929 {
5930 for (i = 0; i < use->n_map_members; i++)
5931 {
5932 cp = use->cost_map + i;
5933 cand = cp->cand;
5934 if (!cand)
5935 continue;
5936
5937 /* Already tried this. */
5938 if (cand->important)
5939 {
5940 if (originalp && cand->pos == IP_ORIGINAL)
5941 continue;
5942 if (!originalp && cand->iv->base_object == NULL_TREE)
5943 continue;
5944 }
5945
5946 if (iv_ca_cand_used_p (ivs, cand))
5947 continue;
5948
5949 act_delta = NULL;
5950 iv_ca_set_cp (data, ivs, use, cp);
5951 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5952 iv_ca_set_no_cp (data, ivs, use);
5953 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5954 cp, act_delta);
5955
5956 if (compare_costs (act_cost, best_cost) < 0)
5957 {
5958 best_cost = act_cost;
5959
5960 if (best_delta)
5961 iv_ca_delta_free (&best_delta);
5962 best_delta = act_delta;
5963 }
5964 else
5965 iv_ca_delta_free (&act_delta);
5966 }
5967 }
5968
5969 iv_ca_delta_commit (data, ivs, best_delta, true);
5970 iv_ca_delta_free (&best_delta);
5971
5972 return !infinite_cost_p (best_cost);
5973 }
5974
5975 /* Finds an initial assignment of candidates to uses. */
5976
5977 static struct iv_ca *
5978 get_initial_solution (struct ivopts_data *data, bool originalp)
5979 {
5980 struct iv_ca *ivs = iv_ca_new (data);
5981 unsigned i;
5982
5983 for (i = 0; i < n_iv_uses (data); i++)
5984 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5985 {
5986 iv_ca_free (&ivs);
5987 return NULL;
5988 }
5989
5990 return ivs;
5991 }
5992
5993 /* Tries to improve set of induction variables IVS. */
5994
5995 static bool
5996 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5997 {
5998 unsigned i, n_ivs;
5999 comp_cost acost, best_cost = iv_ca_cost (ivs);
6000 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6001 struct iv_cand *cand;
6002
6003 /* Try extending the set of induction variables by one. */
6004 for (i = 0; i < n_iv_cands (data); i++)
6005 {
6006 cand = iv_cand (data, i);
6007
6008 if (iv_ca_cand_used_p (ivs, cand))
6009 continue;
6010
6011 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6012 if (!act_delta)
6013 continue;
6014
6015 /* If we successfully added the candidate and the set is small enough,
6016 try optimizing it by removing other candidates. */
6017 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6018 {
6019 iv_ca_delta_commit (data, ivs, act_delta, true);
6020 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6021 iv_ca_delta_commit (data, ivs, act_delta, false);
6022 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6023 }
6024
6025 if (compare_costs (acost, best_cost) < 0)
6026 {
6027 best_cost = acost;
6028 iv_ca_delta_free (&best_delta);
6029 best_delta = act_delta;
6030 }
6031 else
6032 iv_ca_delta_free (&act_delta);
6033 }
6034
6035 if (!best_delta)
6036 {
6037 /* Try removing the candidates from the set instead. */
6038 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6039
6040 /* Nothing more we can do. */
6041 if (!best_delta)
6042 return false;
6043 }
6044
6045 iv_ca_delta_commit (data, ivs, best_delta, true);
6046 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6047 iv_ca_delta_free (&best_delta);
6048 return true;
6049 }
6050
6051 /* Attempts to find the optimal set of induction variables. We use a simple
6052 greedy heuristic -- we try to replace at most one candidate in the selected
6053 solution and remove the unused ivs while this improves the cost. */
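/* As an illustrative sketch (invented costs, not a transcript of the pass),
   one round of the heuristic might look like:

     current set {i_1}                   cost 14
     try adding p_2, then prune          cost 10   <-- new best
     try adding j_3                      cost 15   (rejected)

   Such rounds repeat until no single addition (followed by pruning of
   candidates made redundant) improves the cost.  */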
6054
6055 static struct iv_ca *
6056 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6057 {
6058 struct iv_ca *set;
6059
6060 /* Get the initial solution. */
6061 set = get_initial_solution (data, originalp);
6062 if (!set)
6063 {
6064 if (dump_file && (dump_flags & TDF_DETAILS))
6065 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6066 return NULL;
6067 }
6068
6069 if (dump_file && (dump_flags & TDF_DETAILS))
6070 {
6071 fprintf (dump_file, "Initial set of candidates:\n");
6072 iv_ca_dump (data, dump_file, set);
6073 }
6074
6075 while (try_improve_iv_set (data, set))
6076 {
6077 if (dump_file && (dump_flags & TDF_DETAILS))
6078 {
6079 fprintf (dump_file, "Improved to:\n");
6080 iv_ca_dump (data, dump_file, set);
6081 }
6082 }
6083
6084 return set;
6085 }
6086
6087 static struct iv_ca *
6088 find_optimal_iv_set (struct ivopts_data *data)
6089 {
6090 unsigned i;
6091 struct iv_ca *set, *origset;
6092 struct iv_use *use;
6093 comp_cost cost, origcost;
6094
6095 /* Determine the cost based on a strategy that starts with original IVs,
6096 and then try again using a strategy that prefers candidates not based
6097 on any memory object. */
6098 origset = find_optimal_iv_set_1 (data, true);
6099 set = find_optimal_iv_set_1 (data, false);
6100
6101 if (!origset && !set)
6102 return NULL;
6103
6104 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6105 cost = set ? iv_ca_cost (set) : infinite_cost;
6106
6107 if (dump_file && (dump_flags & TDF_DETAILS))
6108 {
6109 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6110 origcost.cost, origcost.complexity);
6111 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6112 cost.cost, cost.complexity);
6113 }
6114
6115 /* Choose the one with the best cost. */
6116 if (compare_costs (origcost, cost) <= 0)
6117 {
6118 if (set)
6119 iv_ca_free (&set);
6120 set = origset;
6121 }
6122 else if (origset)
6123 iv_ca_free (&origset);
6124
6125 for (i = 0; i < n_iv_uses (data); i++)
6126 {
6127 use = iv_use (data, i);
6128 use->selected = iv_ca_cand_for_use (set, use)->cand;
6129 }
6130
6131 return set;
6132 }
6133
6134 /* Creates a new induction variable corresponding to CAND. */
6135
6136 static void
6137 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6138 {
6139 gimple_stmt_iterator incr_pos;
6140 tree base;
6141 bool after = false;
6142
6143 if (!cand->iv)
6144 return;
6145
6146 switch (cand->pos)
6147 {
6148 case IP_NORMAL:
6149 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6150 break;
6151
6152 case IP_END:
6153 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6154 after = true;
6155 break;
6156
6157 case IP_AFTER_USE:
6158 after = true;
6159 /* fall through */
6160 case IP_BEFORE_USE:
6161 incr_pos = gsi_for_stmt (cand->incremented_at);
6162 break;
6163
6164 case IP_ORIGINAL:
6165 /* Mark that the iv is preserved. */
6166 name_info (data, cand->var_before)->preserve_biv = true;
6167 name_info (data, cand->var_after)->preserve_biv = true;
6168
6169 /* Rewrite the increment so that it uses var_before directly. */
6170 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6171 return;
6172 }
6173
6174 gimple_add_tmp_var (cand->var_before);
6175
6176 base = unshare_expr (cand->iv->base);
6177
6178 create_iv (base, unshare_expr (cand->iv->step),
6179 cand->var_before, data->current_loop,
6180 &incr_pos, after, &cand->var_before, &cand->var_after);
6181 }
6182
6183 /* Creates new induction variables described in SET. */
6184
6185 static void
6186 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6187 {
6188 unsigned i;
6189 struct iv_cand *cand;
6190 bitmap_iterator bi;
6191
6192 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6193 {
6194 cand = iv_cand (data, i);
6195 create_new_iv (data, cand);
6196 }
6197
6198 if (dump_file && (dump_flags & TDF_DETAILS))
6199 {
6200 fprintf (dump_file, "\nSelected IV set:\n");
6201 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6202 {
6203 cand = iv_cand (data, i);
6204 dump_cand (dump_file, cand);
6205 }
6206 fprintf (dump_file, "\n");
6207 }
6208 }
6209
6210 /* Rewrites USE (definition of iv used in a nonlinear expression)
6211 using candidate CAND. */
6212
6213 static void
6214 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6215 struct iv_use *use, struct iv_cand *cand)
6216 {
6217 tree comp;
6218 tree op, tgt;
6219 gimple ass;
6220 gimple_stmt_iterator bsi;
6221
6222 /* An important special case -- if we are asked to express value of
6223 the original iv by itself, just exit; there is no need to
6224 introduce a new computation (that might also need casting the
6225 variable to unsigned and back). */
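  /* An illustrative example: for the original biv's own increment

       i_after = i_before + step_5;

     with CAND being that original iv, the statement can stay as it is
     as long as step_5 is invariant in the loop -- the checks below
     implement exactly this.  */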
6226 if (cand->pos == IP_ORIGINAL
6227 && cand->incremented_at == use->stmt)
6228 {
6229 enum tree_code stmt_code;
6230
6231 gcc_assert (is_gimple_assign (use->stmt));
6232 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6233
6234 /* Check whether we may leave the computation unchanged.
6235 This is the case only if it does not rely on other
6236 computations in the loop -- otherwise, the computation
6237 we rely upon may be removed in remove_unused_ivs,
6238 thus leading to ICE. */
6239 stmt_code = gimple_assign_rhs_code (use->stmt);
6240 if (stmt_code == PLUS_EXPR
6241 || stmt_code == MINUS_EXPR
6242 || stmt_code == POINTER_PLUS_EXPR)
6243 {
6244 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6245 op = gimple_assign_rhs2 (use->stmt);
6246 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6247 op = gimple_assign_rhs1 (use->stmt);
6248 else
6249 op = NULL_TREE;
6250 }
6251 else
6252 op = NULL_TREE;
6253
6254 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6255 return;
6256 }
6257
6258 comp = get_computation (data->current_loop, use, cand);
6259 gcc_assert (comp != NULL_TREE);
6260
6261 switch (gimple_code (use->stmt))
6262 {
6263 case GIMPLE_PHI:
6264 tgt = PHI_RESULT (use->stmt);
6265
6266 /* If we should keep the biv, do not replace it. */
6267 if (name_info (data, tgt)->preserve_biv)
6268 return;
6269
6270 bsi = gsi_after_labels (gimple_bb (use->stmt));
6271 break;
6272
6273 case GIMPLE_ASSIGN:
6274 tgt = gimple_assign_lhs (use->stmt);
6275 bsi = gsi_for_stmt (use->stmt);
6276 break;
6277
6278 default:
6279 gcc_unreachable ();
6280 }
6281
6282 if (!valid_gimple_rhs_p (comp)
6283 || (gimple_code (use->stmt) != GIMPLE_PHI
6284 /* We can't allow re-allocating the stmt as it may still be
6285 pointed to elsewhere. */
6286 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6287 >= gimple_num_ops (gsi_stmt (bsi)))))
6288 {
6289 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6290 true, GSI_SAME_STMT);
6291 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6292 {
6293 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6294 /* As this isn't a plain copy we have to reset alignment
6295 information. */
6296 if (SSA_NAME_PTR_INFO (comp))
6297 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6298 }
6299 }
6300
6301 if (gimple_code (use->stmt) == GIMPLE_PHI)
6302 {
6303 ass = gimple_build_assign (tgt, comp);
6304 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6305
6306 bsi = gsi_for_stmt (use->stmt);
6307 remove_phi_node (&bsi, false);
6308 }
6309 else
6310 {
6311 gimple_assign_set_rhs_from_tree (&bsi, comp);
6312 use->stmt = gsi_stmt (bsi);
6313 }
6314 }
6315
6316 /* Performs a peephole optimization to reorder the iv update statement with
6317 a mem ref to enable instruction combining in later phases. The mem ref uses
6318 the iv value before the update, so the reordering transformation requires
6319 adjustment of the offset. CAND is the selected IV_CAND.
6320
6321 Example:
6322
6323 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6324 iv2 = iv1 + 1;
6325
6326 if (t < val) (1)
6327 goto L;
6328 goto Head;
6329
6330
6331 directly propagating t over to (1) will introduce an overlapping live range and
6332 thus increase register pressure. This peephole transforms it into (note that the offset drops from 16 to 8, compensating for the index having already advanced by one stride of 8):
6333
6334
6335 iv2 = iv1 + 1;
6336 t = MEM_REF (base, iv2, 8, 8);
6337 if (t < val)
6338 goto L;
6339 goto Head;
6340 */
6341
6342 static void
6343 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6344 {
6345 tree var_after;
6346 gimple iv_update, stmt;
6347 basic_block bb;
6348 gimple_stmt_iterator gsi, gsi_iv;
6349
6350 if (cand->pos != IP_NORMAL)
6351 return;
6352
6353 var_after = cand->var_after;
6354 iv_update = SSA_NAME_DEF_STMT (var_after);
6355
6356 bb = gimple_bb (iv_update);
6357 gsi = gsi_last_nondebug_bb (bb);
6358 stmt = gsi_stmt (gsi);
6359
6360 /* Only handle conditional statements for now. */
6361 if (gimple_code (stmt) != GIMPLE_COND)
6362 return;
6363
6364 gsi_prev_nondebug (&gsi);
6365 stmt = gsi_stmt (gsi);
6366 if (stmt != iv_update)
6367 return;
6368
6369 gsi_prev_nondebug (&gsi);
6370 if (gsi_end_p (gsi))
6371 return;
6372
6373 stmt = gsi_stmt (gsi);
6374 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6375 return;
6376
6377 if (stmt != use->stmt)
6378 return;
6379
6380 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6381 return;
6382
6383 if (dump_file && (dump_flags & TDF_DETAILS))
6384 {
6385 fprintf (dump_file, "Reordering \n");
6386 print_gimple_stmt (dump_file, iv_update, 0, 0);
6387 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6388 fprintf (dump_file, "\n");
6389 }
6390
6391 gsi = gsi_for_stmt (use->stmt);
6392 gsi_iv = gsi_for_stmt (iv_update);
6393 gsi_move_before (&gsi_iv, &gsi);
6394
6395 cand->pos = IP_BEFORE_USE;
6396 cand->incremented_at = use->stmt;
6397 }
6398
6399 /* Rewrites USE (address that is an iv) using candidate CAND. */
6400
6401 static void
6402 rewrite_use_address (struct ivopts_data *data,
6403 struct iv_use *use, struct iv_cand *cand)
6404 {
6405 aff_tree aff;
6406 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6407 tree base_hint = NULL_TREE;
6408 tree ref, iv;
6409 bool ok;
6410
6411 adjust_iv_update_pos (cand, use);
6412 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6413 gcc_assert (ok);
6414 unshare_aff_combination (&aff);
6415
6416 /* To avoid undefined overflow problems, all IV candidates use unsigned
6417 integer types. The drawback is that this makes it impossible for
6418 create_mem_ref to distinguish an IV that is based on a memory object
6419 from one that represents simply an offset.
6420
6421 To work around this problem, we pass a hint to create_mem_ref that
6422 indicates which variable (if any) in aff is an IV based on a memory
6423 object. Note that we only consider the candidate. If this is not
6424 based on an object, the base of the reference is in some subexpression
6425 of the use -- but these will use pointer types, so they are recognized
6426 by the create_mem_ref heuristics anyway. */
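  /* An illustrative sketch: for a use like  *(p_7 + i_4 * 4)  where the
     chosen candidate is based on the memory object behind p_7, the
     candidate's value at USE->stmt is passed as the hint, allowing
     create_mem_ref to treat it as the base pointer of the reference
     rather than as a plain (unsigned) integer offset.  */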
6427 if (cand->iv->base_object)
6428 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6429
6430 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6431 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6432 reference_alias_ptr_type (*use->op_p),
6433 iv, base_hint, data->speed);
6434 copy_ref_info (ref, *use->op_p);
6435 *use->op_p = ref;
6436 }
6437
6438 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6439 candidate CAND. */
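/* An illustrative example (hypothetical GIMPLE, not from a real dump):
   when induction variable elimination has computed a bound for CAND,
   an exit test such as

     if (i_5 < n_8)

   can be rewritten as a test on the candidate, e.g.

     if (p_11 != &a[n_8])

   Otherwise, the original compared value is simply re-expressed in
   terms of the candidate.  */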
6440
6441 static void
6442 rewrite_use_compare (struct ivopts_data *data,
6443 struct iv_use *use, struct iv_cand *cand)
6444 {
6445 tree comp, *var_p, op, bound;
6446 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6447 enum tree_code compare;
6448 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6449 bool ok;
6450
6451 bound = cp->value;
6452 if (bound)
6453 {
6454 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6455 tree var_type = TREE_TYPE (var);
6456 gimple_seq stmts;
6457
6458 if (dump_file && (dump_flags & TDF_DETAILS))
6459 {
6460 fprintf (dump_file, "Replacing exit test: ");
6461 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6462 }
6463 compare = cp->comp;
6464 bound = unshare_expr (fold_convert (var_type, bound));
6465 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6466 if (stmts)
6467 gsi_insert_seq_on_edge_immediate (
6468 loop_preheader_edge (data->current_loop),
6469 stmts);
6470
6471 gimple_cond_set_lhs (use->stmt, var);
6472 gimple_cond_set_code (use->stmt, compare);
6473 gimple_cond_set_rhs (use->stmt, op);
6474 return;
6475 }
6476
6477 /* The induction variable elimination failed; just express the original
6478 giv. */
6479 comp = get_computation (data->current_loop, use, cand);
6480 gcc_assert (comp != NULL_TREE);
6481
6482 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6483 gcc_assert (ok);
6484
6485 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6486 true, GSI_SAME_STMT);
6487 }
6488
6489 /* Rewrites USE using candidate CAND. */
6490
6491 static void
6492 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6493 {
6494 switch (use->type)
6495 {
6496 case USE_NONLINEAR_EXPR:
6497 rewrite_use_nonlinear_expr (data, use, cand);
6498 break;
6499
6500 case USE_ADDRESS:
6501 rewrite_use_address (data, use, cand);
6502 break;
6503
6504 case USE_COMPARE:
6505 rewrite_use_compare (data, use, cand);
6506 break;
6507
6508 default:
6509 gcc_unreachable ();
6510 }
6511
6512 update_stmt (use->stmt);
6513 }
6514
6515 /* Rewrite the uses using the selected induction variables. */
6516
6517 static void
6518 rewrite_uses (struct ivopts_data *data)
6519 {
6520 unsigned i;
6521 struct iv_cand *cand;
6522 struct iv_use *use;
6523
6524 for (i = 0; i < n_iv_uses (data); i++)
6525 {
6526 use = iv_use (data, i);
6527 cand = use->selected;
6528 gcc_assert (cand);
6529
6530 rewrite_use (data, use, cand);
6531 }
6532 }
6533
6534 /* Removes the ivs that are not used after rewriting. */
6535
6536 static void
6537 remove_unused_ivs (struct ivopts_data *data)
6538 {
6539 unsigned j;
6540 bitmap_iterator bi;
6541 bitmap toremove = BITMAP_ALLOC (NULL);
6542
6543 /* Figure out an order in which to release SSA DEFs so that we don't
6544 release something that we'd have to propagate into a debug stmt
6545 afterwards. */
6546 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6547 {
6548 struct version_info *info;
6549
6550 info = ver_info (data, j);
6551 if (info->iv
6552 && !integer_zerop (info->iv->step)
6553 && !info->inv_id
6554 && !info->iv->have_use_for
6555 && !info->preserve_biv)
6556 {
6557 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6558
6559 tree def = info->iv->ssa_name;
6560
6561 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6562 {
6563 imm_use_iterator imm_iter;
6564 use_operand_p use_p;
6565 gimple stmt;
6566 int count = 0;
6567
6568 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6569 {
6570 if (!gimple_debug_bind_p (stmt))
6571 continue;
6572
6573 /* We just want to determine whether to do nothing
6574 (count == 0), to substitute the computed
6575 expression into a single use of the SSA DEF by
6576 itself (count == 1), or to use a debug temp
6577 because the SSA DEF is used multiple times or as
6578 part of a larger expression (count > 1). */
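		  /* An illustrative sketch: a bind like
		       # DEBUG i => i_13
		     gives count == 1, so the computed expression is
		     substituted directly, whereas
		       # DEBUG x => i_13 * 4
		     already counts as 2 because the bound value is not
		     the SSA name itself, and a debug temp is used.  */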
6579 count++;
6580 if (gimple_debug_bind_get_value (stmt) != def)
6581 count++;
6582
6583 if (count > 1)
6584 BREAK_FROM_IMM_USE_STMT (imm_iter);
6585 }
6586
6587 if (!count)
6588 continue;
6589
6590 struct iv_use dummy_use;
6591 struct iv_cand *best_cand = NULL, *cand;
6592 unsigned i, best_pref = 0, cand_pref;
6593
6594 memset (&dummy_use, 0, sizeof (dummy_use));
6595 dummy_use.iv = info->iv;
6596 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6597 {
6598 cand = iv_use (data, i)->selected;
6599 if (cand == best_cand)
6600 continue;
6601 cand_pref = operand_equal_p (cand->iv->step,
6602 info->iv->step, 0)
6603 ? 4 : 0;
6604 cand_pref
6605 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6606 == TYPE_MODE (TREE_TYPE (info->iv->base))
6607 ? 2 : 0;
6608 cand_pref
6609 += TREE_CODE (cand->iv->base) == INTEGER_CST
6610 ? 1 : 0;
6611 if (best_cand == NULL || best_pref < cand_pref)
6612 {
6613 best_cand = cand;
6614 best_pref = cand_pref;
6615 }
6616 }
6617
6618 if (!best_cand)
6619 continue;
6620
6621 tree comp = get_computation_at (data->current_loop,
6622 &dummy_use, best_cand,
6623 SSA_NAME_DEF_STMT (def));
6624 if (!comp)
6625 continue;
6626
6627 if (count > 1)
6628 {
6629 tree vexpr = make_node (DEBUG_EXPR_DECL);
6630 DECL_ARTIFICIAL (vexpr) = 1;
6631 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6632 if (SSA_NAME_VAR (def))
6633 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6634 else
6635 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6636 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6637 gimple_stmt_iterator gsi;
6638
6639 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6640 gsi = gsi_after_labels (gimple_bb
6641 (SSA_NAME_DEF_STMT (def)));
6642 else
6643 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6644
6645 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6646 comp = vexpr;
6647 }
6648
6649 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6650 {
6651 if (!gimple_debug_bind_p (stmt))
6652 continue;
6653
6654 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6655 SET_USE (use_p, comp);
6656
6657 update_stmt (stmt);
6658 }
6659 }
6660 }
6661 }
6662
6663 release_defs_bitset (toremove);
6664
6665 BITMAP_FREE (toremove);
6666 }
6667
6668 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6669 for pointer_map_traverse. */
6670
6671 static bool
6672 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6673 void *data ATTRIBUTE_UNUSED)
6674 {
6675 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6676
6677 free (niter);
6678 return true;
6679 }
6680
6681 /* Frees data allocated by the optimization of a single loop. */
6682
6683 static void
6684 free_loop_data (struct ivopts_data *data)
6685 {
6686 unsigned i, j;
6687 bitmap_iterator bi;
6688 tree obj;
6689
6690 if (data->niters)
6691 {
6692 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6693 pointer_map_destroy (data->niters);
6694 data->niters = NULL;
6695 }
6696
6697 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6698 {
6699 struct version_info *info;
6700
6701 info = ver_info (data, i);
6702 free (info->iv);
6703 info->iv = NULL;
6704 info->has_nonlin_use = false;
6705 info->preserve_biv = false;
6706 info->inv_id = 0;
6707 }
6708 bitmap_clear (data->relevant);
6709 bitmap_clear (data->important_candidates);
6710
6711 for (i = 0; i < n_iv_uses (data); i++)
6712 {
6713 struct iv_use *use = iv_use (data, i);
6714
6715 free (use->iv);
6716 BITMAP_FREE (use->related_cands);
6717 for (j = 0; j < use->n_map_members; j++)
6718 if (use->cost_map[j].depends_on)
6719 BITMAP_FREE (use->cost_map[j].depends_on);
6720 free (use->cost_map);
6721 free (use);
6722 }
6723 data->iv_uses.truncate (0);
6724
6725 for (i = 0; i < n_iv_cands (data); i++)
6726 {
6727 struct iv_cand *cand = iv_cand (data, i);
6728
6729 free (cand->iv);
6730 if (cand->depends_on)
6731 BITMAP_FREE (cand->depends_on);
6732 free (cand);
6733 }
6734 data->iv_candidates.truncate (0);
6735
6736 if (data->version_info_size < num_ssa_names)
6737 {
6738 data->version_info_size = 2 * num_ssa_names;
6739 free (data->version_info);
6740 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6741 }
6742
6743 data->max_inv_id = 0;
6744
6745 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6746 SET_DECL_RTL (obj, NULL_RTX);
6747
6748 decl_rtl_to_reset.truncate (0);
6749
6750 data->inv_expr_tab.empty ();
6751 data->inv_expr_id = 0;
6752 }
6753
6754 /* Finalizes data structures used by the iv optimization pass. */
6756
6757 static void
6758 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6759 {
6760 free_loop_data (data);
6761 free (data->version_info);
6762 BITMAP_FREE (data->relevant);
6763 BITMAP_FREE (data->important_candidates);
6764
6765 decl_rtl_to_reset.release ();
6766 data->iv_uses.release ();
6767 data->iv_candidates.release ();
6768 data->inv_expr_tab.dispose ();
6769 }
6770
6771 /* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
includes any function calls, not counting inexpensive builtins. */
6772
6773 static bool
6774 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6775 {
6776 gimple_stmt_iterator gsi;
6777 unsigned i;
6778
6779 for (i = 0; i < num_nodes; i++)
6780 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6781 {
6782 gimple stmt = gsi_stmt (gsi);
6783 if (is_gimple_call (stmt)
6784 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6785 return true;
6786 }
6787 return false;
6788 }
6789
6790 /* Optimizes the LOOP. Returns true if anything changed. */
6791
6792 static bool
6793 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6794 {
6795 bool changed = false;
6796 struct iv_ca *iv_ca;
6797 edge exit = single_dom_exit (loop);
6798 basic_block *body;
6799
6800 gcc_assert (!data->niters);
6801 data->current_loop = loop;
6802 data->speed = optimize_loop_for_speed_p (loop);
6803
6804 if (dump_file && (dump_flags & TDF_DETAILS))
6805 {
6806 fprintf (dump_file, "Processing loop %d\n", loop->num);
6807
6808 if (exit)
6809 {
6810 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6811 exit->src->index, exit->dest->index);
6812 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6813 fprintf (dump_file, "\n");
6814 }
6815
6816 fprintf (dump_file, "\n");
6817 }
6818
6819 body = get_loop_body (loop);
6820 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6821 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6822 free (body);
6823
6824 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6825
6826 /* For each ssa name determines whether it behaves as an induction variable
6827 in some loop. */
6828 if (!find_induction_variables (data))
6829 goto finish;
6830
6831 /* Finds interesting uses (item 1). */
6832 find_interesting_uses (data);
6833 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6834 goto finish;
6835
6836 /* Finds candidates for the induction variables (item 2). */
6837 find_iv_candidates (data);
6838
6839 /* Calculates the costs (item 3, part 1). */
6840 determine_iv_costs (data);
6841 determine_use_iv_costs (data);
6842 determine_set_costs (data);
6843
6844 /* Find the optimal set of induction variables (item 3, part 2). */
6845 iv_ca = find_optimal_iv_set (data);
6846 if (!iv_ca)
6847 goto finish;
6848 changed = true;
6849
6850 /* Create the new induction variables (item 4, part 1). */
6851 create_new_ivs (data, iv_ca);
6852 iv_ca_free (&iv_ca);
6853
6854 /* Rewrite the uses (item 4, part 2). */
6855 rewrite_uses (data);
6856
6857 /* Remove the ivs that are unused after rewriting. */
6858 remove_unused_ivs (data);
6859
6860 /* We have changed the structure of induction variables; it might happen
6861 that definitions in the scev database refer to some of them that were
6862 eliminated. */
6863 scev_reset ();
6864
6865 finish:
6866 free_loop_data (data);
6867
6868 return changed;
6869 }
6870
6871 /* Main entry point. Optimizes induction variables in loops. */
6872
6873 void
6874 tree_ssa_iv_optimize (void)
6875 {
6876 struct loop *loop;
6877 struct ivopts_data data;
6878
6879 tree_ssa_iv_optimize_init (&data);
6880
6881 /* Optimize the loops starting with the innermost ones. */
6882 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6883 {
6884 if (dump_file && (dump_flags & TDF_DETAILS))
6885 flow_loop_dump (loop, dump_file, NULL, 1);
6886
6887 tree_ssa_iv_optimize_loop (&data, loop);
6888 }
6889
6890 tree_ssa_iv_optimize_finalize (&data);
6891 }