/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

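/* Illustrative sketch of the transformation (not from this file): on a
   loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the steps above would typically record the address use &a[i] and the
   compare use i < n, consider among other candidates a pointer iv with
   base &a[0] and step sizeof (a[0]), and, if that candidate wins on cost,
   rewrite the loop along the lines of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   eliminating the original counter when it has no other uses.  The exact
   result depends on the target costs determined below.  */
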
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs wrt the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

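/* An illustrative sketch (not from this file) of where the increment of a
   candidate may be placed:

     <loop body>:
       ...
       i_2 = i_1 + 1;        <- IP_NORMAL: just before the exit
       if (i_2 < n_3)           condition at the end of the body
         goto <latch>;
     <latch>:
       ...                   <- IP_END: at the end of the latch block
       goto <loop body>;

   IP_BEFORE_USE and IP_AFTER_USE place the increment immediately before or
   after the single use the candidate was created for, and IP_ORIGINAL keeps
   the increment of the original biv where it already is.  */
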
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with
         ssa names that appear in phi nodes on abnormal edges, so that we do
         not create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

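/* For example (an illustration, not part of the pass): given an array A
   and a pointer ssa name p_1, determine_base_object (&A[i]) returns &A,
   determine_base_object ((char *) p_1 + 4) strips the conversion and the
   POINTER_PLUS_EXPR and returns p_1, and an integer constant returns
   NULL_TREE, meaning no base object could be determined.  */
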
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, if it was
     not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

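/* For example (illustration only): on a GIMPLE_COND such as
   "if (i_5 < n_2)" where i_5 is an iv with nonzero step and n_2 is loop
   invariant, extract_cond_operands returns true with *CONTROL_VAR
   pointing at i_5 and *BOUND pointing at n_2, regardless of which side
   of the comparison the iv appears on.  */
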
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
        return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}

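/* For example (illustration only): constant_multiple_of (i * 4, i, &m)
   sets m to 4; constant_multiple_of (i * 4 + i * 2, i, &m) sets m to 6
   via the PLUS_EXPR case; constant_multiple_of (12, 4, &m) sets m to 3;
   and constant_multiple_of (13, 4, &m) returns false because the
   division has a nonzero remainder.  */
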
1649 /* Returns true if memory reference REF with step STEP may be unaligned. */
1650
1651 static bool
1652 may_be_unaligned_p (tree ref, tree step)
1653 {
1654 tree base;
1655 tree base_type;
1656 HOST_WIDE_INT bitsize;
1657 HOST_WIDE_INT bitpos;
1658 tree toffset;
1659 enum machine_mode mode;
1660 int unsignedp, volatilep;
1661 unsigned base_align;
1662
1663 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1664 thus they are not misaligned. */
1665 if (TREE_CODE (ref) == TARGET_MEM_REF)
1666 return false;
1667
1668 /* The test below is basically copy of what expr.c:normal_inner_ref
1669 does to check whether the object must be loaded by parts when
1670 STRICT_ALIGNMENT is true. */
1671 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1672 &unsignedp, &volatilep, true);
1673 base_type = TREE_TYPE (base);
1674 base_align = get_object_alignment (base);
1675 base_align = MAX (base_align, TYPE_ALIGN (base_type));
1676
1677 if (mode != BLKmode)
1678 {
1679 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1680
1681 if (base_align < mode_align
1682 || (bitpos % mode_align) != 0
1683 || (bitpos % BITS_PER_UNIT) != 0)
1684 return true;
1685
1686 if (toffset
1687 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1688 return true;
1689
1690 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1691 return true;
1692 }
1693
1694 return false;
1695 }
1696
1697 /* Return true if EXPR may be non-addressable. */
1698
1699 bool
1700 may_be_nonaddressable_p (tree expr)
1701 {
1702 switch (TREE_CODE (expr))
1703 {
1704 case TARGET_MEM_REF:
1705 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1706 target, thus they are always addressable. */
1707 return false;
1708
1709 case COMPONENT_REF:
1710 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1711 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1712
1713 case VIEW_CONVERT_EXPR:
1714 /* This kind of view-conversions may wrap non-addressable objects
1715 and make them look addressable. After some processing the
1716 non-addressability may be uncovered again, causing ADDR_EXPRs
1717 of inappropriate objects to be built. */
1718 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1719 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1720 return true;
1721
1722 /* ... fall through ... */
1723
1724 case ARRAY_REF:
1725 case ARRAY_RANGE_REF:
1726 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1727
1728 CASE_CONVERT:
1729 return true;
1730
1731 default:
1732 break;
1733 }
1734
1735 return false;
1736 }
1737
1738 /* Finds addresses in *OP_P inside STMT. */
1739
1740 static void
1741 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1742 {
1743 tree base = *op_p, step = size_zero_node;
1744 struct iv *civ;
1745 struct ifs_ivopts_data ifs_ivopts_data;
1746
1747 /* Do not play with volatile memory references. A bit too conservative,
1748 perhaps, but safe. */
1749 if (gimple_has_volatile_ops (stmt))
1750 goto fail;
1751
1752 /* Ignore bitfields for now. Not really something terribly complicated
1753 to handle. TODO. */
1754 if (TREE_CODE (base) == BIT_FIELD_REF)
1755 goto fail;
1756
1757 base = unshare_expr (base);
1758
1759 if (TREE_CODE (base) == TARGET_MEM_REF)
1760 {
1761 tree type = build_pointer_type (TREE_TYPE (base));
1762 tree astep;
1763
1764 if (TMR_BASE (base)
1765 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1766 {
1767 civ = get_iv (data, TMR_BASE (base));
1768 if (!civ)
1769 goto fail;
1770
1771 TMR_BASE (base) = civ->base;
1772 step = civ->step;
1773 }
1774 if (TMR_INDEX2 (base)
1775 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1776 {
1777 civ = get_iv (data, TMR_INDEX2 (base));
1778 if (!civ)
1779 goto fail;
1780
1781 TMR_INDEX2 (base) = civ->base;
1782 step = civ->step;
1783 }
1784 if (TMR_INDEX (base)
1785 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1786 {
1787 civ = get_iv (data, TMR_INDEX (base));
1788 if (!civ)
1789 goto fail;
1790
1791 TMR_INDEX (base) = civ->base;
1792 astep = civ->step;
1793
1794 if (astep)
1795 {
1796 if (TMR_STEP (base))
1797 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1798
1799 step = fold_build2 (PLUS_EXPR, type, step, astep);
1800 }
1801 }
1802
1803 if (integer_zerop (step))
1804 goto fail;
1805 base = tree_mem_ref_addr (type, base);
1806 }
1807 else
1808 {
1809 ifs_ivopts_data.ivopts_data = data;
1810 ifs_ivopts_data.stmt = stmt;
1811 ifs_ivopts_data.step = size_zero_node;
1812 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1813 || integer_zerop (ifs_ivopts_data.step))
1814 goto fail;
1815 step = ifs_ivopts_data.step;
1816
1817 /* Check that the base expression is addressable. This needs
1818 to be done after substituting bases of IVs into it. */
1819 if (may_be_nonaddressable_p (base))
1820 goto fail;
1821
1822 /* Moreover, on strict alignment platforms, check that it is
1823 sufficiently aligned. */
1824 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1825 goto fail;
1826
1827 base = build_fold_addr_expr (base);
1828
1829 /* Substituting bases of IVs into the base expression might
1830 have caused folding opportunities. */
1831 if (TREE_CODE (base) == ADDR_EXPR)
1832 {
1833 tree *ref = &TREE_OPERAND (base, 0);
1834 while (handled_component_p (*ref))
1835 ref = &TREE_OPERAND (*ref, 0);
1836 if (TREE_CODE (*ref) == MEM_REF)
1837 {
1838 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1839 TREE_OPERAND (*ref, 0),
1840 TREE_OPERAND (*ref, 1));
1841 if (tem)
1842 *ref = tem;
1843 }
1844 }
1845 }
1846
1847 civ = alloc_iv (base, step);
1848 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1849 return;
1850
1851 fail:
1852 for_each_index (op_p, idx_record_use, data);
1853 }
1854
1855 /* Finds and records invariants used in STMT. */
1856
1857 static void
1858 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1859 {
1860 ssa_op_iter iter;
1861 use_operand_p use_p;
1862 tree op;
1863
1864 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1865 {
1866 op = USE_FROM_PTR (use_p);
1867 record_invariant (data, op, false);
1868 }
1869 }
1870
1871 /* Finds interesting uses of induction variables in the statement STMT. */
1872
1873 static void
1874 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1875 {
1876 struct iv *iv;
1877 tree op, *lhs, *rhs;
1878 ssa_op_iter iter;
1879 use_operand_p use_p;
1880 enum tree_code code;
1881
1882 find_invariants_stmt (data, stmt);
1883
1884 if (gimple_code (stmt) == GIMPLE_COND)
1885 {
1886 find_interesting_uses_cond (data, stmt);
1887 return;
1888 }
1889
1890 if (is_gimple_assign (stmt))
1891 {
1892 lhs = gimple_assign_lhs_ptr (stmt);
1893 rhs = gimple_assign_rhs1_ptr (stmt);
1894
1895 if (TREE_CODE (*lhs) == SSA_NAME)
1896 {
1897 /* If the statement defines an induction variable, the uses are not
1898 interesting by themselves. */
1899
1900 iv = get_iv (data, *lhs);
1901
1902 if (iv && !integer_zerop (iv->step))
1903 return;
1904 }
1905
1906 code = gimple_assign_rhs_code (stmt);
1907 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1908 && (REFERENCE_CLASS_P (*rhs)
1909 || is_gimple_val (*rhs)))
1910 {
1911 if (REFERENCE_CLASS_P (*rhs))
1912 find_interesting_uses_address (data, stmt, rhs);
1913 else
1914 find_interesting_uses_op (data, *rhs);
1915
1916 if (REFERENCE_CLASS_P (*lhs))
1917 find_interesting_uses_address (data, stmt, lhs);
1918 return;
1919 }
1920 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1921 {
1922 find_interesting_uses_cond (data, stmt);
1923 return;
1924 }
1925
1926 /* TODO -- we should also handle address uses of type
1927
1928 memory = call (whatever);
1929
1930 and
1931
1932 call (memory). */
1933 }
1934
1935 if (gimple_code (stmt) == GIMPLE_PHI
1936 && gimple_bb (stmt) == data->current_loop->header)
1937 {
1938 iv = get_iv (data, PHI_RESULT (stmt));
1939
1940 if (iv && !integer_zerop (iv->step))
1941 return;
1942 }
1943
1944 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1945 {
1946 op = USE_FROM_PTR (use_p);
1947
1948 if (TREE_CODE (op) != SSA_NAME)
1949 continue;
1950
1951 iv = get_iv (data, op);
1952 if (!iv)
1953 continue;
1954
1955 find_interesting_uses_op (data, op);
1956 }
1957 }
1958
1959 /* Finds interesting uses of induction variables outside of loops
1960 on loop exit edge EXIT. */
1961
1962 static void
1963 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1964 {
1965 gimple phi;
1966 gimple_stmt_iterator psi;
1967 tree def;
1968
1969 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1970 {
1971 phi = gsi_stmt (psi);
1972 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1973 if (!virtual_operand_p (def))
1974 find_interesting_uses_op (data, def);
1975 }
1976 }
1977
1978 /* Finds uses of the induction variables that are interesting. */
1979
1980 static void
1981 find_interesting_uses (struct ivopts_data *data)
1982 {
1983 basic_block bb;
1984 gimple_stmt_iterator bsi;
1985 basic_block *body = get_loop_body (data->current_loop);
1986 unsigned i;
1987 struct version_info *info;
1988 edge e;
1989
1990 if (dump_file && (dump_flags & TDF_DETAILS))
1991 fprintf (dump_file, "Uses:\n\n");
1992
1993 for (i = 0; i < data->current_loop->num_nodes; i++)
1994 {
1995 edge_iterator ei;
1996 bb = body[i];
1997
1998 FOR_EACH_EDGE (e, ei, bb->succs)
1999 if (e->dest != EXIT_BLOCK_PTR
2000 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2001 find_interesting_uses_outside (data, e);
2002
2003 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2004 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2005 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2006 if (!is_gimple_debug (gsi_stmt (bsi)))
2007 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2008 }
2009
2010 if (dump_file && (dump_flags & TDF_DETAILS))
2011 {
2012 bitmap_iterator bi;
2013
2014 fprintf (dump_file, "\n");
2015
2016 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2017 {
2018 info = ver_info (data, i);
2019 if (info->inv_id)
2020 {
2021 fprintf (dump_file, " ");
2022 print_generic_expr (dump_file, info->name, TDF_SLIM);
2023 fprintf (dump_file, " is invariant (%d)%s\n",
2024 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2025 }
2026 }
2027
2028 fprintf (dump_file, "\n");
2029 }
2030
2031 free (body);
2032 }
2033
2034 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2035 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2036 we are at the top-level of the processed address. */
2037
2038 static tree
2039 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2040 unsigned HOST_WIDE_INT *offset)
2041 {
2042 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2043 enum tree_code code;
2044 tree type, orig_type = TREE_TYPE (expr);
2045 unsigned HOST_WIDE_INT off0, off1, st;
2046 tree orig_expr = expr;
2047
2048 STRIP_NOPS (expr);
2049
2050 type = TREE_TYPE (expr);
2051 code = TREE_CODE (expr);
2052 *offset = 0;
2053
2054 switch (code)
2055 {
2056 case INTEGER_CST:
2057 if (!cst_and_fits_in_hwi (expr)
2058 || integer_zerop (expr))
2059 return orig_expr;
2060
2061 *offset = int_cst_value (expr);
2062 return build_int_cst (orig_type, 0);
2063
2064 case POINTER_PLUS_EXPR:
2065 case PLUS_EXPR:
2066 case MINUS_EXPR:
2067 op0 = TREE_OPERAND (expr, 0);
2068 op1 = TREE_OPERAND (expr, 1);
2069
2070 op0 = strip_offset_1 (op0, false, false, &off0);
2071 op1 = strip_offset_1 (op1, false, false, &off1);
2072
2073 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2074 if (op0 == TREE_OPERAND (expr, 0)
2075 && op1 == TREE_OPERAND (expr, 1))
2076 return orig_expr;
2077
2078 if (integer_zerop (op1))
2079 expr = op0;
2080 else if (integer_zerop (op0))
2081 {
2082 if (code == MINUS_EXPR)
2083 expr = fold_build1 (NEGATE_EXPR, type, op1);
2084 else
2085 expr = op1;
2086 }
2087 else
2088 expr = fold_build2 (code, type, op0, op1);
2089
2090 return fold_convert (orig_type, expr);
2091
2092 case MULT_EXPR:
2093 op1 = TREE_OPERAND (expr, 1);
2094 if (!cst_and_fits_in_hwi (op1))
2095 return orig_expr;
2096
2097 op0 = TREE_OPERAND (expr, 0);
2098 op0 = strip_offset_1 (op0, false, false, &off0);
2099 if (op0 == TREE_OPERAND (expr, 0))
2100 return orig_expr;
2101
2102 *offset = off0 * int_cst_value (op1);
2103 if (integer_zerop (op0))
2104 expr = op0;
2105 else
2106 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2107
2108 return fold_convert (orig_type, expr);
2109
2110 case ARRAY_REF:
2111 case ARRAY_RANGE_REF:
2112 if (!inside_addr)
2113 return orig_expr;
2114
2115 step = array_ref_element_size (expr);
2116 if (!cst_and_fits_in_hwi (step))
2117 break;
2118
2119 st = int_cst_value (step);
2120 op1 = TREE_OPERAND (expr, 1);
2121 op1 = strip_offset_1 (op1, false, false, &off1);
2122 *offset = off1 * st;
2123
2124 if (top_compref
2125 && integer_zerop (op1))
2126 {
2127 /* Strip the component reference completely. */
2128 op0 = TREE_OPERAND (expr, 0);
2129 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2130 *offset += off0;
2131 return op0;
2132 }
2133 break;
2134
2135 case COMPONENT_REF:
2136 if (!inside_addr)
2137 return orig_expr;
2138
2139 tmp = component_ref_field_offset (expr);
2140 if (top_compref
2141 && cst_and_fits_in_hwi (tmp))
2142 {
2143 /* Strip the component reference completely. */
2144 op0 = TREE_OPERAND (expr, 0);
2145 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2146 *offset = off0 + int_cst_value (tmp);
2147 return op0;
2148 }
2149 break;
2150
2151 case ADDR_EXPR:
2152 op0 = TREE_OPERAND (expr, 0);
2153 op0 = strip_offset_1 (op0, true, true, &off0);
2154 *offset += off0;
2155
2156 if (op0 == TREE_OPERAND (expr, 0))
2157 return orig_expr;
2158
2159 expr = build_fold_addr_expr (op0);
2160 return fold_convert (orig_type, expr);
2161
2162 case MEM_REF:
2163 /* ??? Offset operand? */
2164 inside_addr = false;
2165 break;
2166
2167 default:
2168 return orig_expr;
2169 }
2170
2171 /* Default handling of expressions for which we want to recurse into
2172 the first operand. */
2173 op0 = TREE_OPERAND (expr, 0);
2174 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2175 *offset += off0;
2176
2177 if (op0 == TREE_OPERAND (expr, 0)
2178 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2179 return orig_expr;
2180
2181 expr = copy_node (expr);
2182 TREE_OPERAND (expr, 0) = op0;
2183 if (op1)
2184 TREE_OPERAND (expr, 1) = op1;
2185
2186 /* Inside an address, we might strip the top-level component references,
2187 thus changing the type of the expression. Handling of ADDR_EXPR
2188 will fix that. */
2189 expr = fold_convert (orig_type, expr);
2190
2191 return expr;
2192 }
2193
2194 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2195
2196 static tree
2197 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2198 {
2199 return strip_offset_1 (expr, false, false, offset);
2200 }
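
/* A worked example (hypothetical input): for a 4-byte "int a[100]",
   applying strip_offset to the address &a[i + 3] yields the base
   expression &a[i] and stores the byte offset 12 (== 3 * sizeof (int))
   to *OFFSET.  */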
2201
2202 /* Returns a variant of TYPE that can be used as a base for different uses.
2203 We return an unsigned type with the same precision, which avoids problems
2204 with overflows. */
2205
2206 static tree
2207 generic_type_for (tree type)
2208 {
2209 if (POINTER_TYPE_P (type))
2210 return unsigned_type_for (type);
2211
2212 if (TYPE_UNSIGNED (type))
2213 return type;
2214
2215 return unsigned_type_for (type);
2216 }
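
/* For example, "int" maps to "unsigned int", and a pointer type maps to
   an unsigned integer type of the same precision; a type that is
   already unsigned is returned unchanged.  */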
2217
2218 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2219 the bitmap in which we should store them. */
2220
2221 static struct ivopts_data *fd_ivopts_data;
2222 static tree
2223 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2224 {
2225 bitmap *depends_on = (bitmap *) data;
2226 struct version_info *info;
2227
2228 if (TREE_CODE (*expr_p) != SSA_NAME)
2229 return NULL_TREE;
2230 info = name_info (fd_ivopts_data, *expr_p);
2231
2232 if (!info->inv_id || info->has_nonlin_use)
2233 return NULL_TREE;
2234
2235 if (!*depends_on)
2236 *depends_on = BITMAP_ALLOC (NULL);
2237 bitmap_set_bit (*depends_on, info->inv_id);
2238
2239 return NULL_TREE;
2240 }
2241
2242 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2243 position to POS. If USE is not NULL, the candidate is set as related to
2244 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2245 replacement of the final value of the iv by a direct computation. */
2246
2247 static struct iv_cand *
2248 add_candidate_1 (struct ivopts_data *data,
2249 tree base, tree step, bool important, enum iv_position pos,
2250 struct iv_use *use, gimple incremented_at)
2251 {
2252 unsigned i;
2253 struct iv_cand *cand = NULL;
2254 tree type, orig_type;
2255
2256 /* For non-original variables, make sure their values are computed in a type
2257 that does not invoke undefined behavior on overflows (since in general,
2258 we cannot prove that these induction variables are non-wrapping). */
2259 if (pos != IP_ORIGINAL)
2260 {
2261 orig_type = TREE_TYPE (base);
2262 type = generic_type_for (orig_type);
2263 if (type != orig_type)
2264 {
2265 base = fold_convert (type, base);
2266 step = fold_convert (type, step);
2267 }
2268 }
2269
2270 for (i = 0; i < n_iv_cands (data); i++)
2271 {
2272 cand = iv_cand (data, i);
2273
2274 if (cand->pos != pos)
2275 continue;
2276
2277 if (cand->incremented_at != incremented_at
2278 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2279 && cand->ainc_use != use))
2280 continue;
2281
2282 if (!cand->iv)
2283 {
2284 if (!base && !step)
2285 break;
2286
2287 continue;
2288 }
2289
2290 if (!base && !step)
2291 continue;
2292
2293 if (operand_equal_p (base, cand->iv->base, 0)
2294 && operand_equal_p (step, cand->iv->step, 0)
2295 && (TYPE_PRECISION (TREE_TYPE (base))
2296 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2297 break;
2298 }
2299
2300 if (i == n_iv_cands (data))
2301 {
2302 cand = XCNEW (struct iv_cand);
2303 cand->id = i;
2304
2305 if (!base && !step)
2306 cand->iv = NULL;
2307 else
2308 cand->iv = alloc_iv (base, step);
2309
2310 cand->pos = pos;
2311 if (pos != IP_ORIGINAL && cand->iv)
2312 {
2313 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2314 cand->var_after = cand->var_before;
2315 }
2316 cand->important = important;
2317 cand->incremented_at = incremented_at;
2318 data->iv_candidates.safe_push (cand);
2319
2320 if (step
2321 && TREE_CODE (step) != INTEGER_CST)
2322 {
2323 fd_ivopts_data = data;
2324 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2325 }
2326
2327 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2328 cand->ainc_use = use;
2329 else
2330 cand->ainc_use = NULL;
2331
2332 if (dump_file && (dump_flags & TDF_DETAILS))
2333 dump_cand (dump_file, cand);
2334 }
2335
2336 if (important && !cand->important)
2337 {
2338 cand->important = true;
2339 if (dump_file && (dump_flags & TDF_DETAILS))
2340 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2341 }
2342
2343 if (use)
2344 {
2345 bitmap_set_bit (use->related_cands, i);
2346 if (dump_file && (dump_flags & TDF_DETAILS))
2347 fprintf (dump_file, "Candidate %d is related to use %d\n",
2348 cand->id, use->id);
2349 }
2350
2351 return cand;
2352 }
2353
2354 /* Returns true if incrementing the induction variable at the end of the LOOP
2355 is allowed.
2356
2357 The purpose is to avoid splitting the latch edge with a biv increment,
2358 thus creating a jump that could confuse other optimization passes and
2359 leave less freedom to the scheduler. So we allow IP_END_POS only if
2360 IP_NORMAL_POS is not available (so we do not have a better alternative),
2361 or if the latch edge is already nonempty. */
2362
2363 static bool
2364 allow_ip_end_pos_p (struct loop *loop)
2365 {
2366 if (!ip_normal_pos (loop))
2367 return true;
2368
2369 if (!empty_block_p (ip_end_pos (loop)))
2370 return true;
2371
2372 return false;
2373 }
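
/* A rough sketch of the two candidate positions, assuming the usual
   do-while shape of the loop body:

     header:
       i_1 = PHI <0 (preheader), i_2 (latch)>
       ...
       i_2 = i_1 + 1;                <-- IP_NORMAL, before the exit test
       if (i_2 < n_3) goto latch; else goto exit;
     latch:
       goto header;                  <-- IP_END, on the latch edge

   Inserting the increment at IP_END when the latch block is empty would
   require filling that block, which is exactly what this predicate
   guards against.  */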
2374
2375 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2376 Important field is set to IMPORTANT. */
2377
2378 static void
2379 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2380 bool important, struct iv_use *use)
2381 {
2382 basic_block use_bb = gimple_bb (use->stmt);
2383 enum machine_mode mem_mode;
2384 unsigned HOST_WIDE_INT cstepi;
2385
2386 /* If we insert the increment in any position other than the standard
2387 ones, we must ensure that it is incremented once per iteration.
2388 It must not be in an inner nested loop, or one side of an if
2389 statement. */
2390 if (use_bb->loop_father != data->current_loop
2391 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2392 || stmt_could_throw_p (use->stmt)
2393 || !cst_and_fits_in_hwi (step))
2394 return;
2395
2396 cstepi = int_cst_value (step);
2397
2398 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2399 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2400 || USE_STORE_PRE_INCREMENT (mem_mode))
2401 && GET_MODE_SIZE (mem_mode) == cstepi)
2402 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2403 || USE_STORE_PRE_DECREMENT (mem_mode))
2404 && GET_MODE_SIZE (mem_mode) == -cstepi))
2405 {
2406 enum tree_code code = MINUS_EXPR;
2407 tree new_base;
2408 tree new_step = step;
2409
2410 if (POINTER_TYPE_P (TREE_TYPE (base)))
2411 {
2412 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2413 code = POINTER_PLUS_EXPR;
2414 }
2415 else
2416 new_step = fold_convert (TREE_TYPE (base), new_step);
2417 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2418 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2419 use->stmt);
2420 }
2421 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2422 || USE_STORE_POST_INCREMENT (mem_mode))
2423 && GET_MODE_SIZE (mem_mode) == cstepi)
2424 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2425 || USE_STORE_POST_DECREMENT (mem_mode))
2426 && GET_MODE_SIZE (mem_mode) == -cstepi))
2427 {
2428 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2429 use->stmt);
2430 }
2431 }
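
/* Example (for a hypothetical target): with a 4-byte MEM_MODE and a
   candidate step of +4, post-increment addressing yields an
   IP_AFTER_USE candidate {base, +4}, while pre-increment addressing
   yields an IP_BEFORE_USE candidate whose base is shifted back by one
   step, i.e. {base - 4, +4}.  */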
2432
2433 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2434 position to POS. If USE is not NULL, the candidate is set as related to
2435 it. The candidate computation is scheduled on all available positions. */
2436
2437 static void
2438 add_candidate (struct ivopts_data *data,
2439 tree base, tree step, bool important, struct iv_use *use)
2440 {
2441 if (ip_normal_pos (data->current_loop))
2442 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2443 if (ip_end_pos (data->current_loop)
2444 && allow_ip_end_pos_p (data->current_loop))
2445 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2446
2447 if (use != NULL && use->type == USE_ADDRESS)
2448 add_autoinc_candidates (data, base, step, important, use);
2449 }
2450
2451 /* Adds standard iv candidates. */
2452
2453 static void
2454 add_standard_iv_candidates (struct ivopts_data *data)
2455 {
2456 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2457
2458 /* The same for a double-integer type if it is still fast enough. */
2459 if (TYPE_PRECISION
2460 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2461 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2462 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2463 build_int_cst (long_integer_type_node, 1), true, NULL);
2464
2465 /* Likewise for an even wider integer type, if it is still fast enough. */
2466 if (TYPE_PRECISION
2467 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2468 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2469 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2470 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2471 }
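
/* On a typical LP64 target this adds the candidates {0, +1} in int and
   {0, +1} in long; the long long variant is then skipped, since long
   long is no wider than long there.  */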
2472
2473
2474 /* Adds candidates based on the old induction variable IV. */
2475
2476 static void
2477 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2478 {
2479 gimple phi;
2480 tree def;
2481 struct iv_cand *cand;
2482
2483 add_candidate (data, iv->base, iv->step, true, NULL);
2484
2485 /* The same, but with initial value zero. */
2486 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2487 add_candidate (data, size_int (0), iv->step, true, NULL);
2488 else
2489 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2490 iv->step, true, NULL);
2491
2492 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2493 if (gimple_code (phi) == GIMPLE_PHI)
2494 {
2495 /* Additionally record the possibility of leaving the original iv
2496 untouched. */
2497 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2498 cand = add_candidate_1 (data,
2499 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2500 SSA_NAME_DEF_STMT (def));
2501 cand->var_before = iv->ssa_name;
2502 cand->var_after = def;
2503 }
2504 }
2505
2506 /* Adds candidates based on the old induction variables. */
2507
2508 static void
2509 add_old_ivs_candidates (struct ivopts_data *data)
2510 {
2511 unsigned i;
2512 struct iv *iv;
2513 bitmap_iterator bi;
2514
2515 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2516 {
2517 iv = ver_info (data, i)->iv;
2518 if (iv && iv->biv_p && !integer_zerop (iv->step))
2519 add_old_iv_candidates (data, iv);
2520 }
2521 }
2522
2523 /* Adds candidates based on the value of the induction variable IV and USE. */
2524
2525 static void
2526 add_iv_value_candidates (struct ivopts_data *data,
2527 struct iv *iv, struct iv_use *use)
2528 {
2529 unsigned HOST_WIDE_INT offset;
2530 tree base;
2531 tree basetype;
2532
2533 add_candidate (data, iv->base, iv->step, false, use);
2534
2535 /* The same, but with initial value zero. Make such a variable
2536 important, since it is generic enough that many uses may be based
2537 on it. */
2538 basetype = TREE_TYPE (iv->base);
2539 if (POINTER_TYPE_P (basetype))
2540 basetype = sizetype;
2541 add_candidate (data, build_int_cst (basetype, 0),
2542 iv->step, true, use);
2543
2544 /* Third, try removing the constant offset. Make sure to even
2545 add a candidate for &a[0] vs. (T *)&a. */
2546 base = strip_offset (iv->base, &offset);
2547 if (offset
2548 || base != iv->base)
2549 add_candidate (data, base, iv->step, false, use);
2550 }
2551
2552 /* Adds candidates based on the uses. */
2553
2554 static void
2555 add_derived_ivs_candidates (struct ivopts_data *data)
2556 {
2557 unsigned i;
2558
2559 for (i = 0; i < n_iv_uses (data); i++)
2560 {
2561 struct iv_use *use = iv_use (data, i);
2562
2563 if (!use)
2564 continue;
2565
2566 switch (use->type)
2567 {
2568 case USE_NONLINEAR_EXPR:
2569 case USE_COMPARE:
2570 case USE_ADDRESS:
2571 /* Just add the ivs based on the value of the iv used here. */
2572 add_iv_value_candidates (data, use->iv, use);
2573 break;
2574
2575 default:
2576 gcc_unreachable ();
2577 }
2578 }
2579 }
2580
2581 /* Record important candidates and add them to related_cands bitmaps
2582 if needed. */
2583
2584 static void
2585 record_important_candidates (struct ivopts_data *data)
2586 {
2587 unsigned i;
2588 struct iv_use *use;
2589
2590 for (i = 0; i < n_iv_cands (data); i++)
2591 {
2592 struct iv_cand *cand = iv_cand (data, i);
2593
2594 if (cand->important)
2595 bitmap_set_bit (data->important_candidates, i);
2596 }
2597
2598 data->consider_all_candidates = (n_iv_cands (data)
2599 <= CONSIDER_ALL_CANDIDATES_BOUND);
2600
2601 if (data->consider_all_candidates)
2602 {
2603 /* We will not need "related_cands" bitmaps in this case,
2604 so release them to decrease peak memory consumption. */
2605 for (i = 0; i < n_iv_uses (data); i++)
2606 {
2607 use = iv_use (data, i);
2608 BITMAP_FREE (use->related_cands);
2609 }
2610 }
2611 else
2612 {
2613 /* Add important candidates to the related_cands bitmaps. */
2614 for (i = 0; i < n_iv_uses (data); i++)
2615 bitmap_ior_into (iv_use (data, i)->related_cands,
2616 data->important_candidates);
2617 }
2618 }
2619
2620 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2621 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2622 we allocate a simple list for every use. */
2623
2624 static void
2625 alloc_use_cost_map (struct ivopts_data *data)
2626 {
2627 unsigned i, size, s;
2628
2629 for (i = 0; i < n_iv_uses (data); i++)
2630 {
2631 struct iv_use *use = iv_use (data, i);
2632
2633 if (data->consider_all_candidates)
2634 size = n_iv_cands (data);
2635 else
2636 {
2637 s = bitmap_count_bits (use->related_cands);
2638
2639 /* Round up to a power of two, so that computing the modulus by it is fast. */
2640 size = s ? (1 << ceil_log2 (s)) : 1;
2641 }
2642
2643 use->n_map_members = size;
2644 use->cost_map = XCNEWVEC (struct cost_pair, size);
2645 }
2646 }
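
/* For instance, a use with 5 related candidates gets a table of
   1 << ceil_log2 (5) == 8 slots, so that set_use_iv_cost can reduce
   candidate ids with a simple mask instead of a division.  */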
2647
2648 /* Returns a description of the computation cost of an expression whose
2649 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2650
2651 static comp_cost
2652 new_cost (unsigned runtime, unsigned complexity)
2653 {
2654 comp_cost cost;
2655
2656 cost.cost = runtime;
2657 cost.complexity = complexity;
2658
2659 return cost;
2660 }
2661
2662 /* Adds costs COST1 and COST2. */
2663
2664 static comp_cost
2665 add_costs (comp_cost cost1, comp_cost cost2)
2666 {
2667 cost1.cost += cost2.cost;
2668 cost1.complexity += cost2.complexity;
2669
2670 return cost1;
2671 }
2672 /* Subtracts costs COST1 and COST2. */
2673
2674 static comp_cost
2675 sub_costs (comp_cost cost1, comp_cost cost2)
2676 {
2677 cost1.cost -= cost2.cost;
2678 cost1.complexity -= cost2.complexity;
2679
2680 return cost1;
2681 }
2682
2683 /* Returns a negative number if COST1 < COST2, a positive number if
2684 COST1 > COST2, and 0 if COST1 = COST2. */
2685
2686 static int
2687 compare_costs (comp_cost cost1, comp_cost cost2)
2688 {
2689 if (cost1.cost == cost2.cost)
2690 return cost1.complexity - cost2.complexity;
2691
2692 return cost1.cost - cost2.cost;
2693 }
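
/* E.g. {cost 4, complexity 1} compares less than {cost 5, complexity 0};
   complexity only breaks ties between equal costs, as in
   {4, 1} < {4, 2}.  */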
2694
2695 /* Returns true if COST is infinite. */
2696
2697 static bool
2698 infinite_cost_p (comp_cost cost)
2699 {
2700 return cost.cost == INFTY;
2701 }
2702
2703 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2704 depends on invariants DEPENDS_ON, that the value used in expressing it
2705 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
2706
2707 static void
2708 set_use_iv_cost (struct ivopts_data *data,
2709 struct iv_use *use, struct iv_cand *cand,
2710 comp_cost cost, bitmap depends_on, tree value,
2711 enum tree_code comp, int inv_expr_id)
2712 {
2713 unsigned i, s;
2714
2715 if (infinite_cost_p (cost))
2716 {
2717 BITMAP_FREE (depends_on);
2718 return;
2719 }
2720
2721 if (data->consider_all_candidates)
2722 {
2723 use->cost_map[cand->id].cand = cand;
2724 use->cost_map[cand->id].cost = cost;
2725 use->cost_map[cand->id].depends_on = depends_on;
2726 use->cost_map[cand->id].value = value;
2727 use->cost_map[cand->id].comp = comp;
2728 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2729 return;
2730 }
2731
2732 /* n_map_members is a power of two, so this computes modulo. */
2733 s = cand->id & (use->n_map_members - 1);
2734 for (i = s; i < use->n_map_members; i++)
2735 if (!use->cost_map[i].cand)
2736 goto found;
2737 for (i = 0; i < s; i++)
2738 if (!use->cost_map[i].cand)
2739 goto found;
2740
2741 gcc_unreachable ();
2742
2743 found:
2744 use->cost_map[i].cand = cand;
2745 use->cost_map[i].cost = cost;
2746 use->cost_map[i].depends_on = depends_on;
2747 use->cost_map[i].value = value;
2748 use->cost_map[i].comp = comp;
2749 use->cost_map[i].inv_expr_id = inv_expr_id;
2750 }
2751
2752 /* Gets cost of (USE, CANDIDATE) pair. */
2753
2754 static struct cost_pair *
2755 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2756 struct iv_cand *cand)
2757 {
2758 unsigned i, s;
2759 struct cost_pair *ret;
2760
2761 if (!cand)
2762 return NULL;
2763
2764 if (data->consider_all_candidates)
2765 {
2766 ret = use->cost_map + cand->id;
2767 if (!ret->cand)
2768 return NULL;
2769
2770 return ret;
2771 }
2772
2773 /* n_map_members is a power of two, so this computes modulo. */
2774 s = cand->id & (use->n_map_members - 1);
2775 for (i = s; i < use->n_map_members; i++)
2776 if (use->cost_map[i].cand == cand)
2777 return use->cost_map + i;
2778 else if (use->cost_map[i].cand == NULL)
2779 return NULL;
2780 for (i = 0; i < s; i++)
2781 if (use->cost_map[i].cand == cand)
2782 return use->cost_map + i;
2783 else if (use->cost_map[i].cand == NULL)
2784 return NULL;
2785
2786 return NULL;
2787 }
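
/* Illustration of the probing scheme: with n_map_members == 8, a
   candidate with id 13 starts at slot 13 & 7 == 5 and probes linearly
   through slots 6 and 7 before wrapping around to slot 0.  */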
2788
2789 /* Returns an estimate of the cost of computing SEQ. */
2790
2791 static unsigned
2792 seq_cost (rtx seq, bool speed)
2793 {
2794 unsigned cost = 0;
2795 rtx set;
2796
2797 for (; seq; seq = NEXT_INSN (seq))
2798 {
2799 set = single_set (seq);
2800 if (set)
2801 cost += set_src_cost (SET_SRC (set), speed);
2802 else
2803 cost++;
2804 }
2805
2806 return cost;
2807 }
2808
2809 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2810 static rtx
2811 produce_memory_decl_rtl (tree obj, int *regno)
2812 {
2813 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2814 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2815 rtx x;
2816
2817 gcc_assert (obj);
2818 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2819 {
2820 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2821 x = gen_rtx_SYMBOL_REF (address_mode, name);
2822 SET_SYMBOL_REF_DECL (x, obj);
2823 x = gen_rtx_MEM (DECL_MODE (obj), x);
2824 set_mem_addr_space (x, as);
2825 targetm.encode_section_info (obj, x, true);
2826 }
2827 else
2828 {
2829 x = gen_raw_REG (address_mode, (*regno)++);
2830 x = gen_rtx_MEM (DECL_MODE (obj), x);
2831 set_mem_addr_space (x, as);
2832 }
2833
2834 return x;
2835 }
2836
2837 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2838 walk_tree. DATA contains the next fake register number to use. */
2839
2840 static tree
2841 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2842 {
2843 tree obj = NULL_TREE;
2844 rtx x = NULL_RTX;
2845 int *regno = (int *) data;
2846
2847 switch (TREE_CODE (*expr_p))
2848 {
2849 case ADDR_EXPR:
2850 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2851 handled_component_p (*expr_p);
2852 expr_p = &TREE_OPERAND (*expr_p, 0))
2853 continue;
2854 obj = *expr_p;
2855 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2856 x = produce_memory_decl_rtl (obj, regno);
2857 break;
2858
2859 case SSA_NAME:
2860 *ws = 0;
2861 obj = SSA_NAME_VAR (*expr_p);
2862 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2863 if (!obj)
2864 return NULL_TREE;
2865 if (!DECL_RTL_SET_P (obj))
2866 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2867 break;
2868
2869 case VAR_DECL:
2870 case PARM_DECL:
2871 case RESULT_DECL:
2872 *ws = 0;
2873 obj = *expr_p;
2874
2875 if (DECL_RTL_SET_P (obj))
2876 break;
2877
2878 if (DECL_MODE (obj) == BLKmode)
2879 x = produce_memory_decl_rtl (obj, regno);
2880 else
2881 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2882
2883 break;
2884
2885 default:
2886 break;
2887 }
2888
2889 if (x)
2890 {
2891 decl_rtl_to_reset.safe_push (obj);
2892 SET_DECL_RTL (obj, x);
2893 }
2894
2895 return NULL_TREE;
2896 }
2897
2898 /* Determines cost of the computation of EXPR. */
2899
2900 static unsigned
2901 computation_cost (tree expr, bool speed)
2902 {
2903 rtx seq, rslt;
2904 tree type = TREE_TYPE (expr);
2905 unsigned cost;
2906 /* Avoid using hard regs in ways which may be unsupported. */
2907 int regno = LAST_VIRTUAL_REGISTER + 1;
2908 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2909 enum node_frequency real_frequency = node->frequency;
2910
2911 node->frequency = NODE_FREQUENCY_NORMAL;
2912 crtl->maybe_hot_insn_p = speed;
2913 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2914 start_sequence ();
2915 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2916 seq = get_insns ();
2917 end_sequence ();
2918 default_rtl_profile ();
2919 node->frequency = real_frequency;
2920
2921 cost = seq_cost (seq, speed);
2922 if (MEM_P (rslt))
2923 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2924 TYPE_ADDR_SPACE (type), speed);
2925 else if (!REG_P (rslt))
2926 cost += set_src_cost (rslt, speed);
2927
2928 return cost;
2929 }
2930
2931 /* Returns variable containing the value of candidate CAND at statement AT. */
2932
2933 static tree
2934 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2935 {
2936 if (stmt_after_increment (loop, cand, stmt))
2937 return cand->var_after;
2938 else
2939 return cand->var_before;
2940 }
2941
2942 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2943 same precision that is at least as wide as the precision of TYPE, stores
2944 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2945 type of A and B. */
2946
2947 static tree
2948 determine_common_wider_type (tree *a, tree *b)
2949 {
2950 tree wider_type = NULL;
2951 tree suba, subb;
2952 tree atype = TREE_TYPE (*a);
2953
2954 if (CONVERT_EXPR_P (*a))
2955 {
2956 suba = TREE_OPERAND (*a, 0);
2957 wider_type = TREE_TYPE (suba);
2958 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2959 return atype;
2960 }
2961 else
2962 return atype;
2963
2964 if (CONVERT_EXPR_P (*b))
2965 {
2966 subb = TREE_OPERAND (*b, 0);
2967 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2968 return atype;
2969 }
2970 else
2971 return atype;
2972
2973 *a = suba;
2974 *b = subb;
2975 return wider_type;
2976 }
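
/* For example, if *A is (unsigned int) la_1 and *B is (unsigned int) lb_2,
   where la_1 and lb_2 are 64-bit "long" values and the results are
   32-bit, both conversions are stripped and the 64-bit type is
   returned; if only one operand is such a conversion, the type of *A is
   returned and neither operand is changed.  */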
2977
2978 /* Determines the expression by which USE is expressed from induction
2979 variable CAND at statement AT in LOOP. The expression is stored in a
2980 decomposed form into AFF. Returns false if USE cannot be expressed using CAND. */
2981
2982 static bool
2983 get_computation_aff (struct loop *loop,
2984 struct iv_use *use, struct iv_cand *cand, gimple at,
2985 struct affine_tree_combination *aff)
2986 {
2987 tree ubase = use->iv->base;
2988 tree ustep = use->iv->step;
2989 tree cbase = cand->iv->base;
2990 tree cstep = cand->iv->step, cstep_common;
2991 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2992 tree common_type, var;
2993 tree uutype;
2994 aff_tree cbase_aff, var_aff;
2995 double_int rat;
2996
2997 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2998 {
2999 /* We do not have enough precision to express the values of the use. */
3000 return false;
3001 }
3002
3003 var = var_at_stmt (loop, cand, at);
3004 uutype = unsigned_type_for (utype);
3005
3006 /* If the conversion is not noop, perform it. */
3007 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3008 {
3009 cstep = fold_convert (uutype, cstep);
3010 cbase = fold_convert (uutype, cbase);
3011 var = fold_convert (uutype, var);
3012 }
3013
3014 if (!constant_multiple_of (ustep, cstep, &rat))
3015 return false;
3016
3017 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3018 type, we achieve better folding by computing their difference in this
3019 wider type, and casting the result to UUTYPE. We do not need to worry
3020 about overflows, as all the arithmetic will in the end be performed in
3021 UUTYPE anyway. */
3022 common_type = determine_common_wider_type (&ubase, &cbase);
3023
3024 /* use = ubase - ratio * cbase + ratio * var. */
3025 tree_to_aff_combination (ubase, common_type, aff);
3026 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3027 tree_to_aff_combination (var, uutype, &var_aff);
3028
3029 /* We need to shift the value if we are after the increment. */
3030 if (stmt_after_increment (loop, cand, at))
3031 {
3032 aff_tree cstep_aff;
3033
3034 if (common_type != uutype)
3035 cstep_common = fold_convert (common_type, cstep);
3036 else
3037 cstep_common = cstep;
3038
3039 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3040 aff_combination_add (&cbase_aff, &cstep_aff);
3041 }
3042
3043 aff_combination_scale (&cbase_aff, -rat);
3044 aff_combination_add (aff, &cbase_aff);
3045 if (common_type != uutype)
3046 aff_combination_convert (aff, uutype);
3047
3048 aff_combination_scale (&var_aff, rat);
3049 aff_combination_add (aff, &var_aff);
3050
3051 return true;
3052 }
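
/* A worked example with hypothetical values (ignoring the
   after-increment adjustment): for a use {base 4, step 8} and a
   candidate {base 0, step 4}, constant_multiple_of computes
   rat == 8 / 4 == 2, so the use is rebuilt as

     4 - 2 * 0 + 2 * var == 2 * var + 4

   where var is the candidate's value at statement AT.  */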
3053
3054 /* Return the type of USE. */
3055
3056 static tree
3057 get_use_type (struct iv_use *use)
3058 {
3059 tree base_type = TREE_TYPE (use->iv->base);
3060 tree type;
3061
3062 if (use->type == USE_ADDRESS)
3063 {
3064 /* The base_type may be a void pointer. Create a pointer type based on
3065 the mem_ref instead. */
3066 type = build_pointer_type (TREE_TYPE (*use->op_p));
3067 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3068 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3069 }
3070 else
3071 type = base_type;
3072
3073 return type;
3074 }
3075
3076 /* Determines the expression by which USE is expressed from induction
3077 variable CAND at statement AT in LOOP. The computation is unshared. */
3078
3079 static tree
3080 get_computation_at (struct loop *loop,
3081 struct iv_use *use, struct iv_cand *cand, gimple at)
3082 {
3083 aff_tree aff;
3084 tree type = get_use_type (use);
3085
3086 if (!get_computation_aff (loop, use, cand, at, &aff))
3087 return NULL_TREE;
3088 unshare_aff_combination (&aff);
3089 return fold_convert (type, aff_combination_to_tree (&aff));
3090 }
3091
3092 /* Determines the expression by which USE is expressed from induction
3093 variable CAND in LOOP. The computation is unshared. */
3094
3095 static tree
3096 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3097 {
3098 return get_computation_at (loop, use, cand, use->stmt);
3099 }
3100
3101 /* Adjust the cost COST for being in loop setup rather than loop body.
3102 If we're optimizing for space, the loop setup overhead is constant;
3103 if we're optimizing for speed, amortize it over the per-iteration cost. */
3104 static unsigned
3105 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3106 {
3107 if (cost == INFTY)
3108 return cost;
3109 else if (optimize_loop_for_speed_p (data->current_loop))
3110 return cost / avg_loop_niter (data->current_loop);
3111 else
3112 return cost;
3113 }
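
/* E.g. a setup cost of 20 in a loop expected to run 5 iterations is
   charged as 20 / 5 == 4 per iteration when optimizing for speed,
   whereas when optimizing for size the full 20 is kept.  */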
3114
3115 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3116 validity for a memory reference accessing memory of mode MODE in
3117 address space AS. */
3118
3119
3120 bool
3121 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3122 addr_space_t as)
3123 {
3124 #define MAX_RATIO 128
3125 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3126 static vec<sbitmap> valid_mult_list;
3127 sbitmap valid_mult;
3128
3129 if (data_index >= valid_mult_list.length ())
3130 valid_mult_list.safe_grow_cleared (data_index + 1);
3131
3132 valid_mult = valid_mult_list[data_index];
3133 if (!valid_mult)
3134 {
3135 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3136 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3137 rtx addr;
3138 HOST_WIDE_INT i;
3139
3140 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3141 bitmap_clear (valid_mult);
3142 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3143 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3144 {
3145 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3146 if (memory_address_addr_space_p (mode, addr, as))
3147 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3148 }
3149
3150 if (dump_file && (dump_flags & TDF_DETAILS))
3151 {
3152 fprintf (dump_file, " allowed multipliers:");
3153 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3154 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3155 fprintf (dump_file, " %d", (int) i);
3156 fprintf (dump_file, "\n");
3157 fprintf (dump_file, "\n");
3158 }
3159
3160 valid_mult_list[data_index] = valid_mult;
3161 }
3162
3163 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3164 return false;
3165
3166 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3167 }
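
/* As a concrete illustration: on a target with x86-like scaled
   addressing, the probing above would typically accept ratios such as
   2, 4 and 8 and reject e.g. 3, so an access base + 3 * index would
   need an explicit multiplication.  Which ratios are valid is of
   course entirely target-dependent.  */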
3168
3169 /* Returns the cost of an address of the shape symbol + var + OFFSET + RATIO * index.
3170 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3171 variable is omitted. Compute the cost for a memory reference that accesses
3172 a memory location of mode MEM_MODE in address space AS.
3173
3174 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3175 size of MEM_MODE / RATIO) is available. To make this determination, we
3176 look at the size of the increment to be made, which is given in CSTEP.
3177 CSTEP may be zero if the step is unknown.
3178 STMT_AFTER_INC is true iff the statement we're looking at is after the
3179 increment of the original biv.
3180
3181 TODO -- there must be some better way. This all is quite crude. */
3182
3183 typedef struct address_cost_data_s
3184 {
3185 HOST_WIDE_INT min_offset, max_offset;
3186 unsigned costs[2][2][2][2];
3187 } *address_cost_data;
3188
3189
3190 static comp_cost
3191 get_address_cost (bool symbol_present, bool var_present,
3192 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3193 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3194 addr_space_t as, bool speed,
3195 bool stmt_after_inc, bool *may_autoinc)
3196 {
3197 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3198 static vec<address_cost_data> address_cost_data_list;
3199 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3200 address_cost_data data;
3201 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3202 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3203 unsigned cost, acost, complexity;
3204 bool offset_p, ratio_p, autoinc;
3205 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3206 unsigned HOST_WIDE_INT mask;
3207 unsigned bits;
3208
3209 if (data_index >= address_cost_data_list.length ())
3210 address_cost_data_list.safe_grow_cleared (data_index + 1);
3211
3212 data = address_cost_data_list[data_index];
3213 if (!data)
3214 {
3215 HOST_WIDE_INT i;
3216 HOST_WIDE_INT rat, off = 0;
3217 int old_cse_not_expected, width;
3218 unsigned sym_p, var_p, off_p, rat_p, add_c;
3219 rtx seq, addr, base;
3220 rtx reg0, reg1;
3221
3222 data = (address_cost_data) xcalloc (1, sizeof (*data));
3223
3224 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3225
3226 width = GET_MODE_BITSIZE (address_mode) - 1;
3227 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3228 width = HOST_BITS_PER_WIDE_INT - 1;
3229 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3230
3231 for (i = width; i >= 0; i--)
3232 {
3233 off = -((unsigned HOST_WIDE_INT) 1 << i);
3234 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3235 if (memory_address_addr_space_p (mem_mode, addr, as))
3236 break;
3237 }
3238 data->min_offset = (i == -1? 0 : off);
3239
3240 for (i = width; i >= 0; i--)
3241 {
3242 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3243 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3244 if (memory_address_addr_space_p (mem_mode, addr, as))
3245 break;
3246 }
3247 if (i == -1)
3248 off = 0;
3249 data->max_offset = off;
3250
3251 if (dump_file && (dump_flags & TDF_DETAILS))
3252 {
3253 fprintf (dump_file, "get_address_cost:\n");
3254 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3255 GET_MODE_NAME (mem_mode),
3256 data->min_offset);
3257 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3258 GET_MODE_NAME (mem_mode),
3259 data->max_offset);
3260 }
3261
3262 rat = 1;
3263 for (i = 2; i <= MAX_RATIO; i++)
3264 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3265 {
3266 rat = i;
3267 break;
3268 }
3269
3270 /* Compute the cost of various addressing modes. */
3271 acost = 0;
3272 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3273 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3274
3275 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3276 || USE_STORE_PRE_DECREMENT (mem_mode))
3277 {
3278 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3279 has_predec[mem_mode]
3280 = memory_address_addr_space_p (mem_mode, addr, as);
3281 }
3282 if (USE_LOAD_POST_DECREMENT (mem_mode)
3283 || USE_STORE_POST_DECREMENT (mem_mode))
3284 {
3285 addr = gen_rtx_POST_DEC (address_mode, reg0);
3286 has_postdec[mem_mode]
3287 = memory_address_addr_space_p (mem_mode, addr, as);
3288 }
3289 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3290 || USE_STORE_PRE_INCREMENT (mem_mode))
3291 {
3292 addr = gen_rtx_PRE_INC (address_mode, reg0);
3293 has_preinc[mem_mode]
3294 = memory_address_addr_space_p (mem_mode, addr, as);
3295 }
3296 if (USE_LOAD_POST_INCREMENT (mem_mode)
3297 || USE_STORE_POST_INCREMENT (mem_mode))
3298 {
3299 addr = gen_rtx_POST_INC (address_mode, reg0);
3300 has_postinc[mem_mode]
3301 = memory_address_addr_space_p (mem_mode, addr, as);
3302 }
3303 for (i = 0; i < 16; i++)
3304 {
3305 sym_p = i & 1;
3306 var_p = (i >> 1) & 1;
3307 off_p = (i >> 2) & 1;
3308 rat_p = (i >> 3) & 1;
3309
3310 addr = reg0;
3311 if (rat_p)
3312 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3313 gen_int_mode (rat, address_mode));
3314
3315 if (var_p)
3316 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3317
3318 if (sym_p)
3319 {
3320 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3321 /* ??? We can run into trouble with some backends by presenting
3322 it with symbols which haven't been properly passed through
3323 targetm.encode_section_info. By setting the local bit, we
3324 enhance the probability of things working. */
3325 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3326
3327 if (off_p)
3328 base = gen_rtx_fmt_e (CONST, address_mode,
3329 gen_rtx_fmt_ee
3330 (PLUS, address_mode, base,
3331 gen_int_mode (off, address_mode)));
3332 }
3333 else if (off_p)
3334 base = gen_int_mode (off, address_mode);
3335 else
3336 base = NULL_RTX;
3337
3338 if (base)
3339 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3340
3341 start_sequence ();
3342 /* To avoid splitting addressing modes, pretend that no cse will
3343 follow. */
3344 old_cse_not_expected = cse_not_expected;
3345 cse_not_expected = true;
3346 addr = memory_address_addr_space (mem_mode, addr, as);
3347 cse_not_expected = old_cse_not_expected;
3348 seq = get_insns ();
3349 end_sequence ();
3350
3351 acost = seq_cost (seq, speed);
3352 acost += address_cost (addr, mem_mode, as, speed);
3353
3354 if (!acost)
3355 acost = 1;
3356 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3357 }
3358
3359 /* On some targets, it is quite expensive to load a symbol into a register,
3360 which makes addresses that contain symbols look much more expensive.
3361 However, the symbol will have to be loaded in any case before the
3362 loop (and quite likely we have it in a register already), so it does not
3363 make much sense to penalize them too heavily. So make some final
3364 tweaks for the SYMBOL_PRESENT modes:
3365
3366 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3367 var is cheaper, use this mode with small penalty.
3368 If VAR_PRESENT is true, try whether the mode with
3369 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3370 if this is the case, use it. */
3371 add_c = add_cost (speed, address_mode);
3372 for (i = 0; i < 8; i++)
3373 {
3374 var_p = i & 1;
3375 off_p = (i >> 1) & 1;
3376 rat_p = (i >> 2) & 1;
3377
3378 acost = data->costs[0][1][off_p][rat_p] + 1;
3379 if (var_p)
3380 acost += add_c;
3381
3382 if (acost < data->costs[1][var_p][off_p][rat_p])
3383 data->costs[1][var_p][off_p][rat_p] = acost;
3384 }
3385
3386 if (dump_file && (dump_flags & TDF_DETAILS))
3387 {
3388 fprintf (dump_file, "Address costs:\n");
3389
3390 for (i = 0; i < 16; i++)
3391 {
3392 sym_p = i & 1;
3393 var_p = (i >> 1) & 1;
3394 off_p = (i >> 2) & 1;
3395 rat_p = (i >> 3) & 1;
3396
3397 fprintf (dump_file, " ");
3398 if (sym_p)
3399 fprintf (dump_file, "sym + ");
3400 if (var_p)
3401 fprintf (dump_file, "var + ");
3402 if (off_p)
3403 fprintf (dump_file, "cst + ");
3404 if (rat_p)
3405 fprintf (dump_file, "rat * ");
3406
3407 acost = data->costs[sym_p][var_p][off_p][rat_p];
3408 fprintf (dump_file, "index costs %d\n", acost);
3409 }
3410 if (has_predec[mem_mode] || has_postdec[mem_mode]
3411 || has_preinc[mem_mode] || has_postinc[mem_mode])
3412 fprintf (dump_file, " May include autoinc/dec\n");
3413 fprintf (dump_file, "\n");
3414 }
3415
3416 address_cost_data_list[data_index] = data;
3417 }
3418
3419 bits = GET_MODE_BITSIZE (address_mode);
3420 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3421 offset &= mask;
3422 if ((offset >> (bits - 1) & 1))
3423 offset |= ~mask;
3424 s_offset = offset;
3425
3426 autoinc = false;
3427 msize = GET_MODE_SIZE (mem_mode);
3428 autoinc_offset = offset;
3429 if (stmt_after_inc)
3430 autoinc_offset += ratio * cstep;
3431 if (symbol_present || var_present || ratio != 1)
3432 autoinc = false;
3433 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3434 && msize == cstep)
3435 || (has_postdec[mem_mode] && autoinc_offset == 0
3436 && msize == -cstep)
3437 || (has_preinc[mem_mode] && autoinc_offset == msize
3438 && msize == cstep)
3439 || (has_predec[mem_mode] && autoinc_offset == -msize
3440 && msize == -cstep))
3441 autoinc = true;
3442
3443 cost = 0;
3444 offset_p = (s_offset != 0
3445 && data->min_offset <= s_offset
3446 && s_offset <= data->max_offset);
3447 ratio_p = (ratio != 1
3448 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3449
3450 if (ratio != 1 && !ratio_p)
3451 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3452
3453 if (s_offset && !offset_p && !symbol_present)
3454 cost += add_cost (speed, address_mode);
3455
3456 if (may_autoinc)
3457 *may_autoinc = autoinc;
3458 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3459 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3460 return new_cost (cost + acost, complexity);
3461 }
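
/* Decoding example for the sixteen-combination loop above: iteration
   i == 13 (binary 1101) probes sym_p == 1, var_p == 0, off_p == 1,
   rat_p == 1, i.e. an address of the shape sym + cst + rat * index.  */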
3462
3463 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3464 EXPR operand holding the multiplication. COST0 and COST1 are the costs for
3465 calculating the operands of EXPR. Returns true if successful, and returns
3466 the cost in COST. */
3467
3468 static bool
3469 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3470 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3471 {
3472 comp_cost res;
3473 tree op1 = TREE_OPERAND (expr, 1);
3474 tree cst = TREE_OPERAND (mult, 1);
3475 tree multop = TREE_OPERAND (mult, 0);
3476 int m = exact_log2 (int_cst_value (cst));
3477 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3478 int sa_cost;
3479
3480 if (!(m >= 0 && m < maxm))
3481 return false;
3482
3483 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3484 ? shiftadd_cost (speed, mode, m)
3485 : (mult == op1
3486 ? shiftsub1_cost (speed, mode, m)
3487 : shiftsub0_cost (speed, mode, m)));
3488 res = new_cost (sa_cost, 0);
3489 res = add_costs (res, mult == op1 ? cost0 : cost1);
3490
3491 STRIP_NOPS (multop);
3492 if (!is_gimple_val (multop))
3493 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3494
3495 *cost = res;
3496 return true;
3497 }
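
/* For illustration: "a + b * 4" gives m == exact_log2 (4) == 2 and is
   priced as shiftadd_cost (speed, mode, 2); "a - b * 4" (the
   multiplication in operand 1) uses shiftsub1_cost, while "b * 4 - a"
   uses shiftsub0_cost.  */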
3498
3499 /* Estimates cost of forcing expression EXPR into a variable. */
3500
3501 static comp_cost
3502 force_expr_to_var_cost (tree expr, bool speed)
3503 {
3504 static bool costs_initialized = false;
3505 static unsigned integer_cost [2];
3506 static unsigned symbol_cost [2];
3507 static unsigned address_cost [2];
3508 tree op0, op1;
3509 comp_cost cost0, cost1, cost;
3510 enum machine_mode mode;
3511
3512 if (!costs_initialized)
3513 {
3514 tree type = build_pointer_type (integer_type_node);
3515 tree var, addr;
3516 rtx x;
3517 int i;
3518
3519 var = create_tmp_var_raw (integer_type_node, "test_var");
3520 TREE_STATIC (var) = 1;
3521 x = produce_memory_decl_rtl (var, NULL);
3522 SET_DECL_RTL (var, x);
3523
3524 addr = build1 (ADDR_EXPR, type, var);
3525
3526
3527 for (i = 0; i < 2; i++)
3528 {
3529 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3530 2000), i);
3531
3532 symbol_cost[i] = computation_cost (addr, i) + 1;
3533
3534 address_cost[i]
3535 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3536 if (dump_file && (dump_flags & TDF_DETAILS))
3537 {
3538 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3539 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3540 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3541 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3542 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3543 fprintf (dump_file, "\n");
3544 }
3545 }
3546
3547 costs_initialized = true;
3548 }
3549
3550 STRIP_NOPS (expr);
3551
3552 if (SSA_VAR_P (expr))
3553 return no_cost;
3554
3555 if (is_gimple_min_invariant (expr))
3556 {
3557 if (TREE_CODE (expr) == INTEGER_CST)
3558 return new_cost (integer_cost [speed], 0);
3559
3560 if (TREE_CODE (expr) == ADDR_EXPR)
3561 {
3562 tree obj = TREE_OPERAND (expr, 0);
3563
3564 if (TREE_CODE (obj) == VAR_DECL
3565 || TREE_CODE (obj) == PARM_DECL
3566 || TREE_CODE (obj) == RESULT_DECL)
3567 return new_cost (symbol_cost [speed], 0);
3568 }
3569
3570 return new_cost (address_cost [speed], 0);
3571 }
3572
3573 switch (TREE_CODE (expr))
3574 {
3575 case POINTER_PLUS_EXPR:
3576 case PLUS_EXPR:
3577 case MINUS_EXPR:
3578 case MULT_EXPR:
3579 op0 = TREE_OPERAND (expr, 0);
3580 op1 = TREE_OPERAND (expr, 1);
3581 STRIP_NOPS (op0);
3582 STRIP_NOPS (op1);
3583
3584 if (is_gimple_val (op0))
3585 cost0 = no_cost;
3586 else
3587 cost0 = force_expr_to_var_cost (op0, speed);
3588
3589 if (is_gimple_val (op1))
3590 cost1 = no_cost;
3591 else
3592 cost1 = force_expr_to_var_cost (op1, speed);
3593
3594 break;
3595
3596 case NEGATE_EXPR:
3597 op0 = TREE_OPERAND (expr, 0);
3598 STRIP_NOPS (op0);
3599 op1 = NULL_TREE;
3600
3601 if (is_gimple_val (op0))
3602 cost0 = no_cost;
3603 else
3604 cost0 = force_expr_to_var_cost (op0, speed);
3605
3606 cost1 = no_cost;
3607 break;
3608
3609 default:
3610 /* Just an arbitrary value, FIXME. */
3611 return new_cost (target_spill_cost[speed], 0);
3612 }
3613
3614 mode = TYPE_MODE (TREE_TYPE (expr));
3615 switch (TREE_CODE (expr))
3616 {
3617 case POINTER_PLUS_EXPR:
3618 case PLUS_EXPR:
3619 case MINUS_EXPR:
3620 case NEGATE_EXPR:
3621 cost = new_cost (add_cost (speed, mode), 0);
3622 if (TREE_CODE (expr) != NEGATE_EXPR)
3623 {
3624 tree mult = NULL_TREE;
3625 comp_cost sa_cost;
3626 if (TREE_CODE (op1) == MULT_EXPR)
3627 mult = op1;
3628 else if (TREE_CODE (op0) == MULT_EXPR)
3629 mult = op0;
3630
3631 if (mult != NULL_TREE
3632 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3633 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3634 speed, &sa_cost))
3635 return sa_cost;
3636 }
3637 break;
3638
3639 case MULT_EXPR:
3640 if (cst_and_fits_in_hwi (op0))
3641 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3642 mode, speed), 0);
3643 else if (cst_and_fits_in_hwi (op1))
3644 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3645 mode, speed), 0);
3646 else
3647 return new_cost (target_spill_cost [speed], 0);
3648 break;
3649
3650 default:
3651 gcc_unreachable ();
3652 }
3653
3654 cost = add_costs (cost, cost0);
3655 cost = add_costs (cost, cost1);
3656
3657 /* Bound the cost by target_spill_cost. The parts of complicated
3658 computations are often either loop invariant or at least can
3659 be shared between several iv uses, so letting this grow without
3660 limit would not give reasonable results. */
3661 if (cost.cost > (int) target_spill_cost [speed])
3662 cost.cost = target_spill_cost [speed];
3663
3664 return cost;
3665 }
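
/* For example, "a_1 + b_2" with both operands SSA names costs just
   add_cost (speed, mode); for "a_1 + b_2 * 4" the shiftadd path above
   may apply instead; and anything more complicated is capped at
   target_spill_cost.  */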
3666
3667 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3668 invariants the computation depends on. */
3669
3670 static comp_cost
3671 force_var_cost (struct ivopts_data *data,
3672 tree expr, bitmap *depends_on)
3673 {
3674 if (depends_on)
3675 {
3676 fd_ivopts_data = data;
3677 walk_tree (&expr, find_depends, depends_on, NULL);
3678 }
3679
3680 return force_expr_to_var_cost (expr, data->speed);
3681 }
3682
3683 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3684 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3685 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3686 invariants the computation depends on. */
3687
3688 static comp_cost
3689 split_address_cost (struct ivopts_data *data,
3690 tree addr, bool *symbol_present, bool *var_present,
3691 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3692 {
3693 tree core;
3694 HOST_WIDE_INT bitsize;
3695 HOST_WIDE_INT bitpos;
3696 tree toffset;
3697 enum machine_mode mode;
3698 int unsignedp, volatilep;
3699
3700 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3701 &unsignedp, &volatilep, false);
3702
3703 if (toffset != 0
3704 || bitpos % BITS_PER_UNIT != 0
3705 || TREE_CODE (core) != VAR_DECL)
3706 {
3707 *symbol_present = false;
3708 *var_present = true;
3709 fd_ivopts_data = data;
3710 walk_tree (&addr, find_depends, depends_on, NULL);
3711 return new_cost (target_spill_cost[data->speed], 0);
3712 }
3713
3714 *offset += bitpos / BITS_PER_UNIT;
3715 if (TREE_STATIC (core)
3716 || DECL_EXTERNAL (core))
3717 {
3718 *symbol_present = true;
3719 *var_present = false;
3720 return no_cost;
3721 }
3722
3723 *symbol_present = false;
3724 *var_present = true;
3725 return no_cost;
3726 }
3727
3728 /* Estimates cost of expressing difference of addresses E1 - E2 as
3729 var + symbol + offset. The value of offset is added to OFFSET,
3730 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3731 part is missing. DEPENDS_ON is a set of the invariants the computation
3732 depends on. */
3733
3734 static comp_cost
3735 ptr_difference_cost (struct ivopts_data *data,
3736 tree e1, tree e2, bool *symbol_present, bool *var_present,
3737 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3738 {
3739 HOST_WIDE_INT diff = 0;
3740 aff_tree aff_e1, aff_e2;
3741 tree type;
3742
3743 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3744
3745 if (ptr_difference_const (e1, e2, &diff))
3746 {
3747 *offset += diff;
3748 *symbol_present = false;
3749 *var_present = false;
3750 return no_cost;
3751 }
3752
3753 if (integer_zerop (e2))
3754 return split_address_cost (data, TREE_OPERAND (e1, 0),
3755 symbol_present, var_present, offset, depends_on);
3756
3757 *symbol_present = false;
3758 *var_present = true;
3759
3760 type = signed_type_for (TREE_TYPE (e1));
3761 tree_to_aff_combination (e1, type, &aff_e1);
3762 tree_to_aff_combination (e2, type, &aff_e2);
3763 aff_combination_scale (&aff_e2, double_int_minus_one);
3764 aff_combination_add (&aff_e1, &aff_e2);
3765
3766 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3767 }
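
/* For example, &a[5] - &a[2] folds to the compile-time constant
   3 * sizeof (*a), so only *OFFSET is updated and no_cost is returned;
   a difference such as &a[i_1] - &b[0] instead goes through the
   affine-combination path at the end of the function.  */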
3768
3769 /* Estimates cost of expressing difference E1 - E2 as
3770 var + symbol + offset. The value of offset is added to OFFSET,
3771 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3772 part is missing. DEPENDS_ON is a set of the invariants the computation
3773 depends on. */
3774
3775 static comp_cost
3776 difference_cost (struct ivopts_data *data,
3777 tree e1, tree e2, bool *symbol_present, bool *var_present,
3778 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3779 {
3780 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3781 unsigned HOST_WIDE_INT off1, off2;
3782 aff_tree aff_e1, aff_e2;
3783 tree type;
3784
3785 e1 = strip_offset (e1, &off1);
3786 e2 = strip_offset (e2, &off2);
3787 *offset += off1 - off2;
3788
3789 STRIP_NOPS (e1);
3790 STRIP_NOPS (e2);
3791
3792 if (TREE_CODE (e1) == ADDR_EXPR)
3793 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3794 offset, depends_on);
3795 *symbol_present = false;
3796
3797 if (operand_equal_p (e1, e2, 0))
3798 {
3799 *var_present = false;
3800 return no_cost;
3801 }
3802
3803 *var_present = true;
3804
3805 if (integer_zerop (e2))
3806 return force_var_cost (data, e1, depends_on);
3807
3808 if (integer_zerop (e1))
3809 {
3810 comp_cost cost = force_var_cost (data, e2, depends_on);
3811 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3812 return cost;
3813 }
3814
3815 type = signed_type_for (TREE_TYPE (e1));
3816 tree_to_aff_combination (e1, type, &aff_e1);
3817 tree_to_aff_combination (e2, type, &aff_e2);
3818 aff_combination_scale (&aff_e2, double_int_minus_one);
3819 aff_combination_add (&aff_e1, &aff_e2);
3820
3821 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3822 }
3823
3824 /* Returns true if AFF1 and AFF2 are identical. */
3825
3826 static bool
3827 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3828 {
3829 unsigned i;
3830
3831 if (aff1->n != aff2->n)
3832 return false;
3833
3834 for (i = 0; i < aff1->n; i++)
3835 {
3836 if (aff1->elts[i].coef != aff2->elts[i].coef)
3837 return false;
3838
3839 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3840 return false;
3841 }
3842 return true;
3843 }
3844
3845 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3846
3847 static int
3848 get_expr_id (struct ivopts_data *data, tree expr)
3849 {
3850 struct iv_inv_expr_ent ent;
3851 struct iv_inv_expr_ent **slot;
3852
3853 ent.expr = expr;
3854 ent.hash = iterative_hash_expr (expr, 0);
3855 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3856 if (*slot)
3857 return (*slot)->id;
3858
3859 *slot = XNEW (struct iv_inv_expr_ent);
3860 (*slot)->expr = expr;
3861 (*slot)->hash = ent.hash;
3862 (*slot)->id = data->inv_expr_id++;
3863 return (*slot)->id;
3864 }
3865
3866 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3867 requires a new compiler-generated temporary. Returns -1 otherwise.
3868 ADDRESS_P is a flag indicating if the expression is for address
3869 computation. */
3870
3871 static int
3872 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3873 tree cbase, HOST_WIDE_INT ratio,
3874 bool address_p)
3875 {
3876 aff_tree ubase_aff, cbase_aff;
3877 tree expr, ub, cb;
3878
3879 STRIP_NOPS (ubase);
3880 STRIP_NOPS (cbase);
3881 ub = ubase;
3882 cb = cbase;
3883
3884 if ((TREE_CODE (ubase) == INTEGER_CST)
3885 && (TREE_CODE (cbase) == INTEGER_CST))
3886 return -1;
3887
3888 /* Strips the constant part. */
3889 if (TREE_CODE (ubase) == PLUS_EXPR
3890 || TREE_CODE (ubase) == MINUS_EXPR
3891 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3892 {
3893 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3894 ubase = TREE_OPERAND (ubase, 0);
3895 }
3896
3897 /* Strips the constant part. */
3898 if (TREE_CODE (cbase) == PLUS_EXPR
3899 || TREE_CODE (cbase) == MINUS_EXPR
3900 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3901 {
3902 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3903 cbase = TREE_OPERAND (cbase, 0);
3904 }
3905
3906 if (address_p)
3907 {
3908 if (((TREE_CODE (ubase) == SSA_NAME)
3909 || (TREE_CODE (ubase) == ADDR_EXPR
3910 && is_gimple_min_invariant (ubase)))
3911 && (TREE_CODE (cbase) == INTEGER_CST))
3912 return -1;
3913
3914 if (((TREE_CODE (cbase) == SSA_NAME)
3915 || (TREE_CODE (cbase) == ADDR_EXPR
3916 && is_gimple_min_invariant (cbase)))
3917 && (TREE_CODE (ubase) == INTEGER_CST))
3918 return -1;
3919 }
3920
3921 if (ratio == 1)
3922 {
3923 if (operand_equal_p (ubase, cbase, 0))
3924 return -1;
3925
3926 if (TREE_CODE (ubase) == ADDR_EXPR
3927 && TREE_CODE (cbase) == ADDR_EXPR)
3928 {
3929 tree usym, csym;
3930
3931 usym = TREE_OPERAND (ubase, 0);
3932 csym = TREE_OPERAND (cbase, 0);
3933 if (TREE_CODE (usym) == ARRAY_REF)
3934 {
3935 tree ind = TREE_OPERAND (usym, 1);
3936 if (TREE_CODE (ind) == INTEGER_CST
3937 && host_integerp (ind, 0)
3938 && TREE_INT_CST_LOW (ind) == 0)
3939 usym = TREE_OPERAND (usym, 0);
3940 }
3941 if (TREE_CODE (csym) == ARRAY_REF)
3942 {
3943 tree ind = TREE_OPERAND (csym, 1);
3944 if (TREE_CODE (ind) == INTEGER_CST
3945 && host_integerp (ind, 0)
3946 && TREE_INT_CST_LOW (ind) == 0)
3947 csym = TREE_OPERAND (csym, 0);
3948 }
3949 if (operand_equal_p (usym, csym, 0))
3950 return -1;
3951 }
3952 /* Now do a more complex comparison. */
3953 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3954 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3955 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3956 return -1;
3957 }
3958
3959 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3960 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3961
3962 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3963 aff_combination_add (&ubase_aff, &cbase_aff);
3964 expr = aff_combination_to_tree (&ubase_aff);
3965 return get_expr_id (data, expr);
3966 }
3967
3968
3969
3970 /* Determines the cost of the computation by which USE is expressed
3971 from induction variable CAND. If ADDRESS_P is true, we just need
3972 to create an address from it, otherwise we want to get it into a
3973 register. A set of invariants we depend on is stored in
3974 DEPENDS_ON. AT is the statement at which the value is computed.
3975 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3976 addressing is likely. */
3977
3978 static comp_cost
3979 get_computation_cost_at (struct ivopts_data *data,
3980 struct iv_use *use, struct iv_cand *cand,
3981 bool address_p, bitmap *depends_on, gimple at,
3982 bool *can_autoinc,
3983 int *inv_expr_id)
3984 {
3985 tree ubase = use->iv->base, ustep = use->iv->step;
3986 tree cbase, cstep;
3987 tree utype = TREE_TYPE (ubase), ctype;
3988 unsigned HOST_WIDE_INT cstepi, offset = 0;
3989 HOST_WIDE_INT ratio, aratio;
3990 bool var_present, symbol_present, stmt_is_after_inc;
3991 comp_cost cost;
3992 double_int rat;
3993 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3994 enum machine_mode mem_mode = (address_p
3995 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3996 : VOIDmode);
3997
3998 *depends_on = NULL;
3999
4000 /* Only consider real candidates. */
4001 if (!cand->iv)
4002 return infinite_cost;
4003
4004 cbase = cand->iv->base;
4005 cstep = cand->iv->step;
4006 ctype = TREE_TYPE (cbase);
4007
4008 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4009 {
4010 /* We do not have a precision to express the values of use. */
4011 return infinite_cost;
4012 }
4013
4014 if (address_p
4015 || (use->iv->base_object
4016 && cand->iv->base_object
4017 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4018 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4019 {
4020 /* Do not try to express address of an object with computation based
4021 on address of a different object. This may cause problems in rtl
4022 level alias analysis (which does not expect this to happen,
4023 as this is illegal in C), and would be unlikely to be useful
4024 anyway. */
4025 if (use->iv->base_object
4026 && cand->iv->base_object
4027 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4028 return infinite_cost;
4029 }
4030
4031 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4032 {
4033 /* TODO -- add direct handling of this case. */
4034 goto fallback;
4035 }
4036
4037 /* CSTEPI is removed from the offset in case the statement is after the
4038 increment. If the step is not constant, we use zero instead.
4039 This is a bit imprecise (there is the extra addition), but
4040 redundancy elimination is likely to transform the code so that
4041 it uses the value of the variable before the increment anyway,
4042 so it is not that unrealistic. */
4043 if (cst_and_fits_in_hwi (cstep))
4044 cstepi = int_cst_value (cstep);
4045 else
4046 cstepi = 0;
4047
4048 if (!constant_multiple_of (ustep, cstep, &rat))
4049 return infinite_cost;
4050
4051 if (rat.fits_shwi ())
4052 ratio = rat.to_shwi ();
4053 else
4054 return infinite_cost;
4055
4056 STRIP_NOPS (cbase);
4057 ctype = TREE_TYPE (cbase);
4058
4059 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4060
4061 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4062 or ratio == 1, it is better to handle this like
4063
4064 ubase - ratio * cbase + ratio * var
4065
4066 (this also holds in the case ratio == -1, TODO). */
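/* For instance, with ubase == 16, cbase == 4 and ratio == 2 this gives
   16 - 2 * 4 + 2 * var == 2 * var + 8, i.e. a constant offset of 8 plus
   a scaled induction variable; the constant part is folded into the
   offset accumulated below.  */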
4067
4068 if (cst_and_fits_in_hwi (cbase))
4069 {
4070 offset = - ratio * int_cst_value (cbase);
4071 cost = difference_cost (data,
4072 ubase, build_int_cst (utype, 0),
4073 &symbol_present, &var_present, &offset,
4074 depends_on);
4075 cost.cost /= avg_loop_niter (data->current_loop);
4076 }
4077 else if (ratio == 1)
4078 {
4079 tree real_cbase = cbase;
4080
4081 /* Check to see if any adjustment is needed. */
4082 if (cstepi == 0 && stmt_is_after_inc)
4083 {
4084 aff_tree real_cbase_aff;
4085 aff_tree cstep_aff;
4086
4087 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4088 &real_cbase_aff);
4089 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4090
4091 aff_combination_add (&real_cbase_aff, &cstep_aff);
4092 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4093 }
4094
4095 cost = difference_cost (data,
4096 ubase, real_cbase,
4097 &symbol_present, &var_present, &offset,
4098 depends_on);
4099 cost.cost /= avg_loop_niter (data->current_loop);
4100 }
4101 else if (address_p
4102 && !POINTER_TYPE_P (ctype)
4103 && multiplier_allowed_in_address_p
4104 (ratio, mem_mode,
4105 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4106 {
4107 cbase
4108 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4109 cost = difference_cost (data,
4110 ubase, cbase,
4111 &symbol_present, &var_present, &offset,
4112 depends_on);
4113 cost.cost /= avg_loop_niter (data->current_loop);
4114 }
4115 else
4116 {
4117 cost = force_var_cost (data, cbase, depends_on);
4118 cost = add_costs (cost,
4119 difference_cost (data,
4120 ubase, build_int_cst (utype, 0),
4121 &symbol_present, &var_present,
4122 &offset, depends_on));
4123 cost.cost /= avg_loop_niter (data->current_loop);
4124 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4125 }
4126
4127 if (inv_expr_id)
4128 {
4129 *inv_expr_id =
4130 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4131 /* Clear depends_on. */
4132 if (*inv_expr_id != -1 && depends_on && *depends_on)
4133 bitmap_clear (*depends_on);
4134 }
4135
4136 /* If we are after the increment, the value of the candidate is higher by
4137 one step, so compensate the offset. */
4138 if (stmt_is_after_inc)
4139 offset -= ratio * cstepi;
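/* E.g., with ratio == 1 and cstepi == 4, a use placed after the
   increment sees a candidate value that is already 4 larger; the
   subtraction above compensates for that.  */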
4140
4141 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4142 (symbol/var1/const parts may be omitted). If we are looking for an
4143 address, find the cost of addressing this. */
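/* For an array access this is typically the array's symbol, a constant
   byte offset and the scaled candidate, e.g. &a + 8 + 4 * var2 with
   symbol_present, offset == 8 and ratio == 4 (illustrative values
   only; the exact split depends on the candidate).  */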
4144 if (address_p)
4145 return add_costs (cost,
4146 get_address_cost (symbol_present, var_present,
4147 offset, ratio, cstepi,
4148 mem_mode,
4149 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4150 speed, stmt_is_after_inc,
4151 can_autoinc));
4152
4153 /* Otherwise estimate the costs for computing the expression. */
4154 if (!symbol_present && !var_present && !offset)
4155 {
4156 if (ratio != 1)
4157 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4158 return cost;
4159 }
4160
4161 /* Symbol + offset should be compile-time computable, so consider that they
4162 are added once to the variable, if present. */
4163 if (var_present && (symbol_present || offset))
4164 cost.cost += adjust_setup_cost (data,
4165 add_cost (speed, TYPE_MODE (ctype)));
4166
4167 /* Having an offset does not affect runtime cost in case it is added to a
4168 symbol, but it increases complexity. */
4169 if (offset)
4170 cost.complexity++;
4171
4172 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4173
4174 aratio = ratio > 0 ? ratio : -ratio;
4175 if (aratio != 1)
4176 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4177 return cost;
4178
4179 fallback:
4180 if (can_autoinc)
4181 *can_autoinc = false;
4182
4183 {
4184 /* Just get the expression, expand it and measure the cost. */
4185 tree comp = get_computation_at (data->current_loop, use, cand, at);
4186
4187 if (!comp)
4188 return infinite_cost;
4189
4190 if (address_p)
4191 comp = build_simple_mem_ref (comp);
4192
4193 return new_cost (computation_cost (comp, speed), 0);
4194 }
4195 }
4196
4197 /* Determines the cost of the computation by which USE is expressed
4198 from induction variable CAND. If ADDRESS_P is true, we just need
4199 to create an address from it, otherwise we want to get it into a
4200 register. A set of invariants we depend on is stored in
4201 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4202 autoinc addressing is likely. */
4203
4204 static comp_cost
4205 get_computation_cost (struct ivopts_data *data,
4206 struct iv_use *use, struct iv_cand *cand,
4207 bool address_p, bitmap *depends_on,
4208 bool *can_autoinc, int *inv_expr_id)
4209 {
4210 return get_computation_cost_at (data,
4211 use, cand, address_p, depends_on, use->stmt,
4212 can_autoinc, inv_expr_id);
4213 }
4214
4215 /* Determines cost of basing replacement of USE on CAND in a generic
4216 expression. */
4217
4218 static bool
4219 determine_use_iv_cost_generic (struct ivopts_data *data,
4220 struct iv_use *use, struct iv_cand *cand)
4221 {
4222 bitmap depends_on;
4223 comp_cost cost;
4224 int inv_expr_id = -1;
4225
4226 /* The simple case first -- if we need to express the value of the preserved
4227 original biv, the cost is 0. This also prevents us from counting the
4228 cost of increment twice -- once at this use and once in the cost of
4229 the candidate. */
4230 if (cand->pos == IP_ORIGINAL
4231 && cand->incremented_at == use->stmt)
4232 {
4233 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4234 ERROR_MARK, -1);
4235 return true;
4236 }
4237
4238 cost = get_computation_cost (data, use, cand, false, &depends_on,
4239 NULL, &inv_expr_id);
4240
4241 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4242 inv_expr_id);
4243
4244 return !infinite_cost_p (cost);
4245 }
4246
4247 /* Determines cost of basing replacement of USE on CAND in an address. */
4248
4249 static bool
4250 determine_use_iv_cost_address (struct ivopts_data *data,
4251 struct iv_use *use, struct iv_cand *cand)
4252 {
4253 bitmap depends_on;
4254 bool can_autoinc;
4255 int inv_expr_id = -1;
4256 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4257 &can_autoinc, &inv_expr_id);
4258
4259 if (cand->ainc_use == use)
4260 {
4261 if (can_autoinc)
4262 cost.cost -= cand->cost_step;
4263 /* If we generated the candidate solely for exploiting autoincrement
4264 opportunities, and it turns out it can't be used, set the cost to
4265 infinity to make sure we ignore it. */
4266 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4267 cost = infinite_cost;
4268 }
4269 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4270 inv_expr_id);
4271
4272 return !infinite_cost_p (cost);
4273 }
4274
4275 /* Computes value of candidate CAND at position AT in iteration NITER, and
4276 stores it to VAL. */
4277
4278 static void
4279 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4280 aff_tree *val)
4281 {
4282 aff_tree step, delta, nit;
4283 struct iv *iv = cand->iv;
4284 tree type = TREE_TYPE (iv->base);
4285 tree steptype = type;
4286 if (POINTER_TYPE_P (type))
4287 steptype = sizetype;
4288
4289 tree_to_aff_combination (iv->step, steptype, &step);
4290 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4291 aff_combination_convert (&nit, steptype);
4292 aff_combination_mult (&nit, &step, &delta);
4293 if (stmt_after_increment (loop, cand, at))
4294 aff_combination_add (&delta, &step);
4295
4296 tree_to_aff_combination (iv->base, type, val);
4297 aff_combination_add (val, &delta);
4298 }
4299
4300 /* Returns the period of induction variable IV. */
4301
4302 static tree
4303 iv_period (struct iv *iv)
4304 {
4305 tree step = iv->step, period, type;
4306 tree pow2div;
4307
4308 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4309
4310 type = unsigned_type_for (TREE_TYPE (step));
4311 /* The period of the iv is lcm (step, type_range)/step - 1,
4312 i.e., N*type_range/step - 1. Since the type range is a power
4313 of two, N == step >> num_ending_zeros (step),
4314 so the final result is
4315
4316 (type_range >> num_ending_zeros (step)) - 1
4317
4318 */
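/* E.g., for an 8-bit unsigned iv with step 4, num_ending_zeros (step)
   is 2 and the period is (256 >> 2) - 1 == 63: after 64 increments the
   iv is back at its initial value.  */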
4319 pow2div = num_ending_zeros (step);
4320
4321 period = build_low_bits_mask (type,
4322 (TYPE_PRECISION (type)
4323 - tree_low_cst (pow2div, 1)));
4324
4325 return period;
4326 }
4327
4328 /* Returns the comparison operator used when eliminating the iv USE. */
4329
4330 static enum tree_code
4331 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4332 {
4333 struct loop *loop = data->current_loop;
4334 basic_block ex_bb;
4335 edge exit;
4336
4337 ex_bb = gimple_bb (use->stmt);
4338 exit = EDGE_SUCC (ex_bb, 0);
4339 if (flow_bb_inside_loop_p (loop, exit->dest))
4340 exit = EDGE_SUCC (ex_bb, 1);
4341
4342 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4343 }
4344
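/* Strips useless type conversions from EXP, but only those that do not
   change whether the type may wrap on overflow.  */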
4345 static tree
4346 strip_wrap_conserving_type_conversions (tree exp)
4347 {
4348 while (tree_ssa_useless_type_conversion (exp)
4349 && (nowrap_type_p (TREE_TYPE (exp))
4350 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4351 exp = TREE_OPERAND (exp, 0);
4352 return exp;
4353 }
4354
4355 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4356 check for an exact match. */
4357
4358 static bool
4359 expr_equal_p (tree e, tree what)
4360 {
4361 gimple stmt;
4362 enum tree_code code;
4363
4364 e = strip_wrap_conserving_type_conversions (e);
4365 what = strip_wrap_conserving_type_conversions (what);
4366
4367 code = TREE_CODE (what);
4368 if (TREE_TYPE (e) != TREE_TYPE (what))
4369 return false;
4370
4371 if (operand_equal_p (e, what, 0))
4372 return true;
4373
4374 if (TREE_CODE (e) != SSA_NAME)
4375 return false;
4376
4377 stmt = SSA_NAME_DEF_STMT (e);
4378 if (gimple_code (stmt) != GIMPLE_ASSIGN
4379 || gimple_assign_rhs_code (stmt) != code)
4380 return false;
4381
4382 switch (get_gimple_rhs_class (code))
4383 {
4384 case GIMPLE_BINARY_RHS:
4385 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4386 return false;
4387 /* Fallthru. */
4388
4389 case GIMPLE_UNARY_RHS:
4390 case GIMPLE_SINGLE_RHS:
4391 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4392 default:
4393 return false;
4394 }
4395 }
4396
4397 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4398 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4399 calculation is performed in a non-wrapping type.
4400
4401 TODO: More generally, we could test for the situation that
4402 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4403 This would require knowing the sign of OFFSET.
4404
4405 Also, we only look for the first addition in the computation of BASE.
4406 More complex analysis would be better, but introducing it just for
4407 this optimization seems like overkill. */
4408
4409 static bool
4410 difference_cannot_overflow_p (tree base, tree offset)
4411 {
4412 enum tree_code code;
4413 tree e1, e2;
4414
4415 if (!nowrap_type_p (TREE_TYPE (base)))
4416 return false;
4417
4418 base = expand_simple_operations (base);
4419
4420 if (TREE_CODE (base) == SSA_NAME)
4421 {
4422 gimple stmt = SSA_NAME_DEF_STMT (base);
4423
4424 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4425 return false;
4426
4427 code = gimple_assign_rhs_code (stmt);
4428 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4429 return false;
4430
4431 e1 = gimple_assign_rhs1 (stmt);
4432 e2 = gimple_assign_rhs2 (stmt);
4433 }
4434 else
4435 {
4436 code = TREE_CODE (base);
4437 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4438 return false;
4439 e1 = TREE_OPERAND (base, 0);
4440 e2 = TREE_OPERAND (base, 1);
4441 }
4442
4443 /* TODO: deeper inspection may be necessary to prove the equality. */
4444 switch (code)
4445 {
4446 case PLUS_EXPR:
4447 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4448 case POINTER_PLUS_EXPR:
4449 return expr_equal_p (e2, offset);
4450
4451 default:
4452 return false;
4453 }
4454 }
4455
4456 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4457 comparison with CAND. NITER describes the number of iterations of
4458 the loop. If successful, the comparison in COMP_P is altered accordingly.
4459
4460 We aim to handle the following situation:
4461
4462 sometype *base, *p;
4463 int a, b, i;
4464
4465 i = a;
4466 p = p_0 = base + a;
4467
4468 do
4469 {
4470 bla (*p);
4471 p++;
4472 i++;
4473 }
4474 while (i < b);
4475
4476 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4477 We aim to optimize this to
4478
4479 p = p_0 = base + a;
4480 do
4481 {
4482 bla (*p);
4483 p++;
4484 }
4485 while (p < p_0 - a + b);
4486
4487 This preserves correctness, since the pointer arithmetic does not
4488 overflow. More precisely:
4489
4490 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4491 overflow in computing it or the values of p.
4492 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4493 overflow. To prove this, we use the fact that p_0 = base + a. */
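/* For instance, with a == 2 and b == 5 the body runs for i == 2, 3, 4,
   the latch is taken b - a - 1 == 2 times, and the new bound
   p_0 - a + b == base + 5 is the first address past the last element
   accessed (illustrative values only).  */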
4494
4495 static bool
4496 iv_elimination_compare_lt (struct ivopts_data *data,
4497 struct iv_cand *cand, enum tree_code *comp_p,
4498 struct tree_niter_desc *niter)
4499 {
4500 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4501 struct affine_tree_combination nit, tmpa, tmpb;
4502 enum tree_code comp;
4503 HOST_WIDE_INT step;
4504
4505 /* We need to know that the candidate induction variable does not overflow.
4506 While more complex analysis may be used to prove this, for now just
4507 check that the variable appears in the original program and that it
4508 is computed in a type that guarantees no overflows. */
4509 cand_type = TREE_TYPE (cand->iv->base);
4510 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4511 return false;
4512
4513 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4514 the calculation of the BOUND could overflow, making the comparison
4515 invalid. */
4516 if (!data->loop_single_exit_p)
4517 return false;
4518
4519 /* We need to be able to decide whether candidate is increasing or decreasing
4520 in order to choose the right comparison operator. */
4521 if (!cst_and_fits_in_hwi (cand->iv->step))
4522 return false;
4523 step = int_cst_value (cand->iv->step);
4524
4525 /* Check that the number of iterations matches the expected pattern:
4526 a + 1 > b ? 0 : b - a - 1. */
4527 mbz = niter->may_be_zero;
4528 if (TREE_CODE (mbz) == GT_EXPR)
4529 {
4530 /* Handle a + 1 > b. */
4531 tree op0 = TREE_OPERAND (mbz, 0);
4532 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4533 {
4534 a = TREE_OPERAND (op0, 0);
4535 b = TREE_OPERAND (mbz, 1);
4536 }
4537 else
4538 return false;
4539 }
4540 else if (TREE_CODE (mbz) == LT_EXPR)
4541 {
4542 tree op1 = TREE_OPERAND (mbz, 1);
4543
4544 /* Handle b < a + 1. */
4545 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4546 {
4547 a = TREE_OPERAND (op1, 0);
4548 b = TREE_OPERAND (mbz, 0);
4549 }
4550 else
4551 return false;
4552 }
4553 else
4554 return false;
4555
4556 /* Expected number of iterations is B - A - 1. Check that it matches
4557 the actual number, i.e., that B - A - NITER = 1. */
4558 tree_to_aff_combination (niter->niter, nit_type, &nit);
4559 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4560 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4561 aff_combination_scale (&nit, double_int_minus_one);
4562 aff_combination_scale (&tmpa, double_int_minus_one);
4563 aff_combination_add (&tmpb, &tmpa);
4564 aff_combination_add (&tmpb, &nit);
4565 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4566 return false;
4567
4568 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4569 overflow. */
4570 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4571 cand->iv->step,
4572 fold_convert (TREE_TYPE (cand->iv->step), a));
4573 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4574 return false;
4575
4576 /* Determine the new comparison operator. */
4577 comp = step < 0 ? GT_EXPR : LT_EXPR;
4578 if (*comp_p == NE_EXPR)
4579 *comp_p = comp;
4580 else if (*comp_p == EQ_EXPR)
4581 *comp_p = invert_tree_comparison (comp, false);
4582 else
4583 gcc_unreachable ();
4584
4585 return true;
4586 }
4587
4588 /* Check whether it is possible to express the condition in USE by comparison
4589 of candidate CAND. If so, store the value compared against in BOUND, and the
4590 comparison operator to COMP. */
4591
4592 static bool
4593 may_eliminate_iv (struct ivopts_data *data,
4594 struct iv_use *use, struct iv_cand *cand, tree *bound,
4595 enum tree_code *comp)
4596 {
4597 basic_block ex_bb;
4598 edge exit;
4599 tree period;
4600 struct loop *loop = data->current_loop;
4601 aff_tree bnd;
4602 struct tree_niter_desc *desc = NULL;
4603
4604 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4605 return false;
4606
4607 /* For now this works only for exits that dominate the loop latch.
4608 TODO: extend to other conditions inside loop body. */
4609 ex_bb = gimple_bb (use->stmt);
4610 if (use->stmt != last_stmt (ex_bb)
4611 || gimple_code (use->stmt) != GIMPLE_COND
4612 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4613 return false;
4614
4615 exit = EDGE_SUCC (ex_bb, 0);
4616 if (flow_bb_inside_loop_p (loop, exit->dest))
4617 exit = EDGE_SUCC (ex_bb, 1);
4618 if (flow_bb_inside_loop_p (loop, exit->dest))
4619 return false;
4620
4621 desc = niter_for_exit (data, exit);
4622 if (!desc)
4623 return false;
4624
4625 /* Determine whether we can use the variable to test the exit condition.
4626 This is the case iff the period of the induction variable is greater
4627 than the number of iterations for which the exit condition is true. */
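/* E.g., an 8-bit iv with step 4 has period 63 (see iv_period), so it
   can only replace the exit test of a loop known to iterate at most 63
   times (one less if the use follows the increment).  */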
4628 period = iv_period (cand->iv);
4629
4630 /* If the number of iterations is constant, compare against it directly. */
4631 if (TREE_CODE (desc->niter) == INTEGER_CST)
4632 {
4633 /* See cand_value_at. */
4634 if (stmt_after_increment (loop, cand, use->stmt))
4635 {
4636 if (!tree_int_cst_lt (desc->niter, period))
4637 return false;
4638 }
4639 else
4640 {
4641 if (tree_int_cst_lt (period, desc->niter))
4642 return false;
4643 }
4644 }
4645
4646 /* If not, and if this is the only possible exit of the loop, see whether
4647 we can get a conservative estimate on the number of iterations of the
4648 entire loop and compare against that instead. */
4649 else
4650 {
4651 double_int period_value, max_niter;
4652
4653 max_niter = desc->max;
4654 if (stmt_after_increment (loop, cand, use->stmt))
4655 max_niter += double_int_one;
4656 period_value = tree_to_double_int (period);
4657 if (max_niter.ugt (period_value))
4658 {
4659 /* See if we can take advantage of inferred loop bound information. */
4660 if (data->loop_single_exit_p)
4661 {
4662 if (!max_loop_iterations (loop, &max_niter))
4663 return false;
4664 /* The loop bound is already adjusted by adding 1. */
4665 if (max_niter.ugt (period_value))
4666 return false;
4667 }
4668 else
4669 return false;
4670 }
4671 }
4672
4673 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4674
4675 *bound = aff_combination_to_tree (&bnd);
4676 *comp = iv_elimination_compare (data, use);
4677
4678 /* It is unlikely that computing the number of iterations using division
4679 would be more profitable than keeping the original induction variable. */
4680 if (expression_expensive_p (*bound))
4681 return false;
4682
4683 /* Sometimes, it is possible to handle the situation that the number of
4684 iterations may be zero unless additional assumptions hold, by using <
4685 instead of != in the exit condition.
4686
4687 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4688 base the exit condition on it. However, that is often too
4689 expensive. */
4690 if (!integer_zerop (desc->may_be_zero))
4691 return iv_elimination_compare_lt (data, cand, comp, desc);
4692
4693 return true;
4694 }
4695
4696 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4697 be copied if it is used in the loop body and DATA->body_includes_call. */
4698
4699 static int
4700 parm_decl_cost (struct ivopts_data *data, tree bound)
4701 {
4702 tree sbound = bound;
4703 STRIP_NOPS (sbound);
4704
4705 if (TREE_CODE (sbound) == SSA_NAME
4706 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4707 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4708 && data->body_includes_call)
4709 return COSTS_N_INSNS (1);
4710
4711 return 0;
4712 }
4713
4714 /* Determines cost of basing replacement of USE on CAND in a condition. */
4715
4716 static bool
4717 determine_use_iv_cost_condition (struct ivopts_data *data,
4718 struct iv_use *use, struct iv_cand *cand)
4719 {
4720 tree bound = NULL_TREE;
4721 struct iv *cmp_iv;
4722 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4723 comp_cost elim_cost, express_cost, cost, bound_cost;
4724 bool ok;
4725 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4726 tree *control_var, *bound_cst;
4727 enum tree_code comp = ERROR_MARK;
4728
4729 /* Only consider real candidates. */
4730 if (!cand->iv)
4731 {
4732 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4733 ERROR_MARK, -1);
4734 return false;
4735 }
4736
4737 /* Try iv elimination. */
4738 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4739 {
4740 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4741 if (elim_cost.cost == 0)
4742 elim_cost.cost = parm_decl_cost (data, bound);
4743 else if (TREE_CODE (bound) == INTEGER_CST)
4744 elim_cost.cost = 0;
4745 /* If we replace a loop condition 'i < n' with 'p < base + n',
4746 depends_on_elim will have 'base' and 'n' set, which implies
4747 that both 'base' and 'n' will be live during the loop. More likely,
4748 'base + n' will be loop invariant, resulting in only one live value
4749 during the loop. So in that case we clear depends_on_elim and set
4750 elim_inv_expr_id instead. */
4751 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4752 {
4753 elim_inv_expr_id = get_expr_id (data, bound);
4754 bitmap_clear (depends_on_elim);
4755 }
4756 /* The bound is a loop invariant, so it will be only computed
4757 once. */
4758 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4759 }
4760 else
4761 elim_cost = infinite_cost;
4762
4763 /* Try expressing the original giv. If it is compared with an invariant,
4764 note that we cannot get rid of it. */
4765 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4766 NULL, &cmp_iv);
4767 gcc_assert (ok);
4768
4769 /* When the condition is a comparison of the candidate IV against
4770 zero, prefer this IV.
4771
4772 TODO: The constant that we're subtracting from the cost should
4773 be target-dependent. This information should be added to the
4774 target costs for each backend. */
4775 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4776 && integer_zerop (*bound_cst)
4777 && (operand_equal_p (*control_var, cand->var_after, 0)
4778 || operand_equal_p (*control_var, cand->var_before, 0)))
4779 elim_cost.cost -= 1;
4780
4781 express_cost = get_computation_cost (data, use, cand, false,
4782 &depends_on_express, NULL,
4783 &express_inv_expr_id);
4784 fd_ivopts_data = data;
4785 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4786
4787 /* Count the cost of the original bound as well. */
4788 bound_cost = force_var_cost (data, *bound_cst, NULL);
4789 if (bound_cost.cost == 0)
4790 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4791 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4792 bound_cost.cost = 0;
4793 express_cost.cost += bound_cost.cost;
4794
4795 /* Choose the better approach, preferring the eliminated IV. */
4796 if (compare_costs (elim_cost, express_cost) <= 0)
4797 {
4798 cost = elim_cost;
4799 depends_on = depends_on_elim;
4800 depends_on_elim = NULL;
4801 inv_expr_id = elim_inv_expr_id;
4802 }
4803 else
4804 {
4805 cost = express_cost;
4806 depends_on = depends_on_express;
4807 depends_on_express = NULL;
4808 bound = NULL_TREE;
4809 comp = ERROR_MARK;
4810 inv_expr_id = express_inv_expr_id;
4811 }
4812
4813 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4814
4815 if (depends_on_elim)
4816 BITMAP_FREE (depends_on_elim);
4817 if (depends_on_express)
4818 BITMAP_FREE (depends_on_express);
4819
4820 return !infinite_cost_p (cost);
4821 }
4822
4823 /* Determines cost of basing replacement of USE on CAND. Returns false
4824 if USE cannot be based on CAND. */
4825
4826 static bool
4827 determine_use_iv_cost (struct ivopts_data *data,
4828 struct iv_use *use, struct iv_cand *cand)
4829 {
4830 switch (use->type)
4831 {
4832 case USE_NONLINEAR_EXPR:
4833 return determine_use_iv_cost_generic (data, use, cand);
4834
4835 case USE_ADDRESS:
4836 return determine_use_iv_cost_address (data, use, cand);
4837
4838 case USE_COMPARE:
4839 return determine_use_iv_cost_condition (data, use, cand);
4840
4841 default:
4842 gcc_unreachable ();
4843 }
4844 }
4845
4846 /* Return true if get_computation_cost indicates that autoincrement is
4847 a possibility for the pair of USE and CAND, false otherwise. */
4848
4849 static bool
4850 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4851 struct iv_cand *cand)
4852 {
4853 bitmap depends_on;
4854 bool can_autoinc;
4855 comp_cost cost;
4856
4857 if (use->type != USE_ADDRESS)
4858 return false;
4859
4860 cost = get_computation_cost (data, use, cand, true, &depends_on,
4861 &can_autoinc, NULL);
4862
4863 BITMAP_FREE (depends_on);
4864
4865 return !infinite_cost_p (cost) && can_autoinc;
4866 }
4867
4868 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4869 use that allows autoincrement, and set their AINC_USE if possible. */
4870
4871 static void
4872 set_autoinc_for_original_candidates (struct ivopts_data *data)
4873 {
4874 unsigned i, j;
4875
4876 for (i = 0; i < n_iv_cands (data); i++)
4877 {
4878 struct iv_cand *cand = iv_cand (data, i);
4879 struct iv_use *closest_before = NULL;
4880 struct iv_use *closest_after = NULL;
4881 if (cand->pos != IP_ORIGINAL)
4882 continue;
4883
4884 for (j = 0; j < n_iv_uses (data); j++)
4885 {
4886 struct iv_use *use = iv_use (data, j);
4887 unsigned uid = gimple_uid (use->stmt);
4888
4889 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4890 continue;
4891
4892 if (uid < gimple_uid (cand->incremented_at)
4893 && (closest_before == NULL
4894 || uid > gimple_uid (closest_before->stmt)))
4895 closest_before = use;
4896
4897 if (uid > gimple_uid (cand->incremented_at)
4898 && (closest_after == NULL
4899 || uid < gimple_uid (closest_after->stmt)))
4900 closest_after = use;
4901 }
4902
4903 if (closest_before != NULL
4904 && autoinc_possible_for_pair (data, closest_before, cand))
4905 cand->ainc_use = closest_before;
4906 else if (closest_after != NULL
4907 && autoinc_possible_for_pair (data, closest_after, cand))
4908 cand->ainc_use = closest_after;
4909 }
4910 }
4911
4912 /* Finds the candidates for the induction variables. */
4913
4914 static void
4915 find_iv_candidates (struct ivopts_data *data)
4916 {
4917 /* Add commonly used ivs. */
4918 add_standard_iv_candidates (data);
4919
4920 /* Add old induction variables. */
4921 add_old_ivs_candidates (data);
4922
4923 /* Add induction variables derived from uses. */
4924 add_derived_ivs_candidates (data);
4925
4926 set_autoinc_for_original_candidates (data);
4927
4928 /* Record the important candidates. */
4929 record_important_candidates (data);
4930 }
4931
4932 /* Determines costs of basing the use of the iv on an iv candidate. */
4933
4934 static void
4935 determine_use_iv_costs (struct ivopts_data *data)
4936 {
4937 unsigned i, j;
4938 struct iv_use *use;
4939 struct iv_cand *cand;
4940 bitmap to_clear = BITMAP_ALLOC (NULL);
4941
4942 alloc_use_cost_map (data);
4943
4944 for (i = 0; i < n_iv_uses (data); i++)
4945 {
4946 use = iv_use (data, i);
4947
4948 if (data->consider_all_candidates)
4949 {
4950 for (j = 0; j < n_iv_cands (data); j++)
4951 {
4952 cand = iv_cand (data, j);
4953 determine_use_iv_cost (data, use, cand);
4954 }
4955 }
4956 else
4957 {
4958 bitmap_iterator bi;
4959
4960 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4961 {
4962 cand = iv_cand (data, j);
4963 if (!determine_use_iv_cost (data, use, cand))
4964 bitmap_set_bit (to_clear, j);
4965 }
4966
4967 /* Remove the candidates for which the cost is infinite from
4968 the list of related candidates. */
4969 bitmap_and_compl_into (use->related_cands, to_clear);
4970 bitmap_clear (to_clear);
4971 }
4972 }
4973
4974 BITMAP_FREE (to_clear);
4975
4976 if (dump_file && (dump_flags & TDF_DETAILS))
4977 {
4978 fprintf (dump_file, "Use-candidate costs:\n");
4979
4980 for (i = 0; i < n_iv_uses (data); i++)
4981 {
4982 use = iv_use (data, i);
4983
4984 fprintf (dump_file, "Use %d:\n", i);
4985 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4986 for (j = 0; j < use->n_map_members; j++)
4987 {
4988 if (!use->cost_map[j].cand
4989 || infinite_cost_p (use->cost_map[j].cost))
4990 continue;
4991
4992 fprintf (dump_file, " %d\t%d\t%d\t",
4993 use->cost_map[j].cand->id,
4994 use->cost_map[j].cost.cost,
4995 use->cost_map[j].cost.complexity);
4996 if (use->cost_map[j].depends_on)
4997 bitmap_print (dump_file,
4998 use->cost_map[j].depends_on, "","");
4999 if (use->cost_map[j].inv_expr_id != -1)
5000 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5001 fprintf (dump_file, "\n");
5002 }
5003
5004 fprintf (dump_file, "\n");
5005 }
5006 fprintf (dump_file, "\n");
5007 }
5008 }
5009
5010 /* Determines cost of the candidate CAND. */
5011
5012 static void
5013 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5014 {
5015 comp_cost cost_base;
5016 unsigned cost, cost_step;
5017 tree base;
5018
5019 if (!cand->iv)
5020 {
5021 cand->cost = 0;
5022 return;
5023 }
5024
5025 /* There are two costs associated with the candidate -- its increment
5026 and its initialization. The second is almost negligible for any loop
5027 that rolls enough, so we give it only very little weight. */
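/* A sketch of the accounting, assuming adjust_setup_cost amortizes the
   setup cost over the expected iteration count: with cost_base.cost == 4
   and a loop expected to roll 100 times, the setup contributes almost
   nothing and the total is dominated by cost_step.  */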
5028
5029 base = cand->iv->base;
5030 cost_base = force_var_cost (data, base, NULL);
5031 /* It is exceptional for the iv register to happen to be initialized with
5032 the proper value at no cost. In general, there will at least be a regcopy
5033 or a const set. */
5034 if (cost_base.cost == 0)
5035 cost_base.cost = COSTS_N_INSNS (1);
5036 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5037
5038 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5039
5040 /* Prefer the original ivs unless we may gain something by replacing them.
5041 The reason is to make debugging simpler; hence this is not relevant for
5042 artificial ivs created by other optimization passes. */
5043 if (cand->pos != IP_ORIGINAL
5044 || !SSA_NAME_VAR (cand->var_before)
5045 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5046 cost++;
5047
5048 /* Prefer not to insert statements into the latch unless there are some
5049 already (so that we do not create unnecessary jumps). */
5050 if (cand->pos == IP_END
5051 && empty_block_p (ip_end_pos (data->current_loop)))
5052 cost++;
5053
5054 cand->cost = cost;
5055 cand->cost_step = cost_step;
5056 }
5057
5058 /* Determines costs of computation of the candidates. */
5059
5060 static void
5061 determine_iv_costs (struct ivopts_data *data)
5062 {
5063 unsigned i;
5064
5065 if (dump_file && (dump_flags & TDF_DETAILS))
5066 {
5067 fprintf (dump_file, "Candidate costs:\n");
5068 fprintf (dump_file, " cand\tcost\n");
5069 }
5070
5071 for (i = 0; i < n_iv_cands (data); i++)
5072 {
5073 struct iv_cand *cand = iv_cand (data, i);
5074
5075 determine_iv_cost (data, cand);
5076
5077 if (dump_file && (dump_flags & TDF_DETAILS))
5078 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5079 }
5080
5081 if (dump_file && (dump_flags & TDF_DETAILS))
5082 fprintf (dump_file, "\n");
5083 }
5084
5085 /* Calculates cost for having SIZE induction variables. */
5086
5087 static unsigned
5088 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5089 {
5090 /* We add size to the cost, so that we prefer eliminating ivs
5091 if possible. */
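/* E.g., two sets with the same estimated spill cost but 3 vs. 4 ivs
   then differ by one unit here, breaking the tie in favor of the
   smaller set (illustrative values only).  */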
5092 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5093 data->body_includes_call);
5094 }
5095
5096 /* For each size of the induction variable set determine the penalty. */
5097
5098 static void
5099 determine_set_costs (struct ivopts_data *data)
5100 {
5101 unsigned j, n;
5102 gimple phi;
5103 gimple_stmt_iterator psi;
5104 tree op;
5105 struct loop *loop = data->current_loop;
5106 bitmap_iterator bi;
5107
5108 if (dump_file && (dump_flags & TDF_DETAILS))
5109 {
5110 fprintf (dump_file, "Global costs:\n");
5111 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5112 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5113 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5114 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5115 }
5116
5117 n = 0;
5118 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5119 {
5120 phi = gsi_stmt (psi);
5121 op = PHI_RESULT (phi);
5122
5123 if (virtual_operand_p (op))
5124 continue;
5125
5126 if (get_iv (data, op))
5127 continue;
5128
5129 n++;
5130 }
5131
5132 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5133 {
5134 struct version_info *info = ver_info (data, j);
5135
5136 if (info->inv_id && info->has_nonlin_use)
5137 n++;
5138 }
5139
5140 data->regs_used = n;
5141 if (dump_file && (dump_flags & TDF_DETAILS))
5142 fprintf (dump_file, " regs_used %d\n", n);
5143
5144 if (dump_file && (dump_flags & TDF_DETAILS))
5145 {
5146 fprintf (dump_file, " cost for size:\n");
5147 fprintf (dump_file, " ivs\tcost\n");
5148 for (j = 0; j <= 2 * target_avail_regs; j++)
5149 fprintf (dump_file, " %d\t%d\n", j,
5150 ivopts_global_cost_for_size (data, j));
5151 fprintf (dump_file, "\n");
5152 }
5153 }
5154
5155 /* Returns true if A is a cheaper cost pair than B. */
5156
5157 static bool
5158 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5159 {
5160 int cmp;
5161
5162 if (!a)
5163 return false;
5164
5165 if (!b)
5166 return true;
5167
5168 cmp = compare_costs (a->cost, b->cost);
5169 if (cmp < 0)
5170 return true;
5171
5172 if (cmp > 0)
5173 return false;
5174
5175 /* In case the costs are the same, prefer the cheaper candidate. */
5176 if (a->cand->cost < b->cand->cost)
5177 return true;
5178
5179 return false;
5180 }
5181
5182
5183 /* Returns the candidate by which USE is expressed in IVS. */
5184
5185 static struct cost_pair *
5186 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5187 {
5188 return ivs->cand_for_use[use->id];
5189 }
5190
5191 /* Computes the cost field of the IVS structure. */
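/* The total is the per-use costs, plus the candidate increment costs,
   plus a register pressure penalty charged for each candidate register
   and each live invariant expression.  */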
5192
5193 static void
5194 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5195 {
5196 comp_cost cost = ivs->cand_use_cost;
5197
5198 cost.cost += ivs->cand_cost;
5199
5200 cost.cost += ivopts_global_cost_for_size (data,
5201 ivs->n_regs + ivs->num_used_inv_expr);
5202
5203 ivs->cost = cost;
5204 }
5205
5206 /* Remove the invariants in set INVS from set IVS. */
5207
5208 static void
5209 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5210 {
5211 bitmap_iterator bi;
5212 unsigned iid;
5213
5214 if (!invs)
5215 return;
5216
5217 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5218 {
5219 ivs->n_invariant_uses[iid]--;
5220 if (ivs->n_invariant_uses[iid] == 0)
5221 ivs->n_regs--;
5222 }
5223 }
5224
5225 /* Set USE not to be expressed by any candidate in IVS. */
5226
5227 static void
5228 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5229 struct iv_use *use)
5230 {
5231 unsigned uid = use->id, cid;
5232 struct cost_pair *cp;
5233
5234 cp = ivs->cand_for_use[uid];
5235 if (!cp)
5236 return;
5237 cid = cp->cand->id;
5238
5239 ivs->bad_uses++;
5240 ivs->cand_for_use[uid] = NULL;
5241 ivs->n_cand_uses[cid]--;
5242
5243 if (ivs->n_cand_uses[cid] == 0)
5244 {
5245 bitmap_clear_bit (ivs->cands, cid);
5246 /* Do not count the pseudocandidates. */
5247 if (cp->cand->iv)
5248 ivs->n_regs--;
5249 ivs->n_cands--;
5250 ivs->cand_cost -= cp->cand->cost;
5251
5252 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5253 }
5254
5255 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5256
5257 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5258
5259 if (cp->inv_expr_id != -1)
5260 {
5261 ivs->used_inv_expr[cp->inv_expr_id]--;
5262 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5263 ivs->num_used_inv_expr--;
5264 }
5265 iv_ca_recount_cost (data, ivs);
5266 }
5267
5268 /* Add invariants in set INVS to set IVS. */
5269
5270 static void
5271 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5272 {
5273 bitmap_iterator bi;
5274 unsigned iid;
5275
5276 if (!invs)
5277 return;
5278
5279 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5280 {
5281 ivs->n_invariant_uses[iid]++;
5282 if (ivs->n_invariant_uses[iid] == 1)
5283 ivs->n_regs++;
5284 }
5285 }
5286
5287 /* Set cost pair for USE in set IVS to CP. */
5288
5289 static void
5290 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5291 struct iv_use *use, struct cost_pair *cp)
5292 {
5293 unsigned uid = use->id, cid;
5294
5295 if (ivs->cand_for_use[uid] == cp)
5296 return;
5297
5298 if (ivs->cand_for_use[uid])
5299 iv_ca_set_no_cp (data, ivs, use);
5300
5301 if (cp)
5302 {
5303 cid = cp->cand->id;
5304
5305 ivs->bad_uses--;
5306 ivs->cand_for_use[uid] = cp;
5307 ivs->n_cand_uses[cid]++;
5308 if (ivs->n_cand_uses[cid] == 1)
5309 {
5310 bitmap_set_bit (ivs->cands, cid);
5311 /* Do not count the pseudocandidates. */
5312 if (cp->cand->iv)
5313 ivs->n_regs++;
5314 ivs->n_cands++;
5315 ivs->cand_cost += cp->cand->cost;
5316
5317 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5318 }
5319
5320 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5321 iv_ca_set_add_invariants (ivs, cp->depends_on);
5322
5323 if (cp->inv_expr_id != -1)
5324 {
5325 ivs->used_inv_expr[cp->inv_expr_id]++;
5326 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5327 ivs->num_used_inv_expr++;
5328 }
5329 iv_ca_recount_cost (data, ivs);
5330 }
5331 }
5332
5333 /* Extend set IVS by expressing USE by some of the candidates in it
5334 if possible. All important candidates will be considered
5335 if IMPORTANT_CANDIDATES is true. */
5336
5337 static void
5338 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5339 struct iv_use *use, bool important_candidates)
5340 {
5341 struct cost_pair *best_cp = NULL, *cp;
5342 bitmap_iterator bi;
5343 bitmap cands;
5344 unsigned i;
5345
5346 gcc_assert (ivs->upto >= use->id);
5347
5348 if (ivs->upto == use->id)
5349 {
5350 ivs->upto++;
5351 ivs->bad_uses++;
5352 }
5353
5354 cands = (important_candidates ? data->important_candidates : ivs->cands);
5355 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5356 {
5357 struct iv_cand *cand = iv_cand (data, i);
5358
5359 cp = get_use_iv_cost (data, use, cand);
5360
5361 if (cheaper_cost_pair (cp, best_cp))
5362 best_cp = cp;
5363 }
5364
5365 iv_ca_set_cp (data, ivs, use, best_cp);
5366 }
5367
5368 /* Get cost for assignment IVS. */
5369
5370 static comp_cost
5371 iv_ca_cost (struct iv_ca *ivs)
5372 {
5373 /* This was a conditional expression but it triggered a bug in
5374 Sun C 5.5. */
5375 if (ivs->bad_uses)
5376 return infinite_cost;
5377 else
5378 return ivs->cost;
5379 }
5380
5381 /* Returns true if all dependences of CP are among invariants in IVS. */
5382
5383 static bool
5384 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5385 {
5386 unsigned i;
5387 bitmap_iterator bi;
5388
5389 if (!cp->depends_on)
5390 return true;
5391
5392 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5393 {
5394 if (ivs->n_invariant_uses[i] == 0)
5395 return false;
5396 }
5397
5398 return true;
5399 }
5400
5401 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5402 it before NEXT_CHANGE. */
5403
5404 static struct iv_ca_delta *
5405 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5406 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5407 {
5408 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5409
5410 change->use = use;
5411 change->old_cp = old_cp;
5412 change->new_cp = new_cp;
5413 change->next_change = next_change;
5414
5415 return change;
5416 }
5417
5418 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5419 are rewritten. */
5420
5421 static struct iv_ca_delta *
5422 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5423 {
5424 struct iv_ca_delta *last;
5425
5426 if (!l2)
5427 return l1;
5428
5429 if (!l1)
5430 return l2;
5431
5432 for (last = l1; last->next_change; last = last->next_change)
5433 continue;
5434 last->next_change = l2;
5435
5436 return l1;
5437 }
5438
5439 /* Reverse the list of changes DELTA, forming the inverse to it. */
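/* Besides reversing the order, OLD_CP and NEW_CP are swapped in each
   element, so committing the result undoes exactly what committing the
   original DELTA did.  */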
5440
5441 static struct iv_ca_delta *
5442 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5443 {
5444 struct iv_ca_delta *act, *next, *prev = NULL;
5445 struct cost_pair *tmp;
5446
5447 for (act = delta; act; act = next)
5448 {
5449 next = act->next_change;
5450 act->next_change = prev;
5451 prev = act;
5452
5453 tmp = act->old_cp;
5454 act->old_cp = act->new_cp;
5455 act->new_cp = tmp;
5456 }
5457
5458 return prev;
5459 }
5460
5461 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5462 reverted instead. */
5463
5464 static void
5465 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5466 struct iv_ca_delta *delta, bool forward)
5467 {
5468 struct cost_pair *from, *to;
5469 struct iv_ca_delta *act;
5470
5471 if (!forward)
5472 delta = iv_ca_delta_reverse (delta);
5473
5474 for (act = delta; act; act = act->next_change)
5475 {
5476 from = act->old_cp;
5477 to = act->new_cp;
5478 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5479 iv_ca_set_cp (data, ivs, act->use, to);
5480 }
5481
5482 if (!forward)
5483 iv_ca_delta_reverse (delta);
5484 }
5485
5486 /* Returns true if CAND is used in IVS. */
5487
5488 static bool
5489 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5490 {
5491 return ivs->n_cand_uses[cand->id] > 0;
5492 }
5493
5494 /* Returns number of induction variable candidates in the set IVS. */
5495
5496 static unsigned
5497 iv_ca_n_cands (struct iv_ca *ivs)
5498 {
5499 return ivs->n_cands;
5500 }
5501
5502 /* Free the list of changes DELTA. */
5503
5504 static void
5505 iv_ca_delta_free (struct iv_ca_delta **delta)
5506 {
5507 struct iv_ca_delta *act, *next;
5508
5509 for (act = *delta; act; act = next)
5510 {
5511 next = act->next_change;
5512 free (act);
5513 }
5514
5515 *delta = NULL;
5516 }
5517
5518 /* Allocates a new iv candidate assignment. */
5519
5520 static struct iv_ca *
5521 iv_ca_new (struct ivopts_data *data)
5522 {
5523 struct iv_ca *nw = XNEW (struct iv_ca);
5524
5525 nw->upto = 0;
5526 nw->bad_uses = 0;
5527 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5528 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5529 nw->cands = BITMAP_ALLOC (NULL);
5530 nw->n_cands = 0;
5531 nw->n_regs = 0;
5532 nw->cand_use_cost = no_cost;
5533 nw->cand_cost = 0;
5534 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5535 nw->cost = no_cost;
5536 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5537 nw->num_used_inv_expr = 0;
5538
5539 return nw;
5540 }
5541
5542 /* Free memory occupied by the set IVS. */
5543
5544 static void
5545 iv_ca_free (struct iv_ca **ivs)
5546 {
5547 free ((*ivs)->cand_for_use);
5548 free ((*ivs)->n_cand_uses);
5549 BITMAP_FREE ((*ivs)->cands);
5550 free ((*ivs)->n_invariant_uses);
5551 free ((*ivs)->used_inv_expr);
5552 free (*ivs);
5553 *ivs = NULL;
5554 }
5555
5556 /* Dumps IVS to FILE. */
5557
5558 static void
5559 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5560 {
5561 const char *pref = " invariants ";
5562 unsigned i;
5563 comp_cost cost = iv_ca_cost (ivs);
5564
5565 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5566 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5567 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5568 bitmap_print (file, ivs->cands, " candidates: ","\n");
5569
5570 for (i = 0; i < ivs->upto; i++)
5571 {
5572 struct iv_use *use = iv_use (data, i);
5573 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5574 if (cp)
5575 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5576 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5577 else
5578 fprintf (file, " use:%d --> ??\n", use->id);
5579 }
5580
5581 for (i = 1; i <= data->max_inv_id; i++)
5582 if (ivs->n_invariant_uses[i])
5583 {
5584 fprintf (file, "%s%d", pref, i);
5585 pref = ", ";
5586 }
5587 fprintf (file, "\n\n");
5588 }
5589
5590 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5591 new set, and store differences in DELTA. Number of induction variables
5592 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5593 the function will try to find a solution with a minimal number of iv candidates. */
5594
5595 static comp_cost
5596 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5597 struct iv_cand *cand, struct iv_ca_delta **delta,
5598 unsigned *n_ivs, bool min_ncand)
5599 {
5600 unsigned i;
5601 comp_cost cost;
5602 struct iv_use *use;
5603 struct cost_pair *old_cp, *new_cp;
5604
5605 *delta = NULL;
5606 for (i = 0; i < ivs->upto; i++)
5607 {
5608 use = iv_use (data, i);
5609 old_cp = iv_ca_cand_for_use (ivs, use);
5610
5611 if (old_cp
5612 && old_cp->cand == cand)
5613 continue;
5614
5615 new_cp = get_use_iv_cost (data, use, cand);
5616 if (!new_cp)
5617 continue;
5618
5619 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5620 continue;
5621
5622 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5623 continue;
5624
5625 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5626 }
5627
5628 iv_ca_delta_commit (data, ivs, *delta, true);
5629 cost = iv_ca_cost (ivs);
5630 if (n_ivs)
5631 *n_ivs = iv_ca_n_cands (ivs);
5632 iv_ca_delta_commit (data, ivs, *delta, false);
5633
5634 return cost;
5635 }
5636
5637 /* Try narrowing set IVS by removing CAND. Return the cost of
5638 the new set and store the differences in DELTA. */
5639
5640 static comp_cost
5641 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5642 struct iv_cand *cand, struct iv_ca_delta **delta)
5643 {
5644 unsigned i, ci;
5645 struct iv_use *use;
5646 struct cost_pair *old_cp, *new_cp, *cp;
5647 bitmap_iterator bi;
5648 struct iv_cand *cnd;
5649 comp_cost cost;
5650
5651 *delta = NULL;
5652 for (i = 0; i < n_iv_uses (data); i++)
5653 {
5654 use = iv_use (data, i);
5655
5656 old_cp = iv_ca_cand_for_use (ivs, use);
5657 if (old_cp->cand != cand)
5658 continue;
5659
5660 new_cp = NULL;
5661
5662 if (data->consider_all_candidates)
5663 {
5664 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5665 {
5666 if (ci == cand->id)
5667 continue;
5668
5669 cnd = iv_cand (data, ci);
5670
5671 cp = get_use_iv_cost (data, use, cnd);
5672 if (!cp)
5673 continue;
5674
5675 if (!iv_ca_has_deps (ivs, cp))
5676 continue;
5677
5678 if (!cheaper_cost_pair (cp, new_cp))
5679 continue;
5680
5681 new_cp = cp;
5682 }
5683 }
5684 else
5685 {
5686 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5687 {
5688 if (ci == cand->id)
5689 continue;
5690
5691 cnd = iv_cand (data, ci);
5692
5693 cp = get_use_iv_cost (data, use, cnd);
5694 if (!cp)
5695 continue;
5696 if (!iv_ca_has_deps (ivs, cp))
5697 continue;
5698
5699 if (!cheaper_cost_pair (cp, new_cp))
5700 continue;
5701
5702 new_cp = cp;
5703 }
5704 }
5705
5706 if (!new_cp)
5707 {
5708 iv_ca_delta_free (delta);
5709 return infinite_cost;
5710 }
5711
5712 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5713 }
5714
5715 iv_ca_delta_commit (data, ivs, *delta, true);
5716 cost = iv_ca_cost (ivs);
5717 iv_ca_delta_commit (data, ivs, *delta, false);
5718
5719 return cost;
5720 }
5721
5722 /* Try optimizing the set of candidates IVS by removing candidates other
5723 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5724 differences in DELTA. */
5725
5726 static comp_cost
5727 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5728 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5729 {
5730 bitmap_iterator bi;
5731 struct iv_ca_delta *act_delta, *best_delta;
5732 unsigned i;
5733 comp_cost best_cost, acost;
5734 struct iv_cand *cand;
5735
5736 best_delta = NULL;
5737 best_cost = iv_ca_cost (ivs);
5738
5739 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5740 {
5741 cand = iv_cand (data, i);
5742
5743 if (cand == except_cand)
5744 continue;
5745
5746 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5747
5748 if (compare_costs (acost, best_cost) < 0)
5749 {
5750 best_cost = acost;
5751 iv_ca_delta_free (&best_delta);
5752 best_delta = act_delta;
5753 }
5754 else
5755 iv_ca_delta_free (&act_delta);
5756 }
5757
5758 if (!best_delta)
5759 {
5760 *delta = NULL;
5761 return best_cost;
5762 }
5763
5764 /* Recurse to possibly remove other unnecessary ivs. */
5765 iv_ca_delta_commit (data, ivs, best_delta, true);
5766 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5767 iv_ca_delta_commit (data, ivs, best_delta, false);
5768 *delta = iv_ca_delta_join (best_delta, *delta);
5769 return best_cost;
5770 }
5771
5772 /* Tries to extend the set IVS in the best possible way in order
5773 to express the USE. If ORIGINALP is true, prefer candidates from
5774 the original set of IVs, otherwise favor important candidates not
5775 based on any memory object. */
5776
5777 static bool
5778 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5779 struct iv_use *use, bool originalp)
5780 {
5781 comp_cost best_cost, act_cost;
5782 unsigned i;
5783 bitmap_iterator bi;
5784 struct iv_cand *cand;
5785 struct iv_ca_delta *best_delta = NULL, *act_delta;
5786 struct cost_pair *cp;
5787
5788 iv_ca_add_use (data, ivs, use, false);
5789 best_cost = iv_ca_cost (ivs);
5790
5791 cp = iv_ca_cand_for_use (ivs, use);
5792 if (!cp)
5793 {
5794 ivs->upto--;
5795 ivs->bad_uses--;
5796 iv_ca_add_use (data, ivs, use, true);
5797 best_cost = iv_ca_cost (ivs);
5798 cp = iv_ca_cand_for_use (ivs, use);
5799 }
5800 if (cp)
5801 {
5802 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5803 iv_ca_set_no_cp (data, ivs, use);
5804 }
5805
5806 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5807 first try important candidates not based on any memory object. Only if
5808 this fails do we try the specific ones. Rationale -- in loops with many
5809 variables the best choice often is to use just one generic biv. If we
5810 added many ivs specific to the uses here, the optimization algorithm later
5811 would be likely to get stuck in a local minimum, thus causing us to create
5812 too many ivs. The approach from few ivs to more seems more likely to be
5813 successful -- starting from few ivs, replacing an expensive use by a
5814 specific iv should always be a win. */
5815 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5816 {
5817 cand = iv_cand (data, i);
5818
5819 if (originalp && cand->pos != IP_ORIGINAL)
5820 continue;
5821
5822 if (!originalp && cand->iv->base_object != NULL_TREE)
5823 continue;
5824
5825 if (iv_ca_cand_used_p (ivs, cand))
5826 continue;
5827
5828 cp = get_use_iv_cost (data, use, cand);
5829 if (!cp)
5830 continue;
5831
5832 iv_ca_set_cp (data, ivs, use, cp);
5833 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5834 true);
5835 iv_ca_set_no_cp (data, ivs, use);
5836 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5837
5838 if (compare_costs (act_cost, best_cost) < 0)
5839 {
5840 best_cost = act_cost;
5841
5842 iv_ca_delta_free (&best_delta);
5843 best_delta = act_delta;
5844 }
5845 else
5846 iv_ca_delta_free (&act_delta);
5847 }
5848
5849 if (infinite_cost_p (best_cost))
5850 {
5851 for (i = 0; i < use->n_map_members; i++)
5852 {
5853 cp = use->cost_map + i;
5854 cand = cp->cand;
5855 if (!cand)
5856 continue;
5857
5858 /* Already tried this. */
5859 if (cand->important)
5860 {
5861 if (originalp && cand->pos == IP_ORIGINAL)
5862 continue;
5863 if (!originalp && cand->iv->base_object == NULL_TREE)
5864 continue;
5865 }
5866
5867 if (iv_ca_cand_used_p (ivs, cand))
5868 continue;
5869
5870 act_delta = NULL;
5871 iv_ca_set_cp (data, ivs, use, cp);
5872 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5873 iv_ca_set_no_cp (data, ivs, use);
5874 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5875 cp, act_delta);
5876
5877 if (compare_costs (act_cost, best_cost) < 0)
5878 {
5879 best_cost = act_cost;
5880
5881 if (best_delta)
5882 iv_ca_delta_free (&best_delta);
5883 best_delta = act_delta;
5884 }
5885 else
5886 iv_ca_delta_free (&act_delta);
5887 }
5888 }
5889
5890 iv_ca_delta_commit (data, ivs, best_delta, true);
5891 iv_ca_delta_free (&best_delta);
5892
5893 return !infinite_cost_p (best_cost);
5894 }
5895
5896 /* Finds an initial assignment of candidates to uses. */
5897
5898 static struct iv_ca *
5899 get_initial_solution (struct ivopts_data *data, bool originalp)
5900 {
5901 struct iv_ca *ivs = iv_ca_new (data);
5902 unsigned i;
5903
5904 for (i = 0; i < n_iv_uses (data); i++)
5905 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5906 {
5907 iv_ca_free (&ivs);
5908 return NULL;
5909 }
5910
5911 return ivs;
5912 }
5913
5914 /* Tries to improve the set of induction variables IVS. */
5915
5916 static bool
5917 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5918 {
5919 unsigned i, n_ivs;
5920 comp_cost acost, best_cost = iv_ca_cost (ivs);
5921 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5922 struct iv_cand *cand;
5923
5924 /* Try extending the set of induction variables by one. */
5925 for (i = 0; i < n_iv_cands (data); i++)
5926 {
5927 cand = iv_cand (data, i);
5928
5929 if (iv_ca_cand_used_p (ivs, cand))
5930 continue;
5931
5932 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5933 if (!act_delta)
5934 continue;
5935
5936 /* If we successfully added the candidate and the set is small enough,
5937 try optimizing it by removing other candidates. */
5938 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5939 {
5940 iv_ca_delta_commit (data, ivs, act_delta, true);
5941 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5942 iv_ca_delta_commit (data, ivs, act_delta, false);
5943 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5944 }
5945
5946 if (compare_costs (acost, best_cost) < 0)
5947 {
5948 best_cost = acost;
5949 iv_ca_delta_free (&best_delta);
5950 best_delta = act_delta;
5951 }
5952 else
5953 iv_ca_delta_free (&act_delta);
5954 }
5955
5956 if (!best_delta)
5957 {
5958 /* Try removing the candidates from the set instead. */
5959 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5960
5961 /* Nothing more we can do. */
5962 if (!best_delta)
5963 return false;
5964 }
5965
5966 iv_ca_delta_commit (data, ivs, best_delta, true);
5967 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5968 iv_ca_delta_free (&best_delta);
5969 return true;
5970 }
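/* A worked example of one improvement step, with purely hypothetical
   costs: starting from IVS = {c1} at cost 20, extending with c2 gives
   {c1, c2} at cost 22, but the iv_ca_prune call may find that c1 has
   become redundant, so the joined delta describes {c1} -> {c2} at
   cost 15; that delta beats best_cost and is committed. */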
5971
5972 /* Attempts to find the optimal set of induction variables. We use a
5973 simple greedy heuristic: we try to replace at most one candidate in the
5974 selected solution and remove the unused ivs while this improves the cost. */
5975
5976 static struct iv_ca *
5977 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5978 {
5979 struct iv_ca *set;
5980
5981 /* Get the initial solution. */
5982 set = get_initial_solution (data, originalp);
5983 if (!set)
5984 {
5985 if (dump_file && (dump_flags & TDF_DETAILS))
5986 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5987 return NULL;
5988 }
5989
5990 if (dump_file && (dump_flags & TDF_DETAILS))
5991 {
5992 fprintf (dump_file, "Initial set of candidates:\n");
5993 iv_ca_dump (data, dump_file, set);
5994 }
5995
5996 while (try_improve_iv_set (data, set))
5997 {
5998 if (dump_file && (dump_flags & TDF_DETAILS))
5999 {
6000 fprintf (dump_file, "Improved to:\n");
6001 iv_ca_dump (data, dump_file, set);
6002 }
6003 }
6004
6005 return set;
6006 }
6007
6008 static struct iv_ca *
6009 find_optimal_iv_set (struct ivopts_data *data)
6010 {
6011 unsigned i;
6012 struct iv_ca *set, *origset;
6013 struct iv_use *use;
6014 comp_cost cost, origcost;
6015
6016 /* Determine the cost first using a strategy that starts with the original
6017 IVs, then try again using a strategy that prefers candidates not based
6018 on any memory object. */
6019 origset = find_optimal_iv_set_1 (data, true);
6020 set = find_optimal_iv_set_1 (data, false);
6021
6022 if (!origset && !set)
6023 return NULL;
6024
6025 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6026 cost = set ? iv_ca_cost (set) : infinite_cost;
6027
6028 if (dump_file && (dump_flags & TDF_DETAILS))
6029 {
6030 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6031 origcost.cost, origcost.complexity);
6032 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6033 cost.cost, cost.complexity);
6034 }
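/* For instance (hypothetical numbers), the dump just above might read

     Original cost 24 (complexity 3)

     Final cost 22 (complexity 2)

   in which case the comparison below keeps the second strategy's set. */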
6035
6036 /* Choose the one with the best cost. */
6037 if (compare_costs (origcost, cost) <= 0)
6038 {
6039 if (set)
6040 iv_ca_free (&set);
6041 set = origset;
6042 }
6043 else if (origset)
6044 iv_ca_free (&origset);
6045
6046 for (i = 0; i < n_iv_uses (data); i++)
6047 {
6048 use = iv_use (data, i);
6049 use->selected = iv_ca_cand_for_use (set, use)->cand;
6050 }
6051
6052 return set;
6053 }
6054
6055 /* Creates a new induction variable corresponding to CAND. */
6056
6057 static void
6058 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6059 {
6060 gimple_stmt_iterator incr_pos;
6061 tree base;
6062 bool after = false;
6063
6064 if (!cand->iv)
6065 return;
6066
6067 switch (cand->pos)
6068 {
6069 case IP_NORMAL:
6070 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6071 break;
6072
6073 case IP_END:
6074 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6075 after = true;
6076 break;
6077
6078 case IP_AFTER_USE:
6079 after = true;
6080 /* fall through */
6081 case IP_BEFORE_USE:
6082 incr_pos = gsi_for_stmt (cand->incremented_at);
6083 break;
6084
6085 case IP_ORIGINAL:
6086 /* Mark that the iv is preserved. */
6087 name_info (data, cand->var_before)->preserve_biv = true;
6088 name_info (data, cand->var_after)->preserve_biv = true;
6089
6090 /* Rewrite the increment so that it uses var_before directly. */
6091 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6092 return;
6093 }
6094
6095 gimple_add_tmp_var (cand->var_before);
6096
6097 base = unshare_expr (cand->iv->base);
6098
6099 create_iv (base, unshare_expr (cand->iv->step),
6100 cand->var_before, data->current_loop,
6101 &incr_pos, after, &cand->var_before, &cand->var_after);
6102 }
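/* To recap the placements handled above: IP_NORMAL emits the increment
   just before the exit condition, IP_END at the end of the latch block,
   IP_BEFORE_USE and IP_AFTER_USE immediately around the statement in
   CAND->incremented_at, while IP_ORIGINAL keeps the preexisting
   increment and only marks its biv as preserved. */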
6103
6104 /* Creates new induction variables described in SET. */
6105
6106 static void
6107 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6108 {
6109 unsigned i;
6110 struct iv_cand *cand;
6111 bitmap_iterator bi;
6112
6113 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6114 {
6115 cand = iv_cand (data, i);
6116 create_new_iv (data, cand);
6117 }
6118
6119 if (dump_file && (dump_flags & TDF_DETAILS))
6120 {
6121 fprintf (dump_file, "\nSelected IV set:\n");
6122 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6123 {
6124 cand = iv_cand (data, i);
6125 dump_cand (dump_file, cand);
6126 }
6127 fprintf (dump_file, "\n");
6128 }
6129 }
6130
6131 /* Rewrites USE (definition of iv used in a nonlinear expression)
6132 using candidate CAND. */
6133
6134 static void
6135 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6136 struct iv_use *use, struct iv_cand *cand)
6137 {
6138 tree comp;
6139 tree op, tgt;
6140 gimple ass;
6141 gimple_stmt_iterator bsi;
6142
6143 /* An important special case -- if we are asked to express the value of
6144 the original iv by itself, just exit; there is no need to
6145 introduce a new computation (that might also need casting the
6146 variable to unsigned and back). */
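/* E.g. (illustrative SSA names): for the original biv increment

     i_7 = i_4 + 1;

   with CAND->var_before == i_4 and CAND->var_after == i_7, the
   statement is left untouched below, provided the other operand
   (the step) is invariant in the loop. */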
6147 if (cand->pos == IP_ORIGINAL
6148 && cand->incremented_at == use->stmt)
6149 {
6150 enum tree_code stmt_code;
6151
6152 gcc_assert (is_gimple_assign (use->stmt));
6153 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6154
6155 /* Check whether we may leave the computation unchanged.
6156 This is the case only if it does not rely on other
6157 computations in the loop -- otherwise, the computation
6158 we rely upon may be removed in remove_unused_ivs,
6159 thus leading to an ICE. */
6160 stmt_code = gimple_assign_rhs_code (use->stmt);
6161 if (stmt_code == PLUS_EXPR
6162 || stmt_code == MINUS_EXPR
6163 || stmt_code == POINTER_PLUS_EXPR)
6164 {
6165 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6166 op = gimple_assign_rhs2 (use->stmt);
6167 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6168 op = gimple_assign_rhs1 (use->stmt);
6169 else
6170 op = NULL_TREE;
6171 }
6172 else
6173 op = NULL_TREE;
6174
6175 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6176 return;
6177 }
6178
6179 comp = get_computation (data->current_loop, use, cand);
6180 gcc_assert (comp != NULL_TREE);
6181
6182 switch (gimple_code (use->stmt))
6183 {
6184 case GIMPLE_PHI:
6185 tgt = PHI_RESULT (use->stmt);
6186
6187 /* If we should keep the biv, do not replace it. */
6188 if (name_info (data, tgt)->preserve_biv)
6189 return;
6190
6191 bsi = gsi_after_labels (gimple_bb (use->stmt));
6192 break;
6193
6194 case GIMPLE_ASSIGN:
6195 tgt = gimple_assign_lhs (use->stmt);
6196 bsi = gsi_for_stmt (use->stmt);
6197 break;
6198
6199 default:
6200 gcc_unreachable ();
6201 }
6202
6203 if (!valid_gimple_rhs_p (comp)
6204 || (gimple_code (use->stmt) != GIMPLE_PHI
6205 /* We can't allow re-allocating the stmt as it might still be
6206 pointed to. */
6207 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6208 >= gimple_num_ops (gsi_stmt (bsi)))))
6209 {
6210 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6211 true, GSI_SAME_STMT);
6212 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6213 {
6214 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6215 /* As this isn't a plain copy we have to reset alignment
6216 information. */
6217 if (SSA_NAME_PTR_INFO (comp))
6218 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6219 }
6220 }
6221
6222 if (gimple_code (use->stmt) == GIMPLE_PHI)
6223 {
6224 ass = gimple_build_assign (tgt, comp);
6225 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6226
6227 bsi = gsi_for_stmt (use->stmt);
6228 remove_phi_node (&bsi, false);
6229 }
6230 else
6231 {
6232 gimple_assign_set_rhs_from_tree (&bsi, comp);
6233 use->stmt = gsi_stmt (bsi);
6234 }
6235 }
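/* Sketch of the GIMPLE_PHI case above (illustrative SSA names): a use

     tgt_3 = PHI <...>

   becomes the plain assignment  tgt_3 = comp;  inserted after the
   labels of the PHI's block, and the PHI node itself is removed. */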
6236
6237 /* Performs a peephole optimization to reorder the iv update statement with
6238 a mem ref to enable instruction combining in later phases. The mem ref uses
6239 the iv value before the update, so the reordering transformation requires
6240 adjustment of the offset. CAND is the selected IV_CAND.
6241
6242 Example:
6243
6244 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6245 iv2 = iv1 + 1;
6246
6247 if (t < val) (1)
6248 goto L;
6249 goto Head;
6250
6251
6252 directly propagating t over to (1) would introduce an overlapping live
6253 range and thus increase register pressure. This peephole transforms it into:
6254
6255
6256 iv2 = iv1 + 1;
6257 t = MEM_REF (base, iv2, 8, 8);
6258 if (t < val)
6259 goto L;
6260 goto Head;
6261 */
6262
6263 static void
6264 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6265 {
6266 tree var_after;
6267 gimple iv_update, stmt;
6268 basic_block bb;
6269 gimple_stmt_iterator gsi, gsi_iv;
6270
6271 if (cand->pos != IP_NORMAL)
6272 return;
6273
6274 var_after = cand->var_after;
6275 iv_update = SSA_NAME_DEF_STMT (var_after);
6276
6277 bb = gimple_bb (iv_update);
6278 gsi = gsi_last_nondebug_bb (bb);
6279 stmt = gsi_stmt (gsi);
6280
6281 /* Only handle conditional statements for now. */
6282 if (gimple_code (stmt) != GIMPLE_COND)
6283 return;
6284
6285 gsi_prev_nondebug (&gsi);
6286 stmt = gsi_stmt (gsi);
6287 if (stmt != iv_update)
6288 return;
6289
6290 gsi_prev_nondebug (&gsi);
6291 if (gsi_end_p (gsi))
6292 return;
6293
6294 stmt = gsi_stmt (gsi);
6295 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6296 return;
6297
6298 if (stmt != use->stmt)
6299 return;
6300
6301 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6302 return;
6303
6304 if (dump_file && (dump_flags & TDF_DETAILS))
6305 {
6306 fprintf (dump_file, "Reordering\n");
6307 print_gimple_stmt (dump_file, iv_update, 0, 0);
6308 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6309 fprintf (dump_file, "\n");
6310 }
6311
6312 gsi = gsi_for_stmt (use->stmt);
6313 gsi_iv = gsi_for_stmt (iv_update);
6314 gsi_move_before (&gsi_iv, &gsi);
6315
6316 cand->pos = IP_BEFORE_USE;
6317 cand->incremented_at = use->stmt;
6318 }
6319
6320 /* Rewrites USE (address that is an iv) using candidate CAND. */
6321
6322 static void
6323 rewrite_use_address (struct ivopts_data *data,
6324 struct iv_use *use, struct iv_cand *cand)
6325 {
6326 aff_tree aff;
6327 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6328 tree base_hint = NULL_TREE;
6329 tree ref, iv;
6330 bool ok;
6331
6332 adjust_iv_update_pos (cand, use);
6333 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6334 gcc_assert (ok);
6335 unshare_aff_combination (&aff);
6336
6337 /* To avoid undefined overflow problems, all IV candidates use unsigned
6338 integer types. The drawback is that this makes it impossible for
6339 create_mem_ref to distinguish an IV that is based on a memory object
6340 from one that represents simply an offset.
6341
6342 To work around this problem, we pass a hint to create_mem_ref that
6343 indicates which variable (if any) in aff is an IV based on a memory
6344 object. Note that we only consider the candidate. If this is not
6345 based on an object, the base of the reference is in some subexpression
6346 of the use -- but these will use pointer types, so they are recognized
6347 by the create_mem_ref heuristics anyway. */
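/* For example (illustrative): for a use *p_5 whose chosen candidate is
   based on the memory object p, the candidate's value is carried in an
   unsigned integer type, and this hint is what lets create_mem_ref
   rebuild the reference around a pointer-typed base instead of folding
   everything into an integer offset. */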
6348 if (cand->iv->base_object)
6349 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6350
6351 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6352 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6353 reference_alias_ptr_type (*use->op_p),
6354 iv, base_hint, data->speed);
6355 copy_ref_info (ref, *use->op_p);
6356 *use->op_p = ref;
6357 }
6358
6359 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6360 candidate CAND. */
6361
6362 static void
6363 rewrite_use_compare (struct ivopts_data *data,
6364 struct iv_use *use, struct iv_cand *cand)
6365 {
6366 tree comp, *var_p, op, bound;
6367 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6368 enum tree_code compare;
6369 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6370 bool ok;
6371
6372 bound = cp->value;
6373 if (bound)
6374 {
6375 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6376 tree var_type = TREE_TYPE (var);
6377 gimple_seq stmts;
6378
6379 if (dump_file && (dump_flags & TDF_DETAILS))
6380 {
6381 fprintf (dump_file, "Replacing exit test: ");
6382 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6383 }
6384 compare = cp->comp;
6385 bound = unshare_expr (fold_convert (var_type, bound));
6386 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6387 if (stmts)
6388 gsi_insert_seq_on_edge_immediate (
6389 loop_preheader_edge (data->current_loop),
6390 stmts);
6391
6392 gimple_cond_set_lhs (use->stmt, var);
6393 gimple_cond_set_code (use->stmt, compare);
6394 gimple_cond_set_rhs (use->stmt, op);
6395 return;
6396 }
6397
6398 /* The induction variable elimination failed; just express the original
6399 giv. */
6400 comp = get_computation (data->current_loop, use, cand);
6401 gcc_assert (comp != NULL_TREE);
6402
6403 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6404 gcc_assert (ok);
6405
6406 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6407 true, GSI_SAME_STMT);
6408 }
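/* Illustrative effect of the successful elimination branch above: an
   exit test

     if (i_4 < n_6)

   may be rewritten as  if (ivtmp_8 != _9)  where ivtmp_8 is the
   candidate's value at the statement and _9 computes CP->value in the
   loop preheader. */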
6409
6410 /* Rewrites USE using candidate CAND. */
6411
6412 static void
6413 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6414 {
6415 switch (use->type)
6416 {
6417 case USE_NONLINEAR_EXPR:
6418 rewrite_use_nonlinear_expr (data, use, cand);
6419 break;
6420
6421 case USE_ADDRESS:
6422 rewrite_use_address (data, use, cand);
6423 break;
6424
6425 case USE_COMPARE:
6426 rewrite_use_compare (data, use, cand);
6427 break;
6428
6429 default:
6430 gcc_unreachable ();
6431 }
6432
6433 update_stmt (use->stmt);
6434 }
6435
6436 /* Rewrite the uses using the selected induction variables. */
6437
6438 static void
6439 rewrite_uses (struct ivopts_data *data)
6440 {
6441 unsigned i;
6442 struct iv_cand *cand;
6443 struct iv_use *use;
6444
6445 for (i = 0; i < n_iv_uses (data); i++)
6446 {
6447 use = iv_use (data, i);
6448 cand = use->selected;
6449 gcc_assert (cand);
6450
6451 rewrite_use (data, use, cand);
6452 }
6453 }
6454
6455 /* Removes the ivs that are not used after rewriting. */
6456
6457 static void
6458 remove_unused_ivs (struct ivopts_data *data)
6459 {
6460 unsigned j;
6461 bitmap_iterator bi;
6462 bitmap toremove = BITMAP_ALLOC (NULL);
6463
6464 /* Figure out an order in which to release SSA DEFs so that we don't
6465 release something that we'd have to propagate into a debug stmt
6466 afterwards. */
6467 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6468 {
6469 struct version_info *info;
6470
6471 info = ver_info (data, j);
6472 if (info->iv
6473 && !integer_zerop (info->iv->step)
6474 && !info->inv_id
6475 && !info->iv->have_use_for
6476 && !info->preserve_biv)
6477 {
6478 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6479
6480 tree def = info->iv->ssa_name;
6481
6482 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6483 {
6484 imm_use_iterator imm_iter;
6485 use_operand_p use_p;
6486 gimple stmt;
6487 int count = 0;
6488
6489 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6490 {
6491 if (!gimple_debug_bind_p (stmt))
6492 continue;
6493
6494 /* We just want to determine whether to do nothing
6495 (count == 0), to substitute the computed
6496 expression into a single use of the SSA DEF by
6497 itself (count == 1), or to use a debug temp
6498 because the SSA DEF is used multiple times or as
6499 part of a larger expression (count > 1). */
6500 count++;
6501 if (gimple_debug_bind_get_value (stmt) != def)
6502 count++;
6503
6504 if (count > 1)
6505 BREAK_FROM_IMM_USE_STMT (imm_iter);
6506 }
6507
6508 if (!count)
6509 continue;
6510
6511 struct iv_use dummy_use;
6512 struct iv_cand *best_cand = NULL, *cand;
6513 unsigned i, best_pref = 0, cand_pref;
6514
6515 memset (&dummy_use, 0, sizeof (dummy_use));
6516 dummy_use.iv = info->iv;
6517 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6518 {
6519 cand = iv_use (data, i)->selected;
6520 if (cand == best_cand)
6521 continue;
6522 cand_pref = operand_equal_p (cand->iv->step,
6523 info->iv->step, 0)
6524 ? 4 : 0;
6525 cand_pref
6526 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6527 == TYPE_MODE (TREE_TYPE (info->iv->base))
6528 ? 2 : 0;
6529 cand_pref
6530 += TREE_CODE (cand->iv->base) == INTEGER_CST
6531 ? 1 : 0;
6532 if (best_cand == NULL || best_pref < cand_pref)
6533 {
6534 best_cand = cand;
6535 best_pref = cand_pref;
6536 }
6537 }
6538
6539 if (!best_cand)
6540 continue;
6541
6542 tree comp = get_computation_at (data->current_loop,
6543 &dummy_use, best_cand,
6544 SSA_NAME_DEF_STMT (def));
6545 if (!comp)
6546 continue;
6547
6548 if (count > 1)
6549 {
6550 tree vexpr = make_node (DEBUG_EXPR_DECL);
6551 DECL_ARTIFICIAL (vexpr) = 1;
6552 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6553 if (SSA_NAME_VAR (def))
6554 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6555 else
6556 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6557 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6558 gimple_stmt_iterator gsi;
6559
6560 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6561 gsi = gsi_after_labels (gimple_bb
6562 (SSA_NAME_DEF_STMT (def)));
6563 else
6564 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6565
6566 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6567 comp = vexpr;
6568 }
6569
6570 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6571 {
6572 if (!gimple_debug_bind_p (stmt))
6573 continue;
6574
6575 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6576 SET_USE (use_p, comp);
6577
6578 update_stmt (stmt);
6579 }
6580 }
6581 }
6582 }
6583
6584 release_defs_bitset (toremove);
6585
6586 BITMAP_FREE (toremove);
6587 }
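/* Illustrative debug-info effect of the above (hypothetical names): if
   a removed biv i_4 appears in several  # DEBUG i => i_4  binds, a
   debug temp is introduced roughly as

     # DEBUG D#1 => <expression of i_4 in terms of the chosen candidate>
     # DEBUG i => D#1

   so user-variable locations survive the biv's removal. */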
6588
6589 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6590 for pointer_map_traverse. */
6591
6592 static bool
6593 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6594 void *data ATTRIBUTE_UNUSED)
6595 {
6596 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6597
6598 free (niter);
6599 return true;
6600 }
6601
6602 /* Frees data allocated by the optimization of a single loop. */
6603
6604 static void
6605 free_loop_data (struct ivopts_data *data)
6606 {
6607 unsigned i, j;
6608 bitmap_iterator bi;
6609 tree obj;
6610
6611 if (data->niters)
6612 {
6613 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6614 pointer_map_destroy (data->niters);
6615 data->niters = NULL;
6616 }
6617
6618 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6619 {
6620 struct version_info *info;
6621
6622 info = ver_info (data, i);
6623 free (info->iv);
6624 info->iv = NULL;
6625 info->has_nonlin_use = false;
6626 info->preserve_biv = false;
6627 info->inv_id = 0;
6628 }
6629 bitmap_clear (data->relevant);
6630 bitmap_clear (data->important_candidates);
6631
6632 for (i = 0; i < n_iv_uses (data); i++)
6633 {
6634 struct iv_use *use = iv_use (data, i);
6635
6636 free (use->iv);
6637 BITMAP_FREE (use->related_cands);
6638 for (j = 0; j < use->n_map_members; j++)
6639 if (use->cost_map[j].depends_on)
6640 BITMAP_FREE (use->cost_map[j].depends_on);
6641 free (use->cost_map);
6642 free (use);
6643 }
6644 data->iv_uses.truncate (0);
6645
6646 for (i = 0; i < n_iv_cands (data); i++)
6647 {
6648 struct iv_cand *cand = iv_cand (data, i);
6649
6650 free (cand->iv);
6651 if (cand->depends_on)
6652 BITMAP_FREE (cand->depends_on);
6653 free (cand);
6654 }
6655 data->iv_candidates.truncate (0);
6656
6657 if (data->version_info_size < num_ssa_names)
6658 {
6659 data->version_info_size = 2 * num_ssa_names;
6660 free (data->version_info);
6661 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6662 }
6663
6664 data->max_inv_id = 0;
6665
6666 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6667 SET_DECL_RTL (obj, NULL_RTX);
6668
6669 decl_rtl_to_reset.truncate (0);
6670
6671 data->inv_expr_tab.empty ();
6672 data->inv_expr_id = 0;
6673 }
6674
6675 /* Finalizes data structures used by the iv optimization pass. */
6677
6678 static void
6679 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6680 {
6681 free_loop_data (data);
6682 free (data->version_info);
6683 BITMAP_FREE (data->relevant);
6684 BITMAP_FREE (data->important_candidates);
6685
6686 decl_rtl_to_reset.release ();
6687 data->iv_uses.release ();
6688 data->iv_candidates.release ();
6689 data->inv_expr_tab.dispose ();
6690 }
6691
6692 /* Returns true if the loop body BODY includes any function calls. */
6693
6694 static bool
6695 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6696 {
6697 gimple_stmt_iterator gsi;
6698 unsigned i;
6699
6700 for (i = 0; i < num_nodes; i++)
6701 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6702 {
6703 gimple stmt = gsi_stmt (gsi);
6704 if (is_gimple_call (stmt)
6705 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6706 return true;
6707 }
6708 return false;
6709 }
6710
6711 /* Optimizes the LOOP. Returns true if anything changed. */
6712
6713 static bool
6714 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6715 {
6716 bool changed = false;
6717 struct iv_ca *iv_ca;
6718 edge exit = single_dom_exit (loop);
6719 basic_block *body;
6720
6721 gcc_assert (!data->niters);
6722 data->current_loop = loop;
6723 data->speed = optimize_loop_for_speed_p (loop);
6724
6725 if (dump_file && (dump_flags & TDF_DETAILS))
6726 {
6727 fprintf (dump_file, "Processing loop %d\n", loop->num);
6728
6729 if (exit)
6730 {
6731 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6732 exit->src->index, exit->dest->index);
6733 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6734 fprintf (dump_file, "\n");
6735 }
6736
6737 fprintf (dump_file, "\n");
6738 }
6739
6740 body = get_loop_body (loop);
6741 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6742 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6743 free (body);
6744
6745 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6746
6747 /* For each ssa name, determine whether it behaves as an induction variable
6748 in some loop. */
6749 if (!find_induction_variables (data))
6750 goto finish;
6751
6752 /* Finds interesting uses (item 1). */
6753 find_interesting_uses (data);
6754 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6755 goto finish;
6756
6757 /* Finds candidates for the induction variables (item 2). */
6758 find_iv_candidates (data);
6759
6760 /* Calculates the costs (item 3, part 1). */
6761 determine_iv_costs (data);
6762 determine_use_iv_costs (data);
6763 determine_set_costs (data);
6764
6765 /* Find the optimal set of induction variables (item 3, part 2). */
6766 iv_ca = find_optimal_iv_set (data);
6767 if (!iv_ca)
6768 goto finish;
6769 changed = true;
6770
6771 /* Create the new induction variables (item 4, part 1). */
6772 create_new_ivs (data, iv_ca);
6773 iv_ca_free (&iv_ca);
6774
6775 /* Rewrite the uses (item 4, part 2). */
6776 rewrite_uses (data);
6777
6778 /* Remove the ivs that are unused after rewriting. */
6779 remove_unused_ivs (data);
6780
6781 /* We have changed the structure of induction variables; it might happen
6782 that definitions in the scev database refer to some of them that were
6783 eliminated. */
6784 scev_reset ();
6785
6786 finish:
6787 free_loop_data (data);
6788
6789 return changed;
6790 }
6791
6792 /* Main entry point. Optimizes induction variables in loops. */
6793
6794 void
6795 tree_ssa_iv_optimize (void)
6796 {
6797 struct loop *loop;
6798 struct ivopts_data data;
6799 loop_iterator li;
6800
6801 tree_ssa_iv_optimize_init (&data);
6802
6803 /* Optimize the loops starting with the innermost ones. */
6804 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6805 {
6806 if (dump_file && (dump_flags & TDF_DETAILS))
6807 flow_loop_dump (loop, dump_file, NULL, 1);
6808
6809 tree_ssa_iv_optimize_loop (&data, loop);
6810 }
6811
6812 tree_ssa_iv_optimize_finalize (&data);
6813 }