/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, and it might give better performance and enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
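
/* As an illustration (not part of the pass itself), the classical
   strength-reduction transformation performed here turns an indexed
   access such as

     for (i = 0; i < n; i++)
       sum += a[i];

   into a form that advances a pointer directly,

     for (p = a; p != a + n; p++)
       sum += *p;

   replacing the per-iteration computation of &a[i] (base plus scaled
   offset) with a single pointer increment.  */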

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate-cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so that
         we do not create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
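
/* For instance (hypothetical trees), determine_base_object applied to
   &a[i] yields &a, applied to a POINTER_PLUS_EXPR such as p + 4 it
   recurses into the pointer p, and applied to an integer constant it
   yields NULL_TREE.  */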

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with the given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in BASE except those with a DECL_P operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
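
/* For illustration, a typical biv as it appears in GIMPLE, with
   hypothetical SSA names:

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   For the phi defining i_1, determine_biv_step returns the step 1;
   for a phi whose result does not change within the loop it returns
   NULL_TREE.  */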

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv that was peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not reset by the caller, so that it
     does not show up in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
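
/* For example (hypothetical SSA names), given the condition i_3 < n_7
   where i_3 is an iv with nonzero step and n_7 is loop invariant,
   extract_cond_operands sets *CONTROL_VAR to the location of i_3 and
   *BOUND to the location of n_7, and returns true.  */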

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
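
/* For example (hypothetical SSA names), for TOP = x_1 * 8 + x_1 * 4 and
   BOT = x_1 this stores 12 to *MUL and returns true, while for
   TOP = x_1 + 1 with the same BOT it fails, since the constant 1 is not
   a constant multiple of x_1.  */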
1718
1719 /* Return true if memory reference REF with step STEP may be unaligned. */
1720
1721 static bool
1722 may_be_unaligned_p (tree ref, tree step)
1723 {
1724 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1725 thus they are not misaligned. */
1726 if (TREE_CODE (ref) == TARGET_MEM_REF)
1727 return false;
1728
1729 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1730 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1731 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1732
1733 unsigned HOST_WIDE_INT bitpos;
1734 unsigned int ref_align;
1735 get_object_alignment_1 (ref, &ref_align, &bitpos);
1736 if (ref_align < align
1737 || (bitpos % align) != 0
1738 || (bitpos % BITS_PER_UNIT) != 0)
1739 return true;
1740
1741 unsigned int trailing_zeros = tree_ctz (step);
1742 if (trailing_zeros < HOST_BITS_PER_INT
1743 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1744 return true;
1745
1746 return false;
1747 }
1748
1749 /* Return true if EXPR may be non-addressable. */
1750
1751 bool
1752 may_be_nonaddressable_p (tree expr)
1753 {
1754 switch (TREE_CODE (expr))
1755 {
1756 case TARGET_MEM_REF:
1757 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1758 target, thus they are always addressable. */
1759 return false;
1760
1761 case COMPONENT_REF:
1762 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1763 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1764
1765 case VIEW_CONVERT_EXPR:
1766 /* This kind of view-conversions may wrap non-addressable objects
1767 and make them look addressable. After some processing the
1768 non-addressability may be uncovered again, causing ADDR_EXPRs
1769 of inappropriate objects to be built. */
1770 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1771 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1772 return true;
1773
1774 /* ... fall through ... */
1775
1776 case ARRAY_REF:
1777 case ARRAY_RANGE_REF:
1778 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1779
1780 CASE_CONVERT:
1781 return true;
1782
1783 default:
1784 break;
1785 }
1786
1787 return false;
1788 }
1789
1790 /* Finds addresses in *OP_P inside STMT. */
1791
1792 static void
1793 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1794 {
1795 tree base = *op_p, step = size_zero_node;
1796 struct iv *civ;
1797 struct ifs_ivopts_data ifs_ivopts_data;
1798
1799 /* Do not play with volatile memory references. A bit too conservative,
1800 perhaps, but safe. */
1801 if (gimple_has_volatile_ops (stmt))
1802 goto fail;
1803
1804 /* Ignore bitfields for now. Not really something terribly complicated
1805 to handle. TODO. */
1806 if (TREE_CODE (base) == BIT_FIELD_REF)
1807 goto fail;
1808
1809 base = unshare_expr (base);
1810
1811 if (TREE_CODE (base) == TARGET_MEM_REF)
1812 {
1813 tree type = build_pointer_type (TREE_TYPE (base));
1814 tree astep;
1815
1816 if (TMR_BASE (base)
1817 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1818 {
1819 civ = get_iv (data, TMR_BASE (base));
1820 if (!civ)
1821 goto fail;
1822
1823 TMR_BASE (base) = civ->base;
1824 step = civ->step;
1825 }
1826 if (TMR_INDEX2 (base)
1827 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1828 {
1829 civ = get_iv (data, TMR_INDEX2 (base));
1830 if (!civ)
1831 goto fail;
1832
1833 TMR_INDEX2 (base) = civ->base;
1834 step = civ->step;
1835 }
1836 if (TMR_INDEX (base)
1837 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1838 {
1839 civ = get_iv (data, TMR_INDEX (base));
1840 if (!civ)
1841 goto fail;
1842
1843 TMR_INDEX (base) = civ->base;
1844 astep = civ->step;
1845
1846 if (astep)
1847 {
1848 if (TMR_STEP (base))
1849 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1850
1851 step = fold_build2 (PLUS_EXPR, type, step, astep);
1852 }
1853 }
1854
1855 if (integer_zerop (step))
1856 goto fail;
1857 base = tree_mem_ref_addr (type, base);
1858 }
1859 else
1860 {
1861 ifs_ivopts_data.ivopts_data = data;
1862 ifs_ivopts_data.stmt = stmt;
1863 ifs_ivopts_data.step = size_zero_node;
1864 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1865 || integer_zerop (ifs_ivopts_data.step))
1866 goto fail;
1867 step = ifs_ivopts_data.step;
1868
1869 /* Check that the base expression is addressable. This needs
1870 to be done after substituting bases of IVs into it. */
1871 if (may_be_nonaddressable_p (base))
1872 goto fail;
1873
1874 /* Moreover, on strict alignment platforms, check that it is
1875 sufficiently aligned. */
1876 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1877 goto fail;
1878
1879 base = build_fold_addr_expr (base);
1880
1881 /* Substituting bases of IVs into the base expression might
1882 have caused folding opportunities. */
1883 if (TREE_CODE (base) == ADDR_EXPR)
1884 {
1885 tree *ref = &TREE_OPERAND (base, 0);
1886 while (handled_component_p (*ref))
1887 ref = &TREE_OPERAND (*ref, 0);
1888 if (TREE_CODE (*ref) == MEM_REF)
1889 {
1890 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1891 TREE_OPERAND (*ref, 0),
1892 TREE_OPERAND (*ref, 1));
1893 if (tem)
1894 *ref = tem;
1895 }
1896 }
1897 }
1898
1899 civ = alloc_iv (base, step);
1900 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1901 return;
1902
1903 fail:
1904 for_each_index (op_p, idx_record_use, data);
1905 }
1906
1907 /* Finds and records invariants used in STMT. */
1908
1909 static void
1910 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1911 {
1912 ssa_op_iter iter;
1913 use_operand_p use_p;
1914 tree op;
1915
1916 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1917 {
1918 op = USE_FROM_PTR (use_p);
1919 record_invariant (data, op, false);
1920 }
1921 }
1922
1923 /* Finds interesting uses of induction variables in the statement STMT. */
1924
1925 static void
1926 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1927 {
1928 struct iv *iv;
1929 tree op, *lhs, *rhs;
1930 ssa_op_iter iter;
1931 use_operand_p use_p;
1932 enum tree_code code;
1933
1934 find_invariants_stmt (data, stmt);
1935
1936 if (gimple_code (stmt) == GIMPLE_COND)
1937 {
1938 find_interesting_uses_cond (data, stmt);
1939 return;
1940 }
1941
1942 if (is_gimple_assign (stmt))
1943 {
1944 lhs = gimple_assign_lhs_ptr (stmt);
1945 rhs = gimple_assign_rhs1_ptr (stmt);
1946
1947 if (TREE_CODE (*lhs) == SSA_NAME)
1948 {
1949 /* If the statement defines an induction variable, the uses are not
1950 interesting by themselves. */
1951
1952 iv = get_iv (data, *lhs);
1953
1954 if (iv && !integer_zerop (iv->step))
1955 return;
1956 }
1957
1958 code = gimple_assign_rhs_code (stmt);
1959 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1960 && (REFERENCE_CLASS_P (*rhs)
1961 || is_gimple_val (*rhs)))
1962 {
1963 if (REFERENCE_CLASS_P (*rhs))
1964 find_interesting_uses_address (data, stmt, rhs);
1965 else
1966 find_interesting_uses_op (data, *rhs);
1967
1968 if (REFERENCE_CLASS_P (*lhs))
1969 find_interesting_uses_address (data, stmt, lhs);
1970 return;
1971 }
1972 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1973 {
1974 find_interesting_uses_cond (data, stmt);
1975 return;
1976 }
1977
1978 /* TODO -- we should also handle address uses of type
1979
1980 memory = call (whatever);
1981
1982 and
1983
1984 call (memory). */
1985 }
1986
1987 if (gimple_code (stmt) == GIMPLE_PHI
1988 && gimple_bb (stmt) == data->current_loop->header)
1989 {
1990 iv = get_iv (data, PHI_RESULT (stmt));
1991
1992 if (iv && !integer_zerop (iv->step))
1993 return;
1994 }
1995
1996 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1997 {
1998 op = USE_FROM_PTR (use_p);
1999
2000 if (TREE_CODE (op) != SSA_NAME)
2001 continue;
2002
2003 iv = get_iv (data, op);
2004 if (!iv)
2005 continue;
2006
2007 find_interesting_uses_op (data, op);
2008 }
2009 }
2010
2011 /* Finds interesting uses of induction variables outside of loops
2012 on loop exit edge EXIT. */
2013
2014 static void
2015 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2016 {
2017 gphi *phi;
2018 gphi_iterator psi;
2019 tree def;
2020
2021 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2022 {
2023 phi = psi.phi ();
2024 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2025 if (!virtual_operand_p (def))
2026 find_interesting_uses_op (data, def);
2027 }
2028 }
2029
2030 /* Finds uses of the induction variables that are interesting. */
2031
2032 static void
2033 find_interesting_uses (struct ivopts_data *data)
2034 {
2035 basic_block bb;
2036 gimple_stmt_iterator bsi;
2037 basic_block *body = get_loop_body (data->current_loop);
2038 unsigned i;
2039 struct version_info *info;
2040 edge e;
2041
2042 if (dump_file && (dump_flags & TDF_DETAILS))
2043 fprintf (dump_file, "Uses:\n\n");
2044
2045 for (i = 0; i < data->current_loop->num_nodes; i++)
2046 {
2047 edge_iterator ei;
2048 bb = body[i];
2049
2050 FOR_EACH_EDGE (e, ei, bb->succs)
2051 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2052 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2053 find_interesting_uses_outside (data, e);
2054
2055 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2056 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2057 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2058 if (!is_gimple_debug (gsi_stmt (bsi)))
2059 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2060 }
2061
2062 if (dump_file && (dump_flags & TDF_DETAILS))
2063 {
2064 bitmap_iterator bi;
2065
2066 fprintf (dump_file, "\n");
2067
2068 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2069 {
2070 info = ver_info (data, i);
2071 if (info->inv_id)
2072 {
2073 fprintf (dump_file, " ");
2074 print_generic_expr (dump_file, info->name, TDF_SLIM);
2075 fprintf (dump_file, " is invariant (%d)%s\n",
2076 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2077 }
2078 }
2079
2080 fprintf (dump_file, "\n");
2081 }
2082
2083 free (body);
2084 }
2085
2086 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2087 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2088 we are at the top-level of the processed address. */
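/* For illustration (hypothetical example, not from the sources), with an
   array "a" of 4-byte ints: stripping offsets from &a[i + 3] yields the
   core &a[i] with *OFFSET == 12, and stripping p + 7 yields p with
   *OFFSET == 7.  */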
2089
2090 static tree
2091 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2092 HOST_WIDE_INT *offset)
2093 {
2094 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2095 enum tree_code code;
2096 tree type, orig_type = TREE_TYPE (expr);
2097 HOST_WIDE_INT off0, off1, st;
2098 tree orig_expr = expr;
2099
2100 STRIP_NOPS (expr);
2101
2102 type = TREE_TYPE (expr);
2103 code = TREE_CODE (expr);
2104 *offset = 0;
2105
2106 switch (code)
2107 {
2108 case INTEGER_CST:
2109 if (!cst_and_fits_in_hwi (expr)
2110 || integer_zerop (expr))
2111 return orig_expr;
2112
2113 *offset = int_cst_value (expr);
2114 return build_int_cst (orig_type, 0);
2115
2116 case POINTER_PLUS_EXPR:
2117 case PLUS_EXPR:
2118 case MINUS_EXPR:
2119 op0 = TREE_OPERAND (expr, 0);
2120 op1 = TREE_OPERAND (expr, 1);
2121
2122 op0 = strip_offset_1 (op0, false, false, &off0);
2123 op1 = strip_offset_1 (op1, false, false, &off1);
2124
2125 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2126 if (op0 == TREE_OPERAND (expr, 0)
2127 && op1 == TREE_OPERAND (expr, 1))
2128 return orig_expr;
2129
2130 if (integer_zerop (op1))
2131 expr = op0;
2132 else if (integer_zerop (op0))
2133 {
2134 if (code == MINUS_EXPR)
2135 expr = fold_build1 (NEGATE_EXPR, type, op1);
2136 else
2137 expr = op1;
2138 }
2139 else
2140 expr = fold_build2 (code, type, op0, op1);
2141
2142 return fold_convert (orig_type, expr);
2143
2144 case MULT_EXPR:
2145 op1 = TREE_OPERAND (expr, 1);
2146 if (!cst_and_fits_in_hwi (op1))
2147 return orig_expr;
2148
2149 op0 = TREE_OPERAND (expr, 0);
2150 op0 = strip_offset_1 (op0, false, false, &off0);
2151 if (op0 == TREE_OPERAND (expr, 0))
2152 return orig_expr;
2153
2154 *offset = off0 * int_cst_value (op1);
2155 if (integer_zerop (op0))
2156 expr = op0;
2157 else
2158 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2159
2160 return fold_convert (orig_type, expr);
2161
2162 case ARRAY_REF:
2163 case ARRAY_RANGE_REF:
2164 if (!inside_addr)
2165 return orig_expr;
2166
2167 step = array_ref_element_size (expr);
2168 if (!cst_and_fits_in_hwi (step))
2169 break;
2170
2171 st = int_cst_value (step);
2172 op1 = TREE_OPERAND (expr, 1);
2173 op1 = strip_offset_1 (op1, false, false, &off1);
2174 *offset = off1 * st;
2175
2176 if (top_compref
2177 && integer_zerop (op1))
2178 {
2179 /* Strip the component reference completely. */
2180 op0 = TREE_OPERAND (expr, 0);
2181 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2182 *offset += off0;
2183 return op0;
2184 }
2185 break;
2186
2187 case COMPONENT_REF:
2188 {
2189 tree field;
2190
2191 if (!inside_addr)
2192 return orig_expr;
2193
2194 tmp = component_ref_field_offset (expr);
2195 field = TREE_OPERAND (expr, 1);
2196 if (top_compref
2197 && cst_and_fits_in_hwi (tmp)
2198 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2199 {
2200 HOST_WIDE_INT boffset, abs_off;
2201
2202 /* Strip the component reference completely. */
2203 op0 = TREE_OPERAND (expr, 0);
2204 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2205 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2206 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2207 if (boffset < 0)
2208 abs_off = -abs_off;
2209
2210 *offset = off0 + int_cst_value (tmp) + abs_off;
2211 return op0;
2212 }
2213 }
2214 break;
2215
2216 case ADDR_EXPR:
2217 op0 = TREE_OPERAND (expr, 0);
2218 op0 = strip_offset_1 (op0, true, true, &off0);
2219 *offset += off0;
2220
2221 if (op0 == TREE_OPERAND (expr, 0))
2222 return orig_expr;
2223
2224 expr = build_fold_addr_expr (op0);
2225 return fold_convert (orig_type, expr);
2226
2227 case MEM_REF:
2228 /* ??? Offset operand? */
2229 inside_addr = false;
2230 break;
2231
2232 default:
2233 return orig_expr;
2234 }
2235
2236 /* Default handling of expressions for which we want to recurse into
2237 the first operand. */
2238 op0 = TREE_OPERAND (expr, 0);
2239 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2240 *offset += off0;
2241
2242 if (op0 == TREE_OPERAND (expr, 0)
2243 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2244 return orig_expr;
2245
2246 expr = copy_node (expr);
2247 TREE_OPERAND (expr, 0) = op0;
2248 if (op1)
2249 TREE_OPERAND (expr, 1) = op1;
2250
2251 /* Inside an address, we might strip the top-level component references,
2252 thus changing the type of the expression. Handling of ADDR_EXPR
2253 will fix that. */
2254 expr = fold_convert (orig_type, expr);
2255
2256 return expr;
2257 }
2258
2259 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2260
2261 static tree
2262 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2263 {
2264 HOST_WIDE_INT off;
2265 tree core = strip_offset_1 (expr, false, false, &off);
2266 *offset = off;
2267 return core;
2268 }
2269
2270 /* Returns a variant of TYPE that can be used as a base for different uses.
2271 We return an unsigned type with the same precision, which avoids problems
2272 with overflows. */
2273
2274 static tree
2275 generic_type_for (tree type)
2276 {
2277 if (POINTER_TYPE_P (type))
2278 return unsigned_type_for (type);
2279
2280 if (TYPE_UNSIGNED (type))
2281 return type;
2282
2283 return unsigned_type_for (type);
2284 }
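/* For instance, "int" becomes "unsigned int", and a pointer type becomes
   an unsigned integer type of the same precision; "unsigned long" is
   already overflow-safe and is returned unchanged.  */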
2285
2286 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2287 the bitmap in which we should store it. */
2288
2289 static struct ivopts_data *fd_ivopts_data;
2290 static tree
2291 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2292 {
2293 bitmap *depends_on = (bitmap *) data;
2294 struct version_info *info;
2295
2296 if (TREE_CODE (*expr_p) != SSA_NAME)
2297 return NULL_TREE;
2298 info = name_info (fd_ivopts_data, *expr_p);
2299
2300 if (!info->inv_id || info->has_nonlin_use)
2301 return NULL_TREE;
2302
2303 if (!*depends_on)
2304 *depends_on = BITMAP_ALLOC (NULL);
2305 bitmap_set_bit (*depends_on, info->inv_id);
2306
2307 return NULL_TREE;
2308 }
2309
2310 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2311 position to POS. If USE is not NULL, the candidate is set as related to
2312 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2313 replacement of the final value of the iv by a direct computation. */
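/* For example, if a candidate with base 0, step 4 and the same position
   already exists, the search loop below finds it and no new candidate is
   allocated; the existing one is merely marked important and/or related
   to USE as requested.  */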
2314
2315 static struct iv_cand *
2316 add_candidate_1 (struct ivopts_data *data,
2317 tree base, tree step, bool important, enum iv_position pos,
2318 struct iv_use *use, gimple incremented_at)
2319 {
2320 unsigned i;
2321 struct iv_cand *cand = NULL;
2322 tree type, orig_type;
2323
2324 /* For non-original variables, make sure their values are computed in a type
2325 that does not invoke undefined behavior on overflows (since in general,
2326 we cannot prove that these induction variables are non-wrapping). */
2327 if (pos != IP_ORIGINAL)
2328 {
2329 orig_type = TREE_TYPE (base);
2330 type = generic_type_for (orig_type);
2331 if (type != orig_type)
2332 {
2333 base = fold_convert (type, base);
2334 step = fold_convert (type, step);
2335 }
2336 }
2337
2338 for (i = 0; i < n_iv_cands (data); i++)
2339 {
2340 cand = iv_cand (data, i);
2341
2342 if (cand->pos != pos)
2343 continue;
2344
2345 if (cand->incremented_at != incremented_at
2346 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2347 && cand->ainc_use != use))
2348 continue;
2349
2350 if (!cand->iv)
2351 {
2352 if (!base && !step)
2353 break;
2354
2355 continue;
2356 }
2357
2358 if (!base && !step)
2359 continue;
2360
2361 if (operand_equal_p (base, cand->iv->base, 0)
2362 && operand_equal_p (step, cand->iv->step, 0)
2363 && (TYPE_PRECISION (TREE_TYPE (base))
2364 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2365 break;
2366 }
2367
2368 if (i == n_iv_cands (data))
2369 {
2370 cand = XCNEW (struct iv_cand);
2371 cand->id = i;
2372
2373 if (!base && !step)
2374 cand->iv = NULL;
2375 else
2376 cand->iv = alloc_iv (base, step);
2377
2378 cand->pos = pos;
2379 if (pos != IP_ORIGINAL && cand->iv)
2380 {
2381 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2382 cand->var_after = cand->var_before;
2383 }
2384 cand->important = important;
2385 cand->incremented_at = incremented_at;
2386 data->iv_candidates.safe_push (cand);
2387
2388 if (step
2389 && TREE_CODE (step) != INTEGER_CST)
2390 {
2391 fd_ivopts_data = data;
2392 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2393 }
2394
2395 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2396 cand->ainc_use = use;
2397 else
2398 cand->ainc_use = NULL;
2399
2400 if (dump_file && (dump_flags & TDF_DETAILS))
2401 dump_cand (dump_file, cand);
2402 }
2403
2404 if (important && !cand->important)
2405 {
2406 cand->important = true;
2407 if (dump_file && (dump_flags & TDF_DETAILS))
2408 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2409 }
2410
2411 if (use)
2412 {
2413 bitmap_set_bit (use->related_cands, i);
2414 if (dump_file && (dump_flags & TDF_DETAILS))
2415 fprintf (dump_file, "Candidate %d is related to use %d\n",
2416 cand->id, use->id);
2417 }
2418
2419 return cand;
2420 }
2421
2422 /* Returns true if incrementing the induction variable at the end of the LOOP
2423 is allowed.
2424
2425 The purpose is to avoid splitting the latch edge with a biv increment, thus
2426 creating a jump, possibly confusing other optimization passes and leaving
2427 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2428 is not available (so we do not have a better alternative), or if the latch
2429 edge is already nonempty. */
2430
2431 static bool
2432 allow_ip_end_pos_p (struct loop *loop)
2433 {
2434 if (!ip_normal_pos (loop))
2435 return true;
2436
2437 if (!empty_block_p (ip_end_pos (loop)))
2438 return true;
2439
2440 return false;
2441 }
2442
2443 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2444 Important field is set to IMPORTANT. */
2445
2446 static void
2447 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2448 bool important, struct iv_use *use)
2449 {
2450 basic_block use_bb = gimple_bb (use->stmt);
2451 machine_mode mem_mode;
2452 unsigned HOST_WIDE_INT cstepi;
2453
2454 /* If we insert the increment in any position other than the standard
2455 ones, we must ensure that it is incremented once per iteration.
2456 It must not be in an inner nested loop, nor on one side of an if
2457 statement. */
2458 if (use_bb->loop_father != data->current_loop
2459 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2460 || stmt_could_throw_p (use->stmt)
2461 || !cst_and_fits_in_hwi (step))
2462 return;
2463
2464 cstepi = int_cst_value (step);
2465
2466 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2467 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2468 || USE_STORE_PRE_INCREMENT (mem_mode))
2469 && GET_MODE_SIZE (mem_mode) == cstepi)
2470 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2471 || USE_STORE_PRE_DECREMENT (mem_mode))
2472 && GET_MODE_SIZE (mem_mode) == -cstepi))
2473 {
2474 enum tree_code code = MINUS_EXPR;
2475 tree new_base;
2476 tree new_step = step;
2477
2478 if (POINTER_TYPE_P (TREE_TYPE (base)))
2479 {
2480 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2481 code = POINTER_PLUS_EXPR;
2482 }
2483 else
2484 new_step = fold_convert (TREE_TYPE (base), new_step);
2485 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2486 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2487 use->stmt);
2488 }
2489 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2490 || USE_STORE_POST_INCREMENT (mem_mode))
2491 && GET_MODE_SIZE (mem_mode) == cstepi)
2492 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2493 || USE_STORE_POST_DECREMENT (mem_mode))
2494 && GET_MODE_SIZE (mem_mode) == -cstepi))
2495 {
2496 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2497 use->stmt);
2498 }
2499 }
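/* As an illustration, assume a hypothetical target with pre/post-increment
   addressing and a 4-byte SImode access *p with step 4: the IP_BEFORE_USE
   candidate is given base p - 4, matching the pre-increment form *(++p),
   while the IP_AFTER_USE candidate keeps base p, matching the
   post-increment form *(p++).  */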
2500
2501 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2502 position to POS. If USE is not NULL, the candidate is set as related to
2503 it. The candidate computation is scheduled on all available positions. */
2504
2505 static void
2506 add_candidate (struct ivopts_data *data,
2507 tree base, tree step, bool important, struct iv_use *use)
2508 {
2509 if (ip_normal_pos (data->current_loop))
2510 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2511 if (ip_end_pos (data->current_loop)
2512 && allow_ip_end_pos_p (data->current_loop))
2513 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2514
2515 if (use != NULL && use->type == USE_ADDRESS)
2516 add_autoinc_candidates (data, base, step, important, use);
2517 }
2518
2519 /* Adds standard iv candidates. */
2520
2521 static void
2522 add_standard_iv_candidates (struct ivopts_data *data)
2523 {
2524 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2525
2526 /* The same for the double-width long type if it is still fast enough. */
2527 if (TYPE_PRECISION
2528 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2529 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2530 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2531 build_int_cst (long_integer_type_node, 1), true, NULL);
2532
2533 /* Likewise for long long if it is wider than long and still fast enough. */
2534 if (TYPE_PRECISION
2535 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2536 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2537 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2538 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2539 }
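/* E.g. on a typical LP64 target this adds the counters {0, +, 1} in
   unsigned int and in unsigned long; the long long block is skipped
   there, since long long is no wider than long.  */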
2540
2541
2542 /* Adds candidates based on the old induction variable IV. */
2543
2544 static void
2545 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2546 {
2547 gimple phi;
2548 tree def;
2549 struct iv_cand *cand;
2550
2551 add_candidate (data, iv->base, iv->step, true, NULL);
2552
2553 /* The same, but with initial value zero. */
2554 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2555 add_candidate (data, size_int (0), iv->step, true, NULL);
2556 else
2557 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2558 iv->step, true, NULL);
2559
2560 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2561 if (gimple_code (phi) == GIMPLE_PHI)
2562 {
2563 /* Additionally record the possibility of leaving the original iv
2564 untouched. */
2565 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2566 /* Don't add candidate if it's from another PHI node because
2567 it's an affine iv appearing in the form of PEELED_CHREC. */
2568 phi = SSA_NAME_DEF_STMT (def);
2569 if (gimple_code (phi) != GIMPLE_PHI)
2570 {
2571 cand = add_candidate_1 (data,
2572 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2573 SSA_NAME_DEF_STMT (def));
2574 cand->var_before = iv->ssa_name;
2575 cand->var_after = def;
2576 }
2577 else
2578 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2579 }
2580 }
2581
2582 /* Adds candidates based on the old induction variables. */
2583
2584 static void
2585 add_old_ivs_candidates (struct ivopts_data *data)
2586 {
2587 unsigned i;
2588 struct iv *iv;
2589 bitmap_iterator bi;
2590
2591 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2592 {
2593 iv = ver_info (data, i)->iv;
2594 if (iv && iv->biv_p && !integer_zerop (iv->step))
2595 add_old_iv_candidates (data, iv);
2596 }
2597 }
2598
2599 /* Adds candidates based on the value of the induction variable IV and USE. */
2600
2601 static void
2602 add_iv_value_candidates (struct ivopts_data *data,
2603 struct iv *iv, struct iv_use *use)
2604 {
2605 unsigned HOST_WIDE_INT offset;
2606 tree base;
2607 tree basetype;
2608
2609 add_candidate (data, iv->base, iv->step, false, use);
2610
2611 /* The same, but with initial value zero. Make such a variable important,
2612 since it is generic enough that possibly many uses may be based
2613 on it. */
2614 basetype = TREE_TYPE (iv->base);
2615 if (POINTER_TYPE_P (basetype))
2616 basetype = sizetype;
2617 add_candidate (data, build_int_cst (basetype, 0),
2618 iv->step, true, use);
2619
2620 /* Third, try removing the constant offset. Make sure to even
2621 add a candidate for &a[0] vs. (T *)&a. */
2622 base = strip_offset (iv->base, &offset);
2623 if (offset
2624 || base != iv->base)
2625 add_candidate (data, base, iv->step, false, use);
2626 }
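/* For instance, a use with base &a[0] + 16 and step 4 also produces the
   candidate with base &a[0] (constant offset stripped) and the important
   zero-based candidate {0, +, 4}, so uses differing only in a constant
   offset can share a single induction variable.  */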
2627
2628 /* Adds candidates based on the uses. */
2629
2630 static void
2631 add_derived_ivs_candidates (struct ivopts_data *data)
2632 {
2633 unsigned i;
2634
2635 for (i = 0; i < n_iv_uses (data); i++)
2636 {
2637 struct iv_use *use = iv_use (data, i);
2638
2639 if (!use)
2640 continue;
2641
2642 switch (use->type)
2643 {
2644 case USE_NONLINEAR_EXPR:
2645 case USE_COMPARE:
2646 case USE_ADDRESS:
2647 /* Just add the ivs based on the value of the iv used here. */
2648 add_iv_value_candidates (data, use->iv, use);
2649 break;
2650
2651 default:
2652 gcc_unreachable ();
2653 }
2654 }
2655 }
2656
2657 /* Record important candidates and add them to related_cands bitmaps
2658 if needed. */
2659
2660 static void
2661 record_important_candidates (struct ivopts_data *data)
2662 {
2663 unsigned i;
2664 struct iv_use *use;
2665
2666 for (i = 0; i < n_iv_cands (data); i++)
2667 {
2668 struct iv_cand *cand = iv_cand (data, i);
2669
2670 if (cand->important)
2671 bitmap_set_bit (data->important_candidates, i);
2672 }
2673
2674 data->consider_all_candidates = (n_iv_cands (data)
2675 <= CONSIDER_ALL_CANDIDATES_BOUND);
2676
2677 if (data->consider_all_candidates)
2678 {
2679 /* We will not need "related_cands" bitmaps in this case,
2680 so release them to decrease peak memory consumption. */
2681 for (i = 0; i < n_iv_uses (data); i++)
2682 {
2683 use = iv_use (data, i);
2684 BITMAP_FREE (use->related_cands);
2685 }
2686 }
2687 else
2688 {
2689 /* Add important candidates to the related_cands bitmaps. */
2690 for (i = 0; i < n_iv_uses (data); i++)
2691 bitmap_ior_into (iv_use (data, i)->related_cands,
2692 data->important_candidates);
2693 }
2694 }
2695
2696 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2697 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2698 we allocate a simple list for every use. */
2699
2700 static void
2701 alloc_use_cost_map (struct ivopts_data *data)
2702 {
2703 unsigned i, size, s;
2704
2705 for (i = 0; i < n_iv_uses (data); i++)
2706 {
2707 struct iv_use *use = iv_use (data, i);
2708
2709 if (data->consider_all_candidates)
2710 size = n_iv_cands (data);
2711 else
2712 {
2713 s = bitmap_count_bits (use->related_cands);
2714
2715 /* Round up to the nearest power of two, so that taking modulo by it is fast. */
2716 size = s ? (1 << ceil_log2 (s)) : 1;
2717 }
2718
2719 use->n_map_members = size;
2720 use->cost_map = XCNEWVEC (struct cost_pair, size);
2721 }
2722 }
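/* E.g. a use with 5 related candidates gets an 8-entry map, so the probe
   in set_use_iv_cost can reduce a candidate id with a bitwise "and" by 7
   instead of a division.  */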
2723
2724 /* Returns description of computation cost of expression whose runtime
2725 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2726
2727 static comp_cost
2728 new_cost (unsigned runtime, unsigned complexity)
2729 {
2730 comp_cost cost;
2731
2732 cost.cost = runtime;
2733 cost.complexity = complexity;
2734
2735 return cost;
2736 }
2737
2738 /* Adds costs COST1 and COST2. */
2739
2740 static comp_cost
2741 add_costs (comp_cost cost1, comp_cost cost2)
2742 {
2743 cost1.cost += cost2.cost;
2744 cost1.complexity += cost2.complexity;
2745
2746 return cost1;
2747 }
2748 /* Subtracts costs COST1 and COST2. */
2749
2750 static comp_cost
2751 sub_costs (comp_cost cost1, comp_cost cost2)
2752 {
2753 cost1.cost -= cost2.cost;
2754 cost1.complexity -= cost2.complexity;
2755
2756 return cost1;
2757 }
2758
2759 /* Returns a negative number if COST1 < COST2, a positive number if
2760 COST1 > COST2, and 0 if COST1 = COST2. */
2761
2762 static int
2763 compare_costs (comp_cost cost1, comp_cost cost2)
2764 {
2765 if (cost1.cost == cost2.cost)
2766 return cost1.complexity - cost2.complexity;
2767
2768 return cost1.cost - cost2.cost;
2769 }
2770
2771 /* Returns true if COST is infinite. */
2772
2773 static bool
2774 infinite_cost_p (comp_cost cost)
2775 {
2776 return cost.cost == INFTY;
2777 }
2778
2779 /* Sets the cost of the (USE, CANDIDATE) pair to COST, records that it depends
2780 on the invariants DEPENDS_ON, that the value used in expressing it
2781 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2782
2783 static void
2784 set_use_iv_cost (struct ivopts_data *data,
2785 struct iv_use *use, struct iv_cand *cand,
2786 comp_cost cost, bitmap depends_on, tree value,
2787 enum tree_code comp, int inv_expr_id)
2788 {
2789 unsigned i, s;
2790
2791 if (infinite_cost_p (cost))
2792 {
2793 BITMAP_FREE (depends_on);
2794 return;
2795 }
2796
2797 if (data->consider_all_candidates)
2798 {
2799 use->cost_map[cand->id].cand = cand;
2800 use->cost_map[cand->id].cost = cost;
2801 use->cost_map[cand->id].depends_on = depends_on;
2802 use->cost_map[cand->id].value = value;
2803 use->cost_map[cand->id].comp = comp;
2804 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2805 return;
2806 }
2807
2808 /* n_map_members is a power of two, so this computes modulo. */
2809 s = cand->id & (use->n_map_members - 1);
2810 for (i = s; i < use->n_map_members; i++)
2811 if (!use->cost_map[i].cand)
2812 goto found;
2813 for (i = 0; i < s; i++)
2814 if (!use->cost_map[i].cand)
2815 goto found;
2816
2817 gcc_unreachable ();
2818
2819 found:
2820 use->cost_map[i].cand = cand;
2821 use->cost_map[i].cost = cost;
2822 use->cost_map[i].depends_on = depends_on;
2823 use->cost_map[i].value = value;
2824 use->cost_map[i].comp = comp;
2825 use->cost_map[i].inv_expr_id = inv_expr_id;
2826 }
2827
2828 /* Gets cost of (USE, CANDIDATE) pair. */
2829
2830 static struct cost_pair *
2831 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2832 struct iv_cand *cand)
2833 {
2834 unsigned i, s;
2835 struct cost_pair *ret;
2836
2837 if (!cand)
2838 return NULL;
2839
2840 if (data->consider_all_candidates)
2841 {
2842 ret = use->cost_map + cand->id;
2843 if (!ret->cand)
2844 return NULL;
2845
2846 return ret;
2847 }
2848
2849 /* n_map_members is a power of two, so this computes modulo. */
2850 s = cand->id & (use->n_map_members - 1);
2851 for (i = s; i < use->n_map_members; i++)
2852 if (use->cost_map[i].cand == cand)
2853 return use->cost_map + i;
2854 else if (use->cost_map[i].cand == NULL)
2855 return NULL;
2856 for (i = 0; i < s; i++)
2857 if (use->cost_map[i].cand == cand)
2858 return use->cost_map + i;
2859 else if (use->cost_map[i].cand == NULL)
2860 return NULL;
2861
2862 return NULL;
2863 }
2864
2865 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2866 static rtx
2867 produce_memory_decl_rtl (tree obj, int *regno)
2868 {
2869 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2870 machine_mode address_mode = targetm.addr_space.address_mode (as);
2871 rtx x;
2872
2873 gcc_assert (obj);
2874 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2875 {
2876 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2877 x = gen_rtx_SYMBOL_REF (address_mode, name);
2878 SET_SYMBOL_REF_DECL (x, obj);
2879 x = gen_rtx_MEM (DECL_MODE (obj), x);
2880 set_mem_addr_space (x, as);
2881 targetm.encode_section_info (obj, x, true);
2882 }
2883 else
2884 {
2885 x = gen_raw_REG (address_mode, (*regno)++);
2886 x = gen_rtx_MEM (DECL_MODE (obj), x);
2887 set_mem_addr_space (x, as);
2888 }
2889
2890 return x;
2891 }
2892
2893 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2894 walk_tree. DATA contains the actual fake register number. */
2895
2896 static tree
2897 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2898 {
2899 tree obj = NULL_TREE;
2900 rtx x = NULL_RTX;
2901 int *regno = (int *) data;
2902
2903 switch (TREE_CODE (*expr_p))
2904 {
2905 case ADDR_EXPR:
2906 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2907 handled_component_p (*expr_p);
2908 expr_p = &TREE_OPERAND (*expr_p, 0))
2909 continue;
2910 obj = *expr_p;
2911 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2912 x = produce_memory_decl_rtl (obj, regno);
2913 break;
2914
2915 case SSA_NAME:
2916 *ws = 0;
2917 obj = SSA_NAME_VAR (*expr_p);
2918 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2919 if (!obj)
2920 return NULL_TREE;
2921 if (!DECL_RTL_SET_P (obj))
2922 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2923 break;
2924
2925 case VAR_DECL:
2926 case PARM_DECL:
2927 case RESULT_DECL:
2928 *ws = 0;
2929 obj = *expr_p;
2930
2931 if (DECL_RTL_SET_P (obj))
2932 break;
2933
2934 if (DECL_MODE (obj) == BLKmode)
2935 x = produce_memory_decl_rtl (obj, regno);
2936 else
2937 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2938
2939 break;
2940
2941 default:
2942 break;
2943 }
2944
2945 if (x)
2946 {
2947 decl_rtl_to_reset.safe_push (obj);
2948 SET_DECL_RTL (obj, x);
2949 }
2950
2951 return NULL_TREE;
2952 }
2953
2954 /* Determines cost of the computation of EXPR. */
2955
2956 static unsigned
2957 computation_cost (tree expr, bool speed)
2958 {
2959 rtx_insn *seq;
2960 rtx rslt;
2961 tree type = TREE_TYPE (expr);
2962 unsigned cost;
2963 /* Avoid using hard regs in ways which may be unsupported. */
2964 int regno = LAST_VIRTUAL_REGISTER + 1;
2965 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2966 enum node_frequency real_frequency = node->frequency;
2967
2968 node->frequency = NODE_FREQUENCY_NORMAL;
2969 crtl->maybe_hot_insn_p = speed;
2970 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2971 start_sequence ();
2972 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2973 seq = get_insns ();
2974 end_sequence ();
2975 default_rtl_profile ();
2976 node->frequency = real_frequency;
2977
2978 cost = seq_cost (seq, speed);
2979 if (MEM_P (rslt))
2980 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2981 TYPE_ADDR_SPACE (type), speed);
2982 else if (!REG_P (rslt))
2983 cost += set_src_cost (rslt, speed);
2984
2985 return cost;
2986 }
2987
2988 /* Returns variable containing the value of candidate CAND at statement AT. */
2989
2990 static tree
2991 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2992 {
2993 if (stmt_after_increment (loop, cand, stmt))
2994 return cand->var_after;
2995 else
2996 return cand->var_before;
2997 }
2998
2999 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3000 same precision that is at least as wide as the precision of TYPE, stores
3001 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3002 type of A and B. */
3003
3004 static tree
3005 determine_common_wider_type (tree *a, tree *b)
3006 {
3007 tree wider_type = NULL;
3008 tree suba, subb;
3009 tree atype = TREE_TYPE (*a);
3010
3011 if (CONVERT_EXPR_P (*a))
3012 {
3013 suba = TREE_OPERAND (*a, 0);
3014 wider_type = TREE_TYPE (suba);
3015 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3016 return atype;
3017 }
3018 else
3019 return atype;
3020
3021 if (CONVERT_EXPR_P (*b))
3022 {
3023 subb = TREE_OPERAND (*b, 0);
3024 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3025 return atype;
3026 }
3027 else
3028 return atype;
3029
3030 *a = suba;
3031 *b = subb;
3032 return wider_type;
3033 }
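/* For instance, if A is (unsigned int) x and B is (unsigned int) y, where
   x and y have type unsigned long, *A and *B are replaced by x and y and
   "unsigned long" is returned, so the caller can fold the difference of
   the bases in the wider type.  */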
3034
3035 /* Determines the expression by which USE is expressed from induction variable
3036 CAND at statement AT in LOOP. The expression is stored in a decomposed
3037 form into AFF. Returns false if USE cannot be expressed using CAND. */
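/* The decomposition below follows from the affine forms
   use = ubase + i * ustep and var = cbase + i * cstep: since
   ustep == ratio * cstep, eliminating i gives
   use = ubase - ratio * cbase + ratio * var.  */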
3038
3039 static bool
3040 get_computation_aff (struct loop *loop,
3041 struct iv_use *use, struct iv_cand *cand, gimple at,
3042 struct aff_tree *aff)
3043 {
3044 tree ubase = use->iv->base;
3045 tree ustep = use->iv->step;
3046 tree cbase = cand->iv->base;
3047 tree cstep = cand->iv->step, cstep_common;
3048 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3049 tree common_type, var;
3050 tree uutype;
3051 aff_tree cbase_aff, var_aff;
3052 widest_int rat;
3053
3054 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3055 {
3056 /* We do not have enough precision to express the values of the use. */
3057 return false;
3058 }
3059
3060 var = var_at_stmt (loop, cand, at);
3061 uutype = unsigned_type_for (utype);
3062
3063 /* If the conversion is not a noop, perform it. */
3064 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3065 {
3066 cstep = fold_convert (uutype, cstep);
3067 cbase = fold_convert (uutype, cbase);
3068 var = fold_convert (uutype, var);
3069 }
3070
3071 if (!constant_multiple_of (ustep, cstep, &rat))
3072 return false;
3073
3074 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3075 type, we achieve better folding by computing their difference in this
3076 wider type, and cast the result to UUTYPE. We do not need to worry about
3077 overflows, as all the arithmetic will in the end be performed in UUTYPE
3078 anyway. */
3079 common_type = determine_common_wider_type (&ubase, &cbase);
3080
3081 /* use = ubase - ratio * cbase + ratio * var. */
3082 tree_to_aff_combination (ubase, common_type, aff);
3083 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3084 tree_to_aff_combination (var, uutype, &var_aff);
3085
3086 /* We need to shift the value if we are after the increment. */
3087 if (stmt_after_increment (loop, cand, at))
3088 {
3089 aff_tree cstep_aff;
3090
3091 if (common_type != uutype)
3092 cstep_common = fold_convert (common_type, cstep);
3093 else
3094 cstep_common = cstep;
3095
3096 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3097 aff_combination_add (&cbase_aff, &cstep_aff);
3098 }
3099
3100 aff_combination_scale (&cbase_aff, -rat);
3101 aff_combination_add (aff, &cbase_aff);
3102 if (common_type != uutype)
3103 aff_combination_convert (aff, uutype);
3104
3105 aff_combination_scale (&var_aff, rat);
3106 aff_combination_add (aff, &var_aff);
3107
3108 return true;
3109 }
3110
3111 /* Return the type of USE. */
3112
3113 static tree
3114 get_use_type (struct iv_use *use)
3115 {
3116 tree base_type = TREE_TYPE (use->iv->base);
3117 tree type;
3118
3119 if (use->type == USE_ADDRESS)
3120 {
3121 /* The base_type may be a void pointer. Create a pointer type based on
3122 the mem_ref instead. */
3123 type = build_pointer_type (TREE_TYPE (*use->op_p));
3124 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3125 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3126 }
3127 else
3128 type = base_type;
3129
3130 return type;
3131 }
3132
3133 /* Determines the expression by which USE is expressed from induction variable
3134 CAND at statement AT in LOOP. The computation is unshared. */
3135
3136 static tree
3137 get_computation_at (struct loop *loop,
3138 struct iv_use *use, struct iv_cand *cand, gimple at)
3139 {
3140 aff_tree aff;
3141 tree type = get_use_type (use);
3142
3143 if (!get_computation_aff (loop, use, cand, at, &aff))
3144 return NULL_TREE;
3145 unshare_aff_combination (&aff);
3146 return fold_convert (type, aff_combination_to_tree (&aff));
3147 }
3148
3149 /* Determines the expression by which USE is expressed from induction variable
3150 CAND in LOOP. The computation is unshared. */
3151
3152 static tree
3153 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3154 {
3155 return get_computation_at (loop, use, cand, use->stmt);
3156 }
3157
3158 /* Adjust the cost COST for being in loop setup rather than loop body.
3159 If we're optimizing for space, the loop setup overhead is constant;
3160 if we're optimizing for speed, amortize it over the per-iteration cost. */
3161 static unsigned
3162 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3163 {
3164 if (cost == INFTY)
3165 return cost;
3166 else if (optimize_loop_for_speed_p (data->current_loop))
3167 return cost / avg_loop_niter (data->current_loop);
3168 else
3169 return cost;
3170 }
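/* E.g. when optimizing for speed, a setup cost of 10 in a loop that
   averages 5 iterations is accounted as 2 per iteration.  */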
3171
3172 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3173 validity for a memory reference accessing memory of mode MODE in
3174 address space AS. */
3175
3176
3177 bool
3178 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3179 addr_space_t as)
3180 {
3181 #define MAX_RATIO 128
3182 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3183 static vec<sbitmap> valid_mult_list;
3184 sbitmap valid_mult;
3185
3186 if (data_index >= valid_mult_list.length ())
3187 valid_mult_list.safe_grow_cleared (data_index + 1);
3188
3189 valid_mult = valid_mult_list[data_index];
3190 if (!valid_mult)
3191 {
3192 machine_mode address_mode = targetm.addr_space.address_mode (as);
3193 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3194 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3195 rtx addr, scaled;
3196 HOST_WIDE_INT i;
3197
3198 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3199 bitmap_clear (valid_mult);
3200 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3201 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3202 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3203 {
3204 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3205 if (memory_address_addr_space_p (mode, addr, as)
3206 || memory_address_addr_space_p (mode, scaled, as))
3207 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3208 }
3209
3210 if (dump_file && (dump_flags & TDF_DETAILS))
3211 {
3212 fprintf (dump_file, " allowed multipliers:");
3213 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3214 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3215 fprintf (dump_file, " %d", (int) i);
3216 fprintf (dump_file, "\n");
3217 fprintf (dump_file, "\n");
3218 }
3219
3220 valid_mult_list[data_index] = valid_mult;
3221 }
3222
3223 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3224 return false;
3225
3226 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3227 }
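/* E.g. on an x86-like target the probe above would find the ratios 1, 2,
   4 and 8 valid for SImode, so an index scaled by 4 is free inside the
   address, while a scale of 3 must be computed separately.  */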
3228
3229 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3230 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3231 variable is omitted. Compute the cost for a memory reference that accesses
3232 a memory location of mode MEM_MODE in address space AS.
3233
3234 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3235 size of MEM_MODE / RATIO) is available. To make this determination, we
3236 look at the size of the increment to be made, which is given in CSTEP.
3237 CSTEP may be zero if the step is unknown.
3238 STMT_AFTER_INC is true iff the statement we're looking at is after the
3239 increment of the original biv.
3240
3241 TODO -- there must be some better way. This all is quite crude. */
3242
3243 enum ainc_type
3244 {
3245 AINC_PRE_INC, /* Pre increment. */
3246 AINC_PRE_DEC, /* Pre decrement. */
3247 AINC_POST_INC, /* Post increment. */
3248 AINC_POST_DEC, /* Post decrement. */
3249 AINC_NONE /* Also the number of auto increment types. */
3250 };
3251
3252 typedef struct address_cost_data_s
3253 {
3254 HOST_WIDE_INT min_offset, max_offset;
3255 unsigned costs[2][2][2][2];
3256 unsigned ainc_costs[AINC_NONE];
3257 } *address_cost_data;
3258
3259
3260 static comp_cost
3261 get_address_cost (bool symbol_present, bool var_present,
3262 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3263 HOST_WIDE_INT cstep, machine_mode mem_mode,
3264 addr_space_t as, bool speed,
3265 bool stmt_after_inc, bool *may_autoinc)
3266 {
3267 machine_mode address_mode = targetm.addr_space.address_mode (as);
3268 static vec<address_cost_data> address_cost_data_list;
3269 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3270 address_cost_data data;
3271 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3272 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3273 unsigned cost, acost, complexity;
3274 enum ainc_type autoinc_type;
3275 bool offset_p, ratio_p, autoinc;
3276 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3277 unsigned HOST_WIDE_INT mask;
3278 unsigned bits;
3279
3280 if (data_index >= address_cost_data_list.length ())
3281 address_cost_data_list.safe_grow_cleared (data_index + 1);
3282
3283 data = address_cost_data_list[data_index];
3284 if (!data)
3285 {
3286 HOST_WIDE_INT i;
3287 HOST_WIDE_INT rat, off = 0;
3288 int old_cse_not_expected, width;
3289 unsigned sym_p, var_p, off_p, rat_p, add_c;
3290 rtx_insn *seq;
3291 rtx addr, base;
3292 rtx reg0, reg1;
3293
3294 data = (address_cost_data) xcalloc (1, sizeof (*data));
3295
3296 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3297
3298 width = GET_MODE_BITSIZE (address_mode) - 1;
3299 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3300 width = HOST_BITS_PER_WIDE_INT - 1;
3301 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3302
3303 for (i = width; i >= 0; i--)
3304 {
3305 off = -((unsigned HOST_WIDE_INT) 1 << i);
3306 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3307 if (memory_address_addr_space_p (mem_mode, addr, as))
3308 break;
3309 }
3310 data->min_offset = (i == -1? 0 : off);
3311
3312 for (i = width; i >= 0; i--)
3313 {
3314 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3315 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3316 if (memory_address_addr_space_p (mem_mode, addr, as))
3317 break;
3318 /* For some targets, like ARM Thumb-1, the offset must be naturally
3319 aligned. Try an aligned offset if address_mode is not QImode. */
3320 off = (address_mode == QImode)
3321 ? 0
3322 : ((unsigned HOST_WIDE_INT) 1 << i)
3323 - GET_MODE_SIZE (address_mode);
3324 if (off > 0)
3325 {
3326 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3327 if (memory_address_addr_space_p (mem_mode, addr, as))
3328 break;
3329 }
3330 }
3331 if (i == -1)
3332 off = 0;
3333 data->max_offset = off;
3334
3335 if (dump_file && (dump_flags & TDF_DETAILS))
3336 {
3337 fprintf (dump_file, "get_address_cost:\n");
3338 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3339 GET_MODE_NAME (mem_mode),
3340 data->min_offset);
3341 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3342 GET_MODE_NAME (mem_mode),
3343 data->max_offset);
3344 }
3345
3346 rat = 1;
3347 for (i = 2; i <= MAX_RATIO; i++)
3348 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3349 {
3350 rat = i;
3351 break;
3352 }
3353
3354 /* Compute the cost of various addressing modes. */
3355 acost = 0;
3356 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3357 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3358
3359 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3360 || USE_STORE_PRE_DECREMENT (mem_mode))
3361 {
3362 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3363 has_predec[mem_mode]
3364 = memory_address_addr_space_p (mem_mode, addr, as);
3365
3366 if (has_predec[mem_mode])
3367 data->ainc_costs[AINC_PRE_DEC]
3368 = address_cost (addr, mem_mode, as, speed);
3369 }
3370 if (USE_LOAD_POST_DECREMENT (mem_mode)
3371 || USE_STORE_POST_DECREMENT (mem_mode))
3372 {
3373 addr = gen_rtx_POST_DEC (address_mode, reg0);
3374 has_postdec[mem_mode]
3375 = memory_address_addr_space_p (mem_mode, addr, as);
3376
3377 if (has_postdec[mem_mode])
3378 data->ainc_costs[AINC_POST_DEC]
3379 = address_cost (addr, mem_mode, as, speed);
3380 }
3381 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3382 || USE_STORE_PRE_INCREMENT (mem_mode))
3383 {
3384 addr = gen_rtx_PRE_INC (address_mode, reg0);
3385 has_preinc[mem_mode]
3386 = memory_address_addr_space_p (mem_mode, addr, as);
3387
3388 if (has_preinc[mem_mode])
3389 data->ainc_costs[AINC_PRE_INC]
3390 = address_cost (addr, mem_mode, as, speed);
3391 }
3392 if (USE_LOAD_POST_INCREMENT (mem_mode)
3393 || USE_STORE_POST_INCREMENT (mem_mode))
3394 {
3395 addr = gen_rtx_POST_INC (address_mode, reg0);
3396 has_postinc[mem_mode]
3397 = memory_address_addr_space_p (mem_mode, addr, as);
3398
3399 if (has_postinc[mem_mode])
3400 data->ainc_costs[AINC_POST_INC]
3401 = address_cost (addr, mem_mode, as, speed);
3402 }
3403 for (i = 0; i < 16; i++)
3404 {
3405 sym_p = i & 1;
3406 var_p = (i >> 1) & 1;
3407 off_p = (i >> 2) & 1;
3408 rat_p = (i >> 3) & 1;
3409
3410 addr = reg0;
3411 if (rat_p)
3412 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3413 gen_int_mode (rat, address_mode));
3414
3415 if (var_p)
3416 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3417
3418 if (sym_p)
3419 {
3420 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3421 /* ??? We can run into trouble with some backends by presenting
3422 it with symbols which haven't been properly passed through
3423 targetm.encode_section_info. By setting the local bit, we
3424 enhance the probability of things working. */
3425 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3426
3427 if (off_p)
3428 base = gen_rtx_fmt_e (CONST, address_mode,
3429 gen_rtx_fmt_ee
3430 (PLUS, address_mode, base,
3431 gen_int_mode (off, address_mode)));
3432 }
3433 else if (off_p)
3434 base = gen_int_mode (off, address_mode);
3435 else
3436 base = NULL_RTX;
3437
3438 if (base)
3439 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3440
3441 start_sequence ();
3442 /* To avoid splitting addressing modes, pretend that no cse will
3443 follow. */
3444 old_cse_not_expected = cse_not_expected;
3445 cse_not_expected = true;
3446 addr = memory_address_addr_space (mem_mode, addr, as);
3447 cse_not_expected = old_cse_not_expected;
3448 seq = get_insns ();
3449 end_sequence ();
3450
3451 acost = seq_cost (seq, speed);
3452 acost += address_cost (addr, mem_mode, as, speed);
3453
3454 if (!acost)
3455 acost = 1;
3456 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3457 }
3458
3459 /* On some targets, it is quite expensive to load a symbol into a register,
3460 which makes addresses that contain symbols look much more expensive.
3461 However, the symbol will have to be loaded in any case before the
3462 loop (and quite likely we have it in a register already), so it does not
3463 make much sense to penalize them too heavily. So make some final
3464 tweaks for the SYMBOL_PRESENT modes:
3465
3466 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3467 var is cheaper, use this mode with small penalty.
3468 If VAR_PRESENT is true, try whether the mode with
3469 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3470 if this is the case, use it. */
3471 add_c = add_cost (speed, address_mode);
3472 for (i = 0; i < 8; i++)
3473 {
3474 var_p = i & 1;
3475 off_p = (i >> 1) & 1;
3476 rat_p = (i >> 2) & 1;
3477
3478 acost = data->costs[0][1][off_p][rat_p] + 1;
3479 if (var_p)
3480 acost += add_c;
3481
3482 if (acost < data->costs[1][var_p][off_p][rat_p])
3483 data->costs[1][var_p][off_p][rat_p] = acost;
3484 }
3485
3486 if (dump_file && (dump_flags & TDF_DETAILS))
3487 {
3488 fprintf (dump_file, "Address costs:\n");
3489
3490 for (i = 0; i < 16; i++)
3491 {
3492 sym_p = i & 1;
3493 var_p = (i >> 1) & 1;
3494 off_p = (i >> 2) & 1;
3495 rat_p = (i >> 3) & 1;
3496
3497 fprintf (dump_file, " ");
3498 if (sym_p)
3499 fprintf (dump_file, "sym + ");
3500 if (var_p)
3501 fprintf (dump_file, "var + ");
3502 if (off_p)
3503 fprintf (dump_file, "cst + ");
3504 if (rat_p)
3505 fprintf (dump_file, "rat * ");
3506
3507 acost = data->costs[sym_p][var_p][off_p][rat_p];
3508 fprintf (dump_file, "index costs %d\n", acost);
3509 }
3510 if (has_predec[mem_mode] || has_postdec[mem_mode]
3511 || has_preinc[mem_mode] || has_postinc[mem_mode])
3512 fprintf (dump_file, " May include autoinc/dec\n");
3513 fprintf (dump_file, "\n");
3514 }
3515
3516 address_cost_data_list[data_index] = data;
3517 }
3518
3519 bits = GET_MODE_BITSIZE (address_mode);
3520 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3521 offset &= mask;
3522 if ((offset >> (bits - 1) & 1))
3523 offset |= ~mask;
3524 s_offset = offset;
3525
3526 autoinc = false;
3527 autoinc_type = AINC_NONE;
3528 msize = GET_MODE_SIZE (mem_mode);
3529 autoinc_offset = offset;
3530 if (stmt_after_inc)
3531 autoinc_offset += ratio * cstep;
3532 if (symbol_present || var_present || ratio != 1)
3533 autoinc = false;
3534 else
3535 {
3536 if (has_postinc[mem_mode] && autoinc_offset == 0
3537 && msize == cstep)
3538 autoinc_type = AINC_POST_INC;
3539 else if (has_postdec[mem_mode] && autoinc_offset == 0
3540 && msize == -cstep)
3541 autoinc_type = AINC_POST_DEC;
3542 else if (has_preinc[mem_mode] && autoinc_offset == msize
3543 && msize == cstep)
3544 autoinc_type = AINC_PRE_INC;
3545 else if (has_predec[mem_mode] && autoinc_offset == -msize
3546 && msize == -cstep)
3547 autoinc_type = AINC_PRE_DEC;
3548
3549 if (autoinc_type != AINC_NONE)
3550 autoinc = true;
3551 }
3552
3553 cost = 0;
3554 offset_p = (s_offset != 0
3555 && data->min_offset <= s_offset
3556 && s_offset <= data->max_offset);
3557 ratio_p = (ratio != 1
3558 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3559
3560 if (ratio != 1 && !ratio_p)
3561 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3562
3563 if (s_offset && !offset_p && !symbol_present)
3564 cost += add_cost (speed, address_mode);
3565
3566 if (may_autoinc)
3567 *may_autoinc = autoinc;
3568 if (autoinc)
3569 acost = data->ainc_costs[autoinc_type];
3570 else
3571 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3572 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3573 return new_cost (cost + acost, complexity);
3574 }
3575
3576 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3577 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3578 calculating the operands of EXPR. Returns true if successful, and returns
3579 the cost in COST. */
3580
3581 static bool
3582 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3583 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3584 {
3585 comp_cost res;
3586 tree op1 = TREE_OPERAND (expr, 1);
3587 tree cst = TREE_OPERAND (mult, 1);
3588 tree multop = TREE_OPERAND (mult, 0);
3589 int m = exact_log2 (int_cst_value (cst));
3590 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3591 int sa_cost;
3592 bool equal_p = false;
3593
3594 if (!(m >= 0 && m < maxm))
3595 return false;
3596
3597 if (operand_equal_p (op1, mult, 0))
3598 equal_p = true;
3599
3600 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3601 ? shiftadd_cost (speed, mode, m)
3602 : (equal_p
3603 ? shiftsub1_cost (speed, mode, m)
3604 : shiftsub0_cost (speed, mode, m)));
3605 res = new_cost (sa_cost, 0);
3606 res = add_costs (res, equal_p ? cost0 : cost1);
3607
3608 STRIP_NOPS (multop);
3609 if (!is_gimple_val (multop))
3610 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3611
3612 *cost = res;
3613 return true;
3614 }
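/* E.g. for a + b * 8 the multiplication is costed as the shift b << 3
   (m == 3) combined with the addition, which on most targets is cheaper
   than a full multiply followed by an add.  */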
3615
3616 /* Estimates cost of forcing expression EXPR into a variable. */
3617
3618 static comp_cost
3619 force_expr_to_var_cost (tree expr, bool speed)
3620 {
3621 static bool costs_initialized = false;
3622 static unsigned integer_cost [2];
3623 static unsigned symbol_cost [2];
3624 static unsigned address_cost [2];
3625 tree op0, op1;
3626 comp_cost cost0, cost1, cost;
3627 machine_mode mode;
3628
3629 if (!costs_initialized)
3630 {
3631 tree type = build_pointer_type (integer_type_node);
3632 tree var, addr;
3633 rtx x;
3634 int i;
3635
3636 var = create_tmp_var_raw (integer_type_node, "test_var");
3637 TREE_STATIC (var) = 1;
3638 x = produce_memory_decl_rtl (var, NULL);
3639 SET_DECL_RTL (var, x);
3640
3641 addr = build1 (ADDR_EXPR, type, var);
3642
3643
3644 for (i = 0; i < 2; i++)
3645 {
3646 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3647 2000), i);
3648
3649 symbol_cost[i] = computation_cost (addr, i) + 1;
3650
3651 address_cost[i]
3652 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3653 if (dump_file && (dump_flags & TDF_DETAILS))
3654 {
3655 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3656 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3657 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3658 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3659 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3660 fprintf (dump_file, "\n");
3661 }
3662 }
3663
3664 costs_initialized = true;
3665 }
3666
3667 STRIP_NOPS (expr);
3668
3669 if (SSA_VAR_P (expr))
3670 return no_cost;
3671
3672 if (is_gimple_min_invariant (expr))
3673 {
3674 if (TREE_CODE (expr) == INTEGER_CST)
3675 return new_cost (integer_cost [speed], 0);
3676
3677 if (TREE_CODE (expr) == ADDR_EXPR)
3678 {
3679 tree obj = TREE_OPERAND (expr, 0);
3680
3681 if (TREE_CODE (obj) == VAR_DECL
3682 || TREE_CODE (obj) == PARM_DECL
3683 || TREE_CODE (obj) == RESULT_DECL)
3684 return new_cost (symbol_cost [speed], 0);
3685 }
3686
3687 return new_cost (address_cost [speed], 0);
3688 }
3689
3690 switch (TREE_CODE (expr))
3691 {
3692 case POINTER_PLUS_EXPR:
3693 case PLUS_EXPR:
3694 case MINUS_EXPR:
3695 case MULT_EXPR:
3696 op0 = TREE_OPERAND (expr, 0);
3697 op1 = TREE_OPERAND (expr, 1);
3698 STRIP_NOPS (op0);
3699 STRIP_NOPS (op1);
3700 break;
3701
3702 CASE_CONVERT:
3703 case NEGATE_EXPR:
3704 op0 = TREE_OPERAND (expr, 0);
3705 STRIP_NOPS (op0);
3706 op1 = NULL_TREE;
3707 break;
3708
3709 default:
3710 /* Just an arbitrary value, FIXME. */
3711 return new_cost (target_spill_cost[speed], 0);
3712 }
3713
3714 if (op0 == NULL_TREE
3715 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3716 cost0 = no_cost;
3717 else
3718 cost0 = force_expr_to_var_cost (op0, speed);
3719
3720 if (op1 == NULL_TREE
3721 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3722 cost1 = no_cost;
3723 else
3724 cost1 = force_expr_to_var_cost (op1, speed);
3725
3726 mode = TYPE_MODE (TREE_TYPE (expr));
3727 switch (TREE_CODE (expr))
3728 {
3729 case POINTER_PLUS_EXPR:
3730 case PLUS_EXPR:
3731 case MINUS_EXPR:
3732 case NEGATE_EXPR:
3733 cost = new_cost (add_cost (speed, mode), 0);
3734 if (TREE_CODE (expr) != NEGATE_EXPR)
3735 {
3736 tree mult = NULL_TREE;
3737 comp_cost sa_cost;
3738 if (TREE_CODE (op1) == MULT_EXPR)
3739 mult = op1;
3740 else if (TREE_CODE (op0) == MULT_EXPR)
3741 mult = op0;
3742
3743 if (mult != NULL_TREE
3744 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3745 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3746 speed, &sa_cost))
3747 return sa_cost;
3748 }
3749 break;
3750
3751 CASE_CONVERT:
3752 {
3753 tree inner_mode, outer_mode;
3754 outer_mode = TREE_TYPE (expr);
3755 inner_mode = TREE_TYPE (op0);
3756 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3757 TYPE_MODE (inner_mode), speed), 0);
3758 }
3759 break;
3760
3761 case MULT_EXPR:
3762 if (cst_and_fits_in_hwi (op0))
3763 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3764 mode, speed), 0);
3765 else if (cst_and_fits_in_hwi (op1))
3766 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3767 mode, speed), 0);
3768 else
3769 return new_cost (target_spill_cost [speed], 0);
3770 break;
3771
3772 default:
3773 gcc_unreachable ();
3774 }
3775
3776 cost = add_costs (cost, cost0);
3777 cost = add_costs (cost, cost1);
3778
3779 /* Bound the cost by target_spill_cost. The parts of complicated
3780 computations often are either loop invariant or at least can
3781 be shared between several iv uses, so letting this grow without
3782 limits would not give reasonable results. */
3783 if (cost.cost > (int) target_spill_cost [speed])
3784 cost.cost = target_spill_cost [speed];
3785
3786 return cost;
3787 }
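/* For example, the cost of (a + b) * 4, with a and b SSA names, is the
   cost of a multiplication by the constant 4 plus the cost of one
   addition, capped by target_spill_cost.  */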
3788
3789 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3790 invariants the computation depends on. */
3791
3792 static comp_cost
3793 force_var_cost (struct ivopts_data *data,
3794 tree expr, bitmap *depends_on)
3795 {
3796 if (depends_on)
3797 {
3798 fd_ivopts_data = data;
3799 walk_tree (&expr, find_depends, depends_on, NULL);
3800 }
3801
3802 return force_expr_to_var_cost (expr, data->speed);
3803 }
3804
3805 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3806 value of offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
3807 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3808 invariants the computation depends on. */
3809
3810 static comp_cost
3811 split_address_cost (struct ivopts_data *data,
3812 tree addr, bool *symbol_present, bool *var_present,
3813 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3814 {
3815 tree core;
3816 HOST_WIDE_INT bitsize;
3817 HOST_WIDE_INT bitpos;
3818 tree toffset;
3819 machine_mode mode;
3820 int unsignedp, volatilep;
3821
3822 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3823 &unsignedp, &volatilep, false);
3824
3825 if (toffset != 0
3826 || bitpos % BITS_PER_UNIT != 0
3827 || TREE_CODE (core) != VAR_DECL)
3828 {
3829 *symbol_present = false;
3830 *var_present = true;
3831 fd_ivopts_data = data;
3832 walk_tree (&addr, find_depends, depends_on, NULL);
3833 return new_cost (target_spill_cost[data->speed], 0);
3834 }
3835
3836 *offset += bitpos / BITS_PER_UNIT;
3837 if (TREE_STATIC (core)
3838 || DECL_EXTERNAL (core))
3839 {
3840 *symbol_present = true;
3841 *var_present = false;
3842 return no_cost;
3843 }
3844
3845 *symbol_present = false;
3846 *var_present = true;
3847 return no_cost;
3848 }
3849
3850 /* Estimates cost of expressing difference of addresses E1 - E2 as
3851 var + symbol + offset. The value of offset is added to OFFSET;
3852 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3853 part is missing. DEPENDS_ON is a set of the invariants the computation
3854 depends on. */
3855
3856 static comp_cost
3857 ptr_difference_cost (struct ivopts_data *data,
3858 tree e1, tree e2, bool *symbol_present, bool *var_present,
3859 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3860 {
3861 HOST_WIDE_INT diff = 0;
3862 aff_tree aff_e1, aff_e2;
3863 tree type;
3864
3865 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3866
3867 if (ptr_difference_const (e1, e2, &diff))
3868 {
3869 *offset += diff;
3870 *symbol_present = false;
3871 *var_present = false;
3872 return no_cost;
3873 }
3874
3875 if (integer_zerop (e2))
3876 return split_address_cost (data, TREE_OPERAND (e1, 0),
3877 symbol_present, var_present, offset, depends_on);
3878
3879 *symbol_present = false;
3880 *var_present = true;
3881
3882 type = signed_type_for (TREE_TYPE (e1));
3883 tree_to_aff_combination (e1, type, &aff_e1);
3884 tree_to_aff_combination (e2, type, &aff_e2);
3885 aff_combination_scale (&aff_e2, -1);
3886 aff_combination_add (&aff_e1, &aff_e2);
3887
3888 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3889 }
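/* For instance, for e1 == &a[4] and e2 == &a[1] with 4-byte elements,
   ptr_difference_const succeeds, *OFFSET grows by 12 and no runtime
   computation is needed at all.  */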
3890
3891 /* Estimates cost of expressing difference E1 - E2 as
3892 var + symbol + offset. The value of offset is added to OFFSET;
3893 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3894 part is missing. DEPENDS_ON is a set of the invariants the computation
3895 depends on. */
3896
3897 static comp_cost
3898 difference_cost (struct ivopts_data *data,
3899 tree e1, tree e2, bool *symbol_present, bool *var_present,
3900 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3901 {
3902 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3903 unsigned HOST_WIDE_INT off1, off2;
3904 aff_tree aff_e1, aff_e2;
3905 tree type;
3906
3907 e1 = strip_offset (e1, &off1);
3908 e2 = strip_offset (e2, &off2);
3909 *offset += off1 - off2;
3910
3911 STRIP_NOPS (e1);
3912 STRIP_NOPS (e2);
3913
3914 if (TREE_CODE (e1) == ADDR_EXPR)
3915 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3916 offset, depends_on);
3917 *symbol_present = false;
3918
3919 if (operand_equal_p (e1, e2, 0))
3920 {
3921 *var_present = false;
3922 return no_cost;
3923 }
3924
3925 *var_present = true;
3926
3927 if (integer_zerop (e2))
3928 return force_var_cost (data, e1, depends_on);
3929
3930 if (integer_zerop (e1))
3931 {
3932 comp_cost cost = force_var_cost (data, e2, depends_on);
3933 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3934 return cost;
3935 }
3936
3937 type = signed_type_for (TREE_TYPE (e1));
3938 tree_to_aff_combination (e1, type, &aff_e1);
3939 tree_to_aff_combination (e2, type, &aff_e2);
3940 aff_combination_scale (&aff_e2, -1);
3941 aff_combination_add (&aff_e1, &aff_e2);
3942
3943 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3944 }
3945
3946 /* Returns true if AFF1 and AFF2 are identical. */
3947
3948 static bool
3949 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3950 {
3951 unsigned i;
3952
3953 if (aff1->n != aff2->n)
3954 return false;
3955
3956 for (i = 0; i < aff1->n; i++)
3957 {
3958 if (aff1->elts[i].coef != aff2->elts[i].coef)
3959 return false;
3960
3961 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3962 return false;
3963 }
3964 return true;
3965 }
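
/* Aside (illustrative): an aff_tree denotes CST + coef_1 * val_1 + ...
   + coef_N * val_N; e.g. &a[2 * i + 3] with 4-byte elements corresponds
   to the combination 12 + 8 * i + 1 * &a.  The element-wise comparison
   above may report false for equal values whose elements are ordered
   differently, which is safe for its use as a conservative equality
   test.  */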
3966
3967 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3968
3969 static int
3970 get_expr_id (struct ivopts_data *data, tree expr)
3971 {
3972 struct iv_inv_expr_ent ent;
3973 struct iv_inv_expr_ent **slot;
3974
3975 ent.expr = expr;
3976 ent.hash = iterative_hash_expr (expr, 0);
3977 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3978 if (*slot)
3979 return (*slot)->id;
3980
3981 *slot = XNEW (struct iv_inv_expr_ent);
3982 (*slot)->expr = expr;
3983 (*slot)->hash = ent.hash;
3984 (*slot)->id = data->inv_expr_id++;
3985 return (*slot)->id;
3986 }
3987
3988 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3989 requires a new compiler generated temporary. Returns -1 otherwise.
3990 ADDRESS_P is a flag indicating if the expression is for address
3991 computation. */
3992
3993 static int
3994 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3995 tree cbase, HOST_WIDE_INT ratio,
3996 bool address_p)
3997 {
3998 aff_tree ubase_aff, cbase_aff;
3999 tree expr, ub, cb;
4000
4001 STRIP_NOPS (ubase);
4002 STRIP_NOPS (cbase);
4003 ub = ubase;
4004 cb = cbase;
4005
4006 if ((TREE_CODE (ubase) == INTEGER_CST)
4007 && (TREE_CODE (cbase) == INTEGER_CST))
4008 return -1;
4009
4010 /* Strips the constant part. */
4011 if (TREE_CODE (ubase) == PLUS_EXPR
4012 || TREE_CODE (ubase) == MINUS_EXPR
4013 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4014 {
4015 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4016 ubase = TREE_OPERAND (ubase, 0);
4017 }
4018
4019 /* Strips the constant part. */
4020 if (TREE_CODE (cbase) == PLUS_EXPR
4021 || TREE_CODE (cbase) == MINUS_EXPR
4022 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4023 {
4024 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4025 cbase = TREE_OPERAND (cbase, 0);
4026 }
4027
4028 if (address_p)
4029 {
4030 if (((TREE_CODE (ubase) == SSA_NAME)
4031 || (TREE_CODE (ubase) == ADDR_EXPR
4032 && is_gimple_min_invariant (ubase)))
4033 && (TREE_CODE (cbase) == INTEGER_CST))
4034 return -1;
4035
4036 if (((TREE_CODE (cbase) == SSA_NAME)
4037 || (TREE_CODE (cbase) == ADDR_EXPR
4038 && is_gimple_min_invariant (cbase)))
4039 && (TREE_CODE (ubase) == INTEGER_CST))
4040 return -1;
4041 }
4042
4043 if (ratio == 1)
4044 {
4045 if (operand_equal_p (ubase, cbase, 0))
4046 return -1;
4047
4048 if (TREE_CODE (ubase) == ADDR_EXPR
4049 && TREE_CODE (cbase) == ADDR_EXPR)
4050 {
4051 tree usym, csym;
4052
4053 usym = TREE_OPERAND (ubase, 0);
4054 csym = TREE_OPERAND (cbase, 0);
4055 if (TREE_CODE (usym) == ARRAY_REF)
4056 {
4057 tree ind = TREE_OPERAND (usym, 1);
4058 if (TREE_CODE (ind) == INTEGER_CST
4059 && tree_fits_shwi_p (ind)
4060 && tree_to_shwi (ind) == 0)
4061 usym = TREE_OPERAND (usym, 0);
4062 }
4063 if (TREE_CODE (csym) == ARRAY_REF)
4064 {
4065 tree ind = TREE_OPERAND (csym, 1);
4066 if (TREE_CODE (ind) == INTEGER_CST
4067 && tree_fits_shwi_p (ind)
4068 && tree_to_shwi (ind) == 0)
4069 csym = TREE_OPERAND (csym, 0);
4070 }
4071 if (operand_equal_p (usym, csym, 0))
4072 return -1;
4073 }
4074 /* Now do a more complex comparison.  */
4075 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4076 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4077 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4078 return -1;
4079 }
4080
4081 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4082 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4083
4084 aff_combination_scale (&cbase_aff, -1 * ratio);
4085 aff_combination_add (&ubase_aff, &cbase_aff);
4086 expr = aff_combination_to_tree (&ubase_aff);
4087 return get_expr_id (data, expr);
4088 }
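
/* For illustration only: with UBASE == x + 8, CBASE == x and RATIO == 1,
   the constant parts are stripped, the remaining bases compare equal and
   -1 is returned -- no new temporary is needed.  For unrelated bases the
   function instead builds UBASE - RATIO * CBASE and returns its id.  */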
4089
4090
4091
4092 /* Determines the cost of the computation by which USE is expressed
4093 from induction variable CAND. If ADDRESS_P is true, we just need
4094 to create an address from it, otherwise we want to get it into a
4095 register. A set of invariants we depend on is stored in
4096 DEPENDS_ON. AT is the statement at which the value is computed.
4097 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4098 addressing is likely. */
4099
4100 static comp_cost
4101 get_computation_cost_at (struct ivopts_data *data,
4102 struct iv_use *use, struct iv_cand *cand,
4103 bool address_p, bitmap *depends_on, gimple at,
4104 bool *can_autoinc,
4105 int *inv_expr_id)
4106 {
4107 tree ubase = use->iv->base, ustep = use->iv->step;
4108 tree cbase, cstep;
4109 tree utype = TREE_TYPE (ubase), ctype;
4110 unsigned HOST_WIDE_INT cstepi, offset = 0;
4111 HOST_WIDE_INT ratio, aratio;
4112 bool var_present, symbol_present, stmt_is_after_inc;
4113 comp_cost cost;
4114 widest_int rat;
4115 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4116 machine_mode mem_mode = (address_p
4117 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4118 : VOIDmode);
4119
4120 *depends_on = NULL;
4121
4122 /* Only consider real candidates. */
4123 if (!cand->iv)
4124 return infinite_cost;
4125
4126 cbase = cand->iv->base;
4127 cstep = cand->iv->step;
4128 ctype = TREE_TYPE (cbase);
4129
4130 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4131 {
4132 /* We do not have a precision to express the values of use. */
4133 return infinite_cost;
4134 }
4135
4136 if (address_p
4137 || (use->iv->base_object
4138 && cand->iv->base_object
4139 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4140 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4141 {
4142 /* Do not try to express address of an object with computation based
4143 on address of a different object. This may cause problems in rtl
4144 level alias analysis (that does not expect this to be happening,
4145 as this is illegal in C), and would be unlikely to be useful
4146 anyway. */
4147 if (use->iv->base_object
4148 && cand->iv->base_object
4149 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4150 return infinite_cost;
4151 }
4152
4153 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4154 {
4155 /* TODO -- add direct handling of this case. */
4156 goto fallback;
4157 }
4158
4159 /* CSTEPI is removed from the offset in case the statement is after the
4160 increment. If the step is not constant, we use zero instead.
4161 This is a bit imprecise (there is the extra addition), but
4162 redundancy elimination is likely to transform the code so that
4163 it uses the value of the variable before the increment anyway,
4164 so this is not too unrealistic. */
4165 if (cst_and_fits_in_hwi (cstep))
4166 cstepi = int_cst_value (cstep);
4167 else
4168 cstepi = 0;
4169
4170 if (!constant_multiple_of (ustep, cstep, &rat))
4171 return infinite_cost;
4172
4173 if (wi::fits_shwi_p (rat))
4174 ratio = rat.to_shwi ();
4175 else
4176 return infinite_cost;
4177
4178 STRIP_NOPS (cbase);
4179 ctype = TREE_TYPE (cbase);
4180
4181 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4182
4183 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4184 or ratio == 1, it is better to handle this like
4185
4186 ubase - ratio * cbase + ratio * var
4187
4188 (this also holds in the case ratio == -1; handling that case is a TODO). */
4189
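/* For illustration only: with ustep == 8 and cstep == 4, RATIO == 2
   and the use value is (ubase - 2 * cbase) + 2 * var, so only the
   loop-invariant part ubase - 2 * cbase needs to be set up before
   the loop.  */
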
4190 if (cst_and_fits_in_hwi (cbase))
4191 {
4192 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4193 cost = difference_cost (data,
4194 ubase, build_int_cst (utype, 0),
4195 &symbol_present, &var_present, &offset,
4196 depends_on);
4197 cost.cost /= avg_loop_niter (data->current_loop);
4198 }
4199 else if (ratio == 1)
4200 {
4201 tree real_cbase = cbase;
4202
4203 /* Check to see if any adjustment is needed. */
4204 if (cstepi == 0 && stmt_is_after_inc)
4205 {
4206 aff_tree real_cbase_aff;
4207 aff_tree cstep_aff;
4208
4209 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4210 &real_cbase_aff);
4211 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4212
4213 aff_combination_add (&real_cbase_aff, &cstep_aff);
4214 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4215 }
4216
4217 cost = difference_cost (data,
4218 ubase, real_cbase,
4219 &symbol_present, &var_present, &offset,
4220 depends_on);
4221 cost.cost /= avg_loop_niter (data->current_loop);
4222 }
4223 else if (address_p
4224 && !POINTER_TYPE_P (ctype)
4225 && multiplier_allowed_in_address_p
4226 (ratio, mem_mode,
4227 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4228 {
4229 cbase
4230 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4231 cost = difference_cost (data,
4232 ubase, cbase,
4233 &symbol_present, &var_present, &offset,
4234 depends_on);
4235 cost.cost /= avg_loop_niter (data->current_loop);
4236 }
4237 else
4238 {
4239 cost = force_var_cost (data, cbase, depends_on);
4240 cost = add_costs (cost,
4241 difference_cost (data,
4242 ubase, build_int_cst (utype, 0),
4243 &symbol_present, &var_present,
4244 &offset, depends_on));
4245 cost.cost /= avg_loop_niter (data->current_loop);
4246 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4247 }
4248
4249 if (inv_expr_id)
4250 {
4251 *inv_expr_id =
4252 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4253 /* Clear depends on. */
4254 if (*inv_expr_id != -1 && depends_on && *depends_on)
4255 bitmap_clear (*depends_on);
4256 }
4257
4258 /* If we are after the increment, the value of the candidate is higher by
4259 one iteration. */
4260 if (stmt_is_after_inc)
4261 offset -= ratio * cstepi;
4262
4263 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4264 (symbol/var1/const parts may be omitted). If we are looking for an
4265 address, find the cost of addressing this. */
4266 if (address_p)
4267 return add_costs (cost,
4268 get_address_cost (symbol_present, var_present,
4269 offset, ratio, cstepi,
4270 mem_mode,
4271 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4272 speed, stmt_is_after_inc,
4273 can_autoinc));
4274
4275 /* Otherwise estimate the costs for computing the expression. */
4276 if (!symbol_present && !var_present && !offset)
4277 {
4278 if (ratio != 1)
4279 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4280 return cost;
4281 }
4282
4283 /* Symbol + offset should be compile-time computable so consider that they
4284 are added once to the variable, if present. */
4285 if (var_present && (symbol_present || offset))
4286 cost.cost += adjust_setup_cost (data,
4287 add_cost (speed, TYPE_MODE (ctype)));
4288
4289 /* Having offset does not affect runtime cost in case it is added to
4290 symbol, but it increases complexity. */
4291 if (offset)
4292 cost.complexity++;
4293
4294 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4295
4296 aratio = ratio > 0 ? ratio : -ratio;
4297 if (aratio != 1)
4298 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4299 return cost;
4300
4301 fallback:
4302 if (can_autoinc)
4303 *can_autoinc = false;
4304
4305 {
4306 /* Just get the expression, expand it and measure the cost. */
4307 tree comp = get_computation_at (data->current_loop, use, cand, at);
4308
4309 if (!comp)
4310 return infinite_cost;
4311
4312 if (address_p)
4313 comp = build_simple_mem_ref (comp);
4314
4315 return new_cost (computation_cost (comp, speed), 0);
4316 }
4317 }
4318
4319 /* Determines the cost of the computation by which USE is expressed
4320 from induction variable CAND. If ADDRESS_P is true, we just need
4321 to create an address from it, otherwise we want to get it into a
4322 register. A set of invariants we depend on is stored in
4323 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4324 autoinc addressing is likely. */
4325
4326 static comp_cost
4327 get_computation_cost (struct ivopts_data *data,
4328 struct iv_use *use, struct iv_cand *cand,
4329 bool address_p, bitmap *depends_on,
4330 bool *can_autoinc, int *inv_expr_id)
4331 {
4332 return get_computation_cost_at (data,
4333 use, cand, address_p, depends_on, use->stmt,
4334 can_autoinc, inv_expr_id);
4335 }
4336
4337 /* Determines cost of basing replacement of USE on CAND in a generic
4338 expression. */
4339
4340 static bool
4341 determine_use_iv_cost_generic (struct ivopts_data *data,
4342 struct iv_use *use, struct iv_cand *cand)
4343 {
4344 bitmap depends_on;
4345 comp_cost cost;
4346 int inv_expr_id = -1;
4347
4348 /* The simple case first -- if we need to express value of the preserved
4349 original biv, the cost is 0. This also prevents us from counting the
4350 cost of increment twice -- once at this use and once in the cost of
4351 the candidate. */
4352 if (cand->pos == IP_ORIGINAL
4353 && cand->incremented_at == use->stmt)
4354 {
4355 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4356 ERROR_MARK, -1);
4357 return true;
4358 }
4359
4360 cost = get_computation_cost (data, use, cand, false, &depends_on,
4361 NULL, &inv_expr_id);
4362
4363 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4364 inv_expr_id);
4365
4366 return !infinite_cost_p (cost);
4367 }
4368
4369 /* Determines cost of basing replacement of USE on CAND in an address. */
4370
4371 static bool
4372 determine_use_iv_cost_address (struct ivopts_data *data,
4373 struct iv_use *use, struct iv_cand *cand)
4374 {
4375 bitmap depends_on;
4376 bool can_autoinc;
4377 int inv_expr_id = -1;
4378 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4379 &can_autoinc, &inv_expr_id);
4380
4381 if (cand->ainc_use == use)
4382 {
4383 if (can_autoinc)
4384 cost.cost -= cand->cost_step;
4385 /* If we generated the candidate solely for exploiting autoincrement
4386 opportunities, and it turns out it can't be used, set the cost to
4387 infinity to make sure we ignore it. */
4388 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4389 cost = infinite_cost;
4390 }
4391 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4392 inv_expr_id);
4393
4394 return !infinite_cost_p (cost);
4395 }
4396
4397 /* Computes value of candidate CAND at position AT in iteration NITER, and
4398 stores it to VAL. */
4399
4400 static void
4401 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4402 aff_tree *val)
4403 {
4404 aff_tree step, delta, nit;
4405 struct iv *iv = cand->iv;
4406 tree type = TREE_TYPE (iv->base);
4407 tree steptype = unsigned_type_for (type);
4411
4412 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4413 aff_combination_convert (&step, steptype);
4414 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4415 aff_combination_convert (&nit, steptype);
4416 aff_combination_mult (&nit, &step, &delta);
4417 if (stmt_after_increment (loop, cand, at))
4418 aff_combination_add (&delta, &step);
4419
4420 tree_to_aff_combination (iv->base, type, val);
4421 if (!POINTER_TYPE_P (type))
4422 aff_combination_convert (val, steptype);
4423 aff_combination_add (val, &delta);
4424 }
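
/* For illustration only: for a candidate with base 16 and step 4, the
   value after NITER iterations is 16 + 4 * NITER, plus one more step
   (16 + 4 * (NITER + 1)) when AT lies after the increment.  */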
4425
4426 /* Returns period of induction variable iv. */
4427
4428 static tree
4429 iv_period (struct iv *iv)
4430 {
4431 tree step = iv->step, period, type;
4432 tree pow2div;
4433
4434 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4435
4436 type = unsigned_type_for (TREE_TYPE (step));
4437 /* Period of the iv is lcm (step, type_range)/step - 1,
4438 i.e., N*type_range/step - 1. Since the type range is a power
4439 of two, N == step >> num_of_ending_zeros_binary (step),
4440 so the final result is
4441 
4442 (type_range >> num_of_ending_zeros_binary (step)) - 1
4443 
4444 */
4445 pow2div = num_ending_zeros (step);
4446
4447 period = build_low_bits_mask (type,
4448 (TYPE_PRECISION (type)
4449 - tree_to_uhwi (pow2div)));
4450
4451 return period;
4452 }
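
/* For illustration only: a 32-bit unsigned iv with step 12 has two
   trailing zero bits in the step, so iv_period returns
   (2^32 >> 2) - 1 == 0x3fffffff; the iv takes pairwise distinct values
   for period + 1 consecutive iterations.  */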
4453
4454 /* Returns the comparison operator used when eliminating the iv USE. */
4455
4456 static enum tree_code
4457 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4458 {
4459 struct loop *loop = data->current_loop;
4460 basic_block ex_bb;
4461 edge exit;
4462
4463 ex_bb = gimple_bb (use->stmt);
4464 exit = EDGE_SUCC (ex_bb, 0);
4465 if (flow_bb_inside_loop_p (loop, exit->dest))
4466 exit = EDGE_SUCC (ex_bb, 1);
4467
4468 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4469 }
4470
4471 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4472 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4473 calculation is performed in non-wrapping type.
4474
4475 TODO: More generally, we could test for the situation that
4476 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4477 This would require knowing the sign of OFFSET. */
4478
4479 static bool
4480 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4481 {
4482 enum tree_code code;
4483 tree e1, e2;
4484 aff_tree aff_e1, aff_e2, aff_offset;
4485
4486 if (!nowrap_type_p (TREE_TYPE (base)))
4487 return false;
4488
4489 base = expand_simple_operations (base);
4490
4491 if (TREE_CODE (base) == SSA_NAME)
4492 {
4493 gimple stmt = SSA_NAME_DEF_STMT (base);
4494
4495 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4496 return false;
4497
4498 code = gimple_assign_rhs_code (stmt);
4499 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4500 return false;
4501
4502 e1 = gimple_assign_rhs1 (stmt);
4503 e2 = gimple_assign_rhs2 (stmt);
4504 }
4505 else
4506 {
4507 code = TREE_CODE (base);
4508 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4509 return false;
4510 e1 = TREE_OPERAND (base, 0);
4511 e2 = TREE_OPERAND (base, 1);
4512 }
4513
4514 /* Use affine expansion as deeper inspection to prove the equality. */
4515 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4516 &aff_e2, &data->name_expansion_cache);
4517 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4518 &aff_offset, &data->name_expansion_cache);
4519 aff_combination_scale (&aff_offset, -1);
4520 switch (code)
4521 {
4522 case PLUS_EXPR:
4523 aff_combination_add (&aff_e2, &aff_offset);
4524 if (aff_combination_zero_p (&aff_e2))
4525 return true;
4526
4527 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4528 &aff_e1, &data->name_expansion_cache);
4529 aff_combination_add (&aff_e1, &aff_offset);
4530 return aff_combination_zero_p (&aff_e1);
4531
4532 case POINTER_PLUS_EXPR:
4533 aff_combination_add (&aff_e2, &aff_offset);
4534 return aff_combination_zero_p (&aff_e2);
4535
4536 default:
4537 return false;
4538 }
4539 }
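
/* For illustration only: if BASE is defined as p_0 = base + a
   (a POINTER_PLUS_EXPR in a non-wrapping type) and OFFSET expands to the
   same affine combination as a, then aff_e2 - aff_offset above is zero
   and BASE - OFFSET == base is proved not to overflow.  */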
4540
4541 /* Tries to replace the loop exit test by one formulated in terms of a LT_EXPR
4542 comparison with CAND. NITER describes the number of iterations of
4543 the loop. If successful, the comparison in COMP_P is altered accordingly.
4544
4545 We aim to handle the following situation:
4546
4547 sometype *base, *p;
4548 int a, b, i;
4549
4550 i = a;
4551 p = p_0 = base + a;
4552
4553 do
4554 {
4555 bla (*p);
4556 p++;
4557 i++;
4558 }
4559 while (i < b);
4560
4561 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4562 We aim to optimize this to
4563
4564 p = p_0 = base + a;
4565 do
4566 {
4567 bla (*p);
4568 p++;
4569 }
4570 while (p < p_0 - a + b);
4571
4572 This preserves correctness, since the pointer arithmetic does not
4573 overflow. More precisely:
4574
4575 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4576 overflow in computing it or the values of p.
4577 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4578 overflow. To prove this, we use the fact that p_0 = base + a. */
4579
4580 static bool
4581 iv_elimination_compare_lt (struct ivopts_data *data,
4582 struct iv_cand *cand, enum tree_code *comp_p,
4583 struct tree_niter_desc *niter)
4584 {
4585 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4586 struct aff_tree nit, tmpa, tmpb;
4587 enum tree_code comp;
4588 HOST_WIDE_INT step;
4589
4590 /* We need to know that the candidate induction variable does not overflow.
4591 While more complex analysis may be used to prove this, for now just
4592 check that the variable appears in the original program and that it
4593 is computed in a type that guarantees no overflows. */
4594 cand_type = TREE_TYPE (cand->iv->base);
4595 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4596 return false;
4597
4598 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4599 the calculation of the BOUND could overflow, making the comparison
4600 invalid. */
4601 if (!data->loop_single_exit_p)
4602 return false;
4603
4604 /* We need to be able to decide whether candidate is increasing or decreasing
4605 in order to choose the right comparison operator. */
4606 if (!cst_and_fits_in_hwi (cand->iv->step))
4607 return false;
4608 step = int_cst_value (cand->iv->step);
4609
4610 /* Check that the number of iterations matches the expected pattern:
4611 a + 1 > b ? 0 : b - a - 1. */
4612 mbz = niter->may_be_zero;
4613 if (TREE_CODE (mbz) == GT_EXPR)
4614 {
4615 /* Handle a + 1 > b. */
4616 tree op0 = TREE_OPERAND (mbz, 0);
4617 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4618 {
4619 a = TREE_OPERAND (op0, 0);
4620 b = TREE_OPERAND (mbz, 1);
4621 }
4622 else
4623 return false;
4624 }
4625 else if (TREE_CODE (mbz) == LT_EXPR)
4626 {
4627 tree op1 = TREE_OPERAND (mbz, 1);
4628
4629 /* Handle b < a + 1. */
4630 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4631 {
4632 a = TREE_OPERAND (op1, 0);
4633 b = TREE_OPERAND (mbz, 0);
4634 }
4635 else
4636 return false;
4637 }
4638 else
4639 return false;
4640
4641 /* Expected number of iterations is B - A - 1. Check that it matches
4642 the actual number, i.e., that B - A - NITER = 1. */
4643 tree_to_aff_combination (niter->niter, nit_type, &nit);
4644 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4645 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4646 aff_combination_scale (&nit, -1);
4647 aff_combination_scale (&tmpa, -1);
4648 aff_combination_add (&tmpb, &tmpa);
4649 aff_combination_add (&tmpb, &nit);
4650 if (tmpb.n != 0 || tmpb.offset != 1)
4651 return false;
4652
4653 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4654 overflow. */
4655 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4656 cand->iv->step,
4657 fold_convert (TREE_TYPE (cand->iv->step), a));
4658 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4659 return false;
4660
4661 /* Determine the new comparison operator. */
4662 comp = step < 0 ? GT_EXPR : LT_EXPR;
4663 if (*comp_p == NE_EXPR)
4664 *comp_p = comp;
4665 else if (*comp_p == EQ_EXPR)
4666 *comp_p = invert_tree_comparison (comp, false);
4667 else
4668 gcc_unreachable ();
4669
4670 return true;
4671 }
4672
4673 /* Check whether it is possible to express the condition in USE by comparison
4674 of candidate CAND. If so, store the value compared with to BOUND, and the
4675 comparison operator to COMP. */
4676
4677 static bool
4678 may_eliminate_iv (struct ivopts_data *data,
4679 struct iv_use *use, struct iv_cand *cand, tree *bound,
4680 enum tree_code *comp)
4681 {
4682 basic_block ex_bb;
4683 edge exit;
4684 tree period;
4685 struct loop *loop = data->current_loop;
4686 aff_tree bnd;
4687 struct tree_niter_desc *desc = NULL;
4688
4689 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4690 return false;
4691
4692 /* For now works only for exits that dominate the loop latch.
4693 TODO: extend to other conditions inside loop body. */
4694 ex_bb = gimple_bb (use->stmt);
4695 if (use->stmt != last_stmt (ex_bb)
4696 || gimple_code (use->stmt) != GIMPLE_COND
4697 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4698 return false;
4699
4700 exit = EDGE_SUCC (ex_bb, 0);
4701 if (flow_bb_inside_loop_p (loop, exit->dest))
4702 exit = EDGE_SUCC (ex_bb, 1);
4703 if (flow_bb_inside_loop_p (loop, exit->dest))
4704 return false;
4705
4706 desc = niter_for_exit (data, exit);
4707 if (!desc)
4708 return false;
4709
4710 /* Determine whether we can use the variable to test the exit condition.
4711 This is the case iff the period of the induction variable is greater
4712 than the number of iterations for which the exit condition is true. */
4713 period = iv_period (cand->iv);
4714
4715 /* If the number of iterations is constant, compare against it directly. */
4716 if (TREE_CODE (desc->niter) == INTEGER_CST)
4717 {
4718 /* See cand_value_at. */
4719 if (stmt_after_increment (loop, cand, use->stmt))
4720 {
4721 if (!tree_int_cst_lt (desc->niter, period))
4722 return false;
4723 }
4724 else
4725 {
4726 if (tree_int_cst_lt (period, desc->niter))
4727 return false;
4728 }
4729 }
4730
4731 /* If not, and if this is the only possible exit of the loop, see whether
4732 we can get a conservative estimate on the number of iterations of the
4733 entire loop and compare against that instead. */
4734 else
4735 {
4736 widest_int period_value, max_niter;
4737
4738 max_niter = desc->max;
4739 if (stmt_after_increment (loop, cand, use->stmt))
4740 max_niter += 1;
4741 period_value = wi::to_widest (period);
4742 if (wi::gtu_p (max_niter, period_value))
4743 {
4744 /* See if we can take advantage of inferred loop bound information. */
4745 if (data->loop_single_exit_p)
4746 {
4747 if (!max_loop_iterations (loop, &max_niter))
4748 return false;
4749 /* The loop bound is already adjusted by adding 1. */
4750 if (wi::gtu_p (max_niter, period_value))
4751 return false;
4752 }
4753 else
4754 return false;
4755 }
4756 }
4757
4758 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4759
4760 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4761 aff_combination_to_tree (&bnd));
4762 *comp = iv_elimination_compare (data, use);
4763
4764 /* It is unlikely that computing the number of iterations using division
4765 would be more profitable than keeping the original induction variable. */
4766 if (expression_expensive_p (*bound))
4767 return false;
4768
4769 /* Sometimes, it is possible to handle the situation that the number of
4770 iterations may be zero unless additional assumptions hold, by using <
4771 instead of != in the exit condition.
4772
4773 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4774 base the exit condition on it. However, that is often too
4775 expensive. */
4776 if (!integer_zerop (desc->may_be_zero))
4777 return iv_elimination_compare_lt (data, cand, comp, desc);
4778
4779 return true;
4780 }
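
/* For illustration only: an 8-bit unsigned candidate with step 2 has
   period 127, so it must not be used to test the exit of a loop that may
   iterate 200 times; the period/niter checks above reject such pairs.  */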
4781
4782 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4783 be copied, if it is used in the loop body and DATA->body_includes_call. */
4784
4785 static int
4786 parm_decl_cost (struct ivopts_data *data, tree bound)
4787 {
4788 tree sbound = bound;
4789 STRIP_NOPS (sbound);
4790
4791 if (TREE_CODE (sbound) == SSA_NAME
4792 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4793 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4794 && data->body_includes_call)
4795 return COSTS_N_INSNS (1);
4796
4797 return 0;
4798 }
4799
4800 /* Determines cost of basing replacement of USE on CAND in a condition. */
4801
4802 static bool
4803 determine_use_iv_cost_condition (struct ivopts_data *data,
4804 struct iv_use *use, struct iv_cand *cand)
4805 {
4806 tree bound = NULL_TREE;
4807 struct iv *cmp_iv;
4808 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4809 comp_cost elim_cost, express_cost, cost, bound_cost;
4810 bool ok;
4811 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4812 tree *control_var, *bound_cst;
4813 enum tree_code comp = ERROR_MARK;
4814
4815 /* Only consider real candidates. */
4816 if (!cand->iv)
4817 {
4818 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4819 ERROR_MARK, -1);
4820 return false;
4821 }
4822
4823 /* Try iv elimination. */
4824 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4825 {
4826 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4827 if (elim_cost.cost == 0)
4828 elim_cost.cost = parm_decl_cost (data, bound);
4829 else if (TREE_CODE (bound) == INTEGER_CST)
4830 elim_cost.cost = 0;
4831 /* If we replace a loop condition 'i < n' with 'p < base + n',
4832 depends_on_elim will have 'base' and 'n' set, which implies
4833 that both 'base' and 'n' will be live during the loop. More likely,
4834 'base + n' will be loop invariant, resulting in only one live value
4835 during the loop. So in that case we clear depends_on_elim and set
4836 elim_inv_expr_id instead. */
4837 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4838 {
4839 elim_inv_expr_id = get_expr_id (data, bound);
4840 bitmap_clear (depends_on_elim);
4841 }
4842 /* The bound is a loop invariant, so it will only be computed
4843 once. */
4844 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4845 }
4846 else
4847 elim_cost = infinite_cost;
4848
4849 /* Try expressing the original giv. If it is compared with an invariant,
4850 note that we cannot get rid of it. */
4851 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4852 NULL, &cmp_iv);
4853 gcc_assert (ok);
4854
4855 /* When the condition is a comparison of the candidate IV against
4856 zero, prefer this IV.
4857
4858 TODO: The constant that we're subtracting from the cost should
4859 be target-dependent. This information should be added to the
4860 target costs for each backend. */
4861 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4862 && integer_zerop (*bound_cst)
4863 && (operand_equal_p (*control_var, cand->var_after, 0)
4864 || operand_equal_p (*control_var, cand->var_before, 0)))
4865 elim_cost.cost -= 1;
4866
4867 express_cost = get_computation_cost (data, use, cand, false,
4868 &depends_on_express, NULL,
4869 &express_inv_expr_id);
4870 fd_ivopts_data = data;
4871 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4872
4873 /* Count the cost of the original bound as well. */
4874 bound_cost = force_var_cost (data, *bound_cst, NULL);
4875 if (bound_cost.cost == 0)
4876 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4877 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4878 bound_cost.cost = 0;
4879 express_cost.cost += bound_cost.cost;
4880
4881 /* Choose the better approach, preferring the eliminated IV. */
4882 if (compare_costs (elim_cost, express_cost) <= 0)
4883 {
4884 cost = elim_cost;
4885 depends_on = depends_on_elim;
4886 depends_on_elim = NULL;
4887 inv_expr_id = elim_inv_expr_id;
4888 }
4889 else
4890 {
4891 cost = express_cost;
4892 depends_on = depends_on_express;
4893 depends_on_express = NULL;
4894 bound = NULL_TREE;
4895 comp = ERROR_MARK;
4896 inv_expr_id = express_inv_expr_id;
4897 }
4898
4899 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4900
4901 if (depends_on_elim)
4902 BITMAP_FREE (depends_on_elim);
4903 if (depends_on_express)
4904 BITMAP_FREE (depends_on_express);
4905
4906 return !infinite_cost_p (cost);
4907 }
4908
4909 /* Determines cost of basing replacement of USE on CAND. Returns false
4910 if USE cannot be based on CAND. */
4911
4912 static bool
4913 determine_use_iv_cost (struct ivopts_data *data,
4914 struct iv_use *use, struct iv_cand *cand)
4915 {
4916 switch (use->type)
4917 {
4918 case USE_NONLINEAR_EXPR:
4919 return determine_use_iv_cost_generic (data, use, cand);
4920
4921 case USE_ADDRESS:
4922 return determine_use_iv_cost_address (data, use, cand);
4923
4924 case USE_COMPARE:
4925 return determine_use_iv_cost_condition (data, use, cand);
4926
4927 default:
4928 gcc_unreachable ();
4929 }
4930 }
4931
4932 /* Return true if get_computation_cost indicates that autoincrement is
4933 a possibility for the pair of USE and CAND, false otherwise. */
4934
4935 static bool
4936 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4937 struct iv_cand *cand)
4938 {
4939 bitmap depends_on;
4940 bool can_autoinc;
4941 comp_cost cost;
4942
4943 if (use->type != USE_ADDRESS)
4944 return false;
4945
4946 cost = get_computation_cost (data, use, cand, true, &depends_on,
4947 &can_autoinc, NULL);
4948
4949 BITMAP_FREE (depends_on);
4950
4951 return !infinite_cost_p (cost) && can_autoinc;
4952 }
4953
4954 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4955 use that allows autoincrement, and set their AINC_USE if possible. */
4956
4957 static void
4958 set_autoinc_for_original_candidates (struct ivopts_data *data)
4959 {
4960 unsigned i, j;
4961
4962 for (i = 0; i < n_iv_cands (data); i++)
4963 {
4964 struct iv_cand *cand = iv_cand (data, i);
4965 struct iv_use *closest_before = NULL;
4966 struct iv_use *closest_after = NULL;
4967 if (cand->pos != IP_ORIGINAL)
4968 continue;
4969
4970 for (j = 0; j < n_iv_uses (data); j++)
4971 {
4972 struct iv_use *use = iv_use (data, j);
4973 unsigned uid = gimple_uid (use->stmt);
4974
4975 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4976 continue;
4977
4978 if (uid < gimple_uid (cand->incremented_at)
4979 && (closest_before == NULL
4980 || uid > gimple_uid (closest_before->stmt)))
4981 closest_before = use;
4982
4983 if (uid > gimple_uid (cand->incremented_at)
4984 && (closest_after == NULL
4985 || uid < gimple_uid (closest_after->stmt)))
4986 closest_after = use;
4987 }
4988
4989 if (closest_before != NULL
4990 && autoinc_possible_for_pair (data, closest_before, cand))
4991 cand->ainc_use = closest_before;
4992 else if (closest_after != NULL
4993 && autoinc_possible_for_pair (data, closest_after, cand))
4994 cand->ainc_use = closest_after;
4995 }
4996 }
4997
4998 /* Finds the candidates for the induction variables. */
4999
5000 static void
5001 find_iv_candidates (struct ivopts_data *data)
5002 {
5003 /* Add commonly used ivs. */
5004 add_standard_iv_candidates (data);
5005
5006 /* Add old induction variables. */
5007 add_old_ivs_candidates (data);
5008
5009 /* Add induction variables derived from uses. */
5010 add_derived_ivs_candidates (data);
5011
5012 set_autoinc_for_original_candidates (data);
5013
5014 /* Record the important candidates. */
5015 record_important_candidates (data);
5016 }
5017
5018 /* Determines costs of basing the use of the iv on an iv candidate. */
5019
5020 static void
5021 determine_use_iv_costs (struct ivopts_data *data)
5022 {
5023 unsigned i, j;
5024 struct iv_use *use;
5025 struct iv_cand *cand;
5026 bitmap to_clear = BITMAP_ALLOC (NULL);
5027
5028 alloc_use_cost_map (data);
5029
5030 for (i = 0; i < n_iv_uses (data); i++)
5031 {
5032 use = iv_use (data, i);
5033
5034 if (data->consider_all_candidates)
5035 {
5036 for (j = 0; j < n_iv_cands (data); j++)
5037 {
5038 cand = iv_cand (data, j);
5039 determine_use_iv_cost (data, use, cand);
5040 }
5041 }
5042 else
5043 {
5044 bitmap_iterator bi;
5045
5046 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5047 {
5048 cand = iv_cand (data, j);
5049 if (!determine_use_iv_cost (data, use, cand))
5050 bitmap_set_bit (to_clear, j);
5051 }
5052
5053 /* Remove the candidates for which the cost is infinite from
5054 the list of related candidates. */
5055 bitmap_and_compl_into (use->related_cands, to_clear);
5056 bitmap_clear (to_clear);
5057 }
5058 }
5059
5060 BITMAP_FREE (to_clear);
5061
5062 if (dump_file && (dump_flags & TDF_DETAILS))
5063 {
5064 fprintf (dump_file, "Use-candidate costs:\n");
5065
5066 for (i = 0; i < n_iv_uses (data); i++)
5067 {
5068 use = iv_use (data, i);
5069
5070 fprintf (dump_file, "Use %d:\n", i);
5071 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5072 for (j = 0; j < use->n_map_members; j++)
5073 {
5074 if (!use->cost_map[j].cand
5075 || infinite_cost_p (use->cost_map[j].cost))
5076 continue;
5077
5078 fprintf (dump_file, " %d\t%d\t%d\t",
5079 use->cost_map[j].cand->id,
5080 use->cost_map[j].cost.cost,
5081 use->cost_map[j].cost.complexity);
5082 if (use->cost_map[j].depends_on)
5083 bitmap_print (dump_file,
5084 use->cost_map[j].depends_on, "","");
5085 if (use->cost_map[j].inv_expr_id != -1)
5086 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5087 fprintf (dump_file, "\n");
5088 }
5089
5090 fprintf (dump_file, "\n");
5091 }
5092 fprintf (dump_file, "\n");
5093 }
5094 }
5095
5096 /* Determines cost of the candidate CAND. */
5097
5098 static void
5099 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5100 {
5101 comp_cost cost_base;
5102 unsigned cost, cost_step;
5103 tree base;
5104
5105 if (!cand->iv)
5106 {
5107 cand->cost = 0;
5108 return;
5109 }
5110
5111 /* There are two costs associated with the candidate -- its increment
5112 and its initialization. The second is almost negligible for any loop
5113 that rolls enough, so we give it only little weight. */
5114
5115 base = cand->iv->base;
5116 cost_base = force_var_cost (data, base, NULL);
5117 /* It would be exceptional for the iv register to happen to be initialized
5118 with the proper value at no cost. In general, there will at least be a
5119 regcopy or a constant set. */
5120 if (cost_base.cost == 0)
5121 cost_base.cost = COSTS_N_INSNS (1);
5122 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5123
5124 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5125
5126 /* Prefer the original ivs unless we may gain something by replacing them;
5127 the reason is to make debugging simpler. This is not relevant for
5128 artificial ivs created by other optimization passes. */
5129 if (cand->pos != IP_ORIGINAL
5130 || !SSA_NAME_VAR (cand->var_before)
5131 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5132 cost++;
5133
5134 /* Prefer not to insert statements into latch unless there are some
5135 already (so that we do not create unnecessary jumps). */
5136 if (cand->pos == IP_END
5137 && empty_block_p (ip_end_pos (data->current_loop)))
5138 cost++;
5139
5140 cand->cost = cost;
5141 cand->cost_step = cost_step;
5142 }
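
/* For illustration only: a candidate is charged its per-iteration
   increment (cost_step) plus its setup cost scaled down by
   adjust_setup_cost; the cost++ adjustments above slightly penalize
   candidates that are not preserved original ivs and candidates that
   would force statements into an otherwise empty latch.  */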
5143
5144 /* Determines costs of computation of the candidates. */
5145
5146 static void
5147 determine_iv_costs (struct ivopts_data *data)
5148 {
5149 unsigned i;
5150
5151 if (dump_file && (dump_flags & TDF_DETAILS))
5152 {
5153 fprintf (dump_file, "Candidate costs:\n");
5154 fprintf (dump_file, " cand\tcost\n");
5155 }
5156
5157 for (i = 0; i < n_iv_cands (data); i++)
5158 {
5159 struct iv_cand *cand = iv_cand (data, i);
5160
5161 determine_iv_cost (data, cand);
5162
5163 if (dump_file && (dump_flags & TDF_DETAILS))
5164 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5165 }
5166
5167 if (dump_file && (dump_flags & TDF_DETAILS))
5168 fprintf (dump_file, "\n");
5169 }
5170
5171 /* Calculates cost for having SIZE induction variables. */
5172
5173 static unsigned
5174 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5175 {
5176 /* We add size to the cost, so that we prefer eliminating ivs
5177 if possible. */
5178 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5179 data->body_includes_call);
5180 }
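
/* For illustration only: with SIZE == 3, the penalty is 3 plus whatever
   estimate_reg_pressure_cost charges for keeping three extra values live
   across the loop, given that DATA->regs_used registers are already
   occupied by other loop-carried values.  */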
5181
5182 /* For each size of the induction variable set determine the penalty. */
5183
5184 static void
5185 determine_set_costs (struct ivopts_data *data)
5186 {
5187 unsigned j, n;
5188 gphi *phi;
5189 gphi_iterator psi;
5190 tree op;
5191 struct loop *loop = data->current_loop;
5192 bitmap_iterator bi;
5193
5194 if (dump_file && (dump_flags & TDF_DETAILS))
5195 {
5196 fprintf (dump_file, "Global costs:\n");
5197 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5198 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5199 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5200 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5201 }
5202
5203 n = 0;
5204 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5205 {
5206 phi = psi.phi ();
5207 op = PHI_RESULT (phi);
5208
5209 if (virtual_operand_p (op))
5210 continue;
5211
5212 if (get_iv (data, op))
5213 continue;
5214
5215 n++;
5216 }
5217
5218 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5219 {
5220 struct version_info *info = ver_info (data, j);
5221
5222 if (info->inv_id && info->has_nonlin_use)
5223 n++;
5224 }
5225
5226 data->regs_used = n;
5227 if (dump_file && (dump_flags & TDF_DETAILS))
5228 fprintf (dump_file, " regs_used %d\n", n);
5229
5230 if (dump_file && (dump_flags & TDF_DETAILS))
5231 {
5232 fprintf (dump_file, " cost for size:\n");
5233 fprintf (dump_file, " ivs\tcost\n");
5234 for (j = 0; j <= 2 * target_avail_regs; j++)
5235 fprintf (dump_file, " %d\t%d\n", j,
5236 ivopts_global_cost_for_size (data, j));
5237 fprintf (dump_file, "\n");
5238 }
5239 }
5240
5241 /* Returns true if A is a cheaper cost pair than B. */
5242
5243 static bool
5244 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5245 {
5246 int cmp;
5247
5248 if (!a)
5249 return false;
5250
5251 if (!b)
5252 return true;
5253
5254 cmp = compare_costs (a->cost, b->cost);
5255 if (cmp < 0)
5256 return true;
5257
5258 if (cmp > 0)
5259 return false;
5260
5261 /* In case the costs are the same, prefer the cheaper candidate. */
5262 if (a->cand->cost < b->cand->cost)
5263 return true;
5264
5265 return false;
5266 }
5267
5268
5269 /* Returns the candidate by which USE is expressed in IVS. */
5270
5271 static struct cost_pair *
5272 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5273 {
5274 return ivs->cand_for_use[use->id];
5275 }
5276
5277 /* Computes the cost field of IVS structure. */
5278
5279 static void
5280 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5281 {
5282 comp_cost cost = ivs->cand_use_cost;
5283
5284 cost.cost += ivs->cand_cost;
5285
5286 cost.cost += ivopts_global_cost_for_size (data,
5287 ivs->n_regs + ivs->num_used_inv_expr);
5288
5289 ivs->cost = cost;
5290 }
5291
5292 /* Remove invariants in set INVS from set IVS. */
5293
5294 static void
5295 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5296 {
5297 bitmap_iterator bi;
5298 unsigned iid;
5299
5300 if (!invs)
5301 return;
5302
5303 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5304 {
5305 ivs->n_invariant_uses[iid]--;
5306 if (ivs->n_invariant_uses[iid] == 0)
5307 ivs->n_regs--;
5308 }
5309 }
5310
5311 /* Set USE not to be expressed by any candidate in IVS. */
5312
5313 static void
5314 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5315 struct iv_use *use)
5316 {
5317 unsigned uid = use->id, cid;
5318 struct cost_pair *cp;
5319
5320 cp = ivs->cand_for_use[uid];
5321 if (!cp)
5322 return;
5323 cid = cp->cand->id;
5324
5325 ivs->bad_uses++;
5326 ivs->cand_for_use[uid] = NULL;
5327 ivs->n_cand_uses[cid]--;
5328
5329 if (ivs->n_cand_uses[cid] == 0)
5330 {
5331 bitmap_clear_bit (ivs->cands, cid);
5332 /* Do not count the pseudocandidates. */
5333 if (cp->cand->iv)
5334 ivs->n_regs--;
5335 ivs->n_cands--;
5336 ivs->cand_cost -= cp->cand->cost;
5337
5338 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5339 }
5340
5341 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5342
5343 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5344
5345 if (cp->inv_expr_id != -1)
5346 {
5347 ivs->used_inv_expr[cp->inv_expr_id]--;
5348 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5349 ivs->num_used_inv_expr--;
5350 }
5351 iv_ca_recount_cost (data, ivs);
5352 }
5353
5354 /* Add invariants in set INVS to set IVS. */
5355
5356 static void
5357 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5358 {
5359 bitmap_iterator bi;
5360 unsigned iid;
5361
5362 if (!invs)
5363 return;
5364
5365 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5366 {
5367 ivs->n_invariant_uses[iid]++;
5368 if (ivs->n_invariant_uses[iid] == 1)
5369 ivs->n_regs++;
5370 }
5371 }
5372
5373 /* Set cost pair for USE in set IVS to CP. */
5374
5375 static void
5376 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5377 struct iv_use *use, struct cost_pair *cp)
5378 {
5379 unsigned uid = use->id, cid;
5380
5381 if (ivs->cand_for_use[uid] == cp)
5382 return;
5383
5384 if (ivs->cand_for_use[uid])
5385 iv_ca_set_no_cp (data, ivs, use);
5386
5387 if (cp)
5388 {
5389 cid = cp->cand->id;
5390
5391 ivs->bad_uses--;
5392 ivs->cand_for_use[uid] = cp;
5393 ivs->n_cand_uses[cid]++;
5394 if (ivs->n_cand_uses[cid] == 1)
5395 {
5396 bitmap_set_bit (ivs->cands, cid);
5397 /* Do not count the pseudocandidates. */
5398 if (cp->cand->iv)
5399 ivs->n_regs++;
5400 ivs->n_cands++;
5401 ivs->cand_cost += cp->cand->cost;
5402
5403 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5404 }
5405
5406 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5407 iv_ca_set_add_invariants (ivs, cp->depends_on);
5408
5409 if (cp->inv_expr_id != -1)
5410 {
5411 ivs->used_inv_expr[cp->inv_expr_id]++;
5412 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5413 ivs->num_used_inv_expr++;
5414 }
5415 iv_ca_recount_cost (data, ivs);
5416 }
5417 }
5418
5419 /* Extend set IVS by expressing USE by some of the candidates in it
5420 if possible. Consider all important candidates if candidates in
5421 set IVS don't give any result. */
5422
5423 static void
5424 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5425 struct iv_use *use)
5426 {
5427 struct cost_pair *best_cp = NULL, *cp;
5428 bitmap_iterator bi;
5429 unsigned i;
5430 struct iv_cand *cand;
5431
5432 gcc_assert (ivs->upto >= use->id);
5433 ivs->upto++;
5434 ivs->bad_uses++;
5435
5436 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5437 {
5438 cand = iv_cand (data, i);
5439 cp = get_use_iv_cost (data, use, cand);
5440 if (cheaper_cost_pair (cp, best_cp))
5441 best_cp = cp;
5442 }
5443
5444 if (best_cp == NULL)
5445 {
5446 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5447 {
5448 cand = iv_cand (data, i);
5449 cp = get_use_iv_cost (data, use, cand);
5450 if (cheaper_cost_pair (cp, best_cp))
5451 best_cp = cp;
5452 }
5453 }
5454
5455 iv_ca_set_cp (data, ivs, use, best_cp);
5456 }
5457
5458 /* Get cost for assignment IVS. */
5459
5460 static comp_cost
5461 iv_ca_cost (struct iv_ca *ivs)
5462 {
5463 /* This was a conditional expression but it triggered a bug in
5464 Sun C 5.5. */
5465 if (ivs->bad_uses)
5466 return infinite_cost;
5467 else
5468 return ivs->cost;
5469 }
5470
5471 /* Returns true if all dependences of CP are among invariants in IVS. */
5472
5473 static bool
5474 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5475 {
5476 unsigned i;
5477 bitmap_iterator bi;
5478
5479 if (!cp->depends_on)
5480 return true;
5481
5482 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5483 {
5484 if (ivs->n_invariant_uses[i] == 0)
5485 return false;
5486 }
5487
5488 return true;
5489 }
5490
5491 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5492 it before NEXT_CHANGE. */
5493
5494 static struct iv_ca_delta *
5495 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5496 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5497 {
5498 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5499
5500 change->use = use;
5501 change->old_cp = old_cp;
5502 change->new_cp = new_cp;
5503 change->next_change = next_change;
5504
5505 return change;
5506 }
5507
5508 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5509 are rewritten. */
5510
5511 static struct iv_ca_delta *
5512 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5513 {
5514 struct iv_ca_delta *last;
5515
5516 if (!l2)
5517 return l1;
5518
5519 if (!l1)
5520 return l2;
5521
5522 for (last = l1; last->next_change; last = last->next_change)
5523 continue;
5524 last->next_change = l2;
5525
5526 return l1;
5527 }
5528
5529 /* Reverse the list of changes DELTA, forming the inverse to it. */
5530
5531 static struct iv_ca_delta *
5532 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5533 {
5534 struct iv_ca_delta *act, *next, *prev = NULL;
5535 struct cost_pair *tmp;
5536
5537 for (act = delta; act; act = next)
5538 {
5539 next = act->next_change;
5540 act->next_change = prev;
5541 prev = act;
5542
5543 tmp = act->old_cp;
5544 act->old_cp = act->new_cp;
5545 act->new_cp = tmp;
5546 }
5547
5548 return prev;
5549 }
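
/* For illustration only: reversing the delta list
   USE1: A -> B, USE2: C -> D yields USE2: D -> C, USE1: B -> A,
   i.e. exactly the change list that undoes the original delta when
   committed.  */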
5550
5551 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5552 reverted instead. */
5553
5554 static void
5555 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5556 struct iv_ca_delta *delta, bool forward)
5557 {
5558 struct cost_pair *from, *to;
5559 struct iv_ca_delta *act;
5560
5561 if (!forward)
5562 delta = iv_ca_delta_reverse (delta);
5563
5564 for (act = delta; act; act = act->next_change)
5565 {
5566 from = act->old_cp;
5567 to = act->new_cp;
5568 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5569 iv_ca_set_cp (data, ivs, act->use, to);
5570 }
5571
5572 if (!forward)
5573 iv_ca_delta_reverse (delta);
5574 }
5575
5576 /* Returns true if CAND is used in IVS. */
5577
5578 static bool
5579 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5580 {
5581 return ivs->n_cand_uses[cand->id] > 0;
5582 }
5583
5584 /* Returns number of induction variable candidates in the set IVS. */
5585
5586 static unsigned
5587 iv_ca_n_cands (struct iv_ca *ivs)
5588 {
5589 return ivs->n_cands;
5590 }
5591
5592 /* Free the list of changes DELTA. */
5593
5594 static void
5595 iv_ca_delta_free (struct iv_ca_delta **delta)
5596 {
5597 struct iv_ca_delta *act, *next;
5598
5599 for (act = *delta; act; act = next)
5600 {
5601 next = act->next_change;
5602 free (act);
5603 }
5604
5605 *delta = NULL;
5606 }
5607
5608 /* Allocates a new iv candidate assignment. */
5609
5610 static struct iv_ca *
5611 iv_ca_new (struct ivopts_data *data)
5612 {
5613 struct iv_ca *nw = XNEW (struct iv_ca);
5614
5615 nw->upto = 0;
5616 nw->bad_uses = 0;
5617 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5618 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5619 nw->cands = BITMAP_ALLOC (NULL);
5620 nw->n_cands = 0;
5621 nw->n_regs = 0;
5622 nw->cand_use_cost = no_cost;
5623 nw->cand_cost = 0;
5624 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5625 nw->cost = no_cost;
5626 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5627 nw->num_used_inv_expr = 0;
5628
5629 return nw;
5630 }
5631
5632 /* Free memory occupied by the set IVS. */
5633
5634 static void
5635 iv_ca_free (struct iv_ca **ivs)
5636 {
5637 free ((*ivs)->cand_for_use);
5638 free ((*ivs)->n_cand_uses);
5639 BITMAP_FREE ((*ivs)->cands);
5640 free ((*ivs)->n_invariant_uses);
5641 free ((*ivs)->used_inv_expr);
5642 free (*ivs);
5643 *ivs = NULL;
5644 }
5645
5646 /* Dumps IVS to FILE. */
5647
5648 static void
5649 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5650 {
5651 const char *pref = " invariants ";
5652 unsigned i;
5653 comp_cost cost = iv_ca_cost (ivs);
5654
5655 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5656 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5657 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5658 bitmap_print (file, ivs->cands, " candidates: ","\n");
5659
5660 for (i = 0; i < ivs->upto; i++)
5661 {
5662 struct iv_use *use = iv_use (data, i);
5663 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5664 if (cp)
5665 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5666 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5667 else
5668 fprintf (file, " use:%d --> ??\n", use->id);
5669 }
5670
5671 for (i = 1; i <= data->max_inv_id; i++)
5672 if (ivs->n_invariant_uses[i])
5673 {
5674 fprintf (file, "%s%d", pref, i);
5675 pref = ", ";
5676 }
5677 fprintf (file, "\n\n");
5678 }
5679
5680 /* Try changing the candidate in IVS to CAND for each use. Return cost of the
5681 new set, and store differences in DELTA. Number of induction variables
5682 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5683 the function tries to find a solution with a minimal number of iv candidates. */
5684
5685 static comp_cost
5686 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5687 struct iv_cand *cand, struct iv_ca_delta **delta,
5688 unsigned *n_ivs, bool min_ncand)
5689 {
5690 unsigned i;
5691 comp_cost cost;
5692 struct iv_use *use;
5693 struct cost_pair *old_cp, *new_cp;
5694
5695 *delta = NULL;
5696 for (i = 0; i < ivs->upto; i++)
5697 {
5698 use = iv_use (data, i);
5699 old_cp = iv_ca_cand_for_use (ivs, use);
5700
5701 if (old_cp
5702 && old_cp->cand == cand)
5703 continue;
5704
5705 new_cp = get_use_iv_cost (data, use, cand);
5706 if (!new_cp)
5707 continue;
5708
5709 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5710 continue;
5711
5712 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5713 continue;
5714
5715 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5716 }
5717
5718 iv_ca_delta_commit (data, ivs, *delta, true);
5719 cost = iv_ca_cost (ivs);
5720 if (n_ivs)
5721 *n_ivs = iv_ca_n_cands (ivs);
5722 iv_ca_delta_commit (data, ivs, *delta, false);
5723
5724 return cost;
5725 }
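
/* Note the trial pattern above, used throughout this file: commit the
   delta, read the cost of the modified assignment, then commit the
   reversed delta to restore it, so alternatives are evaluated without
   copying the whole iv_ca structure.  */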
5726
5727 /* Try narrowing set IVS by removing CAND. Return the cost of
5728 the new set and store the differences in DELTA. START is
5729 the candidate with which we start narrowing. */
5730
5731 static comp_cost
5732 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5733 struct iv_cand *cand, struct iv_cand *start,
5734 struct iv_ca_delta **delta)
5735 {
5736 unsigned i, ci;
5737 struct iv_use *use;
5738 struct cost_pair *old_cp, *new_cp, *cp;
5739 bitmap_iterator bi;
5740 struct iv_cand *cnd;
5741 comp_cost cost, best_cost, acost;
5742
5743 *delta = NULL;
5744 for (i = 0; i < n_iv_uses (data); i++)
5745 {
5746 use = iv_use (data, i);
5747
5748 old_cp = iv_ca_cand_for_use (ivs, use);
5749 if (old_cp->cand != cand)
5750 continue;
5751
5752 best_cost = iv_ca_cost (ivs);
5753 /* Start narrowing with START. */
5754 new_cp = get_use_iv_cost (data, use, start);
5755
5756 if (data->consider_all_candidates)
5757 {
5758 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5759 {
5760 if (ci == cand->id || (start && ci == start->id))
5761 continue;
5762
5763 cnd = iv_cand (data, ci);
5764
5765 cp = get_use_iv_cost (data, use, cnd);
5766 if (!cp)
5767 continue;
5768
5769 iv_ca_set_cp (data, ivs, use, cp);
5770 acost = iv_ca_cost (ivs);
5771
5772 if (compare_costs (acost, best_cost) < 0)
5773 {
5774 best_cost = acost;
5775 new_cp = cp;
5776 }
5777 }
5778 }
5779 else
5780 {
5781 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5782 {
5783 if (ci == cand->id || (start && ci == start->id))
5784 continue;
5785
5786 cnd = iv_cand (data, ci);
5787
5788 cp = get_use_iv_cost (data, use, cnd);
5789 if (!cp)
5790 continue;
5791
5792 iv_ca_set_cp (data, ivs, use, cp);
5793 acost = iv_ca_cost (ivs);
5794
5795 if (compare_costs (acost, best_cost) < 0)
5796 {
5797 best_cost = acost;
5798 new_cp = cp;
5799 }
5800 }
5801 }
5802 /* Restore to old cp for use. */
5803 iv_ca_set_cp (data, ivs, use, old_cp);
5804
5805 if (!new_cp)
5806 {
5807 iv_ca_delta_free (delta);
5808 return infinite_cost;
5809 }
5810
5811 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5812 }
5813
5814 iv_ca_delta_commit (data, ivs, *delta, true);
5815 cost = iv_ca_cost (ivs);
5816 iv_ca_delta_commit (data, ivs, *delta, false);
5817
5818 return cost;
5819 }
5820
5821 /* Try optimizing the set of candidates IVS by removing candidates other
5822 than EXCEPT_CAND from it. Return cost of the new set, and store
5823 differences in DELTA. */
5824
5825 static comp_cost
5826 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5827 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5828 {
5829 bitmap_iterator bi;
5830 struct iv_ca_delta *act_delta, *best_delta;
5831 unsigned i;
5832 comp_cost best_cost, acost;
5833 struct iv_cand *cand;
5834
5835 best_delta = NULL;
5836 best_cost = iv_ca_cost (ivs);
5837
5838 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5839 {
5840 cand = iv_cand (data, i);
5841
5842 if (cand == except_cand)
5843 continue;
5844
5845 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5846
5847 if (compare_costs (acost, best_cost) < 0)
5848 {
5849 best_cost = acost;
5850 iv_ca_delta_free (&best_delta);
5851 best_delta = act_delta;
5852 }
5853 else
5854 iv_ca_delta_free (&act_delta);
5855 }
5856
5857 if (!best_delta)
5858 {
5859 *delta = NULL;
5860 return best_cost;
5861 }
5862
5863 /* Recurse to possibly remove other unnecessary ivs. */
5864 iv_ca_delta_commit (data, ivs, best_delta, true);
5865 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5866 iv_ca_delta_commit (data, ivs, best_delta, false);
5867 *delta = iv_ca_delta_join (best_delta, *delta);
5868 return best_cost;
5869 }
5870
5871 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
5872 cheaper local cost for USE than BEST_CP. If so, return a pointer to
5873 its cost_pair; otherwise just return BEST_CP. */
5874
5875 static struct cost_pair*
5876 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5877 unsigned int cand_idx, struct iv_cand *old_cand,
5878 struct cost_pair *best_cp)
5879 {
5880 struct iv_cand *cand;
5881 struct cost_pair *cp;
5882
5883 gcc_assert (old_cand != NULL && best_cp != NULL);
5884 if (cand_idx == old_cand->id)
5885 return best_cp;
5886
5887 cand = iv_cand (data, cand_idx);
5888 cp = get_use_iv_cost (data, use, cand);
5889 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5890 return cp;
5891
5892 return best_cp;
5893 }
5894
5895 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
5896 that are used by more than one iv use. For each such candidate, this
5897 function tries to represent the iv uses under it using other candidates
5898 with lower local cost, then tries to prune the new set. If the new set
5899 has lower cost, it returns the new cost after recording the candidate
5900 replacements in list DELTA. */
5901
5902 static comp_cost
5903 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5904 struct iv_ca_delta **delta)
5905 {
5906 bitmap_iterator bi, bj;
5907 unsigned int i, j, k;
5908 struct iv_use *use;
5909 struct iv_cand *cand;
5910 comp_cost orig_cost, acost;
5911 struct iv_ca_delta *act_delta, *tmp_delta;
5912 struct cost_pair *old_cp, *best_cp = NULL;
5913
5914 *delta = NULL;
5915 orig_cost = iv_ca_cost (ivs);
5916
5917 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5918 {
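/* A candidate with a single use has nothing to redistribute, and one
with too many uses would make the replacement attempt too costly. */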
5919 if (ivs->n_cand_uses[i] == 1
5920 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5921 continue;
5922
5923 cand = iv_cand (data, i);
5924
5925 act_delta = NULL;
5926 /* Represent uses under the current candidate using other candidates
5927 with lower local cost. */
5928 for (j = 0; j < ivs->upto; j++)
5929 {
5930 use = iv_use (data, j);
5931 old_cp = iv_ca_cand_for_use (ivs, use);
5932
5933 if (old_cp->cand != cand)
5934 continue;
5935
5936 best_cp = old_cp;
5937 if (data->consider_all_candidates)
5938 for (k = 0; k < n_iv_cands (data); k++)
5939 best_cp = cheaper_cost_with_cand (data, use, k,
5940 old_cp->cand, best_cp);
5941 else
5942 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5943 best_cp = cheaper_cost_with_cand (data, use, k,
5944 old_cp->cand, best_cp);
5945
5946 if (best_cp == old_cp)
5947 continue;
5948
5949 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
5950 }
5951 /* Nothing was replaced for this candidate; no pruning needed. */
5952 if (!act_delta)
5953 continue;
5954
5955 /* Prune the new candidate set. */
5956 iv_ca_delta_commit (data, ivs, act_delta, true);
5957 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
5958 iv_ca_delta_commit (data, ivs, act_delta, false);
5959 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5960
5961 if (compare_costs (acost, orig_cost) < 0)
5962 {
5963 *delta = act_delta;
5964 return acost;
5965 }
5966 else
5967 iv_ca_delta_free (&act_delta);
5968 }
5969
5970 return orig_cost;
5971 }
5972
5973 /* Tries to extend the set IVS in the best possible way in order
5974 to express the USE. If ORIGINALP is true, prefer candidates from
5975 the original set of IVs; otherwise favor important candidates not
5976 based on any memory object. */
5977
5978 static bool
5979 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5980 struct iv_use *use, bool originalp)
5981 {
5982 comp_cost best_cost, act_cost;
5983 unsigned i;
5984 bitmap_iterator bi;
5985 struct iv_cand *cand;
5986 struct iv_ca_delta *best_delta = NULL, *act_delta;
5987 struct cost_pair *cp;
5988
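/* iv_ca_add_use assigns the cheapest applicable candidate already in
IVS to the use, if any; record that choice as the baseline delta,
then detach the use again so the alternatives below can be measured. */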
5989 iv_ca_add_use (data, ivs, use);
5990 best_cost = iv_ca_cost (ivs);
5991 cp = iv_ca_cand_for_use (ivs, use);
5992 if (cp)
5993 {
5994 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5995 iv_ca_set_no_cp (data, ivs, use);
5996 }
5997
5998 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5999 first try important candidates not based on any memory object. Only if
6000 this fails do we try the specific ones. Rationale -- in loops with many
6001 variables the best choice often is to use just one generic biv. If we
6002 added many use-specific ivs here, the optimization algorithm later would
6003 be likely to get stuck in a local minimum, thus causing us to create
6004 too many ivs. The approach of going from few ivs to more seems more
6005 likely to be successful -- starting from few ivs, replacing an expensive
6006 use by a specific iv should always be a win. */
6007 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6008 {
6009 cand = iv_cand (data, i);
6010
6011 if (originalp && cand->pos != IP_ORIGINAL)
6012 continue;
6013
6014 if (!originalp && cand->iv->base_object != NULL_TREE)
6015 continue;
6016
6017 if (iv_ca_cand_used_p (ivs, cand))
6018 continue;
6019
6020 cp = get_use_iv_cost (data, use, cand);
6021 if (!cp)
6022 continue;
6023
6024 iv_ca_set_cp (data, ivs, use, cp);
6025 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6026 true);
6027 iv_ca_set_no_cp (data, ivs, use);
6028 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6029
6030 if (compare_costs (act_cost, best_cost) < 0)
6031 {
6032 best_cost = act_cost;
6033
6034 iv_ca_delta_free (&best_delta);
6035 best_delta = act_delta;
6036 }
6037 else
6038 iv_ca_delta_free (&act_delta);
6039 }
6040
6041 if (infinite_cost_p (best_cost))
6042 {
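/* The important candidates were not enough; fall back to every
candidate recorded in this use's cost map. */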
6043 for (i = 0; i < use->n_map_members; i++)
6044 {
6045 cp = use->cost_map + i;
6046 cand = cp->cand;
6047 if (!cand)
6048 continue;
6049
6050 /* Already tried this. */
6051 if (cand->important)
6052 {
6053 if (originalp && cand->pos == IP_ORIGINAL)
6054 continue;
6055 if (!originalp && cand->iv->base_object == NULL_TREE)
6056 continue;
6057 }
6058
6059 if (iv_ca_cand_used_p (ivs, cand))
6060 continue;
6061
6062 act_delta = NULL;
6063 iv_ca_set_cp (data, ivs, use, cp);
6064 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6065 iv_ca_set_no_cp (data, ivs, use);
6066 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6067 cp, act_delta);
6068
6069 if (compare_costs (act_cost, best_cost) < 0)
6070 {
6071 best_cost = act_cost;
6072
6073 if (best_delta)
6074 iv_ca_delta_free (&best_delta);
6075 best_delta = act_delta;
6076 }
6077 else
6078 iv_ca_delta_free (&act_delta);
6079 }
6080 }
6081
6082 iv_ca_delta_commit (data, ivs, best_delta, true);
6083 iv_ca_delta_free (&best_delta);
6084
6085 return !infinite_cost_p (best_cost);
6086 }
6087
6088 /* Finds an initial assignment of candidates to uses. */
6089
6090 static struct iv_ca *
6091 get_initial_solution (struct ivopts_data *data, bool originalp)
6092 {
6093 struct iv_ca *ivs = iv_ca_new (data);
6094 unsigned i;
6095
6096 for (i = 0; i < n_iv_uses (data); i++)
6097 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6098 {
6099 iv_ca_free (&ivs);
6100 return NULL;
6101 }
6102
6103 return ivs;
6104 }
6105
6106 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6107 points to a bool variable; if it is true, this function also tries to
6108 break the local optimal fixed-point by replacing candidates in IVS. */
6109
6110 static bool
6111 try_improve_iv_set (struct ivopts_data *data,
6112 struct iv_ca *ivs, bool *try_replace_p)
6113 {
6114 unsigned i, n_ivs;
6115 comp_cost acost, best_cost = iv_ca_cost (ivs);
6116 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6117 struct iv_cand *cand;
6118
6119 /* Try extending the set of induction variables by one. */
6120 for (i = 0; i < n_iv_cands (data); i++)
6121 {
6122 cand = iv_cand (data, i);
6123
6124 if (iv_ca_cand_used_p (ivs, cand))
6125 continue;
6126
6127 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6128 if (!act_delta)
6129 continue;
6130
6131 /* If we successfully added the candidate and the set is small enough,
6132 try optimizing it by removing other candidates. */
6133 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6134 {
6135 iv_ca_delta_commit (data, ivs, act_delta, true);
6136 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6137 iv_ca_delta_commit (data, ivs, act_delta, false);
6138 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6139 }
6140
6141 if (compare_costs (acost, best_cost) < 0)
6142 {
6143 best_cost = acost;
6144 iv_ca_delta_free (&best_delta);
6145 best_delta = act_delta;
6146 }
6147 else
6148 iv_ca_delta_free (&act_delta);
6149 }
6150
6151 if (!best_delta)
6152 {
6153 /* Try removing the candidates from the set instead. */
6154 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6155
6156 if (!best_delta && *try_replace_p)
6157 {
6158 *try_replace_p = false;
6159 /* So far the candidate-selection algorithm tends to choose fewer IVs
6160 so that it can handle loops that have many variables but for which
6161 the best choice is often to use only one general biv. One weakness
6162 is that it can't handle the opposite case, in which different
6163 candidates should be chosen for different uses. To solve this
6164 problem, we replace candidates in the manner described in the
6165 comment before iv_ca_replace, thus giving the general algorithm a
6166 chance to break the local optimal fixed-point in these cases. */
6167 best_cost = iv_ca_replace (data, ivs, &best_delta);
6168 }
6169
6170 if (!best_delta)
6171 return false;
6172 }
6173
6174 iv_ca_delta_commit (data, ivs, best_delta, true);
6175 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6176 iv_ca_delta_free (&best_delta);
6177 return true;
6178 }
6179
6180 /* Attempts to find the optimal set of induction variables. We use a simple
6181 greedy heuristic: we try to replace at most one candidate in the selected
6182 solution and remove the unused ivs while this improves the cost. */
6183
6184 static struct iv_ca *
6185 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6186 {
6187 struct iv_ca *set;
6188 bool try_replace_p = true;
6189
6190 /* Get the initial solution. */
6191 set = get_initial_solution (data, originalp);
6192 if (!set)
6193 {
6194 if (dump_file && (dump_flags & TDF_DETAILS))
6195 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6196 return NULL;
6197 }
6198
6199 if (dump_file && (dump_flags & TDF_DETAILS))
6200 {
6201 fprintf (dump_file, "Initial set of candidates:\n");
6202 iv_ca_dump (data, dump_file, set);
6203 }
6204
6205 while (try_improve_iv_set (data, set, &try_replace_p))
6206 {
6207 if (dump_file && (dump_flags & TDF_DETAILS))
6208 {
6209 fprintf (dump_file, "Improved to:\n");
6210 iv_ca_dump (data, dump_file, set);
6211 }
6212 }
6213
6214 return set;
6215 }
6216
6217 static struct iv_ca *
6218 find_optimal_iv_set (struct ivopts_data *data)
6219 {
6220 unsigned i;
6221 struct iv_ca *set, *origset;
6222 struct iv_use *use;
6223 comp_cost cost, origcost;
6224
6225 /* Determine the cost using a strategy that starts with the original IVs,
6226 then try again using a strategy that prefers candidates not based on
6227 any memory object. Keep whichever solution is cheaper. */
6228 origset = find_optimal_iv_set_1 (data, true);
6229 set = find_optimal_iv_set_1 (data, false);
6230
6231 if (!origset && !set)
6232 return NULL;
6233
6234 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6235 cost = set ? iv_ca_cost (set) : infinite_cost;
6236
6237 if (dump_file && (dump_flags & TDF_DETAILS))
6238 {
6239 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6240 origcost.cost, origcost.complexity);
6241 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6242 cost.cost, cost.complexity);
6243 }
6244
6245 /* Choose the one with the best cost. */
6246 if (compare_costs (origcost, cost) <= 0)
6247 {
6248 if (set)
6249 iv_ca_free (&set);
6250 set = origset;
6251 }
6252 else if (origset)
6253 iv_ca_free (&origset);
6254
6255 for (i = 0; i < n_iv_uses (data); i++)
6256 {
6257 use = iv_use (data, i);
6258 use->selected = iv_ca_cand_for_use (set, use)->cand;
6259 }
6260
6261 return set;
6262 }
6263
6264 /* Creates a new induction variable corresponding to CAND. */
6265
6266 static void
6267 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6268 {
6269 gimple_stmt_iterator incr_pos;
6270 tree base;
6271 bool after = false;
6272
6273 if (!cand->iv)
6274 return;
6275
6276 switch (cand->pos)
6277 {
6278 case IP_NORMAL:
6279 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6280 break;
6281
6282 case IP_END:
6283 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6284 after = true;
6285 break;
6286
6287 case IP_AFTER_USE:
6288 after = true;
6289 /* fall through */
6290 case IP_BEFORE_USE:
6291 incr_pos = gsi_for_stmt (cand->incremented_at);
6292 break;
6293
6294 case IP_ORIGINAL:
6295 /* Mark that the iv is preserved. */
6296 name_info (data, cand->var_before)->preserve_biv = true;
6297 name_info (data, cand->var_after)->preserve_biv = true;
6298
6299 /* Rewrite the increment so that it uses var_before directly. */
6300 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6301 return;
6302 }
6303
6304 gimple_add_tmp_var (cand->var_before);
6305
6306 base = unshare_expr (cand->iv->base);
6307
6308 create_iv (base, unshare_expr (cand->iv->step),
6309 cand->var_before, data->current_loop,
6310 &incr_pos, after, &cand->var_before, &cand->var_after);
6311 }
6312
6313 /* Creates new induction variables described in SET. */
6314
6315 static void
6316 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6317 {
6318 unsigned i;
6319 struct iv_cand *cand;
6320 bitmap_iterator bi;
6321
6322 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6323 {
6324 cand = iv_cand (data, i);
6325 create_new_iv (data, cand);
6326 }
6327
6328 if (dump_file && (dump_flags & TDF_DETAILS))
6329 {
6330 fprintf (dump_file, "\nSelected IV set: \n");
6331 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6332 {
6333 cand = iv_cand (data, i);
6334 dump_cand (dump_file, cand);
6335 }
6336 fprintf (dump_file, "\n");
6337 }
6338 }
6339
6340 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
6341 using candidate CAND. */
6342
6343 static void
6344 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6345 struct iv_use *use, struct iv_cand *cand)
6346 {
6347 tree comp;
6348 tree op, tgt;
6349 gassign *ass;
6350 gimple_stmt_iterator bsi;
6351
6352 /* An important special case -- if we are asked to express the value of
6353 the original iv by itself, just exit; there is no need to
6354 introduce a new computation (that might also need casting the
6355 variable to unsigned and back). */
6356 if (cand->pos == IP_ORIGINAL
6357 && cand->incremented_at == use->stmt)
6358 {
6359 enum tree_code stmt_code;
6360
6361 gcc_assert (is_gimple_assign (use->stmt));
6362 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6363
6364 /* Check whether we may leave the computation unchanged.
6365 This is the case only if it does not rely on other
6366 computations in the loop -- otherwise, the computation
6367 we rely upon may be removed in remove_unused_ivs,
6368 thus leading to an ICE. */
6369 stmt_code = gimple_assign_rhs_code (use->stmt);
6370 if (stmt_code == PLUS_EXPR
6371 || stmt_code == MINUS_EXPR
6372 || stmt_code == POINTER_PLUS_EXPR)
6373 {
6374 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6375 op = gimple_assign_rhs2 (use->stmt);
6376 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6377 op = gimple_assign_rhs1 (use->stmt);
6378 else
6379 op = NULL_TREE;
6380 }
6381 else
6382 op = NULL_TREE;
6383
6384 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6385 return;
6386 }
6387
6388 comp = get_computation (data->current_loop, use, cand);
6389 gcc_assert (comp != NULL_TREE);
6390
6391 switch (gimple_code (use->stmt))
6392 {
6393 case GIMPLE_PHI:
6394 tgt = PHI_RESULT (use->stmt);
6395
6396 /* If we should keep the biv, do not replace it. */
6397 if (name_info (data, tgt)->preserve_biv)
6398 return;
6399
6400 bsi = gsi_after_labels (gimple_bb (use->stmt));
6401 break;
6402
6403 case GIMPLE_ASSIGN:
6404 tgt = gimple_assign_lhs (use->stmt);
6405 bsi = gsi_for_stmt (use->stmt);
6406 break;
6407
6408 default:
6409 gcc_unreachable ();
6410 }
6411
6412 if (!valid_gimple_rhs_p (comp)
6413 || (gimple_code (use->stmt) != GIMPLE_PHI
6414 /* We can't allow re-allocating the stmt as it might still be
6415 pointed to. */
6416 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6417 >= gimple_num_ops (gsi_stmt (bsi)))))
6418 {
6419 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6420 true, GSI_SAME_STMT);
6421 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6422 {
6423 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6424 /* As this isn't a plain copy we have to reset alignment
6425 information. */
6426 if (SSA_NAME_PTR_INFO (comp))
6427 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6428 }
6429 }
6430
6431 if (gimple_code (use->stmt) == GIMPLE_PHI)
6432 {
6433 ass = gimple_build_assign (tgt, comp);
6434 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6435
6436 bsi = gsi_for_stmt (use->stmt);
6437 remove_phi_node (&bsi, false);
6438 }
6439 else
6440 {
6441 gimple_assign_set_rhs_from_tree (&bsi, comp);
6442 use->stmt = gsi_stmt (bsi);
6443 }
6444 }
6445
6446 /* Performs a peephole optimization to reorder the iv update statement with
6447 a mem ref to enable instruction combining in later phases. The mem ref uses
6448 the iv value before the update, so the reordering transformation requires
6449 adjustment of the offset. CAND is the selected IV_CAND.
6450
6451 Example:
6452
6453 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6454 iv2 = iv1 + 1;
6455
6456 if (t < val) (1)
6457 goto L;
6458 goto Head;
6459
6460
6461 Directly propagating t over to (1) would introduce an overlapping live
6462 range and thus increase register pressure. This peephole transforms it into:
6463
6464
6465 iv2 = iv1 + 1;
6466 t = MEM_REF (base, iv2, 8, 8);
6467 if (t < val)
6468 goto L;
6469 goto Head;
6470 */
6471
6472 static void
6473 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6474 {
6475 tree var_after;
6476 gimple iv_update, stmt;
6477 basic_block bb;
6478 gimple_stmt_iterator gsi, gsi_iv;
6479
6480 if (cand->pos != IP_NORMAL)
6481 return;
6482
6483 var_after = cand->var_after;
6484 iv_update = SSA_NAME_DEF_STMT (var_after);
6485
6486 bb = gimple_bb (iv_update);
6487 gsi = gsi_last_nondebug_bb (bb);
6488 stmt = gsi_stmt (gsi);
6489
6490 /* Only handle conditional statements for now. */
6491 if (gimple_code (stmt) != GIMPLE_COND)
6492 return;
6493
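/* The iv update must be the statement immediately preceding the exit
condition. */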
6494 gsi_prev_nondebug (&gsi);
6495 stmt = gsi_stmt (gsi);
6496 if (stmt != iv_update)
6497 return;
6498
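/* And the use statement must in turn immediately precede the iv
update. */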
6499 gsi_prev_nondebug (&gsi);
6500 if (gsi_end_p (gsi))
6501 return;
6502
6503 stmt = gsi_stmt (gsi);
6504 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6505 return;
6506
6507 if (stmt != use->stmt)
6508 return;
6509
6510 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6511 return;
6512
6513 if (dump_file && (dump_flags & TDF_DETAILS))
6514 {
6515 fprintf (dump_file, "Reordering \n");
6516 print_gimple_stmt (dump_file, iv_update, 0, 0);
6517 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6518 fprintf (dump_file, "\n");
6519 }
6520
6521 gsi = gsi_for_stmt (use->stmt);
6522 gsi_iv = gsi_for_stmt (iv_update);
6523 gsi_move_before (&gsi_iv, &gsi);
6524
6525 cand->pos = IP_BEFORE_USE;
6526 cand->incremented_at = use->stmt;
6527 }
6528
6529 /* Rewrites USE (an address that is an iv) using candidate CAND. */
6530
6531 static void
6532 rewrite_use_address (struct ivopts_data *data,
6533 struct iv_use *use, struct iv_cand *cand)
6534 {
6535 aff_tree aff;
6536 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6537 tree base_hint = NULL_TREE;
6538 tree ref, iv;
6539 bool ok;
6540
6541 adjust_iv_update_pos (cand, use);
6542 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6543 gcc_assert (ok);
6544 unshare_aff_combination (&aff);
6545
6546 /* To avoid undefined overflow problems, all IV candidates use unsigned
6547 integer types. The drawback is that this makes it impossible for
6548 create_mem_ref to distinguish an IV that is based on a memory object
6549 from one that represents simply an offset.
6550
6551 To work around this problem, we pass a hint to create_mem_ref that
6552 indicates which variable (if any) in aff is an IV based on a memory
6553 object. Note that we only consider the candidate. If this is not
6554 based on an object, the base of the reference is in some subexpression
6555 of the use -- but these will use pointer types, so they are recognized
6556 by the create_mem_ref heuristics anyway. */
6557 if (cand->iv->base_object)
6558 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6559
6560 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6561 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6562 reference_alias_ptr_type (*use->op_p),
6563 iv, base_hint, data->speed);
6564 copy_ref_info (ref, *use->op_p);
6565 *use->op_p = ref;
6566 }
6567
6568 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6569 candidate CAND. */
6570
6571 static void
6572 rewrite_use_compare (struct ivopts_data *data,
6573 struct iv_use *use, struct iv_cand *cand)
6574 {
6575 tree comp, *var_p, op, bound;
6576 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6577 enum tree_code compare;
6578 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6579 bool ok;
6580
6581 bound = cp->value;
6582 if (bound)
6583 {
6584 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6585 tree var_type = TREE_TYPE (var);
6586 gimple_seq stmts;
6587
6588 if (dump_file && (dump_flags & TDF_DETAILS))
6589 {
6590 fprintf (dump_file, "Replacing exit test: ");
6591 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6592 }
6593 compare = cp->comp;
6594 bound = unshare_expr (fold_convert (var_type, bound));
6595 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6596 if (stmts)
6597 gsi_insert_seq_on_edge_immediate (
6598 loop_preheader_edge (data->current_loop),
6599 stmts);
6600
6601 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6602 gimple_cond_set_lhs (cond_stmt, var);
6603 gimple_cond_set_code (cond_stmt, compare);
6604 gimple_cond_set_rhs (cond_stmt, op);
6605 return;
6606 }
6607
6608 /* The induction variable elimination failed; just express the original
6609 giv. */
6610 comp = get_computation (data->current_loop, use, cand);
6611 gcc_assert (comp != NULL_TREE);
6612
6613 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6614 gcc_assert (ok);
6615
6616 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6617 true, GSI_SAME_STMT);
6618 }
6619
6620 /* Rewrites USE using candidate CAND. */
6621
6622 static void
6623 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6624 {
6625 switch (use->type)
6626 {
6627 case USE_NONLINEAR_EXPR:
6628 rewrite_use_nonlinear_expr (data, use, cand);
6629 break;
6630
6631 case USE_ADDRESS:
6632 rewrite_use_address (data, use, cand);
6633 break;
6634
6635 case USE_COMPARE:
6636 rewrite_use_compare (data, use, cand);
6637 break;
6638
6639 default:
6640 gcc_unreachable ();
6641 }
6642
6643 update_stmt (use->stmt);
6644 }
6645
6646 /* Rewrite the uses using the selected induction variables. */
6647
6648 static void
6649 rewrite_uses (struct ivopts_data *data)
6650 {
6651 unsigned i;
6652 struct iv_cand *cand;
6653 struct iv_use *use;
6654
6655 for (i = 0; i < n_iv_uses (data); i++)
6656 {
6657 use = iv_use (data, i);
6658 cand = use->selected;
6659 gcc_assert (cand);
6660
6661 rewrite_use (data, use, cand);
6662 }
6663 }
6664
6665 /* Removes the ivs that are not used after rewriting. */
6666
6667 static void
6668 remove_unused_ivs (struct ivopts_data *data)
6669 {
6670 unsigned j;
6671 bitmap_iterator bi;
6672 bitmap toremove = BITMAP_ALLOC (NULL);
6673
6674 /* Figure out an order in which to release SSA DEFs so that we don't
6675 release something that we'd have to propagate into a debug stmt
6676 afterwards. */
6677 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6678 {
6679 struct version_info *info;
6680
6681 info = ver_info (data, j);
6682 if (info->iv
6683 && !integer_zerop (info->iv->step)
6684 && !info->inv_id
6685 && !info->iv->have_use_for
6686 && !info->preserve_biv)
6687 {
6688 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6689
6690 tree def = info->iv->ssa_name;
6691
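/* Before the iv is released, try to rewrite any debug binds that
reference it in terms of one of the selected candidates, so the
debug info survives the removal. */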
6692 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6693 {
6694 imm_use_iterator imm_iter;
6695 use_operand_p use_p;
6696 gimple stmt;
6697 int count = 0;
6698
6699 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6700 {
6701 if (!gimple_debug_bind_p (stmt))
6702 continue;
6703
6704 /* We just want to determine whether to do nothing
6705 (count == 0), to substitute the computed
6706 expression into a single use of the SSA DEF by
6707 itself (count == 1), or to use a debug temp
6708 because the SSA DEF is used multiple times or as
6709 part of a larger expression (count > 1). */
6710 count++;
6711 if (gimple_debug_bind_get_value (stmt) != def)
6712 count++;
6713
6714 if (count > 1)
6715 BREAK_FROM_IMM_USE_STMT (imm_iter);
6716 }
6717
6718 if (!count)
6719 continue;
6720
6721 struct iv_use dummy_use;
6722 struct iv_cand *best_cand = NULL, *cand;
6723 unsigned i, best_pref = 0, cand_pref;
6724
6725 memset (&dummy_use, 0, sizeof (dummy_use));
6726 dummy_use.iv = info->iv;
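/* Score each selected candidate: +4 if its step matches the dead
iv's, +2 if the base modes match, +1 if its base is a constant;
keep the highest-scoring one. */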
6727 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6728 {
6729 cand = iv_use (data, i)->selected;
6730 if (cand == best_cand)
6731 continue;
6732 cand_pref = operand_equal_p (cand->iv->step,
6733 info->iv->step, 0)
6734 ? 4 : 0;
6735 cand_pref
6736 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6737 == TYPE_MODE (TREE_TYPE (info->iv->base))
6738 ? 2 : 0;
6739 cand_pref
6740 += TREE_CODE (cand->iv->base) == INTEGER_CST
6741 ? 1 : 0;
6742 if (best_cand == NULL || best_pref < cand_pref)
6743 {
6744 best_cand = cand;
6745 best_pref = cand_pref;
6746 }
6747 }
6748
6749 if (!best_cand)
6750 continue;
6751
6752 tree comp = get_computation_at (data->current_loop,
6753 &dummy_use, best_cand,
6754 SSA_NAME_DEF_STMT (def));
6755 if (!comp)
6756 continue;
6757
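/* The value is used by several binds, or as part of a larger
expression; emit it once into a debug temporary and let the
binds reference that. */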
6758 if (count > 1)
6759 {
6760 tree vexpr = make_node (DEBUG_EXPR_DECL);
6761 DECL_ARTIFICIAL (vexpr) = 1;
6762 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6763 if (SSA_NAME_VAR (def))
6764 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6765 else
6766 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6767 gdebug *def_temp
6768 = gimple_build_debug_bind (vexpr, comp, NULL);
6769 gimple_stmt_iterator gsi;
6770
6771 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6772 gsi = gsi_after_labels (gimple_bb
6773 (SSA_NAME_DEF_STMT (def)));
6774 else
6775 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6776
6777 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6778 comp = vexpr;
6779 }
6780
6781 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6782 {
6783 if (!gimple_debug_bind_p (stmt))
6784 continue;
6785
6786 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6787 SET_USE (use_p, comp);
6788
6789 update_stmt (stmt);
6790 }
6791 }
6792 }
6793 }
6794
6795 release_defs_bitset (toremove);
6796
6797 BITMAP_FREE (toremove);
6798 }
6799
6800 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6801 for hash_map::traverse. */
6802
6803 bool
6804 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6805 {
6806 free (value);
6807 return true;
6808 }
6809
6810 /* Frees data allocated by the optimization of a single loop. */
6811
6812 static void
6813 free_loop_data (struct ivopts_data *data)
6814 {
6815 unsigned i, j;
6816 bitmap_iterator bi;
6817 tree obj;
6818
6819 if (data->niters)
6820 {
6821 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6822 delete data->niters;
6823 data->niters = NULL;
6824 }
6825
6826 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6827 {
6828 struct version_info *info;
6829
6830 info = ver_info (data, i);
6831 free (info->iv);
6832 info->iv = NULL;
6833 info->has_nonlin_use = false;
6834 info->preserve_biv = false;
6835 info->inv_id = 0;
6836 }
6837 bitmap_clear (data->relevant);
6838 bitmap_clear (data->important_candidates);
6839
6840 for (i = 0; i < n_iv_uses (data); i++)
6841 {
6842 struct iv_use *use = iv_use (data, i);
6843
6844 free (use->iv);
6845 BITMAP_FREE (use->related_cands);
6846 for (j = 0; j < use->n_map_members; j++)
6847 if (use->cost_map[j].depends_on)
6848 BITMAP_FREE (use->cost_map[j].depends_on);
6849 free (use->cost_map);
6850 free (use);
6851 }
6852 data->iv_uses.truncate (0);
6853
6854 for (i = 0; i < n_iv_cands (data); i++)
6855 {
6856 struct iv_cand *cand = iv_cand (data, i);
6857
6858 free (cand->iv);
6859 if (cand->depends_on)
6860 BITMAP_FREE (cand->depends_on);
6861 free (cand);
6862 }
6863 data->iv_candidates.truncate (0);
6864
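/* New SSA names may have been created while processing this loop;
grow version_info so it covers all of them. */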
6865 if (data->version_info_size < num_ssa_names)
6866 {
6867 data->version_info_size = 2 * num_ssa_names;
6868 free (data->version_info);
6869 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6870 }
6871
6872 data->max_inv_id = 0;
6873
6874 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6875 SET_DECL_RTL (obj, NULL_RTX);
6876
6877 decl_rtl_to_reset.truncate (0);
6878
6879 data->inv_expr_tab->empty ();
6880 data->inv_expr_id = 0;
6881 }
6882
6883 /* Finalizes the data structures used by the iv optimization
6884 pass. */
6885
6886 static void
6887 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6888 {
6889 free_loop_data (data);
6890 free (data->version_info);
6891 BITMAP_FREE (data->relevant);
6892 BITMAP_FREE (data->important_candidates);
6893
6894 decl_rtl_to_reset.release ();
6895 data->iv_uses.release ();
6896 data->iv_candidates.release ();
6897 delete data->inv_expr_tab;
6898 data->inv_expr_tab = NULL;
6899 free_affine_expand_cache (&data->name_expansion_cache);
6900 }
6901
6902 /* Returns true if the loop body BODY (with NUM_NODES blocks) contains any function call, ignoring inexpensive builtins. */
6903
6904 static bool
6905 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6906 {
6907 gimple_stmt_iterator gsi;
6908 unsigned i;
6909
6910 for (i = 0; i < num_nodes; i++)
6911 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6912 {
6913 gimple stmt = gsi_stmt (gsi);
6914 if (is_gimple_call (stmt)
6915 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6916 return true;
6917 }
6918 return false;
6919 }
6920
6921 /* Optimizes the LOOP. Returns true if anything changed. */
6922
6923 static bool
6924 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6925 {
6926 bool changed = false;
6927 struct iv_ca *iv_ca;
6928 edge exit = single_dom_exit (loop);
6929 basic_block *body;
6930
6931 gcc_assert (!data->niters);
6932 data->current_loop = loop;
6933 data->speed = optimize_loop_for_speed_p (loop);
6934
6935 if (dump_file && (dump_flags & TDF_DETAILS))
6936 {
6937 fprintf (dump_file, "Processing loop %d\n", loop->num);
6938
6939 if (exit)
6940 {
6941 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6942 exit->src->index, exit->dest->index);
6943 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6944 fprintf (dump_file, "\n");
6945 }
6946
6947 fprintf (dump_file, "\n");
6948 }
6949
6950 body = get_loop_body (loop);
6951 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6952 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6953 free (body);
6954
6955 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6956
6957 /* For each ssa name, determine whether it behaves as an induction
6958 variable in this loop. */
6959 if (!find_induction_variables (data))
6960 goto finish;
6961
6962 /* Finds interesting uses (item 1). */
6963 find_interesting_uses (data);
6964 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6965 goto finish;
6966
6967 /* Finds candidates for the induction variables (item 2). */
6968 find_iv_candidates (data);
6969
6970 /* Calculates the costs (item 3, part 1). */
6971 determine_iv_costs (data);
6972 determine_use_iv_costs (data);
6973 determine_set_costs (data);
6974
6975 /* Find the optimal set of induction variables (item 3, part 2). */
6976 iv_ca = find_optimal_iv_set (data);
6977 if (!iv_ca)
6978 goto finish;
6979 changed = true;
6980
6981 /* Create the new induction variables (item 4, part 1). */
6982 create_new_ivs (data, iv_ca);
6983 iv_ca_free (&iv_ca);
6984
6985 /* Rewrite the uses (item 4, part 2). */
6986 rewrite_uses (data);
6987
6988 /* Remove the ivs that are unused after rewriting. */
6989 remove_unused_ivs (data);
6990
6991 /* We have changed the structure of induction variables; it might happen
6992 that definitions in the scev database refer to some of them that were
6993 eliminated. */
6994 scev_reset ();
6995
6996 finish:
6997 free_loop_data (data);
6998
6999 return changed;
7000 }
7001
7002 /* Main entry point. Optimizes induction variables in loops. */
7003
7004 void
7005 tree_ssa_iv_optimize (void)
7006 {
7007 struct loop *loop;
7008 struct ivopts_data data;
7009
7010 tree_ssa_iv_optimize_init (&data);
7011
7012 /* Optimize the loops starting with the innermost ones. */
7013 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7014 {
7015 if (dump_file && (dump_flags & TDF_DETAILS))
7016 flow_loop_dump (loop, dump_file, NULL, 1);
7017
7018 tree_ssa_iv_optimize_loop (&data, loop);
7019 }
7020
7021 tree_ssa_iv_optimize_finalize (&data);
7022 }