gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does so in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible; it might give better performance and enable us to decide
61 costs more precisely, but getting all the interactions right
62 would be complicated. */
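/* As a rough illustration of the effect this pass aims for (a sketch,
   not taken from any particular testcase), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   recomputes the address &a[i] from the counter i in every iteration.
   After a pointer candidate is selected for the address use, the loop
   may be rewritten along the lines of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   replacing the per-iteration multiply-and-add with a single pointer
   increment (strength reduction), and allowing i to be eliminated if
   it has no other uses (induction variable elimination).  */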
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "cfghooks.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "rtl.h"
72 #include "ssa.h"
73 #include "alias.h"
74 #include "fold-const.h"
75 #include "stor-layout.h"
76 #include "tm_p.h"
77 #include "gimple-pretty-print.h"
78 #include "internal-fn.h"
79 #include "tree-eh.h"
80 #include "gimplify.h"
81 #include "gimple-iterator.h"
82 #include "gimplify-me.h"
83 #include "cgraph.h"
84 #include "tree-cfg.h"
85 #include "tree-ssa-loop-ivopts.h"
86 #include "tree-ssa-loop-manip.h"
87 #include "tree-ssa-loop-niter.h"
88 #include "tree-ssa-loop.h"
89 #include "flags.h"
90 #include "insn-config.h"
91 #include "expmed.h"
92 #include "dojump.h"
93 #include "explow.h"
94 #include "calls.h"
95 #include "emit-rtl.h"
96 #include "varasm.h"
97 #include "stmt.h"
98 #include "expr.h"
99 #include "tree-dfa.h"
100 #include "tree-ssa.h"
101 #include "cfgloop.h"
102 #include "tree-pass.h"
103 #include "tree-chrec.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "langhooks.h"
107 #include "tree-affine.h"
108 #include "target.h"
109 #include "tree-inline.h"
110 #include "tree-ssa-propagate.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
113 #include "tree-vectorizer.h"
114
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116 cost of different addressing modes. This should be moved to a TBD
117 interface between the GIMPLE and RTL worlds. */
118 #include "recog.h"
119
120 /* The infinite cost. */
121 #define INFTY 10000000
122
123 #define AVG_LOOP_NITER(LOOP) 5
124
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
127 exists. */
128
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop *loop)
131 {
132 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
133 if (niter == -1)
134 return AVG_LOOP_NITER (loop);
135
136 return niter;
137 }
138
139 /* Representation of the induction variable. */
140 struct iv
141 {
142 tree base; /* Initial value of the iv. */
143 tree base_object; /* A memory object to which the induction variable points. */
144 tree step; /* Step of the iv (constant only). */
145 tree ssa_name; /* The ssa name with the value. */
146 unsigned use_id; /* The id of the use recorded for this iv, if there is one. */
147 bool biv_p; /* Is it a biv? */
148 bool have_use_for; /* Do we already have a use for it? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 };
151
152 /* Per-ssa version information (induction variable descriptions, etc.). */
153 struct version_info
154 {
155 tree name; /* The ssa name. */
156 struct iv *iv; /* Induction variable description. */
157 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
158 an expression that is not an induction variable. */
159 bool preserve_biv; /* For the original biv, whether to preserve it. */
160 unsigned inv_id; /* Id of an invariant. */
161 };
162
163 /* Types of uses. */
164 enum use_type
165 {
166 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
167 USE_ADDRESS, /* Use in an address. */
168 USE_COMPARE /* Use is a compare. */
169 };
170
171 /* Cost of a computation. */
172 struct comp_cost
173 {
174 int cost; /* The runtime cost. */
175 unsigned complexity; /* The estimate of the complexity of the code for
176 the computation (in no concrete units --
177 complexity field should be larger for more
178 complex expressions and addressing modes). */
179 };
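/* An illustrative reading of the two fields (hypothetical target): if
   the addressing modes [base] and [base + index*4 + 8] both execute in
   one cycle, they get the same runtime cost but the latter a larger
   complexity, so complexity is meant to break ties toward simpler
   expressions when the runtime costs come out equal.  */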
180
181 static const comp_cost no_cost = {0, 0};
182 static const comp_cost infinite_cost = {INFTY, INFTY};
183
184 /* The candidate - cost pair. */
185 struct cost_pair
186 {
187 struct iv_cand *cand; /* The candidate. */
188 comp_cost cost; /* The cost. */
189 bitmap depends_on; /* The list of invariants that have to be
190 preserved. */
191 tree value; /* For final value elimination, the expression for
192 the final value of the iv. For iv elimination,
193 the new bound to compare with. */
194 enum tree_code comp; /* For iv elimination, the comparison. */
195 int inv_expr_id; /* Loop invariant expression id. */
196 };
197
198 /* Use. */
199 struct iv_use
200 {
201 unsigned id; /* The id of the use. */
202 unsigned sub_id; /* The id of the sub use. */
203 enum use_type type; /* Type of the use. */
204 struct iv *iv; /* The induction variable it is based on. */
205 gimple stmt; /* Statement in which it occurs. */
206 tree *op_p; /* The place where it occurs. */
207 bitmap related_cands; /* The set of "related" iv candidates, plus the common
208 important ones. */
209
210 unsigned n_map_members; /* Number of candidates in the cost_map list. */
211 struct cost_pair *cost_map;
212 /* The costs with respect to the iv candidates. */
213
214 struct iv_cand *selected;
215 /* The selected candidate. */
216
217 struct iv_use *next; /* The next sub use. */
218 tree addr_base; /* Base address with const offset stripped. */
219 unsigned HOST_WIDE_INT addr_offset;
220 /* Const offset stripped from base address. */
221 };
222
223 /* The position where the iv is computed. */
224 enum iv_position
225 {
226 IP_NORMAL, /* At the end, just before the exit condition. */
227 IP_END, /* At the end of the latch block. */
228 IP_BEFORE_USE, /* Immediately before a specific use. */
229 IP_AFTER_USE, /* Immediately after a specific use. */
230 IP_ORIGINAL /* The original biv. */
231 };
232
233 /* The induction variable candidate. */
234 struct iv_cand
235 {
236 unsigned id; /* The number of the candidate. */
237 bool important; /* Whether this is an "important" candidate, i.e. such
238 that it should be considered by all uses. */
239 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
240 gimple incremented_at;/* For original biv, the statement where it is
241 incremented. */
242 tree var_before; /* The variable used for it before increment. */
243 tree var_after; /* The variable used for it after increment. */
244 struct iv *iv; /* The value of the candidate. NULL for
245 "pseudocandidate" used to indicate the possibility
246 to replace the final value of an iv by direct
247 computation of the value. */
248 unsigned cost; /* Cost of the candidate. */
249 unsigned cost_step; /* Cost of the candidate's increment operation. */
250 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
251 where it is incremented. */
252 bitmap depends_on; /* The list of invariants that are used in step of the
253 biv. */
254 };
255
256 /* Loop invariant expression hashtable entry. */
257 struct iv_inv_expr_ent
258 {
259 tree expr;
260 int id;
261 hashval_t hash;
262 };
263
264 /* The data used by the induction variable optimizations. */
265
266 typedef struct iv_use *iv_use_p;
267
268 typedef struct iv_cand *iv_cand_p;
269
270 /* Hashtable helpers. */
271
272 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
273 {
274 static inline hashval_t hash (const iv_inv_expr_ent *);
275 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
276 };
277
278 /* Hash function for loop invariant expressions. */
279
280 inline hashval_t
281 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
282 {
283 return expr->hash;
284 }
285
286 /* Hash table equality function for expressions. */
287
288 inline bool
289 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
290 const iv_inv_expr_ent *expr2)
291 {
292 return expr1->hash == expr2->hash
293 && operand_equal_p (expr1->expr, expr2->expr, 0);
294 }
295
296 struct ivopts_data
297 {
298 /* The currently optimized loop. */
299 struct loop *current_loop;
300 source_location loop_loc;
301
302 /* Numbers of iterations for all exits of the current loop. */
303 hash_map<edge, tree_niter_desc *> *niters;
304
305 /* Number of registers used in it. */
306 unsigned regs_used;
307
308 /* The size of version_info array allocated. */
309 unsigned version_info_size;
310
311 /* The array of information for the ssa names. */
312 struct version_info *version_info;
313
314 /* The hashtable of loop invariant expressions created
315 by ivopt. */
316 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
317
318 /* Loop invariant expression id. */
319 int inv_expr_id;
320
321 /* The bitmap of indices in version_info whose value was changed. */
322 bitmap relevant;
323
324 /* The uses of induction variables. */
325 vec<iv_use_p> iv_uses;
326
327 /* The candidates. */
328 vec<iv_cand_p> iv_candidates;
329
330 /* A bitmap of important candidates. */
331 bitmap important_candidates;
332
333 /* Cache used by tree_to_aff_combination_expand. */
334 hash_map<tree, name_expansion *> *name_expansion_cache;
335
336 /* The maximum invariant id. */
337 unsigned max_inv_id;
338
339 /* Obstack for iv structure. */
340 struct obstack iv_obstack;
341
342 /* Whether to consider just related and important candidates when replacing a
343 use. */
344 bool consider_all_candidates;
345
346 /* Are we optimizing for speed? */
347 bool speed;
348
349 /* Whether the loop body includes any function calls. */
350 bool body_includes_call;
351
352 /* Whether the loop body can only be exited via single exit. */
353 bool loop_single_exit_p;
354 };
355
356 /* An assignment of iv candidates to uses. */
357
358 struct iv_ca
359 {
360 /* The number of uses covered by the assignment. */
361 unsigned upto;
362
363 /* Number of uses that cannot be expressed by the candidates in the set. */
364 unsigned bad_uses;
365
366 /* Candidate assigned to a use, together with the related costs. */
367 struct cost_pair **cand_for_use;
368
369 /* Number of times each candidate is used. */
370 unsigned *n_cand_uses;
371
372 /* The candidates used. */
373 bitmap cands;
374
375 /* The number of candidates in the set. */
376 unsigned n_cands;
377
378 /* Total number of registers needed. */
379 unsigned n_regs;
380
381 /* Total cost of expressing uses. */
382 comp_cost cand_use_cost;
383
384 /* Total cost of candidates. */
385 unsigned cand_cost;
386
387 /* Number of times each invariant is used. */
388 unsigned *n_invariant_uses;
389
390 /* The array holding the number of uses of each loop
391 invariant expression created by ivopt. */
392 unsigned *used_inv_expr;
393
394 /* The number of created loop invariants. */
395 unsigned num_used_inv_expr;
396
397 /* Total cost of the assignment. */
398 comp_cost cost;
399 };
400
401 /* Difference of two iv candidate assignments. */
402
403 struct iv_ca_delta
404 {
405 /* Changed use. */
406 struct iv_use *use;
407
408 /* An old assignment (for rollback purposes). */
409 struct cost_pair *old_cp;
410
411 /* A new assignment. */
412 struct cost_pair *new_cp;
413
414 /* Next change in the list. */
415 struct iv_ca_delta *next_change;
416 };
417
418 /* Bound on number of candidates below which all candidates are considered. */
419
420 #define CONSIDER_ALL_CANDIDATES_BOUND \
421 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
422
423 /* If there are more iv occurrences, we just give up (it is quite unlikely that
424 optimizing such a loop would help, and it would take ages). */
425
426 #define MAX_CONSIDERED_USES \
427 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
428
429 /* If there are at most this number of ivs in the set, always try removing
430 unnecessary ivs from the set. */
431
432 #define ALWAYS_PRUNE_CAND_SET_BOUND \
433 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
434
435 /* The list of trees for which the decl_rtl field must be reset is stored
436 here. */
437
438 static vec<tree> decl_rtl_to_reset;
439
440 static comp_cost force_expr_to_var_cost (tree, bool);
441
442 /* Number of uses recorded in DATA. */
443
444 static inline unsigned
445 n_iv_uses (struct ivopts_data *data)
446 {
447 return data->iv_uses.length ();
448 }
449
450 /* Ith use recorded in DATA. */
451
452 static inline struct iv_use *
453 iv_use (struct ivopts_data *data, unsigned i)
454 {
455 return data->iv_uses[i];
456 }
457
458 /* Number of candidates recorded in DATA. */
459
460 static inline unsigned
461 n_iv_cands (struct ivopts_data *data)
462 {
463 return data->iv_candidates.length ();
464 }
465
466 /* Ith candidate recorded in DATA. */
467
468 static inline struct iv_cand *
469 iv_cand (struct ivopts_data *data, unsigned i)
470 {
471 return data->iv_candidates[i];
472 }
473
474 /* The single loop exit if it dominates the latch, NULL otherwise. */
475
476 edge
477 single_dom_exit (struct loop *loop)
478 {
479 edge exit = single_exit (loop);
480
481 if (!exit)
482 return NULL;
483
484 if (!just_once_each_iteration_p (loop, exit->src))
485 return NULL;
486
487 return exit;
488 }
489
490 /* Dumps information about the induction variable IV to FILE. */
491
492 void
493 dump_iv (FILE *file, struct iv *iv, bool dump_name)
494 {
495 if (iv->ssa_name && dump_name)
496 {
497 fprintf (file, "ssa name ");
498 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
499 fprintf (file, "\n");
500 }
501
502 fprintf (file, " type ");
503 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
504 fprintf (file, "\n");
505
506 if (iv->step)
507 {
508 fprintf (file, " base ");
509 print_generic_expr (file, iv->base, TDF_SLIM);
510 fprintf (file, "\n");
511
512 fprintf (file, " step ");
513 print_generic_expr (file, iv->step, TDF_SLIM);
514 fprintf (file, "\n");
515 }
516 else
517 {
518 fprintf (file, " invariant ");
519 print_generic_expr (file, iv->base, TDF_SLIM);
520 fprintf (file, "\n");
521 }
522
523 if (iv->base_object)
524 {
525 fprintf (file, " base object ");
526 print_generic_expr (file, iv->base_object, TDF_SLIM);
527 fprintf (file, "\n");
528 }
529
530 if (iv->biv_p)
531 fprintf (file, " is a biv\n");
532 }
533
534 /* Dumps information about the USE to FILE. */
535
536 void
537 dump_use (FILE *file, struct iv_use *use)
538 {
539 fprintf (file, "use %d", use->id);
540 if (use->sub_id)
541 fprintf (file, ".%d", use->sub_id);
542
543 fprintf (file, "\n");
544
545 switch (use->type)
546 {
547 case USE_NONLINEAR_EXPR:
548 fprintf (file, " generic\n");
549 break;
550
551 case USE_ADDRESS:
552 fprintf (file, " address\n");
553 break;
554
555 case USE_COMPARE:
556 fprintf (file, " compare\n");
557 break;
558
559 default:
560 gcc_unreachable ();
561 }
562
563 fprintf (file, " in statement ");
564 print_gimple_stmt (file, use->stmt, 0, 0);
565 fprintf (file, "\n");
566
567 fprintf (file, " at position ");
568 if (use->op_p)
569 print_generic_expr (file, *use->op_p, TDF_SLIM);
570 fprintf (file, "\n");
571
572 dump_iv (file, use->iv, false);
573
574 if (use->related_cands)
575 {
576 fprintf (file, " related candidates ");
577 dump_bitmap (file, use->related_cands);
578 }
579 }
580
581 /* Dumps information about the uses to FILE. */
582
583 void
584 dump_uses (FILE *file, struct ivopts_data *data)
585 {
586 unsigned i;
587 struct iv_use *use;
588
589 for (i = 0; i < n_iv_uses (data); i++)
590 {
591 use = iv_use (data, i);
592 do
593 {
594 dump_use (file, use);
595 use = use->next;
596 }
597 while (use);
598 fprintf (file, "\n");
599 }
600 }
601
602 /* Dumps information about induction variable candidate CAND to FILE. */
603
604 void
605 dump_cand (FILE *file, struct iv_cand *cand)
606 {
607 struct iv *iv = cand->iv;
608
609 fprintf (file, "candidate %d%s\n",
610 cand->id, cand->important ? " (important)" : "");
611
612 if (cand->depends_on)
613 {
614 fprintf (file, " depends on ");
615 dump_bitmap (file, cand->depends_on);
616 }
617
618 if (!iv)
619 {
620 fprintf (file, " final value replacement\n");
621 return;
622 }
623
624 if (cand->var_before)
625 {
626 fprintf (file, " var_before ");
627 print_generic_expr (file, cand->var_before, TDF_SLIM);
628 fprintf (file, "\n");
629 }
630 if (cand->var_after)
631 {
632 fprintf (file, " var_after ");
633 print_generic_expr (file, cand->var_after, TDF_SLIM);
634 fprintf (file, "\n");
635 }
636
637 switch (cand->pos)
638 {
639 case IP_NORMAL:
640 fprintf (file, " incremented before exit test\n");
641 break;
642
643 case IP_BEFORE_USE:
644 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
645 break;
646
647 case IP_AFTER_USE:
648 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
649 break;
650
651 case IP_END:
652 fprintf (file, " incremented at end\n");
653 break;
654
655 case IP_ORIGINAL:
656 fprintf (file, " original biv\n");
657 break;
658 }
659
660 dump_iv (file, iv, false);
661 }
662
663 /* Returns the info for ssa version VER. */
664
665 static inline struct version_info *
666 ver_info (struct ivopts_data *data, unsigned ver)
667 {
668 return data->version_info + ver;
669 }
670
671 /* Returns the info for ssa name NAME. */
672
673 static inline struct version_info *
674 name_info (struct ivopts_data *data, tree name)
675 {
676 return ver_info (data, SSA_NAME_VERSION (name));
677 }
678
679 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
680 emitted in LOOP. */
681
682 static bool
683 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
684 {
685 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
686
687 gcc_assert (bb);
688
689 if (sbb == loop->latch)
690 return true;
691
692 if (sbb != bb)
693 return false;
694
695 return stmt == last_stmt (bb);
696 }
697
698 /* Returns true if STMT is after the place where the original induction
699 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
700 if the positions are identical. */
701
702 static bool
703 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
704 {
705 basic_block cand_bb = gimple_bb (cand->incremented_at);
706 basic_block stmt_bb = gimple_bb (stmt);
707
708 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
709 return false;
710
711 if (stmt_bb != cand_bb)
712 return true;
713
714 if (true_if_equal
715 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
716 return true;
717 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
718 }
719
720 /* Returns true if STMT is after the place where the induction variable
721 CAND is incremented in LOOP. */
722
723 static bool
724 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
725 {
726 switch (cand->pos)
727 {
728 case IP_END:
729 return false;
730
731 case IP_NORMAL:
732 return stmt_after_ip_normal_pos (loop, stmt);
733
734 case IP_ORIGINAL:
735 case IP_AFTER_USE:
736 return stmt_after_inc_pos (cand, stmt, false);
737
738 case IP_BEFORE_USE:
739 return stmt_after_inc_pos (cand, stmt, true);
740
741 default:
742 gcc_unreachable ();
743 }
744 }
745
746 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
747
748 static bool
749 abnormal_ssa_name_p (tree exp)
750 {
751 if (!exp)
752 return false;
753
754 if (TREE_CODE (exp) != SSA_NAME)
755 return false;
756
757 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
758 }
759
760 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
761 abnormal phi node. Callback for for_each_index. */
762
763 static bool
764 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
765 void *data ATTRIBUTE_UNUSED)
766 {
767 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
768 {
769 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
770 return false;
771 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
772 return false;
773 }
774
775 return !abnormal_ssa_name_p (*index);
776 }
777
778 /* Returns true if EXPR contains a ssa name that occurs in an
779 abnormal phi node. */
780
781 bool
782 contains_abnormal_ssa_name_p (tree expr)
783 {
784 enum tree_code code;
785 enum tree_code_class codeclass;
786
787 if (!expr)
788 return false;
789
790 code = TREE_CODE (expr);
791 codeclass = TREE_CODE_CLASS (code);
792
793 if (code == SSA_NAME)
794 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
795
796 if (code == INTEGER_CST
797 || is_gimple_min_invariant (expr))
798 return false;
799
800 if (code == ADDR_EXPR)
801 return !for_each_index (&TREE_OPERAND (expr, 0),
802 idx_contains_abnormal_ssa_name_p,
803 NULL);
804
805 if (code == COND_EXPR)
806 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
807 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
808 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
809
810 switch (codeclass)
811 {
812 case tcc_binary:
813 case tcc_comparison:
814 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
815 return true;
816
817 /* Fallthru. */
818 case tcc_unary:
819 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
820 return true;
821
822 break;
823
824 default:
825 gcc_unreachable ();
826 }
827
828 return false;
829 }
830
831 /* Returns the structure describing number of iterations determined from
832 EXIT of DATA->current_loop, or NULL if something goes wrong. */
833
834 static struct tree_niter_desc *
835 niter_for_exit (struct ivopts_data *data, edge exit)
836 {
837 struct tree_niter_desc *desc;
838 tree_niter_desc **slot;
839
840 if (!data->niters)
841 {
842 data->niters = new hash_map<edge, tree_niter_desc *>;
843 slot = NULL;
844 }
845 else
846 slot = data->niters->get (exit);
847
848 if (!slot)
849 {
850 /* Try to determine number of iterations. We cannot safely work with ssa
851 names that appear in phi nodes on abnormal edges, so that we do not
852 create overlapping life ranges for them (PR 27283). */
853 desc = XNEW (struct tree_niter_desc);
854 if (!number_of_iterations_exit (data->current_loop,
855 exit, desc, true)
856 || contains_abnormal_ssa_name_p (desc->niter))
857 {
858 XDELETE (desc);
859 desc = NULL;
860 }
861 data->niters->put (exit, desc);
862 }
863 else
864 desc = *slot;
865
866 return desc;
867 }
868
869 /* Returns the structure describing number of iterations determined from
870 single dominating exit of DATA->current_loop, or NULL if something
871 goes wrong. */
872
873 static struct tree_niter_desc *
874 niter_for_single_dom_exit (struct ivopts_data *data)
875 {
876 edge exit = single_dom_exit (data->current_loop);
877
878 if (!exit)
879 return NULL;
880
881 return niter_for_exit (data, exit);
882 }
883
884 /* Initializes data structures used by the iv optimization pass, stored
885 in DATA. */
886
887 static void
888 tree_ssa_iv_optimize_init (struct ivopts_data *data)
889 {
890 data->version_info_size = 2 * num_ssa_names;
891 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
892 data->relevant = BITMAP_ALLOC (NULL);
893 data->important_candidates = BITMAP_ALLOC (NULL);
894 data->max_inv_id = 0;
895 data->niters = NULL;
896 data->iv_uses.create (20);
897 data->iv_candidates.create (20);
898 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
899 data->inv_expr_id = 0;
900 data->name_expansion_cache = NULL;
901 decl_rtl_to_reset.create (20);
902 gcc_obstack_init (&data->iv_obstack);
903 }
904
905 /* Returns a memory object to which EXPR points. In case we are able to
906 determine that it does not point to any such object, NULL is returned. */
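/* As an illustration (hypothetical inputs): for &a[i] with a declared
   array a, the base object is &a; for a pointer SSA name p, or for
   p + 4, it is p itself; for a non-pointer expression the result is
   NULL_TREE.  */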
907
908 static tree
909 determine_base_object (tree expr)
910 {
911 enum tree_code code = TREE_CODE (expr);
912 tree base, obj;
913
914 /* If this is a pointer casted to any type, we need to determine
915 the base object for the pointer; so handle conversions before
916 throwing away non-pointer expressions. */
917 if (CONVERT_EXPR_P (expr))
918 return determine_base_object (TREE_OPERAND (expr, 0));
919
920 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
921 return NULL_TREE;
922
923 switch (code)
924 {
925 case INTEGER_CST:
926 return NULL_TREE;
927
928 case ADDR_EXPR:
929 obj = TREE_OPERAND (expr, 0);
930 base = get_base_address (obj);
931
932 if (!base)
933 return expr;
934
935 if (TREE_CODE (base) == MEM_REF)
936 return determine_base_object (TREE_OPERAND (base, 0));
937
938 return fold_convert (ptr_type_node,
939 build_fold_addr_expr (base));
940
941 case POINTER_PLUS_EXPR:
942 return determine_base_object (TREE_OPERAND (expr, 0));
943
944 case PLUS_EXPR:
945 case MINUS_EXPR:
946 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
947 gcc_unreachable ();
948
949 default:
950 return fold_convert (ptr_type_node, expr);
951 }
952 }
953
954 /* Return true if an address expression with a non-DECL_P operand appears
955 in EXPR. */
956
957 static bool
958 contain_complex_addr_expr (tree expr)
959 {
960 bool res = false;
961
962 STRIP_NOPS (expr);
963 switch (TREE_CODE (expr))
964 {
965 case POINTER_PLUS_EXPR:
966 case PLUS_EXPR:
967 case MINUS_EXPR:
968 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
969 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
970 break;
971
972 case ADDR_EXPR:
973 return (!DECL_P (TREE_OPERAND (expr, 0)));
974
975 default:
976 return false;
977 }
978
979 return res;
980 }
981
982 /* Allocates an induction variable with given initial value BASE and step STEP
983 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
984
985 static struct iv *
986 alloc_iv (struct ivopts_data *data, tree base, tree step,
987 bool no_overflow = false)
988 {
989 tree expr = base;
990 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
991 sizeof (struct iv));
992 gcc_assert (step != NULL_TREE);
993
994 /* Lower address expressions in BASE, except those with a DECL_P operand.
995 By doing this:
996 1) More accurate cost can be computed for address expressions;
997 2) Duplicate candidates won't be created for bases in different
998 forms, like &a[0] and &a. */
999 STRIP_NOPS (expr);
1000 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1001 || contain_complex_addr_expr (expr))
1002 {
1003 aff_tree comb;
1004 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1005 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1006 }
1007
1008 iv->base = base;
1009 iv->base_object = determine_base_object (base);
1010 iv->step = step;
1011 iv->biv_p = false;
1012 iv->have_use_for = false;
1013 iv->use_id = 0;
1014 iv->ssa_name = NULL_TREE;
1015 iv->no_overflow = no_overflow;
1016
1017 return iv;
1018 }
1019
1020 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1021 doesn't overflow. */
1022
1023 static void
1024 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1025 bool no_overflow)
1026 {
1027 struct version_info *info = name_info (data, iv);
1028
1029 gcc_assert (!info->iv);
1030
1031 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1032 info->iv = alloc_iv (data, base, step, no_overflow);
1033 info->iv->ssa_name = iv;
1034 }
1035
1036 /* Finds induction variable declaration for VAR. */
1037
1038 static struct iv *
1039 get_iv (struct ivopts_data *data, tree var)
1040 {
1041 basic_block bb;
1042 tree type = TREE_TYPE (var);
1043
1044 if (!POINTER_TYPE_P (type)
1045 && !INTEGRAL_TYPE_P (type))
1046 return NULL;
1047
1048 if (!name_info (data, var)->iv)
1049 {
1050 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1051
1052 if (!bb
1053 || !flow_bb_inside_loop_p (data->current_loop, bb))
1054 set_iv (data, var, var, build_int_cst (type, 0), true);
1055 }
1056
1057 return name_info (data, var)->iv;
1058 }
1059
1060 /* Return the first non-invariant ssa var found in EXPR. */
1061
1062 static tree
1063 extract_single_var_from_expr (tree expr)
1064 {
1065 int i, n;
1066 tree tmp;
1067 enum tree_code code;
1068
1069 if (!expr || is_gimple_min_invariant (expr))
1070 return NULL;
1071
1072 code = TREE_CODE (expr);
1073 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1074 {
1075 n = TREE_OPERAND_LENGTH (expr);
1076 for (i = 0; i < n; i++)
1077 {
1078 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1079
1080 if (tmp)
1081 return tmp;
1082 }
1083 }
1084 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1085 }
1086
1087 /* Finds basic ivs. */
1088
1089 static bool
1090 find_bivs (struct ivopts_data *data)
1091 {
1092 gphi *phi;
1093 affine_iv iv;
1094 tree step, type, base, stop;
1095 bool found = false;
1096 struct loop *loop = data->current_loop;
1097 gphi_iterator psi;
1098
1099 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1100 {
1101 phi = psi.phi ();
1102
1103 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1104 continue;
1105
1106 if (virtual_operand_p (PHI_RESULT (phi)))
1107 continue;
1108
1109 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1110 continue;
1111
1112 if (integer_zerop (iv.step))
1113 continue;
1114
1115 step = iv.step;
1116 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1117 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1118 Ideally we should stop at any ssa var referred to by the step, but since
1119 that is expensive and such cases are unusual, we just do it on the first one.
1120
1121 See PR64705 for the rationale. */
1122 stop = extract_single_var_from_expr (step);
1123 base = expand_simple_operations (base, stop);
1124 if (contains_abnormal_ssa_name_p (base)
1125 || contains_abnormal_ssa_name_p (step))
1126 continue;
1127
1128 type = TREE_TYPE (PHI_RESULT (phi));
1129 base = fold_convert (type, base);
1130 if (step)
1131 {
1132 if (POINTER_TYPE_P (type))
1133 step = convert_to_ptrofftype (step);
1134 else
1135 step = fold_convert (type, step);
1136 }
1137
1138 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1139 found = true;
1140 }
1141
1142 return found;
1143 }
1144
1145 /* Marks basic ivs. */
1146
1147 static void
1148 mark_bivs (struct ivopts_data *data)
1149 {
1150 gphi *phi;
1151 gimple def;
1152 tree var;
1153 struct iv *iv, *incr_iv;
1154 struct loop *loop = data->current_loop;
1155 basic_block incr_bb;
1156 gphi_iterator psi;
1157
1158 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1159 {
1160 phi = psi.phi ();
1161
1162 iv = get_iv (data, PHI_RESULT (phi));
1163 if (!iv)
1164 continue;
1165
1166 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1167 def = SSA_NAME_DEF_STMT (var);
1168 /* Don't mark iv peeled from other one as biv. */
1169 if (def
1170 && gimple_code (def) == GIMPLE_PHI
1171 && gimple_bb (def) == loop->header)
1172 continue;
1173
1174 incr_iv = get_iv (data, var);
1175 if (!incr_iv)
1176 continue;
1177
1178 /* If the increment is in the subloop, ignore it. */
1179 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1180 if (incr_bb->loop_father != data->current_loop
1181 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1182 continue;
1183
1184 iv->biv_p = true;
1185 incr_iv->biv_p = true;
1186 }
1187 }
1188
1189 /* Checks whether STMT defines a linear induction variable and stores its
1190 parameters to IV. */
1191
1192 static bool
1193 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1194 {
1195 tree lhs, stop;
1196 struct loop *loop = data->current_loop;
1197
1198 iv->base = NULL_TREE;
1199 iv->step = NULL_TREE;
1200
1201 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1202 return false;
1203
1204 lhs = gimple_assign_lhs (stmt);
1205 if (TREE_CODE (lhs) != SSA_NAME)
1206 return false;
1207
1208 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1209 return false;
1210
1211 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1212 Ideally we should stop at any ssa var referred to by the step, but since
1213 that is expensive and such cases are unusual, we just do it on the first one.
1214
1215 See PR64705 for the rationale. */
1216 stop = extract_single_var_from_expr (iv->step);
1217 iv->base = expand_simple_operations (iv->base, stop);
1218 if (contains_abnormal_ssa_name_p (iv->base)
1219 || contains_abnormal_ssa_name_p (iv->step))
1220 return false;
1221
1222 /* If STMT could throw, then do not consider STMT as defining a GIV.
1223 While this will suppress optimizations, we can not safely delete this
1224 GIV and associated statements, even if it appears it is not used. */
1225 if (stmt_could_throw_p (stmt))
1226 return false;
1227
1228 return true;
1229 }
1230
1231 /* Finds general ivs in statement STMT. */
1232
1233 static void
1234 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1235 {
1236 affine_iv iv;
1237
1238 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1239 return;
1240
1241 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1242 }
1243
1244 /* Finds general ivs in basic block BB. */
1245
1246 static void
1247 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1248 {
1249 gimple_stmt_iterator bsi;
1250
1251 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1252 find_givs_in_stmt (data, gsi_stmt (bsi));
1253 }
1254
1255 /* Finds general ivs. */
1256
1257 static void
1258 find_givs (struct ivopts_data *data)
1259 {
1260 struct loop *loop = data->current_loop;
1261 basic_block *body = get_loop_body_in_dom_order (loop);
1262 unsigned i;
1263
1264 for (i = 0; i < loop->num_nodes; i++)
1265 find_givs_in_bb (data, body[i]);
1266 free (body);
1267 }
1268
1269 /* For each ssa name defined in LOOP determines whether it is an induction
1270 variable and if so, its initial value and step. */
1271
1272 static bool
1273 find_induction_variables (struct ivopts_data *data)
1274 {
1275 unsigned i;
1276 bitmap_iterator bi;
1277
1278 if (!find_bivs (data))
1279 return false;
1280
1281 find_givs (data);
1282 mark_bivs (data);
1283
1284 if (dump_file && (dump_flags & TDF_DETAILS))
1285 {
1286 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1287
1288 if (niter)
1289 {
1290 fprintf (dump_file, " number of iterations ");
1291 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1292 if (!integer_zerop (niter->may_be_zero))
1293 {
1294 fprintf (dump_file, "; zero if ");
1295 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1296 }
1297 fprintf (dump_file, "\n\n");
1298 };
1299
1300 fprintf (dump_file, "Induction variables:\n\n");
1301
1302 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1303 {
1304 if (ver_info (data, i)->iv)
1305 dump_iv (dump_file, ver_info (data, i)->iv, true);
1306 }
1307 }
1308
1309 return true;
1310 }
1311
1312 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1313 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1314 is the const offset stripped from IV base. For uses of other types,
1315 ADDR_BASE and ADDR_OFFSET are zero by default. */
1316
1317 static struct iv_use *
1318 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1319 gimple stmt, enum use_type use_type, tree addr_base = NULL,
1320 unsigned HOST_WIDE_INT addr_offset = 0)
1321 {
1322 struct iv_use *use = XCNEW (struct iv_use);
1323
1324 use->id = n_iv_uses (data);
1325 use->sub_id = 0;
1326 use->type = use_type;
1327 use->iv = iv;
1328 use->stmt = stmt;
1329 use->op_p = use_p;
1330 use->related_cands = BITMAP_ALLOC (NULL);
1331 use->next = NULL;
1332 use->addr_base = addr_base;
1333 use->addr_offset = addr_offset;
1334
1335 data->iv_uses.safe_push (use);
1336
1337 return use;
1338 }
1339
1340 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1341 The sub use is recorded under the one whose use id is ID_GROUP. */
1342
1343 static struct iv_use *
1344 record_sub_use (struct ivopts_data *data, tree *use_p,
1345 struct iv *iv, gimple stmt, enum use_type use_type,
1346 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1347 unsigned int id_group)
1348 {
1349 struct iv_use *use = XCNEW (struct iv_use);
1350 struct iv_use *group = iv_use (data, id_group);
1351
1352 use->id = group->id;
1353 use->sub_id = 0;
1354 use->type = use_type;
1355 use->iv = iv;
1356 use->stmt = stmt;
1357 use->op_p = use_p;
1358 use->related_cands = NULL;
1359 use->addr_base = addr_base;
1360 use->addr_offset = addr_offset;
1361
1362 /* Sub use list is maintained in offset ascending order. */
1363 if (addr_offset <= group->addr_offset)
1364 {
1365 use->related_cands = group->related_cands;
1366 group->related_cands = NULL;
1367 use->next = group;
1368 data->iv_uses[id_group] = use;
1369 }
1370 else
1371 {
1372 struct iv_use *pre;
1373 do
1374 {
1375 pre = group;
1376 group = group->next;
1377 }
1378 while (group && addr_offset > group->addr_offset);
1379 use->next = pre->next;
1380 pre->next = use;
1381 }
1382
1383 return use;
1384 }
1385
1386 /* Checks whether OP is a loop-level invariant and if so, records it.
1387 NONLINEAR_USE is true if the invariant is used in a way we do not
1388 handle specially. */
1389
1390 static void
1391 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1392 {
1393 basic_block bb;
1394 struct version_info *info;
1395
1396 if (TREE_CODE (op) != SSA_NAME
1397 || virtual_operand_p (op))
1398 return;
1399
1400 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1401 if (bb
1402 && flow_bb_inside_loop_p (data->current_loop, bb))
1403 return;
1404
1405 info = name_info (data, op);
1406 info->name = op;
1407 info->has_nonlin_use |= nonlinear_use;
1408 if (!info->inv_id)
1409 info->inv_id = ++data->max_inv_id;
1410 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1411 }
1412
1413 /* Checks whether the use OP is interesting and if so, records it. */
1414
1415 static struct iv_use *
1416 find_interesting_uses_op (struct ivopts_data *data, tree op)
1417 {
1418 struct iv *iv;
1419 gimple stmt;
1420 struct iv_use *use;
1421
1422 if (TREE_CODE (op) != SSA_NAME)
1423 return NULL;
1424
1425 iv = get_iv (data, op);
1426 if (!iv)
1427 return NULL;
1428
1429 if (iv->have_use_for)
1430 {
1431 use = iv_use (data, iv->use_id);
1432
1433 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1434 return use;
1435 }
1436
1437 if (integer_zerop (iv->step))
1438 {
1439 record_invariant (data, op, true);
1440 return NULL;
1441 }
1442 iv->have_use_for = true;
1443
1444 stmt = SSA_NAME_DEF_STMT (op);
1445 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1446 || is_gimple_assign (stmt));
1447
1448 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1449 iv->use_id = use->id;
1450
1451 return use;
1452 }
1453
1454 /* Given a condition in statement STMT, checks whether it is a compare
1455 of an induction variable and an invariant. If this is the case,
1456 CONTROL_VAR is set to location of the iv, BOUND to the location of
1457 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1458 induction variable descriptions, and true is returned. If this is not
1459 the case, CONTROL_VAR and BOUND are set to the arguments of the
1460 condition and false is returned. */
1461
1462 static bool
1463 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1464 tree **control_var, tree **bound,
1465 struct iv **iv_var, struct iv **iv_bound)
1466 {
1467 /* The objects returned when COND has constant operands. */
1468 static struct iv const_iv;
1469 static tree zero;
1470 tree *op0 = &zero, *op1 = &zero;
1471 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1472 bool ret = false;
1473
1474 if (gimple_code (stmt) == GIMPLE_COND)
1475 {
1476 gcond *cond_stmt = as_a <gcond *> (stmt);
1477 op0 = gimple_cond_lhs_ptr (cond_stmt);
1478 op1 = gimple_cond_rhs_ptr (cond_stmt);
1479 }
1480 else
1481 {
1482 op0 = gimple_assign_rhs1_ptr (stmt);
1483 op1 = gimple_assign_rhs2_ptr (stmt);
1484 }
1485
1486 zero = integer_zero_node;
1487 const_iv.step = integer_zero_node;
1488
1489 if (TREE_CODE (*op0) == SSA_NAME)
1490 iv0 = get_iv (data, *op0);
1491 if (TREE_CODE (*op1) == SSA_NAME)
1492 iv1 = get_iv (data, *op1);
1493
1494 /* Exactly one of the compared values must be an iv, and the other one must
1495 be an invariant. */
1496 if (!iv0 || !iv1)
1497 goto end;
1498
1499 if (integer_zerop (iv0->step))
1500 {
1501 /* Control variable may be on the other side. */
1502 std::swap (op0, op1);
1503 std::swap (iv0, iv1);
1504 }
1505 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1506
1507 end:
1508 if (control_var)
1509 *control_var = op0;
1510 if (iv_var)
1511 *iv_var = iv0;
1512 if (bound)
1513 *bound = op1;
1514 if (iv_bound)
1515 *iv_bound = iv1;
1516
1517 return ret;
1518 }
1519
1520 /* Checks whether the condition in STMT is interesting and if so,
1521 records it. */
1522
1523 static void
1524 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1525 {
1526 tree *var_p, *bound_p;
1527 struct iv *var_iv;
1528
1529 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1530 {
1531 find_interesting_uses_op (data, *var_p);
1532 find_interesting_uses_op (data, *bound_p);
1533 return;
1534 }
1535
1536 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1537 }
1538
1539 /* Returns the outermost loop EXPR is obviously invariant in
1540 relative to the loop LOOP, i.e. if all its operands are defined
1541 outside of the returned loop. Returns NULL if EXPR is not
1542 even obviously invariant in LOOP. */
1543
1544 struct loop *
1545 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1546 {
1547 basic_block def_bb;
1548 unsigned i, len;
1549
1550 if (is_gimple_min_invariant (expr))
1551 return current_loops->tree_root;
1552
1553 if (TREE_CODE (expr) == SSA_NAME)
1554 {
1555 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1556 if (def_bb)
1557 {
1558 if (flow_bb_inside_loop_p (loop, def_bb))
1559 return NULL;
1560 return superloop_at_depth (loop,
1561 loop_depth (def_bb->loop_father) + 1);
1562 }
1563
1564 return current_loops->tree_root;
1565 }
1566
1567 if (!EXPR_P (expr))
1568 return NULL;
1569
1570 unsigned maxdepth = 0;
1571 len = TREE_OPERAND_LENGTH (expr);
1572 for (i = 0; i < len; i++)
1573 {
1574 struct loop *ivloop;
1575 if (!TREE_OPERAND (expr, i))
1576 continue;
1577
1578 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1579 if (!ivloop)
1580 return NULL;
1581 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1582 }
1583
1584 return superloop_at_depth (loop, maxdepth);
1585 }
1586
1587 /* Returns true if expression EXPR is obviously invariant in LOOP,
1588 i.e. if all its operands are defined outside of the LOOP. LOOP
1589 should not be the function body. */
1590
1591 bool
1592 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1593 {
1594 basic_block def_bb;
1595 unsigned i, len;
1596
1597 gcc_assert (loop_depth (loop) > 0);
1598
1599 if (is_gimple_min_invariant (expr))
1600 return true;
1601
1602 if (TREE_CODE (expr) == SSA_NAME)
1603 {
1604 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1605 if (def_bb
1606 && flow_bb_inside_loop_p (loop, def_bb))
1607 return false;
1608
1609 return true;
1610 }
1611
1612 if (!EXPR_P (expr))
1613 return false;
1614
1615 len = TREE_OPERAND_LENGTH (expr);
1616 for (i = 0; i < len; i++)
1617 if (TREE_OPERAND (expr, i)
1618 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1619 return false;
1620
1621 return true;
1622 }
1623
1624 /* Accumulates the steps of indices into DATA and replaces their values with the
1625 initial ones. Returns false when the value of the index cannot be determined.
1626 Callback for for_each_index. */
1627
1628 struct ifs_ivopts_data
1629 {
1630 struct ivopts_data *ivopts_data;
1631 gimple stmt;
1632 tree step;
1633 };
1634
1635 static bool
1636 idx_find_step (tree base, tree *idx, void *data)
1637 {
1638 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1639 struct iv *iv;
1640 bool use_overflow_semantics = false;
1641 tree step, iv_base, iv_step, lbound, off;
1642 struct loop *loop = dta->ivopts_data->current_loop;
1643
1644 /* If base is a component ref, require that the offset of the reference
1645 be invariant. */
1646 if (TREE_CODE (base) == COMPONENT_REF)
1647 {
1648 off = component_ref_field_offset (base);
1649 return expr_invariant_in_loop_p (loop, off);
1650 }
1651
1652 /* If base is an array, first check whether we will be able to move the
1653 reference out of the loop (in order to take its address in strength
1654 reduction). In order for this to work we need both lower bound
1655 and step to be loop invariants. */
1656 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1657 {
1658 /* Moreover, for a range, the size needs to be invariant as well. */
1659 if (TREE_CODE (base) == ARRAY_RANGE_REF
1660 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1661 return false;
1662
1663 step = array_ref_element_size (base);
1664 lbound = array_ref_low_bound (base);
1665
1666 if (!expr_invariant_in_loop_p (loop, step)
1667 || !expr_invariant_in_loop_p (loop, lbound))
1668 return false;
1669 }
1670
1671 if (TREE_CODE (*idx) != SSA_NAME)
1672 return true;
1673
1674 iv = get_iv (dta->ivopts_data, *idx);
1675 if (!iv)
1676 return false;
1677
1678 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1679 *&x[0], which is not folded and does not trigger the
1680 ARRAY_REF path below. */
1681 *idx = iv->base;
1682
1683 if (integer_zerop (iv->step))
1684 return true;
1685
1686 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1687 {
1688 step = array_ref_element_size (base);
1689
1690 /* We only handle addresses whose step is an integer constant. */
1691 if (TREE_CODE (step) != INTEGER_CST)
1692 return false;
1693 }
1694 else
1695 /* The step for pointer arithmetic is already 1 byte. */
1696 step = size_one_node;
1697
1698 iv_base = iv->base;
1699 iv_step = iv->step;
1700 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1701 use_overflow_semantics = true;
1702
1703 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1704 sizetype, &iv_base, &iv_step, dta->stmt,
1705 use_overflow_semantics))
1706 {
1707 /* The index might wrap. */
1708 return false;
1709 }
1710
1711 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1712 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1713
1714 return true;
1715 }
1716
1717 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1718 object is passed to it in DATA. */
1719
1720 static bool
1721 idx_record_use (tree base, tree *idx,
1722 void *vdata)
1723 {
1724 struct ivopts_data *data = (struct ivopts_data *) vdata;
1725 find_interesting_uses_op (data, *idx);
1726 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1727 {
1728 find_interesting_uses_op (data, array_ref_element_size (base));
1729 find_interesting_uses_op (data, array_ref_low_bound (base));
1730 }
1731 return true;
1732 }
1733
1734 /* If we can prove that TOP = cst * BOT for some constant cst,
1735 store cst to MUL and return true. Otherwise return false.
1736 The returned value is always sign-extended, regardless of the
1737 signedness of TOP and BOT. */
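/* A hypothetical example: for TOP = (i + 3) * 4 and BOT = i + 3 the
   routine stores 4 to *MUL and returns true; for TOP = i + 1 and
   BOT = i it returns false, since 1 is not a constant multiple of i.  */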
1738
1739 static bool
1740 constant_multiple_of (tree top, tree bot, widest_int *mul)
1741 {
1742 tree mby;
1743 enum tree_code code;
1744 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1745 widest_int res, p0, p1;
1746
1747 STRIP_NOPS (top);
1748 STRIP_NOPS (bot);
1749
1750 if (operand_equal_p (top, bot, 0))
1751 {
1752 *mul = 1;
1753 return true;
1754 }
1755
1756 code = TREE_CODE (top);
1757 switch (code)
1758 {
1759 case MULT_EXPR:
1760 mby = TREE_OPERAND (top, 1);
1761 if (TREE_CODE (mby) != INTEGER_CST)
1762 return false;
1763
1764 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1765 return false;
1766
1767 *mul = wi::sext (res * wi::to_widest (mby), precision);
1768 return true;
1769
1770 case PLUS_EXPR:
1771 case MINUS_EXPR:
1772 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1773 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1774 return false;
1775
1776 if (code == MINUS_EXPR)
1777 p1 = -p1;
1778 *mul = wi::sext (p0 + p1, precision);
1779 return true;
1780
1781 case INTEGER_CST:
1782 if (TREE_CODE (bot) != INTEGER_CST)
1783 return false;
1784
1785 p0 = widest_int::from (top, SIGNED);
1786 p1 = widest_int::from (bot, SIGNED);
1787 if (p1 == 0)
1788 return false;
1789 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1790 return res == 0;
1791
1792 default:
1793 return false;
1794 }
1795 }
1796
1797 /* Return true if memory reference REF with step STEP may be unaligned. */
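/* The idea, sketched on a hypothetical case: even if REF is suitably
   aligned on the first iteration, a step of 4 bytes can break 8-byte
   alignment on later iterations, so for an access type requiring
   8-byte alignment and STEP equal to 4 this returns true.  */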
1798
1799 static bool
1800 may_be_unaligned_p (tree ref, tree step)
1801 {
1802 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1803 thus they are not misaligned. */
1804 if (TREE_CODE (ref) == TARGET_MEM_REF)
1805 return false;
1806
1807 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1808 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1809 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1810
1811 unsigned HOST_WIDE_INT bitpos;
1812 unsigned int ref_align;
1813 get_object_alignment_1 (ref, &ref_align, &bitpos);
1814 if (ref_align < align
1815 || (bitpos % align) != 0
1816 || (bitpos % BITS_PER_UNIT) != 0)
1817 return true;
1818
1819 unsigned int trailing_zeros = tree_ctz (step);
1820 if (trailing_zeros < HOST_BITS_PER_INT
1821 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1822 return true;
1823
1824 return false;
1825 }
1826
1827 /* Return true if EXPR may be non-addressable. */
1828
1829 bool
1830 may_be_nonaddressable_p (tree expr)
1831 {
1832 switch (TREE_CODE (expr))
1833 {
1834 case TARGET_MEM_REF:
1835 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1836 target, thus they are always addressable. */
1837 return false;
1838
1839 case COMPONENT_REF:
1840 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1841 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1842
1843 case VIEW_CONVERT_EXPR:
1844 /* This kind of view-conversion may wrap non-addressable objects
1845 and make them look addressable. After some processing the
1846 non-addressability may be uncovered again, causing ADDR_EXPRs
1847 of inappropriate objects to be built. */
1848 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1849 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1850 return true;
1851
1852 /* ... fall through ... */
1853
1854 case ARRAY_REF:
1855 case ARRAY_RANGE_REF:
1856 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1857
1858 CASE_CONVERT:
1859 return true;
1860
1861 default:
1862 break;
1863 }
1864
1865 return false;
1866 }
1867
1868 static tree
1869 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1870
1871 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1872 If there is an existing use with the same stripped iv base and step,
1873 this function records this one as a sub use of that one; otherwise it
1874 records it as a normal use. */
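/* An illustrative scenario: two address uses *(p + 8) and *(p + 16)
   based on the same pointer iv share the stripped base p, so the
   second is recorded as a sub use of the first, with 8 and 16 kept as
   their constant offsets.  */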
1875
1876 static struct iv_use *
1877 record_group_use (struct ivopts_data *data, tree *use_p,
1878 struct iv *iv, gimple stmt, enum use_type use_type)
1879 {
1880 unsigned int i;
1881 struct iv_use *use;
1882 tree addr_base;
1883 unsigned HOST_WIDE_INT addr_offset;
1884
1885 /* Only support sub use for address type uses, that is, with base
1886 object. */
1887 if (!iv->base_object)
1888 return record_use (data, use_p, iv, stmt, use_type);
1889
1890 addr_base = strip_offset (iv->base, &addr_offset);
1891 for (i = 0; i < n_iv_uses (data); i++)
1892 {
1893 use = iv_use (data, i);
1894 if (use->type != USE_ADDRESS || !use->iv->base_object)
1895 continue;
1896
1897 /* Check if it has the same stripped base and step. */
1898 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1899 && operand_equal_p (iv->step, use->iv->step, 0)
1900 && operand_equal_p (addr_base, use->addr_base, 0))
1901 break;
1902 }
1903
1904 if (i == n_iv_uses (data))
1905 return record_use (data, use_p, iv, stmt,
1906 use_type, addr_base, addr_offset);
1907 else
1908 return record_sub_use (data, use_p, iv, stmt,
1909 use_type, addr_base, addr_offset, i);
1910 }
1911
1912 /* Finds addresses in *OP_P inside STMT. */
1913
1914 static void
1915 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1916 {
1917 tree base = *op_p, step = size_zero_node;
1918 struct iv *civ;
1919 struct ifs_ivopts_data ifs_ivopts_data;
1920
1921 /* Do not play with volatile memory references. A bit too conservative,
1922 perhaps, but safe. */
1923 if (gimple_has_volatile_ops (stmt))
1924 goto fail;
1925
1926 /* Ignore bitfields for now. Not really something terribly complicated
1927 to handle. TODO. */
1928 if (TREE_CODE (base) == BIT_FIELD_REF)
1929 goto fail;
1930
1931 base = unshare_expr (base);
1932
1933 if (TREE_CODE (base) == TARGET_MEM_REF)
1934 {
1935 tree type = build_pointer_type (TREE_TYPE (base));
1936 tree astep;
1937
1938 if (TMR_BASE (base)
1939 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1940 {
1941 civ = get_iv (data, TMR_BASE (base));
1942 if (!civ)
1943 goto fail;
1944
1945 TMR_BASE (base) = civ->base;
1946 step = civ->step;
1947 }
1948 if (TMR_INDEX2 (base)
1949 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1950 {
1951 civ = get_iv (data, TMR_INDEX2 (base));
1952 if (!civ)
1953 goto fail;
1954
1955 TMR_INDEX2 (base) = civ->base;
1956 step = civ->step;
1957 }
1958 if (TMR_INDEX (base)
1959 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1960 {
1961 civ = get_iv (data, TMR_INDEX (base));
1962 if (!civ)
1963 goto fail;
1964
1965 TMR_INDEX (base) = civ->base;
1966 astep = civ->step;
1967
1968 if (astep)
1969 {
1970 if (TMR_STEP (base))
1971 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1972
1973 step = fold_build2 (PLUS_EXPR, type, step, astep);
1974 }
1975 }
1976
1977 if (integer_zerop (step))
1978 goto fail;
1979 base = tree_mem_ref_addr (type, base);
1980 }
1981 else
1982 {
1983 ifs_ivopts_data.ivopts_data = data;
1984 ifs_ivopts_data.stmt = stmt;
1985 ifs_ivopts_data.step = size_zero_node;
1986 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1987 || integer_zerop (ifs_ivopts_data.step))
1988 goto fail;
1989 step = ifs_ivopts_data.step;
1990
1991 /* Check that the base expression is addressable. This needs
1992 to be done after substituting bases of IVs into it. */
1993 if (may_be_nonaddressable_p (base))
1994 goto fail;
1995
1996 /* Moreover, on strict alignment platforms, check that it is
1997 sufficiently aligned. */
1998 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1999 goto fail;
2000
2001 base = build_fold_addr_expr (base);
2002
2003 /* Substituting bases of IVs into the base expression might
2004 have caused folding opportunities. */
2005 if (TREE_CODE (base) == ADDR_EXPR)
2006 {
2007 tree *ref = &TREE_OPERAND (base, 0);
2008 while (handled_component_p (*ref))
2009 ref = &TREE_OPERAND (*ref, 0);
2010 if (TREE_CODE (*ref) == MEM_REF)
2011 {
2012 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2013 TREE_OPERAND (*ref, 0),
2014 TREE_OPERAND (*ref, 1));
2015 if (tem)
2016 *ref = tem;
2017 }
2018 }
2019 }
2020
2021 civ = alloc_iv (data, base, step);
2022 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2023 return;
2024
2025 fail:
2026 for_each_index (op_p, idx_record_use, data);
2027 }
2028
2029 /* Finds and records invariants used in STMT. */
2030
2031 static void
2032 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2033 {
2034 ssa_op_iter iter;
2035 use_operand_p use_p;
2036 tree op;
2037
2038 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2039 {
2040 op = USE_FROM_PTR (use_p);
2041 record_invariant (data, op, false);
2042 }
2043 }
2044
2045 /* Finds interesting uses of induction variables in the statement STMT. */
2046
2047 static void
2048 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2049 {
2050 struct iv *iv;
2051 tree op, *lhs, *rhs;
2052 ssa_op_iter iter;
2053 use_operand_p use_p;
2054 enum tree_code code;
2055
2056 find_invariants_stmt (data, stmt);
2057
2058 if (gimple_code (stmt) == GIMPLE_COND)
2059 {
2060 find_interesting_uses_cond (data, stmt);
2061 return;
2062 }
2063
2064 if (is_gimple_assign (stmt))
2065 {
2066 lhs = gimple_assign_lhs_ptr (stmt);
2067 rhs = gimple_assign_rhs1_ptr (stmt);
2068
2069 if (TREE_CODE (*lhs) == SSA_NAME)
2070 {
2071 /* If the statement defines an induction variable, the uses are not
2072 interesting by themselves. */
2073
2074 iv = get_iv (data, *lhs);
2075
2076 if (iv && !integer_zerop (iv->step))
2077 return;
2078 }
2079
2080 code = gimple_assign_rhs_code (stmt);
2081 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2082 && (REFERENCE_CLASS_P (*rhs)
2083 || is_gimple_val (*rhs)))
2084 {
2085 if (REFERENCE_CLASS_P (*rhs))
2086 find_interesting_uses_address (data, stmt, rhs);
2087 else
2088 find_interesting_uses_op (data, *rhs);
2089
2090 if (REFERENCE_CLASS_P (*lhs))
2091 find_interesting_uses_address (data, stmt, lhs);
2092 return;
2093 }
2094 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2095 {
2096 find_interesting_uses_cond (data, stmt);
2097 return;
2098 }
2099
2100 /* TODO -- we should also handle address uses of type
2101
2102 memory = call (whatever);
2103
2104 and
2105
2106 call (memory). */
2107 }
2108
2109 if (gimple_code (stmt) == GIMPLE_PHI
2110 && gimple_bb (stmt) == data->current_loop->header)
2111 {
2112 iv = get_iv (data, PHI_RESULT (stmt));
2113
2114 if (iv && !integer_zerop (iv->step))
2115 return;
2116 }
2117
2118 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2119 {
2120 op = USE_FROM_PTR (use_p);
2121
2122 if (TREE_CODE (op) != SSA_NAME)
2123 continue;
2124
2125 iv = get_iv (data, op);
2126 if (!iv)
2127 continue;
2128
2129 find_interesting_uses_op (data, op);
2130 }
2131 }
2132
2133 /* Finds interesting uses of induction variables outside of loops
2134 on loop exit edge EXIT. */
2135
2136 static void
2137 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2138 {
2139 gphi *phi;
2140 gphi_iterator psi;
2141 tree def;
2142
2143 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2144 {
2145 phi = psi.phi ();
2146 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2147 if (!virtual_operand_p (def))
2148 find_interesting_uses_op (data, def);
2149 }
2150 }
2151
2152 /* Finds uses of the induction variables that are interesting. */
2153
2154 static void
2155 find_interesting_uses (struct ivopts_data *data)
2156 {
2157 basic_block bb;
2158 gimple_stmt_iterator bsi;
2159 basic_block *body = get_loop_body (data->current_loop);
2160 unsigned i;
2161 struct version_info *info;
2162 edge e;
2163
2164 if (dump_file && (dump_flags & TDF_DETAILS))
2165 fprintf (dump_file, "Uses:\n\n");
2166
2167 for (i = 0; i < data->current_loop->num_nodes; i++)
2168 {
2169 edge_iterator ei;
2170 bb = body[i];
2171
2172 FOR_EACH_EDGE (e, ei, bb->succs)
2173 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2174 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2175 find_interesting_uses_outside (data, e);
2176
2177 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2178 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2179 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2180 if (!is_gimple_debug (gsi_stmt (bsi)))
2181 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2182 }
2183
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 {
2186 bitmap_iterator bi;
2187
2188 fprintf (dump_file, "\n");
2189
2190 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2191 {
2192 info = ver_info (data, i);
2193 if (info->inv_id)
2194 {
2195 fprintf (dump_file, " ");
2196 print_generic_expr (dump_file, info->name, TDF_SLIM);
2197 fprintf (dump_file, " is invariant (%d)%s\n",
2198 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2199 }
2200 }
2201
2202 fprintf (dump_file, "\n");
2203 }
2204
2205 free (body);
2206 }
2207
2208 /* Compute the maximum offset of the [base + offset] addressing mode
2209 for the memory reference represented by USE. */
2210
2211 static HOST_WIDE_INT
2212 compute_max_addr_offset (struct iv_use *use)
2213 {
2214 int width;
2215 rtx reg, addr;
2216 HOST_WIDE_INT i, off;
2217 unsigned list_index, num;
2218 addr_space_t as;
2219 machine_mode mem_mode, addr_mode;
2220 static vec<HOST_WIDE_INT> max_offset_list;
2221
2222 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2223 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2224
2225 num = max_offset_list.length ();
2226 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2227 if (list_index >= num)
2228 {
2229 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2230 for (; num < max_offset_list.length (); num++)
2231 max_offset_list[num] = -1;
2232 }
2233
2234 off = max_offset_list[list_index];
2235 if (off != -1)
2236 return off;
2237
2238 addr_mode = targetm.addr_space.address_mode (as);
2239 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2240 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2241
2242 width = GET_MODE_BITSIZE (addr_mode) - 1;
2243 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2244 width = HOST_BITS_PER_WIDE_INT - 1;
2245
2246 for (i = width; i > 0; i--)
2247 {
2248 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2249 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2250 if (memory_address_addr_space_p (mem_mode, addr, as))
2251 break;
2252
2253 /* For some strict-alignment targets, the offset must be naturally
2254 aligned. Try an aligned offset if mem_mode is not QImode. */
2255 off = ((unsigned HOST_WIDE_INT) 1 << i);
2256 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2257 {
2258 off -= GET_MODE_SIZE (mem_mode);
2259 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2260 if (memory_address_addr_space_p (mem_mode, addr, as))
2261 break;
2262 }
2263 }
2264 if (i == 0)
2265 off = 0;
2266
2267 max_offset_list[list_index] = off;
2268 return off;
2269 }
2270
2271 /* Check if all small groups should be split. Return true if and
2272 only if:
2273
2274 1) At least one group contains two uses with different offsets.
2275 2) No group contains more than two uses with different offsets.
2276
2277 Return false otherwise. We want to split such groups because:
2278
2279 1) Small groups don't have much benefit and may interfere with
2280 general candidate selection.
2281 2) The problem size with only small groups is usually small, and the
2282 general algorithm can handle it well.
2283
2284 TODO -- Above claim may not hold when auto increment is supported. */
2285
2286 static bool
2287 split_all_small_groups (struct ivopts_data *data)
2288 {
2289 bool split_p = false;
2290 unsigned int i, n, distinct;
2291 struct iv_use *pre, *use;
2292
2293 n = n_iv_uses (data);
2294 for (i = 0; i < n; i++)
2295 {
2296 use = iv_use (data, i);
2297 if (!use->next)
2298 continue;
2299
2300 distinct = 1;
2301 gcc_assert (use->type == USE_ADDRESS);
2302 for (pre = use, use = use->next; use; pre = use, use = use->next)
2303 {
2304 if (pre->addr_offset != use->addr_offset)
2305 distinct++;
2306
2307 if (distinct > 2)
2308 return false;
2309 }
2310 if (distinct == 2)
2311 split_p = true;
2312 }
2313
2314 return split_p;
2315 }
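
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  The splitting
   heuristic above, restated over plain arrays: each group is a list of
   address uses sorted by offset, and we split all small groups only when
   every group has at most two distinct offsets and at least one group has
   exactly two.  The types and names below are hypothetical.  */
#include <stdbool.h>
#include <stddef.h>

static bool
sketch_split_all_small_groups (const long *offsets[], const size_t n_uses[],
			       size_t n_groups)
{
  bool split_p = false;

  for (size_t g = 0; g < n_groups; g++)
    {
      size_t distinct = 1;

      if (n_uses[g] < 2)
	continue;

      /* Offsets within a group are sorted, so counting boundaries between
	 unequal neighbours counts the distinct values.  */
      for (size_t i = 1; i < n_uses[g]; i++)
	if (offsets[g][i] != offsets[g][i - 1])
	  distinct++;

      if (distinct > 2)
	return false;		/* A larger group exists; keep grouping.  */
      if (distinct == 2)
	split_p = true;
    }

  return split_p;
}

int
main (void)
{
  const long g0[] = { 0, 8 };		/* Two distinct offsets.  */
  const long g1[] = { 16, 16, 16 };	/* A single distinct offset.  */
  const long *offsets[] = { g0, g1 };
  const size_t n_uses[] = { 2, 3 };

  return sketch_split_all_small_groups (offsets, n_uses, 2) ? 0 : 1;
}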
2316
2317 /* For each group of address type uses, this function further groups
2318 these uses according to the maximum offset supported by the target's
2319 [base + offset] addressing mode. */
2320
2321 static void
2322 group_address_uses (struct ivopts_data *data)
2323 {
2324 HOST_WIDE_INT max_offset = -1;
2325 unsigned int i, n, sub_id;
2326 struct iv_use *pre, *use;
2327 unsigned HOST_WIDE_INT addr_offset_first;
2328
2329 /* Reset max offset to split all small groups. */
2330 if (split_all_small_groups (data))
2331 max_offset = 0;
2332
2333 n = n_iv_uses (data);
2334 for (i = 0; i < n; i++)
2335 {
2336 use = iv_use (data, i);
2337 if (!use->next)
2338 continue;
2339
2340 gcc_assert (use->type == USE_ADDRESS);
2341 if (max_offset != 0)
2342 max_offset = compute_max_addr_offset (use);
2343
2344 while (use)
2345 {
2346 sub_id = 0;
2347 addr_offset_first = use->addr_offset;
2348 /* Only uses whose offset from the first use fits in the offset part
2349 of the addressing mode can be grouped together. */
2350 for (pre = use, use = use->next;
2351 use && (use->addr_offset - addr_offset_first
2352 <= (unsigned HOST_WIDE_INT) max_offset);
2353 pre = use, use = use->next)
2354 {
2355 use->id = pre->id;
2356 use->sub_id = ++sub_id;
2357 }
2358
2359 /* Break the list and create new group. */
2360 if (use)
2361 {
2362 pre->next = NULL;
2363 use->id = n_iv_uses (data);
2364 use->related_cands = BITMAP_ALLOC (NULL);
2365 data->iv_uses.safe_push (use);
2366 }
2367 }
2368 }
2369
2370 if (dump_file && (dump_flags & TDF_DETAILS))
2371 dump_uses (dump_file, data);
2372 }
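
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  How uses
   sorted by increasing offset are cut into groups so that every member's
   distance from its group leader fits the assumed [base + offset] limit
   MAX_OFFSET.  The offsets and the limit below are made up.  */
#include <stdio.h>

int
main (void)
{
  const long offsets[] = { 0, 4, 8, 1024, 1028, 4096 };
  const long max_offset = 255;		/* Assumed target limit.  */
  const int n = sizeof (offsets) / sizeof (offsets[0]);
  int group = 0;
  long first = offsets[0];

  for (int i = 0; i < n; i++)
    {
      if (offsets[i] - first > max_offset)
	{
	  /* Start a new group; this use becomes its leader.  */
	  group++;
	  first = offsets[i];
	}
      printf ("offset %ld -> group %d\n", offsets[i], group);
    }
  return 0;
}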
2373
2374 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2375 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2376 we are at the top-level of the processed address. */
2377
2378 static tree
2379 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2380 HOST_WIDE_INT *offset)
2381 {
2382 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2383 enum tree_code code;
2384 tree type, orig_type = TREE_TYPE (expr);
2385 HOST_WIDE_INT off0, off1, st;
2386 tree orig_expr = expr;
2387
2388 STRIP_NOPS (expr);
2389
2390 type = TREE_TYPE (expr);
2391 code = TREE_CODE (expr);
2392 *offset = 0;
2393
2394 switch (code)
2395 {
2396 case INTEGER_CST:
2397 if (!cst_and_fits_in_hwi (expr)
2398 || integer_zerop (expr))
2399 return orig_expr;
2400
2401 *offset = int_cst_value (expr);
2402 return build_int_cst (orig_type, 0);
2403
2404 case POINTER_PLUS_EXPR:
2405 case PLUS_EXPR:
2406 case MINUS_EXPR:
2407 op0 = TREE_OPERAND (expr, 0);
2408 op1 = TREE_OPERAND (expr, 1);
2409
2410 op0 = strip_offset_1 (op0, false, false, &off0);
2411 op1 = strip_offset_1 (op1, false, false, &off1);
2412
2413 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2414 if (op0 == TREE_OPERAND (expr, 0)
2415 && op1 == TREE_OPERAND (expr, 1))
2416 return orig_expr;
2417
2418 if (integer_zerop (op1))
2419 expr = op0;
2420 else if (integer_zerop (op0))
2421 {
2422 if (code == MINUS_EXPR)
2423 expr = fold_build1 (NEGATE_EXPR, type, op1);
2424 else
2425 expr = op1;
2426 }
2427 else
2428 expr = fold_build2 (code, type, op0, op1);
2429
2430 return fold_convert (orig_type, expr);
2431
2432 case MULT_EXPR:
2433 op1 = TREE_OPERAND (expr, 1);
2434 if (!cst_and_fits_in_hwi (op1))
2435 return orig_expr;
2436
2437 op0 = TREE_OPERAND (expr, 0);
2438 op0 = strip_offset_1 (op0, false, false, &off0);
2439 if (op0 == TREE_OPERAND (expr, 0))
2440 return orig_expr;
2441
2442 *offset = off0 * int_cst_value (op1);
2443 if (integer_zerop (op0))
2444 expr = op0;
2445 else
2446 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2447
2448 return fold_convert (orig_type, expr);
2449
2450 case ARRAY_REF:
2451 case ARRAY_RANGE_REF:
2452 if (!inside_addr)
2453 return orig_expr;
2454
2455 step = array_ref_element_size (expr);
2456 if (!cst_and_fits_in_hwi (step))
2457 break;
2458
2459 st = int_cst_value (step);
2460 op1 = TREE_OPERAND (expr, 1);
2461 op1 = strip_offset_1 (op1, false, false, &off1);
2462 *offset = off1 * st;
2463
2464 if (top_compref
2465 && integer_zerop (op1))
2466 {
2467 /* Strip the component reference completely. */
2468 op0 = TREE_OPERAND (expr, 0);
2469 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2470 *offset += off0;
2471 return op0;
2472 }
2473 break;
2474
2475 case COMPONENT_REF:
2476 {
2477 tree field;
2478
2479 if (!inside_addr)
2480 return orig_expr;
2481
2482 tmp = component_ref_field_offset (expr);
2483 field = TREE_OPERAND (expr, 1);
2484 if (top_compref
2485 && cst_and_fits_in_hwi (tmp)
2486 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2487 {
2488 HOST_WIDE_INT boffset, abs_off;
2489
2490 /* Strip the component reference completely. */
2491 op0 = TREE_OPERAND (expr, 0);
2492 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2493 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2494 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2495 if (boffset < 0)
2496 abs_off = -abs_off;
2497
2498 *offset = off0 + int_cst_value (tmp) + abs_off;
2499 return op0;
2500 }
2501 }
2502 break;
2503
2504 case ADDR_EXPR:
2505 op0 = TREE_OPERAND (expr, 0);
2506 op0 = strip_offset_1 (op0, true, true, &off0);
2507 *offset += off0;
2508
2509 if (op0 == TREE_OPERAND (expr, 0))
2510 return orig_expr;
2511
2512 expr = build_fold_addr_expr (op0);
2513 return fold_convert (orig_type, expr);
2514
2515 case MEM_REF:
2516 /* ??? Offset operand? */
2517 inside_addr = false;
2518 break;
2519
2520 default:
2521 return orig_expr;
2522 }
2523
2524 /* Default handling of expressions for which we want to recurse into
2525 the first operand. */
2526 op0 = TREE_OPERAND (expr, 0);
2527 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2528 *offset += off0;
2529
2530 if (op0 == TREE_OPERAND (expr, 0)
2531 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2532 return orig_expr;
2533
2534 expr = copy_node (expr);
2535 TREE_OPERAND (expr, 0) = op0;
2536 if (op1)
2537 TREE_OPERAND (expr, 1) = op1;
2538
2539 /* Inside an address, we might strip the top-level component references,
2540 thus changing the type of the expression.  The handling of ADDR_EXPR
2541 will fix that. */
2542 expr = fold_convert (orig_type, expr);
2543
2544 return expr;
2545 }
2546
2547 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2548
2549 static tree
2550 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2551 {
2552 HOST_WIDE_INT off;
2553 tree core = strip_offset_1 (expr, false, false, &off);
2554 *offset = off;
2555 return core;
2556 }
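
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  The contract
   of strip_offset above, on a minimal expression tree that only knows
   constants, variables and PLUS, mirroring the INTEGER_CST and PLUS_EXPR
   cases of strip_offset_1.  For brevity the sketch assumes at most one
   operand of a PLUS is non-constant; all names are hypothetical.  */
#include <stdio.h>

enum sketch_code { SK_CST, SK_VAR, SK_PLUS };

struct sketch_expr
{
  enum sketch_code code;
  long cst;				/* For SK_CST.  */
  const struct sketch_expr *op0, *op1;	/* For SK_PLUS.  */
};

/* Return the variable part of E and store its constant part to *OFFSET.  */
static const struct sketch_expr *
sketch_strip_offset (const struct sketch_expr *e, long *offset)
{
  if (e->code == SK_CST)
    {
      *offset = e->cst;
      return NULL;		/* The whole expression was constant.  */
    }
  if (e->code == SK_PLUS)
    {
      long off0, off1;
      const struct sketch_expr *v0 = sketch_strip_offset (e->op0, &off0);
      const struct sketch_expr *v1 = sketch_strip_offset (e->op1, &off1);
      *offset = off0 + off1;
      return v0 ? v0 : v1;	/* Keep whichever side is variable.  */
    }
  *offset = 0;
  return e;			/* SK_VAR: nothing to strip.  */
}

int
main (void)
{
  struct sketch_expr i = { SK_VAR, 0, NULL, NULL };
  struct sketch_expr c4 = { SK_CST, 4, NULL, NULL };
  struct sketch_expr sum = { SK_PLUS, 0, &i, &c4 };	/* i + 4  */
  long off;

  sketch_strip_offset (&sum, &off);
  printf ("stripped offset: %ld\n", off);	/* Prints 4.  */
  return 0;
}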
2557
2558 /* Returns variant of TYPE that can be used as base for different uses.
2559 We return unsigned type with the same precision, which avoids problems
2560 with overflows. */
2561
2562 static tree
2563 generic_type_for (tree type)
2564 {
2565 if (POINTER_TYPE_P (type))
2566 return unsigned_type_for (type);
2567
2568 if (TYPE_UNSIGNED (type))
2569 return type;
2570
2571 return unsigned_type_for (type);
2572 }
2573
2574 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2575 the bitmap in which we should store them. */
2576
2577 static struct ivopts_data *fd_ivopts_data;
2578 static tree
2579 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2580 {
2581 bitmap *depends_on = (bitmap *) data;
2582 struct version_info *info;
2583
2584 if (TREE_CODE (*expr_p) != SSA_NAME)
2585 return NULL_TREE;
2586 info = name_info (fd_ivopts_data, *expr_p);
2587
2588 if (!info->inv_id || info->has_nonlin_use)
2589 return NULL_TREE;
2590
2591 if (!*depends_on)
2592 *depends_on = BITMAP_ALLOC (NULL);
2593 bitmap_set_bit (*depends_on, info->inv_id);
2594
2595 return NULL_TREE;
2596 }
2597
2598 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2599 position to POS. If USE is not NULL, the candidate is set as related to
2600 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2601 replacement of the final value of the iv by a direct computation. */
2602
2603 static struct iv_cand *
2604 add_candidate_1 (struct ivopts_data *data,
2605 tree base, tree step, bool important, enum iv_position pos,
2606 struct iv_use *use, gimple incremented_at)
2607 {
2608 unsigned i;
2609 struct iv_cand *cand = NULL;
2610 tree type, orig_type;
2611
2612 /* For non-original variables, make sure their values are computed in a type
2613 that does not invoke undefined behavior on overflows (since in general,
2614 we cannot prove that these induction variables are non-wrapping). */
2615 if (pos != IP_ORIGINAL)
2616 {
2617 orig_type = TREE_TYPE (base);
2618 type = generic_type_for (orig_type);
2619 if (type != orig_type)
2620 {
2621 base = fold_convert (type, base);
2622 step = fold_convert (type, step);
2623 }
2624 }
2625
2626 for (i = 0; i < n_iv_cands (data); i++)
2627 {
2628 cand = iv_cand (data, i);
2629
2630 if (cand->pos != pos)
2631 continue;
2632
2633 if (cand->incremented_at != incremented_at
2634 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2635 && cand->ainc_use != use))
2636 continue;
2637
2638 if (!cand->iv)
2639 {
2640 if (!base && !step)
2641 break;
2642
2643 continue;
2644 }
2645
2646 if (!base && !step)
2647 continue;
2648
2649 if (operand_equal_p (base, cand->iv->base, 0)
2650 && operand_equal_p (step, cand->iv->step, 0)
2651 && (TYPE_PRECISION (TREE_TYPE (base))
2652 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2653 break;
2654 }
2655
2656 if (i == n_iv_cands (data))
2657 {
2658 cand = XCNEW (struct iv_cand);
2659 cand->id = i;
2660
2661 if (!base && !step)
2662 cand->iv = NULL;
2663 else
2664 cand->iv = alloc_iv (data, base, step);
2665
2666 cand->pos = pos;
2667 if (pos != IP_ORIGINAL && cand->iv)
2668 {
2669 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2670 cand->var_after = cand->var_before;
2671 }
2672 cand->important = important;
2673 cand->incremented_at = incremented_at;
2674 data->iv_candidates.safe_push (cand);
2675
2676 if (step
2677 && TREE_CODE (step) != INTEGER_CST)
2678 {
2679 fd_ivopts_data = data;
2680 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2681 }
2682
2683 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2684 cand->ainc_use = use;
2685 else
2686 cand->ainc_use = NULL;
2687
2688 if (dump_file && (dump_flags & TDF_DETAILS))
2689 dump_cand (dump_file, cand);
2690 }
2691
2692 if (important && !cand->important)
2693 {
2694 cand->important = true;
2695 if (dump_file && (dump_flags & TDF_DETAILS))
2696 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2697 }
2698
2699 if (use)
2700 {
2701 bitmap_set_bit (use->related_cands, i);
2702 if (dump_file && (dump_flags & TDF_DETAILS))
2703 fprintf (dump_file, "Candidate %d is related to use %d\n",
2704 cand->id, use->id);
2705 }
2706
2707 return cand;
2708 }
2709
2710 /* Returns true if incrementing the induction variable at the end of the LOOP
2711 is allowed.
2712
2713 The purpose is to avoid splitting the latch edge with a biv increment, thus
2714 creating a jump, possibly confusing other optimization passes and leaving
2715 less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2716 is not available (so we do not have a better alternative), or if the latch
2717 edge is already nonempty. */
2718
2719 static bool
2720 allow_ip_end_pos_p (struct loop *loop)
2721 {
2722 if (!ip_normal_pos (loop))
2723 return true;
2724
2725 if (!empty_block_p (ip_end_pos (loop)))
2726 return true;
2727
2728 return false;
2729 }
2730
2731 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2732 Important field is set to IMPORTANT. */
2733
2734 static void
2735 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2736 bool important, struct iv_use *use)
2737 {
2738 basic_block use_bb = gimple_bb (use->stmt);
2739 machine_mode mem_mode;
2740 unsigned HOST_WIDE_INT cstepi;
2741
2742 /* If we insert the increment in any position other than the standard
2743 ones, we must ensure that it is incremented once per iteration.
2744 It must not be in an inner nested loop, or one side of an if
2745 statement. */
2746 if (use_bb->loop_father != data->current_loop
2747 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2748 || stmt_could_throw_p (use->stmt)
2749 || !cst_and_fits_in_hwi (step))
2750 return;
2751
2752 cstepi = int_cst_value (step);
2753
2754 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2755 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2756 || USE_STORE_PRE_INCREMENT (mem_mode))
2757 && GET_MODE_SIZE (mem_mode) == cstepi)
2758 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2759 || USE_STORE_PRE_DECREMENT (mem_mode))
2760 && GET_MODE_SIZE (mem_mode) == -cstepi))
2761 {
2762 enum tree_code code = MINUS_EXPR;
2763 tree new_base;
2764 tree new_step = step;
2765
2766 if (POINTER_TYPE_P (TREE_TYPE (base)))
2767 {
2768 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2769 code = POINTER_PLUS_EXPR;
2770 }
2771 else
2772 new_step = fold_convert (TREE_TYPE (base), new_step);
2773 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2774 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2775 use->stmt);
2776 }
2777 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2778 || USE_STORE_POST_INCREMENT (mem_mode))
2779 && GET_MODE_SIZE (mem_mode) == cstepi)
2780 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2781 || USE_STORE_POST_DECREMENT (mem_mode))
2782 && GET_MODE_SIZE (mem_mode) == -cstepi))
2783 {
2784 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2785 use->stmt);
2786 }
2787 }
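
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  The kind of
   source loop the auto-increment candidates above are aimed at: the address
   step equals the access size, so the load can use a post-increment
   addressing mode, and a candidate incremented right before (IP_BEFORE_USE)
   or right after (IP_AFTER_USE) the memory access is worth adding.  */
#include <stddef.h>

static long
sketch_sum (const long *p, size_t n)
{
  long sum = 0;

  for (size_t i = 0; i < n; i++)
    sum += *p++;	/* Address step == sizeof (long) == access size.  */

  return sum;
}

int
main (void)
{
  long a[4] = { 1, 2, 3, 4 };
  return sketch_sum (a, 4) == 10 ? 0 : 1;
}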
2788
2789 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2790 position to POS. If USE is not NULL, the candidate is set as related to
2791 it. The candidate computation is scheduled before exit condition and at
2792 the end of loop. */
2793
2794 static void
2795 add_candidate (struct ivopts_data *data,
2796 tree base, tree step, bool important, struct iv_use *use)
2797 {
2798 gcc_assert (use == NULL || use->sub_id == 0);
2799
2800 if (ip_normal_pos (data->current_loop))
2801 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2802 if (ip_end_pos (data->current_loop)
2803 && allow_ip_end_pos_p (data->current_loop))
2804 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2805 }
2806
2807 /* Adds standard iv candidates. */
2808
2809 static void
2810 add_standard_iv_candidates (struct ivopts_data *data)
2811 {
2812 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2813
2814 /* The same for a double-integer type if it is still fast enough. */
2815 if (TYPE_PRECISION
2816 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2817 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2818 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2819 build_int_cst (long_integer_type_node, 1), true, NULL);
2820
2821 /* The same for a double-integer type if it is still fast enough. */
2822 if (TYPE_PRECISION
2823 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2824 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2825 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2826 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2827 }
2828
2829
2830 /* Adds candidates based on the old induction variable IV. */
2831
2832 static void
2833 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2834 {
2835 gimple phi;
2836 tree def;
2837 struct iv_cand *cand;
2838
2839 add_candidate (data, iv->base, iv->step, true, NULL);
2840
2841 /* The same, but with initial value zero. */
2842 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2843 add_candidate (data, size_int (0), iv->step, true, NULL);
2844 else
2845 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2846 iv->step, true, NULL);
2847
2848 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2849 if (gimple_code (phi) == GIMPLE_PHI)
2850 {
2851 /* Additionally record the possibility of leaving the original iv
2852 untouched. */
2853 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2854 /* Don't add candidate if it's from another PHI node because
2855 it's an affine iv appearing in the form of PEELED_CHREC. */
2856 phi = SSA_NAME_DEF_STMT (def);
2857 if (gimple_code (phi) != GIMPLE_PHI)
2858 {
2859 cand = add_candidate_1 (data,
2860 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2861 SSA_NAME_DEF_STMT (def));
2862 cand->var_before = iv->ssa_name;
2863 cand->var_after = def;
2864 }
2865 else
2866 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2867 }
2868 }
2869
2870 /* Adds candidates based on the old induction variables. */
2871
2872 static void
2873 add_iv_candidate_for_bivs (struct ivopts_data *data)
2874 {
2875 unsigned i;
2876 struct iv *iv;
2877 bitmap_iterator bi;
2878
2879 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2880 {
2881 iv = ver_info (data, i)->iv;
2882 if (iv && iv->biv_p && !integer_zerop (iv->step))
2883 add_iv_candidate_for_biv (data, iv);
2884 }
2885 }
2886
2887 /* Adds candidates based on the value of USE's iv. */
2888
2889 static void
2890 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
2891 {
2892 unsigned HOST_WIDE_INT offset;
2893 tree base;
2894 tree basetype;
2895 struct iv *iv = use->iv;
2896
2897 add_candidate (data, iv->base, iv->step, false, use);
2898
2899 /* The same, but with initial value zero.  Make such a variable important,
2900 since it is generic enough that possibly many uses may be based
2901 on it. */
2902 basetype = TREE_TYPE (iv->base);
2903 if (POINTER_TYPE_P (basetype))
2904 basetype = sizetype;
2905 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
2906
2907 /* Third, try removing the constant offset. Make sure to even
2908 add a candidate for &a[0] vs. (T *)&a. */
2909 base = strip_offset (iv->base, &offset);
2910 if (offset || base != iv->base)
2911 add_candidate (data, base, iv->step, false, use);
2912
2913 /* Finally, add auto-increment candidates.  Make such variables
2914 important, since other iv uses with the same base object may be based
2915 on them. */
2916 if (use != NULL && use->type == USE_ADDRESS)
2917 add_autoinc_candidates (data, iv->base, iv->step, true, use);
2918 }
2919
2920 /* Adds candidates based on the uses. */
2921
2922 static void
2923 add_iv_candidate_for_uses (struct ivopts_data *data)
2924 {
2925 unsigned i;
2926
2927 for (i = 0; i < n_iv_uses (data); i++)
2928 {
2929 struct iv_use *use = iv_use (data, i);
2930
2931 if (!use)
2932 continue;
2933
2934 switch (use->type)
2935 {
2936 case USE_NONLINEAR_EXPR:
2937 case USE_COMPARE:
2938 case USE_ADDRESS:
2939 /* Just add the ivs based on the value of the iv used here. */
2940 add_iv_candidate_for_use (data, use);
2941 break;
2942
2943 default:
2944 gcc_unreachable ();
2945 }
2946 }
2947 }
2948
2949 /* Record important candidates and add them to related_cands bitmaps
2950 if needed. */
2951
2952 static void
2953 record_important_candidates (struct ivopts_data *data)
2954 {
2955 unsigned i;
2956 struct iv_use *use;
2957
2958 for (i = 0; i < n_iv_cands (data); i++)
2959 {
2960 struct iv_cand *cand = iv_cand (data, i);
2961
2962 if (cand->important)
2963 bitmap_set_bit (data->important_candidates, i);
2964 }
2965
2966 data->consider_all_candidates = (n_iv_cands (data)
2967 <= CONSIDER_ALL_CANDIDATES_BOUND);
2968
2969 if (data->consider_all_candidates)
2970 {
2971 /* We will not need "related_cands" bitmaps in this case,
2972 so release them to decrease peak memory consumption. */
2973 for (i = 0; i < n_iv_uses (data); i++)
2974 {
2975 use = iv_use (data, i);
2976 BITMAP_FREE (use->related_cands);
2977 }
2978 }
2979 else
2980 {
2981 /* Add important candidates to the related_cands bitmaps. */
2982 for (i = 0; i < n_iv_uses (data); i++)
2983 bitmap_ior_into (iv_use (data, i)->related_cands,
2984 data->important_candidates);
2985 }
2986 }
2987
2988 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2989 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2990 we allocate a simple list to every use. */
2991
2992 static void
2993 alloc_use_cost_map (struct ivopts_data *data)
2994 {
2995 unsigned i, size, s;
2996
2997 for (i = 0; i < n_iv_uses (data); i++)
2998 {
2999 struct iv_use *use = iv_use (data, i);
3000
3001 if (data->consider_all_candidates)
3002 size = n_iv_cands (data);
3003 else
3004 {
3005 s = bitmap_count_bits (use->related_cands);
3006
3007 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
3008 size = s ? (1 << ceil_log2 (s)) : 1;
3009 }
3010
3011 use->n_map_members = size;
3012 use->cost_map = XCNEWVEC (struct cost_pair, size);
3013 }
3014 }
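
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  Why the map
   size is rounded up to a power of two: for size == 1 << k, "id % size"
   equals "id & (size - 1)", which is the cheap hash used by
   set_use_iv_cost/get_use_iv_cost below.  The helper name is hypothetical.  */
#include <assert.h>

static unsigned
sketch_ceil_pow2 (unsigned s)
{
  unsigned size = 1;
  while (size < s)
    size <<= 1;
  return size;
}

int
main (void)
{
  unsigned size = sketch_ceil_pow2 (5);	/* Rounds 5 up to 8.  */
  assert (size == 8);
  for (unsigned id = 0; id < 100; id++)
    assert ((id & (size - 1)) == id % size);
  return 0;
}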
3015
3016 /* Returns description of computation cost of expression whose runtime
3017 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3018
3019 static comp_cost
3020 new_cost (unsigned runtime, unsigned complexity)
3021 {
3022 comp_cost cost;
3023
3024 cost.cost = runtime;
3025 cost.complexity = complexity;
3026
3027 return cost;
3028 }
3029
3030 /* Returns true if COST is infinite. */
3031
3032 static bool
3033 infinite_cost_p (comp_cost cost)
3034 {
3035 return cost.cost == INFTY;
3036 }
3037
3038 /* Adds costs COST1 and COST2. */
3039
3040 static comp_cost
3041 add_costs (comp_cost cost1, comp_cost cost2)
3042 {
3043 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3044 return infinite_cost;
3045
3046 cost1.cost += cost2.cost;
3047 cost1.complexity += cost2.complexity;
3048
3049 return cost1;
3050 }
3051 /* Subtracts cost COST2 from COST1. */
3052
3053 static comp_cost
3054 sub_costs (comp_cost cost1, comp_cost cost2)
3055 {
3056 cost1.cost -= cost2.cost;
3057 cost1.complexity -= cost2.complexity;
3058
3059 return cost1;
3060 }
3061
3062 /* Returns a negative number if COST1 < COST2, a positive number if
3063 COST1 > COST2, and 0 if COST1 = COST2. */
3064
3065 static int
3066 compare_costs (comp_cost cost1, comp_cost cost2)
3067 {
3068 if (cost1.cost == cost2.cost)
3069 return cost1.complexity - cost2.complexity;
3070
3071 return cost1.cost - cost2.cost;
3072 }
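
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  The ordering
   implemented by compare_costs above: the runtime cost decides first, and
   the complexity field only breaks ties.  The struct below merely mirrors
   comp_cost for the example.  */
#include <assert.h>

struct sketch_cost { int cost; int complexity; };

static int
sketch_compare (struct sketch_cost a, struct sketch_cost b)
{
  if (a.cost == b.cost)
    return a.complexity - b.complexity;
  return a.cost - b.cost;
}

int
main (void)
{
  struct sketch_cost cheap_simple = { 4, 1 };
  struct sketch_cost cheap_complex = { 4, 3 };
  struct sketch_cost expensive = { 7, 0 };

  assert (sketch_compare (cheap_simple, cheap_complex) < 0);
  assert (sketch_compare (cheap_complex, expensive) < 0);
  return 0;
}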
3073
3074 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3075 depends on the invariants DEPENDS_ON, that the value used in expressing it
3076 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3077
3078 static void
3079 set_use_iv_cost (struct ivopts_data *data,
3080 struct iv_use *use, struct iv_cand *cand,
3081 comp_cost cost, bitmap depends_on, tree value,
3082 enum tree_code comp, int inv_expr_id)
3083 {
3084 unsigned i, s;
3085
3086 if (infinite_cost_p (cost))
3087 {
3088 BITMAP_FREE (depends_on);
3089 return;
3090 }
3091
3092 if (data->consider_all_candidates)
3093 {
3094 use->cost_map[cand->id].cand = cand;
3095 use->cost_map[cand->id].cost = cost;
3096 use->cost_map[cand->id].depends_on = depends_on;
3097 use->cost_map[cand->id].value = value;
3098 use->cost_map[cand->id].comp = comp;
3099 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3100 return;
3101 }
3102
3103 /* n_map_members is a power of two, so this computes modulo. */
3104 s = cand->id & (use->n_map_members - 1);
3105 for (i = s; i < use->n_map_members; i++)
3106 if (!use->cost_map[i].cand)
3107 goto found;
3108 for (i = 0; i < s; i++)
3109 if (!use->cost_map[i].cand)
3110 goto found;
3111
3112 gcc_unreachable ();
3113
3114 found:
3115 use->cost_map[i].cand = cand;
3116 use->cost_map[i].cost = cost;
3117 use->cost_map[i].depends_on = depends_on;
3118 use->cost_map[i].value = value;
3119 use->cost_map[i].comp = comp;
3120 use->cost_map[i].inv_expr_id = inv_expr_id;
3121 }
3122
3123 /* Gets cost of (USE, CANDIDATE) pair. */
3124
3125 static struct cost_pair *
3126 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3127 struct iv_cand *cand)
3128 {
3129 unsigned i, s;
3130 struct cost_pair *ret;
3131
3132 if (!cand)
3133 return NULL;
3134
3135 if (data->consider_all_candidates)
3136 {
3137 ret = use->cost_map + cand->id;
3138 if (!ret->cand)
3139 return NULL;
3140
3141 return ret;
3142 }
3143
3144 /* n_map_members is a power of two, so this computes modulo. */
3145 s = cand->id & (use->n_map_members - 1);
3146 for (i = s; i < use->n_map_members; i++)
3147 if (use->cost_map[i].cand == cand)
3148 return use->cost_map + i;
3149 else if (use->cost_map[i].cand == NULL)
3150 return NULL;
3151 for (i = 0; i < s; i++)
3152 if (use->cost_map[i].cand == cand)
3153 return use->cost_map + i;
3154 else if (use->cost_map[i].cand == NULL)
3155 return NULL;
3156
3157 return NULL;
3158 }
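
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  The open
   addressing used by set_use_iv_cost/get_use_iv_cost above, reduced to a
   table of ints keyed by candidate id (assumed nonzero; 0 marks an empty
   slot).  Probing starts at id & (size - 1), and a lookup may stop early at
   an empty slot because insertion never skips one.  Names are made up.  */
#include <assert.h>
#include <string.h>

#define SK_SIZE 8		/* Power of two, as in alloc_use_cost_map.  */

static int sk_ids[SK_SIZE];

static void
sk_insert (int id)
{
  for (unsigned i = 0; i < SK_SIZE; i++)
    {
      unsigned slot = (id + i) & (SK_SIZE - 1);
      if (sk_ids[slot] == 0)
	{
	  sk_ids[slot] = id;
	  return;
	}
    }
  assert (0);	/* Table full; cannot happen with a correctly sized map.  */
}

static int
sk_lookup (int id)
{
  for (unsigned i = 0; i < SK_SIZE; i++)
    {
      unsigned slot = (id + i) & (SK_SIZE - 1);
      if (sk_ids[slot] == id)
	return 1;
      if (sk_ids[slot] == 0)
	return 0;	/* Empty slot: id was never inserted.  */
    }
  return 0;
}

int
main (void)
{
  memset (sk_ids, 0, sizeof sk_ids);
  sk_insert (3);
  sk_insert (11);	/* 11 & 7 == 3 collides with 3 and probes to slot 4.  */
  assert (sk_lookup (3) && sk_lookup (11) && !sk_lookup (19));
  return 0;
}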
3159
3160 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3161 static rtx
3162 produce_memory_decl_rtl (tree obj, int *regno)
3163 {
3164 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3165 machine_mode address_mode = targetm.addr_space.address_mode (as);
3166 rtx x;
3167
3168 gcc_assert (obj);
3169 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3170 {
3171 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3172 x = gen_rtx_SYMBOL_REF (address_mode, name);
3173 SET_SYMBOL_REF_DECL (x, obj);
3174 x = gen_rtx_MEM (DECL_MODE (obj), x);
3175 set_mem_addr_space (x, as);
3176 targetm.encode_section_info (obj, x, true);
3177 }
3178 else
3179 {
3180 x = gen_raw_REG (address_mode, (*regno)++);
3181 x = gen_rtx_MEM (DECL_MODE (obj), x);
3182 set_mem_addr_space (x, as);
3183 }
3184
3185 return x;
3186 }
3187
3188 /* Prepares decl_rtl for the variables referred to in *EXPR_P.  Callback for
3189 walk_tree.  DATA contains the actual fake register number. */
3190
3191 static tree
3192 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3193 {
3194 tree obj = NULL_TREE;
3195 rtx x = NULL_RTX;
3196 int *regno = (int *) data;
3197
3198 switch (TREE_CODE (*expr_p))
3199 {
3200 case ADDR_EXPR:
3201 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3202 handled_component_p (*expr_p);
3203 expr_p = &TREE_OPERAND (*expr_p, 0))
3204 continue;
3205 obj = *expr_p;
3206 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3207 x = produce_memory_decl_rtl (obj, regno);
3208 break;
3209
3210 case SSA_NAME:
3211 *ws = 0;
3212 obj = SSA_NAME_VAR (*expr_p);
3213 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3214 if (!obj)
3215 return NULL_TREE;
3216 if (!DECL_RTL_SET_P (obj))
3217 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3218 break;
3219
3220 case VAR_DECL:
3221 case PARM_DECL:
3222 case RESULT_DECL:
3223 *ws = 0;
3224 obj = *expr_p;
3225
3226 if (DECL_RTL_SET_P (obj))
3227 break;
3228
3229 if (DECL_MODE (obj) == BLKmode)
3230 x = produce_memory_decl_rtl (obj, regno);
3231 else
3232 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3233
3234 break;
3235
3236 default:
3237 break;
3238 }
3239
3240 if (x)
3241 {
3242 decl_rtl_to_reset.safe_push (obj);
3243 SET_DECL_RTL (obj, x);
3244 }
3245
3246 return NULL_TREE;
3247 }
3248
3249 /* Determines cost of the computation of EXPR. */
3250
3251 static unsigned
3252 computation_cost (tree expr, bool speed)
3253 {
3254 rtx_insn *seq;
3255 rtx rslt;
3256 tree type = TREE_TYPE (expr);
3257 unsigned cost;
3258 /* Avoid using hard regs in ways which may be unsupported. */
3259 int regno = LAST_VIRTUAL_REGISTER + 1;
3260 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3261 enum node_frequency real_frequency = node->frequency;
3262
3263 node->frequency = NODE_FREQUENCY_NORMAL;
3264 crtl->maybe_hot_insn_p = speed;
3265 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3266 start_sequence ();
3267 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3268 seq = get_insns ();
3269 end_sequence ();
3270 default_rtl_profile ();
3271 node->frequency = real_frequency;
3272
3273 cost = seq_cost (seq, speed);
3274 if (MEM_P (rslt))
3275 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3276 TYPE_ADDR_SPACE (type), speed);
3277 else if (!REG_P (rslt))
3278 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3279
3280 return cost;
3281 }
3282
3283 /* Returns variable containing the value of candidate CAND at statement AT. */
3284
3285 static tree
3286 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3287 {
3288 if (stmt_after_increment (loop, cand, stmt))
3289 return cand->var_after;
3290 else
3291 return cand->var_before;
3292 }
3293
3294 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3295 same precision that is at least as wide as the precision of TYPE, stores
3296 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3297 type of A and B. */
3298
3299 static tree
3300 determine_common_wider_type (tree *a, tree *b)
3301 {
3302 tree wider_type = NULL;
3303 tree suba, subb;
3304 tree atype = TREE_TYPE (*a);
3305
3306 if (CONVERT_EXPR_P (*a))
3307 {
3308 suba = TREE_OPERAND (*a, 0);
3309 wider_type = TREE_TYPE (suba);
3310 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3311 return atype;
3312 }
3313 else
3314 return atype;
3315
3316 if (CONVERT_EXPR_P (*b))
3317 {
3318 subb = TREE_OPERAND (*b, 0);
3319 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3320 return atype;
3321 }
3322 else
3323 return atype;
3324
3325 *a = suba;
3326 *b = subb;
3327 return wider_type;
3328 }
3329
3330 /* Determines the expression by which USE is expressed from induction variable
3331 CAND at statement AT in LOOP.  The expression is stored in a decomposed
3332 form into AFF.  Returns false if USE cannot be expressed using CAND. */
3333
3334 static bool
3335 get_computation_aff (struct loop *loop,
3336 struct iv_use *use, struct iv_cand *cand, gimple at,
3337 struct aff_tree *aff)
3338 {
3339 tree ubase = use->iv->base;
3340 tree ustep = use->iv->step;
3341 tree cbase = cand->iv->base;
3342 tree cstep = cand->iv->step, cstep_common;
3343 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3344 tree common_type, var;
3345 tree uutype;
3346 aff_tree cbase_aff, var_aff;
3347 widest_int rat;
3348
3349 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3350 {
3351 /* We do not have a precision to express the values of use. */
3352 return false;
3353 }
3354
3355 var = var_at_stmt (loop, cand, at);
3356 uutype = unsigned_type_for (utype);
3357
3358 /* If the conversion is not noop, perform it. */
3359 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3360 {
3361 cstep = fold_convert (uutype, cstep);
3362 cbase = fold_convert (uutype, cbase);
3363 var = fold_convert (uutype, var);
3364 }
3365
3366 if (!constant_multiple_of (ustep, cstep, &rat))
3367 return false;
3368
3369 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3370 type, we achieve better folding by computing their difference in this
3371 wider type, and casting the result to UUTYPE.  We do not need to worry about
3372 overflows, as all the arithmetic will in the end be performed in UUTYPE
3373 anyway. */
3374 common_type = determine_common_wider_type (&ubase, &cbase);
3375
3376 /* use = ubase - ratio * cbase + ratio * var. */
3377 tree_to_aff_combination (ubase, common_type, aff);
3378 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3379 tree_to_aff_combination (var, uutype, &var_aff);
3380
3381 /* We need to shift the value if we are after the increment. */
3382 if (stmt_after_increment (loop, cand, at))
3383 {
3384 aff_tree cstep_aff;
3385
3386 if (common_type != uutype)
3387 cstep_common = fold_convert (common_type, cstep);
3388 else
3389 cstep_common = cstep;
3390
3391 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3392 aff_combination_add (&cbase_aff, &cstep_aff);
3393 }
3394
3395 aff_combination_scale (&cbase_aff, -rat);
3396 aff_combination_add (aff, &cbase_aff);
3397 if (common_type != uutype)
3398 aff_combination_convert (aff, uutype);
3399
3400 aff_combination_scale (&var_aff, rat);
3401 aff_combination_add (aff, &var_aff);
3402
3403 return true;
3404 }
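
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  The rewrite
   "use = ubase - ratio * cbase + ratio * var" built by get_computation_aff
   above, checked on concrete numbers: the use iv is 8 + 12*i, the candidate
   iv is 2 + 4*i, so ratio = 12/4 = 3 and the use value equals 2 + 3*var.
   The constants are made up.  */
#include <assert.h>

int
main (void)
{
  const long ubase = 8, ustep = 12;	/* Use iv:       8 + 12*i.  */
  const long cbase = 2, cstep = 4;	/* Candidate iv: 2 +  4*i.  */
  const long ratio = ustep / cstep;	/* constant_multiple_of => 3.  */

  for (long i = 0; i < 100; i++)
    {
      long var = cbase + cstep * i;	/* Value of the candidate.  */
      long use = ubase + ustep * i;	/* Value the use needs.  */
      assert (use == ubase - ratio * cbase + ratio * var);
    }
  return 0;
}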
3405
3406 /* Return the type of USE. */
3407
3408 static tree
3409 get_use_type (struct iv_use *use)
3410 {
3411 tree base_type = TREE_TYPE (use->iv->base);
3412 tree type;
3413
3414 if (use->type == USE_ADDRESS)
3415 {
3416 /* The base_type may be a void pointer. Create a pointer type based on
3417 the mem_ref instead. */
3418 type = build_pointer_type (TREE_TYPE (*use->op_p));
3419 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3420 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3421 }
3422 else
3423 type = base_type;
3424
3425 return type;
3426 }
3427
3428 /* Determines the expression by which USE is expressed from induction variable
3429 CAND at statement AT in LOOP.  The computation is unshared. */
3430
3431 static tree
3432 get_computation_at (struct loop *loop,
3433 struct iv_use *use, struct iv_cand *cand, gimple at)
3434 {
3435 aff_tree aff;
3436 tree type = get_use_type (use);
3437
3438 if (!get_computation_aff (loop, use, cand, at, &aff))
3439 return NULL_TREE;
3440 unshare_aff_combination (&aff);
3441 return fold_convert (type, aff_combination_to_tree (&aff));
3442 }
3443
3444 /* Determines the expression by which USE is expressed from induction variable
3445 CAND in LOOP.  The computation is unshared. */
3446
3447 static tree
3448 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3449 {
3450 return get_computation_at (loop, use, cand, use->stmt);
3451 }
3452
3453 /* Adjust the cost COST for being in loop setup rather than loop body.
3454 If we're optimizing for space, the loop setup overhead is constant;
3455 if we're optimizing for speed, amortize it over the per-iteration cost. */
3456 static unsigned
3457 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3458 {
3459 if (cost == INFTY)
3460 return cost;
3461 else if (optimize_loop_for_speed_p (data->current_loop))
3462 return cost / avg_loop_niter (data->current_loop);
3463 else
3464 return cost;
3465 }
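
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  The
   amortization done by adjust_setup_cost above: when optimizing for speed,
   a one-time setup cost paid before the loop is divided by the expected
   iteration count, while for size it is charged in full.  Names are
   hypothetical.  */
#include <assert.h>

static unsigned
sketch_adjust_setup_cost (unsigned cost, unsigned avg_niter, int for_speed)
{
  return for_speed ? cost / avg_niter : cost;
}

int
main (void)
{
  /* A setup cost of 40 before a loop expected to run 10 times contributes
     4 per iteration when optimizing for speed.  */
  assert (sketch_adjust_setup_cost (40, 10, 1) == 4);
  assert (sketch_adjust_setup_cost (40, 10, 0) == 40);
  return 0;
}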
3466
3467 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3468 validity for a memory reference accessing memory of mode MODE in
3469 address space AS. */
3470
3471
3472 bool
3473 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3474 addr_space_t as)
3475 {
3476 #define MAX_RATIO 128
3477 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3478 static vec<sbitmap> valid_mult_list;
3479 sbitmap valid_mult;
3480
3481 if (data_index >= valid_mult_list.length ())
3482 valid_mult_list.safe_grow_cleared (data_index + 1);
3483
3484 valid_mult = valid_mult_list[data_index];
3485 if (!valid_mult)
3486 {
3487 machine_mode address_mode = targetm.addr_space.address_mode (as);
3488 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3489 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3490 rtx addr, scaled;
3491 HOST_WIDE_INT i;
3492
3493 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3494 bitmap_clear (valid_mult);
3495 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3496 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3497 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3498 {
3499 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3500 if (memory_address_addr_space_p (mode, addr, as)
3501 || memory_address_addr_space_p (mode, scaled, as))
3502 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3503 }
3504
3505 if (dump_file && (dump_flags & TDF_DETAILS))
3506 {
3507 fprintf (dump_file, " allowed multipliers:");
3508 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3509 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3510 fprintf (dump_file, " %d", (int) i);
3511 fprintf (dump_file, "\n");
3512 fprintf (dump_file, "\n");
3513 }
3514
3515 valid_mult_list[data_index] = valid_mult;
3516 }
3517
3518 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3519 return false;
3520
3521 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3522 }
3523
3524 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3525 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3526 variable is omitted. Compute the cost for a memory reference that accesses
3527 a memory location of mode MEM_MODE in address space AS.
3528
3529 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3530 size of MEM_MODE / RATIO) is available. To make this determination, we
3531 look at the size of the increment to be made, which is given in CSTEP.
3532 CSTEP may be zero if the step is unknown.
3533 STMT_AFTER_INC is true iff the statement we're looking at is after the
3534 increment of the original biv.
3535
3536 TODO -- there must be some better way. This all is quite crude. */
3537
3538 enum ainc_type
3539 {
3540 AINC_PRE_INC, /* Pre increment. */
3541 AINC_PRE_DEC, /* Pre decrement. */
3542 AINC_POST_INC, /* Post increment. */
3543 AINC_POST_DEC, /* Post decrement. */
3544 AINC_NONE /* Also the number of auto increment types. */
3545 };
3546
3547 typedef struct address_cost_data_s
3548 {
3549 HOST_WIDE_INT min_offset, max_offset;
3550 unsigned costs[2][2][2][2];
3551 unsigned ainc_costs[AINC_NONE];
3552 } *address_cost_data;
3553
3554
3555 static comp_cost
3556 get_address_cost (bool symbol_present, bool var_present,
3557 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3558 HOST_WIDE_INT cstep, machine_mode mem_mode,
3559 addr_space_t as, bool speed,
3560 bool stmt_after_inc, bool *may_autoinc)
3561 {
3562 machine_mode address_mode = targetm.addr_space.address_mode (as);
3563 static vec<address_cost_data> address_cost_data_list;
3564 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3565 address_cost_data data;
3566 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3567 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3568 unsigned cost, acost, complexity;
3569 enum ainc_type autoinc_type;
3570 bool offset_p, ratio_p, autoinc;
3571 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3572 unsigned HOST_WIDE_INT mask;
3573 unsigned bits;
3574
3575 if (data_index >= address_cost_data_list.length ())
3576 address_cost_data_list.safe_grow_cleared (data_index + 1);
3577
3578 data = address_cost_data_list[data_index];
3579 if (!data)
3580 {
3581 HOST_WIDE_INT i;
3582 HOST_WIDE_INT rat, off = 0;
3583 int old_cse_not_expected, width;
3584 unsigned sym_p, var_p, off_p, rat_p, add_c;
3585 rtx_insn *seq;
3586 rtx addr, base;
3587 rtx reg0, reg1;
3588
3589 data = (address_cost_data) xcalloc (1, sizeof (*data));
3590
3591 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3592
3593 width = GET_MODE_BITSIZE (address_mode) - 1;
3594 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3595 width = HOST_BITS_PER_WIDE_INT - 1;
3596 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3597
3598 for (i = width; i >= 0; i--)
3599 {
3600 off = -((unsigned HOST_WIDE_INT) 1 << i);
3601 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3602 if (memory_address_addr_space_p (mem_mode, addr, as))
3603 break;
3604 }
3605 data->min_offset = (i == -1? 0 : off);
3606
3607 for (i = width; i >= 0; i--)
3608 {
3609 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3610 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3611 if (memory_address_addr_space_p (mem_mode, addr, as))
3612 break;
3613 /* For some strict-alignment targets, the offset must be naturally
3614 aligned. Try an aligned offset if mem_mode is not QImode. */
3615 off = mem_mode != QImode
3616 ? ((unsigned HOST_WIDE_INT) 1 << i)
3617 - GET_MODE_SIZE (mem_mode)
3618 : 0;
3619 if (off > 0)
3620 {
3621 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3622 if (memory_address_addr_space_p (mem_mode, addr, as))
3623 break;
3624 }
3625 }
3626 if (i == -1)
3627 off = 0;
3628 data->max_offset = off;
3629
3630 if (dump_file && (dump_flags & TDF_DETAILS))
3631 {
3632 fprintf (dump_file, "get_address_cost:\n");
3633 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3634 GET_MODE_NAME (mem_mode),
3635 data->min_offset);
3636 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3637 GET_MODE_NAME (mem_mode),
3638 data->max_offset);
3639 }
3640
3641 rat = 1;
3642 for (i = 2; i <= MAX_RATIO; i++)
3643 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3644 {
3645 rat = i;
3646 break;
3647 }
3648
3649 /* Compute the cost of various addressing modes. */
3650 acost = 0;
3651 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3652 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3653
3654 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3655 || USE_STORE_PRE_DECREMENT (mem_mode))
3656 {
3657 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3658 has_predec[mem_mode]
3659 = memory_address_addr_space_p (mem_mode, addr, as);
3660
3661 if (has_predec[mem_mode])
3662 data->ainc_costs[AINC_PRE_DEC]
3663 = address_cost (addr, mem_mode, as, speed);
3664 }
3665 if (USE_LOAD_POST_DECREMENT (mem_mode)
3666 || USE_STORE_POST_DECREMENT (mem_mode))
3667 {
3668 addr = gen_rtx_POST_DEC (address_mode, reg0);
3669 has_postdec[mem_mode]
3670 = memory_address_addr_space_p (mem_mode, addr, as);
3671
3672 if (has_postdec[mem_mode])
3673 data->ainc_costs[AINC_POST_DEC]
3674 = address_cost (addr, mem_mode, as, speed);
3675 }
3676 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3677 || USE_STORE_PRE_INCREMENT (mem_mode))
3678 {
3679 addr = gen_rtx_PRE_INC (address_mode, reg0);
3680 has_preinc[mem_mode]
3681 = memory_address_addr_space_p (mem_mode, addr, as);
3682
3683 if (has_preinc[mem_mode])
3684 data->ainc_costs[AINC_PRE_INC]
3685 = address_cost (addr, mem_mode, as, speed);
3686 }
3687 if (USE_LOAD_POST_INCREMENT (mem_mode)
3688 || USE_STORE_POST_INCREMENT (mem_mode))
3689 {
3690 addr = gen_rtx_POST_INC (address_mode, reg0);
3691 has_postinc[mem_mode]
3692 = memory_address_addr_space_p (mem_mode, addr, as);
3693
3694 if (has_postinc[mem_mode])
3695 data->ainc_costs[AINC_POST_INC]
3696 = address_cost (addr, mem_mode, as, speed);
3697 }
3698 for (i = 0; i < 16; i++)
3699 {
3700 sym_p = i & 1;
3701 var_p = (i >> 1) & 1;
3702 off_p = (i >> 2) & 1;
3703 rat_p = (i >> 3) & 1;
3704
3705 addr = reg0;
3706 if (rat_p)
3707 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3708 gen_int_mode (rat, address_mode));
3709
3710 if (var_p)
3711 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3712
3713 if (sym_p)
3714 {
3715 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3716 /* ??? We can run into trouble with some backends by presenting
3717 it with symbols which haven't been properly passed through
3718 targetm.encode_section_info. By setting the local bit, we
3719 enhance the probability of things working. */
3720 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3721
3722 if (off_p)
3723 base = gen_rtx_fmt_e (CONST, address_mode,
3724 gen_rtx_fmt_ee
3725 (PLUS, address_mode, base,
3726 gen_int_mode (off, address_mode)));
3727 }
3728 else if (off_p)
3729 base = gen_int_mode (off, address_mode);
3730 else
3731 base = NULL_RTX;
3732
3733 if (base)
3734 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3735
3736 start_sequence ();
3737 /* To avoid splitting addressing modes, pretend that no cse will
3738 follow. */
3739 old_cse_not_expected = cse_not_expected;
3740 cse_not_expected = true;
3741 addr = memory_address_addr_space (mem_mode, addr, as);
3742 cse_not_expected = old_cse_not_expected;
3743 seq = get_insns ();
3744 end_sequence ();
3745
3746 acost = seq_cost (seq, speed);
3747 acost += address_cost (addr, mem_mode, as, speed);
3748
3749 if (!acost)
3750 acost = 1;
3751 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3752 }
3753
3754 /* On some targets, it is quite expensive to load a symbol into a register,
3755 which makes addresses that contain symbols look much more expensive.
3756 However, the symbol will have to be loaded in any case before the
3757 loop (and quite likely we have it in a register already), so it does not
3758 make much sense to penalize them too heavily.  So make some final
3759 tweaks for the SYMBOL_PRESENT modes:
3760
3761 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3762 var is cheaper, use this mode with small penalty.
3763 If VAR_PRESENT is true, try whether the mode with
3764 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3765 if this is the case, use it. */
3766 add_c = add_cost (speed, address_mode);
3767 for (i = 0; i < 8; i++)
3768 {
3769 var_p = i & 1;
3770 off_p = (i >> 1) & 1;
3771 rat_p = (i >> 2) & 1;
3772
3773 acost = data->costs[0][1][off_p][rat_p] + 1;
3774 if (var_p)
3775 acost += add_c;
3776
3777 if (acost < data->costs[1][var_p][off_p][rat_p])
3778 data->costs[1][var_p][off_p][rat_p] = acost;
3779 }
3780
3781 if (dump_file && (dump_flags & TDF_DETAILS))
3782 {
3783 fprintf (dump_file, "Address costs:\n");
3784
3785 for (i = 0; i < 16; i++)
3786 {
3787 sym_p = i & 1;
3788 var_p = (i >> 1) & 1;
3789 off_p = (i >> 2) & 1;
3790 rat_p = (i >> 3) & 1;
3791
3792 fprintf (dump_file, " ");
3793 if (sym_p)
3794 fprintf (dump_file, "sym + ");
3795 if (var_p)
3796 fprintf (dump_file, "var + ");
3797 if (off_p)
3798 fprintf (dump_file, "cst + ");
3799 if (rat_p)
3800 fprintf (dump_file, "rat * ");
3801
3802 acost = data->costs[sym_p][var_p][off_p][rat_p];
3803 fprintf (dump_file, "index costs %d\n", acost);
3804 }
3805 if (has_predec[mem_mode] || has_postdec[mem_mode]
3806 || has_preinc[mem_mode] || has_postinc[mem_mode])
3807 fprintf (dump_file, " May include autoinc/dec\n");
3808 fprintf (dump_file, "\n");
3809 }
3810
3811 address_cost_data_list[data_index] = data;
3812 }
3813
3814 bits = GET_MODE_BITSIZE (address_mode);
3815 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3816 offset &= mask;
3817 if ((offset >> (bits - 1) & 1))
3818 offset |= ~mask;
3819 s_offset = offset;
3820
3821 autoinc = false;
3822 autoinc_type = AINC_NONE;
3823 msize = GET_MODE_SIZE (mem_mode);
3824 autoinc_offset = offset;
3825 if (stmt_after_inc)
3826 autoinc_offset += ratio * cstep;
3827 if (symbol_present || var_present || ratio != 1)
3828 autoinc = false;
3829 else
3830 {
3831 if (has_postinc[mem_mode] && autoinc_offset == 0
3832 && msize == cstep)
3833 autoinc_type = AINC_POST_INC;
3834 else if (has_postdec[mem_mode] && autoinc_offset == 0
3835 && msize == -cstep)
3836 autoinc_type = AINC_POST_DEC;
3837 else if (has_preinc[mem_mode] && autoinc_offset == msize
3838 && msize == cstep)
3839 autoinc_type = AINC_PRE_INC;
3840 else if (has_predec[mem_mode] && autoinc_offset == -msize
3841 && msize == -cstep)
3842 autoinc_type = AINC_PRE_DEC;
3843
3844 if (autoinc_type != AINC_NONE)
3845 autoinc = true;
3846 }
3847
3848 cost = 0;
3849 offset_p = (s_offset != 0
3850 && data->min_offset <= s_offset
3851 && s_offset <= data->max_offset);
3852 ratio_p = (ratio != 1
3853 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3854
3855 if (ratio != 1 && !ratio_p)
3856 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3857
3858 if (s_offset && !offset_p && !symbol_present)
3859 cost += add_cost (speed, address_mode);
3860
3861 if (may_autoinc)
3862 *may_autoinc = autoinc;
3863 if (autoinc)
3864 acost = data->ainc_costs[autoinc_type];
3865 else
3866 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3867 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3868 return new_cost (cost + acost, complexity);
3869 }
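
/* [Illustrative sketch, not part of tree-ssa-loop-ivopts.c.]  The mask
   computation near the end of get_address_cost, which reinterprets OFFSET
   as a signed value of the address mode's width.  Shown here for an assumed
   16-bit address mode on a wider host; like the original, the final OR
   relies on the usual two's complement representation.  */
#include <assert.h>

int
main (void)
{
  const unsigned bits = 16;
  unsigned long long mask = ~(~0ULL << (bits - 1) << 1);	/* 0xffff.  */
  long long offset = 0xfffe;		/* Is -2 when read as 16-bit signed.  */

  offset &= mask;
  if ((offset >> (bits - 1)) & 1)	/* Sign bit set: extend it.  */
    offset |= ~mask;

  assert (offset == -2);
  return 0;
}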
3870
3871 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3872 EXPR operand that is a multiplication by a power of two.  COST0 and
3873 COST1 are the costs for calculating the operands of EXPR.  Returns true
3874 if successful, and returns the cost in COST. */
3875
3876 static bool
3877 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3878 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3879 {
3880 comp_cost res;
3881 tree op1 = TREE_OPERAND (expr, 1);
3882 tree cst = TREE_OPERAND (mult, 1);
3883 tree multop = TREE_OPERAND (mult, 0);
3884 int m = exact_log2 (int_cst_value (cst));
3885 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3886 int as_cost, sa_cost;
3887 bool mult_in_op1;
3888
3889 if (!(m >= 0 && m < maxm))
3890 return false;
3891
3892 STRIP_NOPS (op1);
3893 mult_in_op1 = operand_equal_p (op1, mult, 0);
3894
3895 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3896
3897 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3898 use that in preference to a shift insn followed by an add insn. */
3899 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3900 ? shiftadd_cost (speed, mode, m)
3901 : (mult_in_op1
3902 ? shiftsub1_cost (speed, mode, m)
3903 : shiftsub0_cost (speed, mode, m)));
3904
3905 res = new_cost (MIN (as_cost, sa_cost), 0);
3906 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3907
3908 STRIP_NOPS (multop);
3909 if (!is_gimple_val (multop))
3910 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3911
3912 *cost = res;
3913 return true;
3914 }
3915
3916 /* Estimates cost of forcing expression EXPR into a variable. */
3917
3918 static comp_cost
3919 force_expr_to_var_cost (tree expr, bool speed)
3920 {
3921 static bool costs_initialized = false;
3922 static unsigned integer_cost [2];
3923 static unsigned symbol_cost [2];
3924 static unsigned address_cost [2];
3925 tree op0, op1;
3926 comp_cost cost0, cost1, cost;
3927 machine_mode mode;
3928
3929 if (!costs_initialized)
3930 {
3931 tree type = build_pointer_type (integer_type_node);
3932 tree var, addr;
3933 rtx x;
3934 int i;
3935
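/* The costs are measured only once per compilation: create a dummy
   static variable and compute the cost of a representative integer
   constant, of the variable's address, and of that address plus an
   offset, both for size (i == 0) and for speed (i == 1).  */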
3936 var = create_tmp_var_raw (integer_type_node, "test_var");
3937 TREE_STATIC (var) = 1;
3938 x = produce_memory_decl_rtl (var, NULL);
3939 SET_DECL_RTL (var, x);
3940
3941 addr = build1 (ADDR_EXPR, type, var);
3942
3943
3944 for (i = 0; i < 2; i++)
3945 {
3946 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3947 2000), i);
3948
3949 symbol_cost[i] = computation_cost (addr, i) + 1;
3950
3951 address_cost[i]
3952 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3953 if (dump_file && (dump_flags & TDF_DETAILS))
3954 {
3955 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3956 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3957 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3958 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3959 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3960 fprintf (dump_file, "\n");
3961 }
3962 }
3963
3964 costs_initialized = true;
3965 }
3966
3967 STRIP_NOPS (expr);
3968
3969 if (SSA_VAR_P (expr))
3970 return no_cost;
3971
3972 if (is_gimple_min_invariant (expr))
3973 {
3974 if (TREE_CODE (expr) == INTEGER_CST)
3975 return new_cost (integer_cost [speed], 0);
3976
3977 if (TREE_CODE (expr) == ADDR_EXPR)
3978 {
3979 tree obj = TREE_OPERAND (expr, 0);
3980
3981 if (TREE_CODE (obj) == VAR_DECL
3982 || TREE_CODE (obj) == PARM_DECL
3983 || TREE_CODE (obj) == RESULT_DECL)
3984 return new_cost (symbol_cost [speed], 0);
3985 }
3986
3987 return new_cost (address_cost [speed], 0);
3988 }
3989
3990 switch (TREE_CODE (expr))
3991 {
3992 case POINTER_PLUS_EXPR:
3993 case PLUS_EXPR:
3994 case MINUS_EXPR:
3995 case MULT_EXPR:
3996 op0 = TREE_OPERAND (expr, 0);
3997 op1 = TREE_OPERAND (expr, 1);
3998 STRIP_NOPS (op0);
3999 STRIP_NOPS (op1);
4000 break;
4001
4002 CASE_CONVERT:
4003 case NEGATE_EXPR:
4004 op0 = TREE_OPERAND (expr, 0);
4005 STRIP_NOPS (op0);
4006 op1 = NULL_TREE;
4007 break;
4008
4009 default:
4010 /* Just an arbitrary value, FIXME. */
4011 return new_cost (target_spill_cost[speed], 0);
4012 }
4013
4014 if (op0 == NULL_TREE
4015 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4016 cost0 = no_cost;
4017 else
4018 cost0 = force_expr_to_var_cost (op0, speed);
4019
4020 if (op1 == NULL_TREE
4021 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4022 cost1 = no_cost;
4023 else
4024 cost1 = force_expr_to_var_cost (op1, speed);
4025
4026 mode = TYPE_MODE (TREE_TYPE (expr));
4027 switch (TREE_CODE (expr))
4028 {
4029 case POINTER_PLUS_EXPR:
4030 case PLUS_EXPR:
4031 case MINUS_EXPR:
4032 case NEGATE_EXPR:
4033 cost = new_cost (add_cost (speed, mode), 0);
4034 if (TREE_CODE (expr) != NEGATE_EXPR)
4035 {
4036 tree mult = NULL_TREE;
4037 comp_cost sa_cost;
4038 if (TREE_CODE (op1) == MULT_EXPR)
4039 mult = op1;
4040 else if (TREE_CODE (op0) == MULT_EXPR)
4041 mult = op0;
4042
4043 if (mult != NULL_TREE
4044 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4045 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4046 speed, &sa_cost))
4047 return sa_cost;
4048 }
4049 break;
4050
4051 CASE_CONVERT:
4052 {
4053 tree inner_mode, outer_mode;
4054 outer_mode = TREE_TYPE (expr);
4055 inner_mode = TREE_TYPE (op0);
4056 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4057 TYPE_MODE (inner_mode), speed), 0);
4058 }
4059 break;
4060
4061 case MULT_EXPR:
4062 if (cst_and_fits_in_hwi (op0))
4063 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4064 mode, speed), 0);
4065 else if (cst_and_fits_in_hwi (op1))
4066 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4067 mode, speed), 0);
4068 else
4069 return new_cost (target_spill_cost [speed], 0);
4070 break;
4071
4072 default:
4073 gcc_unreachable ();
4074 }
4075
4076 cost = add_costs (cost, cost0);
4077 cost = add_costs (cost, cost1);
4078
4079 /* Bound the cost by target_spill_cost. The parts of complicated
4080 computations often are either loop invariant or at least can
4081 be shared between several iv uses, so letting this grow without
4082 limits would not give reasonable results. */
4083 if (cost.cost > (int) target_spill_cost [speed])
4084 cost.cost = target_spill_cost [speed];
4085
4086 return cost;
4087 }
4088
4089 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4090 invariants the computation depends on. */
4091
4092 static comp_cost
4093 force_var_cost (struct ivopts_data *data,
4094 tree expr, bitmap *depends_on)
4095 {
4096 if (depends_on)
4097 {
4098 fd_ivopts_data = data;
4099 walk_tree (&expr, find_depends, depends_on, NULL);
4100 }
4101
4102 return force_expr_to_var_cost (expr, data->speed);
4103 }
4104
4105 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4106 value of offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
4107 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4108 invariants the computation depends on. */
4109
4110 static comp_cost
4111 split_address_cost (struct ivopts_data *data,
4112 tree addr, bool *symbol_present, bool *var_present,
4113 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4114 {
4115 tree core;
4116 HOST_WIDE_INT bitsize;
4117 HOST_WIDE_INT bitpos;
4118 tree toffset;
4119 machine_mode mode;
4120 int unsignedp, volatilep;
4121
4122 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4123 &unsignedp, &volatilep, false);
4124
4125 if (toffset != 0
4126 || bitpos % BITS_PER_UNIT != 0
4127 || TREE_CODE (core) != VAR_DECL)
4128 {
4129 *symbol_present = false;
4130 *var_present = true;
4131 fd_ivopts_data = data;
4132 walk_tree (&addr, find_depends, depends_on, NULL);
4133 return new_cost (target_spill_cost[data->speed], 0);
4134 }
4135
4136 *offset += bitpos / BITS_PER_UNIT;
4137 if (TREE_STATIC (core)
4138 || DECL_EXTERNAL (core))
4139 {
4140 *symbol_present = true;
4141 *var_present = false;
4142 return no_cost;
4143 }
4144
4145 *symbol_present = false;
4146 *var_present = true;
4147 return no_cost;
4148 }
4149
4150 /* Estimates cost of expressing difference of addresses E1 - E2 as
4151 var + symbol + offset. The value of offset is added to OFFSET;
4152 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4153 part is missing. DEPENDS_ON is a set of the invariants the computation
4154 depends on. */
4155
4156 static comp_cost
4157 ptr_difference_cost (struct ivopts_data *data,
4158 tree e1, tree e2, bool *symbol_present, bool *var_present,
4159 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4160 {
4161 HOST_WIDE_INT diff = 0;
4162 aff_tree aff_e1, aff_e2;
4163 tree type;
4164
4165 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4166
4167 if (ptr_difference_const (e1, e2, &diff))
4168 {
4169 *offset += diff;
4170 *symbol_present = false;
4171 *var_present = false;
4172 return no_cost;
4173 }
4174
4175 if (integer_zerop (e2))
4176 return split_address_cost (data, TREE_OPERAND (e1, 0),
4177 symbol_present, var_present, offset, depends_on);
4178
4179 *symbol_present = false;
4180 *var_present = true;
4181
4182 type = signed_type_for (TREE_TYPE (e1));
4183 tree_to_aff_combination (e1, type, &aff_e1);
4184 tree_to_aff_combination (e2, type, &aff_e2);
4185 aff_combination_scale (&aff_e2, -1);
4186 aff_combination_add (&aff_e1, &aff_e2);
4187
4188 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4189 }
4190
4191 /* Estimates cost of expressing difference E1 - E2 as
4192 var + symbol + offset. The value of offset is added to OFFSET;
4193 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4194 part is missing. DEPENDS_ON is a set of the invariants the computation
4195 depends on. */
4196
4197 static comp_cost
4198 difference_cost (struct ivopts_data *data,
4199 tree e1, tree e2, bool *symbol_present, bool *var_present,
4200 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4201 {
4202 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4203 unsigned HOST_WIDE_INT off1, off2;
4204 aff_tree aff_e1, aff_e2;
4205 tree type;
4206
4207 e1 = strip_offset (e1, &off1);
4208 e2 = strip_offset (e2, &off2);
4209 *offset += off1 - off2;
4210
4211 STRIP_NOPS (e1);
4212 STRIP_NOPS (e2);
4213
4214 if (TREE_CODE (e1) == ADDR_EXPR)
4215 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4216 offset, depends_on);
4217 *symbol_present = false;
4218
4219 if (operand_equal_p (e1, e2, 0))
4220 {
4221 *var_present = false;
4222 return no_cost;
4223 }
4224
4225 *var_present = true;
4226
4227 if (integer_zerop (e2))
4228 return force_var_cost (data, e1, depends_on);
4229
4230 if (integer_zerop (e1))
4231 {
4232 comp_cost cost = force_var_cost (data, e2, depends_on);
4233 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4234 return cost;
4235 }
4236
4237 type = signed_type_for (TREE_TYPE (e1));
4238 tree_to_aff_combination (e1, type, &aff_e1);
4239 tree_to_aff_combination (e2, type, &aff_e2);
4240 aff_combination_scale (&aff_e2, -1);
4241 aff_combination_add (&aff_e1, &aff_e2);
4242
4243 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4244 }
4245
4246 /* Returns true if AFF1 and AFF2 are identical. */
4247
4248 static bool
4249 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4250 {
4251 unsigned i;
4252
4253 if (aff1->n != aff2->n)
4254 return false;
4255
4256 for (i = 0; i < aff1->n; i++)
4257 {
4258 if (aff1->elts[i].coef != aff2->elts[i].coef)
4259 return false;
4260
4261 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4262 return false;
4263 }
4264 return true;
4265 }
4266
4267 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4268
4269 static int
4270 get_expr_id (struct ivopts_data *data, tree expr)
4271 {
4272 struct iv_inv_expr_ent ent;
4273 struct iv_inv_expr_ent **slot;
4274
4275 ent.expr = expr;
4276 ent.hash = iterative_hash_expr (expr, 0);
4277 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4278 if (*slot)
4279 return (*slot)->id;
4280
4281 *slot = XNEW (struct iv_inv_expr_ent);
4282 (*slot)->expr = expr;
4283 (*slot)->hash = ent.hash;
4284 (*slot)->id = data->inv_expr_id++;
4285 return (*slot)->id;
4286 }
4287
4288 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4289 requires a new compiler generated temporary. Returns -1 otherwise.
4290 ADDRESS_P is a flag indicating if the expression is for address
4291 computation. */
4292
4293 static int
4294 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4295 tree cbase, HOST_WIDE_INT ratio,
4296 bool address_p)
4297 {
4298 aff_tree ubase_aff, cbase_aff;
4299 tree expr, ub, cb;
4300
4301 STRIP_NOPS (ubase);
4302 STRIP_NOPS (cbase);
4303 ub = ubase;
4304 cb = cbase;
4305
4306 if ((TREE_CODE (ubase) == INTEGER_CST)
4307 && (TREE_CODE (cbase) == INTEGER_CST))
4308 return -1;
4309
4310 /* Strips the constant part. */
4311 if (TREE_CODE (ubase) == PLUS_EXPR
4312 || TREE_CODE (ubase) == MINUS_EXPR
4313 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4314 {
4315 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4316 ubase = TREE_OPERAND (ubase, 0);
4317 }
4318
4319 /* Strips the constant part. */
4320 if (TREE_CODE (cbase) == PLUS_EXPR
4321 || TREE_CODE (cbase) == MINUS_EXPR
4322 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4323 {
4324 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4325 cbase = TREE_OPERAND (cbase, 0);
4326 }
4327
4328 if (address_p)
4329 {
4330 if (((TREE_CODE (ubase) == SSA_NAME)
4331 || (TREE_CODE (ubase) == ADDR_EXPR
4332 && is_gimple_min_invariant (ubase)))
4333 && (TREE_CODE (cbase) == INTEGER_CST))
4334 return -1;
4335
4336 if (((TREE_CODE (cbase) == SSA_NAME)
4337 || (TREE_CODE (cbase) == ADDR_EXPR
4338 && is_gimple_min_invariant (cbase)))
4339 && (TREE_CODE (ubase) == INTEGER_CST))
4340 return -1;
4341 }
4342
4343 if (ratio == 1)
4344 {
4345 if (operand_equal_p (ubase, cbase, 0))
4346 return -1;
4347
4348 if (TREE_CODE (ubase) == ADDR_EXPR
4349 && TREE_CODE (cbase) == ADDR_EXPR)
4350 {
4351 tree usym, csym;
4352
4353 usym = TREE_OPERAND (ubase, 0);
4354 csym = TREE_OPERAND (cbase, 0);
4355 if (TREE_CODE (usym) == ARRAY_REF)
4356 {
4357 tree ind = TREE_OPERAND (usym, 1);
4358 if (TREE_CODE (ind) == INTEGER_CST
4359 && tree_fits_shwi_p (ind)
4360 && tree_to_shwi (ind) == 0)
4361 usym = TREE_OPERAND (usym, 0);
4362 }
4363 if (TREE_CODE (csym) == ARRAY_REF)
4364 {
4365 tree ind = TREE_OPERAND (csym, 1);
4366 if (TREE_CODE (ind) == INTEGER_CST
4367 && tree_fits_shwi_p (ind)
4368 && tree_to_shwi (ind) == 0)
4369 csym = TREE_OPERAND (csym, 0);
4370 }
4371 if (operand_equal_p (usym, csym, 0))
4372 return -1;
4373 }
4374 /* Now do a more complex comparison. */
4375 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4376 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4377 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4378 return -1;
4379 }
4380
4381 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4382 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4383
4384 aff_combination_scale (&cbase_aff, -1 * ratio);
4385 aff_combination_add (&ubase_aff, &cbase_aff);
4386 expr = aff_combination_to_tree (&ubase_aff);
4387 return get_expr_id (data, expr);
4388 }
4389
4390
4391
4392 /* Determines the cost of the computation by which USE is expressed
4393 from induction variable CAND. If ADDRESS_P is true, we just need
4394 to create an address from it, otherwise we want to get it into a
4395 register. A set of invariants we depend on is stored in
4396 DEPENDS_ON. AT is the statement at which the value is computed.
4397 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4398 addressing is likely. */
4399
4400 static comp_cost
4401 get_computation_cost_at (struct ivopts_data *data,
4402 struct iv_use *use, struct iv_cand *cand,
4403 bool address_p, bitmap *depends_on, gimple at,
4404 bool *can_autoinc,
4405 int *inv_expr_id)
4406 {
4407 tree ubase = use->iv->base, ustep = use->iv->step;
4408 tree cbase, cstep;
4409 tree utype = TREE_TYPE (ubase), ctype;
4410 unsigned HOST_WIDE_INT cstepi, offset = 0;
4411 HOST_WIDE_INT ratio, aratio;
4412 bool var_present, symbol_present, stmt_is_after_inc;
4413 comp_cost cost;
4414 widest_int rat;
4415 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4416 machine_mode mem_mode = (address_p
4417 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4418 : VOIDmode);
4419
4420 *depends_on = NULL;
4421
4422 /* Only consider real candidates. */
4423 if (!cand->iv)
4424 return infinite_cost;
4425
4426 cbase = cand->iv->base;
4427 cstep = cand->iv->step;
4428 ctype = TREE_TYPE (cbase);
4429
4430 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4431 {
4432 /* We do not have a precision to express the values of use. */
4433 return infinite_cost;
4434 }
4435
4436 if (address_p
4437 || (use->iv->base_object
4438 && cand->iv->base_object
4439 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4440 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4441 {
4442 /* Do not try to express address of an object with computation based
4443 on address of a different object. This may cause problems in rtl
4444 level alias analysis (that does not expect this to be happening,
4445 as this is illegal in C), and would be unlikely to be useful
4446 anyway. */
4447 if (use->iv->base_object
4448 && cand->iv->base_object
4449 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4450 return infinite_cost;
4451 }
4452
4453 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4454 {
4455 /* TODO -- add direct handling of this case. */
4456 goto fallback;
4457 }
4458
4459 /* CSTEPI is removed from the offset in case the statement is after the
4460 increment. If the step is not constant, we use zero instead.
4461 This is a bit imprecise (there is the extra addition), but
4462 redundancy elimination is likely to transform the code so that
4463 it uses the value of the variable before the increment anyway,
4464 so this is not overly unrealistic. */
4465 if (cst_and_fits_in_hwi (cstep))
4466 cstepi = int_cst_value (cstep);
4467 else
4468 cstepi = 0;
4469
4470 if (!constant_multiple_of (ustep, cstep, &rat))
4471 return infinite_cost;
4472
4473 if (wi::fits_shwi_p (rat))
4474 ratio = rat.to_shwi ();
4475 else
4476 return infinite_cost;
4477
4478 STRIP_NOPS (cbase);
4479 ctype = TREE_TYPE (cbase);
4480
4481 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4482
4483 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4484 or ratio == 1, it is better to handle this like
4485
4486 ubase - ratio * cbase + ratio * var
4487
4488 (this also holds in the case ratio == -1; TODO). */
4489
4490 if (cst_and_fits_in_hwi (cbase))
4491 {
4492 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4493 cost = difference_cost (data,
4494 ubase, build_int_cst (utype, 0),
4495 &symbol_present, &var_present, &offset,
4496 depends_on);
4497 cost.cost /= avg_loop_niter (data->current_loop);
4498 }
4499 else if (ratio == 1)
4500 {
4501 tree real_cbase = cbase;
4502
4503 /* Check to see if any adjustment is needed. */
4504 if (cstepi == 0 && stmt_is_after_inc)
4505 {
4506 aff_tree real_cbase_aff;
4507 aff_tree cstep_aff;
4508
4509 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4510 &real_cbase_aff);
4511 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4512
4513 aff_combination_add (&real_cbase_aff, &cstep_aff);
4514 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4515 }
4516
4517 cost = difference_cost (data,
4518 ubase, real_cbase,
4519 &symbol_present, &var_present, &offset,
4520 depends_on);
4521 cost.cost /= avg_loop_niter (data->current_loop);
4522 }
4523 else if (address_p
4524 && !POINTER_TYPE_P (ctype)
4525 && multiplier_allowed_in_address_p
4526 (ratio, mem_mode,
4527 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4528 {
4529 cbase
4530 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4531 cost = difference_cost (data,
4532 ubase, cbase,
4533 &symbol_present, &var_present, &offset,
4534 depends_on);
4535 cost.cost /= avg_loop_niter (data->current_loop);
4536 }
4537 else
4538 {
4539 cost = force_var_cost (data, cbase, depends_on);
4540 cost = add_costs (cost,
4541 difference_cost (data,
4542 ubase, build_int_cst (utype, 0),
4543 &symbol_present, &var_present,
4544 &offset, depends_on));
4545 cost.cost /= avg_loop_niter (data->current_loop);
4546 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4547 }
4548
4549 /* The set of invariants depended on by the sub-use has already been computed
4550 for the first use in the group. */
4551 if (use->sub_id)
4552 {
4553 cost.cost = 0;
4554 if (depends_on && *depends_on)
4555 bitmap_clear (*depends_on);
4556 }
4557 else if (inv_expr_id)
4558 {
4559 *inv_expr_id =
4560 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4561 /* Clear depends on. */
4562 if (*inv_expr_id != -1 && depends_on && *depends_on)
4563 bitmap_clear (*depends_on);
4564 }
4565
4566 /* If we are after the increment, the value of the candidate is higher by
4567 one iteration. */
4568 if (stmt_is_after_inc)
4569 offset -= ratio * cstepi;
4570
4571 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4572 (symbol/var1/const parts may be omitted). If we are looking for an
4573 address, find the cost of addressing this. */
4574 if (address_p)
4575 return add_costs (cost,
4576 get_address_cost (symbol_present, var_present,
4577 offset, ratio, cstepi,
4578 mem_mode,
4579 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4580 speed, stmt_is_after_inc,
4581 can_autoinc));
4582
4583 /* Otherwise estimate the costs for computing the expression. */
4584 if (!symbol_present && !var_present && !offset)
4585 {
4586 if (ratio != 1)
4587 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4588 return cost;
4589 }
4590
4591 /* Symbol + offset should be compile-time computable so consider that they
4592 are added once to the variable, if present. */
4593 if (var_present && (symbol_present || offset))
4594 cost.cost += adjust_setup_cost (data,
4595 add_cost (speed, TYPE_MODE (ctype)));
4596
4597 /* Having offset does not affect runtime cost in case it is added to
4598 symbol, but it increases complexity. */
4599 if (offset)
4600 cost.complexity++;
4601
4602 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4603
4604 aratio = ratio > 0 ? ratio : -ratio;
4605 if (aratio != 1)
4606 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4607 return cost;
4608
4609 fallback:
4610 if (can_autoinc)
4611 *can_autoinc = false;
4612
4613 {
4614 /* Just get the expression, expand it and measure the cost. */
4615 tree comp = get_computation_at (data->current_loop, use, cand, at);
4616
4617 if (!comp)
4618 return infinite_cost;
4619
4620 if (address_p)
4621 comp = build_simple_mem_ref (comp);
4622
4623 return new_cost (computation_cost (comp, speed), 0);
4624 }
4625 }
4626
4627 /* Determines the cost of the computation by which USE is expressed
4628 from induction variable CAND. If ADDRESS_P is true, we just need
4629 to create an address from it, otherwise we want to get it into a
4630 register. A set of invariants we depend on is stored in
4631 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4632 autoinc addressing is likely. */
4633
4634 static comp_cost
4635 get_computation_cost (struct ivopts_data *data,
4636 struct iv_use *use, struct iv_cand *cand,
4637 bool address_p, bitmap *depends_on,
4638 bool *can_autoinc, int *inv_expr_id)
4639 {
4640 return get_computation_cost_at (data,
4641 use, cand, address_p, depends_on, use->stmt,
4642 can_autoinc, inv_expr_id);
4643 }
4644
4645 /* Determines cost of basing replacement of USE on CAND in a generic
4646 expression. */
4647
4648 static bool
4649 determine_use_iv_cost_generic (struct ivopts_data *data,
4650 struct iv_use *use, struct iv_cand *cand)
4651 {
4652 bitmap depends_on;
4653 comp_cost cost;
4654 int inv_expr_id = -1;
4655
4656 /* The simple case first -- if we need to express value of the preserved
4657 original biv, the cost is 0. This also prevents us from counting the
4658 cost of increment twice -- once at this use and once in the cost of
4659 the candidate. */
4660 if (cand->pos == IP_ORIGINAL
4661 && cand->incremented_at == use->stmt)
4662 {
4663 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4664 ERROR_MARK, -1);
4665 return true;
4666 }
4667
4668 cost = get_computation_cost (data, use, cand, false, &depends_on,
4669 NULL, &inv_expr_id);
4670
4671 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4672 inv_expr_id);
4673
4674 return !infinite_cost_p (cost);
4675 }
4676
4677 /* Determines cost of basing replacement of USE on CAND in an address. */
4678
4679 static bool
4680 determine_use_iv_cost_address (struct ivopts_data *data,
4681 struct iv_use *use, struct iv_cand *cand)
4682 {
4683 bitmap depends_on;
4684 bool can_autoinc;
4685 int inv_expr_id = -1;
4686 struct iv_use *sub_use;
4687 comp_cost sub_cost;
4688 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4689 &can_autoinc, &inv_expr_id);
4690
4691 if (cand->ainc_use == use)
4692 {
4693 if (can_autoinc)
4694 cost.cost -= cand->cost_step;
4695 /* If we generated the candidate solely for exploiting autoincrement
4696 opportunities, and it turns out it can't be used, set the cost to
4697 infinity to make sure we ignore it. */
4698 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4699 cost = infinite_cost;
4700 }
4701 for (sub_use = use->next;
4702 sub_use && !infinite_cost_p (cost);
4703 sub_use = sub_use->next)
4704 {
4705 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4706 &can_autoinc, &inv_expr_id);
4707 cost = add_costs (cost, sub_cost);
4708 }
4709
4710 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4711 inv_expr_id);
4712
4713 return !infinite_cost_p (cost);
4714 }
4715
4716 /* Computes value of candidate CAND at position AT in iteration NITER, and
4717 stores it to VAL. */
4718
4719 static void
4720 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4721 aff_tree *val)
4722 {
4723 aff_tree step, delta, nit;
4724 struct iv *iv = cand->iv;
4725 tree type = TREE_TYPE (iv->base);
4726 tree steptype = type;
4727 if (POINTER_TYPE_P (type))
4728 steptype = sizetype;
4729 steptype = unsigned_type_for (type);
4730
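/* Compute VAL = BASE + NITER * STEP using affine combinations, carried
   out in an unsigned type so that the arithmetic wraps instead of
   overflowing; when AT is after the increment of CAND, one extra STEP
   is added.  */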
4731 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4732 aff_combination_convert (&step, steptype);
4733 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4734 aff_combination_convert (&nit, steptype);
4735 aff_combination_mult (&nit, &step, &delta);
4736 if (stmt_after_increment (loop, cand, at))
4737 aff_combination_add (&delta, &step);
4738
4739 tree_to_aff_combination (iv->base, type, val);
4740 if (!POINTER_TYPE_P (type))
4741 aff_combination_convert (val, steptype);
4742 aff_combination_add (val, &delta);
4743 }
4744
4745 /* Returns period of induction variable iv. */
4746
4747 static tree
4748 iv_period (struct iv *iv)
4749 {
4750 tree step = iv->step, period, type;
4751 tree pow2div;
4752
4753 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4754
4755 type = unsigned_type_for (TREE_TYPE (step));
4756 /* Period of the iv is lcm (step, type_range)/step -1,
4757 i.e., N*type_range/step - 1. Since type range is power
4758 of two, N == step >> num_of_ending_zeros_binary (step),
4759 so the final result is
4760
4761 (type_range >> num_of_ending_zeros_binary (step)) - 1
4762
4763 */
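/* For instance, a 32-bit iv with step 4 has two trailing zero bits in
   the step, so its period is (1 << 30) - 1 = 0x3fffffff.  */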
4764 pow2div = num_ending_zeros (step);
4765
4766 period = build_low_bits_mask (type,
4767 (TYPE_PRECISION (type)
4768 - tree_to_uhwi (pow2div)));
4769
4770 return period;
4771 }
4772
4773 /* Returns the comparison operator used when eliminating the iv USE. */
4774
4775 static enum tree_code
4776 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4777 {
4778 struct loop *loop = data->current_loop;
4779 basic_block ex_bb;
4780 edge exit;
4781
4782 ex_bb = gimple_bb (use->stmt);
4783 exit = EDGE_SUCC (ex_bb, 0);
4784 if (flow_bb_inside_loop_p (loop, exit->dest))
4785 exit = EDGE_SUCC (ex_bb, 1);
4786
4787 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4788 }
4789
4790 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4791 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4792 calculation is performed in a non-wrapping type.
4793
4794 TODO: More generally, we could test for the situation that
4795 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4796 This would require knowing the sign of OFFSET. */
4797
4798 static bool
4799 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4800 {
4801 enum tree_code code;
4802 tree e1, e2;
4803 aff_tree aff_e1, aff_e2, aff_offset;
4804
4805 if (!nowrap_type_p (TREE_TYPE (base)))
4806 return false;
4807
4808 base = expand_simple_operations (base);
4809
4810 if (TREE_CODE (base) == SSA_NAME)
4811 {
4812 gimple stmt = SSA_NAME_DEF_STMT (base);
4813
4814 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4815 return false;
4816
4817 code = gimple_assign_rhs_code (stmt);
4818 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4819 return false;
4820
4821 e1 = gimple_assign_rhs1 (stmt);
4822 e2 = gimple_assign_rhs2 (stmt);
4823 }
4824 else
4825 {
4826 code = TREE_CODE (base);
4827 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4828 return false;
4829 e1 = TREE_OPERAND (base, 0);
4830 e2 = TREE_OPERAND (base, 1);
4831 }
4832
4833 /* Use affine expansion as a deeper inspection to prove the equality. */
4834 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4835 &aff_e2, &data->name_expansion_cache);
4836 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4837 &aff_offset, &data->name_expansion_cache);
4838 aff_combination_scale (&aff_offset, -1);
4839 switch (code)
4840 {
4841 case PLUS_EXPR:
4842 aff_combination_add (&aff_e2, &aff_offset);
4843 if (aff_combination_zero_p (&aff_e2))
4844 return true;
4845
4846 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4847 &aff_e1, &data->name_expansion_cache);
4848 aff_combination_add (&aff_e1, &aff_offset);
4849 return aff_combination_zero_p (&aff_e1);
4850
4851 case POINTER_PLUS_EXPR:
4852 aff_combination_add (&aff_e2, &aff_offset);
4853 return aff_combination_zero_p (&aff_e2);
4854
4855 default:
4856 return false;
4857 }
4858 }
4859
4860 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4861 comparison with CAND. NITER describes the number of iterations of
4862 the loops. If successful, the comparison in COMP_P is altered accordingly.
4863
4864 We aim to handle the following situation:
4865
4866 sometype *base, *p;
4867 int a, b, i;
4868
4869 i = a;
4870 p = p_0 = base + a;
4871
4872 do
4873 {
4874 bla (*p);
4875 p++;
4876 i++;
4877 }
4878 while (i < b);
4879
4880 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4881 We aim to optimize this to
4882
4883 p = p_0 = base + a;
4884 do
4885 {
4886 bla (*p);
4887 p++;
4888 }
4889 while (p < p_0 - a + b);
4890
4891 This preserves correctness, since the pointer arithmetic does not
4892 overflow. More precisely:
4893
4894 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4895 overflow in computing it or the values of p.
4896 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4897 overflow. To prove this, we use the fact that p_0 = base + a. */
4898
4899 static bool
4900 iv_elimination_compare_lt (struct ivopts_data *data,
4901 struct iv_cand *cand, enum tree_code *comp_p,
4902 struct tree_niter_desc *niter)
4903 {
4904 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4905 struct aff_tree nit, tmpa, tmpb;
4906 enum tree_code comp;
4907 HOST_WIDE_INT step;
4908
4909 /* We need to know that the candidate induction variable does not overflow.
4910 While more complex analysis may be used to prove this, for now just
4911 check that the variable appears in the original program and that it
4912 is computed in a type that guarantees no overflows. */
4913 cand_type = TREE_TYPE (cand->iv->base);
4914 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4915 return false;
4916
4917 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4918 the calculation of the BOUND could overflow, making the comparison
4919 invalid. */
4920 if (!data->loop_single_exit_p)
4921 return false;
4922
4923 /* We need to be able to decide whether candidate is increasing or decreasing
4924 in order to choose the right comparison operator. */
4925 if (!cst_and_fits_in_hwi (cand->iv->step))
4926 return false;
4927 step = int_cst_value (cand->iv->step);
4928
4929 /* Check that the number of iterations matches the expected pattern:
4930 a + 1 > b ? 0 : b - a - 1. */
4931 mbz = niter->may_be_zero;
4932 if (TREE_CODE (mbz) == GT_EXPR)
4933 {
4934 /* Handle a + 1 > b. */
4935 tree op0 = TREE_OPERAND (mbz, 0);
4936 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4937 {
4938 a = TREE_OPERAND (op0, 0);
4939 b = TREE_OPERAND (mbz, 1);
4940 }
4941 else
4942 return false;
4943 }
4944 else if (TREE_CODE (mbz) == LT_EXPR)
4945 {
4946 tree op1 = TREE_OPERAND (mbz, 1);
4947
4948 /* Handle b < a + 1. */
4949 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4950 {
4951 a = TREE_OPERAND (op1, 0);
4952 b = TREE_OPERAND (mbz, 0);
4953 }
4954 else
4955 return false;
4956 }
4957 else
4958 return false;
4959
4960 /* Expected number of iterations is B - A - 1. Check that it matches
4961 the actual number, i.e., that B - A - NITER = 1. */
4962 tree_to_aff_combination (niter->niter, nit_type, &nit);
4963 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4964 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4965 aff_combination_scale (&nit, -1);
4966 aff_combination_scale (&tmpa, -1);
4967 aff_combination_add (&tmpb, &tmpa);
4968 aff_combination_add (&tmpb, &nit);
4969 if (tmpb.n != 0 || tmpb.offset != 1)
4970 return false;
4971
4972 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4973 overflow. */
4974 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4975 cand->iv->step,
4976 fold_convert (TREE_TYPE (cand->iv->step), a));
4977 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4978 return false;
4979
4980 /* Determine the new comparison operator. */
4981 comp = step < 0 ? GT_EXPR : LT_EXPR;
4982 if (*comp_p == NE_EXPR)
4983 *comp_p = comp;
4984 else if (*comp_p == EQ_EXPR)
4985 *comp_p = invert_tree_comparison (comp, false);
4986 else
4987 gcc_unreachable ();
4988
4989 return true;
4990 }
4991
4992 /* Check whether it is possible to express the condition in USE by comparison
4993 of candidate CAND. If so, store the value compared with to BOUND, and the
4994 comparison operator to COMP. */
4995
4996 static bool
4997 may_eliminate_iv (struct ivopts_data *data,
4998 struct iv_use *use, struct iv_cand *cand, tree *bound,
4999 enum tree_code *comp)
5000 {
5001 basic_block ex_bb;
5002 edge exit;
5003 tree period;
5004 struct loop *loop = data->current_loop;
5005 aff_tree bnd;
5006 struct tree_niter_desc *desc = NULL;
5007
5008 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5009 return false;
5010
5011 /* For now this works only for exits that dominate the loop latch.
5012 TODO: extend to other conditions inside loop body. */
5013 ex_bb = gimple_bb (use->stmt);
5014 if (use->stmt != last_stmt (ex_bb)
5015 || gimple_code (use->stmt) != GIMPLE_COND
5016 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5017 return false;
5018
5019 exit = EDGE_SUCC (ex_bb, 0);
5020 if (flow_bb_inside_loop_p (loop, exit->dest))
5021 exit = EDGE_SUCC (ex_bb, 1);
5022 if (flow_bb_inside_loop_p (loop, exit->dest))
5023 return false;
5024
5025 desc = niter_for_exit (data, exit);
5026 if (!desc)
5027 return false;
5028
5029 /* Determine whether we can use the variable to test the exit condition.
5030 This is the case iff the period of the induction variable is greater
5031 than the number of iterations for which the exit condition is true. */
5032 period = iv_period (cand->iv);
5033
5034 /* If the number of iterations is constant, compare against it directly. */
5035 if (TREE_CODE (desc->niter) == INTEGER_CST)
5036 {
5037 /* See cand_value_at. */
5038 if (stmt_after_increment (loop, cand, use->stmt))
5039 {
5040 if (!tree_int_cst_lt (desc->niter, period))
5041 return false;
5042 }
5043 else
5044 {
5045 if (tree_int_cst_lt (period, desc->niter))
5046 return false;
5047 }
5048 }
5049
5050 /* If not, and if this is the only possible exit of the loop, see whether
5051 we can get a conservative estimate on the number of iterations of the
5052 entire loop and compare against that instead. */
5053 else
5054 {
5055 widest_int period_value, max_niter;
5056
5057 max_niter = desc->max;
5058 if (stmt_after_increment (loop, cand, use->stmt))
5059 max_niter += 1;
5060 period_value = wi::to_widest (period);
5061 if (wi::gtu_p (max_niter, period_value))
5062 {
5063 /* See if we can take advantage of inferred loop bound information. */
5064 if (data->loop_single_exit_p)
5065 {
5066 if (!max_loop_iterations (loop, &max_niter))
5067 return false;
5068 /* The loop bound is already adjusted by adding 1. */
5069 if (wi::gtu_p (max_niter, period_value))
5070 return false;
5071 }
5072 else
5073 return false;
5074 }
5075 }
5076
5077 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5078
5079 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5080 aff_combination_to_tree (&bnd));
5081 *comp = iv_elimination_compare (data, use);
5082
5083 /* It is unlikely that computing the number of iterations using division
5084 would be more profitable than keeping the original induction variable. */
5085 if (expression_expensive_p (*bound))
5086 return false;
5087
5088 /* Sometimes, it is possible to handle the situation that the number of
5089 iterations may be zero unless additional assumptions hold, by using <
5090 instead of != in the exit condition.
5091
5092 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5093 base the exit condition on it. However, that is often too
5094 expensive. */
5095 if (!integer_zerop (desc->may_be_zero))
5096 return iv_elimination_compare_lt (data, cand, comp, desc);
5097
5098 return true;
5099 }
5100
5101 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5102 be copied, if it is used in the loop body and DATA->body_includes_call. */
5103
5104 static int
5105 parm_decl_cost (struct ivopts_data *data, tree bound)
5106 {
5107 tree sbound = bound;
5108 STRIP_NOPS (sbound);
5109
5110 if (TREE_CODE (sbound) == SSA_NAME
5111 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5112 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5113 && data->body_includes_call)
5114 return COSTS_N_INSNS (1);
5115
5116 return 0;
5117 }
5118
5119 /* Determines cost of basing replacement of USE on CAND in a condition. */
5120
5121 static bool
5122 determine_use_iv_cost_condition (struct ivopts_data *data,
5123 struct iv_use *use, struct iv_cand *cand)
5124 {
5125 tree bound = NULL_TREE;
5126 struct iv *cmp_iv;
5127 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5128 comp_cost elim_cost, express_cost, cost, bound_cost;
5129 bool ok;
5130 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5131 tree *control_var, *bound_cst;
5132 enum tree_code comp = ERROR_MARK;
5133
5134 /* Only consider real candidates. */
5135 if (!cand->iv)
5136 {
5137 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5138 ERROR_MARK, -1);
5139 return false;
5140 }
5141
5142 /* Try iv elimination. */
5143 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5144 {
5145 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5146 if (elim_cost.cost == 0)
5147 elim_cost.cost = parm_decl_cost (data, bound);
5148 else if (TREE_CODE (bound) == INTEGER_CST)
5149 elim_cost.cost = 0;
5150 /* If we replace a loop condition 'i < n' with 'p < base + n',
5151 depends_on_elim will have 'base' and 'n' set, which implies
5152 that both 'base' and 'n' will be live during the loop. More likely,
5153 'base + n' will be loop invariant, resulting in only one live value
5154 during the loop. So in that case we clear depends_on_elim and set
5155 elim_inv_expr_id instead. */
5156 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5157 {
5158 elim_inv_expr_id = get_expr_id (data, bound);
5159 bitmap_clear (depends_on_elim);
5160 }
5161 /* The bound is a loop invariant, so it will be only computed
5162 once. */
5163 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5164 }
5165 else
5166 elim_cost = infinite_cost;
5167
5168 /* Try expressing the original giv. If it is compared with an invariant,
5169 note that we cannot get rid of it. */
5170 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5171 NULL, &cmp_iv);
5172 gcc_assert (ok);
5173
5174 /* When the condition is a comparison of the candidate IV against
5175 zero, prefer this IV.
5176
5177 TODO: The constant that we're subtracting from the cost should
5178 be target-dependent. This information should be added to the
5179 target costs for each backend. */
5180 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5181 && integer_zerop (*bound_cst)
5182 && (operand_equal_p (*control_var, cand->var_after, 0)
5183 || operand_equal_p (*control_var, cand->var_before, 0)))
5184 elim_cost.cost -= 1;
5185
5186 express_cost = get_computation_cost (data, use, cand, false,
5187 &depends_on_express, NULL,
5188 &express_inv_expr_id);
5189 fd_ivopts_data = data;
5190 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5191
5192 /* Count the cost of the original bound as well. */
5193 bound_cost = force_var_cost (data, *bound_cst, NULL);
5194 if (bound_cost.cost == 0)
5195 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5196 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5197 bound_cost.cost = 0;
5198 express_cost.cost += bound_cost.cost;
5199
5200 /* Choose the better approach, preferring the eliminated IV. */
5201 if (compare_costs (elim_cost, express_cost) <= 0)
5202 {
5203 cost = elim_cost;
5204 depends_on = depends_on_elim;
5205 depends_on_elim = NULL;
5206 inv_expr_id = elim_inv_expr_id;
5207 }
5208 else
5209 {
5210 cost = express_cost;
5211 depends_on = depends_on_express;
5212 depends_on_express = NULL;
5213 bound = NULL_TREE;
5214 comp = ERROR_MARK;
5215 inv_expr_id = express_inv_expr_id;
5216 }
5217
5218 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5219
5220 if (depends_on_elim)
5221 BITMAP_FREE (depends_on_elim);
5222 if (depends_on_express)
5223 BITMAP_FREE (depends_on_express);
5224
5225 return !infinite_cost_p (cost);
5226 }
5227
5228 /* Determines cost of basing replacement of USE on CAND. Returns false
5229 if USE cannot be based on CAND. */
5230
5231 static bool
5232 determine_use_iv_cost (struct ivopts_data *data,
5233 struct iv_use *use, struct iv_cand *cand)
5234 {
5235 switch (use->type)
5236 {
5237 case USE_NONLINEAR_EXPR:
5238 return determine_use_iv_cost_generic (data, use, cand);
5239
5240 case USE_ADDRESS:
5241 return determine_use_iv_cost_address (data, use, cand);
5242
5243 case USE_COMPARE:
5244 return determine_use_iv_cost_condition (data, use, cand);
5245
5246 default:
5247 gcc_unreachable ();
5248 }
5249 }
5250
5251 /* Return true if get_computation_cost indicates that autoincrement is
5252 a possibility for the pair of USE and CAND, false otherwise. */
5253
5254 static bool
5255 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5256 struct iv_cand *cand)
5257 {
5258 bitmap depends_on;
5259 bool can_autoinc;
5260 comp_cost cost;
5261
5262 if (use->type != USE_ADDRESS)
5263 return false;
5264
5265 cost = get_computation_cost (data, use, cand, true, &depends_on,
5266 &can_autoinc, NULL);
5267
5268 BITMAP_FREE (depends_on);
5269
5270 return !infinite_cost_p (cost) && can_autoinc;
5271 }
5272
5273 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5274 use that allows autoincrement, and set their AINC_USE if possible. */
5275
5276 static void
5277 set_autoinc_for_original_candidates (struct ivopts_data *data)
5278 {
5279 unsigned i, j;
5280
5281 for (i = 0; i < n_iv_cands (data); i++)
5282 {
5283 struct iv_cand *cand = iv_cand (data, i);
5284 struct iv_use *closest_before = NULL;
5285 struct iv_use *closest_after = NULL;
5286 if (cand->pos != IP_ORIGINAL)
5287 continue;
5288
5289 for (j = 0; j < n_iv_uses (data); j++)
5290 {
5291 struct iv_use *use = iv_use (data, j);
5292 unsigned uid = gimple_uid (use->stmt);
5293
5294 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5295 continue;
5296
5297 if (uid < gimple_uid (cand->incremented_at)
5298 && (closest_before == NULL
5299 || uid > gimple_uid (closest_before->stmt)))
5300 closest_before = use;
5301
5302 if (uid > gimple_uid (cand->incremented_at)
5303 && (closest_after == NULL
5304 || uid < gimple_uid (closest_after->stmt)))
5305 closest_after = use;
5306 }
5307
5308 if (closest_before != NULL
5309 && autoinc_possible_for_pair (data, closest_before, cand))
5310 cand->ainc_use = closest_before;
5311 else if (closest_after != NULL
5312 && autoinc_possible_for_pair (data, closest_after, cand))
5313 cand->ainc_use = closest_after;
5314 }
5315 }
5316
5317 /* Finds the candidates for the induction variables. */
5318
5319 static void
5320 find_iv_candidates (struct ivopts_data *data)
5321 {
5322 /* Add commonly used ivs. */
5323 add_standard_iv_candidates (data);
5324
5325 /* Add old induction variables. */
5326 add_iv_candidate_for_bivs (data);
5327
5328 /* Add induction variables derived from uses. */
5329 add_iv_candidate_for_uses (data);
5330
5331 set_autoinc_for_original_candidates (data);
5332
5333 /* Record the important candidates. */
5334 record_important_candidates (data);
5335 }
5336
5337 /* Determines costs of basing the use of the iv on an iv candidate. */
5338
5339 static void
5340 determine_use_iv_costs (struct ivopts_data *data)
5341 {
5342 unsigned i, j;
5343 struct iv_use *use;
5344 struct iv_cand *cand;
5345 bitmap to_clear = BITMAP_ALLOC (NULL);
5346
5347 alloc_use_cost_map (data);
5348
5349 for (i = 0; i < n_iv_uses (data); i++)
5350 {
5351 use = iv_use (data, i);
5352
5353 if (data->consider_all_candidates)
5354 {
5355 for (j = 0; j < n_iv_cands (data); j++)
5356 {
5357 cand = iv_cand (data, j);
5358 determine_use_iv_cost (data, use, cand);
5359 }
5360 }
5361 else
5362 {
5363 bitmap_iterator bi;
5364
5365 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5366 {
5367 cand = iv_cand (data, j);
5368 if (!determine_use_iv_cost (data, use, cand))
5369 bitmap_set_bit (to_clear, j);
5370 }
5371
5372 /* Remove the candidates for which the cost is infinite from
5373 the list of related candidates. */
5374 bitmap_and_compl_into (use->related_cands, to_clear);
5375 bitmap_clear (to_clear);
5376 }
5377 }
5378
5379 BITMAP_FREE (to_clear);
5380
5381 if (dump_file && (dump_flags & TDF_DETAILS))
5382 {
5383 fprintf (dump_file, "Use-candidate costs:\n");
5384
5385 for (i = 0; i < n_iv_uses (data); i++)
5386 {
5387 use = iv_use (data, i);
5388
5389 fprintf (dump_file, "Use %d:\n", i);
5390 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5391 for (j = 0; j < use->n_map_members; j++)
5392 {
5393 if (!use->cost_map[j].cand
5394 || infinite_cost_p (use->cost_map[j].cost))
5395 continue;
5396
5397 fprintf (dump_file, " %d\t%d\t%d\t",
5398 use->cost_map[j].cand->id,
5399 use->cost_map[j].cost.cost,
5400 use->cost_map[j].cost.complexity);
5401 if (use->cost_map[j].depends_on)
5402 bitmap_print (dump_file,
5403 use->cost_map[j].depends_on, "","");
5404 if (use->cost_map[j].inv_expr_id != -1)
5405 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5406 fprintf (dump_file, "\n");
5407 }
5408
5409 fprintf (dump_file, "\n");
5410 }
5411 fprintf (dump_file, "\n");
5412 }
5413 }
5414
5415 /* Determines cost of the candidate CAND. */
5416
5417 static void
5418 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5419 {
5420 comp_cost cost_base;
5421 unsigned cost, cost_step;
5422 tree base;
5423
5424 if (!cand->iv)
5425 {
5426 cand->cost = 0;
5427 return;
5428 }
5429
5430 /* There are two costs associated with the candidate -- its increment
5431 and its initialization. The second is almost negligible for any loop
5432 that rolls enough, so we give it only very little weight. */
5433
5434 base = cand->iv->base;
5435 cost_base = force_var_cost (data, base, NULL);
5436 /* It will be exceptional that the iv register happens to be initialized with
5437 the proper value at no cost. In general, there will at least be a regcopy
5438 or a const set. */
5439 if (cost_base.cost == 0)
5440 cost_base.cost = COSTS_N_INSNS (1);
5441 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5442
5443 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5444
5445 /* Prefer the original ivs unless we may gain something by replacing them.
5446 The reason is to make debugging simpler, so this is not relevant for
5447 artificial ivs created by other optimization passes. */
5448 if (cand->pos != IP_ORIGINAL
5449 || !SSA_NAME_VAR (cand->var_before)
5450 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5451 cost++;
5452
5453 /* Prefer not to insert statements into latch unless there are some
5454 already (so that we do not create unnecessary jumps). */
5455 if (cand->pos == IP_END
5456 && empty_block_p (ip_end_pos (data->current_loop)))
5457 cost++;
5458
5459 cand->cost = cost;
5460 cand->cost_step = cost_step;
5461 }
5462
5463 /* Determines costs of computation of the candidates. */
5464
5465 static void
5466 determine_iv_costs (struct ivopts_data *data)
5467 {
5468 unsigned i;
5469
5470 if (dump_file && (dump_flags & TDF_DETAILS))
5471 {
5472 fprintf (dump_file, "Candidate costs:\n");
5473 fprintf (dump_file, " cand\tcost\n");
5474 }
5475
5476 for (i = 0; i < n_iv_cands (data); i++)
5477 {
5478 struct iv_cand *cand = iv_cand (data, i);
5479
5480 determine_iv_cost (data, cand);
5481
5482 if (dump_file && (dump_flags & TDF_DETAILS))
5483 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5484 }
5485
5486 if (dump_file && (dump_flags & TDF_DETAILS))
5487 fprintf (dump_file, "\n");
5488 }
5489
5490 /* Calculates cost for having SIZE induction variables. */
5491
5492 static unsigned
5493 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5494 {
5495 /* We add size to the cost, so that we prefer eliminating ivs
5496 if possible. */
5497 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5498 data->body_includes_call);
5499 }
5500
5501 /* For each size of the induction variable set determine the penalty. */
5502
5503 static void
5504 determine_set_costs (struct ivopts_data *data)
5505 {
5506 unsigned j, n;
5507 gphi *phi;
5508 gphi_iterator psi;
5509 tree op;
5510 struct loop *loop = data->current_loop;
5511 bitmap_iterator bi;
5512
5513 if (dump_file && (dump_flags & TDF_DETAILS))
5514 {
5515 fprintf (dump_file, "Global costs:\n");
5516 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5517 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5518 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5519 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5520 }
5521
5522 n = 0;
5523 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5524 {
5525 phi = psi.phi ();
5526 op = PHI_RESULT (phi);
5527
5528 if (virtual_operand_p (op))
5529 continue;
5530
5531 if (get_iv (data, op))
5532 continue;
5533
5534 n++;
5535 }
5536
5537 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5538 {
5539 struct version_info *info = ver_info (data, j);
5540
5541 if (info->inv_id && info->has_nonlin_use)
5542 n++;
5543 }
5544
5545 data->regs_used = n;
5546 if (dump_file && (dump_flags & TDF_DETAILS))
5547 fprintf (dump_file, " regs_used %d\n", n);
5548
5549 if (dump_file && (dump_flags & TDF_DETAILS))
5550 {
5551 fprintf (dump_file, " cost for size:\n");
5552 fprintf (dump_file, " ivs\tcost\n");
5553 for (j = 0; j <= 2 * target_avail_regs; j++)
5554 fprintf (dump_file, " %d\t%d\n", j,
5555 ivopts_global_cost_for_size (data, j));
5556 fprintf (dump_file, "\n");
5557 }
5558 }
5559
5560 /* Returns true if A is a cheaper cost pair than B. */
5561
5562 static bool
5563 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5564 {
5565 int cmp;
5566
5567 if (!a)
5568 return false;
5569
5570 if (!b)
5571 return true;
5572
5573 cmp = compare_costs (a->cost, b->cost);
5574 if (cmp < 0)
5575 return true;
5576
5577 if (cmp > 0)
5578 return false;
5579
5580 /* In case the costs are the same, prefer the cheaper candidate. */
5581 if (a->cand->cost < b->cand->cost)
5582 return true;
5583
5584 return false;
5585 }
5586
5587
5588 /* Returns the candidate by which USE is expressed in IVS. */
5589
5590 static struct cost_pair *
5591 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5592 {
5593 return ivs->cand_for_use[use->id];
5594 }
5595
5596 /* Computes the cost field of IVS structure. */
5597
5598 static void
5599 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5600 {
5601 comp_cost cost = ivs->cand_use_cost;
5602
5603 cost.cost += ivs->cand_cost;
5604
5605 cost.cost += ivopts_global_cost_for_size (data,
5606 ivs->n_regs + ivs->num_used_inv_expr);
5607
5608 ivs->cost = cost;
5609 }
5610
5611 /* Remove invariants in set INVS from set IVS. */
5612
5613 static void
5614 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5615 {
5616 bitmap_iterator bi;
5617 unsigned iid;
5618
5619 if (!invs)
5620 return;
5621
5622 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5623 {
5624 ivs->n_invariant_uses[iid]--;
5625 if (ivs->n_invariant_uses[iid] == 0)
5626 ivs->n_regs--;
5627 }
5628 }
5629
5630 /* Set USE not to be expressed by any candidate in IVS. */
5631
5632 static void
5633 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5634 struct iv_use *use)
5635 {
5636 unsigned uid = use->id, cid;
5637 struct cost_pair *cp;
5638
5639 cp = ivs->cand_for_use[uid];
5640 if (!cp)
5641 return;
5642 cid = cp->cand->id;
5643
5644 ivs->bad_uses++;
5645 ivs->cand_for_use[uid] = NULL;
5646 ivs->n_cand_uses[cid]--;
5647
5648 if (ivs->n_cand_uses[cid] == 0)
5649 {
5650 bitmap_clear_bit (ivs->cands, cid);
5651 /* Do not count the pseudocandidates. */
5652 if (cp->cand->iv)
5653 ivs->n_regs--;
5654 ivs->n_cands--;
5655 ivs->cand_cost -= cp->cand->cost;
5656
5657 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5658 }
5659
5660 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5661
5662 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5663
5664 if (cp->inv_expr_id != -1)
5665 {
5666 ivs->used_inv_expr[cp->inv_expr_id]--;
5667 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5668 ivs->num_used_inv_expr--;
5669 }
5670 iv_ca_recount_cost (data, ivs);
5671 }
5672
5673 /* Add invariants in set INVS to set IVS. */
5674
5675 static void
5676 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5677 {
5678 bitmap_iterator bi;
5679 unsigned iid;
5680
5681 if (!invs)
5682 return;
5683
5684 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5685 {
5686 ivs->n_invariant_uses[iid]++;
5687 if (ivs->n_invariant_uses[iid] == 1)
5688 ivs->n_regs++;
5689 }
5690 }
5691
5692 /* Set cost pair for USE in set IVS to CP. */
5693
5694 static void
5695 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5696 struct iv_use *use, struct cost_pair *cp)
5697 {
5698 unsigned uid = use->id, cid;
5699
5700 if (ivs->cand_for_use[uid] == cp)
5701 return;
5702
5703 if (ivs->cand_for_use[uid])
5704 iv_ca_set_no_cp (data, ivs, use);
5705
5706 if (cp)
5707 {
5708 cid = cp->cand->id;
5709
5710 ivs->bad_uses--;
5711 ivs->cand_for_use[uid] = cp;
5712 ivs->n_cand_uses[cid]++;
5713 if (ivs->n_cand_uses[cid] == 1)
5714 {
5715 bitmap_set_bit (ivs->cands, cid);
5716 /* Do not count the pseudocandidates. */
5717 if (cp->cand->iv)
5718 ivs->n_regs++;
5719 ivs->n_cands++;
5720 ivs->cand_cost += cp->cand->cost;
5721
5722 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5723 }
5724
5725 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5726 iv_ca_set_add_invariants (ivs, cp->depends_on);
5727
5728 if (cp->inv_expr_id != -1)
5729 {
5730 ivs->used_inv_expr[cp->inv_expr_id]++;
5731 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5732 ivs->num_used_inv_expr++;
5733 }
5734 iv_ca_recount_cost (data, ivs);
5735 }
5736 }
5737
5738 /* Extend set IVS by expressing USE by some of the candidates in it
5739 if possible. Consider all important candidates if candidates in
5740 set IVS don't give any result. */
5741
5742 static void
5743 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5744 struct iv_use *use)
5745 {
5746 struct cost_pair *best_cp = NULL, *cp;
5747 bitmap_iterator bi;
5748 unsigned i;
5749 struct iv_cand *cand;
5750
5751 gcc_assert (ivs->upto >= use->id);
5752 ivs->upto++;
5753 ivs->bad_uses++;
5754
5755 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5756 {
5757 cand = iv_cand (data, i);
5758 cp = get_use_iv_cost (data, use, cand);
5759 if (cheaper_cost_pair (cp, best_cp))
5760 best_cp = cp;
5761 }
5762
5763 if (best_cp == NULL)
5764 {
5765 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5766 {
5767 cand = iv_cand (data, i);
5768 cp = get_use_iv_cost (data, use, cand);
5769 if (cheaper_cost_pair (cp, best_cp))
5770 best_cp = cp;
5771 }
5772 }
5773
5774 iv_ca_set_cp (data, ivs, use, best_cp);
5775 }
5776
5777 /* Get cost for assignment IVS. */
5778
5779 static comp_cost
5780 iv_ca_cost (struct iv_ca *ivs)
5781 {
5782 /* This was a conditional expression but it triggered a bug in
5783 Sun C 5.5. */
5784 if (ivs->bad_uses)
5785 return infinite_cost;
5786 else
5787 return ivs->cost;
5788 }
5789
5790 /* Returns true if all dependences of CP are among invariants in IVS. */
5791
5792 static bool
5793 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5794 {
5795 unsigned i;
5796 bitmap_iterator bi;
5797
5798 if (!cp->depends_on)
5799 return true;
5800
5801 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5802 {
5803 if (ivs->n_invariant_uses[i] == 0)
5804 return false;
5805 }
5806
5807 return true;
5808 }
5809
5810 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
5811 chains it before NEXT_CHANGE. */
5812
5813 static struct iv_ca_delta *
5814 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5815 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5816 {
5817 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5818
5819 change->use = use;
5820 change->old_cp = old_cp;
5821 change->new_cp = new_cp;
5822 change->next_change = next_change;
5823
5824 return change;
5825 }
5826
5827 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5828 are rewritten. */
5829
5830 static struct iv_ca_delta *
5831 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5832 {
5833 struct iv_ca_delta *last;
5834
5835 if (!l2)
5836 return l1;
5837
5838 if (!l1)
5839 return l2;
5840
5841 for (last = l1; last->next_change; last = last->next_change)
5842 continue;
5843 last->next_change = l2;
5844
5845 return l1;
5846 }
5847
5848 /* Reverse the list of changes DELTA, forming the inverse to it. */
5849
5850 static struct iv_ca_delta *
5851 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5852 {
5853 struct iv_ca_delta *act, *next, *prev = NULL;
5854
5855 for (act = delta; act; act = next)
5856 {
5857 next = act->next_change;
5858 act->next_change = prev;
5859 prev = act;
5860
5861 std::swap (act->old_cp, act->new_cp);
5862 }
5863
5864 return prev;
5865 }
5866
5867 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5868 reverted instead. */
5869
5870 static void
5871 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5872 struct iv_ca_delta *delta, bool forward)
5873 {
5874 struct cost_pair *from, *to;
5875 struct iv_ca_delta *act;
5876
5877 if (!forward)
5878 delta = iv_ca_delta_reverse (delta);
5879
5880 for (act = delta; act; act = act->next_change)
5881 {
5882 from = act->old_cp;
5883 to = act->new_cp;
5884 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5885 iv_ca_set_cp (data, ivs, act->use, to);
5886 }
5887
5888 if (!forward)
5889 iv_ca_delta_reverse (delta);
5890 }
5891
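/* A small worked example of the delta lists above (the entries are
   hypothetical):

     delta = iv_ca_delta_add (use_1, cp_a, cp_b,
	       iv_ca_delta_add (use_0, cp_c, cp_d, NULL));

   records that use_0 should switch from cp_c to cp_d and use_1 from
   cp_a to cp_b.  iv_ca_delta_commit (data, ivs, delta, true) applies
   both changes via iv_ca_set_cp; calling it with FORWARD false first
   reverses the list (swapping the old and new pair in each entry),
   restores the original assignment, then reverses the list back, so
   the same DELTA can be committed and reverted repeatedly while the
   cost of a tentative change is probed.  */
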
5892 /* Returns true if CAND is used in IVS. */
5893
5894 static bool
5895 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5896 {
5897 return ivs->n_cand_uses[cand->id] > 0;
5898 }
5899
5900 /* Returns the number of induction variable candidates in the set IVS. */
5901
5902 static unsigned
5903 iv_ca_n_cands (struct iv_ca *ivs)
5904 {
5905 return ivs->n_cands;
5906 }
5907
5908 /* Free the list of changes DELTA. */
5909
5910 static void
5911 iv_ca_delta_free (struct iv_ca_delta **delta)
5912 {
5913 struct iv_ca_delta *act, *next;
5914
5915 for (act = *delta; act; act = next)
5916 {
5917 next = act->next_change;
5918 free (act);
5919 }
5920
5921 *delta = NULL;
5922 }
5923
5924 /* Allocates a new iv candidate assignment. */
5925
5926 static struct iv_ca *
5927 iv_ca_new (struct ivopts_data *data)
5928 {
5929 struct iv_ca *nw = XNEW (struct iv_ca);
5930
5931 nw->upto = 0;
5932 nw->bad_uses = 0;
5933 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5934 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5935 nw->cands = BITMAP_ALLOC (NULL);
5936 nw->n_cands = 0;
5937 nw->n_regs = 0;
5938 nw->cand_use_cost = no_cost;
5939 nw->cand_cost = 0;
5940 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5941 nw->cost = no_cost;
5942 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5943 nw->num_used_inv_expr = 0;
5944
5945 return nw;
5946 }
5947
5948 /* Free memory occupied by the set IVS. */
5949
5950 static void
5951 iv_ca_free (struct iv_ca **ivs)
5952 {
5953 free ((*ivs)->cand_for_use);
5954 free ((*ivs)->n_cand_uses);
5955 BITMAP_FREE ((*ivs)->cands);
5956 free ((*ivs)->n_invariant_uses);
5957 free ((*ivs)->used_inv_expr);
5958 free (*ivs);
5959 *ivs = NULL;
5960 }
5961
5962 /* Dumps IVS to FILE. */
5963
5964 static void
5965 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5966 {
5967 const char *pref = " invariants ";
5968 unsigned i;
5969 comp_cost cost = iv_ca_cost (ivs);
5970
5971 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5972 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5973 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5974 bitmap_print (file, ivs->cands, " candidates: ","\n");
5975
5976 for (i = 0; i < ivs->upto; i++)
5977 {
5978 struct iv_use *use = iv_use (data, i);
5979 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5980 if (cp)
5981 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5982 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5983 else
5984 fprintf (file, " use:%d --> ??\n", use->id);
5985 }
5986
5987 for (i = 1; i <= data->max_inv_id; i++)
5988 if (ivs->n_invariant_uses[i])
5989 {
5990 fprintf (file, "%s%d", pref, i);
5991 pref = ", ";
5992 }
5993 fprintf (file, "\n\n");
5994 }
5995
5996 /* Try changing candidate in IVS to CAND for each use. Return the cost of the
5997 new set, and store differences in DELTA. The number of induction variables
5998 in the new set is stored in N_IVS. When MIN_NCAND is true, the function
5999 tries to find a solution with a minimal number of iv candidates. */
6000
6001 static comp_cost
6002 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6003 struct iv_cand *cand, struct iv_ca_delta **delta,
6004 unsigned *n_ivs, bool min_ncand)
6005 {
6006 unsigned i;
6007 comp_cost cost;
6008 struct iv_use *use;
6009 struct cost_pair *old_cp, *new_cp;
6010
6011 *delta = NULL;
6012 for (i = 0; i < ivs->upto; i++)
6013 {
6014 use = iv_use (data, i);
6015 old_cp = iv_ca_cand_for_use (ivs, use);
6016
6017 if (old_cp
6018 && old_cp->cand == cand)
6019 continue;
6020
6021 new_cp = get_use_iv_cost (data, use, cand);
6022 if (!new_cp)
6023 continue;
6024
6025 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6026 continue;
6027
6028 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6029 continue;
6030
6031 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6032 }
6033
6034 iv_ca_delta_commit (data, ivs, *delta, true);
6035 cost = iv_ca_cost (ivs);
6036 if (n_ivs)
6037 *n_ivs = iv_ca_n_cands (ivs);
6038 iv_ca_delta_commit (data, ivs, *delta, false);
6039
6040 return cost;
6041 }
6042
6043 /* Try narrowing set IVS by removing CAND. Return the cost of
6044 the new set and store the differences in DELTA. START is
6045 the candidate with which we start narrowing. */
6046
6047 static comp_cost
6048 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6049 struct iv_cand *cand, struct iv_cand *start,
6050 struct iv_ca_delta **delta)
6051 {
6052 unsigned i, ci;
6053 struct iv_use *use;
6054 struct cost_pair *old_cp, *new_cp, *cp;
6055 bitmap_iterator bi;
6056 struct iv_cand *cnd;
6057 comp_cost cost, best_cost, acost;
6058
6059 *delta = NULL;
6060 for (i = 0; i < n_iv_uses (data); i++)
6061 {
6062 use = iv_use (data, i);
6063
6064 old_cp = iv_ca_cand_for_use (ivs, use);
6065 if (old_cp->cand != cand)
6066 continue;
6067
6068 best_cost = iv_ca_cost (ivs);
6069 /* Start narrowing with START. */
6070 new_cp = get_use_iv_cost (data, use, start);
6071
6072 if (data->consider_all_candidates)
6073 {
6074 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6075 {
6076 if (ci == cand->id || (start && ci == start->id))
6077 continue;
6078
6079 cnd = iv_cand (data, ci);
6080
6081 cp = get_use_iv_cost (data, use, cnd);
6082 if (!cp)
6083 continue;
6084
6085 iv_ca_set_cp (data, ivs, use, cp);
6086 acost = iv_ca_cost (ivs);
6087
6088 if (compare_costs (acost, best_cost) < 0)
6089 {
6090 best_cost = acost;
6091 new_cp = cp;
6092 }
6093 }
6094 }
6095 else
6096 {
6097 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6098 {
6099 if (ci == cand->id || (start && ci == start->id))
6100 continue;
6101
6102 cnd = iv_cand (data, ci);
6103
6104 cp = get_use_iv_cost (data, use, cnd);
6105 if (!cp)
6106 continue;
6107
6108 iv_ca_set_cp (data, ivs, use, cp);
6109 acost = iv_ca_cost (ivs);
6110
6111 if (compare_costs (acost, best_cost) < 0)
6112 {
6113 best_cost = acost;
6114 new_cp = cp;
6115 }
6116 }
6117 }
6118 /* Restore to old cp for use. */
6119 iv_ca_set_cp (data, ivs, use, old_cp);
6120
6121 if (!new_cp)
6122 {
6123 iv_ca_delta_free (delta);
6124 return infinite_cost;
6125 }
6126
6127 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6128 }
6129
6130 iv_ca_delta_commit (data, ivs, *delta, true);
6131 cost = iv_ca_cost (ivs);
6132 iv_ca_delta_commit (data, ivs, *delta, false);
6133
6134 return cost;
6135 }
6136
6137 /* Try optimizing the set of candidates IVS by removing candidates different
6138 from EXCEPT_CAND from it. Return the cost of the new set, and store
6139 differences in DELTA. */
6140
6141 static comp_cost
6142 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6143 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6144 {
6145 bitmap_iterator bi;
6146 struct iv_ca_delta *act_delta, *best_delta;
6147 unsigned i;
6148 comp_cost best_cost, acost;
6149 struct iv_cand *cand;
6150
6151 best_delta = NULL;
6152 best_cost = iv_ca_cost (ivs);
6153
6154 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6155 {
6156 cand = iv_cand (data, i);
6157
6158 if (cand == except_cand)
6159 continue;
6160
6161 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6162
6163 if (compare_costs (acost, best_cost) < 0)
6164 {
6165 best_cost = acost;
6166 iv_ca_delta_free (&best_delta);
6167 best_delta = act_delta;
6168 }
6169 else
6170 iv_ca_delta_free (&act_delta);
6171 }
6172
6173 if (!best_delta)
6174 {
6175 *delta = NULL;
6176 return best_cost;
6177 }
6178
6179 /* Recurse to possibly remove other unnecessary ivs. */
6180 iv_ca_delta_commit (data, ivs, best_delta, true);
6181 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6182 iv_ca_delta_commit (data, ivs, best_delta, false);
6183 *delta = iv_ca_delta_join (best_delta, *delta);
6184 return best_cost;
6185 }
6186
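/* A concrete illustration of the pruning above (candidate names are
   hypothetical): with IVS = {c1, c2, c3} and EXCEPT_CAND == NULL, the
   loop tries to narrow the set by each candidate in turn.  Narrowing
   by c2 re-expresses every use currently served by c2 using c1 or c3,
   and the resulting cost is compared with the best seen so far.  Once
   the cheapest removal is found, it is committed tentatively and the
   recursion tries to drop further candidates from the smaller set;
   the combined delta is then returned to the caller.  */
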
6187 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a cheaper
6188 local cost for USE than BEST_CP. If so, return a pointer to its
6189 cost_pair; otherwise just return BEST_CP. */
6190
6191 static struct cost_pair*
6192 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6193 unsigned int cand_idx, struct iv_cand *old_cand,
6194 struct cost_pair *best_cp)
6195 {
6196 struct iv_cand *cand;
6197 struct cost_pair *cp;
6198
6199 gcc_assert (old_cand != NULL && best_cp != NULL);
6200 if (cand_idx == old_cand->id)
6201 return best_cp;
6202
6203 cand = iv_cand (data, cand_idx);
6204 cp = get_use_iv_cost (data, use, cand);
6205 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6206 return cp;
6207
6208 return best_cp;
6209 }
6210
6211 /* Try breaking the locally optimal fixed point for IVS by replacing
6212 candidates that are used by more than one iv use. For each such candidate,
6213 this function tries to represent the iv uses under that candidate using
6214 other candidates with lower local cost, then tries to prune the new set.
6215 If the new set has a lower cost, it returns the new cost after recording
6216 the candidate replacement in list DELTA. */
6217
6218 static comp_cost
6219 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6220 struct iv_ca_delta **delta)
6221 {
6222 bitmap_iterator bi, bj;
6223 unsigned int i, j, k;
6224 struct iv_use *use;
6225 struct iv_cand *cand;
6226 comp_cost orig_cost, acost;
6227 struct iv_ca_delta *act_delta, *tmp_delta;
6228 struct cost_pair *old_cp, *best_cp = NULL;
6229
6230 *delta = NULL;
6231 orig_cost = iv_ca_cost (ivs);
6232
6233 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6234 {
6235 if (ivs->n_cand_uses[i] == 1
6236 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6237 continue;
6238
6239 cand = iv_cand (data, i);
6240
6241 act_delta = NULL;
6242 /* Represent uses under current candidate using other ones with
6243 lower local cost. */
6244 for (j = 0; j < ivs->upto; j++)
6245 {
6246 use = iv_use (data, j);
6247 old_cp = iv_ca_cand_for_use (ivs, use);
6248
6249 if (old_cp->cand != cand)
6250 continue;
6251
6252 best_cp = old_cp;
6253 if (data->consider_all_candidates)
6254 for (k = 0; k < n_iv_cands (data); k++)
6255 best_cp = cheaper_cost_with_cand (data, use, k,
6256 old_cp->cand, best_cp);
6257 else
6258 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6259 best_cp = cheaper_cost_with_cand (data, use, k,
6260 old_cp->cand, best_cp);
6261
6262 if (best_cp == old_cp)
6263 continue;
6264
6265 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6266 }
6267 /* No need for further prune. */
6268 if (!act_delta)
6269 continue;
6270
6271 /* Prune the new candidate set. */
6272 iv_ca_delta_commit (data, ivs, act_delta, true);
6273 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6274 iv_ca_delta_commit (data, ivs, act_delta, false);
6275 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6276
6277 if (compare_costs (acost, orig_cost) < 0)
6278 {
6279 *delta = act_delta;
6280 return acost;
6281 }
6282 else
6283 iv_ca_delta_free (&act_delta);
6284 }
6285
6286 return orig_cost;
6287 }
6288
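/* For instance (names hypothetical): if candidate c is used by exactly
   two uses u1 and u2, and u1 is locally cheaper with c1 while u2 is
   locally cheaper with c2, the function above records those two
   replacements, prunes the resulting set (which may now drop c
   entirely, since it no longer serves any use), and keeps the change
   only if the total cost decreases.  */
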
6289 /* Tries to extend the set IVS in the best possible way in order
6290 to express USE. If ORIGINALP is true, prefer candidates from
6291 the original set of IVs; otherwise favor important candidates not
6292 based on any memory object. */
6293
6294 static bool
6295 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6296 struct iv_use *use, bool originalp)
6297 {
6298 comp_cost best_cost, act_cost;
6299 unsigned i;
6300 bitmap_iterator bi;
6301 struct iv_cand *cand;
6302 struct iv_ca_delta *best_delta = NULL, *act_delta;
6303 struct cost_pair *cp;
6304
6305 iv_ca_add_use (data, ivs, use);
6306 best_cost = iv_ca_cost (ivs);
6307 cp = iv_ca_cand_for_use (ivs, use);
6308 if (cp)
6309 {
6310 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6311 iv_ca_set_no_cp (data, ivs, use);
6312 }
6313
6314 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6315 first try important candidates not based on any memory object. Only if
6316 this fails, try the specific ones. Rationale -- in loops with many
6317 variables the best choice often is to use just one generic biv. If we
6318 added here many ivs specific to the uses, the optimization algorithm later
6319 would be likely to get stuck in a local minimum, thus causing us to create
6320 too many ivs. The approach from few ivs to more seems more likely to be
6321 successful -- starting from few ivs, replacing an expensive use by a
6322 specific iv should always be a win. */
6323 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6324 {
6325 cand = iv_cand (data, i);
6326
6327 if (originalp && cand->pos != IP_ORIGINAL)
6328 continue;
6329
6330 if (!originalp && cand->iv->base_object != NULL_TREE)
6331 continue;
6332
6333 if (iv_ca_cand_used_p (ivs, cand))
6334 continue;
6335
6336 cp = get_use_iv_cost (data, use, cand);
6337 if (!cp)
6338 continue;
6339
6340 iv_ca_set_cp (data, ivs, use, cp);
6341 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6342 true);
6343 iv_ca_set_no_cp (data, ivs, use);
6344 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6345
6346 if (compare_costs (act_cost, best_cost) < 0)
6347 {
6348 best_cost = act_cost;
6349
6350 iv_ca_delta_free (&best_delta);
6351 best_delta = act_delta;
6352 }
6353 else
6354 iv_ca_delta_free (&act_delta);
6355 }
6356
6357 if (infinite_cost_p (best_cost))
6358 {
6359 for (i = 0; i < use->n_map_members; i++)
6360 {
6361 cp = use->cost_map + i;
6362 cand = cp->cand;
6363 if (!cand)
6364 continue;
6365
6366 /* Already tried this. */
6367 if (cand->important)
6368 {
6369 if (originalp && cand->pos == IP_ORIGINAL)
6370 continue;
6371 if (!originalp && cand->iv->base_object == NULL_TREE)
6372 continue;
6373 }
6374
6375 if (iv_ca_cand_used_p (ivs, cand))
6376 continue;
6377
6378 act_delta = NULL;
6379 iv_ca_set_cp (data, ivs, use, cp);
6380 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6381 iv_ca_set_no_cp (data, ivs, use);
6382 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6383 cp, act_delta);
6384
6385 if (compare_costs (act_cost, best_cost) < 0)
6386 {
6387 best_cost = act_cost;
6388
6389 if (best_delta)
6390 iv_ca_delta_free (&best_delta);
6391 best_delta = act_delta;
6392 }
6393 else
6394 iv_ca_delta_free (&act_delta);
6395 }
6396 }
6397
6398 iv_ca_delta_commit (data, ivs, best_delta, true);
6399 iv_ca_delta_free (&best_delta);
6400
6401 return !infinite_cost_p (best_cost);
6402 }
6403
6404 /* Finds an initial assignment of candidates to uses. */
6405
6406 static struct iv_ca *
6407 get_initial_solution (struct ivopts_data *data, bool originalp)
6408 {
6409 struct iv_ca *ivs = iv_ca_new (data);
6410 unsigned i;
6411
6412 for (i = 0; i < n_iv_uses (data); i++)
6413 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6414 {
6415 iv_ca_free (&ivs);
6416 return NULL;
6417 }
6418
6419 return ivs;
6420 }
6421
6422 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6423 points to a bool variable; if it is true, this function tries to break
6424 the locally optimal fixed point by replacing candidates in IVS. */
6425
6426 static bool
6427 try_improve_iv_set (struct ivopts_data *data,
6428 struct iv_ca *ivs, bool *try_replace_p)
6429 {
6430 unsigned i, n_ivs;
6431 comp_cost acost, best_cost = iv_ca_cost (ivs);
6432 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6433 struct iv_cand *cand;
6434
6435 /* Try extending the set of induction variables by one. */
6436 for (i = 0; i < n_iv_cands (data); i++)
6437 {
6438 cand = iv_cand (data, i);
6439
6440 if (iv_ca_cand_used_p (ivs, cand))
6441 continue;
6442
6443 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6444 if (!act_delta)
6445 continue;
6446
6447 /* If we successfully added the candidate and the set is small enough,
6448 try optimizing it by removing other candidates. */
6449 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6450 {
6451 iv_ca_delta_commit (data, ivs, act_delta, true);
6452 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6453 iv_ca_delta_commit (data, ivs, act_delta, false);
6454 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6455 }
6456
6457 if (compare_costs (acost, best_cost) < 0)
6458 {
6459 best_cost = acost;
6460 iv_ca_delta_free (&best_delta);
6461 best_delta = act_delta;
6462 }
6463 else
6464 iv_ca_delta_free (&act_delta);
6465 }
6466
6467 if (!best_delta)
6468 {
6469 /* Try removing the candidates from the set instead. */
6470 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6471
6472 if (!best_delta && *try_replace_p)
6473 {
6474 *try_replace_p = false;
6475 /* So far the candidate selection algorithm tends to choose fewer IVs
6476 so that it can handle loops with many variables for which the best
6477 choice is often to use only one general biv. One weakness is that
6478 it cannot handle the opposite case, in which different candidates
6479 should be chosen with respect to each use. To solve this problem,
6480 we replace candidates in the manner described in the comment of
6481 iv_ca_replace, thus giving the general algorithm a chance to break
6482 the locally optimal fixed point in these cases. */
6483 best_cost = iv_ca_replace (data, ivs, &best_delta);
6484 }
6485
6486 if (!best_delta)
6487 return false;
6488 }
6489
6490 iv_ca_delta_commit (data, ivs, best_delta, true);
6491 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6492 iv_ca_delta_free (&best_delta);
6493 return true;
6494 }
6495
6496 /* Attempts to find the optimal set of induction variables. We use a simple
6497 greedy heuristic -- we try to replace at most one candidate in the selected
6498 solution and remove the unused ivs while this improves the cost. */
6499
6500 static struct iv_ca *
6501 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6502 {
6503 struct iv_ca *set;
6504 bool try_replace_p = true;
6505
6506 /* Get the initial solution. */
6507 set = get_initial_solution (data, originalp);
6508 if (!set)
6509 {
6510 if (dump_file && (dump_flags & TDF_DETAILS))
6511 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6512 return NULL;
6513 }
6514
6515 if (dump_file && (dump_flags & TDF_DETAILS))
6516 {
6517 fprintf (dump_file, "Initial set of candidates:\n");
6518 iv_ca_dump (data, dump_file, set);
6519 }
6520
6521 while (try_improve_iv_set (data, set, &try_replace_p))
6522 {
6523 if (dump_file && (dump_flags & TDF_DETAILS))
6524 {
6525 fprintf (dump_file, "Improved to:\n");
6526 iv_ca_dump (data, dump_file, set);
6527 }
6528 }
6529
6530 return set;
6531 }
6532
6533 static struct iv_ca *
6534 find_optimal_iv_set (struct ivopts_data *data)
6535 {
6536 unsigned i;
6537 struct iv_ca *set, *origset;
6538 struct iv_use *use;
6539 comp_cost cost, origcost;
6540
6541 /* Determine the cost based on a strategy that starts with the original IVs,
6542 and try again using a strategy that prefers candidates not based
6543 on any memory object. */
6544 origset = find_optimal_iv_set_1 (data, true);
6545 set = find_optimal_iv_set_1 (data, false);
6546
6547 if (!origset && !set)
6548 return NULL;
6549
6550 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6551 cost = set ? iv_ca_cost (set) : infinite_cost;
6552
6553 if (dump_file && (dump_flags & TDF_DETAILS))
6554 {
6555 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6556 origcost.cost, origcost.complexity);
6557 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6558 cost.cost, cost.complexity);
6559 }
6560
6561 /* Choose the one with the best cost. */
6562 if (compare_costs (origcost, cost) <= 0)
6563 {
6564 if (set)
6565 iv_ca_free (&set);
6566 set = origset;
6567 }
6568 else if (origset)
6569 iv_ca_free (&origset);
6570
6571 for (i = 0; i < n_iv_uses (data); i++)
6572 {
6573 use = iv_use (data, i);
6574 use->selected = iv_ca_cand_for_use (set, use)->cand;
6575 }
6576
6577 return set;
6578 }
6579
6580 /* Creates a new induction variable corresponding to CAND. */
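
/* A sketch of the result (the GIMPLE below is illustrative only): for a
   candidate with base B, step S and position IP_NORMAL, create_iv
   produces roughly

     # ivtmp_1 = PHI <B (preheader), ivtmp_2 (latch)>
     ...
     ivtmp_2 = ivtmp_1 + S;   <-- at ip_normal_pos, i.e. just before
				  the exit test

   IP_END places the increment at the end of the latch instead, and
   IP_ORIGINAL reuses the existing biv, only marking it as preserved.  */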
6581
6582 static void
6583 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6584 {
6585 gimple_stmt_iterator incr_pos;
6586 tree base;
6587 bool after = false;
6588
6589 if (!cand->iv)
6590 return;
6591
6592 switch (cand->pos)
6593 {
6594 case IP_NORMAL:
6595 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6596 break;
6597
6598 case IP_END:
6599 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6600 after = true;
6601 break;
6602
6603 case IP_AFTER_USE:
6604 after = true;
6605 /* fall through */
6606 case IP_BEFORE_USE:
6607 incr_pos = gsi_for_stmt (cand->incremented_at);
6608 break;
6609
6610 case IP_ORIGINAL:
6611 /* Mark that the iv is preserved. */
6612 name_info (data, cand->var_before)->preserve_biv = true;
6613 name_info (data, cand->var_after)->preserve_biv = true;
6614
6615 /* Rewrite the increment so that it uses var_before directly. */
6616 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6617 return;
6618 }
6619
6620 gimple_add_tmp_var (cand->var_before);
6621
6622 base = unshare_expr (cand->iv->base);
6623
6624 create_iv (base, unshare_expr (cand->iv->step),
6625 cand->var_before, data->current_loop,
6626 &incr_pos, after, &cand->var_before, &cand->var_after);
6627 }
6628
6629 /* Creates new induction variables described in SET. */
6630
6631 static void
6632 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6633 {
6634 unsigned i;
6635 struct iv_cand *cand;
6636 bitmap_iterator bi;
6637
6638 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6639 {
6640 cand = iv_cand (data, i);
6641 create_new_iv (data, cand);
6642 }
6643
6644 if (dump_file && (dump_flags & TDF_DETAILS))
6645 {
6646 fprintf (dump_file, "Selected IV set for loop %d",
6647 data->current_loop->num);
6648 if (data->loop_loc != UNKNOWN_LOCATION)
6649 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6650 LOCATION_LINE (data->loop_loc));
6651 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6652 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6653 {
6654 cand = iv_cand (data, i);
6655 dump_cand (dump_file, cand);
6656 }
6657 fprintf (dump_file, "\n");
6658 }
6659 }
6660
6661 /* Rewrites USE (definition of iv used in a nonlinear expression)
6662 using candidate CAND. */
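
/* A sketch of the effect (SSA names are hypothetical): if the use is the
   definition

     i_7 = PHI <0 (preheader), i_8 (latch)>

   and i_7 need not be preserved as a biv, the PHI node is removed and
   replaced by an assignment computing the same value from the selected
   candidate, e.g.

     i_7 = (int) ivtmp_3;

   inserted after the labels of the block; any casts and offsets come
   from get_computation.  */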
6663
6664 static void
6665 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6666 struct iv_use *use, struct iv_cand *cand)
6667 {
6668 tree comp;
6669 tree op, tgt;
6670 gassign *ass;
6671 gimple_stmt_iterator bsi;
6672
6673 /* An important special case -- if we are asked to express the value of
6674 the original iv by itself, just exit; there is no need to
6675 introduce a new computation (that might also need casting the
6676 variable to unsigned and back). */
6677 if (cand->pos == IP_ORIGINAL
6678 && cand->incremented_at == use->stmt)
6679 {
6680 enum tree_code stmt_code;
6681
6682 gcc_assert (is_gimple_assign (use->stmt));
6683 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6684
6685 /* Check whether we may leave the computation unchanged.
6686 This is the case only if it does not rely on other
6687 computations in the loop -- otherwise, the computation
6688 we rely upon may be removed in remove_unused_ivs,
6689 thus leading to ICE. */
6690 stmt_code = gimple_assign_rhs_code (use->stmt);
6691 if (stmt_code == PLUS_EXPR
6692 || stmt_code == MINUS_EXPR
6693 || stmt_code == POINTER_PLUS_EXPR)
6694 {
6695 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6696 op = gimple_assign_rhs2 (use->stmt);
6697 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6698 op = gimple_assign_rhs1 (use->stmt);
6699 else
6700 op = NULL_TREE;
6701 }
6702 else
6703 op = NULL_TREE;
6704
6705 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6706 return;
6707 }
6708
6709 comp = get_computation (data->current_loop, use, cand);
6710 gcc_assert (comp != NULL_TREE);
6711
6712 switch (gimple_code (use->stmt))
6713 {
6714 case GIMPLE_PHI:
6715 tgt = PHI_RESULT (use->stmt);
6716
6717 /* If we should keep the biv, do not replace it. */
6718 if (name_info (data, tgt)->preserve_biv)
6719 return;
6720
6721 bsi = gsi_after_labels (gimple_bb (use->stmt));
6722 break;
6723
6724 case GIMPLE_ASSIGN:
6725 tgt = gimple_assign_lhs (use->stmt);
6726 bsi = gsi_for_stmt (use->stmt);
6727 break;
6728
6729 default:
6730 gcc_unreachable ();
6731 }
6732
6733 if (!valid_gimple_rhs_p (comp)
6734 || (gimple_code (use->stmt) != GIMPLE_PHI
6735 /* We can't allow re-allocating the stmt as it might be pointed
6736 to still. */
6737 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6738 >= gimple_num_ops (gsi_stmt (bsi)))))
6739 {
6740 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6741 true, GSI_SAME_STMT);
6742 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6743 {
6744 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6745 /* As this isn't a plain copy we have to reset alignment
6746 information. */
6747 if (SSA_NAME_PTR_INFO (comp))
6748 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6749 }
6750 }
6751
6752 if (gimple_code (use->stmt) == GIMPLE_PHI)
6753 {
6754 ass = gimple_build_assign (tgt, comp);
6755 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6756
6757 bsi = gsi_for_stmt (use->stmt);
6758 remove_phi_node (&bsi, false);
6759 }
6760 else
6761 {
6762 gimple_assign_set_rhs_from_tree (&bsi, comp);
6763 use->stmt = gsi_stmt (bsi);
6764 }
6765 }
6766
6767 /* Performs a peephole optimization to reorder the iv update statement with
6768 a mem ref to enable instruction combining in later phases. The mem ref uses
6769 the iv value before the update, so the reordering transformation requires
6770 adjustment of the offset. CAND is the selected IV_CAND.
6771
6772 Example:
6773
6774 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6775 iv2 = iv1 + 1;
6776
6777 if (t < val) (1)
6778 goto L;
6779 goto Head;
6780
6781
6782 Directly propagating t over to (1) will introduce an overlapping live range
6783 and thus increase register pressure. This peephole transforms it into:
6784
6785
6786 iv2 = iv1 + 1;
6787 t = MEM_REF (base, iv2, 8, 8);
6788 if (t < val)
6789 goto L;
6790 goto Head;
6791 */
6792
6793 static void
6794 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6795 {
6796 tree var_after;
6797 gimple iv_update, stmt;
6798 basic_block bb;
6799 gimple_stmt_iterator gsi, gsi_iv;
6800
6801 if (cand->pos != IP_NORMAL)
6802 return;
6803
6804 var_after = cand->var_after;
6805 iv_update = SSA_NAME_DEF_STMT (var_after);
6806
6807 bb = gimple_bb (iv_update);
6808 gsi = gsi_last_nondebug_bb (bb);
6809 stmt = gsi_stmt (gsi);
6810
6811 /* Only handle conditional statement for now. */
6812 if (gimple_code (stmt) != GIMPLE_COND)
6813 return;
6814
6815 gsi_prev_nondebug (&gsi);
6816 stmt = gsi_stmt (gsi);
6817 if (stmt != iv_update)
6818 return;
6819
6820 gsi_prev_nondebug (&gsi);
6821 if (gsi_end_p (gsi))
6822 return;
6823
6824 stmt = gsi_stmt (gsi);
6825 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6826 return;
6827
6828 if (stmt != use->stmt)
6829 return;
6830
6831 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6832 return;
6833
6834 if (dump_file && (dump_flags & TDF_DETAILS))
6835 {
6836 fprintf (dump_file, "Reordering \n");
6837 print_gimple_stmt (dump_file, iv_update, 0, 0);
6838 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6839 fprintf (dump_file, "\n");
6840 }
6841
6842 gsi = gsi_for_stmt (use->stmt);
6843 gsi_iv = gsi_for_stmt (iv_update);
6844 gsi_move_before (&gsi_iv, &gsi);
6845
6846 cand->pos = IP_BEFORE_USE;
6847 cand->incremented_at = use->stmt;
6848 }
6849
6850 /* Rewrites USE (address that is an iv) using candidate CAND. */
6851
6852 static void
6853 rewrite_use_address_1 (struct ivopts_data *data,
6854 struct iv_use *use, struct iv_cand *cand)
6855 {
6856 aff_tree aff;
6857 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6858 tree base_hint = NULL_TREE;
6859 tree ref, iv;
6860 bool ok;
6861
6862 adjust_iv_update_pos (cand, use);
6863 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6864 gcc_assert (ok);
6865 unshare_aff_combination (&aff);
6866
6867 /* To avoid undefined overflow problems, all IV candidates use unsigned
6868 integer types. The drawback is that this makes it impossible for
6869 create_mem_ref to distinguish an IV that is based on a memory object
6870 from one that represents simply an offset.
6871
6872 To work around this problem, we pass a hint to create_mem_ref that
6873 indicates which variable (if any) in aff is an IV based on a memory
6874 object. Note that we only consider the candidate. If this is not
6875 based on an object, the base of the reference is in some subexpression
6876 of the use -- but these will use pointer types, so they are recognized
6877 by the create_mem_ref heuristics anyway. */
6878 if (cand->iv->base_object)
6879 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6880
6881 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6882 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6883 reference_alias_ptr_type (*use->op_p),
6884 iv, base_hint, data->speed);
6885 copy_ref_info (ref, *use->op_p);
6886 *use->op_p = ref;
6887 }
6888
6889 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6890 first use of a group, rewrites sub uses in the group too. */
6891
6892 static void
6893 rewrite_use_address (struct ivopts_data *data,
6894 struct iv_use *use, struct iv_cand *cand)
6895 {
6896 struct iv_use *next;
6897
6898 gcc_assert (use->sub_id == 0);
6899 rewrite_use_address_1 (data, use, cand);
6900 update_stmt (use->stmt);
6901
6902 for (next = use->next; next != NULL; next = next->next)
6903 {
6904 rewrite_use_address_1 (data, next, cand);
6905 update_stmt (next->stmt);
6906 }
6907
6908 return;
6909 }
6910
6911 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6912 candidate CAND. */
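
/* A sketch of the replacement (SSA names are hypothetical): when iv
   elimination found a bound for the selected candidate, an exit test
   such as

     if (i_5 < n_8)

   is rewritten in place into a test on the candidate, e.g.

     if (ivtmp_9 != bound_11)

   where bound_11 is computed once on the preheader edge.  If no bound
   is available, the original comparison operand is simply re-expressed
   in terms of the candidate.  */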
6913
6914 static void
6915 rewrite_use_compare (struct ivopts_data *data,
6916 struct iv_use *use, struct iv_cand *cand)
6917 {
6918 tree comp, *var_p, op, bound;
6919 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6920 enum tree_code compare;
6921 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6922 bool ok;
6923
6924 bound = cp->value;
6925 if (bound)
6926 {
6927 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6928 tree var_type = TREE_TYPE (var);
6929 gimple_seq stmts;
6930
6931 if (dump_file && (dump_flags & TDF_DETAILS))
6932 {
6933 fprintf (dump_file, "Replacing exit test: ");
6934 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6935 }
6936 compare = cp->comp;
6937 bound = unshare_expr (fold_convert (var_type, bound));
6938 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6939 if (stmts)
6940 gsi_insert_seq_on_edge_immediate (
6941 loop_preheader_edge (data->current_loop),
6942 stmts);
6943
6944 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6945 gimple_cond_set_lhs (cond_stmt, var);
6946 gimple_cond_set_code (cond_stmt, compare);
6947 gimple_cond_set_rhs (cond_stmt, op);
6948 return;
6949 }
6950
6951 /* The induction variable elimination failed; just express the original
6952 giv. */
6953 comp = get_computation (data->current_loop, use, cand);
6954 gcc_assert (comp != NULL_TREE);
6955
6956 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6957 gcc_assert (ok);
6958
6959 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6960 true, GSI_SAME_STMT);
6961 }
6962
6963 /* Rewrites USE using candidate CAND. */
6964
6965 static void
6966 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6967 {
6968 switch (use->type)
6969 {
6970 case USE_NONLINEAR_EXPR:
6971 rewrite_use_nonlinear_expr (data, use, cand);
6972 break;
6973
6974 case USE_ADDRESS:
6975 rewrite_use_address (data, use, cand);
6976 break;
6977
6978 case USE_COMPARE:
6979 rewrite_use_compare (data, use, cand);
6980 break;
6981
6982 default:
6983 gcc_unreachable ();
6984 }
6985
6986 update_stmt (use->stmt);
6987 }
6988
6989 /* Rewrite the uses using the selected induction variables. */
6990
6991 static void
6992 rewrite_uses (struct ivopts_data *data)
6993 {
6994 unsigned i;
6995 struct iv_cand *cand;
6996 struct iv_use *use;
6997
6998 for (i = 0; i < n_iv_uses (data); i++)
6999 {
7000 use = iv_use (data, i);
7001 cand = use->selected;
7002 gcc_assert (cand);
7003
7004 rewrite_use (data, use, cand);
7005 }
7006 }
7007
7008 /* Removes the ivs that are not used after rewriting. */
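
/* A sketch of the debug handling below (names are hypothetical): if a
   removed IV i_3 is still referenced by several debug bind statements,
   the function builds a debug temporary bound to an equivalent
   expression of a surviving candidate, e.g.

     # DEBUG D#1 => (int) ivtmp_7
     # DEBUG i => D#1

   so that the debug info keeps describing the value of i without
   keeping the dead IV alive.  A single debug use is substituted
   directly, without the temporary.  */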
7009
7010 static void
7011 remove_unused_ivs (struct ivopts_data *data)
7012 {
7013 unsigned j;
7014 bitmap_iterator bi;
7015 bitmap toremove = BITMAP_ALLOC (NULL);
7016
7017 /* Figure out an order in which to release SSA DEFs so that we don't
7018 release something that we'd have to propagate into a debug stmt
7019 afterwards. */
7020 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7021 {
7022 struct version_info *info;
7023
7024 info = ver_info (data, j);
7025 if (info->iv
7026 && !integer_zerop (info->iv->step)
7027 && !info->inv_id
7028 && !info->iv->have_use_for
7029 && !info->preserve_biv)
7030 {
7031 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7032
7033 tree def = info->iv->ssa_name;
7034
7035 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7036 {
7037 imm_use_iterator imm_iter;
7038 use_operand_p use_p;
7039 gimple stmt;
7040 int count = 0;
7041
7042 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7043 {
7044 if (!gimple_debug_bind_p (stmt))
7045 continue;
7046
7047 /* We just want to determine whether to do nothing
7048 (count == 0), to substitute the computed
7049 expression into a single use of the SSA DEF by
7050 itself (count == 1), or to use a debug temp
7051 because the SSA DEF is used multiple times or as
7052 part of a larger expression (count > 1). */
7053 count++;
7054 if (gimple_debug_bind_get_value (stmt) != def)
7055 count++;
7056
7057 if (count > 1)
7058 BREAK_FROM_IMM_USE_STMT (imm_iter);
7059 }
7060
7061 if (!count)
7062 continue;
7063
7064 struct iv_use dummy_use;
7065 struct iv_cand *best_cand = NULL, *cand;
7066 unsigned i, best_pref = 0, cand_pref;
7067
7068 memset (&dummy_use, 0, sizeof (dummy_use));
7069 dummy_use.iv = info->iv;
7070 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7071 {
7072 cand = iv_use (data, i)->selected;
7073 if (cand == best_cand)
7074 continue;
7075 cand_pref = operand_equal_p (cand->iv->step,
7076 info->iv->step, 0)
7077 ? 4 : 0;
7078 cand_pref
7079 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7080 == TYPE_MODE (TREE_TYPE (info->iv->base))
7081 ? 2 : 0;
7082 cand_pref
7083 += TREE_CODE (cand->iv->base) == INTEGER_CST
7084 ? 1 : 0;
7085 if (best_cand == NULL || best_pref < cand_pref)
7086 {
7087 best_cand = cand;
7088 best_pref = cand_pref;
7089 }
7090 }
7091
7092 if (!best_cand)
7093 continue;
7094
7095 tree comp = get_computation_at (data->current_loop,
7096 &dummy_use, best_cand,
7097 SSA_NAME_DEF_STMT (def));
7098 if (!comp)
7099 continue;
7100
7101 if (count > 1)
7102 {
7103 tree vexpr = make_node (DEBUG_EXPR_DECL);
7104 DECL_ARTIFICIAL (vexpr) = 1;
7105 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7106 if (SSA_NAME_VAR (def))
7107 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7108 else
7109 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7110 gdebug *def_temp
7111 = gimple_build_debug_bind (vexpr, comp, NULL);
7112 gimple_stmt_iterator gsi;
7113
7114 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7115 gsi = gsi_after_labels (gimple_bb
7116 (SSA_NAME_DEF_STMT (def)));
7117 else
7118 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7119
7120 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7121 comp = vexpr;
7122 }
7123
7124 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7125 {
7126 if (!gimple_debug_bind_p (stmt))
7127 continue;
7128
7129 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7130 SET_USE (use_p, comp);
7131
7132 update_stmt (stmt);
7133 }
7134 }
7135 }
7136 }
7137
7138 release_defs_bitset (toremove);
7139
7140 BITMAP_FREE (toremove);
7141 }
7142
7143 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7144 for hash_map::traverse. */
7145
7146 bool
7147 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7148 {
7149 free (value);
7150 return true;
7151 }
7152
7153 /* Frees data allocated by the optimization of a single loop. */
7154
7155 static void
7156 free_loop_data (struct ivopts_data *data)
7157 {
7158 unsigned i, j;
7159 bitmap_iterator bi;
7160 tree obj;
7161
7162 if (data->niters)
7163 {
7164 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7165 delete data->niters;
7166 data->niters = NULL;
7167 }
7168
7169 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7170 {
7171 struct version_info *info;
7172
7173 info = ver_info (data, i);
7174 info->iv = NULL;
7175 info->has_nonlin_use = false;
7176 info->preserve_biv = false;
7177 info->inv_id = 0;
7178 }
7179 bitmap_clear (data->relevant);
7180 bitmap_clear (data->important_candidates);
7181
7182 for (i = 0; i < n_iv_uses (data); i++)
7183 {
7184 struct iv_use *use = iv_use (data, i);
7185 struct iv_use *pre = use, *sub = use->next;
7186
7187 while (sub)
7188 {
7189 gcc_assert (sub->related_cands == NULL);
7190 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7191
7192 pre = sub;
7193 sub = sub->next;
7194 free (pre);
7195 }
7196
7197 BITMAP_FREE (use->related_cands);
7198 for (j = 0; j < use->n_map_members; j++)
7199 if (use->cost_map[j].depends_on)
7200 BITMAP_FREE (use->cost_map[j].depends_on);
7201 free (use->cost_map);
7202 free (use);
7203 }
7204 data->iv_uses.truncate (0);
7205
7206 for (i = 0; i < n_iv_cands (data); i++)
7207 {
7208 struct iv_cand *cand = iv_cand (data, i);
7209
7210 if (cand->depends_on)
7211 BITMAP_FREE (cand->depends_on);
7212 free (cand);
7213 }
7214 data->iv_candidates.truncate (0);
7215
7216 if (data->version_info_size < num_ssa_names)
7217 {
7218 data->version_info_size = 2 * num_ssa_names;
7219 free (data->version_info);
7220 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7221 }
7222
7223 data->max_inv_id = 0;
7224
7225 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7226 SET_DECL_RTL (obj, NULL_RTX);
7227
7228 decl_rtl_to_reset.truncate (0);
7229
7230 data->inv_expr_tab->empty ();
7231 data->inv_expr_id = 0;
7232 }
7233
7234 /* Finalizes data structures used by the iv optimization pass. */
7236
7237 static void
7238 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7239 {
7240 free_loop_data (data);
7241 free (data->version_info);
7242 BITMAP_FREE (data->relevant);
7243 BITMAP_FREE (data->important_candidates);
7244
7245 decl_rtl_to_reset.release ();
7246 data->iv_uses.release ();
7247 data->iv_candidates.release ();
7248 delete data->inv_expr_tab;
7249 data->inv_expr_tab = NULL;
7250 free_affine_expand_cache (&data->name_expansion_cache);
7251 obstack_free (&data->iv_obstack, NULL);
7252 }
7253
7254 /* Returns true if the loop body BODY includes any function calls. */
7255
7256 static bool
7257 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7258 {
7259 gimple_stmt_iterator gsi;
7260 unsigned i;
7261
7262 for (i = 0; i < num_nodes; i++)
7263 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7264 {
7265 gimple stmt = gsi_stmt (gsi);
7266 if (is_gimple_call (stmt)
7267 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7268 return true;
7269 }
7270 return false;
7271 }
7272
7273 /* Optimizes the LOOP. Returns true if anything changed. */
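
/* An end-to-end illustration at the source level (hypothetical; the
   actual decision depends on the target costs): for

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may decide that a single pointer-style IV is cheapest, so
   after candidate selection, use rewriting and removal of unused IVs
   the loop behaves like

     for (p = a; p != a + n; p++)
       *p = 0;

   with the original counter eliminated when nothing else needs it.  */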
7274
7275 static bool
7276 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7277 {
7278 bool changed = false;
7279 struct iv_ca *iv_ca;
7280 edge exit = single_dom_exit (loop);
7281 basic_block *body;
7282
7283 gcc_assert (!data->niters);
7284 data->current_loop = loop;
7285 data->loop_loc = find_loop_location (loop);
7286 data->speed = optimize_loop_for_speed_p (loop);
7287
7288 if (dump_file && (dump_flags & TDF_DETAILS))
7289 {
7290 fprintf (dump_file, "Processing loop %d", loop->num);
7291 if (data->loop_loc != UNKNOWN_LOCATION)
7292 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7293 LOCATION_LINE (data->loop_loc));
7294 fprintf (dump_file, "\n");
7295
7296 if (exit)
7297 {
7298 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7299 exit->src->index, exit->dest->index);
7300 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7301 fprintf (dump_file, "\n");
7302 }
7303
7304 fprintf (dump_file, "\n");
7305 }
7306
7307 body = get_loop_body (loop);
7308 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7309 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7310 free (body);
7311
7312 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7313
7314 /* For each ssa name, determine whether it behaves as an induction variable
7315 in some loop. */
7316 if (!find_induction_variables (data))
7317 goto finish;
7318
7319 /* Finds interesting uses (item 1). */
7320 find_interesting_uses (data);
7321 group_address_uses (data);
7322 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7323 goto finish;
7324
7325 /* Finds candidates for the induction variables (item 2). */
7326 find_iv_candidates (data);
7327
7328 /* Calculates the costs (item 3, part 1). */
7329 determine_iv_costs (data);
7330 determine_use_iv_costs (data);
7331 determine_set_costs (data);
7332
7333 /* Find the optimal set of induction variables (item 3, part 2). */
7334 iv_ca = find_optimal_iv_set (data);
7335 if (!iv_ca)
7336 goto finish;
7337 changed = true;
7338
7339 /* Create the new induction variables (item 4, part 1). */
7340 create_new_ivs (data, iv_ca);
7341 iv_ca_free (&iv_ca);
7342
7343 /* Rewrite the uses (item 4, part 2). */
7344 rewrite_uses (data);
7345
7346 /* Remove the ivs that are unused after rewriting. */
7347 remove_unused_ivs (data);
7348
7349 /* We have changed the structure of induction variables; it might happen
7350 that definitions in the scev database refer to some of them that were
7351 eliminated. */
7352 scev_reset ();
7353
7354 finish:
7355 free_loop_data (data);
7356
7357 return changed;
7358 }
7359
7360 /* Main entry point. Optimizes induction variables in loops. */
7361
7362 void
7363 tree_ssa_iv_optimize (void)
7364 {
7365 struct loop *loop;
7366 struct ivopts_data data;
7367
7368 tree_ssa_iv_optimize_init (&data);
7369
7370 /* Optimize the loops starting with the innermost ones. */
7371 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7372 {
7373 if (dump_file && (dump_flags & TDF_DETAILS))
7374 flow_loop_dump (loop, dump_file, NULL, 1);
7375
7376 tree_ssa_iv_optimize_loop (&data, loop);
7377 }
7378
7379 tree_ssa_iv_optimize_finalize (&data);
7380 }