/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
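
/* As a quick illustration (a hand-written sketch, not output of this pass):
   given a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the use of i in a[i] is an address use, and the pass may decide that a
   pointer induction variable serves it more cheaply, effectively producing

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   where the original counter i is eliminated together with its exit test
   (the classical strength reduction + iv elimination combination).  */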

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, if so.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must not use ssa names
         that appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer cast to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
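
/* For illustration (informal, under the usual GIMPLE invariants): for
   EXPR = &a[i], the base address of the ADDR_EXPR operand is the
   declaration `a', so the result is the address of `a' converted to
   ptr_type_node; for EXPR = p_1 + 4 (a POINTER_PLUS_EXPR) the function
   recurses into p_1; and for a non-pointer EXPR the result is NULL_TREE.  */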

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
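
/* For illustration: given a header phi like

     i_1 = PHI <0(preheader), i_4(latch)>
     ...
     i_4 = i_1 + 1;

   determine_biv_step returns 1, whereas a phi whose latch value is not a
   simple affine function of the result (or whose step is zero) yields
   NULL_TREE.  */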

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
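
/* For illustration: in a loop where i_1 is a biv with step 1, an assignment
   like p_2 = p_0 + i_1 * 4 defines a giv whose step is 4 per iteration;
   scalar evolution analysis (simple_iv) is what actually derives the base
   and step parameters here.  */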

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, if it was
     not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
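
/* For illustration: for the exit test

     if (i_3 < n_5)

   where i_3 is an iv with nonzero step and n_5 is loop invariant, the
   function returns true with *CONTROL_VAR pointing at the i_3 operand and
   *BOUND at the n_5 operand; for a compare of two invariants (or of two
   ivs) it returns false.  */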

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
        return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
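
/* A few worked cases (informal): constant_multiple_of (n_1 * 8, n_1, &m)
   sets m to 8 via the MULT_EXPR arm; constant_multiple_of (n_1 * 8 + n_1 * 4,
   n_1, &m) sets m to 12 via the PLUS_EXPR arm; constant_multiple_of (12, 4,
   &m) sets m to 3 via the INTEGER_CST arm, while (13, 4) fails because the
   division leaves a remainder.  */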
1599
1600 /* Returns true if memory reference REF with step STEP may be unaligned. */
1601
1602 static bool
1603 may_be_unaligned_p (tree ref, tree step)
1604 {
1605 tree base;
1606 tree base_type;
1607 HOST_WIDE_INT bitsize;
1608 HOST_WIDE_INT bitpos;
1609 tree toffset;
1610 enum machine_mode mode;
1611 int unsignedp, volatilep;
1612 unsigned base_align;
1613
1614 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1615 thus they are not misaligned. */
1616 if (TREE_CODE (ref) == TARGET_MEM_REF)
1617 return false;
1618
1619 /* The test below is basically copy of what expr.c:normal_inner_ref
1620 does to check whether the object must be loaded by parts when
1621 STRICT_ALIGNMENT is true. */
1622 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1623 &unsignedp, &volatilep, true);
1624 base_type = TREE_TYPE (base);
1625 base_align = get_object_alignment (base);
1626 base_align = MAX (base_align, TYPE_ALIGN (base_type));
1627
1628 if (mode != BLKmode)
1629 {
1630 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1631
1632 if (base_align < mode_align
1633 || (bitpos % mode_align) != 0
1634 || (bitpos % BITS_PER_UNIT) != 0)
1635 return true;
1636
1637 if (toffset
1638 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1639 return true;
1640
1641 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1642 return true;
1643 }
1644
1645 return false;
1646 }
1647
1648 /* Return true if EXPR may be non-addressable. */
1649
1650 bool
1651 may_be_nonaddressable_p (tree expr)
1652 {
1653 switch (TREE_CODE (expr))
1654 {
1655 case TARGET_MEM_REF:
1656 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1657 target, thus they are always addressable. */
1658 return false;
1659
1660 case COMPONENT_REF:
1661 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1662 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1663
1664 case VIEW_CONVERT_EXPR:
1665 /* This kind of view-conversions may wrap non-addressable objects
1666 and make them look addressable. After some processing the
1667 non-addressability may be uncovered again, causing ADDR_EXPRs
1668 of inappropriate objects to be built. */
1669 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1670 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1671 return true;
1672
1673 /* ... fall through ... */
1674
1675 case ARRAY_REF:
1676 case ARRAY_RANGE_REF:
1677 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1678
1679 CASE_CONVERT:
1680 return true;
1681
1682 default:
1683 break;
1684 }
1685
1686 return false;
1687 }
1688
1689 /* Finds addresses in *OP_P inside STMT. */
1690
1691 static void
1692 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1693 {
1694 tree base = *op_p, step = size_zero_node;
1695 struct iv *civ;
1696 struct ifs_ivopts_data ifs_ivopts_data;
1697
1698 /* Do not play with volatile memory references. A bit too conservative,
1699 perhaps, but safe. */
1700 if (gimple_has_volatile_ops (stmt))
1701 goto fail;
1702
1703 /* Ignore bitfields for now. Not really something terribly complicated
1704 to handle. TODO. */
1705 if (TREE_CODE (base) == BIT_FIELD_REF)
1706 goto fail;
1707
1708 base = unshare_expr (base);
1709
1710 if (TREE_CODE (base) == TARGET_MEM_REF)
1711 {
1712 tree type = build_pointer_type (TREE_TYPE (base));
1713 tree astep;
1714
1715 if (TMR_BASE (base)
1716 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1717 {
1718 civ = get_iv (data, TMR_BASE (base));
1719 if (!civ)
1720 goto fail;
1721
1722 TMR_BASE (base) = civ->base;
1723 step = civ->step;
1724 }
1725 if (TMR_INDEX2 (base)
1726 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1727 {
1728 civ = get_iv (data, TMR_INDEX2 (base));
1729 if (!civ)
1730 goto fail;
1731
1732 TMR_INDEX2 (base) = civ->base;
1733 step = civ->step;
1734 }
1735 if (TMR_INDEX (base)
1736 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1737 {
1738 civ = get_iv (data, TMR_INDEX (base));
1739 if (!civ)
1740 goto fail;
1741
1742 TMR_INDEX (base) = civ->base;
1743 astep = civ->step;
1744
1745 if (astep)
1746 {
1747 if (TMR_STEP (base))
1748 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1749
1750 step = fold_build2 (PLUS_EXPR, type, step, astep);
1751 }
1752 }
1753
1754 if (integer_zerop (step))
1755 goto fail;
1756 base = tree_mem_ref_addr (type, base);
1757 }
1758 else
1759 {
1760 ifs_ivopts_data.ivopts_data = data;
1761 ifs_ivopts_data.stmt = stmt;
1762 ifs_ivopts_data.step = size_zero_node;
1763 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1764 || integer_zerop (ifs_ivopts_data.step))
1765 goto fail;
1766 step = ifs_ivopts_data.step;
1767
1768 /* Check that the base expression is addressable. This needs
1769 to be done after substituting bases of IVs into it. */
1770 if (may_be_nonaddressable_p (base))
1771 goto fail;
1772
1773 /* Moreover, on strict alignment platforms, check that it is
1774 sufficiently aligned. */
1775 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1776 goto fail;
1777
1778 base = build_fold_addr_expr (base);
1779
1780 /* Substituting bases of IVs into the base expression might
1781 have caused folding opportunities. */
1782 if (TREE_CODE (base) == ADDR_EXPR)
1783 {
1784 tree *ref = &TREE_OPERAND (base, 0);
1785 while (handled_component_p (*ref))
1786 ref = &TREE_OPERAND (*ref, 0);
1787 if (TREE_CODE (*ref) == MEM_REF)
1788 {
1789 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1790 TREE_OPERAND (*ref, 0),
1791 TREE_OPERAND (*ref, 1));
1792 if (tem)
1793 *ref = tem;
1794 }
1795 }
1796 }
1797
1798 civ = alloc_iv (base, step);
1799 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1800 return;
1801
1802 fail:
1803 for_each_index (op_p, idx_record_use, data);
1804 }
1805
1806 /* Finds and records invariants used in STMT. */
1807
1808 static void
1809 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1810 {
1811 ssa_op_iter iter;
1812 use_operand_p use_p;
1813 tree op;
1814
1815 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1816 {
1817 op = USE_FROM_PTR (use_p);
1818 record_invariant (data, op, false);
1819 }
1820 }
1821
1822 /* Finds interesting uses of induction variables in the statement STMT. */
1823
1824 static void
1825 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1826 {
1827 struct iv *iv;
1828 tree op, *lhs, *rhs;
1829 ssa_op_iter iter;
1830 use_operand_p use_p;
1831 enum tree_code code;
1832
1833 find_invariants_stmt (data, stmt);
1834
1835 if (gimple_code (stmt) == GIMPLE_COND)
1836 {
1837 find_interesting_uses_cond (data, stmt);
1838 return;
1839 }
1840
1841 if (is_gimple_assign (stmt))
1842 {
1843 lhs = gimple_assign_lhs_ptr (stmt);
1844 rhs = gimple_assign_rhs1_ptr (stmt);
1845
1846 if (TREE_CODE (*lhs) == SSA_NAME)
1847 {
1848 /* If the statement defines an induction variable, the uses are not
1849 interesting by themselves. */
1850
1851 iv = get_iv (data, *lhs);
1852
1853 if (iv && !integer_zerop (iv->step))
1854 return;
1855 }
1856
1857 code = gimple_assign_rhs_code (stmt);
1858 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1859 && (REFERENCE_CLASS_P (*rhs)
1860 || is_gimple_val (*rhs)))
1861 {
1862 if (REFERENCE_CLASS_P (*rhs))
1863 find_interesting_uses_address (data, stmt, rhs);
1864 else
1865 find_interesting_uses_op (data, *rhs);
1866
1867 if (REFERENCE_CLASS_P (*lhs))
1868 find_interesting_uses_address (data, stmt, lhs);
1869 return;
1870 }
1871 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1872 {
1873 find_interesting_uses_cond (data, stmt);
1874 return;
1875 }
1876
1877 /* TODO -- we should also handle address uses of type
1878
1879 memory = call (whatever);
1880
1881 and
1882
1883 call (memory). */
1884 }
1885
1886 if (gimple_code (stmt) == GIMPLE_PHI
1887 && gimple_bb (stmt) == data->current_loop->header)
1888 {
1889 iv = get_iv (data, PHI_RESULT (stmt));
1890
1891 if (iv && !integer_zerop (iv->step))
1892 return;
1893 }
1894
1895 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1896 {
1897 op = USE_FROM_PTR (use_p);
1898
1899 if (TREE_CODE (op) != SSA_NAME)
1900 continue;
1901
1902 iv = get_iv (data, op);
1903 if (!iv)
1904 continue;
1905
1906 find_interesting_uses_op (data, op);
1907 }
1908 }
1909
1910 /* Finds interesting uses of induction variables outside of loops
1911 on loop exit edge EXIT. */
1912
1913 static void
1914 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1915 {
1916 gimple phi;
1917 gimple_stmt_iterator psi;
1918 tree def;
1919
1920 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1921 {
1922 phi = gsi_stmt (psi);
1923 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1924 if (!virtual_operand_p (def))
1925 find_interesting_uses_op (data, def);
1926 }
1927 }
1928
1929 /* Finds uses of the induction variables that are interesting. */
1930
1931 static void
1932 find_interesting_uses (struct ivopts_data *data)
1933 {
1934 basic_block bb;
1935 gimple_stmt_iterator bsi;
1936 basic_block *body = get_loop_body (data->current_loop);
1937 unsigned i;
1938 struct version_info *info;
1939 edge e;
1940
1941 if (dump_file && (dump_flags & TDF_DETAILS))
1942 fprintf (dump_file, "Uses:\n\n");
1943
1944 for (i = 0; i < data->current_loop->num_nodes; i++)
1945 {
1946 edge_iterator ei;
1947 bb = body[i];
1948
1949 FOR_EACH_EDGE (e, ei, bb->succs)
1950 if (e->dest != EXIT_BLOCK_PTR
1951 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1952 find_interesting_uses_outside (data, e);
1953
1954 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1955 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1956 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1957 if (!is_gimple_debug (gsi_stmt (bsi)))
1958 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1959 }
1960
1961 if (dump_file && (dump_flags & TDF_DETAILS))
1962 {
1963 bitmap_iterator bi;
1964
1965 fprintf (dump_file, "\n");
1966
1967 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1968 {
1969 info = ver_info (data, i);
1970 if (info->inv_id)
1971 {
1972 fprintf (dump_file, " ");
1973 print_generic_expr (dump_file, info->name, TDF_SLIM);
1974 fprintf (dump_file, " is invariant (%d)%s\n",
1975 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1976 }
1977 }
1978
1979 fprintf (dump_file, "\n");
1980 }
1981
1982 free (body);
1983 }
1984
1985 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1986 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1987 we are at the top-level of the processed address. */
1988
1989 static tree
1990 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1991 unsigned HOST_WIDE_INT *offset)
1992 {
1993 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1994 enum tree_code code;
1995 tree type, orig_type = TREE_TYPE (expr);
1996 unsigned HOST_WIDE_INT off0, off1, st;
1997 tree orig_expr = expr;
1998
1999 STRIP_NOPS (expr);
2000
2001 type = TREE_TYPE (expr);
2002 code = TREE_CODE (expr);
2003 *offset = 0;
2004
2005 switch (code)
2006 {
2007 case INTEGER_CST:
2008 if (!cst_and_fits_in_hwi (expr)
2009 || integer_zerop (expr))
2010 return orig_expr;
2011
2012 *offset = int_cst_value (expr);
2013 return build_int_cst (orig_type, 0);
2014
2015 case POINTER_PLUS_EXPR:
2016 case PLUS_EXPR:
2017 case MINUS_EXPR:
2018 op0 = TREE_OPERAND (expr, 0);
2019 op1 = TREE_OPERAND (expr, 1);
2020
2021 op0 = strip_offset_1 (op0, false, false, &off0);
2022 op1 = strip_offset_1 (op1, false, false, &off1);
2023
2024 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2025 if (op0 == TREE_OPERAND (expr, 0)
2026 && op1 == TREE_OPERAND (expr, 1))
2027 return orig_expr;
2028
2029 if (integer_zerop (op1))
2030 expr = op0;
2031 else if (integer_zerop (op0))
2032 {
2033 if (code == MINUS_EXPR)
2034 expr = fold_build1 (NEGATE_EXPR, type, op1);
2035 else
2036 expr = op1;
2037 }
2038 else
2039 expr = fold_build2 (code, type, op0, op1);
2040
2041 return fold_convert (orig_type, expr);
2042
2043 case MULT_EXPR:
2044 op1 = TREE_OPERAND (expr, 1);
2045 if (!cst_and_fits_in_hwi (op1))
2046 return orig_expr;
2047
2048 op0 = TREE_OPERAND (expr, 0);
2049 op0 = strip_offset_1 (op0, false, false, &off0);
2050 if (op0 == TREE_OPERAND (expr, 0))
2051 return orig_expr;
2052
2053 *offset = off0 * int_cst_value (op1);
2054 if (integer_zerop (op0))
2055 expr = op0;
2056 else
2057 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2058
2059 return fold_convert (orig_type, expr);
2060
2061 case ARRAY_REF:
2062 case ARRAY_RANGE_REF:
2063 if (!inside_addr)
2064 return orig_expr;
2065
2066 step = array_ref_element_size (expr);
2067 if (!cst_and_fits_in_hwi (step))
2068 break;
2069
2070 st = int_cst_value (step);
2071 op1 = TREE_OPERAND (expr, 1);
2072 op1 = strip_offset_1 (op1, false, false, &off1);
2073 *offset = off1 * st;
2074
2075 if (top_compref
2076 && integer_zerop (op1))
2077 {
2078 /* Strip the component reference completely. */
2079 op0 = TREE_OPERAND (expr, 0);
2080 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2081 *offset += off0;
2082 return op0;
2083 }
2084 break;
2085
2086 case COMPONENT_REF:
2087 if (!inside_addr)
2088 return orig_expr;
2089
2090 tmp = component_ref_field_offset (expr);
2091 if (top_compref
2092 && cst_and_fits_in_hwi (tmp))
2093 {
2094 /* Strip the component reference completely. */
2095 op0 = TREE_OPERAND (expr, 0);
2096 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2097 *offset = off0 + int_cst_value (tmp);
2098 return op0;
2099 }
2100 break;
2101
2102 case ADDR_EXPR:
2103 op0 = TREE_OPERAND (expr, 0);
2104 op0 = strip_offset_1 (op0, true, true, &off0);
2105 *offset += off0;
2106
2107 if (op0 == TREE_OPERAND (expr, 0))
2108 return orig_expr;
2109
2110 expr = build_fold_addr_expr (op0);
2111 return fold_convert (orig_type, expr);
2112
2113 case MEM_REF:
2114 /* ??? Offset operand? */
2115 inside_addr = false;
2116 break;
2117
2118 default:
2119 return orig_expr;
2120 }
2121
2122 /* Default handling of expressions for which we want to recurse into
2123 the first operand. */
2124 op0 = TREE_OPERAND (expr, 0);
2125 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2126 *offset += off0;
2127
2128 if (op0 == TREE_OPERAND (expr, 0)
2129 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2130 return orig_expr;
2131
2132 expr = copy_node (expr);
2133 TREE_OPERAND (expr, 0) = op0;
2134 if (op1)
2135 TREE_OPERAND (expr, 1) = op1;
2136
2137 /* Inside an address, we might strip the top-level component references,
2138 thus changing the type of the expression. Handling of ADDR_EXPR
2139 will fix that. */
2140 expr = fold_convert (orig_type, expr);
2141
2142 return expr;
2143 }
2144
2145 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2146
2147 static tree
2148 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2149 {
2150 return strip_offset_1 (expr, false, false, offset);
2151 }
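
/* Behavior sketch (illustrative): for p + 6, where 6 fits in a
   HOST_WIDE_INT, strip_offset returns p and sets *offset to 6; for
   &a[i + 3], with a an array of 4-byte elements, it returns &a[i]
   and sets *offset to 12.  */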
2152
2153 /* Returns a variant of TYPE that can be used as a base for different uses.
2154 We return an unsigned type with the same precision, which avoids problems
2155 with overflows. */
2156
2157 static tree
2158 generic_type_for (tree type)
2159 {
2160 if (POINTER_TYPE_P (type))
2161 return unsigned_type_for (type);
2162
2163 if (TYPE_UNSIGNED (type))
2164 return type;
2165
2166 return unsigned_type_for (type);
2167 }
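
/* E.g. int maps to unsigned int, and a pointer type maps to an unsigned
   integer type of the same precision, so candidate increments wrap
   instead of invoking undefined overflow.  */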
2168
2169 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2170 the bitmap in which we should store them. */
2171
2172 static struct ivopts_data *fd_ivopts_data;
2173 static tree
2174 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2175 {
2176 bitmap *depends_on = (bitmap *) data;
2177 struct version_info *info;
2178
2179 if (TREE_CODE (*expr_p) != SSA_NAME)
2180 return NULL_TREE;
2181 info = name_info (fd_ivopts_data, *expr_p);
2182
2183 if (!info->inv_id || info->has_nonlin_use)
2184 return NULL_TREE;
2185
2186 if (!*depends_on)
2187 *depends_on = BITMAP_ALLOC (NULL);
2188 bitmap_set_bit (*depends_on, info->inv_id);
2189
2190 return NULL_TREE;
2191 }
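
/* Illustrative: if a candidate step is n * 4 where n is a loop
   invariant with inv_id 2, walking the step with find_depends sets
   bit 2 in the depends_on bitmap.  */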
2192
2193 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2194 position to POS. If USE is not NULL, the candidate is set as related to
2195 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2196 replacement of the final value of the iv by a direct computation. */
2197
2198 static struct iv_cand *
2199 add_candidate_1 (struct ivopts_data *data,
2200 tree base, tree step, bool important, enum iv_position pos,
2201 struct iv_use *use, gimple incremented_at)
2202 {
2203 unsigned i;
2204 struct iv_cand *cand = NULL;
2205 tree type, orig_type;
2206
2207 /* For non-original variables, make sure their values are computed in a type
2208 that does not invoke undefined behavior on overflows (since in general,
2209 we cannot prove that these induction variables are non-wrapping). */
2210 if (pos != IP_ORIGINAL)
2211 {
2212 orig_type = TREE_TYPE (base);
2213 type = generic_type_for (orig_type);
2214 if (type != orig_type)
2215 {
2216 base = fold_convert (type, base);
2217 step = fold_convert (type, step);
2218 }
2219 }
2220
2221 for (i = 0; i < n_iv_cands (data); i++)
2222 {
2223 cand = iv_cand (data, i);
2224
2225 if (cand->pos != pos)
2226 continue;
2227
2228 if (cand->incremented_at != incremented_at
2229 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2230 && cand->ainc_use != use))
2231 continue;
2232
2233 if (!cand->iv)
2234 {
2235 if (!base && !step)
2236 break;
2237
2238 continue;
2239 }
2240
2241 if (!base && !step)
2242 continue;
2243
2244 if (operand_equal_p (base, cand->iv->base, 0)
2245 && operand_equal_p (step, cand->iv->step, 0)
2246 && (TYPE_PRECISION (TREE_TYPE (base))
2247 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2248 break;
2249 }
2250
2251 if (i == n_iv_cands (data))
2252 {
2253 cand = XCNEW (struct iv_cand);
2254 cand->id = i;
2255
2256 if (!base && !step)
2257 cand->iv = NULL;
2258 else
2259 cand->iv = alloc_iv (base, step);
2260
2261 cand->pos = pos;
2262 if (pos != IP_ORIGINAL && cand->iv)
2263 {
2264 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2265 cand->var_after = cand->var_before;
2266 }
2267 cand->important = important;
2268 cand->incremented_at = incremented_at;
2269 data->iv_candidates.safe_push (cand);
2270
2271 if (step
2272 && TREE_CODE (step) != INTEGER_CST)
2273 {
2274 fd_ivopts_data = data;
2275 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2276 }
2277
2278 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2279 cand->ainc_use = use;
2280 else
2281 cand->ainc_use = NULL;
2282
2283 if (dump_file && (dump_flags & TDF_DETAILS))
2284 dump_cand (dump_file, cand);
2285 }
2286
2287 if (important && !cand->important)
2288 {
2289 cand->important = true;
2290 if (dump_file && (dump_flags & TDF_DETAILS))
2291 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2292 }
2293
2294 if (use)
2295 {
2296 bitmap_set_bit (use->related_cands, i);
2297 if (dump_file && (dump_flags & TDF_DETAILS))
2298 fprintf (dump_file, "Candidate %d is related to use %d\n",
2299 cand->id, use->id);
2300 }
2301
2302 return cand;
2303 }
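
/* Sketch of the deduplication above (illustrative): registering base 0,
   step 4 twice at the same position yields a single candidate; the
   second call finds the existing one by comparing base, step and type
   precision, and only updates the important flag and the related_cands
   bitmap of USE.  */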
2304
2305 /* Returns true if incrementing the induction variable at the end of the LOOP
2306 is allowed.
2307
2308 The purpose is to avoid splitting the latch edge with a biv increment,
2309 thus creating a jump, possibly confusing other optimization passes and
2310 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2311 IP_NORMAL_POS is not available (so we do not have a better alternative),
2312 or if the latch edge is already nonempty. */
2313
2314 static bool
2315 allow_ip_end_pos_p (struct loop *loop)
2316 {
2317 if (!ip_normal_pos (loop))
2318 return true;
2319
2320 if (!empty_block_p (ip_end_pos (loop)))
2321 return true;
2322
2323 return false;
2324 }
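
/* For instance, in a do { } while () loop whose latch block already
   contains statements, placing the increment at IP_END does not force
   an edge split, so it is allowed even when IP_NORMAL_POS exists.  */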
2325
2326 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2327 Important field is set to IMPORTANT. */
2328
2329 static void
2330 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2331 bool important, struct iv_use *use)
2332 {
2333 basic_block use_bb = gimple_bb (use->stmt);
2334 enum machine_mode mem_mode;
2335 unsigned HOST_WIDE_INT cstepi;
2336
2337 /* If we insert the increment in any position other than the standard
2338 ones, we must ensure that it is incremented once per iteration.
2339 It must not be in an inner nested loop, or one side of an if
2340 statement. */
2341 if (use_bb->loop_father != data->current_loop
2342 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2343 || stmt_could_throw_p (use->stmt)
2344 || !cst_and_fits_in_hwi (step))
2345 return;
2346
2347 cstepi = int_cst_value (step);
2348
2349 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2350 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2351 || USE_STORE_PRE_INCREMENT (mem_mode))
2352 && GET_MODE_SIZE (mem_mode) == cstepi)
2353 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2354 || USE_STORE_PRE_DECREMENT (mem_mode))
2355 && GET_MODE_SIZE (mem_mode) == -cstepi))
2356 {
2357 enum tree_code code = MINUS_EXPR;
2358 tree new_base;
2359 tree new_step = step;
2360
2361 if (POINTER_TYPE_P (TREE_TYPE (base)))
2362 {
2363 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2364 code = POINTER_PLUS_EXPR;
2365 }
2366 else
2367 new_step = fold_convert (TREE_TYPE (base), new_step);
2368 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2369 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2370 use->stmt);
2371 }
2372 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2373 || USE_STORE_POST_INCREMENT (mem_mode))
2374 && GET_MODE_SIZE (mem_mode) == cstepi)
2375 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2376 || USE_STORE_POST_DECREMENT (mem_mode))
2377 && GET_MODE_SIZE (mem_mode) == -cstepi))
2378 {
2379 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2380 use->stmt);
2381 }
2382 }
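
/* Illustrative: for a 4-byte access with step 4 on a target with
   post-increment addressing, the candidate base, step is added at
   IP_AFTER_USE so the increment can later be folded into the access as
   *p++; pre-modify targets analogously get an IP_BEFORE_USE candidate
   with the base shifted back by one step.  */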
2383
2384 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2385 position to POS. If USE is not NULL, the candidate is set as related to
2386 it. The candidate computation is scheduled on all available positions. */
2387
2388 static void
2389 add_candidate (struct ivopts_data *data,
2390 tree base, tree step, bool important, struct iv_use *use)
2391 {
2392 if (ip_normal_pos (data->current_loop))
2393 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2394 if (ip_end_pos (data->current_loop)
2395 && allow_ip_end_pos_p (data->current_loop))
2396 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2397
2398 if (use != NULL && use->type == USE_ADDRESS)
2399 add_autoinc_candidates (data, base, step, important, use);
2400 }
2401
2402 /* Adds standard iv candidates. */
2403
2404 static void
2405 add_standard_iv_candidates (struct ivopts_data *data)
2406 {
2407 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2408
2409 /* The same for the wider long type if it is still fast enough. */
2410 if (TYPE_PRECISION
2411 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2412 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2413 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2414 build_int_cst (long_integer_type_node, 1), true, NULL);
2415
2416 /* And for the even wider long long type if it is still fast enough. */
2417 if (TYPE_PRECISION
2418 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2419 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2420 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2421 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2422 }
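
/* E.g. on a typical LP64 target this adds the candidate 0 + 1 * i in
   int and, since long is wider than int but still fits in a word, also
   in long; the long long variant is then skipped because it has the
   same precision as long.  */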
2423
2425 /* Adds candidates based on the old induction variable IV. */
2426
2427 static void
2428 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2429 {
2430 gimple phi;
2431 tree def;
2432 struct iv_cand *cand;
2433
2434 add_candidate (data, iv->base, iv->step, true, NULL);
2435
2436 /* The same, but with initial value zero. */
2437 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2438 add_candidate (data, size_int (0), iv->step, true, NULL);
2439 else
2440 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2441 iv->step, true, NULL);
2442
2443 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2444 if (gimple_code (phi) == GIMPLE_PHI)
2445 {
2446 /* Additionally record the possibility of leaving the original iv
2447 untouched. */
2448 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2449 cand = add_candidate_1 (data,
2450 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2451 SSA_NAME_DEF_STMT (def));
2452 cand->var_before = iv->ssa_name;
2453 cand->var_after = def;
2454 }
2455 }
2456
2457 /* Adds candidates based on the old induction variables. */
2458
2459 static void
2460 add_old_ivs_candidates (struct ivopts_data *data)
2461 {
2462 unsigned i;
2463 struct iv *iv;
2464 bitmap_iterator bi;
2465
2466 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2467 {
2468 iv = ver_info (data, i)->iv;
2469 if (iv && iv->biv_p && !integer_zerop (iv->step))
2470 add_old_iv_candidates (data, iv);
2471 }
2472 }
2473
2474 /* Adds candidates based on the value of the induction variable IV and USE. */
2475
2476 static void
2477 add_iv_value_candidates (struct ivopts_data *data,
2478 struct iv *iv, struct iv_use *use)
2479 {
2480 unsigned HOST_WIDE_INT offset;
2481 tree base;
2482 tree basetype;
2483
2484 add_candidate (data, iv->base, iv->step, false, use);
2485
2486 /* The same, but with initial value zero. Make such a variable important,
2487 since it is generic enough that possibly many uses may be based
2488 on it. */
2489 basetype = TREE_TYPE (iv->base);
2490 if (POINTER_TYPE_P (basetype))
2491 basetype = sizetype;
2492 add_candidate (data, build_int_cst (basetype, 0),
2493 iv->step, true, use);
2494
2495 /* Third, try removing the constant offset. Make sure to even
2496 add a candidate for &a[0] vs. (T *)&a. */
2497 base = strip_offset (iv->base, &offset);
2498 if (offset
2499 || base != iv->base)
2500 add_candidate (data, base, iv->step, false, use);
2501 }
2502
2503 /* Adds candidates based on the uses. */
2504
2505 static void
2506 add_derived_ivs_candidates (struct ivopts_data *data)
2507 {
2508 unsigned i;
2509
2510 for (i = 0; i < n_iv_uses (data); i++)
2511 {
2512 struct iv_use *use = iv_use (data, i);
2513
2514 if (!use)
2515 continue;
2516
2517 switch (use->type)
2518 {
2519 case USE_NONLINEAR_EXPR:
2520 case USE_COMPARE:
2521 case USE_ADDRESS:
2522 /* Just add the ivs based on the value of the iv used here. */
2523 add_iv_value_candidates (data, use->iv, use);
2524 break;
2525
2526 default:
2527 gcc_unreachable ();
2528 }
2529 }
2530 }
2531
2532 /* Record important candidates and add them to related_cands bitmaps
2533 if needed. */
2534
2535 static void
2536 record_important_candidates (struct ivopts_data *data)
2537 {
2538 unsigned i;
2539 struct iv_use *use;
2540
2541 for (i = 0; i < n_iv_cands (data); i++)
2542 {
2543 struct iv_cand *cand = iv_cand (data, i);
2544
2545 if (cand->important)
2546 bitmap_set_bit (data->important_candidates, i);
2547 }
2548
2549 data->consider_all_candidates = (n_iv_cands (data)
2550 <= CONSIDER_ALL_CANDIDATES_BOUND);
2551
2552 if (data->consider_all_candidates)
2553 {
2554 /* We will not need "related_cands" bitmaps in this case,
2555 so release them to decrease peak memory consumption. */
2556 for (i = 0; i < n_iv_uses (data); i++)
2557 {
2558 use = iv_use (data, i);
2559 BITMAP_FREE (use->related_cands);
2560 }
2561 }
2562 else
2563 {
2564 /* Add important candidates to the related_cands bitmaps. */
2565 for (i = 0; i < n_iv_uses (data); i++)
2566 bitmap_ior_into (iv_use (data, i)->related_cands,
2567 data->important_candidates);
2568 }
2569 }
2570
2571 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2572 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2573 we allocate a small open-addressed table for every use. */
2574
2575 static void
2576 alloc_use_cost_map (struct ivopts_data *data)
2577 {
2578 unsigned i, size, s, j;
2579
2580 for (i = 0; i < n_iv_uses (data); i++)
2581 {
2582 struct iv_use *use = iv_use (data, i);
2583 bitmap_iterator bi;
2584
2585 if (data->consider_all_candidates)
2586 size = n_iv_cands (data);
2587 else
2588 {
2589 s = 0;
2590 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2591 {
2592 s++;
2593 }
2594
2595 /* Round up to the nearest power of two, so that taking the modulus is fast. */
2596 for (size = 1; size < s; size <<= 1)
2597 continue;
2598 }
2599
2600 use->n_map_members = size;
2601 use->cost_map = XCNEWVEC (struct cost_pair, size);
2602 }
2603 }
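
/* Example: a use related to 5 candidates gets n_map_members == 8;
   set_use_iv_cost and get_use_iv_cost then probe slots starting at
   cand->id & 7 and wrap around, which is why the size is rounded up to
   a power of two.  */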
2604
2605 /* Returns a description of the computation cost of an expression whose
2606 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2607
2608 static comp_cost
2609 new_cost (unsigned runtime, unsigned complexity)
2610 {
2611 comp_cost cost;
2612
2613 cost.cost = runtime;
2614 cost.complexity = complexity;
2615
2616 return cost;
2617 }
2618
2619 /* Adds costs COST1 and COST2. */
2620
2621 static comp_cost
2622 add_costs (comp_cost cost1, comp_cost cost2)
2623 {
2624 cost1.cost += cost2.cost;
2625 cost1.complexity += cost2.complexity;
2626
2627 return cost1;
2628 }

2629 /* Subtracts costs COST1 and COST2. */
2630
2631 static comp_cost
2632 sub_costs (comp_cost cost1, comp_cost cost2)
2633 {
2634 cost1.cost -= cost2.cost;
2635 cost1.complexity -= cost2.complexity;
2636
2637 return cost1;
2638 }
2639
2640 /* Returns a negative number if COST1 < COST2, a positive number if
2641 COST1 > COST2, and 0 if COST1 = COST2. */
2642
2643 static int
2644 compare_costs (comp_cost cost1, comp_cost cost2)
2645 {
2646 if (cost1.cost == cost2.cost)
2647 return cost1.complexity - cost2.complexity;
2648
2649 return cost1.cost - cost2.cost;
2650 }
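
/* E.g. comparing costs (4, 1) and (4, 2) yields a negative value: with
   equal runtime cost, the less complex expression wins.  */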
2651
2652 /* Returns true if COST is infinite. */
2653
2654 static bool
2655 infinite_cost_p (comp_cost cost)
2656 {
2657 return cost.cost == INFTY;
2658 }
2659
2660 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2661 on invariants DEPENDS_ON and that the value used in expressing it
2662 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2663
2664 static void
2665 set_use_iv_cost (struct ivopts_data *data,
2666 struct iv_use *use, struct iv_cand *cand,
2667 comp_cost cost, bitmap depends_on, tree value,
2668 enum tree_code comp, int inv_expr_id)
2669 {
2670 unsigned i, s;
2671
2672 if (infinite_cost_p (cost))
2673 {
2674 BITMAP_FREE (depends_on);
2675 return;
2676 }
2677
2678 if (data->consider_all_candidates)
2679 {
2680 use->cost_map[cand->id].cand = cand;
2681 use->cost_map[cand->id].cost = cost;
2682 use->cost_map[cand->id].depends_on = depends_on;
2683 use->cost_map[cand->id].value = value;
2684 use->cost_map[cand->id].comp = comp;
2685 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2686 return;
2687 }
2688
2689 /* n_map_members is a power of two, so this computes modulo. */
2690 s = cand->id & (use->n_map_members - 1);
2691 for (i = s; i < use->n_map_members; i++)
2692 if (!use->cost_map[i].cand)
2693 goto found;
2694 for (i = 0; i < s; i++)
2695 if (!use->cost_map[i].cand)
2696 goto found;
2697
2698 gcc_unreachable ();
2699
2700 found:
2701 use->cost_map[i].cand = cand;
2702 use->cost_map[i].cost = cost;
2703 use->cost_map[i].depends_on = depends_on;
2704 use->cost_map[i].value = value;
2705 use->cost_map[i].comp = comp;
2706 use->cost_map[i].inv_expr_id = inv_expr_id;
2707 }
2708
2709 /* Gets cost of (USE, CANDIDATE) pair. */
2710
2711 static struct cost_pair *
2712 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2713 struct iv_cand *cand)
2714 {
2715 unsigned i, s;
2716 struct cost_pair *ret;
2717
2718 if (!cand)
2719 return NULL;
2720
2721 if (data->consider_all_candidates)
2722 {
2723 ret = use->cost_map + cand->id;
2724 if (!ret->cand)
2725 return NULL;
2726
2727 return ret;
2728 }
2729
2730 /* n_map_members is a power of two, so this computes modulo. */
2731 s = cand->id & (use->n_map_members - 1);
2732 for (i = s; i < use->n_map_members; i++)
2733 if (use->cost_map[i].cand == cand)
2734 return use->cost_map + i;
2735
2736 for (i = 0; i < s; i++)
2737 if (use->cost_map[i].cand == cand)
2738 return use->cost_map + i;
2739
2740 return NULL;
2741 }
2742
2743 /* Returns an estimate of the cost of computing SEQ. */
2744
2745 static unsigned
2746 seq_cost (rtx seq, bool speed)
2747 {
2748 unsigned cost = 0;
2749 rtx set;
2750
2751 for (; seq; seq = NEXT_INSN (seq))
2752 {
2753 set = single_set (seq);
2754 if (set)
2755 cost += set_src_cost (SET_SRC (set), speed);
2756 else
2757 cost++;
2758 }
2759
2760 return cost;
2761 }
2762
2763 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */

2764 static rtx
2765 produce_memory_decl_rtl (tree obj, int *regno)
2766 {
2767 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2768 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2769 rtx x;
2770
2771 gcc_assert (obj);
2772 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2773 {
2774 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2775 x = gen_rtx_SYMBOL_REF (address_mode, name);
2776 SET_SYMBOL_REF_DECL (x, obj);
2777 x = gen_rtx_MEM (DECL_MODE (obj), x);
2778 set_mem_addr_space (x, as);
2779 targetm.encode_section_info (obj, x, true);
2780 }
2781 else
2782 {
2783 x = gen_raw_REG (address_mode, (*regno)++);
2784 x = gen_rtx_MEM (DECL_MODE (obj), x);
2785 set_mem_addr_space (x, as);
2786 }
2787
2788 return x;
2789 }
2790
2791 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2792 walk_tree. DATA contains the actual fake register number. */
2793
2794 static tree
2795 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2796 {
2797 tree obj = NULL_TREE;
2798 rtx x = NULL_RTX;
2799 int *regno = (int *) data;
2800
2801 switch (TREE_CODE (*expr_p))
2802 {
2803 case ADDR_EXPR:
2804 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2805 handled_component_p (*expr_p);
2806 expr_p = &TREE_OPERAND (*expr_p, 0))
2807 continue;
2808 obj = *expr_p;
2809 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2810 x = produce_memory_decl_rtl (obj, regno);
2811 break;
2812
2813 case SSA_NAME:
2814 *ws = 0;
2815 obj = SSA_NAME_VAR (*expr_p);
2816 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2817 if (!obj)
2818 return NULL_TREE;
2819 if (!DECL_RTL_SET_P (obj))
2820 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2821 break;
2822
2823 case VAR_DECL:
2824 case PARM_DECL:
2825 case RESULT_DECL:
2826 *ws = 0;
2827 obj = *expr_p;
2828
2829 if (DECL_RTL_SET_P (obj))
2830 break;
2831
2832 if (DECL_MODE (obj) == BLKmode)
2833 x = produce_memory_decl_rtl (obj, regno);
2834 else
2835 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2836
2837 break;
2838
2839 default:
2840 break;
2841 }
2842
2843 if (x)
2844 {
2845 decl_rtl_to_reset.safe_push (obj);
2846 SET_DECL_RTL (obj, x);
2847 }
2848
2849 return NULL_TREE;
2850 }
2851
2852 /* Determines cost of the computation of EXPR. */
2853
2854 static unsigned
2855 computation_cost (tree expr, bool speed)
2856 {
2857 rtx seq, rslt;
2858 tree type = TREE_TYPE (expr);
2859 unsigned cost;
2860 /* Avoid using hard regs in ways which may be unsupported. */
2861 int regno = LAST_VIRTUAL_REGISTER + 1;
2862 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2863 enum node_frequency real_frequency = node->frequency;
2864
2865 node->frequency = NODE_FREQUENCY_NORMAL;
2866 crtl->maybe_hot_insn_p = speed;
2867 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2868 start_sequence ();
2869 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2870 seq = get_insns ();
2871 end_sequence ();
2872 default_rtl_profile ();
2873 node->frequency = real_frequency;
2874
2875 cost = seq_cost (seq, speed);
2876 if (MEM_P (rslt))
2877 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2878 TYPE_ADDR_SPACE (type), speed);
2879 else if (!REG_P (rslt))
2880 cost += set_src_cost (rslt, speed);
2881
2882 return cost;
2883 }
2884
2885 /* Returns variable containing the value of candidate CAND at statement AT. */
2886
2887 static tree
2888 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2889 {
2890 if (stmt_after_increment (loop, cand, stmt))
2891 return cand->var_after;
2892 else
2893 return cand->var_before;
2894 }
2895
2896 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2897 same precision, at least as wide as the precision of TYPE, stores BA to
2898 A and BB to B, and returns the type of BA. Otherwise, returns the
2899 type of A and B. */
2900
2901 static tree
2902 determine_common_wider_type (tree *a, tree *b)
2903 {
2904 tree wider_type = NULL;
2905 tree suba, subb;
2906 tree atype = TREE_TYPE (*a);
2907
2908 if (CONVERT_EXPR_P (*a))
2909 {
2910 suba = TREE_OPERAND (*a, 0);
2911 wider_type = TREE_TYPE (suba);
2912 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2913 return atype;
2914 }
2915 else
2916 return atype;
2917
2918 if (CONVERT_EXPR_P (*b))
2919 {
2920 subb = TREE_OPERAND (*b, 0);
2921 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2922 return atype;
2923 }
2924 else
2925 return atype;
2926
2927 *a = suba;
2928 *b = subb;
2929 return wider_type;
2930 }
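
/* Illustrative: if *a is (int) la and *b is (int) lb with la and lb of
   type long, la and lb are stored back through A and B and long is
   returned, so the difference can be folded in the wider type.  */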
2931
2932 /* Determines the expression by which USE is expressed from induction variable
2933 CAND at statement AT in LOOP. The expression is stored in a decomposed
2934 form into AFF. Returns false if USE cannot be expressed using CAND. */
2935
2936 static bool
2937 get_computation_aff (struct loop *loop,
2938 struct iv_use *use, struct iv_cand *cand, gimple at,
2939 struct affine_tree_combination *aff)
2940 {
2941 tree ubase = use->iv->base;
2942 tree ustep = use->iv->step;
2943 tree cbase = cand->iv->base;
2944 tree cstep = cand->iv->step, cstep_common;
2945 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2946 tree common_type, var;
2947 tree uutype;
2948 aff_tree cbase_aff, var_aff;
2949 double_int rat;
2950
2951 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2952 {
2953 /* We do not have a precision to express the values of the use. */
2954 return false;
2955 }
2956
2957 var = var_at_stmt (loop, cand, at);
2958 uutype = unsigned_type_for (utype);
2959
2960 /* If the conversion is not noop, perform it. */
2961 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2962 {
2963 cstep = fold_convert (uutype, cstep);
2964 cbase = fold_convert (uutype, cbase);
2965 var = fold_convert (uutype, var);
2966 }
2967
2968 if (!constant_multiple_of (ustep, cstep, &rat))
2969 return false;
2970
2971 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2972 type, we achieve better folding by computing their difference in this
2973 wider type, and casting the result to UUTYPE. We do not need to worry
2974 about overflows, as all the arithmetic will in the end be performed in
2975 UUTYPE anyway. */
2976 common_type = determine_common_wider_type (&ubase, &cbase);
2977
2978 /* use = ubase - ratio * cbase + ratio * var. */
2979 tree_to_aff_combination (ubase, common_type, aff);
2980 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2981 tree_to_aff_combination (var, uutype, &var_aff);
2982
2983 /* We need to shift the value if we are after the increment. */
2984 if (stmt_after_increment (loop, cand, at))
2985 {
2986 aff_tree cstep_aff;
2987
2988 if (common_type != uutype)
2989 cstep_common = fold_convert (common_type, cstep);
2990 else
2991 cstep_common = cstep;
2992
2993 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2994 aff_combination_add (&cbase_aff, &cstep_aff);
2995 }
2996
2997 aff_combination_scale (&cbase_aff, -rat);
2998 aff_combination_add (aff, &cbase_aff);
2999 if (common_type != uutype)
3000 aff_combination_convert (aff, uutype);
3001
3002 aff_combination_scale (&var_aff, rat);
3003 aff_combination_add (aff, &var_aff);
3004
3005 return true;
3006 }
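
/* Worked instance (illustrative): for ubase 0, ustep 4 and a candidate
   with cbase 0, cstep 1, constant_multiple_of yields rat == 4, and the
   affine combination reduces to use = 4 * var.  */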
3007
3008 /* Return the type of USE. */
3009
3010 static tree
3011 get_use_type (struct iv_use *use)
3012 {
3013 tree base_type = TREE_TYPE (use->iv->base);
3014 tree type;
3015
3016 if (use->type == USE_ADDRESS)
3017 {
3018 /* The base_type may be a void pointer. Create a pointer type based on
3019 the mem_ref instead. */
3020 type = build_pointer_type (TREE_TYPE (*use->op_p));
3021 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3022 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3023 }
3024 else
3025 type = base_type;
3026
3027 return type;
3028 }
3029
3030 /* Determines the expression by which USE is expressed from induction variable
3031 CAND at statement AT in LOOP. The computation is unshared. */
3032
3033 static tree
3034 get_computation_at (struct loop *loop,
3035 struct iv_use *use, struct iv_cand *cand, gimple at)
3036 {
3037 aff_tree aff;
3038 tree type = get_use_type (use);
3039
3040 if (!get_computation_aff (loop, use, cand, at, &aff))
3041 return NULL_TREE;
3042 unshare_aff_combination (&aff);
3043 return fold_convert (type, aff_combination_to_tree (&aff));
3044 }
3045
3046 /* Determines the expression by which USE is expressed from induction variable
3047 CAND in LOOP. The computation is unshared. */
3048
3049 static tree
3050 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3051 {
3052 return get_computation_at (loop, use, cand, use->stmt);
3053 }
3054
3055 /* Adjust the cost COST for being in loop setup rather than loop body.
3056 If we're optimizing for space, the loop setup overhead is constant;
3057 if we're optimizing for speed, amortize it over the expected number of iterations. */
3058 static unsigned
3059 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3060 {
3061 if (cost == INFTY)
3062 return cost;
3063 else if (optimize_loop_for_speed_p (data->current_loop))
3064 return cost / avg_loop_niter (data->current_loop);
3065 else
3066 return cost;
3067 }
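
/* E.g. a setup cost of 20 in a loop optimized for speed that iterates
   about 10 times on average is accounted as 2; when optimizing for
   size it stays 20.  */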
3068
3069 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3070 validity for a memory reference accessing memory of mode MODE in
3071 address space AS. */
3072
3074 bool
3075 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3076 addr_space_t as)
3077 {
3078 #define MAX_RATIO 128
3079 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3080 static vec<sbitmap> valid_mult_list;
3081 sbitmap valid_mult;
3082
3083 if (data_index >= valid_mult_list.length ())
3084 valid_mult_list.safe_grow_cleared (data_index + 1);
3085
3086 valid_mult = valid_mult_list[data_index];
3087 if (!valid_mult)
3088 {
3089 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3090 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3091 rtx addr;
3092 HOST_WIDE_INT i;
3093
3094 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3095 bitmap_clear (valid_mult);
3096 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3097 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3098 {
3099 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3100 if (memory_address_addr_space_p (mode, addr, as))
3101 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3102 }
3103
3104 if (dump_file && (dump_flags & TDF_DETAILS))
3105 {
3106 fprintf (dump_file, " allowed multipliers:");
3107 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3108 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3109 fprintf (dump_file, " %d", (int) i);
3110 fprintf (dump_file, "\n");
3111 fprintf (dump_file, "\n");
3112 }
3113
3114 valid_mult_list[data_index] = valid_mult;
3115 }
3116
3117 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3118 return false;
3119
3120 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3121 }
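
/* Illustrative: on an x86-like target the probing above typically
   leaves ratios 1, 2, 4 and 8 set in valid_mult, so an address such as
   base + 4 * i is recognized as directly encodable.  */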
3122
3123 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3124 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3125 variable is omitted. Compute the cost for a memory reference that accesses
3126 a memory location of mode MEM_MODE in address space AS.
3127
3128 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3129 size of MEM_MODE / RATIO) is available. To make this determination, we
3130 look at the size of the increment to be made, which is given in CSTEP.
3131 CSTEP may be zero if the step is unknown.
3132 STMT_AFTER_INC is true iff the statement we're looking at is after the
3133 increment of the original biv.
3134
3135 TODO -- there must be some better way. This all is quite crude. */
3136
3137 typedef struct address_cost_data_s
3138 {
3139 HOST_WIDE_INT min_offset, max_offset;
3140 unsigned costs[2][2][2][2];
3141 } *address_cost_data;
3142
3143
3144 static comp_cost
3145 get_address_cost (bool symbol_present, bool var_present,
3146 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3147 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3148 addr_space_t as, bool speed,
3149 bool stmt_after_inc, bool *may_autoinc)
3150 {
3151 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3152 static vec<address_cost_data> address_cost_data_list;
3153 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3154 address_cost_data data;
3155 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3156 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3157 unsigned cost, acost, complexity;
3158 bool offset_p, ratio_p, autoinc;
3159 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3160 unsigned HOST_WIDE_INT mask;
3161 unsigned bits;
3162
3163 if (data_index >= address_cost_data_list.length ())
3164 address_cost_data_list.safe_grow_cleared (data_index + 1);
3165
3166 data = address_cost_data_list[data_index];
3167 if (!data)
3168 {
3169 HOST_WIDE_INT i;
3170 HOST_WIDE_INT rat, off = 0;
3171 int old_cse_not_expected, width;
3172 unsigned sym_p, var_p, off_p, rat_p, add_c;
3173 rtx seq, addr, base;
3174 rtx reg0, reg1;
3175
3176 data = (address_cost_data) xcalloc (1, sizeof (*data));
3177
3178 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3179
3180 width = GET_MODE_BITSIZE (address_mode) - 1;
3181 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3182 width = HOST_BITS_PER_WIDE_INT - 1;
3183 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3184
3185 for (i = width; i >= 0; i--)
3186 {
3187 off = -((unsigned HOST_WIDE_INT) 1 << i);
3188 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3189 if (memory_address_addr_space_p (mem_mode, addr, as))
3190 break;
3191 }
3192 data->min_offset = (i == -1 ? 0 : off);
3193
3194 for (i = width; i >= 0; i--)
3195 {
3196 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3197 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3198 if (memory_address_addr_space_p (mem_mode, addr, as))
3199 break;
3200 }
3201 if (i == -1)
3202 off = 0;
3203 data->max_offset = off;
3204
3205 if (dump_file && (dump_flags & TDF_DETAILS))
3206 {
3207 fprintf (dump_file, "get_address_cost:\n");
3208 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3209 GET_MODE_NAME (mem_mode),
3210 data->min_offset);
3211 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3212 GET_MODE_NAME (mem_mode),
3213 data->max_offset);
3214 }
3215
3216 rat = 1;
3217 for (i = 2; i <= MAX_RATIO; i++)
3218 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3219 {
3220 rat = i;
3221 break;
3222 }
3223
3224 /* Compute the cost of various addressing modes. */
3225 acost = 0;
3226 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3227 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3228
3229 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3230 || USE_STORE_PRE_DECREMENT (mem_mode))
3231 {
3232 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3233 has_predec[mem_mode]
3234 = memory_address_addr_space_p (mem_mode, addr, as);
3235 }
3236 if (USE_LOAD_POST_DECREMENT (mem_mode)
3237 || USE_STORE_POST_DECREMENT (mem_mode))
3238 {
3239 addr = gen_rtx_POST_DEC (address_mode, reg0);
3240 has_postdec[mem_mode]
3241 = memory_address_addr_space_p (mem_mode, addr, as);
3242 }
3243 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3244 || USE_STORE_PRE_INCREMENT (mem_mode))
3245 {
3246 addr = gen_rtx_PRE_INC (address_mode, reg0);
3247 has_preinc[mem_mode]
3248 = memory_address_addr_space_p (mem_mode, addr, as);
3249 }
3250 if (USE_LOAD_POST_INCREMENT (mem_mode)
3251 || USE_STORE_POST_INCREMENT (mem_mode))
3252 {
3253 addr = gen_rtx_POST_INC (address_mode, reg0);
3254 has_postinc[mem_mode]
3255 = memory_address_addr_space_p (mem_mode, addr, as);
3256 }
3257 for (i = 0; i < 16; i++)
3258 {
3259 sym_p = i & 1;
3260 var_p = (i >> 1) & 1;
3261 off_p = (i >> 2) & 1;
3262 rat_p = (i >> 3) & 1;
3263
3264 addr = reg0;
3265 if (rat_p)
3266 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3267 gen_int_mode (rat, address_mode));
3268
3269 if (var_p)
3270 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3271
3272 if (sym_p)
3273 {
3274 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3275 /* ??? We can run into trouble with some backends by presenting
3276 it with symbols which haven't been properly passed through
3277 targetm.encode_section_info. By setting the local bit, we
3278 enhance the probability of things working. */
3279 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3280
3281 if (off_p)
3282 base = gen_rtx_fmt_e (CONST, address_mode,
3283 gen_rtx_fmt_ee
3284 (PLUS, address_mode, base,
3285 gen_int_mode (off, address_mode)));
3286 }
3287 else if (off_p)
3288 base = gen_int_mode (off, address_mode);
3289 else
3290 base = NULL_RTX;
3291
3292 if (base)
3293 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3294
3295 start_sequence ();
3296 /* To avoid splitting addressing modes, pretend that no cse will
3297 follow. */
3298 old_cse_not_expected = cse_not_expected;
3299 cse_not_expected = true;
3300 addr = memory_address_addr_space (mem_mode, addr, as);
3301 cse_not_expected = old_cse_not_expected;
3302 seq = get_insns ();
3303 end_sequence ();
3304
3305 acost = seq_cost (seq, speed);
3306 acost += address_cost (addr, mem_mode, as, speed);
3307
3308 if (!acost)
3309 acost = 1;
3310 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3311 }
3312
3313 /* On some targets, it is quite expensive to load a symbol into a register,
3314 which makes addresses that contain symbols look much more expensive.
3315 However, the symbol will have to be loaded in any case before the
3316 loop (and quite likely we have it in a register already), so it does not
3317 make much sense to penalize such addresses too heavily. So make some final
3318 tweaks for the SYMBOL_PRESENT modes:
3319
3320 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3321 a var is cheaper, use this mode with a small penalty.
3322 If VAR_PRESENT is true, test whether the mode with
3323 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3324 if this is the case, use it. */
3325 add_c = add_cost (speed, address_mode);
3326 for (i = 0; i < 8; i++)
3327 {
3328 var_p = i & 1;
3329 off_p = (i >> 1) & 1;
3330 rat_p = (i >> 2) & 1;
3331
3332 acost = data->costs[0][1][off_p][rat_p] + 1;
3333 if (var_p)
3334 acost += add_c;
3335
3336 if (acost < data->costs[1][var_p][off_p][rat_p])
3337 data->costs[1][var_p][off_p][rat_p] = acost;
3338 }
3339
3340 if (dump_file && (dump_flags & TDF_DETAILS))
3341 {
3342 fprintf (dump_file, "Address costs:\n");
3343
3344 for (i = 0; i < 16; i++)
3345 {
3346 sym_p = i & 1;
3347 var_p = (i >> 1) & 1;
3348 off_p = (i >> 2) & 1;
3349 rat_p = (i >> 3) & 1;
3350
3351 fprintf (dump_file, " ");
3352 if (sym_p)
3353 fprintf (dump_file, "sym + ");
3354 if (var_p)
3355 fprintf (dump_file, "var + ");
3356 if (off_p)
3357 fprintf (dump_file, "cst + ");
3358 if (rat_p)
3359 fprintf (dump_file, "rat * ");
3360
3361 acost = data->costs[sym_p][var_p][off_p][rat_p];
3362 fprintf (dump_file, "index costs %d\n", acost);
3363 }
3364 if (has_predec[mem_mode] || has_postdec[mem_mode]
3365 || has_preinc[mem_mode] || has_postinc[mem_mode])
3366 fprintf (dump_file, " May include autoinc/dec\n");
3367 fprintf (dump_file, "\n");
3368 }
3369
3370 address_cost_data_list[data_index] = data;
3371 }
3372
3373 bits = GET_MODE_BITSIZE (address_mode);
3374 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3375 offset &= mask;
3376 if ((offset >> (bits - 1) & 1))
3377 offset |= ~mask;
3378 s_offset = offset;
3379
3380 autoinc = false;
3381 msize = GET_MODE_SIZE (mem_mode);
3382 autoinc_offset = offset;
3383 if (stmt_after_inc)
3384 autoinc_offset += ratio * cstep;
3385 if (symbol_present || var_present || ratio != 1)
3386 autoinc = false;
3387 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3388 && msize == cstep)
3389 || (has_postdec[mem_mode] && autoinc_offset == 0
3390 && msize == -cstep)
3391 || (has_preinc[mem_mode] && autoinc_offset == msize
3392 && msize == cstep)
3393 || (has_predec[mem_mode] && autoinc_offset == -msize
3394 && msize == -cstep))
3395 autoinc = true;
3396
3397 cost = 0;
3398 offset_p = (s_offset != 0
3399 && data->min_offset <= s_offset
3400 && s_offset <= data->max_offset);
3401 ratio_p = (ratio != 1
3402 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3403
3404 if (ratio != 1 && !ratio_p)
3405 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3406
3407 if (s_offset && !offset_p && !symbol_present)
3408 cost += add_cost (speed, address_mode);
3409
3410 if (may_autoinc)
3411 *may_autoinc = autoinc;
3412 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3413 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3414 return new_cost (cost + acost, complexity);
3415 }
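
/* Decoding the table above (illustrative): costs[1][0][1][1] is the
   cost of a sym + cst + rat * index address, and the returned
   complexity is simply the number of address parts present.  */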
3416
3417 /* Calculates the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3418 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3419 calculating the operands of EXPR. Returns true if successful, and returns
3420 the cost in COST. */
3421
3422 static bool
3423 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3424 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3425 {
3426 comp_cost res;
3427 tree op1 = TREE_OPERAND (expr, 1);
3428 tree cst = TREE_OPERAND (mult, 1);
3429 tree multop = TREE_OPERAND (mult, 0);
3430 int m = exact_log2 (int_cst_value (cst));
3431 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3432 int sa_cost;
3433
3434 if (!(m >= 0 && m < maxm))
3435 return false;
3436
3437 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3438 ? shiftadd_cost (speed, mode, m)
3439 : (mult == op1
3440 ? shiftsub1_cost (speed, mode, m)
3441 : shiftsub0_cost (speed, mode, m)));
3442 res = new_cost (sa_cost, 0);
3443 res = add_costs (res, mult == op1 ? cost0 : cost1);
3444
3445 STRIP_NOPS (multop);
3446 if (!is_gimple_val (multop))
3447 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3448
3449 *cost = res;
3450 return true;
3451 }
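
/* Illustrative: for a + b * 8 the constant 8 gives m == 3, so the
   expression is costed as a single shift-and-add (shiftadd_cost)
   instead of a multiplication followed by an addition.  */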
3452
3453 /* Estimates cost of forcing expression EXPR into a variable. */
3454
3455 static comp_cost
3456 force_expr_to_var_cost (tree expr, bool speed)
3457 {
3458 static bool costs_initialized = false;
3459 static unsigned integer_cost [2];
3460 static unsigned symbol_cost [2];
3461 static unsigned address_cost [2];
3462 tree op0, op1;
3463 comp_cost cost0, cost1, cost;
3464 enum machine_mode mode;
3465
3466 if (!costs_initialized)
3467 {
3468 tree type = build_pointer_type (integer_type_node);
3469 tree var, addr;
3470 rtx x;
3471 int i;
3472
3473 var = create_tmp_var_raw (integer_type_node, "test_var");
3474 TREE_STATIC (var) = 1;
3475 x = produce_memory_decl_rtl (var, NULL);
3476 SET_DECL_RTL (var, x);
3477
3478 addr = build1 (ADDR_EXPR, type, var);
3479
3481 for (i = 0; i < 2; i++)
3482 {
3483 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3484 2000), i);
3485
3486 symbol_cost[i] = computation_cost (addr, i) + 1;
3487
3488 address_cost[i]
3489 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3490 if (dump_file && (dump_flags & TDF_DETAILS))
3491 {
3492 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3493 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3494 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3495 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3496 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3497 fprintf (dump_file, "\n");
3498 }
3499 }
3500
3501 costs_initialized = true;
3502 }
3503
3504 STRIP_NOPS (expr);
3505
3506 if (SSA_VAR_P (expr))
3507 return no_cost;
3508
3509 if (is_gimple_min_invariant (expr))
3510 {
3511 if (TREE_CODE (expr) == INTEGER_CST)
3512 return new_cost (integer_cost [speed], 0);
3513
3514 if (TREE_CODE (expr) == ADDR_EXPR)
3515 {
3516 tree obj = TREE_OPERAND (expr, 0);
3517
3518 if (TREE_CODE (obj) == VAR_DECL
3519 || TREE_CODE (obj) == PARM_DECL
3520 || TREE_CODE (obj) == RESULT_DECL)
3521 return new_cost (symbol_cost [speed], 0);
3522 }
3523
3524 return new_cost (address_cost [speed], 0);
3525 }
3526
3527 switch (TREE_CODE (expr))
3528 {
3529 case POINTER_PLUS_EXPR:
3530 case PLUS_EXPR:
3531 case MINUS_EXPR:
3532 case MULT_EXPR:
3533 op0 = TREE_OPERAND (expr, 0);
3534 op1 = TREE_OPERAND (expr, 1);
3535 STRIP_NOPS (op0);
3536 STRIP_NOPS (op1);
3537
3538 if (is_gimple_val (op0))
3539 cost0 = no_cost;
3540 else
3541 cost0 = force_expr_to_var_cost (op0, speed);
3542
3543 if (is_gimple_val (op1))
3544 cost1 = no_cost;
3545 else
3546 cost1 = force_expr_to_var_cost (op1, speed);
3547
3548 break;
3549
3550 case NEGATE_EXPR:
3551 op0 = TREE_OPERAND (expr, 0);
3552 STRIP_NOPS (op0);
3553 op1 = NULL_TREE;
3554
3555 if (is_gimple_val (op0))
3556 cost0 = no_cost;
3557 else
3558 cost0 = force_expr_to_var_cost (op0, speed);
3559
3560 cost1 = no_cost;
3561 break;
3562
3563 default:
3564 /* Just an arbitrary value, FIXME. */
3565 return new_cost (target_spill_cost[speed], 0);
3566 }
3567
3568 mode = TYPE_MODE (TREE_TYPE (expr));
3569 switch (TREE_CODE (expr))
3570 {
3571 case POINTER_PLUS_EXPR:
3572 case PLUS_EXPR:
3573 case MINUS_EXPR:
3574 case NEGATE_EXPR:
3575 cost = new_cost (add_cost (speed, mode), 0);
3576 if (TREE_CODE (expr) != NEGATE_EXPR)
3577 {
3578 tree mult = NULL_TREE;
3579 comp_cost sa_cost;
3580 if (TREE_CODE (op1) == MULT_EXPR)
3581 mult = op1;
3582 else if (TREE_CODE (op0) == MULT_EXPR)
3583 mult = op0;
3584
3585 if (mult != NULL_TREE
3586 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3587 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3588 speed, &sa_cost))
3589 return sa_cost;
3590 }
3591 break;
3592
3593 case MULT_EXPR:
3594 if (cst_and_fits_in_hwi (op0))
3595 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3596 mode, speed), 0);
3597 else if (cst_and_fits_in_hwi (op1))
3598 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3599 mode, speed), 0);
3600 else
3601 return new_cost (target_spill_cost [speed], 0);
3602 break;
3603
3604 default:
3605 gcc_unreachable ();
3606 }
3607
3608 cost = add_costs (cost, cost0);
3609 cost = add_costs (cost, cost1);
3610
3611 /* Bound the cost by target_spill_cost. The parts of complicated
3612 computations often are either loop invariant or at least can
3613 be shared between several iv uses, so letting this grow without
3614 limits would not give reasonable results. */
3615 if (cost.cost > (int) target_spill_cost [speed])
3616 cost.cost = target_spill_cost [speed];
3617
3618 return cost;
3619 }
3620
3621 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3622 invariants the computation depends on. */
3623
3624 static comp_cost
3625 force_var_cost (struct ivopts_data *data,
3626 tree expr, bitmap *depends_on)
3627 {
3628 if (depends_on)
3629 {
3630 fd_ivopts_data = data;
3631 walk_tree (&expr, find_depends, depends_on, NULL);
3632 }
3633
3634 return force_expr_to_var_cost (expr, data->speed);
3635 }
3636
3637 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3638 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3639 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3640 invariants the computation depends on. */
3641
3642 static comp_cost
3643 split_address_cost (struct ivopts_data *data,
3644 tree addr, bool *symbol_present, bool *var_present,
3645 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3646 {
3647 tree core;
3648 HOST_WIDE_INT bitsize;
3649 HOST_WIDE_INT bitpos;
3650 tree toffset;
3651 enum machine_mode mode;
3652 int unsignedp, volatilep;
3653
3654 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3655 &unsignedp, &volatilep, false);
3656
3657 if (toffset != 0
3658 || bitpos % BITS_PER_UNIT != 0
3659 || TREE_CODE (core) != VAR_DECL)
3660 {
3661 *symbol_present = false;
3662 *var_present = true;
3663 fd_ivopts_data = data;
3664 walk_tree (&addr, find_depends, depends_on, NULL);
3665 return new_cost (target_spill_cost[data->speed], 0);
3666 }
3667
3668 *offset += bitpos / BITS_PER_UNIT;
3669 if (TREE_STATIC (core)
3670 || DECL_EXTERNAL (core))
3671 {
3672 *symbol_present = true;
3673 *var_present = false;
3674 return no_cost;
3675 }
3676
3677 *symbol_present = false;
3678 *var_present = true;
3679 return no_cost;
3680 }
3681
3682 /* Estimates cost of expressing difference of addresses E1 - E2 as
3683 var + symbol + offset. The value of offset is added to OFFSET,
3684 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3685 part is missing. DEPENDS_ON is a set of the invariants the computation
3686 depends on. */
3687
3688 static comp_cost
3689 ptr_difference_cost (struct ivopts_data *data,
3690 tree e1, tree e2, bool *symbol_present, bool *var_present,
3691 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3692 {
3693 HOST_WIDE_INT diff = 0;
3694 aff_tree aff_e1, aff_e2;
3695 tree type;
3696
3697 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3698
3699 if (ptr_difference_const (e1, e2, &diff))
3700 {
3701 *offset += diff;
3702 *symbol_present = false;
3703 *var_present = false;
3704 return no_cost;
3705 }
3706
3707 if (integer_zerop (e2))
3708 return split_address_cost (data, TREE_OPERAND (e1, 0),
3709 symbol_present, var_present, offset, depends_on);
3710
3711 *symbol_present = false;
3712 *var_present = true;
3713
3714 type = signed_type_for (TREE_TYPE (e1));
3715 tree_to_aff_combination (e1, type, &aff_e1);
3716 tree_to_aff_combination (e2, type, &aff_e2);
3717 aff_combination_scale (&aff_e2, double_int_minus_one);
3718 aff_combination_add (&aff_e1, &aff_e2);
3719
3720 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3721 }
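
/* Example: for e1 == &a[6] and e2 == &a[2] with 4-byte elements,
   ptr_difference_const computes 16, so only *offset is adjusted and
   the difference itself is free.  */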
3722
3723 /* Estimates cost of expressing difference E1 - E2 as
3724 var + symbol + offset. The value of offset is added to OFFSET,
3725 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3726 part is missing. DEPENDS_ON is a set of the invariants the computation
3727 depends on. */
3728
3729 static comp_cost
3730 difference_cost (struct ivopts_data *data,
3731 tree e1, tree e2, bool *symbol_present, bool *var_present,
3732 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3733 {
3734 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3735 unsigned HOST_WIDE_INT off1, off2;
3736 aff_tree aff_e1, aff_e2;
3737 tree type;
3738
3739 e1 = strip_offset (e1, &off1);
3740 e2 = strip_offset (e2, &off2);
3741 *offset += off1 - off2;
3742
3743 STRIP_NOPS (e1);
3744 STRIP_NOPS (e2);
3745
3746 if (TREE_CODE (e1) == ADDR_EXPR)
3747 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3748 offset, depends_on);
3749 *symbol_present = false;
3750
3751 if (operand_equal_p (e1, e2, 0))
3752 {
3753 *var_present = false;
3754 return no_cost;
3755 }
3756
3757 *var_present = true;
3758
3759 if (integer_zerop (e2))
3760 return force_var_cost (data, e1, depends_on);
3761
3762 if (integer_zerop (e1))
3763 {
3764 comp_cost cost = force_var_cost (data, e2, depends_on);
3765 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3766 return cost;
3767 }
3768
3769 type = signed_type_for (TREE_TYPE (e1));
3770 tree_to_aff_combination (e1, type, &aff_e1);
3771 tree_to_aff_combination (e2, type, &aff_e2);
3772 aff_combination_scale (&aff_e2, double_int_minus_one);
3773 aff_combination_add (&aff_e1, &aff_e2);
3774
3775 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3776 }
3777
3778 /* Returns true if AFF1 and AFF2 are identical. */
3779
3780 static bool
3781 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3782 {
3783 unsigned i;
3784
3785 if (aff1->n != aff2->n)
3786 return false;
3787
3788 for (i = 0; i < aff1->n; i++)
3789 {
3790 if (aff1->elts[i].coef != aff2->elts[i].coef)
3791 return false;
3792
3793 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3794 return false;
3795 }
3796 return true;
3797 }
3798
3799 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3800
3801 static int
3802 get_expr_id (struct ivopts_data *data, tree expr)
3803 {
3804 struct iv_inv_expr_ent ent;
3805 struct iv_inv_expr_ent **slot;
3806
3807 ent.expr = expr;
3808 ent.hash = iterative_hash_expr (expr, 0);
3809 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3810 &ent, INSERT);
3811 if (*slot)
3812 return (*slot)->id;
3813
3814 *slot = XNEW (struct iv_inv_expr_ent);
3815 (*slot)->expr = expr;
3816 (*slot)->hash = ent.hash;
3817 (*slot)->id = data->inv_expr_id++;
3818 return (*slot)->id;
3819 }
3820
3821 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3822 requires a new compiler-generated temporary. Returns -1 otherwise.
3823 ADDRESS_P is a flag indicating if the expression is for address
3824 computation. */
3825
3826 static int
3827 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3828 tree cbase, HOST_WIDE_INT ratio,
3829 bool address_p)
3830 {
3831 aff_tree ubase_aff, cbase_aff;
3832 tree expr, ub, cb;
3833
3834 STRIP_NOPS (ubase);
3835 STRIP_NOPS (cbase);
3836 ub = ubase;
3837 cb = cbase;
3838
3839 if ((TREE_CODE (ubase) == INTEGER_CST)
3840 && (TREE_CODE (cbase) == INTEGER_CST))
3841 return -1;
3842
3843 /* Strips the constant part. */
3844 if (TREE_CODE (ubase) == PLUS_EXPR
3845 || TREE_CODE (ubase) == MINUS_EXPR
3846 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3847 {
3848 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3849 ubase = TREE_OPERAND (ubase, 0);
3850 }
3851
3852 /* Strips the constant part. */
3853 if (TREE_CODE (cbase) == PLUS_EXPR
3854 || TREE_CODE (cbase) == MINUS_EXPR
3855 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3856 {
3857 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3858 cbase = TREE_OPERAND (cbase, 0);
3859 }
3860
3861 if (address_p)
3862 {
3863 if (((TREE_CODE (ubase) == SSA_NAME)
3864 || (TREE_CODE (ubase) == ADDR_EXPR
3865 && is_gimple_min_invariant (ubase)))
3866 && (TREE_CODE (cbase) == INTEGER_CST))
3867 return -1;
3868
3869 if (((TREE_CODE (cbase) == SSA_NAME)
3870 || (TREE_CODE (cbase) == ADDR_EXPR
3871 && is_gimple_min_invariant (cbase)))
3872 && (TREE_CODE (ubase) == INTEGER_CST))
3873 return -1;
3874 }
3875
3876 if (ratio == 1)
3877 {
3878 if (operand_equal_p (ubase, cbase, 0))
3879 return -1;
3880
3881 if (TREE_CODE (ubase) == ADDR_EXPR
3882 && TREE_CODE (cbase) == ADDR_EXPR)
3883 {
3884 tree usym, csym;
3885
3886 usym = TREE_OPERAND (ubase, 0);
3887 csym = TREE_OPERAND (cbase, 0);
3888 if (TREE_CODE (usym) == ARRAY_REF)
3889 {
3890 tree ind = TREE_OPERAND (usym, 1);
3891 if (TREE_CODE (ind) == INTEGER_CST
3892 && host_integerp (ind, 0)
3893 && TREE_INT_CST_LOW (ind) == 0)
3894 usym = TREE_OPERAND (usym, 0);
3895 }
3896 if (TREE_CODE (csym) == ARRAY_REF)
3897 {
3898 tree ind = TREE_OPERAND (csym, 1);
3899 if (TREE_CODE (ind) == INTEGER_CST
3900 && host_integerp (ind, 0)
3901 && TREE_INT_CST_LOW (ind) == 0)
3902 csym = TREE_OPERAND (csym, 0);
3903 }
3904 if (operand_equal_p (usym, csym, 0))
3905 return -1;
3906 }
3907 /* Now do a more complex comparison. */
3908 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3909 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3910 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3911 return -1;
3912 }
3913
3914 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3915 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3916
3917 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3918 aff_combination_add (&ubase_aff, &cbase_aff);
3919 expr = aff_combination_to_tree (&ubase_aff);
3920 return get_expr_id (data, expr);
3921 }
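
/* Illustrative: with ubase x + 8, cbase x and ratio 1, the bases
   compare equal once the constant parts are stripped, so -1 is
   returned and no new temporary is accounted; for unrelated bases an
   id from get_expr_id is returned instead.  */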
3922
3925 /* Determines the cost of the computation by which USE is expressed
3926 from induction variable CAND. If ADDRESS_P is true, we just need
3927 to create an address from it, otherwise we want to get it into a
3928 register. A set of invariants we depend on is stored in
3929 DEPENDS_ON. AT is the statement at which the value is computed.
3930 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3931 addressing is likely. */
3932
3933 static comp_cost
3934 get_computation_cost_at (struct ivopts_data *data,
3935 struct iv_use *use, struct iv_cand *cand,
3936 bool address_p, bitmap *depends_on, gimple at,
3937 bool *can_autoinc,
3938 int *inv_expr_id)
3939 {
3940 tree ubase = use->iv->base, ustep = use->iv->step;
3941 tree cbase, cstep;
3942 tree utype = TREE_TYPE (ubase), ctype;
3943 unsigned HOST_WIDE_INT cstepi, offset = 0;
3944 HOST_WIDE_INT ratio, aratio;
3945 bool var_present, symbol_present, stmt_is_after_inc;
3946 comp_cost cost;
3947 double_int rat;
3948 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3949 enum machine_mode mem_mode = (address_p
3950 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3951 : VOIDmode);
3952
3953 *depends_on = NULL;
3954
3955 /* Only consider real candidates. */
3956 if (!cand->iv)
3957 return infinite_cost;
3958
3959 cbase = cand->iv->base;
3960 cstep = cand->iv->step;
3961 ctype = TREE_TYPE (cbase);
3962
3963 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3964 {
3965 /* We do not have a precision to express the values of the use. */
3966 return infinite_cost;
3967 }
3968
3969 if (address_p
3970 || (use->iv->base_object
3971 && cand->iv->base_object
3972 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3973 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3974 {
3975 /* Do not try to express the address of an object with a computation
3976 based on the address of a different object. This may cause problems
3977 in RTL-level alias analysis (which does not expect this to happen,
3978 as it is illegal in C), and would be unlikely to be useful
3979 anyway. */
3980 if (use->iv->base_object
3981 && cand->iv->base_object
3982 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3983 return infinite_cost;
3984 }
3985
3986 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3987 {
3988 /* TODO -- add direct handling of this case. */
3989 goto fallback;
3990 }
3991
3992 /* CSTEPI is removed from the offset in case the statement is after the
3993 increment. If the step is not constant, we use zero instead.
3994 This is a bit imprecise (there is the extra addition), but
3995 redundancy elimination is likely to transform the code so that
3996 it uses the value of the variable before the increment anyway,
3997 so this is not too unrealistic. */
3998 if (cst_and_fits_in_hwi (cstep))
3999 cstepi = int_cst_value (cstep);
4000 else
4001 cstepi = 0;
4002
4003 if (!constant_multiple_of (ustep, cstep, &rat))
4004 return infinite_cost;
4005
4006 if (rat.fits_shwi ())
4007 ratio = rat.to_shwi ();
4008 else
4009 return infinite_cost;
4010
4011 STRIP_NOPS (cbase);
4012 ctype = TREE_TYPE (cbase);
4013
4014 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4015
4016 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4017 or ratio == 1, it is better to handle this like
4018
4019 ubase - ratio * cbase + ratio * var
4020
4021 (this also holds in the case ratio == -1, TODO). */
4022
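/* A small worked instance (illustrative numbers only): with ubase == 100,
   cbase == 4 and ratio == 2, the use is 100 + 2 * (var - 4), which the
   rewrite above folds to (100 - 2 * 4) + 2 * var == 92 + 2 * var, so the
   whole constant part becomes a single compile-time offset.  */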
4023 if (cst_and_fits_in_hwi (cbase))
4024 {
4025 offset = - ratio * int_cst_value (cbase);
4026 cost = difference_cost (data,
4027 ubase, build_int_cst (utype, 0),
4028 &symbol_present, &var_present, &offset,
4029 depends_on);
4030 cost.cost /= avg_loop_niter (data->current_loop);
4031 }
4032 else if (ratio == 1)
4033 {
4034 tree real_cbase = cbase;
4035
4036 /* Check to see if any adjustment is needed. */
4037 if (cstepi == 0 && stmt_is_after_inc)
4038 {
4039 aff_tree real_cbase_aff;
4040 aff_tree cstep_aff;
4041
4042 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4043 &real_cbase_aff);
4044 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4045
4046 aff_combination_add (&real_cbase_aff, &cstep_aff);
4047 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4048 }
4049
4050 cost = difference_cost (data,
4051 ubase, real_cbase,
4052 &symbol_present, &var_present, &offset,
4053 depends_on);
4054 cost.cost /= avg_loop_niter (data->current_loop);
4055 }
4056 else if (address_p
4057 && !POINTER_TYPE_P (ctype)
4058 && multiplier_allowed_in_address_p
4059 (ratio, mem_mode,
4060 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4061 {
4062 cbase
4063 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4064 cost = difference_cost (data,
4065 ubase, cbase,
4066 &symbol_present, &var_present, &offset,
4067 depends_on);
4068 cost.cost /= avg_loop_niter (data->current_loop);
4069 }
4070 else
4071 {
4072 cost = force_var_cost (data, cbase, depends_on);
4073 cost = add_costs (cost,
4074 difference_cost (data,
4075 ubase, build_int_cst (utype, 0),
4076 &symbol_present, &var_present,
4077 &offset, depends_on));
4078 cost.cost /= avg_loop_niter (data->current_loop);
4079 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4080 }
4081
4082 if (inv_expr_id)
4083 {
4084 *inv_expr_id =
4085 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4086 /* Clear depends on. */
4087 if (*inv_expr_id != -1 && depends_on && *depends_on)
4088 bitmap_clear (*depends_on);
4089 }
4090
4091 /* If we are after the increment, the value of the candidate is higher by
4092 one iteration. */
4093 if (stmt_is_after_inc)
4094 offset -= ratio * cstepi;
4095
4096 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4097 (symbol/var1/const parts may be omitted). If we are looking for an
4098 address, find the cost of addressing this. */
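/* For instance (a hypothetical decomposition), a use &a[i + 3] based on
   candidate i would give symbol == &a, const == 3 * sizeof (*a) and
   ratio == sizeof (*a), with no var1 part.  */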
4099 if (address_p)
4100 return add_costs (cost,
4101 get_address_cost (symbol_present, var_present,
4102 offset, ratio, cstepi,
4103 mem_mode,
4104 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4105 speed, stmt_is_after_inc,
4106 can_autoinc));
4107
4108 /* Otherwise estimate the costs for computing the expression. */
4109 if (!symbol_present && !var_present && !offset)
4110 {
4111 if (ratio != 1)
4112 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4113 return cost;
4114 }
4115
4116 /* Symbol + offset should be compile-time computable so consider that they
4117 are added once to the variable, if present. */
4118 if (var_present && (symbol_present || offset))
4119 cost.cost += adjust_setup_cost (data,
4120 add_cost (speed, TYPE_MODE (ctype)));
4121
4122 /* Having offset does not affect runtime cost in case it is added to
4123 symbol, but it increases complexity. */
4124 if (offset)
4125 cost.complexity++;
4126
4127 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4128
4129 aratio = ratio > 0 ? ratio : -ratio;
4130 if (aratio != 1)
4131 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4132 return cost;
4133
4134 fallback:
4135 if (can_autoinc)
4136 *can_autoinc = false;
4137
4138 {
4139 /* Just get the expression, expand it and measure the cost. */
4140 tree comp = get_computation_at (data->current_loop, use, cand, at);
4141
4142 if (!comp)
4143 return infinite_cost;
4144
4145 if (address_p)
4146 comp = build_simple_mem_ref (comp);
4147
4148 return new_cost (computation_cost (comp, speed), 0);
4149 }
4150 }
4151
4152 /* Determines the cost of the computation by which USE is expressed
4153 from induction variable CAND. If ADDRESS_P is true, we just need
4154 to create an address from it, otherwise we want to get it into a
4155 register. A set of invariants we depend on is stored in
4156 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4157 autoinc addressing is likely. */
4158
4159 static comp_cost
4160 get_computation_cost (struct ivopts_data *data,
4161 struct iv_use *use, struct iv_cand *cand,
4162 bool address_p, bitmap *depends_on,
4163 bool *can_autoinc, int *inv_expr_id)
4164 {
4165 return get_computation_cost_at (data,
4166 use, cand, address_p, depends_on, use->stmt,
4167 can_autoinc, inv_expr_id);
4168 }
4169
4170 /* Determines cost of basing replacement of USE on CAND in a generic
4171 expression. */
4172
4173 static bool
4174 determine_use_iv_cost_generic (struct ivopts_data *data,
4175 struct iv_use *use, struct iv_cand *cand)
4176 {
4177 bitmap depends_on;
4178 comp_cost cost;
4179 int inv_expr_id = -1;
4180
4181 /* The simple case first -- if we need to express the value of the preserved
4182 original biv, the cost is 0. This also prevents us from counting the
4183 cost of increment twice -- once at this use and once in the cost of
4184 the candidate. */
4185 if (cand->pos == IP_ORIGINAL
4186 && cand->incremented_at == use->stmt)
4187 {
4188 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4189 ERROR_MARK, -1);
4190 return true;
4191 }
4192
4193 cost = get_computation_cost (data, use, cand, false, &depends_on,
4194 NULL, &inv_expr_id);
4195
4196 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4197 inv_expr_id);
4198
4199 return !infinite_cost_p (cost);
4200 }
4201
4202 /* Determines cost of basing replacement of USE on CAND in an address. */
4203
4204 static bool
4205 determine_use_iv_cost_address (struct ivopts_data *data,
4206 struct iv_use *use, struct iv_cand *cand)
4207 {
4208 bitmap depends_on;
4209 bool can_autoinc;
4210 int inv_expr_id = -1;
4211 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4212 &can_autoinc, &inv_expr_id);
4213
4214 if (cand->ainc_use == use)
4215 {
4216 if (can_autoinc)
4217 cost.cost -= cand->cost_step;
4218 /* If we generated the candidate solely for exploiting autoincrement
4219 opportunities, and it turns out it can't be used, set the cost to
4220 infinity to make sure we ignore it. */
4221 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4222 cost = infinite_cost;
4223 }
4224 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4225 inv_expr_id);
4226
4227 return !infinite_cost_p (cost);
4228 }
4229
4230 /* Computes value of candidate CAND at position AT in iteration NITER, and
4231 stores it to VAL. */
4232
4233 static void
4234 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4235 aff_tree *val)
4236 {
4237 aff_tree step, delta, nit;
4238 struct iv *iv = cand->iv;
4239 tree type = TREE_TYPE (iv->base);
4240 tree steptype = type;
4241 if (POINTER_TYPE_P (type))
4242 steptype = sizetype;
4243
4244 tree_to_aff_combination (iv->step, steptype, &step);
4245 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4246 aff_combination_convert (&nit, steptype);
4247 aff_combination_mult (&nit, &step, &delta);
4248 if (stmt_after_increment (loop, cand, at))
4249 aff_combination_add (&delta, &step);
4250
4251 tree_to_aff_combination (iv->base, type, val);
4252 aff_combination_add (val, &delta);
4253 }
4254
4255 /* Returns the period of induction variable IV. */
4256
4257 static tree
4258 iv_period (struct iv *iv)
4259 {
4260 tree step = iv->step, period, type;
4261 tree pow2div;
4262
4263 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4264
4265 type = unsigned_type_for (TREE_TYPE (step));
4266 /* The period of the iv is lcm (step, type_range)/step - 1,
4267 i.e., N*type_range/step - 1. Since the type range is a power
4268 of two, N == step >> num_of_ending_zeros_binary (step),
4269 so the final result is
4270
4271 (type_range >> num_of_ending_zeros_binary (step)) - 1
4272
4273 */
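/* Illustrative check of the formula: for an 8-bit unsigned type and
   step == 6, num_of_ending_zeros_binary (6) == 1, so the period is
   (256 >> 1) - 1 == 127, which indeed equals lcm (6, 256) / 6 - 1
   == 768 / 6 - 1 == 127.  */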
4274 pow2div = num_ending_zeros (step);
4275
4276 period = build_low_bits_mask (type,
4277 (TYPE_PRECISION (type)
4278 - tree_low_cst (pow2div, 1)));
4279
4280 return period;
4281 }
4282
4283 /* Returns the comparison operator used when eliminating the iv USE. */
4284
4285 static enum tree_code
4286 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4287 {
4288 struct loop *loop = data->current_loop;
4289 basic_block ex_bb;
4290 edge exit;
4291
4292 ex_bb = gimple_bb (use->stmt);
4293 exit = EDGE_SUCC (ex_bb, 0);
4294 if (flow_bb_inside_loop_p (loop, exit->dest))
4295 exit = EDGE_SUCC (ex_bb, 1);
4296
4297 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4298 }
4299
4300 static tree
4301 strip_wrap_conserving_type_conversions (tree exp)
4302 {
4303 while (tree_ssa_useless_type_conversion (exp)
4304 && (nowrap_type_p (TREE_TYPE (exp))
4305 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4306 exp = TREE_OPERAND (exp, 0);
4307 return exp;
4308 }
4309
4310 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4311 check for an exact match. */
4312
4313 static bool
4314 expr_equal_p (tree e, tree what)
4315 {
4316 gimple stmt;
4317 enum tree_code code;
4318
4319 e = strip_wrap_conserving_type_conversions (e);
4320 what = strip_wrap_conserving_type_conversions (what);
4321
4322 code = TREE_CODE (what);
4323 if (TREE_TYPE (e) != TREE_TYPE (what))
4324 return false;
4325
4326 if (operand_equal_p (e, what, 0))
4327 return true;
4328
4329 if (TREE_CODE (e) != SSA_NAME)
4330 return false;
4331
4332 stmt = SSA_NAME_DEF_STMT (e);
4333 if (gimple_code (stmt) != GIMPLE_ASSIGN
4334 || gimple_assign_rhs_code (stmt) != code)
4335 return false;
4336
4337 switch (get_gimple_rhs_class (code))
4338 {
4339 case GIMPLE_BINARY_RHS:
4340 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4341 return false;
4342 /* Fallthru. */
4343
4344 case GIMPLE_UNARY_RHS:
4345 case GIMPLE_SINGLE_RHS:
4346 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4347 default:
4348 return false;
4349 }
4350 }
4351
4352 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4353 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4354 calculation is performed in a non-wrapping type.
4355
4356 TODO: More generally, we could test for the situation that
4357 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4358 This would require knowing the sign of OFFSET.
4359
4360 Also, we only look for the first addition in the computation of BASE.
4361 More complex analysis would be better, but introducing it just for
4362 this optimization seems like overkill. */
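/* For example (a sketch of the case handled below): if BASE was computed
   as q + len in a type with undefined overflow, then BASE - len == q
   trivially cannot wrap; this is the PLUS_EXPR case of the switch in the
   function body.  */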
4363
4364 static bool
4365 difference_cannot_overflow_p (tree base, tree offset)
4366 {
4367 enum tree_code code;
4368 tree e1, e2;
4369
4370 if (!nowrap_type_p (TREE_TYPE (base)))
4371 return false;
4372
4373 base = expand_simple_operations (base);
4374
4375 if (TREE_CODE (base) == SSA_NAME)
4376 {
4377 gimple stmt = SSA_NAME_DEF_STMT (base);
4378
4379 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4380 return false;
4381
4382 code = gimple_assign_rhs_code (stmt);
4383 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4384 return false;
4385
4386 e1 = gimple_assign_rhs1 (stmt);
4387 e2 = gimple_assign_rhs2 (stmt);
4388 }
4389 else
4390 {
4391 code = TREE_CODE (base);
4392 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4393 return false;
4394 e1 = TREE_OPERAND (base, 0);
4395 e2 = TREE_OPERAND (base, 1);
4396 }
4397
4398 /* TODO: deeper inspection may be necessary to prove the equality. */
4399 switch (code)
4400 {
4401 case PLUS_EXPR:
4402 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4403 case POINTER_PLUS_EXPR:
4404 return expr_equal_p (e2, offset);
4405
4406 default:
4407 return false;
4408 }
4409 }
4410
4411 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4412 comparison with CAND. NITER describes the number of iterations of
4413 the loops. If successful, the comparison in COMP_P is altered accordingly.
4414
4415 We aim to handle the following situation:
4416
4417 sometype *base, *p;
4418 int a, b, i;
4419
4420 i = a;
4421 p = p_0 = base + a;
4422
4423 do
4424 {
4425 bla (*p);
4426 p++;
4427 i++;
4428 }
4429 while (i < b);
4430
4431 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4432 We aim to optimize this to
4433
4434 p = p_0 = base + a;
4435 do
4436 {
4437 bla (*p);
4438 p++;
4439 }
4440 while (p < p_0 - a + b);
4441
4442 This preserves correctness, since the pointer arithmetic does not
4443 overflow. More precisely:
4444
4445 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4446 overflow in computing it or the values of p.
4447 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4448 overflow. To prove this, we use the fact that p_0 = base + a. */
4449
4450 static bool
4451 iv_elimination_compare_lt (struct ivopts_data *data,
4452 struct iv_cand *cand, enum tree_code *comp_p,
4453 struct tree_niter_desc *niter)
4454 {
4455 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4456 struct affine_tree_combination nit, tmpa, tmpb;
4457 enum tree_code comp;
4458 HOST_WIDE_INT step;
4459
4460 /* We need to know that the candidate induction variable does not overflow.
4461 While more complex analysis may be used to prove this, for now just
4462 check that the variable appears in the original program and that it
4463 is computed in a type that guarantees no overflows. */
4464 cand_type = TREE_TYPE (cand->iv->base);
4465 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4466 return false;
4467
4468 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4469 the calculation of the BOUND could overflow, making the comparison
4470 invalid. */
4471 if (!data->loop_single_exit_p)
4472 return false;
4473
4474 /* We need to be able to decide whether the candidate is increasing or
4475 decreasing in order to choose the right comparison operator. */
4476 if (!cst_and_fits_in_hwi (cand->iv->step))
4477 return false;
4478 step = int_cst_value (cand->iv->step);
4479
4480 /* Check that the number of iterations matches the expected pattern:
4481 a + 1 > b ? 0 : b - a - 1. */
4482 mbz = niter->may_be_zero;
4483 if (TREE_CODE (mbz) == GT_EXPR)
4484 {
4485 /* Handle a + 1 > b. */
4486 tree op0 = TREE_OPERAND (mbz, 0);
4487 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4488 {
4489 a = TREE_OPERAND (op0, 0);
4490 b = TREE_OPERAND (mbz, 1);
4491 }
4492 else
4493 return false;
4494 }
4495 else if (TREE_CODE (mbz) == LT_EXPR)
4496 {
4497 tree op1 = TREE_OPERAND (mbz, 1);
4498
4499 /* Handle b < a + 1. */
4500 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4501 {
4502 a = TREE_OPERAND (op1, 0);
4503 b = TREE_OPERAND (mbz, 0);
4504 }
4505 else
4506 return false;
4507 }
4508 else
4509 return false;
4510
4511 /* Expected number of iterations is B - A - 1. Check that it matches
4512 the actual number, i.e., that B - A - NITER = 1. */
4513 tree_to_aff_combination (niter->niter, nit_type, &nit);
4514 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4515 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4516 aff_combination_scale (&nit, double_int_minus_one);
4517 aff_combination_scale (&tmpa, double_int_minus_one);
4518 aff_combination_add (&tmpb, &tmpa);
4519 aff_combination_add (&tmpb, &nit);
4520 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4521 return false;
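/* Worked instance (illustrative): for a == 2 and b == 7 the loop runs
   b - a - 1 == 4 times; after scaling NITER and A by -1 and summing,
   the combination is -4 - 2 + 7 == 1 with no variable part, so the
   check above accepts it.  */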
4522
4523 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4524 overflow. */
4525 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4526 cand->iv->step,
4527 fold_convert (TREE_TYPE (cand->iv->step), a));
4528 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4529 return false;
4530
4531 /* Determine the new comparison operator. */
4532 comp = step < 0 ? GT_EXPR : LT_EXPR;
4533 if (*comp_p == NE_EXPR)
4534 *comp_p = comp;
4535 else if (*comp_p == EQ_EXPR)
4536 *comp_p = invert_tree_comparison (comp, false);
4537 else
4538 gcc_unreachable ();
4539
4540 return true;
4541 }
4542
4543 /* Check whether it is possible to express the condition in USE by comparison
4544 of candidate CAND. If so, store the value compared with to BOUND, and the
4545 comparison operator to COMP. */
4546
4547 static bool
4548 may_eliminate_iv (struct ivopts_data *data,
4549 struct iv_use *use, struct iv_cand *cand, tree *bound,
4550 enum tree_code *comp)
4551 {
4552 basic_block ex_bb;
4553 edge exit;
4554 tree period;
4555 struct loop *loop = data->current_loop;
4556 aff_tree bnd;
4557 struct tree_niter_desc *desc = NULL;
4558
4559 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4560 return false;
4561
4562 /* For now this works only for exits that dominate the loop latch.
4563 TODO: extend to other conditions inside the loop body. */
4564 ex_bb = gimple_bb (use->stmt);
4565 if (use->stmt != last_stmt (ex_bb)
4566 || gimple_code (use->stmt) != GIMPLE_COND
4567 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4568 return false;
4569
4570 exit = EDGE_SUCC (ex_bb, 0);
4571 if (flow_bb_inside_loop_p (loop, exit->dest))
4572 exit = EDGE_SUCC (ex_bb, 1);
4573 if (flow_bb_inside_loop_p (loop, exit->dest))
4574 return false;
4575
4576 desc = niter_for_exit (data, exit);
4577 if (!desc)
4578 return false;
4579
4580 /* Determine whether we can use the variable to test the exit condition.
4581 This is the case iff the period of the induction variable is greater
4582 than the number of iterations for which the exit condition is true. */
4583 period = iv_period (cand->iv);
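/* E.g. (illustrative), an 8-bit candidate with step 1 has period 255;
   for a loop known to iterate 300 times such an iv would wrap before
   the exit test, so the checks below must reject the elimination.  */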
4584
4585 /* If the number of iterations is constant, compare against it directly. */
4586 if (TREE_CODE (desc->niter) == INTEGER_CST)
4587 {
4588 /* See cand_value_at. */
4589 if (stmt_after_increment (loop, cand, use->stmt))
4590 {
4591 if (!tree_int_cst_lt (desc->niter, period))
4592 return false;
4593 }
4594 else
4595 {
4596 if (tree_int_cst_lt (period, desc->niter))
4597 return false;
4598 }
4599 }
4600
4601 /* If not, and if this is the only possible exit of the loop, see whether
4602 we can get a conservative estimate on the number of iterations of the
4603 entire loop and compare against that instead. */
4604 else
4605 {
4606 double_int period_value, max_niter;
4607
4608 max_niter = desc->max;
4609 if (stmt_after_increment (loop, cand, use->stmt))
4610 max_niter += double_int_one;
4611 period_value = tree_to_double_int (period);
4612 if (max_niter.ugt (period_value))
4613 {
4614 /* See if we can take advantage of inferred loop bound information. */
4615 if (data->loop_single_exit_p)
4616 {
4617 if (!max_loop_iterations (loop, &max_niter))
4618 return false;
4619 /* The loop bound is already adjusted by adding 1. */
4620 if (max_niter.ugt (period_value))
4621 return false;
4622 }
4623 else
4624 return false;
4625 }
4626 }
4627
4628 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4629
4630 *bound = aff_combination_to_tree (&bnd);
4631 *comp = iv_elimination_compare (data, use);
4632
4633 /* It is unlikely that computing the number of iterations using division
4634 would be more profitable than keeping the original induction variable. */
4635 if (expression_expensive_p (*bound))
4636 return false;
4637
4638 /* Sometimes it is possible to handle the situation that the number of
4639 iterations may be zero unless additional assumptions hold, by using <
4640 instead of != in the exit condition.
4641
4642 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4643 base the exit condition on it. However, that is often too
4644 expensive. */
4645 if (!integer_zerop (desc->may_be_zero))
4646 return iv_elimination_compare_lt (data, cand, comp, desc);
4647
4648 return true;
4649 }
4650
4651 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4652 be copied, if it is used in the loop body and DATA->body_includes_call. */
4653
4654 static int
4655 parm_decl_cost (struct ivopts_data *data, tree bound)
4656 {
4657 tree sbound = bound;
4658 STRIP_NOPS (sbound);
4659
4660 if (TREE_CODE (sbound) == SSA_NAME
4661 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4662 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4663 && data->body_includes_call)
4664 return COSTS_N_INSNS (1);
4665
4666 return 0;
4667 }
4668
4669 /* Determines cost of basing replacement of USE on CAND in a condition. */
4670
4671 static bool
4672 determine_use_iv_cost_condition (struct ivopts_data *data,
4673 struct iv_use *use, struct iv_cand *cand)
4674 {
4675 tree bound = NULL_TREE;
4676 struct iv *cmp_iv;
4677 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4678 comp_cost elim_cost, express_cost, cost, bound_cost;
4679 bool ok;
4680 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4681 tree *control_var, *bound_cst;
4682 enum tree_code comp = ERROR_MARK;
4683
4684 /* Only consider real candidates. */
4685 if (!cand->iv)
4686 {
4687 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4688 ERROR_MARK, -1);
4689 return false;
4690 }
4691
4692 /* Try iv elimination. */
4693 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4694 {
4695 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4696 if (elim_cost.cost == 0)
4697 elim_cost.cost = parm_decl_cost (data, bound);
4698 else if (TREE_CODE (bound) == INTEGER_CST)
4699 elim_cost.cost = 0;
4700 /* If we replace a loop condition 'i < n' with 'p < base + n',
4701 depends_on_elim will have 'base' and 'n' set, which implies
4702 that both 'base' and 'n' will be live during the loop. More likely,
4703 'base + n' will be loop invariant, resulting in only one live value
4704 during the loop. So in that case we clear depends_on_elim and set
4705 elim_inv_expr_id instead. */
4706 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4707 {
4708 elim_inv_expr_id = get_expr_id (data, bound);
4709 bitmap_clear (depends_on_elim);
4710 }
4711 /* The bound is a loop invariant, so it will be computed only
4712 once. */
4713 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4714 }
4715 else
4716 elim_cost = infinite_cost;
4717
4718 /* Try expressing the original giv. If it is compared with an invariant,
4719 note that we cannot get rid of it. */
4720 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4721 NULL, &cmp_iv);
4722 gcc_assert (ok);
4723
4724 /* When the condition is a comparison of the candidate IV against
4725 zero, prefer this IV.
4726
4727 TODO: The constant that we're subtracting from the cost should
4728 be target-dependent. This information should be added to the
4729 target costs for each backend. */
4730 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4731 && integer_zerop (*bound_cst)
4732 && (operand_equal_p (*control_var, cand->var_after, 0)
4733 || operand_equal_p (*control_var, cand->var_before, 0)))
4734 elim_cost.cost -= 1;
4735
4736 express_cost = get_computation_cost (data, use, cand, false,
4737 &depends_on_express, NULL,
4738 &express_inv_expr_id);
4739 fd_ivopts_data = data;
4740 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4741
4742 /* Count the cost of the original bound as well. */
4743 bound_cost = force_var_cost (data, *bound_cst, NULL);
4744 if (bound_cost.cost == 0)
4745 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4746 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4747 bound_cost.cost = 0;
4748 express_cost.cost += bound_cost.cost;
4749
4750 /* Choose the better approach, preferring the eliminated IV. */
4751 if (compare_costs (elim_cost, express_cost) <= 0)
4752 {
4753 cost = elim_cost;
4754 depends_on = depends_on_elim;
4755 depends_on_elim = NULL;
4756 inv_expr_id = elim_inv_expr_id;
4757 }
4758 else
4759 {
4760 cost = express_cost;
4761 depends_on = depends_on_express;
4762 depends_on_express = NULL;
4763 bound = NULL_TREE;
4764 comp = ERROR_MARK;
4765 inv_expr_id = express_inv_expr_id;
4766 }
4767
4768 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4769
4770 if (depends_on_elim)
4771 BITMAP_FREE (depends_on_elim);
4772 if (depends_on_express)
4773 BITMAP_FREE (depends_on_express);
4774
4775 return !infinite_cost_p (cost);
4776 }
4777
4778 /* Determines cost of basing replacement of USE on CAND. Returns false
4779 if USE cannot be based on CAND. */
4780
4781 static bool
4782 determine_use_iv_cost (struct ivopts_data *data,
4783 struct iv_use *use, struct iv_cand *cand)
4784 {
4785 switch (use->type)
4786 {
4787 case USE_NONLINEAR_EXPR:
4788 return determine_use_iv_cost_generic (data, use, cand);
4789
4790 case USE_ADDRESS:
4791 return determine_use_iv_cost_address (data, use, cand);
4792
4793 case USE_COMPARE:
4794 return determine_use_iv_cost_condition (data, use, cand);
4795
4796 default:
4797 gcc_unreachable ();
4798 }
4799 }
4800
4801 /* Return true if get_computation_cost indicates that autoincrement is
4802 a possibility for the pair of USE and CAND, false otherwise. */
4803
4804 static bool
4805 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4806 struct iv_cand *cand)
4807 {
4808 bitmap depends_on;
4809 bool can_autoinc;
4810 comp_cost cost;
4811
4812 if (use->type != USE_ADDRESS)
4813 return false;
4814
4815 cost = get_computation_cost (data, use, cand, true, &depends_on,
4816 &can_autoinc, NULL);
4817
4818 BITMAP_FREE (depends_on);
4819
4820 return !infinite_cost_p (cost) && can_autoinc;
4821 }
4822
4823 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4824 use that allows autoincrement, and set their AINC_USE if possible. */
4825
4826 static void
4827 set_autoinc_for_original_candidates (struct ivopts_data *data)
4828 {
4829 unsigned i, j;
4830
4831 for (i = 0; i < n_iv_cands (data); i++)
4832 {
4833 struct iv_cand *cand = iv_cand (data, i);
4834 struct iv_use *closest = NULL;
4835 if (cand->pos != IP_ORIGINAL)
4836 continue;
4837 for (j = 0; j < n_iv_uses (data); j++)
4838 {
4839 struct iv_use *use = iv_use (data, j);
4840 unsigned uid = gimple_uid (use->stmt);
4841 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4842 || uid > gimple_uid (cand->incremented_at))
4843 continue;
4844 if (closest == NULL || uid > gimple_uid (closest->stmt))
4845 closest = use;
4846 }
4847 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4848 continue;
4849 cand->ainc_use = closest;
4850 }
4851 }
4852
4853 /* Finds the candidates for the induction variables. */
4854
4855 static void
4856 find_iv_candidates (struct ivopts_data *data)
4857 {
4858 /* Add commonly used ivs. */
4859 add_standard_iv_candidates (data);
4860
4861 /* Add old induction variables. */
4862 add_old_ivs_candidates (data);
4863
4864 /* Add induction variables derived from uses. */
4865 add_derived_ivs_candidates (data);
4866
4867 set_autoinc_for_original_candidates (data);
4868
4869 /* Record the important candidates. */
4870 record_important_candidates (data);
4871 }
4872
4873 /* Determines costs of basing the use of the iv on an iv candidate. */
4874
4875 static void
4876 determine_use_iv_costs (struct ivopts_data *data)
4877 {
4878 unsigned i, j;
4879 struct iv_use *use;
4880 struct iv_cand *cand;
4881 bitmap to_clear = BITMAP_ALLOC (NULL);
4882
4883 alloc_use_cost_map (data);
4884
4885 for (i = 0; i < n_iv_uses (data); i++)
4886 {
4887 use = iv_use (data, i);
4888
4889 if (data->consider_all_candidates)
4890 {
4891 for (j = 0; j < n_iv_cands (data); j++)
4892 {
4893 cand = iv_cand (data, j);
4894 determine_use_iv_cost (data, use, cand);
4895 }
4896 }
4897 else
4898 {
4899 bitmap_iterator bi;
4900
4901 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4902 {
4903 cand = iv_cand (data, j);
4904 if (!determine_use_iv_cost (data, use, cand))
4905 bitmap_set_bit (to_clear, j);
4906 }
4907
4908 /* Remove the candidates for which the cost is infinite from
4909 the list of related candidates. */
4910 bitmap_and_compl_into (use->related_cands, to_clear);
4911 bitmap_clear (to_clear);
4912 }
4913 }
4914
4915 BITMAP_FREE (to_clear);
4916
4917 if (dump_file && (dump_flags & TDF_DETAILS))
4918 {
4919 fprintf (dump_file, "Use-candidate costs:\n");
4920
4921 for (i = 0; i < n_iv_uses (data); i++)
4922 {
4923 use = iv_use (data, i);
4924
4925 fprintf (dump_file, "Use %d:\n", i);
4926 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4927 for (j = 0; j < use->n_map_members; j++)
4928 {
4929 if (!use->cost_map[j].cand
4930 || infinite_cost_p (use->cost_map[j].cost))
4931 continue;
4932
4933 fprintf (dump_file, " %d\t%d\t%d\t",
4934 use->cost_map[j].cand->id,
4935 use->cost_map[j].cost.cost,
4936 use->cost_map[j].cost.complexity);
4937 if (use->cost_map[j].depends_on)
4938 bitmap_print (dump_file,
4939 use->cost_map[j].depends_on, "","");
4940 if (use->cost_map[j].inv_expr_id != -1)
4941 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4942 fprintf (dump_file, "\n");
4943 }
4944
4945 fprintf (dump_file, "\n");
4946 }
4947 fprintf (dump_file, "\n");
4948 }
4949 }
4950
4951 /* Determines cost of the candidate CAND. */
4952
4953 static void
4954 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4955 {
4956 comp_cost cost_base;
4957 unsigned cost, cost_step;
4958 tree base;
4959
4960 if (!cand->iv)
4961 {
4962 cand->cost = 0;
4963 return;
4964 }
4965
4966 /* There are two costs associated with the candidate -- its increment
4967 and its initialization. The second is almost negligible for any loop
4968 that rolls enough, so we take it into account only a little. */
4969
4970 base = cand->iv->base;
4971 cost_base = force_var_cost (data, base, NULL);
4972 /* It will be exceptional that the iv register happens to be initialized with
4973 the proper value at no cost. In general, there will at least be a regcopy
4974 or a const set. */
4975 if (cost_base.cost == 0)
4976 cost_base.cost = COSTS_N_INSNS (1);
4977 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4978
4979 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4980
4981 /* Prefer the original ivs unless we may gain something by replacing them.
4982 The reason is to make debugging simpler, so this is not relevant for
4983 artificial ivs created by other optimization passes. */
4984 if (cand->pos != IP_ORIGINAL
4985 || !SSA_NAME_VAR (cand->var_before)
4986 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4987 cost++;
4988
4989 /* Prefer not to insert statements into the latch unless there are some
4990 already (so that we do not create unnecessary jumps). */
4991 if (cand->pos == IP_END
4992 && empty_block_p (ip_end_pos (data->current_loop)))
4993 cost++;
4994
4995 cand->cost = cost;
4996 cand->cost_step = cost_step;
4997 }
4998
4999 /* Determines costs of computation of the candidates. */
5000
5001 static void
5002 determine_iv_costs (struct ivopts_data *data)
5003 {
5004 unsigned i;
5005
5006 if (dump_file && (dump_flags & TDF_DETAILS))
5007 {
5008 fprintf (dump_file, "Candidate costs:\n");
5009 fprintf (dump_file, " cand\tcost\n");
5010 }
5011
5012 for (i = 0; i < n_iv_cands (data); i++)
5013 {
5014 struct iv_cand *cand = iv_cand (data, i);
5015
5016 determine_iv_cost (data, cand);
5017
5018 if (dump_file && (dump_flags & TDF_DETAILS))
5019 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5020 }
5021
5022 if (dump_file && (dump_flags & TDF_DETAILS))
5023 fprintf (dump_file, "\n");
5024 }
5025
5026 /* Calculates cost for having SIZE induction variables. */
5027
5028 static unsigned
5029 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5030 {
5031 /* We add size to the cost, so that we prefer eliminating ivs
5032 if possible. */
5033 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5034 data->body_includes_call);
5035 }
5036
5037 /* For each size of the induction variable set determine the penalty. */
5038
5039 static void
5040 determine_set_costs (struct ivopts_data *data)
5041 {
5042 unsigned j, n;
5043 gimple phi;
5044 gimple_stmt_iterator psi;
5045 tree op;
5046 struct loop *loop = data->current_loop;
5047 bitmap_iterator bi;
5048
5049 if (dump_file && (dump_flags & TDF_DETAILS))
5050 {
5051 fprintf (dump_file, "Global costs:\n");
5052 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5053 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5054 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5055 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5056 }
5057
5058 n = 0;
5059 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5060 {
5061 phi = gsi_stmt (psi);
5062 op = PHI_RESULT (phi);
5063
5064 if (virtual_operand_p (op))
5065 continue;
5066
5067 if (get_iv (data, op))
5068 continue;
5069
5070 n++;
5071 }
5072
5073 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5074 {
5075 struct version_info *info = ver_info (data, j);
5076
5077 if (info->inv_id && info->has_nonlin_use)
5078 n++;
5079 }
5080
5081 data->regs_used = n;
5082 if (dump_file && (dump_flags & TDF_DETAILS))
5083 fprintf (dump_file, " regs_used %d\n", n);
5084
5085 if (dump_file && (dump_flags & TDF_DETAILS))
5086 {
5087 fprintf (dump_file, " cost for size:\n");
5088 fprintf (dump_file, " ivs\tcost\n");
5089 for (j = 0; j <= 2 * target_avail_regs; j++)
5090 fprintf (dump_file, " %d\t%d\n", j,
5091 ivopts_global_cost_for_size (data, j));
5092 fprintf (dump_file, "\n");
5093 }
5094 }
5095
5096 /* Returns true if A is a cheaper cost pair than B. */
5097
5098 static bool
5099 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5100 {
5101 int cmp;
5102
5103 if (!a)
5104 return false;
5105
5106 if (!b)
5107 return true;
5108
5109 cmp = compare_costs (a->cost, b->cost);
5110 if (cmp < 0)
5111 return true;
5112
5113 if (cmp > 0)
5114 return false;
5115
5116 /* In case the costs are the same, prefer the cheaper candidate. */
5117 if (a->cand->cost < b->cand->cost)
5118 return true;
5119
5120 return false;
5121 }
5122
5123
5124 /* Returns the candidate by which USE is expressed in IVS. */
5125
5126 static struct cost_pair *
5127 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5128 {
5129 return ivs->cand_for_use[use->id];
5130 }
5131
5132 /* Computes the cost field of IVS structure. */
5133
5134 static void
5135 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5136 {
5137 comp_cost cost = ivs->cand_use_cost;
5138
5139 cost.cost += ivs->cand_cost;
5140
5141 cost.cost += ivopts_global_cost_for_size (data,
5142 ivs->n_regs + ivs->num_used_inv_expr);
5143
5144 ivs->cost = cost;
5145 }
5146
5147 /* Remove invariants in set INVS from set IVS. */
5148
5149 static void
5150 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5151 {
5152 bitmap_iterator bi;
5153 unsigned iid;
5154
5155 if (!invs)
5156 return;
5157
5158 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5159 {
5160 ivs->n_invariant_uses[iid]--;
5161 if (ivs->n_invariant_uses[iid] == 0)
5162 ivs->n_regs--;
5163 }
5164 }
5165
5166 /* Set USE not to be expressed by any candidate in IVS. */
5167
5168 static void
5169 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5170 struct iv_use *use)
5171 {
5172 unsigned uid = use->id, cid;
5173 struct cost_pair *cp;
5174
5175 cp = ivs->cand_for_use[uid];
5176 if (!cp)
5177 return;
5178 cid = cp->cand->id;
5179
5180 ivs->bad_uses++;
5181 ivs->cand_for_use[uid] = NULL;
5182 ivs->n_cand_uses[cid]--;
5183
5184 if (ivs->n_cand_uses[cid] == 0)
5185 {
5186 bitmap_clear_bit (ivs->cands, cid);
5187 /* Do not count the pseudocandidates. */
5188 if (cp->cand->iv)
5189 ivs->n_regs--;
5190 ivs->n_cands--;
5191 ivs->cand_cost -= cp->cand->cost;
5192
5193 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5194 }
5195
5196 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5197
5198 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5199
5200 if (cp->inv_expr_id != -1)
5201 {
5202 ivs->used_inv_expr[cp->inv_expr_id]--;
5203 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5204 ivs->num_used_inv_expr--;
5205 }
5206 iv_ca_recount_cost (data, ivs);
5207 }
5208
5209 /* Add invariants in set INVS to set IVS. */
5210
5211 static void
5212 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5213 {
5214 bitmap_iterator bi;
5215 unsigned iid;
5216
5217 if (!invs)
5218 return;
5219
5220 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5221 {
5222 ivs->n_invariant_uses[iid]++;
5223 if (ivs->n_invariant_uses[iid] == 1)
5224 ivs->n_regs++;
5225 }
5226 }
5227
5228 /* Set cost pair for USE in set IVS to CP. */
5229
5230 static void
5231 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5232 struct iv_use *use, struct cost_pair *cp)
5233 {
5234 unsigned uid = use->id, cid;
5235
5236 if (ivs->cand_for_use[uid] == cp)
5237 return;
5238
5239 if (ivs->cand_for_use[uid])
5240 iv_ca_set_no_cp (data, ivs, use);
5241
5242 if (cp)
5243 {
5244 cid = cp->cand->id;
5245
5246 ivs->bad_uses--;
5247 ivs->cand_for_use[uid] = cp;
5248 ivs->n_cand_uses[cid]++;
5249 if (ivs->n_cand_uses[cid] == 1)
5250 {
5251 bitmap_set_bit (ivs->cands, cid);
5252 /* Do not count the pseudocandidates. */
5253 if (cp->cand->iv)
5254 ivs->n_regs++;
5255 ivs->n_cands++;
5256 ivs->cand_cost += cp->cand->cost;
5257
5258 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5259 }
5260
5261 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5262 iv_ca_set_add_invariants (ivs, cp->depends_on);
5263
5264 if (cp->inv_expr_id != -1)
5265 {
5266 ivs->used_inv_expr[cp->inv_expr_id]++;
5267 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5268 ivs->num_used_inv_expr++;
5269 }
5270 iv_ca_recount_cost (data, ivs);
5271 }
5272 }
5273
5274 /* Extend set IVS by expressing USE by some of the candidates in it
5275 if possible. All important candidates will be considered
5276 if IMPORTANT_CANDIDATES is true. */
5277
5278 static void
5279 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5280 struct iv_use *use, bool important_candidates)
5281 {
5282 struct cost_pair *best_cp = NULL, *cp;
5283 bitmap_iterator bi;
5284 bitmap cands;
5285 unsigned i;
5286
5287 gcc_assert (ivs->upto >= use->id);
5288
5289 if (ivs->upto == use->id)
5290 {
5291 ivs->upto++;
5292 ivs->bad_uses++;
5293 }
5294
5295 cands = (important_candidates ? data->important_candidates : ivs->cands);
5296 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5297 {
5298 struct iv_cand *cand = iv_cand (data, i);
5299
5300 cp = get_use_iv_cost (data, use, cand);
5301
5302 if (cheaper_cost_pair (cp, best_cp))
5303 best_cp = cp;
5304 }
5305
5306 iv_ca_set_cp (data, ivs, use, best_cp);
5307 }
5308
5309 /* Get cost for assignment IVS. */
5310
5311 static comp_cost
5312 iv_ca_cost (struct iv_ca *ivs)
5313 {
5314 /* This was a conditional expression but it triggered a bug in
5315 Sun C 5.5. */
5316 if (ivs->bad_uses)
5317 return infinite_cost;
5318 else
5319 return ivs->cost;
5320 }
5321
5322 /* Returns true if all dependences of CP are among invariants in IVS. */
5323
5324 static bool
5325 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5326 {
5327 unsigned i;
5328 bitmap_iterator bi;
5329
5330 if (!cp->depends_on)
5331 return true;
5332
5333 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5334 {
5335 if (ivs->n_invariant_uses[i] == 0)
5336 return false;
5337 }
5338
5339 return true;
5340 }
5341
5342 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5343 it before NEXT_CHANGE. */
5344
5345 static struct iv_ca_delta *
5346 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5347 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5348 {
5349 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5350
5351 change->use = use;
5352 change->old_cp = old_cp;
5353 change->new_cp = new_cp;
5354 change->next_change = next_change;
5355
5356 return change;
5357 }
5358
5359 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5360 are rewritten. */
5361
5362 static struct iv_ca_delta *
5363 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5364 {
5365 struct iv_ca_delta *last;
5366
5367 if (!l2)
5368 return l1;
5369
5370 if (!l1)
5371 return l2;
5372
5373 for (last = l1; last->next_change; last = last->next_change)
5374 continue;
5375 last->next_change = l2;
5376
5377 return l1;
5378 }
5379
5380 /* Reverse the list of changes DELTA, forming the inverse to it. */
5381
5382 static struct iv_ca_delta *
5383 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5384 {
5385 struct iv_ca_delta *act, *next, *prev = NULL;
5386 struct cost_pair *tmp;
5387
5388 for (act = delta; act; act = next)
5389 {
5390 next = act->next_change;
5391 act->next_change = prev;
5392 prev = act;
5393
5394 tmp = act->old_cp;
5395 act->old_cp = act->new_cp;
5396 act->new_cp = tmp;
5397 }
5398
5399 return prev;
5400 }
5401
5402 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5403 reverted instead. */
5404
5405 static void
5406 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5407 struct iv_ca_delta *delta, bool forward)
5408 {
5409 struct cost_pair *from, *to;
5410 struct iv_ca_delta *act;
5411
5412 if (!forward)
5413 delta = iv_ca_delta_reverse (delta);
5414
5415 for (act = delta; act; act = act->next_change)
5416 {
5417 from = act->old_cp;
5418 to = act->new_cp;
5419 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5420 iv_ca_set_cp (data, ivs, act->use, to);
5421 }
5422
5423 if (!forward)
5424 iv_ca_delta_reverse (delta);
5425 }
5426
5427 /* Returns true if CAND is used in IVS. */
5428
5429 static bool
5430 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5431 {
5432 return ivs->n_cand_uses[cand->id] > 0;
5433 }
5434
5435 /* Returns number of induction variable candidates in the set IVS. */
5436
5437 static unsigned
5438 iv_ca_n_cands (struct iv_ca *ivs)
5439 {
5440 return ivs->n_cands;
5441 }
5442
5443 /* Free the list of changes DELTA. */
5444
5445 static void
5446 iv_ca_delta_free (struct iv_ca_delta **delta)
5447 {
5448 struct iv_ca_delta *act, *next;
5449
5450 for (act = *delta; act; act = next)
5451 {
5452 next = act->next_change;
5453 free (act);
5454 }
5455
5456 *delta = NULL;
5457 }
5458
5459 /* Allocates a new iv candidate assignment. */
5460
5461 static struct iv_ca *
5462 iv_ca_new (struct ivopts_data *data)
5463 {
5464 struct iv_ca *nw = XNEW (struct iv_ca);
5465
5466 nw->upto = 0;
5467 nw->bad_uses = 0;
5468 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5469 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5470 nw->cands = BITMAP_ALLOC (NULL);
5471 nw->n_cands = 0;
5472 nw->n_regs = 0;
5473 nw->cand_use_cost = no_cost;
5474 nw->cand_cost = 0;
5475 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5476 nw->cost = no_cost;
5477 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5478 nw->num_used_inv_expr = 0;
5479
5480 return nw;
5481 }
5482
5483 /* Free memory occupied by the set IVS. */
5484
5485 static void
5486 iv_ca_free (struct iv_ca **ivs)
5487 {
5488 free ((*ivs)->cand_for_use);
5489 free ((*ivs)->n_cand_uses);
5490 BITMAP_FREE ((*ivs)->cands);
5491 free ((*ivs)->n_invariant_uses);
5492 free ((*ivs)->used_inv_expr);
5493 free (*ivs);
5494 *ivs = NULL;
5495 }
5496
5497 /* Dumps IVS to FILE. */
5498
5499 static void
5500 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5501 {
5502 const char *pref = " invariants ";
5503 unsigned i;
5504 comp_cost cost = iv_ca_cost (ivs);
5505
5506 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5507 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5508 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5509 bitmap_print (file, ivs->cands, " candidates: ","\n");
5510
5511 for (i = 0; i < ivs->upto; i++)
5512 {
5513 struct iv_use *use = iv_use (data, i);
5514 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5515 if (cp)
5516 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5517 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5518 else
5519 fprintf (file, " use:%d --> ??\n", use->id);
5520 }
5521
5522 for (i = 1; i <= data->max_inv_id; i++)
5523 if (ivs->n_invariant_uses[i])
5524 {
5525 fprintf (file, "%s%d", pref, i);
5526 pref = ", ";
5527 }
5528 fprintf (file, "\n\n");
5529 }
5530
5531 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5532 of the new set, and store the differences in DELTA. The number of induction
5533 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when true,
5534 the function will try to find a solution with minimal iv candidates. */
5535
5536 static comp_cost
5537 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5538 struct iv_cand *cand, struct iv_ca_delta **delta,
5539 unsigned *n_ivs, bool min_ncand)
5540 {
5541 unsigned i;
5542 comp_cost cost;
5543 struct iv_use *use;
5544 struct cost_pair *old_cp, *new_cp;
5545
5546 *delta = NULL;
5547 for (i = 0; i < ivs->upto; i++)
5548 {
5549 use = iv_use (data, i);
5550 old_cp = iv_ca_cand_for_use (ivs, use);
5551
5552 if (old_cp
5553 && old_cp->cand == cand)
5554 continue;
5555
5556 new_cp = get_use_iv_cost (data, use, cand);
5557 if (!new_cp)
5558 continue;
5559
5560 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5561 continue;
5562
5563 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5564 continue;
5565
5566 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5567 }
5568
5569 iv_ca_delta_commit (data, ivs, *delta, true);
5570 cost = iv_ca_cost (ivs);
5571 if (n_ivs)
5572 *n_ivs = iv_ca_n_cands (ivs);
5573 iv_ca_delta_commit (data, ivs, *delta, false);
5574
5575 return cost;
5576 }
5577
5578 /* Try narrowing set IVS by removing CAND. Return the cost of
5579 the new set and store the differences in DELTA. */
5580
5581 static comp_cost
5582 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5583 struct iv_cand *cand, struct iv_ca_delta **delta)
5584 {
5585 unsigned i, ci;
5586 struct iv_use *use;
5587 struct cost_pair *old_cp, *new_cp, *cp;
5588 bitmap_iterator bi;
5589 struct iv_cand *cnd;
5590 comp_cost cost;
5591
5592 *delta = NULL;
5593 for (i = 0; i < n_iv_uses (data); i++)
5594 {
5595 use = iv_use (data, i);
5596
5597 old_cp = iv_ca_cand_for_use (ivs, use);
5598 if (old_cp->cand != cand)
5599 continue;
5600
5601 new_cp = NULL;
5602
5603 if (data->consider_all_candidates)
5604 {
5605 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5606 {
5607 if (ci == cand->id)
5608 continue;
5609
5610 cnd = iv_cand (data, ci);
5611
5612 cp = get_use_iv_cost (data, use, cnd);
5613 if (!cp)
5614 continue;
5615
5616 if (!iv_ca_has_deps (ivs, cp))
5617 continue;
5618
5619 if (!cheaper_cost_pair (cp, new_cp))
5620 continue;
5621
5622 new_cp = cp;
5623 }
5624 }
5625 else
5626 {
5627 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5628 {
5629 if (ci == cand->id)
5630 continue;
5631
5632 cnd = iv_cand (data, ci);
5633
5634 cp = get_use_iv_cost (data, use, cnd);
5635 if (!cp)
5636 continue;
5637 if (!iv_ca_has_deps (ivs, cp))
5638 continue;
5639
5640 if (!cheaper_cost_pair (cp, new_cp))
5641 continue;
5642
5643 new_cp = cp;
5644 }
5645 }
5646
5647 if (!new_cp)
5648 {
5649 iv_ca_delta_free (delta);
5650 return infinite_cost;
5651 }
5652
5653 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5654 }
5655
5656 iv_ca_delta_commit (data, ivs, *delta, true);
5657 cost = iv_ca_cost (ivs);
5658 iv_ca_delta_commit (data, ivs, *delta, false);
5659
5660 return cost;
5661 }
5662
5663 /* Try optimizing the set of candidates IVS by removing candidates other
5664 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5665 differences in DELTA. */
5666
5667 static comp_cost
5668 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5669 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5670 {
5671 bitmap_iterator bi;
5672 struct iv_ca_delta *act_delta, *best_delta;
5673 unsigned i;
5674 comp_cost best_cost, acost;
5675 struct iv_cand *cand;
5676
5677 best_delta = NULL;
5678 best_cost = iv_ca_cost (ivs);
5679
5680 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5681 {
5682 cand = iv_cand (data, i);
5683
5684 if (cand == except_cand)
5685 continue;
5686
5687 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5688
5689 if (compare_costs (acost, best_cost) < 0)
5690 {
5691 best_cost = acost;
5692 iv_ca_delta_free (&best_delta);
5693 best_delta = act_delta;
5694 }
5695 else
5696 iv_ca_delta_free (&act_delta);
5697 }
5698
5699 if (!best_delta)
5700 {
5701 *delta = NULL;
5702 return best_cost;
5703 }
5704
5705 /* Recurse to possibly remove other unnecessary ivs. */
5706 iv_ca_delta_commit (data, ivs, best_delta, true);
5707 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5708 iv_ca_delta_commit (data, ivs, best_delta, false);
5709 *delta = iv_ca_delta_join (best_delta, *delta);
5710 return best_cost;
5711 }
5712
5713 /* Tries to extend the set IVS in the best possible way in order
5714 to express USE. If ORIGINALP is true, prefer candidates from
5715 the original set of IVs, otherwise favor important candidates not
5716 based on any memory object. */
5717
5718 static bool
5719 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5720 struct iv_use *use, bool originalp)
5721 {
5722 comp_cost best_cost, act_cost;
5723 unsigned i;
5724 bitmap_iterator bi;
5725 struct iv_cand *cand;
5726 struct iv_ca_delta *best_delta = NULL, *act_delta;
5727 struct cost_pair *cp;
5728
5729 iv_ca_add_use (data, ivs, use, false);
5730 best_cost = iv_ca_cost (ivs);
5731
5732 cp = iv_ca_cand_for_use (ivs, use);
5733 if (!cp)
5734 {
5735 ivs->upto--;
5736 ivs->bad_uses--;
5737 iv_ca_add_use (data, ivs, use, true);
5738 best_cost = iv_ca_cost (ivs);
5739 cp = iv_ca_cand_for_use (ivs, use);
5740 }
5741 if (cp)
5742 {
5743 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5744 iv_ca_set_no_cp (data, ivs, use);
5745 }
5746
5747 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5748 first try important candidates not based on any memory object. Only if
5749 this fails do we try the specific ones. Rationale -- in loops with many
5750 variables the best choice often is to use just one generic biv. If we
5751 added many ivs specific to the uses here, the optimization algorithm would
5752 later be likely to get stuck in a local minimum, thus causing us to create
5753 too many ivs. The approach of going from few ivs to more seems more likely
5754 to succeed -- starting from few ivs, replacing an expensive use by a
5755 specific iv should always be a win. */
5756 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5757 {
5758 cand = iv_cand (data, i);
5759
5760 if (originalp && cand->pos != IP_ORIGINAL)
5761 continue;
5762
5763 if (!originalp && cand->iv->base_object != NULL_TREE)
5764 continue;
5765
5766 if (iv_ca_cand_used_p (ivs, cand))
5767 continue;
5768
5769 cp = get_use_iv_cost (data, use, cand);
5770 if (!cp)
5771 continue;
5772
5773 iv_ca_set_cp (data, ivs, use, cp);
5774 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5775 true);
5776 iv_ca_set_no_cp (data, ivs, use);
5777 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5778
5779 if (compare_costs (act_cost, best_cost) < 0)
5780 {
5781 best_cost = act_cost;
5782
5783 iv_ca_delta_free (&best_delta);
5784 best_delta = act_delta;
5785 }
5786 else
5787 iv_ca_delta_free (&act_delta);
5788 }
5789
5790 if (infinite_cost_p (best_cost))
5791 {
5792 for (i = 0; i < use->n_map_members; i++)
5793 {
5794 cp = use->cost_map + i;
5795 cand = cp->cand;
5796 if (!cand)
5797 continue;
5798
5799 /* Already tried this. */
5800 if (cand->important)
5801 {
5802 if (originalp && cand->pos == IP_ORIGINAL)
5803 continue;
5804 if (!originalp && cand->iv->base_object == NULL_TREE)
5805 continue;
5806 }
5807
5808 if (iv_ca_cand_used_p (ivs, cand))
5809 continue;
5810
5811 act_delta = NULL;
5812 iv_ca_set_cp (data, ivs, use, cp);
5813 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5814 iv_ca_set_no_cp (data, ivs, use);
5815 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5816 cp, act_delta);
5817
5818 if (compare_costs (act_cost, best_cost) < 0)
5819 {
5820 best_cost = act_cost;
5821
5822 if (best_delta)
5823 iv_ca_delta_free (&best_delta);
5824 best_delta = act_delta;
5825 }
5826 else
5827 iv_ca_delta_free (&act_delta);
5828 }
5829 }
5830
5831 iv_ca_delta_commit (data, ivs, best_delta, true);
5832 iv_ca_delta_free (&best_delta);
5833
5834 return !infinite_cost_p (best_cost);
5835 }
5836
5837 /* Finds an initial assignment of candidates to uses. */
5838
5839 static struct iv_ca *
5840 get_initial_solution (struct ivopts_data *data, bool originalp)
5841 {
5842 struct iv_ca *ivs = iv_ca_new (data);
5843 unsigned i;
5844
5845 for (i = 0; i < n_iv_uses (data); i++)
5846 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5847 {
5848 iv_ca_free (&ivs);
5849 return NULL;
5850 }
5851
5852 return ivs;
5853 }
5854
5855 /* Tries to improve set of induction variables IVS. */
5856
5857 static bool
5858 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5859 {
5860 unsigned i, n_ivs;
5861 comp_cost acost, best_cost = iv_ca_cost (ivs);
5862 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5863 struct iv_cand *cand;
5864
5865 /* Try extending the set of induction variables by one. */
5866 for (i = 0; i < n_iv_cands (data); i++)
5867 {
5868 cand = iv_cand (data, i);
5869
5870 if (iv_ca_cand_used_p (ivs, cand))
5871 continue;
5872
5873 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5874 if (!act_delta)
5875 continue;
5876
5877 /* If we successfully added the candidate and the set is small enough,
5878 try optimizing it by removing other candidates. */
5879 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5880 {
5881 iv_ca_delta_commit (data, ivs, act_delta, true);
5882 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5883 iv_ca_delta_commit (data, ivs, act_delta, false);
5884 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5885 }
5886
5887 if (compare_costs (acost, best_cost) < 0)
5888 {
5889 best_cost = acost;
5890 iv_ca_delta_free (&best_delta);
5891 best_delta = act_delta;
5892 }
5893 else
5894 iv_ca_delta_free (&act_delta);
5895 }
5896
5897 if (!best_delta)
5898 {
5899 /* Try removing the candidates from the set instead. */
5900 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5901
5902 /* Nothing more we can do. */
5903 if (!best_delta)
5904 return false;
5905 }
5906
5907 iv_ca_delta_commit (data, ivs, best_delta, true);
5908 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5909 iv_ca_delta_free (&best_delta);
5910 return true;
5911 }
5912
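/* A self-contained toy model -- illustrative only, not part of this pass;
   the names toy_covers, toy_cost and toy_improve are hypothetical -- of the
   accept-if-better search shape used by try_improve_iv_set above.  A "set"
   is a bitmask over 8 candidates, each of 4 uses can be served by the
   candidates in its mask, each selected candidate costs a flat amount
   (standing in for the increment cost and register pressure), and an
   unserved use is prohibitively expensive, a crude stand-in for
   infinite_cost.  The real code builds iv_ca_delta records and commits the
   single best one per iteration instead of toggling bits immediately.  */

static const unsigned toy_covers[4] = { 0x3, 0x5, 0x4, 0xc };

static int
toy_cost (unsigned set)
{
  int cost = 0, i;

  for (i = 0; i < 4; i++)
    if (!(set & toy_covers[i]))
      cost += 1000;		/* Unserved use: effectively infinite.  */
  for (i = 0; i < 8; i++)
    if (set & (1u << i))
      cost += 3;		/* Flat per-candidate cost.  */
  return cost;
}

/* Greedily toggle single candidates while that lowers the cost.  Unlike the
   real code, any improving move is committed on the spot.  */

static unsigned
toy_improve (unsigned set)
{
  bool changed = true;

  while (changed)
    {
      int i;

      changed = false;
      for (i = 0; i < 8; i++)
	if (toy_cost (set ^ (1u << i)) < toy_cost (set))
	  {
	    set ^= 1u << i;
	    changed = true;
	  }
    }
  return set;
}

/* E.g. toy_improve (0x7) settles on 0x5: candidates 0 and 2 serve all four
   uses at the lowest cost reachable by single-candidate moves.  */
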
5913 /* Attempts to find the optimal set of induction variables. We use a
5914 simple greedy heuristic -- we try to replace at most one candidate in
5915 the selected solution and remove the unused ivs, as long as this improves the cost. */
5916
5917 static struct iv_ca *
5918 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5919 {
5920 struct iv_ca *set;
5921
5922 /* Get the initial solution. */
5923 set = get_initial_solution (data, originalp);
5924 if (!set)
5925 {
5926 if (dump_file && (dump_flags & TDF_DETAILS))
5927 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5928 return NULL;
5929 }
5930
5931 if (dump_file && (dump_flags & TDF_DETAILS))
5932 {
5933 fprintf (dump_file, "Initial set of candidates:\n");
5934 iv_ca_dump (data, dump_file, set);
5935 }
5936
5937 while (try_improve_iv_set (data, set))
5938 {
5939 if (dump_file && (dump_flags & TDF_DETAILS))
5940 {
5941 fprintf (dump_file, "Improved to:\n");
5942 iv_ca_dump (data, dump_file, set);
5943 }
5944 }
5945
5946 return set;
5947 }
5948
5949 static struct iv_ca *
5950 find_optimal_iv_set (struct ivopts_data *data)
5951 {
5952 unsigned i;
5953 struct iv_ca *set, *origset;
5954 struct iv_use *use;
5955 comp_cost cost, origcost;
5956
5957 /* Determine the cost using a strategy that starts from the original IVs,
5958 then try again with a strategy that prefers candidates not based
5959 on any IVs; keep whichever set is cheaper. */
5960 origset = find_optimal_iv_set_1 (data, true);
5961 set = find_optimal_iv_set_1 (data, false);
5962
5963 if (!origset && !set)
5964 return NULL;
5965
5966 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5967 cost = set ? iv_ca_cost (set) : infinite_cost;
5968
5969 if (dump_file && (dump_flags & TDF_DETAILS))
5970 {
5971 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5972 origcost.cost, origcost.complexity);
5973 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5974 cost.cost, cost.complexity);
5975 }
5976
5977 /* Choose the one with the best cost; ties favor the set based on the original IVs. */
5978 if (compare_costs (origcost, cost) <= 0)
5979 {
5980 if (set)
5981 iv_ca_free (&set);
5982 set = origset;
5983 }
5984 else if (origset)
5985 iv_ca_free (&origset);
5986
5987 for (i = 0; i < n_iv_uses (data); i++)
5988 {
5989 use = iv_use (data, i);
5990 use->selected = iv_ca_cand_for_use (set, use)->cand;
5991 }
5992
5993 return set;
5994 }
5995
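/* Illustrative only (hypothetical function names): the two strategies above
   typically correspond to the two loop shapes below.  The ORIGINALP run
   keeps the original counter "i"; the other run may prefer a pointer
   candidate not based on any original iv.  compare_costs then picks
   whichever version is cheaper on the target.  */

static void
toy_zero_indexed (int *a, int n)	/* Shape favored when ORIGINALP.  */
{
  int i;

  for (i = 0; i < n; i++)
    a[i] = 0;
}

static void
toy_zero_pointer (int *a, int n)	/* Shape from a non-original cand.  */
{
  int *p = a, *end = a + n;

  for (; p != end; p++)
    *p = 0;
}
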
5996 /* Creates a new induction variable corresponding to CAND. */
5997
5998 static void
5999 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6000 {
6001 gimple_stmt_iterator incr_pos;
6002 tree base;
6003 bool after = false;
6004
6005 if (!cand->iv)
6006 return;
6007
6008 switch (cand->pos)
6009 {
6010 case IP_NORMAL:
6011 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6012 break;
6013
6014 case IP_END:
6015 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6016 after = true;
6017 break;
6018
6019 case IP_AFTER_USE:
6020 after = true;
6021 /* fall through */
6022 case IP_BEFORE_USE:
6023 incr_pos = gsi_for_stmt (cand->incremented_at);
6024 break;
6025
6026 case IP_ORIGINAL:
6027 /* Mark that the iv is preserved. */
6028 name_info (data, cand->var_before)->preserve_biv = true;
6029 name_info (data, cand->var_after)->preserve_biv = true;
6030
6031 /* Rewrite the increment so that it uses var_before directly. */
6032 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6033 return;
6034 }
6035
6036 gimple_add_tmp_var (cand->var_before);
6037
6038 base = unshare_expr (cand->iv->base);
6039
6040 create_iv (base, unshare_expr (cand->iv->step),
6041 cand->var_before, data->current_loop,
6042 &incr_pos, after, &cand->var_before, &cand->var_after);
6043 }
6044
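/* Rough intuition for the increment positions handled above (illustrative
   source-level shapes, not exact GIMPLE):

     IP_NORMAL -- just before the exit test:
	loop:  ...body...  i.next = i + 1;  if (i.next < n) goto loop;

     IP_END -- in the latch, after the exit test:
	loop:  ...body...  if (i < n) goto latch; else goto exit;
	latch: i = i + 1;  goto loop;

   IP_BEFORE_USE / IP_AFTER_USE place the increment at
   CAND->incremented_at, and IP_ORIGINAL reuses the existing one.  */
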
6045 /* Creates new induction variables described in SET. */
6046
6047 static void
6048 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6049 {
6050 unsigned i;
6051 struct iv_cand *cand;
6052 bitmap_iterator bi;
6053
6054 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6055 {
6056 cand = iv_cand (data, i);
6057 create_new_iv (data, cand);
6058 }
6059
6060 if (dump_file && (dump_flags & TDF_DETAILS))
6061 {
6062 fprintf (dump_file, "\nSelected IV set: \n");
6063 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6064 {
6065 cand = iv_cand (data, i);
6066 dump_cand (dump_file, cand);
6067 }
6068 fprintf (dump_file, "\n");
6069 }
6070 }
6071
6072 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
6073 using candidate CAND. */
6074
6075 static void
6076 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6077 struct iv_use *use, struct iv_cand *cand)
6078 {
6079 tree comp;
6080 tree op, tgt;
6081 gimple ass;
6082 gimple_stmt_iterator bsi;
6083
6084 /* An important special case -- if we are asked to express the value
6085 of the original iv by itself, just exit; there is no need to
6086 introduce a new computation (one that might also require casting
6087 the variable to unsigned and back). */
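 /* Illustrative GIMPLE (hypothetical SSA names): if USE->stmt is the
    original increment "i_7 = i_3 + 1" and CAND is the original biv
    incremented there, any rewrite would just recompute "i_3 + 1", so the
    statement is kept whenever its other operand is a constant or equals
    the step -- exactly what is checked below.  */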
6088 if (cand->pos == IP_ORIGINAL
6089 && cand->incremented_at == use->stmt)
6090 {
6091 tree step, ctype, utype;
6092 enum tree_code incr_code = PLUS_EXPR, old_code;
6093
6094 gcc_assert (is_gimple_assign (use->stmt));
6095 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6096
6097 step = cand->iv->step;
6098 ctype = TREE_TYPE (step);
6099 utype = TREE_TYPE (cand->var_after);
6100 if (TREE_CODE (step) == NEGATE_EXPR)
6101 {
6102 incr_code = MINUS_EXPR;
6103 step = TREE_OPERAND (step, 0);
6104 }
6105
6106 /* Check whether we may leave the computation unchanged.
6107 This is the case only if it does not rely on other
6108 computations in the loop -- otherwise, the computation
6109 we rely upon may be removed in remove_unused_ivs,
6110 thus leading to an ICE. */
6111 old_code = gimple_assign_rhs_code (use->stmt);
6112 if (old_code == PLUS_EXPR
6113 || old_code == MINUS_EXPR
6114 || old_code == POINTER_PLUS_EXPR)
6115 {
6116 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6117 op = gimple_assign_rhs2 (use->stmt);
6118 else if (old_code != MINUS_EXPR
6119 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6120 op = gimple_assign_rhs1 (use->stmt);
6121 else
6122 op = NULL_TREE;
6123 }
6124 else
6125 op = NULL_TREE;
6126
6127 if (op
6128 && (TREE_CODE (op) == INTEGER_CST
6129 || operand_equal_p (op, step, 0)))
6130 return;
6131
6132 /* Otherwise, add the necessary computations to express
6133 the iv. */
6134 op = fold_convert (ctype, cand->var_before);
6135 comp = fold_convert (utype,
6136 build2 (incr_code, ctype, op,
6137 unshare_expr (step)));
6138 }
6139 else
6140 {
6141 comp = get_computation (data->current_loop, use, cand);
6142 gcc_assert (comp != NULL_TREE);
6143 }
6144
6145 switch (gimple_code (use->stmt))
6146 {
6147 case GIMPLE_PHI:
6148 tgt = PHI_RESULT (use->stmt);
6149
6150 /* If we should keep the biv, do not replace it. */
6151 if (name_info (data, tgt)->preserve_biv)
6152 return;
6153
6154 bsi = gsi_after_labels (gimple_bb (use->stmt));
6155 break;
6156
6157 case GIMPLE_ASSIGN:
6158 tgt = gimple_assign_lhs (use->stmt);
6159 bsi = gsi_for_stmt (use->stmt);
6160 break;
6161
6162 default:
6163 gcc_unreachable ();
6164 }
6165
6166 if (!valid_gimple_rhs_p (comp)
6167 || (gimple_code (use->stmt) != GIMPLE_PHI
6168 /* We can't allow re-allocating the stmt, as it might still be
6169 pointed to. */
6170 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6171 >= gimple_num_ops (gsi_stmt (bsi)))))
6172 {
6173 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6174 true, GSI_SAME_STMT);
6175 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6176 {
6177 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6178 /* As this isn't a plain copy we have to reset alignment
6179 information. */
6180 if (SSA_NAME_PTR_INFO (comp))
6181 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6182 }
6183 }
6184
6185 if (gimple_code (use->stmt) == GIMPLE_PHI)
6186 {
6187 ass = gimple_build_assign (tgt, comp);
6188 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6189
6190 bsi = gsi_for_stmt (use->stmt);
6191 remove_phi_node (&bsi, false);
6192 }
6193 else
6194 {
6195 gimple_assign_set_rhs_from_tree (&bsi, comp);
6196 use->stmt = gsi_stmt (bsi);
6197 }
6198 }
6199
6200 /* Performs a peephole optimization to reorder the iv update statement with
6201 a mem ref to enable instruction combining in later phases. The mem ref uses
6202 the iv value before the update, so the reordering transformation requires
6203 adjustment of the offset. CAND is the selected IV_CAND.
6204
6205 Example:
6206
6207 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6208 iv2 = iv1 + 1;
6209
6210 if (t < val) (1)
6211 goto L;
6212 goto Head;
6213
6214
6215 directly propagating t over to (1) would introduce an overlapping live
6216 range and thus increase register pressure. This peephole transforms it into:
6217
6218 iv2 = iv1 + 1;
6219 t = MEM_REF (base, iv2, 8, 8);
6220 if (t < val)
6221 goto L;
6222 goto Head;
6223
6224 (the MEM_REF offset drops from 16 to 8, one stride, because iv2 == iv1 + 1). */
6225
6226 static void
6227 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6228 {
6229 tree var_after;
6230 gimple iv_update, stmt;
6231 basic_block bb;
6232 gimple_stmt_iterator gsi, gsi_iv;
6233
6234 if (cand->pos != IP_NORMAL)
6235 return;
6236
6237 var_after = cand->var_after;
6238 iv_update = SSA_NAME_DEF_STMT (var_after);
6239
6240 bb = gimple_bb (iv_update);
6241 gsi = gsi_last_nondebug_bb (bb);
6242 stmt = gsi_stmt (gsi);
6243
6244 /* Only handle the case where the last statement is a conditional jump for now. */
6245 if (gimple_code (stmt) != GIMPLE_COND)
6246 return;
6247
6248 gsi_prev_nondebug (&gsi);
6249 stmt = gsi_stmt (gsi);
6250 if (stmt != iv_update)
6251 return;
6252
6253 gsi_prev_nondebug (&gsi);
6254 if (gsi_end_p (gsi))
6255 return;
6256
6257 stmt = gsi_stmt (gsi);
6258 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6259 return;
6260
6261 if (stmt != use->stmt)
6262 return;
6263
6264 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6265 return;
6266
6267 if (dump_file && (dump_flags & TDF_DETAILS))
6268 {
6269 fprintf (dump_file, "Reordering \n");
6270 print_gimple_stmt (dump_file, iv_update, 0, 0);
6271 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6272 fprintf (dump_file, "\n");
6273 }
6274
6275 gsi = gsi_for_stmt (use->stmt);
6276 gsi_iv = gsi_for_stmt (iv_update);
6277 gsi_move_before (&gsi_iv, &gsi);
6278
6279 cand->pos = IP_BEFORE_USE;
6280 cand->incremented_at = use->stmt;
6281 }
6282
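/* The exact shape matched above, within a single basic block and ignoring
   debug statements (illustrative, hypothetical SSA names):

     t_5 = MEM[...];		<- USE->stmt, a GIMPLE_ASSIGN to an SSA name
     ivtmp_8 = ivtmp_7 + 4;	<- the iv update defining CAND->var_after
     if (t_5 < val_2) ...	<- the last statement, a GIMPLE_COND

   Any other intervening statement makes the function return without
   reordering.  */
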
6283 /* Rewrites USE (address that is an iv) using candidate CAND. */
6284
6285 static void
6286 rewrite_use_address (struct ivopts_data *data,
6287 struct iv_use *use, struct iv_cand *cand)
6288 {
6289 aff_tree aff;
6290 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6291 tree base_hint = NULL_TREE;
6292 tree ref, iv;
6293 bool ok;
6294
6295 adjust_iv_update_pos (cand, use);
6296 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6297 gcc_assert (ok);
6298 unshare_aff_combination (&aff);
6299
6300 /* To avoid undefined overflow problems, all IV candidates use unsigned
6301 integer types. The drawback is that this makes it impossible for
6302 create_mem_ref to distinguish an IV that is based on a memory object
6303 from one that represents simply an offset.
6304
6305 To work around this problem, we pass a hint to create_mem_ref that
6306 indicates which variable (if any) in aff is an IV based on a memory
6307 object. Note that we only consider the candidate. If this is not
6308 based on an object, the base of the reference is in some subexpression
6309 of the use -- but these will use pointer types, so they are recognized
6310 by the create_mem_ref heuristics anyway. */
6311 if (cand->iv->base_object)
6312 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6313
6314 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6315 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6316 reference_alias_ptr_type (*use->op_p),
6317 iv, base_hint, data->speed);
6318 copy_ref_info (ref, *use->op_p);
6319 *use->op_p = ref;
6320 }
6321
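/* Illustrative effect (hypothetical SSA names): an address use such as
   a[i_4], i.e. *(a_5 + (sizetype) i_4 * 4), rewritten against a pointer
   candidate whose base object is "a" may become MEM[(int *) ivtmp_12],
   with BASE_HINT telling create_mem_ref that ivtmp_12 stands for the
   memory object rather than for a plain offset.  */
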
6322 /* Rewrites USE (a condition one of whose arguments is an iv) using
6323 candidate CAND. */
6324
6325 static void
6326 rewrite_use_compare (struct ivopts_data *data,
6327 struct iv_use *use, struct iv_cand *cand)
6328 {
6329 tree comp, *var_p, op, bound;
6330 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6331 enum tree_code compare;
6332 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6333 bool ok;
6334
6335 bound = cp->value;
6336 if (bound)
6337 {
6338 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6339 tree var_type = TREE_TYPE (var);
6340 gimple_seq stmts;
6341
6342 if (dump_file && (dump_flags & TDF_DETAILS))
6343 {
6344 fprintf (dump_file, "Replacing exit test: ");
6345 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6346 }
6347 compare = cp->comp;
6348 bound = unshare_expr (fold_convert (var_type, bound));
6349 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6350 if (stmts)
6351 gsi_insert_seq_on_edge_immediate (
6352 loop_preheader_edge (data->current_loop),
6353 stmts);
6354
6355 gimple_cond_set_lhs (use->stmt, var);
6356 gimple_cond_set_code (use->stmt, compare);
6357 gimple_cond_set_rhs (use->stmt, op);
6358 return;
6359 }
6360
6361 /* The induction variable elimination failed; just express the
6362 original iv instead. */
6363 comp = get_computation (data->current_loop, use, cand);
6364 gcc_assert (comp != NULL_TREE);
6365
6366 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6367 gcc_assert (ok);
6368
6369 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6370 true, GSI_SAME_STMT);
6371 }
6372
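/* Illustrative shape of the replacement (hypothetical SSA names): an exit
   test

     if (i_8 < n_6)

   whose elimination analysis recorded a bound in CP->value may become a
   test of the surviving candidate against a value computed once in the
   preheader:

     preheader:  _bound = a_9 + (sizetype) n_6 * 4;
     exit test:  if (ivtmp_12 != _bound)

   When no bound was recorded, the function instead recomputes the compared
   iv value from CAND, as in the fallback path above.  */
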
6373 /* Rewrites USE using candidate CAND. */
6374
6375 static void
6376 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6377 {
6378 switch (use->type)
6379 {
6380 case USE_NONLINEAR_EXPR:
6381 rewrite_use_nonlinear_expr (data, use, cand);
6382 break;
6383
6384 case USE_ADDRESS:
6385 rewrite_use_address (data, use, cand);
6386 break;
6387
6388 case USE_COMPARE:
6389 rewrite_use_compare (data, use, cand);
6390 break;
6391
6392 default:
6393 gcc_unreachable ();
6394 }
6395
6396 update_stmt (use->stmt);
6397 }
6398
6399 /* Rewrites the uses using the selected induction variables. */
6400
6401 static void
6402 rewrite_uses (struct ivopts_data *data)
6403 {
6404 unsigned i;
6405 struct iv_cand *cand;
6406 struct iv_use *use;
6407
6408 for (i = 0; i < n_iv_uses (data); i++)
6409 {
6410 use = iv_use (data, i);
6411 cand = use->selected;
6412 gcc_assert (cand);
6413
6414 rewrite_use (data, use, cand);
6415 }
6416 }
6417
6418 /* Removes the ivs that are not used after rewriting. */
6419
6420 static void
6421 remove_unused_ivs (struct ivopts_data *data)
6422 {
6423 unsigned j;
6424 bitmap_iterator bi;
6425 bitmap toremove = BITMAP_ALLOC (NULL);
6426
6427 /* Figure out an order in which to release SSA DEFs so that we don't
6428 release something that we'd have to propagate into a debug stmt
6429 afterwards. */
6430 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6431 {
6432 struct version_info *info;
6433
6434 info = ver_info (data, j);
6435 if (info->iv
6436 && !integer_zerop (info->iv->step)
6437 && !info->inv_id
6438 && !info->iv->have_use_for
6439 && !info->preserve_biv)
6440 {
6441 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6442
6443 tree def = info->iv->ssa_name;
6444
6445 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6446 {
6447 imm_use_iterator imm_iter;
6448 use_operand_p use_p;
6449 gimple stmt;
6450 int count = 0;
6451
6452 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6453 {
6454 if (!gimple_debug_bind_p (stmt))
6455 continue;
6456
6457 /* We just want to determine whether to do nothing
6458 (count == 0), to substitute the computed
6459 expression into a single use of the SSA DEF by
6460 itself (count == 1), or to use a debug temp
6461 because the SSA DEF is used multiple times or as
6462 part of a larger expression (count > 1). */
6463 count++;
6464 if (gimple_debug_bind_get_value (stmt) != def)
6465 count++;
6466
6467 if (count > 1)
6468 BREAK_FROM_IMM_USE_STMT (imm_iter);
6469 }
6470
6471 if (!count)
6472 continue;
6473
6474 struct iv_use dummy_use;
6475 struct iv_cand *best_cand = NULL, *cand;
6476 unsigned i, best_pref = 0, cand_pref;
6477
6478 memset (&dummy_use, 0, sizeof (dummy_use));
6479 dummy_use.iv = info->iv;
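 /* Rank the selected candidates, scanning at most the first 64 uses:
    a step matching the removed iv's weighs 4, a matching machine mode
    weighs 2 and a constant base weighs 1, so a step match dominates
    the other two criteria.  */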
6480 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6481 {
6482 cand = iv_use (data, i)->selected;
6483 if (cand == best_cand)
6484 continue;
6485 cand_pref = operand_equal_p (cand->iv->step,
6486 info->iv->step, 0)
6487 ? 4 : 0;
6488 cand_pref
6489 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6490 == TYPE_MODE (TREE_TYPE (info->iv->base))
6491 ? 2 : 0;
6492 cand_pref
6493 += TREE_CODE (cand->iv->base) == INTEGER_CST
6494 ? 1 : 0;
6495 if (best_cand == NULL || best_pref < cand_pref)
6496 {
6497 best_cand = cand;
6498 best_pref = cand_pref;
6499 }
6500 }
6501
6502 if (!best_cand)
6503 continue;
6504
6505 tree comp = get_computation_at (data->current_loop,
6506 &dummy_use, best_cand,
6507 SSA_NAME_DEF_STMT (def));
6508 if (!comp)
6509 continue;
6510
6511 if (count > 1)
6512 {
6513 tree vexpr = make_node (DEBUG_EXPR_DECL);
6514 DECL_ARTIFICIAL (vexpr) = 1;
6515 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6516 if (SSA_NAME_VAR (def))
6517 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6518 else
6519 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6520 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6521 gimple_stmt_iterator gsi;
6522
6523 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6524 gsi = gsi_after_labels (gimple_bb
6525 (SSA_NAME_DEF_STMT (def)));
6526 else
6527 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6528
6529 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6530 comp = vexpr;
6531 }
6532
6533 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6534 {
6535 if (!gimple_debug_bind_p (stmt))
6536 continue;
6537
6538 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6539 SET_USE (use_p, comp);
6540
6541 update_stmt (stmt);
6542 }
6543 }
6544 }
6545 }
6546
6547 release_defs_bitset (toremove);
6548
6549 BITMAP_FREE (toremove);
6550 }
6551
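/* Illustrative debug-info salvaging (hypothetical names): when the dead iv
   i_7 still appears in debug binds, a single self-bind

     # DEBUG i => i_7

   is rewritten in place to recompute the value from a surviving candidate,
   e.g. "# DEBUG i => (int) (ivtmp_12 - a_3)"; with several referencing
   binds (count > 1) the expression is funneled through one debug
   temporary:

     # DEBUG D#1 => (int) (ivtmp_12 - a_3)
     # DEBUG i => D#1  */
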
6552 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6553 for pointer_map_traverse. */
6554
6555 static bool
6556 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6557 void *data ATTRIBUTE_UNUSED)
6558 {
6559 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6560
6561 free (niter);
6562 return true;
6563 }
6564
6565 /* Frees data allocated by the optimization of a single loop. */
6566
6567 static void
6568 free_loop_data (struct ivopts_data *data)
6569 {
6570 unsigned i, j;
6571 bitmap_iterator bi;
6572 tree obj;
6573
6574 if (data->niters)
6575 {
6576 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6577 pointer_map_destroy (data->niters);
6578 data->niters = NULL;
6579 }
6580
6581 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6582 {
6583 struct version_info *info;
6584
6585 info = ver_info (data, i);
6586 free (info->iv);
6587 info->iv = NULL;
6588 info->has_nonlin_use = false;
6589 info->preserve_biv = false;
6590 info->inv_id = 0;
6591 }
6592 bitmap_clear (data->relevant);
6593 bitmap_clear (data->important_candidates);
6594
6595 for (i = 0; i < n_iv_uses (data); i++)
6596 {
6597 struct iv_use *use = iv_use (data, i);
6598
6599 free (use->iv);
6600 BITMAP_FREE (use->related_cands);
6601 for (j = 0; j < use->n_map_members; j++)
6602 if (use->cost_map[j].depends_on)
6603 BITMAP_FREE (use->cost_map[j].depends_on);
6604 free (use->cost_map);
6605 free (use);
6606 }
6607 data->iv_uses.truncate (0);
6608
6609 for (i = 0; i < n_iv_cands (data); i++)
6610 {
6611 struct iv_cand *cand = iv_cand (data, i);
6612
6613 free (cand->iv);
6614 if (cand->depends_on)
6615 BITMAP_FREE (cand->depends_on);
6616 free (cand);
6617 }
6618 data->iv_candidates.truncate (0);
6619
6620 if (data->version_info_size < num_ssa_names)
6621 {
6622 data->version_info_size = 2 * num_ssa_names;
6623 free (data->version_info);
6624 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6625 }
6626
6627 data->max_inv_id = 0;
6628
6629 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6630 SET_DECL_RTL (obj, NULL_RTX);
6631
6632 decl_rtl_to_reset.truncate (0);
6633
6634 htab_empty (data->inv_expr_tab);
6635 data->inv_expr_id = 0;
6636 }
6637
6638 /* Finalizes the data structures used by the iv optimization pass,
6639 releasing all memory allocated in DATA. */
6640
6641 static void
6642 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6643 {
6644 free_loop_data (data);
6645 free (data->version_info);
6646 BITMAP_FREE (data->relevant);
6647 BITMAP_FREE (data->important_candidates);
6648
6649 decl_rtl_to_reset.release ();
6650 data->iv_uses.release ();
6651 data->iv_candidates.release ();
6652 htab_delete (data->inv_expr_tab);
6653 }
6654
6655 /* Returns true if the loop body BODY, consisting of NUM_NODES blocks, includes any calls that are not inexpensive builtins. */
6656
6657 static bool
6658 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6659 {
6660 gimple_stmt_iterator gsi;
6661 unsigned i;
6662
6663 for (i = 0; i < num_nodes; i++)
6664 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6665 {
6666 gimple stmt = gsi_stmt (gsi);
6667 if (is_gimple_call (stmt)
6668 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6669 return true;
6670 }
6671 return false;
6672 }
6673
6674 /* Optimizes the LOOP. Returns true if anything changed. */
6675
6676 static bool
6677 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6678 {
6679 bool changed = false;
6680 struct iv_ca *iv_ca;
6681 edge exit = single_dom_exit (loop);
6682 basic_block *body;
6683
6684 gcc_assert (!data->niters);
6685 data->current_loop = loop;
6686 data->speed = optimize_loop_for_speed_p (loop);
6687
6688 if (dump_file && (dump_flags & TDF_DETAILS))
6689 {
6690 fprintf (dump_file, "Processing loop %d\n", loop->num);
6691
6692 if (exit)
6693 {
6694 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6695 exit->src->index, exit->dest->index);
6696 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6697 fprintf (dump_file, "\n");
6698 }
6699
6700 fprintf (dump_file, "\n");
6701 }
6702
6703 body = get_loop_body (loop);
6704 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6705 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6706 free (body);
6707
6708 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6709
6710 /* For each ssa name determines whether it behaves as an induction variable
6711 in some loop. */
6712 if (!find_induction_variables (data))
6713 goto finish;
6714
6715 /* Finds interesting uses (item 1). */
6716 find_interesting_uses (data);
6717 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6718 goto finish;
6719
6720 /* Finds candidates for the induction variables (item 2). */
6721 find_iv_candidates (data);
6722
6723 /* Calculates the costs (item 3, part 1). */
6724 determine_iv_costs (data);
6725 determine_use_iv_costs (data);
6726 determine_set_costs (data);
6727
6728 /* Find the optimal set of induction variables (item 3, part 2). */
6729 iv_ca = find_optimal_iv_set (data);
6730 if (!iv_ca)
6731 goto finish;
6732 changed = true;
6733
6734 /* Create the new induction variables (item 4, part 1). */
6735 create_new_ivs (data, iv_ca);
6736 iv_ca_free (&iv_ca);
6737
6738 /* Rewrite the uses (item 4, part 2). */
6739 rewrite_uses (data);
6740
6741 /* Remove the ivs that are unused after rewriting. */
6742 remove_unused_ivs (data);
6743
6744 /* We have changed the structure of induction variables; it might happen
6745 that definitions in the scev database refer to some of them that were
6746 eliminated. */
6747 scev_reset ();
6748
6749 finish:
6750 free_loop_data (data);
6751
6752 return changed;
6753 }
6754
6755 /* Main entry point. Optimizes induction variables in loops. */
6756
6757 void
6758 tree_ssa_iv_optimize (void)
6759 {
6760 struct loop *loop;
6761 struct ivopts_data data;
6762 loop_iterator li;
6763
6764 tree_ssa_iv_optimize_init (&data);
6765
6766 /* Optimize the loops starting with the innermost ones. */
6767 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6768 {
6769 if (dump_file && (dump_flags & TDF_DETAILS))
6770 flow_loop_dump (loop, dump_file, NULL, 1);
6771
6772 tree_ssa_iv_optimize_loop (&data, loop);
6773 }
6774
6775 tree_ssa_iv_optimize_finalize (&data);
6776 }
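
/* To watch this pass at work, one can enable its detailed dump, e.g.

     gcc -O2 -fdump-tree-ivopts-details foo.c

   which writes the "Processing loop", "Initial set of candidates" and
   "Selected IV set" text printed above into the ivopts dump file, while
   -fno-ivopts disables the pass entirely.  */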