/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
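
/* For instance (a rough sketch of the net effect, not the exact code
   the pass emits), a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   has an address use a[i] based on the biv i.  Choosing a pointer
   candidate for that use strength-reduces the scaled access, giving
   code equivalent to

     for (p = a; p != a + n; p++)
       *p = 0;

   after which i may be eliminated entirely if the exit test is
   rewritten to compare p against the bound.  */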

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* If so, the identifier of that use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
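
/* As a sketch of how these values are meant to be used (the comparison
   helpers themselves appear further down in this file): the runtime
   cost is the primary key and complexity only breaks ties, so a cost
   of {4, 1} is preferred both to {5, 0} and to {4, 2}, and anything
   is preferred to infinite_cost.  */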

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For the original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
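
/* E.g. for a do-while style loop whose only exit test is in the latch,
   or a while style loop whose single exit test is in the header and is
   executed on every iteration, the exit dominates the latch and is
   returned.  If the single exit sits in a conditionally executed block
   of the body, just_once_each_iteration_p fails and NULL is returned
   instead.  */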

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
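
/* Illustration of the cases above: for an IP_NORMAL candidate the
   increment is emitted just before the exit test in the same block, so
   statements in the latch are considered "after" it, while earlier
   statements of that block are not.  For IP_END the increment is
   emitted at the very end of the latch, so no statement in the loop
   body can be after it, which is why IP_END simply returns false.  */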

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
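
/* For example: for EXPR == &a[i] the base address a is recovered and
   &a (converted to ptr_type_node) is returned; for EXPR == p_1 + 4,
   a POINTER_PLUS_EXPR, the function recurses into p_1; and for an
   integer constant the result is NULL_TREE, i.e. no object.  */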

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
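
/* E.g. &a[0] contains a complex address expression (the operand of the
   ADDR_EXPR is an ARRAY_REF, not a DECL), whereas plain &a does not,
   although both denote the same address; this is exactly the situation
   the lowering in alloc_iv below is meant to canonicalize.  */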

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in BASE, except those whose operand
     is a DECL_P.  By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
        forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
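
/* A sketch of a phi that qualifies:

     # i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 4;

   Here simple_iv computes base 0 and step 4, so 4 is returned.  A phi
   whose value does not change within the loop has a zero step and
   NULL_TREE is returned for it.  */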

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv that was peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Clear the ssa name, if the caller did not reset it already, so that
     it is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
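
/* For a condition i_1 < n_2 where i_1 is an iv with nonzero step and
   n_2 is loop invariant, *CONTROL_VAR ends up pointing to i_1 and
   *BOUND to n_2 (the operands are swapped first if the iv appears on
   the right-hand side), and true is returned.  If both operands are
   invariant, or either has no iv description, false is returned and
   the output locations still point at the raw operands.  */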

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already one byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
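
/* A worked example (sketch): for a use a[i_1] where the elements of a
   are 4 bytes wide and i_1 is an iv with step 1, *IDX is replaced by
   the base of i_1 and DTA->step grows by 4 * 1 = 4; for a plain
   pointer dereference the element step is taken to be one byte, so
   DTA->step grows by the step of the pointer iv itself.  */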

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
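
/* Examples (illustrative): TOP = x * 12, BOT = x yields 12;
   TOP = x * 8 + x * 4, BOT = x yields 8 + 4 = 12; TOP = 24, BOT = 8
   yields 3; and TOP = 25, BOT = 8 fails because the truncating
   division leaves a remainder.  */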

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
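
/* For instance, assuming 32-bit required alignment and 8-bit units:
   a reference whose step is 2 has tree_ctz (step) == 1, and since
   (1 << 1) * BITS_PER_UNIT == 16 < 32 the access may drift to a
   misaligned address, so true is returned; with a step of 4 the
   alignment is preserved across iterations and that check passes.  */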

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2061
2062 /* Strips constant offsets from EXPR and stores them in OFFSET. If INSIDE_ADDR
2063 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2064 we are at the top level of the processed address. */
2065
2066 static tree
2067 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2068 HOST_WIDE_INT *offset)
2069 {
2070 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2071 enum tree_code code;
2072 tree type, orig_type = TREE_TYPE (expr);
2073 HOST_WIDE_INT off0, off1, st;
2074 tree orig_expr = expr;
2075
2076 STRIP_NOPS (expr);
2077
2078 type = TREE_TYPE (expr);
2079 code = TREE_CODE (expr);
2080 *offset = 0;
2081
2082 switch (code)
2083 {
2084 case INTEGER_CST:
2085 if (!cst_and_fits_in_hwi (expr)
2086 || integer_zerop (expr))
2087 return orig_expr;
2088
2089 *offset = int_cst_value (expr);
2090 return build_int_cst (orig_type, 0);
2091
2092 case POINTER_PLUS_EXPR:
2093 case PLUS_EXPR:
2094 case MINUS_EXPR:
2095 op0 = TREE_OPERAND (expr, 0);
2096 op1 = TREE_OPERAND (expr, 1);
2097
2098 op0 = strip_offset_1 (op0, false, false, &off0);
2099 op1 = strip_offset_1 (op1, false, false, &off1);
2100
2101 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2102 if (op0 == TREE_OPERAND (expr, 0)
2103 && op1 == TREE_OPERAND (expr, 1))
2104 return orig_expr;
2105
2106 if (integer_zerop (op1))
2107 expr = op0;
2108 else if (integer_zerop (op0))
2109 {
2110 if (code == MINUS_EXPR)
2111 expr = fold_build1 (NEGATE_EXPR, type, op1);
2112 else
2113 expr = op1;
2114 }
2115 else
2116 expr = fold_build2 (code, type, op0, op1);
2117
2118 return fold_convert (orig_type, expr);
2119
2120 case MULT_EXPR:
2121 op1 = TREE_OPERAND (expr, 1);
2122 if (!cst_and_fits_in_hwi (op1))
2123 return orig_expr;
2124
2125 op0 = TREE_OPERAND (expr, 0);
2126 op0 = strip_offset_1 (op0, false, false, &off0);
2127 if (op0 == TREE_OPERAND (expr, 0))
2128 return orig_expr;
2129
2130 *offset = off0 * int_cst_value (op1);
2131 if (integer_zerop (op0))
2132 expr = op0;
2133 else
2134 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2135
2136 return fold_convert (orig_type, expr);
2137
2138 case ARRAY_REF:
2139 case ARRAY_RANGE_REF:
2140 if (!inside_addr)
2141 return orig_expr;
2142
2143 step = array_ref_element_size (expr);
2144 if (!cst_and_fits_in_hwi (step))
2145 break;
2146
2147 st = int_cst_value (step);
2148 op1 = TREE_OPERAND (expr, 1);
2149 op1 = strip_offset_1 (op1, false, false, &off1);
2150 *offset = off1 * st;
2151
2152 if (top_compref
2153 && integer_zerop (op1))
2154 {
2155 /* Strip the component reference completely. */
2156 op0 = TREE_OPERAND (expr, 0);
2157 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2158 *offset += off0;
2159 return op0;
2160 }
2161 break;
2162
2163 case COMPONENT_REF:
2164 {
2165 tree field;
2166
2167 if (!inside_addr)
2168 return orig_expr;
2169
2170 tmp = component_ref_field_offset (expr);
2171 field = TREE_OPERAND (expr, 1);
2172 if (top_compref
2173 && cst_and_fits_in_hwi (tmp)
2174 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2175 {
2176 HOST_WIDE_INT boffset, abs_off;
2177
2178 /* Strip the component reference completely. */
2179 op0 = TREE_OPERAND (expr, 0);
2180 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2181 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2182 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2183 if (boffset < 0)
2184 abs_off = -abs_off;
2185
2186 *offset = off0 + int_cst_value (tmp) + abs_off;
2187 return op0;
2188 }
2189 }
2190 break;
2191
2192 case ADDR_EXPR:
2193 op0 = TREE_OPERAND (expr, 0);
2194 op0 = strip_offset_1 (op0, true, true, &off0);
2195 *offset += off0;
2196
2197 if (op0 == TREE_OPERAND (expr, 0))
2198 return orig_expr;
2199
2200 expr = build_fold_addr_expr (op0);
2201 return fold_convert (orig_type, expr);
2202
2203 case MEM_REF:
2204 /* ??? Offset operand? */
2205 inside_addr = false;
2206 break;
2207
2208 default:
2209 return orig_expr;
2210 }
2211
2212 /* Default handling of expressions for which we want to recurse into
2213 the first operand. */
2214 op0 = TREE_OPERAND (expr, 0);
2215 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2216 *offset += off0;
2217
2218 if (op0 == TREE_OPERAND (expr, 0)
2219 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2220 return orig_expr;
2221
2222 expr = copy_node (expr);
2223 TREE_OPERAND (expr, 0) = op0;
2224 if (op1)
2225 TREE_OPERAND (expr, 1) = op1;
2226
2227 /* Inside an address, we might strip the top-level component references,
2228 thus changing the type of the expression. The handling of ADDR_EXPR
2229 will fix that. */
2230 expr = fold_convert (orig_type, expr);
2231
2232 return expr;
2233 }
2234
2235 /* Strips constant offsets from EXPR and stores them in OFFSET. */
2236
2237 static tree
2238 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2239 {
2240 HOST_WIDE_INT off;
2241 tree core = strip_offset_1 (expr, false, false, &off);
2242 *offset = off;
2243 return core;
2244 }
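
/* Illustration (not part of the pass; assumes 4-byte int): for the
   address &a[i + 3] of an int array a, strip_offset returns the core
   &a[i] and stores 12 in *OFFSET, since the constant part of the
   ARRAY_REF index contributes 3 * sizeof (int) bytes. */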
2245
2246 /* Returns a variant of TYPE that can be used as a base for different uses.
2247 We return an unsigned type with the same precision, which avoids problems
2248 with overflow. */
2249
2250 static tree
2251 generic_type_for (tree type)
2252 {
2253 if (POINTER_TYPE_P (type))
2254 return unsigned_type_for (type);
2255
2256 if (TYPE_UNSIGNED (type))
2257 return type;
2258
2259 return unsigned_type_for (type);
2260 }
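
/* For example, generic_type_for (int) is unsigned int; for a pointer
   type, unsigned_type_for yields an unsigned integer type of the same
   width (e.g. a 64-bit unsigned type on typical LP64 targets), so
   candidate arithmetic never invokes signed-overflow undefined
   behavior. */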
2261
2262 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2263 the bitmap in which we should store them. */
2264
2265 static struct ivopts_data *fd_ivopts_data;
2266 static tree
2267 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2268 {
2269 bitmap *depends_on = (bitmap *) data;
2270 struct version_info *info;
2271
2272 if (TREE_CODE (*expr_p) != SSA_NAME)
2273 return NULL_TREE;
2274 info = name_info (fd_ivopts_data, *expr_p);
2275
2276 if (!info->inv_id || info->has_nonlin_use)
2277 return NULL_TREE;
2278
2279 if (!*depends_on)
2280 *depends_on = BITMAP_ALLOC (NULL);
2281 bitmap_set_bit (*depends_on, info->inv_id);
2282
2283 return NULL_TREE;
2284 }
2285
2286 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2287 position to POS. If USE is not NULL, the candidate is set as related to
2288 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2289 replacement of the final value of the iv by a direct computation. */
2290
2291 static struct iv_cand *
2292 add_candidate_1 (struct ivopts_data *data,
2293 tree base, tree step, bool important, enum iv_position pos,
2294 struct iv_use *use, gimple incremented_at)
2295 {
2296 unsigned i;
2297 struct iv_cand *cand = NULL;
2298 tree type, orig_type;
2299
2300 /* For non-original variables, make sure their values are computed in a type
2301 that does not invoke undefined behavior on overflows (since in general,
2302 we cannot prove that these induction variables are non-wrapping). */
2303 if (pos != IP_ORIGINAL)
2304 {
2305 orig_type = TREE_TYPE (base);
2306 type = generic_type_for (orig_type);
2307 if (type != orig_type)
2308 {
2309 base = fold_convert (type, base);
2310 step = fold_convert (type, step);
2311 }
2312 }
2313
2314 for (i = 0; i < n_iv_cands (data); i++)
2315 {
2316 cand = iv_cand (data, i);
2317
2318 if (cand->pos != pos)
2319 continue;
2320
2321 if (cand->incremented_at != incremented_at
2322 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2323 && cand->ainc_use != use))
2324 continue;
2325
2326 if (!cand->iv)
2327 {
2328 if (!base && !step)
2329 break;
2330
2331 continue;
2332 }
2333
2334 if (!base && !step)
2335 continue;
2336
2337 if (operand_equal_p (base, cand->iv->base, 0)
2338 && operand_equal_p (step, cand->iv->step, 0)
2339 && (TYPE_PRECISION (TREE_TYPE (base))
2340 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2341 break;
2342 }
2343
2344 if (i == n_iv_cands (data))
2345 {
2346 cand = XCNEW (struct iv_cand);
2347 cand->id = i;
2348
2349 if (!base && !step)
2350 cand->iv = NULL;
2351 else
2352 cand->iv = alloc_iv (base, step);
2353
2354 cand->pos = pos;
2355 if (pos != IP_ORIGINAL && cand->iv)
2356 {
2357 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2358 cand->var_after = cand->var_before;
2359 }
2360 cand->important = important;
2361 cand->incremented_at = incremented_at;
2362 data->iv_candidates.safe_push (cand);
2363
2364 if (step
2365 && TREE_CODE (step) != INTEGER_CST)
2366 {
2367 fd_ivopts_data = data;
2368 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2369 }
2370
2371 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2372 cand->ainc_use = use;
2373 else
2374 cand->ainc_use = NULL;
2375
2376 if (dump_file && (dump_flags & TDF_DETAILS))
2377 dump_cand (dump_file, cand);
2378 }
2379
2380 if (important && !cand->important)
2381 {
2382 cand->important = true;
2383 if (dump_file && (dump_flags & TDF_DETAILS))
2384 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2385 }
2386
2387 if (use)
2388 {
2389 bitmap_set_bit (use->related_cands, i);
2390 if (dump_file && (dump_flags & TDF_DETAILS))
2391 fprintf (dump_file, "Candidate %d is related to use %d\n",
2392 cand->id, use->id);
2393 }
2394
2395 return cand;
2396 }
2397
2398 /* Returns true if incrementing the induction variable at the end of the LOOP
2399 is allowed.
2400
2401 The purpose is to avoid splitting the latch edge with a biv increment,
2402 thus creating a jump, possibly confusing other optimization passes and
2403 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2404 IP_NORMAL_POS is not available (so we do not have a better alternative),
2405 or if the latch edge is already nonempty. */
2406
2407 static bool
2408 allow_ip_end_pos_p (struct loop *loop)
2409 {
2410 if (!ip_normal_pos (loop))
2411 return true;
2412
2413 if (!empty_block_p (ip_end_pos (loop)))
2414 return true;
2415
2416 return false;
2417 }
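
/* Illustration (hypothetical loop shape):

     body:   ...
             ivtmp_2 = ivtmp_1 + 1;   <-- IP_NORMAL, before exit test
             if (cond) goto exit;
     latch:  goto body;               <-- IP_END, on the latch edge

   If the latch is a plain fallthru edge, placing the increment there
   would split the edge and create a new jump, so IP_END is allowed
   only when there is no IP_NORMAL position or the latch block already
   contains code. */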
2418
2419 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2420 Important field is set to IMPORTANT. */
2421
2422 static void
2423 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2424 bool important, struct iv_use *use)
2425 {
2426 basic_block use_bb = gimple_bb (use->stmt);
2427 enum machine_mode mem_mode;
2428 unsigned HOST_WIDE_INT cstepi;
2429
2430 /* If we insert the increment in any position other than the standard
2431 ones, we must ensure that it is incremented once per iteration.
2432 It must not be in an inner nested loop, or on one side of an if
2433 statement. */
2434 if (use_bb->loop_father != data->current_loop
2435 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2436 || stmt_could_throw_p (use->stmt)
2437 || !cst_and_fits_in_hwi (step))
2438 return;
2439
2440 cstepi = int_cst_value (step);
2441
2442 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2443 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2444 || USE_STORE_PRE_INCREMENT (mem_mode))
2445 && GET_MODE_SIZE (mem_mode) == cstepi)
2446 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2447 || USE_STORE_PRE_DECREMENT (mem_mode))
2448 && GET_MODE_SIZE (mem_mode) == -cstepi))
2449 {
2450 enum tree_code code = MINUS_EXPR;
2451 tree new_base;
2452 tree new_step = step;
2453
2454 if (POINTER_TYPE_P (TREE_TYPE (base)))
2455 {
2456 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2457 code = POINTER_PLUS_EXPR;
2458 }
2459 else
2460 new_step = fold_convert (TREE_TYPE (base), new_step);
2461 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2462 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2463 use->stmt);
2464 }
2465 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2466 || USE_STORE_POST_INCREMENT (mem_mode))
2467 && GET_MODE_SIZE (mem_mode) == cstepi)
2468 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2469 || USE_STORE_POST_DECREMENT (mem_mode))
2470 && GET_MODE_SIZE (mem_mode) == -cstepi))
2471 {
2472 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2473 use->stmt);
2474 }
2475 }
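
/* Illustration (hypothetical target with a post-increment load such
   as the ARM "ldr rD, [rA], #4"): for an SImode use *p with step 4,
   GET_MODE_SIZE (SImode) == cstepi, so an IP_AFTER_USE candidate
   based at p is added; if it is selected, the increment can later be
   folded into the memory access itself. */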
2476
2477 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2478 position to POS. If USE is not NULL, the candidate is set as related to
2479 it. The candidate computation is scheduled on all available positions. */
2480
2481 static void
2482 add_candidate (struct ivopts_data *data,
2483 tree base, tree step, bool important, struct iv_use *use)
2484 {
2485 if (ip_normal_pos (data->current_loop))
2486 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2487 if (ip_end_pos (data->current_loop)
2488 && allow_ip_end_pos_p (data->current_loop))
2489 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2490
2491 if (use != NULL && use->type == USE_ADDRESS)
2492 add_autoinc_candidates (data, base, step, important, use);
2493 }
2494
2495 /* Adds standard iv candidates. */
2496
2497 static void
2498 add_standard_iv_candidates (struct ivopts_data *data)
2499 {
2500 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2501
2502 /* The same for the long type if it is wider and still fast enough. */
2503 if (TYPE_PRECISION
2504 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2505 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2506 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2507 build_int_cst (long_integer_type_node, 1), true, NULL);
2508
2509 /* And likewise for the long long type, under the same conditions. */
2510 if (TYPE_PRECISION
2511 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2512 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2513 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2514 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2515 }
2516
2517
2518 /* Adds candidates based on the old induction variable IV. */
2519
2520 static void
2521 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2522 {
2523 gimple phi;
2524 tree def;
2525 struct iv_cand *cand;
2526
2527 add_candidate (data, iv->base, iv->step, true, NULL);
2528
2529 /* The same, but with initial value zero. */
2530 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2531 add_candidate (data, size_int (0), iv->step, true, NULL);
2532 else
2533 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2534 iv->step, true, NULL);
2535
2536 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2537 if (gimple_code (phi) == GIMPLE_PHI)
2538 {
2539 /* Additionally record the possibility of leaving the original iv
2540 untouched. */
2541 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2542 /* Don't add candidate if it's from another PHI node because
2543 it's an affine iv appearing in the form of PEELED_CHREC. */
2544 phi = SSA_NAME_DEF_STMT (def);
2545 if (gimple_code (phi) != GIMPLE_PHI)
2546 {
2547 cand = add_candidate_1 (data,
2548 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2549 SSA_NAME_DEF_STMT (def));
2550 cand->var_before = iv->ssa_name;
2551 cand->var_after = def;
2552 }
2553 else
2554 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2555 }
2556 }
2557
2558 /* Adds candidates based on the old induction variables. */
2559
2560 static void
2561 add_old_ivs_candidates (struct ivopts_data *data)
2562 {
2563 unsigned i;
2564 struct iv *iv;
2565 bitmap_iterator bi;
2566
2567 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2568 {
2569 iv = ver_info (data, i)->iv;
2570 if (iv && iv->biv_p && !integer_zerop (iv->step))
2571 add_old_iv_candidates (data, iv);
2572 }
2573 }
2574
2575 /* Adds candidates based on the value of the induction variable IV and USE. */
2576
2577 static void
2578 add_iv_value_candidates (struct ivopts_data *data,
2579 struct iv *iv, struct iv_use *use)
2580 {
2581 unsigned HOST_WIDE_INT offset;
2582 tree base;
2583 tree basetype;
2584
2585 add_candidate (data, iv->base, iv->step, false, use);
2586
2587 /* The same, but with initial value zero. Make such a variable important,
2588 since it is generic enough that possibly many uses may be based
2589 on it. */
2590 basetype = TREE_TYPE (iv->base);
2591 if (POINTER_TYPE_P (basetype))
2592 basetype = sizetype;
2593 add_candidate (data, build_int_cst (basetype, 0),
2594 iv->step, true, use);
2595
2596 /* Third, try removing the constant offset. Make sure we even
2597 add a candidate for &a[0] vs. (T *)&a. */
2598 base = strip_offset (iv->base, &offset);
2599 if (offset
2600 || base != iv->base)
2601 add_candidate (data, base, iv->step, false, use);
2602 }
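
/* Illustration: for a use whose iv is {base = &a[4], step = 4}, this
   adds the candidates {&a[4], 4}, the important generic {0, 4}, and
   {&a[0], 4}, i.e. the base with the constant offset 16 stripped
   (assuming 4-byte elements). */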
2603
2604 /* Adds candidates based on the uses. */
2605
2606 static void
2607 add_derived_ivs_candidates (struct ivopts_data *data)
2608 {
2609 unsigned i;
2610
2611 for (i = 0; i < n_iv_uses (data); i++)
2612 {
2613 struct iv_use *use = iv_use (data, i);
2614
2615 if (!use)
2616 continue;
2617
2618 switch (use->type)
2619 {
2620 case USE_NONLINEAR_EXPR:
2621 case USE_COMPARE:
2622 case USE_ADDRESS:
2623 /* Just add the ivs based on the value of the iv used here. */
2624 add_iv_value_candidates (data, use->iv, use);
2625 break;
2626
2627 default:
2628 gcc_unreachable ();
2629 }
2630 }
2631 }
2632
2633 /* Record important candidates and add them to related_cands bitmaps
2634 if needed. */
2635
2636 static void
2637 record_important_candidates (struct ivopts_data *data)
2638 {
2639 unsigned i;
2640 struct iv_use *use;
2641
2642 for (i = 0; i < n_iv_cands (data); i++)
2643 {
2644 struct iv_cand *cand = iv_cand (data, i);
2645
2646 if (cand->important)
2647 bitmap_set_bit (data->important_candidates, i);
2648 }
2649
2650 data->consider_all_candidates = (n_iv_cands (data)
2651 <= CONSIDER_ALL_CANDIDATES_BOUND);
2652
2653 if (data->consider_all_candidates)
2654 {
2655 /* We will not need "related_cands" bitmaps in this case,
2656 so release them to decrease peak memory consumption. */
2657 for (i = 0; i < n_iv_uses (data); i++)
2658 {
2659 use = iv_use (data, i);
2660 BITMAP_FREE (use->related_cands);
2661 }
2662 }
2663 else
2664 {
2665 /* Add important candidates to the related_cands bitmaps. */
2666 for (i = 0; i < n_iv_uses (data); i++)
2667 bitmap_ior_into (iv_use (data, i)->related_cands,
2668 data->important_candidates);
2669 }
2670 }
2671
2672 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2673 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2674 we allocate a small hash table for every use. */
2675
2676 static void
2677 alloc_use_cost_map (struct ivopts_data *data)
2678 {
2679 unsigned i, size, s;
2680
2681 for (i = 0; i < n_iv_uses (data); i++)
2682 {
2683 struct iv_use *use = iv_use (data, i);
2684
2685 if (data->consider_all_candidates)
2686 size = n_iv_cands (data);
2687 else
2688 {
2689 s = bitmap_count_bits (use->related_cands);
2690
2691 /* Round up to a power of two, so that computing the modulus is fast. */
2692 size = s ? (1 << ceil_log2 (s)) : 1;
2693 }
2694
2695 use->n_map_members = size;
2696 use->cost_map = XCNEWVEC (struct cost_pair, size);
2697 }
2698 }
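
/* Illustration of the sizing: with s == 5 related candidates the map
   gets size 8, so a candidate id maps to a slot with a mask instead
   of a division:

     slot = cand->id & (8 - 1);   equivalent to cand->id % 8

   set_use_iv_cost and get_use_iv_cost below rely on this, probing
   linearly (with wraparound) from that slot. */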
2699
2700 /* Returns a description of the computation cost of an expression whose
2701 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2702
2703 static comp_cost
2704 new_cost (unsigned runtime, unsigned complexity)
2705 {
2706 comp_cost cost;
2707
2708 cost.cost = runtime;
2709 cost.complexity = complexity;
2710
2711 return cost;
2712 }
2713
2714 /* Adds costs COST1 and COST2. */
2715
2716 static comp_cost
2717 add_costs (comp_cost cost1, comp_cost cost2)
2718 {
2719 cost1.cost += cost2.cost;
2720 cost1.complexity += cost2.complexity;
2721
2722 return cost1;
2723 }
2724 /* Subtracts cost COST2 from COST1. */
2725
2726 static comp_cost
2727 sub_costs (comp_cost cost1, comp_cost cost2)
2728 {
2729 cost1.cost -= cost2.cost;
2730 cost1.complexity -= cost2.complexity;
2731
2732 return cost1;
2733 }
2734
2735 /* Returns a negative number if COST1 < COST2, a positive number if
2736 COST1 > COST2, and 0 if COST1 = COST2. */
2737
2738 static int
2739 compare_costs (comp_cost cost1, comp_cost cost2)
2740 {
2741 if (cost1.cost == cost2.cost)
2742 return cost1.complexity - cost2.complexity;
2743
2744 return cost1.cost - cost2.cost;
2745 }
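
/* Costs thus compare lexicographically: e.g. {cost = 4, complexity = 2}
   is less than {cost = 5, complexity = 0}; complexity only breaks ties
   between equal runtime costs. */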
2746
2747 /* Returns true if COST is infinite. */
2748
2749 static bool
2750 infinite_cost_p (comp_cost cost)
2751 {
2752 return cost.cost == INFTY;
2753 }
2754
2755 /* Sets the cost of the (USE, CANDIDATE) pair to COST, records that it depends
2756 on the invariants DEPENDS_ON, that the value used in expressing it
2757 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
2758
2759 static void
2760 set_use_iv_cost (struct ivopts_data *data,
2761 struct iv_use *use, struct iv_cand *cand,
2762 comp_cost cost, bitmap depends_on, tree value,
2763 enum tree_code comp, int inv_expr_id)
2764 {
2765 unsigned i, s;
2766
2767 if (infinite_cost_p (cost))
2768 {
2769 BITMAP_FREE (depends_on);
2770 return;
2771 }
2772
2773 if (data->consider_all_candidates)
2774 {
2775 use->cost_map[cand->id].cand = cand;
2776 use->cost_map[cand->id].cost = cost;
2777 use->cost_map[cand->id].depends_on = depends_on;
2778 use->cost_map[cand->id].value = value;
2779 use->cost_map[cand->id].comp = comp;
2780 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2781 return;
2782 }
2783
2784 /* n_map_members is a power of two, so this computes modulo. */
2785 s = cand->id & (use->n_map_members - 1);
2786 for (i = s; i < use->n_map_members; i++)
2787 if (!use->cost_map[i].cand)
2788 goto found;
2789 for (i = 0; i < s; i++)
2790 if (!use->cost_map[i].cand)
2791 goto found;
2792
2793 gcc_unreachable ();
2794
2795 found:
2796 use->cost_map[i].cand = cand;
2797 use->cost_map[i].cost = cost;
2798 use->cost_map[i].depends_on = depends_on;
2799 use->cost_map[i].value = value;
2800 use->cost_map[i].comp = comp;
2801 use->cost_map[i].inv_expr_id = inv_expr_id;
2802 }
2803
2804 /* Gets cost of (USE, CANDIDATE) pair. */
2805
2806 static struct cost_pair *
2807 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2808 struct iv_cand *cand)
2809 {
2810 unsigned i, s;
2811 struct cost_pair *ret;
2812
2813 if (!cand)
2814 return NULL;
2815
2816 if (data->consider_all_candidates)
2817 {
2818 ret = use->cost_map + cand->id;
2819 if (!ret->cand)
2820 return NULL;
2821
2822 return ret;
2823 }
2824
2825 /* n_map_members is a power of two, so this computes modulo. */
2826 s = cand->id & (use->n_map_members - 1);
2827 for (i = s; i < use->n_map_members; i++)
2828 if (use->cost_map[i].cand == cand)
2829 return use->cost_map + i;
2830 else if (use->cost_map[i].cand == NULL)
2831 return NULL;
2832 for (i = 0; i < s; i++)
2833 if (use->cost_map[i].cand == cand)
2834 return use->cost_map + i;
2835 else if (use->cost_map[i].cand == NULL)
2836 return NULL;
2837
2838 return NULL;
2839 }
2840
2841 /* Returns an estimate of the cost of computing SEQ. */
2842
2843 static unsigned
2844 seq_cost (rtx seq, bool speed)
2845 {
2846 unsigned cost = 0;
2847 rtx set;
2848
2849 for (; seq; seq = NEXT_INSN (seq))
2850 {
2851 set = single_set (seq);
2852 if (set)
2853 cost += set_src_cost (SET_SRC (set), speed);
2854 else
2855 cost++;
2856 }
2857
2858 return cost;
2859 }
2860
2861 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2862 static rtx
2863 produce_memory_decl_rtl (tree obj, int *regno)
2864 {
2865 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2866 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2867 rtx x;
2868
2869 gcc_assert (obj);
2870 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2871 {
2872 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2873 x = gen_rtx_SYMBOL_REF (address_mode, name);
2874 SET_SYMBOL_REF_DECL (x, obj);
2875 x = gen_rtx_MEM (DECL_MODE (obj), x);
2876 set_mem_addr_space (x, as);
2877 targetm.encode_section_info (obj, x, true);
2878 }
2879 else
2880 {
2881 x = gen_raw_REG (address_mode, (*regno)++);
2882 x = gen_rtx_MEM (DECL_MODE (obj), x);
2883 set_mem_addr_space (x, as);
2884 }
2885
2886 return x;
2887 }
2888
2889 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2890 walk_tree. DATA contains the current fake register number. */
2891
2892 static tree
2893 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2894 {
2895 tree obj = NULL_TREE;
2896 rtx x = NULL_RTX;
2897 int *regno = (int *) data;
2898
2899 switch (TREE_CODE (*expr_p))
2900 {
2901 case ADDR_EXPR:
2902 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2903 handled_component_p (*expr_p);
2904 expr_p = &TREE_OPERAND (*expr_p, 0))
2905 continue;
2906 obj = *expr_p;
2907 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2908 x = produce_memory_decl_rtl (obj, regno);
2909 break;
2910
2911 case SSA_NAME:
2912 *ws = 0;
2913 obj = SSA_NAME_VAR (*expr_p);
2914 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2915 if (!obj)
2916 return NULL_TREE;
2917 if (!DECL_RTL_SET_P (obj))
2918 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2919 break;
2920
2921 case VAR_DECL:
2922 case PARM_DECL:
2923 case RESULT_DECL:
2924 *ws = 0;
2925 obj = *expr_p;
2926
2927 if (DECL_RTL_SET_P (obj))
2928 break;
2929
2930 if (DECL_MODE (obj) == BLKmode)
2931 x = produce_memory_decl_rtl (obj, regno);
2932 else
2933 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2934
2935 break;
2936
2937 default:
2938 break;
2939 }
2940
2941 if (x)
2942 {
2943 decl_rtl_to_reset.safe_push (obj);
2944 SET_DECL_RTL (obj, x);
2945 }
2946
2947 return NULL_TREE;
2948 }
2949
2950 /* Determines the cost of computing EXPR. */
2951
2952 static unsigned
2953 computation_cost (tree expr, bool speed)
2954 {
2955 rtx seq, rslt;
2956 tree type = TREE_TYPE (expr);
2957 unsigned cost;
2958 /* Avoid using hard regs in ways which may be unsupported. */
2959 int regno = LAST_VIRTUAL_REGISTER + 1;
2960 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2961 enum node_frequency real_frequency = node->frequency;
2962
2963 node->frequency = NODE_FREQUENCY_NORMAL;
2964 crtl->maybe_hot_insn_p = speed;
2965 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2966 start_sequence ();
2967 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2968 seq = get_insns ();
2969 end_sequence ();
2970 default_rtl_profile ();
2971 node->frequency = real_frequency;
2972
2973 cost = seq_cost (seq, speed);
2974 if (MEM_P (rslt))
2975 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2976 TYPE_ADDR_SPACE (type), speed);
2977 else if (!REG_P (rslt))
2978 cost += set_src_cost (rslt, speed);
2979
2980 return cost;
2981 }
2982
2983 /* Returns variable containing the value of candidate CAND at statement AT. */
2984
2985 static tree
2986 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2987 {
2988 if (stmt_after_increment (loop, cand, stmt))
2989 return cand->var_after;
2990 else
2991 return cand->var_before;
2992 }
2993
2994 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2995 same precision, which is at least as wide as the precision of TYPE, stores
2996 BA in A and BB in B, and returns the type of BA. Otherwise, returns the
2997 type of A and B. */
2998
2999 static tree
3000 determine_common_wider_type (tree *a, tree *b)
3001 {
3002 tree wider_type = NULL;
3003 tree suba, subb;
3004 tree atype = TREE_TYPE (*a);
3005
3006 if (CONVERT_EXPR_P (*a))
3007 {
3008 suba = TREE_OPERAND (*a, 0);
3009 wider_type = TREE_TYPE (suba);
3010 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3011 return atype;
3012 }
3013 else
3014 return atype;
3015
3016 if (CONVERT_EXPR_P (*b))
3017 {
3018 subb = TREE_OPERAND (*b, 0);
3019 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3020 return atype;
3021 }
3022 else
3023 return atype;
3024
3025 *a = suba;
3026 *b = subb;
3027 return wider_type;
3028 }
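
/* Illustration (hypothetical names): if *A is (unsigned int) ba_long
   and *B is (unsigned int) bb_long, this stores ba_long to *A and
   bb_long to *B and returns the 64-bit type, so the subtraction done
   by the caller folds in the wide type before the final cast back to
   the unsigned 32-bit type. */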
3029
3030 /* Determines the expression by which USE is expressed from induction variable
3031 CAND at statement AT in LOOP. The expression is stored in a decomposed
3032 form into AFF. Returns false if USE cannot be expressed using CAND. */
3033
3034 static bool
3035 get_computation_aff (struct loop *loop,
3036 struct iv_use *use, struct iv_cand *cand, gimple at,
3037 struct aff_tree *aff)
3038 {
3039 tree ubase = use->iv->base;
3040 tree ustep = use->iv->step;
3041 tree cbase = cand->iv->base;
3042 tree cstep = cand->iv->step, cstep_common;
3043 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3044 tree common_type, var;
3045 tree uutype;
3046 aff_tree cbase_aff, var_aff;
3047 widest_int rat;
3048
3049 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3050 {
3051 /* We do not have enough precision to express the values of USE. */
3052 return false;
3053 }
3054
3055 var = var_at_stmt (loop, cand, at);
3056 uutype = unsigned_type_for (utype);
3057
3058 /* If the conversion is not a no-op, perform it. */
3059 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3060 {
3061 cstep = fold_convert (uutype, cstep);
3062 cbase = fold_convert (uutype, cbase);
3063 var = fold_convert (uutype, var);
3064 }
3065
3066 if (!constant_multiple_of (ustep, cstep, &rat))
3067 return false;
3068
3069 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3070 type, we achieve better folding by computing their difference in this
3071 wider type, and casting the result to UUTYPE. We do not need to worry about
3072 overflow, as all the arithmetic will in the end be performed in UUTYPE
3073 anyway. */
3074 common_type = determine_common_wider_type (&ubase, &cbase);
3075
3076 /* use = ubase - ratio * cbase + ratio * var. */
3077 tree_to_aff_combination (ubase, common_type, aff);
3078 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3079 tree_to_aff_combination (var, uutype, &var_aff);
3080
3081 /* We need to shift the value if we are after the increment. */
3082 if (stmt_after_increment (loop, cand, at))
3083 {
3084 aff_tree cstep_aff;
3085
3086 if (common_type != uutype)
3087 cstep_common = fold_convert (common_type, cstep);
3088 else
3089 cstep_common = cstep;
3090
3091 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3092 aff_combination_add (&cbase_aff, &cstep_aff);
3093 }
3094
3095 aff_combination_scale (&cbase_aff, -rat);
3096 aff_combination_add (aff, &cbase_aff);
3097 if (common_type != uutype)
3098 aff_combination_convert (aff, uutype);
3099
3100 aff_combination_scale (&var_aff, rat);
3101 aff_combination_add (aff, &var_aff);
3102
3103 return true;
3104 }
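
/* Worked example: expressing a use with iv {base = 16, step = 4} from
   a candidate with iv {base = 0, step = 1} gives rat = 4, so

     use = ubase - rat * cbase + rat * var = 16 - 4 * 0 + 4 * var

   and AFF ends up as the affine combination 4 * var + 16 in the
   unsigned variant of the use type. */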
3105
3106 /* Return the type of USE. */
3107
3108 static tree
3109 get_use_type (struct iv_use *use)
3110 {
3111 tree base_type = TREE_TYPE (use->iv->base);
3112 tree type;
3113
3114 if (use->type == USE_ADDRESS)
3115 {
3116 /* The base_type may be a void pointer. Create a pointer type based on
3117 the mem_ref instead. */
3118 type = build_pointer_type (TREE_TYPE (*use->op_p));
3119 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3120 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3121 }
3122 else
3123 type = base_type;
3124
3125 return type;
3126 }
3127
3128 /* Determines the expression by which USE is expressed from induction variable
3129 CAND at statement AT in LOOP. The computation is unshared. */
3130
3131 static tree
3132 get_computation_at (struct loop *loop,
3133 struct iv_use *use, struct iv_cand *cand, gimple at)
3134 {
3135 aff_tree aff;
3136 tree type = get_use_type (use);
3137
3138 if (!get_computation_aff (loop, use, cand, at, &aff))
3139 return NULL_TREE;
3140 unshare_aff_combination (&aff);
3141 return fold_convert (type, aff_combination_to_tree (&aff));
3142 }
3143
3144 /* Determines the expression by which USE is expressed from induction variable
3145 CAND in LOOP. The computation is unshared. */
3146
3147 static tree
3148 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3149 {
3150 return get_computation_at (loop, use, cand, use->stmt);
3151 }
3152
3153 /* Adjust the cost COST for being in loop setup rather than loop body.
3154 If we're optimizing for space, the loop setup overhead is constant;
3155 if we're optimizing for speed, amortize it over the per-iteration cost. */
3156 static unsigned
3157 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3158 {
3159 if (cost == INFTY)
3160 return cost;
3161 else if (optimize_loop_for_speed_p (data->current_loop))
3162 return cost / avg_loop_niter (data->current_loop);
3163 else
3164 return cost;
3165 }
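
/* E.g. a setup cost of 10 in a loop averaging 5 iterations is charged
   as 2 when optimizing for speed, but stays 10 when optimizing for
   size. */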
3166
3167 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3168 validity for a memory reference accessing memory of mode MODE in
3169 address space AS. */
3170
3171
3172 bool
3173 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3174 addr_space_t as)
3175 {
3176 #define MAX_RATIO 128
3177 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3178 static vec<sbitmap> valid_mult_list;
3179 sbitmap valid_mult;
3180
3181 if (data_index >= valid_mult_list.length ())
3182 valid_mult_list.safe_grow_cleared (data_index + 1);
3183
3184 valid_mult = valid_mult_list[data_index];
3185 if (!valid_mult)
3186 {
3187 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3188 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3189 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3190 rtx addr, scaled;
3191 HOST_WIDE_INT i;
3192
3193 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3194 bitmap_clear (valid_mult);
3195 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3196 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3197 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3198 {
3199 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3200 if (memory_address_addr_space_p (mode, addr, as)
3201 || memory_address_addr_space_p (mode, scaled, as))
3202 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3203 }
3204
3205 if (dump_file && (dump_flags & TDF_DETAILS))
3206 {
3207 fprintf (dump_file, " allowed multipliers:");
3208 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3209 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3210 fprintf (dump_file, " %d", (int) i);
3211 fprintf (dump_file, "\n");
3212 fprintf (dump_file, "\n");
3213 }
3214
3215 valid_mult_list[data_index] = valid_mult;
3216 }
3217
3218 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3219 return false;
3220
3221 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3222 }
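
/* Illustration: on x86-like targets, whose addressing modes accept
   scale factors of 1, 2, 4 and 8, this returns true only for those
   ratios; any other multiplier has to be computed separately, which
   get_address_cost accounts for via mult_by_coeff_cost below. */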
3223
3224 /* Returns the cost of an address of shape symbol + var + OFFSET + RATIO * index.
3225 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3226 variable is omitted. Compute the cost for a memory reference that accesses
3227 a memory location of mode MEM_MODE in address space AS.
3228
3229 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3230 size of MEM_MODE / RATIO) is available. To make this determination, we
3231 look at the size of the increment to be made, which is given in CSTEP.
3232 CSTEP may be zero if the step is unknown.
3233 STMT_AFTER_INC is true iff the statement we're looking at is after the
3234 increment of the original biv.
3235
3236 TODO -- there must be some better way. This is all quite crude. */
3237
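/* Illustration of the shape being costed: with all four parts present
   the address corresponds to something like the x86 operand
   sym+16(%rbx,%rcx,4): symbol sym, var %rbx, OFFSET 16 and index
   %rcx with RATIO 4. The sixteen present/absent combinations are
   what the costs[2][2][2][2] table below caches per mode and address
   space. */
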
3238 enum ainc_type
3239 {
3240 AINC_PRE_INC, /* Pre increment. */
3241 AINC_PRE_DEC, /* Pre decrement. */
3242 AINC_POST_INC, /* Post increment. */
3243 AINC_POST_DEC, /* Post decrement. */
3244 AINC_NONE /* Also the number of auto increment types. */
3245 };
3246
3247 typedef struct address_cost_data_s
3248 {
3249 HOST_WIDE_INT min_offset, max_offset;
3250 unsigned costs[2][2][2][2];
3251 unsigned ainc_costs[AINC_NONE];
3252 } *address_cost_data;
3253
3254
3255 static comp_cost
3256 get_address_cost (bool symbol_present, bool var_present,
3257 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3258 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3259 addr_space_t as, bool speed,
3260 bool stmt_after_inc, bool *may_autoinc)
3261 {
3262 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3263 static vec<address_cost_data> address_cost_data_list;
3264 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3265 address_cost_data data;
3266 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3267 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3268 unsigned cost, acost, complexity;
3269 enum ainc_type autoinc_type;
3270 bool offset_p, ratio_p, autoinc;
3271 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3272 unsigned HOST_WIDE_INT mask;
3273 unsigned bits;
3274
3275 if (data_index >= address_cost_data_list.length ())
3276 address_cost_data_list.safe_grow_cleared (data_index + 1);
3277
3278 data = address_cost_data_list[data_index];
3279 if (!data)
3280 {
3281 HOST_WIDE_INT i;
3282 HOST_WIDE_INT rat, off = 0;
3283 int old_cse_not_expected, width;
3284 unsigned sym_p, var_p, off_p, rat_p, add_c;
3285 rtx seq, addr, base;
3286 rtx reg0, reg1;
3287
3288 data = (address_cost_data) xcalloc (1, sizeof (*data));
3289
3290 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3291
3292 width = GET_MODE_BITSIZE (address_mode) - 1;
3293 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3294 width = HOST_BITS_PER_WIDE_INT - 1;
3295 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3296
3297 for (i = width; i >= 0; i--)
3298 {
3299 off = -((unsigned HOST_WIDE_INT) 1 << i);
3300 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3301 if (memory_address_addr_space_p (mem_mode, addr, as))
3302 break;
3303 }
3304 data->min_offset = (i == -1? 0 : off);
3305
3306 for (i = width; i >= 0; i--)
3307 {
3308 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3309 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3310 if (memory_address_addr_space_p (mem_mode, addr, as))
3311 break;
3312 /* For some targets, like ARM Thumb-1, the offset should be naturally
3313 aligned. Try an aligned offset if address_mode is not QImode. */
3314 off = (address_mode == QImode)
3315 ? 0
3316 : ((unsigned HOST_WIDE_INT) 1 << i)
3317 - GET_MODE_SIZE (address_mode);
3318 if (off > 0)
3319 {
3320 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3321 if (memory_address_addr_space_p (mem_mode, addr, as))
3322 break;
3323 }
3324 }
3325 if (i == -1)
3326 off = 0;
3327 data->max_offset = off;
3328
3329 if (dump_file && (dump_flags & TDF_DETAILS))
3330 {
3331 fprintf (dump_file, "get_address_cost:\n");
3332 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3333 GET_MODE_NAME (mem_mode),
3334 data->min_offset);
3335 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3336 GET_MODE_NAME (mem_mode),
3337 data->max_offset);
3338 }
3339
3340 rat = 1;
3341 for (i = 2; i <= MAX_RATIO; i++)
3342 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3343 {
3344 rat = i;
3345 break;
3346 }
3347
3348 /* Compute the cost of various addressing modes. */
3349 acost = 0;
3350 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3351 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3352
3353 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3354 || USE_STORE_PRE_DECREMENT (mem_mode))
3355 {
3356 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3357 has_predec[mem_mode]
3358 = memory_address_addr_space_p (mem_mode, addr, as);
3359
3360 if (has_predec[mem_mode])
3361 data->ainc_costs[AINC_PRE_DEC]
3362 = address_cost (addr, mem_mode, as, speed);
3363 }
3364 if (USE_LOAD_POST_DECREMENT (mem_mode)
3365 || USE_STORE_POST_DECREMENT (mem_mode))
3366 {
3367 addr = gen_rtx_POST_DEC (address_mode, reg0);
3368 has_postdec[mem_mode]
3369 = memory_address_addr_space_p (mem_mode, addr, as);
3370
3371 if (has_postdec[mem_mode])
3372 data->ainc_costs[AINC_POST_DEC]
3373 = address_cost (addr, mem_mode, as, speed);
3374 }
3375 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3376 || USE_STORE_PRE_INCREMENT (mem_mode))
3377 {
3378 addr = gen_rtx_PRE_INC (address_mode, reg0);
3379 has_preinc[mem_mode]
3380 = memory_address_addr_space_p (mem_mode, addr, as);
3381
3382 if (has_preinc[mem_mode])
3383 data->ainc_costs[AINC_PRE_INC]
3384 = address_cost (addr, mem_mode, as, speed);
3385 }
3386 if (USE_LOAD_POST_INCREMENT (mem_mode)
3387 || USE_STORE_POST_INCREMENT (mem_mode))
3388 {
3389 addr = gen_rtx_POST_INC (address_mode, reg0);
3390 has_postinc[mem_mode]
3391 = memory_address_addr_space_p (mem_mode, addr, as);
3392
3393 if (has_postinc[mem_mode])
3394 data->ainc_costs[AINC_POST_INC]
3395 = address_cost (addr, mem_mode, as, speed);
3396 }
3397 for (i = 0; i < 16; i++)
3398 {
3399 sym_p = i & 1;
3400 var_p = (i >> 1) & 1;
3401 off_p = (i >> 2) & 1;
3402 rat_p = (i >> 3) & 1;
3403
3404 addr = reg0;
3405 if (rat_p)
3406 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3407 gen_int_mode (rat, address_mode));
3408
3409 if (var_p)
3410 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3411
3412 if (sym_p)
3413 {
3414 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3415 /* ??? We can run into trouble with some backends by presenting
3416 it with symbols which haven't been properly passed through
3417 targetm.encode_section_info. By setting the local bit, we
3418 enhance the probability of things working. */
3419 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3420
3421 if (off_p)
3422 base = gen_rtx_fmt_e (CONST, address_mode,
3423 gen_rtx_fmt_ee
3424 (PLUS, address_mode, base,
3425 gen_int_mode (off, address_mode)));
3426 }
3427 else if (off_p)
3428 base = gen_int_mode (off, address_mode);
3429 else
3430 base = NULL_RTX;
3431
3432 if (base)
3433 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3434
3435 start_sequence ();
3436 /* To avoid splitting addressing modes, pretend that no cse will
3437 follow. */
3438 old_cse_not_expected = cse_not_expected;
3439 cse_not_expected = true;
3440 addr = memory_address_addr_space (mem_mode, addr, as);
3441 cse_not_expected = old_cse_not_expected;
3442 seq = get_insns ();
3443 end_sequence ();
3444
3445 acost = seq_cost (seq, speed);
3446 acost += address_cost (addr, mem_mode, as, speed);
3447
3448 if (!acost)
3449 acost = 1;
3450 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3451 }
3452
3453 /* On some targets, it is quite expensive to load a symbol into a register,
3454 which makes addresses that contain symbols look much more expensive.
3455 However, the symbol will have to be loaded in any case before the
3456 loop (and quite likely we have it in a register already), so it does not
3457 make much sense to penalize them too heavily. So make some final
3458 tweaks for the SYMBOL_PRESENT modes:
3459
3460 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3461 var is cheaper, use this mode with a small penalty.
3462 If VAR_PRESENT is true, check whether the mode with
3463 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3464 if this is the case, use it. */
3465 add_c = add_cost (speed, address_mode);
3466 for (i = 0; i < 8; i++)
3467 {
3468 var_p = i & 1;
3469 off_p = (i >> 1) & 1;
3470 rat_p = (i >> 2) & 1;
3471
3472 acost = data->costs[0][1][off_p][rat_p] + 1;
3473 if (var_p)
3474 acost += add_c;
3475
3476 if (acost < data->costs[1][var_p][off_p][rat_p])
3477 data->costs[1][var_p][off_p][rat_p] = acost;
3478 }
3479
3480 if (dump_file && (dump_flags & TDF_DETAILS))
3481 {
3482 fprintf (dump_file, "Address costs:\n");
3483
3484 for (i = 0; i < 16; i++)
3485 {
3486 sym_p = i & 1;
3487 var_p = (i >> 1) & 1;
3488 off_p = (i >> 2) & 1;
3489 rat_p = (i >> 3) & 1;
3490
3491 fprintf (dump_file, " ");
3492 if (sym_p)
3493 fprintf (dump_file, "sym + ");
3494 if (var_p)
3495 fprintf (dump_file, "var + ");
3496 if (off_p)
3497 fprintf (dump_file, "cst + ");
3498 if (rat_p)
3499 fprintf (dump_file, "rat * ");
3500
3501 acost = data->costs[sym_p][var_p][off_p][rat_p];
3502 fprintf (dump_file, "index costs %d\n", acost);
3503 }
3504 if (has_predec[mem_mode] || has_postdec[mem_mode]
3505 || has_preinc[mem_mode] || has_postinc[mem_mode])
3506 fprintf (dump_file, " May include autoinc/dec\n");
3507 fprintf (dump_file, "\n");
3508 }
3509
3510 address_cost_data_list[data_index] = data;
3511 }
3512
3513 bits = GET_MODE_BITSIZE (address_mode);
3514 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3515 offset &= mask;
3516 if ((offset >> (bits - 1) & 1))
3517 offset |= ~mask;
3518 s_offset = offset;
3519
3520 autoinc = false;
3521 autoinc_type = AINC_NONE;
3522 msize = GET_MODE_SIZE (mem_mode);
3523 autoinc_offset = offset;
3524 if (stmt_after_inc)
3525 autoinc_offset += ratio * cstep;
3526 if (symbol_present || var_present || ratio != 1)
3527 autoinc = false;
3528 else
3529 {
3530 if (has_postinc[mem_mode] && autoinc_offset == 0
3531 && msize == cstep)
3532 autoinc_type = AINC_POST_INC;
3533 else if (has_postdec[mem_mode] && autoinc_offset == 0
3534 && msize == -cstep)
3535 autoinc_type = AINC_POST_DEC;
3536 else if (has_preinc[mem_mode] && autoinc_offset == msize
3537 && msize == cstep)
3538 autoinc_type = AINC_PRE_INC;
3539 else if (has_predec[mem_mode] && autoinc_offset == -msize
3540 && msize == -cstep)
3541 autoinc_type = AINC_PRE_DEC;
3542
3543 if (autoinc_type != AINC_NONE)
3544 autoinc = true;
3545 }
3546
3547 cost = 0;
3548 offset_p = (s_offset != 0
3549 && data->min_offset <= s_offset
3550 && s_offset <= data->max_offset);
3551 ratio_p = (ratio != 1
3552 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3553
3554 if (ratio != 1 && !ratio_p)
3555 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3556
3557 if (s_offset && !offset_p && !symbol_present)
3558 cost += add_cost (speed, address_mode);
3559
3560 if (may_autoinc)
3561 *may_autoinc = autoinc;
3562 if (autoinc)
3563 acost = data->ainc_costs[autoinc_type];
3564 else
3565 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3566 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3567 return new_cost (cost + acost, complexity);
3568 }
3569
3570 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3571 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3572 calculating the operands of EXPR. Returns true if successful, and returns
3573 the cost in COST. */
3574
3575 static bool
3576 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3577 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3578 {
3579 comp_cost res;
3580 tree op1 = TREE_OPERAND (expr, 1);
3581 tree cst = TREE_OPERAND (mult, 1);
3582 tree multop = TREE_OPERAND (mult, 0);
3583 int m = exact_log2 (int_cst_value (cst));
3584 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3585 int sa_cost;
3586 bool equal_p = false;
3587
3588 if (!(m >= 0 && m < maxm))
3589 return false;
3590
3591 if (operand_equal_p (op1, mult, 0))
3592 equal_p = true;
3593
3594 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3595 ? shiftadd_cost (speed, mode, m)
3596 : (equal_p
3597 ? shiftsub1_cost (speed, mode, m)
3598 : shiftsub0_cost (speed, mode, m)));
3599 res = new_cost (sa_cost, 0);
3600 res = add_costs (res, equal_p ? cost0 : cost1);
3601
3602 STRIP_NOPS (multop);
3603 if (!is_gimple_val (multop))
3604 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3605
3606 *cost = res;
3607 return true;
3608 }
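
/* Illustration: for expr = a + b * 4, MULT is b * 4 and m == 2, so
   the cost charged is shiftadd_cost (a single shift-and-add, e.g. one
   x86 "lea (%rax,%rbx,4), %rcx"), rather than a full multiplication
   followed by an addition. */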
3609
3610 /* Estimates cost of forcing expression EXPR into a variable. */
3611
3612 static comp_cost
3613 force_expr_to_var_cost (tree expr, bool speed)
3614 {
3615 static bool costs_initialized = false;
3616 static unsigned integer_cost [2];
3617 static unsigned symbol_cost [2];
3618 static unsigned address_cost [2];
3619 tree op0, op1;
3620 comp_cost cost0, cost1, cost;
3621 enum machine_mode mode;
3622
3623 if (!costs_initialized)
3624 {
3625 tree type = build_pointer_type (integer_type_node);
3626 tree var, addr;
3627 rtx x;
3628 int i;
3629
3630 var = create_tmp_var_raw (integer_type_node, "test_var");
3631 TREE_STATIC (var) = 1;
3632 x = produce_memory_decl_rtl (var, NULL);
3633 SET_DECL_RTL (var, x);
3634
3635 addr = build1 (ADDR_EXPR, type, var);
3636
3637
3638 for (i = 0; i < 2; i++)
3639 {
3640 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3641 2000), i);
3642
3643 symbol_cost[i] = computation_cost (addr, i) + 1;
3644
3645 address_cost[i]
3646 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3647 if (dump_file && (dump_flags & TDF_DETAILS))
3648 {
3649 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3650 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3651 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3652 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3653 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3654 fprintf (dump_file, "\n");
3655 }
3656 }
3657
3658 costs_initialized = true;
3659 }
3660
3661 STRIP_NOPS (expr);
3662
3663 if (SSA_VAR_P (expr))
3664 return no_cost;
3665
3666 if (is_gimple_min_invariant (expr))
3667 {
3668 if (TREE_CODE (expr) == INTEGER_CST)
3669 return new_cost (integer_cost [speed], 0);
3670
3671 if (TREE_CODE (expr) == ADDR_EXPR)
3672 {
3673 tree obj = TREE_OPERAND (expr, 0);
3674
3675 if (TREE_CODE (obj) == VAR_DECL
3676 || TREE_CODE (obj) == PARM_DECL
3677 || TREE_CODE (obj) == RESULT_DECL)
3678 return new_cost (symbol_cost [speed], 0);
3679 }
3680
3681 return new_cost (address_cost [speed], 0);
3682 }
3683
3684 switch (TREE_CODE (expr))
3685 {
3686 case POINTER_PLUS_EXPR:
3687 case PLUS_EXPR:
3688 case MINUS_EXPR:
3689 case MULT_EXPR:
3690 op0 = TREE_OPERAND (expr, 0);
3691 op1 = TREE_OPERAND (expr, 1);
3692 STRIP_NOPS (op0);
3693 STRIP_NOPS (op1);
3694 break;
3695
3696 CASE_CONVERT:
3697 case NEGATE_EXPR:
3698 op0 = TREE_OPERAND (expr, 0);
3699 STRIP_NOPS (op0);
3700 op1 = NULL_TREE;
3701 break;
3702
3703 default:
3704 /* Just an arbitrary value, FIXME. */
3705 return new_cost (target_spill_cost[speed], 0);
3706 }
3707
3708 if (op0 == NULL_TREE
3709 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3710 cost0 = no_cost;
3711 else
3712 cost0 = force_expr_to_var_cost (op0, speed);
3713
3714 if (op1 == NULL_TREE
3715 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3716 cost1 = no_cost;
3717 else
3718 cost1 = force_expr_to_var_cost (op1, speed);
3719
3720 mode = TYPE_MODE (TREE_TYPE (expr));
3721 switch (TREE_CODE (expr))
3722 {
3723 case POINTER_PLUS_EXPR:
3724 case PLUS_EXPR:
3725 case MINUS_EXPR:
3726 case NEGATE_EXPR:
3727 cost = new_cost (add_cost (speed, mode), 0);
3728 if (TREE_CODE (expr) != NEGATE_EXPR)
3729 {
3730 tree mult = NULL_TREE;
3731 comp_cost sa_cost;
3732 if (TREE_CODE (op1) == MULT_EXPR)
3733 mult = op1;
3734 else if (TREE_CODE (op0) == MULT_EXPR)
3735 mult = op0;
3736
3737 if (mult != NULL_TREE
3738 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3739 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3740 speed, &sa_cost))
3741 return sa_cost;
3742 }
3743 break;
3744
3745 CASE_CONVERT:
3746 {
3747 tree inner_mode, outer_mode;
3748 outer_mode = TREE_TYPE (expr);
3749 inner_mode = TREE_TYPE (op0);
3750 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3751 TYPE_MODE (inner_mode), speed), 0);
3752 }
3753 break;
3754
3755 case MULT_EXPR:
3756 if (cst_and_fits_in_hwi (op0))
3757 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3758 mode, speed), 0);
3759 else if (cst_and_fits_in_hwi (op1))
3760 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3761 mode, speed), 0);
3762 else
3763 return new_cost (target_spill_cost [speed], 0);
3764 break;
3765
3766 default:
3767 gcc_unreachable ();
3768 }
3769
3770 cost = add_costs (cost, cost0);
3771 cost = add_costs (cost, cost1);
3772
3773 /* Bound the cost by target_spill_cost. The parts of complicated
3774 computations often are either loop invariant or at least can
3775 be shared between several iv uses, so letting this grow without
3776 limits would not give reasonable results. */
3777 if (cost.cost > (int) target_spill_cost [speed])
3778 cost.cost = target_spill_cost [speed];
3779
3780 return cost;
3781 }
3782
3783 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3784 invariants the computation depends on. */
3785
3786 static comp_cost
3787 force_var_cost (struct ivopts_data *data,
3788 tree expr, bitmap *depends_on)
3789 {
3790 if (depends_on)
3791 {
3792 fd_ivopts_data = data;
3793 walk_tree (&expr, find_depends, depends_on, NULL);
3794 }
3795
3796 return force_expr_to_var_cost (expr, data->speed);
3797 }
3798
3799 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3800 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
3801 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3802 invariants the computation depends on. */
3803
3804 static comp_cost
3805 split_address_cost (struct ivopts_data *data,
3806 tree addr, bool *symbol_present, bool *var_present,
3807 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3808 {
3809 tree core;
3810 HOST_WIDE_INT bitsize;
3811 HOST_WIDE_INT bitpos;
3812 tree toffset;
3813 enum machine_mode mode;
3814 int unsignedp, volatilep;
3815
3816 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3817 &unsignedp, &volatilep, false);
3818
3819 if (toffset != 0
3820 || bitpos % BITS_PER_UNIT != 0
3821 || TREE_CODE (core) != VAR_DECL)
3822 {
3823 *symbol_present = false;
3824 *var_present = true;
3825 fd_ivopts_data = data;
3826 walk_tree (&addr, find_depends, depends_on, NULL);
3827 return new_cost (target_spill_cost[data->speed], 0);
3828 }
3829
3830 *offset += bitpos / BITS_PER_UNIT;
3831 if (TREE_STATIC (core)
3832 || DECL_EXTERNAL (core))
3833 {
3834 *symbol_present = true;
3835 *var_present = false;
3836 return no_cost;
3837 }
3838
3839 *symbol_present = false;
3840 *var_present = true;
3841 return no_cost;
3842 }
3843
3844 /* Estimates cost of expressing difference of addresses E1 - E2 as
3845 var + symbol + offset. The value of the offset is added to OFFSET;
3846 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3847 part is missing. DEPENDS_ON is a set of the invariants the computation
3848 depends on. */
3849
3850 static comp_cost
3851 ptr_difference_cost (struct ivopts_data *data,
3852 tree e1, tree e2, bool *symbol_present, bool *var_present,
3853 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3854 {
3855 HOST_WIDE_INT diff = 0;
3856 aff_tree aff_e1, aff_e2;
3857 tree type;
3858
3859 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3860
3861 if (ptr_difference_const (e1, e2, &diff))
3862 {
3863 *offset += diff;
3864 *symbol_present = false;
3865 *var_present = false;
3866 return no_cost;
3867 }
3868
3869 if (integer_zerop (e2))
3870 return split_address_cost (data, TREE_OPERAND (e1, 0),
3871 symbol_present, var_present, offset, depends_on);
3872
3873 *symbol_present = false;
3874 *var_present = true;
3875
3876 type = signed_type_for (TREE_TYPE (e1));
3877 tree_to_aff_combination (e1, type, &aff_e1);
3878 tree_to_aff_combination (e2, type, &aff_e2);
3879 aff_combination_scale (&aff_e2, -1);
3880 aff_combination_add (&aff_e1, &aff_e2);
3881
3882 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3883 }
3884
3885 /* Estimates cost of expressing difference E1 - E2 as
3886 var + symbol + offset. The value of the offset is added to OFFSET;
3887 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3888 part is missing. DEPENDS_ON is a set of the invariants the computation
3889 depends on. */
3890
3891 static comp_cost
3892 difference_cost (struct ivopts_data *data,
3893 tree e1, tree e2, bool *symbol_present, bool *var_present,
3894 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3895 {
3896 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3897 unsigned HOST_WIDE_INT off1, off2;
3898 aff_tree aff_e1, aff_e2;
3899 tree type;
3900
3901 e1 = strip_offset (e1, &off1);
3902 e2 = strip_offset (e2, &off2);
3903 *offset += off1 - off2;
3904
3905 STRIP_NOPS (e1);
3906 STRIP_NOPS (e2);
3907
3908 if (TREE_CODE (e1) == ADDR_EXPR)
3909 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3910 offset, depends_on);
3911 *symbol_present = false;
3912
3913 if (operand_equal_p (e1, e2, 0))
3914 {
3915 *var_present = false;
3916 return no_cost;
3917 }
3918
3919 *var_present = true;
3920
3921 if (integer_zerop (e2))
3922 return force_var_cost (data, e1, depends_on);
3923
3924 if (integer_zerop (e1))
3925 {
3926 comp_cost cost = force_var_cost (data, e2, depends_on);
3927 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3928 return cost;
3929 }
3930
3931 type = signed_type_for (TREE_TYPE (e1));
3932 tree_to_aff_combination (e1, type, &aff_e1);
3933 tree_to_aff_combination (e2, type, &aff_e2);
3934 aff_combination_scale (&aff_e2, -1);
3935 aff_combination_add (&aff_e1, &aff_e2);
3936
3937 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3938 }
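
/* Illustration: for e1 = &a[10] and e2 = &a[2] with 4-byte elements,
   the whole difference folds into *offset += 32 via ptr_difference_cost
   and no_cost is returned; only when the difference is not a
   compile-time constant do we fall back to force_var_cost on e1 - e2. */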
3939
3940 /* Returns true if AFF1 and AFF2 are identical. */
3941
3942 static bool
3943 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3944 {
3945 unsigned i;
3946
3947 if (aff1->n != aff2->n)
3948 return false;
3949
3950 for (i = 0; i < aff1->n; i++)
3951 {
3952 if (aff1->elts[i].coef != aff2->elts[i].coef)
3953 return false;
3954
3955 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3956 return false;
3957 }
3958 return true;
3959 }
3960
3961 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3962
3963 static int
3964 get_expr_id (struct ivopts_data *data, tree expr)
3965 {
3966 struct iv_inv_expr_ent ent;
3967 struct iv_inv_expr_ent **slot;
3968
3969 ent.expr = expr;
3970 ent.hash = iterative_hash_expr (expr, 0);
3971 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3972 if (*slot)
3973 return (*slot)->id;
3974
3975 *slot = XNEW (struct iv_inv_expr_ent);
3976 (*slot)->expr = expr;
3977 (*slot)->hash = ent.hash;
3978 (*slot)->id = data->inv_expr_id++;
3979 return (*slot)->id;
3980 }
3981
3982 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3983 requires a new compiler-generated temporary. Returns -1 otherwise.
3984 ADDRESS_P is a flag indicating if the expression is for address
3985 computation. */
3986
3987 static int
3988 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3989 tree cbase, HOST_WIDE_INT ratio,
3990 bool address_p)
3991 {
3992 aff_tree ubase_aff, cbase_aff;
3993 tree expr, ub, cb;
3994
3995 STRIP_NOPS (ubase);
3996 STRIP_NOPS (cbase);
3997 ub = ubase;
3998 cb = cbase;
3999
4000 if ((TREE_CODE (ubase) == INTEGER_CST)
4001 && (TREE_CODE (cbase) == INTEGER_CST))
4002 return -1;
4003
4004 /* Strips the constant part. */
4005 if (TREE_CODE (ubase) == PLUS_EXPR
4006 || TREE_CODE (ubase) == MINUS_EXPR
4007 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4008 {
4009 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4010 ubase = TREE_OPERAND (ubase, 0);
4011 }
4012
4013 /* Strips the constant part. */
4014 if (TREE_CODE (cbase) == PLUS_EXPR
4015 || TREE_CODE (cbase) == MINUS_EXPR
4016 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4017 {
4018 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4019 cbase = TREE_OPERAND (cbase, 0);
4020 }
4021
4022 if (address_p)
4023 {
4024 if (((TREE_CODE (ubase) == SSA_NAME)
4025 || (TREE_CODE (ubase) == ADDR_EXPR
4026 && is_gimple_min_invariant (ubase)))
4027 && (TREE_CODE (cbase) == INTEGER_CST))
4028 return -1;
4029
4030 if (((TREE_CODE (cbase) == SSA_NAME)
4031 || (TREE_CODE (cbase) == ADDR_EXPR
4032 && is_gimple_min_invariant (cbase)))
4033 && (TREE_CODE (ubase) == INTEGER_CST))
4034 return -1;
4035 }
4036
4037 if (ratio == 1)
4038 {
4039 if (operand_equal_p (ubase, cbase, 0))
4040 return -1;
4041
4042 if (TREE_CODE (ubase) == ADDR_EXPR
4043 && TREE_CODE (cbase) == ADDR_EXPR)
4044 {
4045 tree usym, csym;
4046
4047 usym = TREE_OPERAND (ubase, 0);
4048 csym = TREE_OPERAND (cbase, 0);
4049 if (TREE_CODE (usym) == ARRAY_REF)
4050 {
4051 tree ind = TREE_OPERAND (usym, 1);
4052 if (TREE_CODE (ind) == INTEGER_CST
4053 && tree_fits_shwi_p (ind)
4054 && tree_to_shwi (ind) == 0)
4055 usym = TREE_OPERAND (usym, 0);
4056 }
4057 if (TREE_CODE (csym) == ARRAY_REF)
4058 {
4059 tree ind = TREE_OPERAND (csym, 1);
4060 if (TREE_CODE (ind) == INTEGER_CST
4061 && tree_fits_shwi_p (ind)
4062 && tree_to_shwi (ind) == 0)
4063 csym = TREE_OPERAND (csym, 0);
4064 }
4065 if (operand_equal_p (usym, csym, 0))
4066 return -1;
4067 }
4068 /* Now do a more complex comparison. */
4069 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4070 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4071 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4072 return -1;
4073 }
4074
4075 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4076 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4077
4078 aff_combination_scale (&cbase_aff, -1 * ratio);
4079 aff_combination_add (&ubase_aff, &cbase_aff);
4080 expr = aff_combination_to_tree (&ubase_aff);
4081 return get_expr_id (data, expr);
4082 }
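
/* A hypothetical example: for UBASE = &a[i] + 16, CBASE = &a[i] and
RATIO == 1, the constant part of UBASE is stripped, operand_equal_p
matches the stripped bases, and -1 is returned -- UBASE - CBASE folds
to the constant 16, so no compiler-generated temporary is needed.
With CBASE = &b[i] instead, the bases differ, and the combined
expression (&a[i] + 16) - &b[i] is assigned an id by get_expr_id. */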
4083
4084
4085
4086 /* Determines the cost of the computation by which USE is expressed
4087 from induction variable CAND. If ADDRESS_P is true, we just need
4088 to create an address from it, otherwise we want to get it into a
4089 register. A set of invariants we depend on is stored in
4090 DEPENDS_ON. AT is the statement at which the value is computed.
4091 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4092 addressing is likely. */
4093
4094 static comp_cost
4095 get_computation_cost_at (struct ivopts_data *data,
4096 struct iv_use *use, struct iv_cand *cand,
4097 bool address_p, bitmap *depends_on, gimple at,
4098 bool *can_autoinc,
4099 int *inv_expr_id)
4100 {
4101 tree ubase = use->iv->base, ustep = use->iv->step;
4102 tree cbase, cstep;
4103 tree utype = TREE_TYPE (ubase), ctype;
4104 unsigned HOST_WIDE_INT cstepi, offset = 0;
4105 HOST_WIDE_INT ratio, aratio;
4106 bool var_present, symbol_present, stmt_is_after_inc;
4107 comp_cost cost;
4108 widest_int rat;
4109 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4110 enum machine_mode mem_mode = (address_p
4111 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4112 : VOIDmode);
4113
4114 *depends_on = NULL;
4115
4116 /* Only consider real candidates. */
4117 if (!cand->iv)
4118 return infinite_cost;
4119
4120 cbase = cand->iv->base;
4121 cstep = cand->iv->step;
4122 ctype = TREE_TYPE (cbase);
4123
4124 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4125 {
4126 /* We do not have enough precision to express the values of the use. */
4127 return infinite_cost;
4128 }
4129
4130 if (address_p
4131 || (use->iv->base_object
4132 && cand->iv->base_object
4133 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4134 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4135 {
4136 /* Do not try to express the address of an object with a computation
4137 based on the address of a different object. This may cause problems
4138 in RTL-level alias analysis (which does not expect this to happen,
4139 as it is illegal in C), and would be unlikely to be useful
4140 anyway. */
4141 if (use->iv->base_object
4142 && cand->iv->base_object
4143 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4144 return infinite_cost;
4145 }
4146
4147 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4148 {
4149 /* TODO -- add direct handling of this case. */
4150 goto fallback;
4151 }
4152
4153 /* CSTEPI is subtracted from the offset in case the statement is after
4154 the increment. If the step is not constant, we use zero instead.
4155 This is a bit imprecise (there is the extra addition), but
4156 redundancy elimination is likely to transform the code so that
4157 it uses the value of the variable before the increment anyway,
4158 so this is not too unrealistic. */
4159 if (cst_and_fits_in_hwi (cstep))
4160 cstepi = int_cst_value (cstep);
4161 else
4162 cstepi = 0;
4163
4164 if (!constant_multiple_of (ustep, cstep, &rat))
4165 return infinite_cost;
4166
4167 if (wi::fits_shwi_p (rat))
4168 ratio = rat.to_shwi ();
4169 else
4170 return infinite_cost;
4171
4172 STRIP_NOPS (cbase);
4173 ctype = TREE_TYPE (cbase);
4174
4175 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4176
4177 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4178 or ratio == 1, it is better to handle this like
4179
4180 ubase - ratio * cbase + ratio * var
4181
4182 (this also holds in the case ratio == -1; TODO). */
4183
4184 if (cst_and_fits_in_hwi (cbase))
4185 {
4186 offset = - ratio * int_cst_value (cbase);
4187 cost = difference_cost (data,
4188 ubase, build_int_cst (utype, 0),
4189 &symbol_present, &var_present, &offset,
4190 depends_on);
4191 cost.cost /= avg_loop_niter (data->current_loop);
4192 }
4193 else if (ratio == 1)
4194 {
4195 tree real_cbase = cbase;
4196
4197 /* Check to see if any adjustment is needed. */
4198 if (cstepi == 0 && stmt_is_after_inc)
4199 {
4200 aff_tree real_cbase_aff;
4201 aff_tree cstep_aff;
4202
4203 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4204 &real_cbase_aff);
4205 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4206
4207 aff_combination_add (&real_cbase_aff, &cstep_aff);
4208 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4209 }
4210
4211 cost = difference_cost (data,
4212 ubase, real_cbase,
4213 &symbol_present, &var_present, &offset,
4214 depends_on);
4215 cost.cost /= avg_loop_niter (data->current_loop);
4216 }
4217 else if (address_p
4218 && !POINTER_TYPE_P (ctype)
4219 && multiplier_allowed_in_address_p
4220 (ratio, mem_mode,
4221 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4222 {
4223 cbase
4224 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4225 cost = difference_cost (data,
4226 ubase, cbase,
4227 &symbol_present, &var_present, &offset,
4228 depends_on);
4229 cost.cost /= avg_loop_niter (data->current_loop);
4230 }
4231 else
4232 {
4233 cost = force_var_cost (data, cbase, depends_on);
4234 cost = add_costs (cost,
4235 difference_cost (data,
4236 ubase, build_int_cst (utype, 0),
4237 &symbol_present, &var_present,
4238 &offset, depends_on));
4239 cost.cost /= avg_loop_niter (data->current_loop);
4240 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4241 }
4242
4243 if (inv_expr_id)
4244 {
4245 *inv_expr_id =
4246 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4247 /* Clear depends on. */
4248 if (*inv_expr_id != -1 && depends_on && *depends_on)
4249 bitmap_clear (*depends_on);
4250 }
4251
4252 /* If we are after the increment, the value of the candidate is higher by
4253 one iteration. */
4254 if (stmt_is_after_inc)
4255 offset -= ratio * cstepi;
4256
4257 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
4258 (The symbol/var1/const parts may be omitted.) If we are looking for an
4259 address, find the cost of addressing this. */
4260 if (address_p)
4261 return add_costs (cost,
4262 get_address_cost (symbol_present, var_present,
4263 offset, ratio, cstepi,
4264 mem_mode,
4265 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4266 speed, stmt_is_after_inc,
4267 can_autoinc));
4268
4269 /* Otherwise estimate the costs for computing the expression. */
4270 if (!symbol_present && !var_present && !offset)
4271 {
4272 if (ratio != 1)
4273 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4274 return cost;
4275 }
4276
4277 /* Symbol + offset should be compile-time computable, so consider them
4278 added once to the variable, if present. */
4279 if (var_present && (symbol_present || offset))
4280 cost.cost += adjust_setup_cost (data,
4281 add_cost (speed, TYPE_MODE (ctype)));
4282
4283 /* Having an offset does not affect the runtime cost if it is added to
4284 the symbol, but it increases complexity. */
4285 if (offset)
4286 cost.complexity++;
4287
4288 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4289
4290 aratio = ratio > 0 ? ratio : -ratio;
4291 if (aratio != 1)
4292 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4293 return cost;
4294
4295 fallback:
4296 if (can_autoinc)
4297 *can_autoinc = false;
4298
4299 {
4300 /* Just get the expression, expand it and measure the cost. */
4301 tree comp = get_computation_at (data->current_loop, use, cand, at);
4302
4303 if (!comp)
4304 return infinite_cost;
4305
4306 if (address_p)
4307 comp = build_simple_mem_ref (comp);
4308
4309 return new_cost (computation_cost (comp, speed), 0);
4310 }
4311 }
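
/* As a concrete (hypothetical) example: for the address use a[i] of a
global array a, with ustep 4 and a candidate with cbase 0 and cstep 1,
constant_multiple_of yields ratio == 4, the cst_and_fits_in_hwi (cbase)
branch computes difference_cost (&a, 0) with SYMBOL_PRESENT set, and
the result has the shape symbol + ratio * var; get_address_cost then
prices the corresponding addressing mode, e.g. something like
a(,%reg,4) on i386. */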
4312
4313 /* Determines the cost of the computation by which USE is expressed
4314 from induction variable CAND. If ADDRESS_P is true, we just need
4315 to create an address from it, otherwise we want to get it into a
4316 register. A set of invariants we depend on is stored in
4317 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4318 autoinc addressing is likely. */
4319
4320 static comp_cost
4321 get_computation_cost (struct ivopts_data *data,
4322 struct iv_use *use, struct iv_cand *cand,
4323 bool address_p, bitmap *depends_on,
4324 bool *can_autoinc, int *inv_expr_id)
4325 {
4326 return get_computation_cost_at (data,
4327 use, cand, address_p, depends_on, use->stmt,
4328 can_autoinc, inv_expr_id);
4329 }
4330
4331 /* Determines cost of basing replacement of USE on CAND in a generic
4332 expression. */
4333
4334 static bool
4335 determine_use_iv_cost_generic (struct ivopts_data *data,
4336 struct iv_use *use, struct iv_cand *cand)
4337 {
4338 bitmap depends_on;
4339 comp_cost cost;
4340 int inv_expr_id = -1;
4341
4342 /* The simple case first -- if we need to express the value of the preserved
4343 original biv, the cost is 0. This also prevents us from counting the
4344 cost of the increment twice -- once at this use and once in the cost of
4345 the candidate. */
4346 if (cand->pos == IP_ORIGINAL
4347 && cand->incremented_at == use->stmt)
4348 {
4349 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4350 ERROR_MARK, -1);
4351 return true;
4352 }
4353
4354 cost = get_computation_cost (data, use, cand, false, &depends_on,
4355 NULL, &inv_expr_id);
4356
4357 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4358 inv_expr_id);
4359
4360 return !infinite_cost_p (cost);
4361 }
4362
4363 /* Determines cost of basing replacement of USE on CAND in an address. */
4364
4365 static bool
4366 determine_use_iv_cost_address (struct ivopts_data *data,
4367 struct iv_use *use, struct iv_cand *cand)
4368 {
4369 bitmap depends_on;
4370 bool can_autoinc;
4371 int inv_expr_id = -1;
4372 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4373 &can_autoinc, &inv_expr_id);
4374
4375 if (cand->ainc_use == use)
4376 {
4377 if (can_autoinc)
4378 cost.cost -= cand->cost_step;
4379 /* If we generated the candidate solely for exploiting autoincrement
4380 opportunities, and it turns out it can't be used, set the cost to
4381 infinity to make sure we ignore it. */
4382 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4383 cost = infinite_cost;
4384 }
4385 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4386 inv_expr_id);
4387
4388 return !infinite_cost_p (cost);
4389 }
4390
4391 /* Computes value of candidate CAND at position AT in iteration NITER, and
4392 stores it to VAL. */
4393
4394 static void
4395 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4396 aff_tree *val)
4397 {
4398 aff_tree step, delta, nit;
4399 struct iv *iv = cand->iv;
4400 tree type = TREE_TYPE (iv->base);
4401 /* Use an unsigned type for the step; note that unsigned_type_for
4402 handles pointer types as well, so there is no need to special-case
4403 POINTER_TYPE_P here. */
4404 tree steptype = unsigned_type_for (type);
4405
4406 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4407 aff_combination_convert (&step, steptype);
4408 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4409 aff_combination_convert (&nit, steptype);
4410 aff_combination_mult (&nit, &step, &delta);
4411 if (stmt_after_increment (loop, cand, at))
4412 aff_combination_add (&delta, &step);
4413
4414 tree_to_aff_combination (iv->base, type, val);
4415 if (!POINTER_TYPE_P (type))
4416 aff_combination_convert (val, steptype);
4417 aff_combination_add (val, &delta);
4418 }
4419
4420 /* Returns period of induction variable iv. */
4421
4422 static tree
4423 iv_period (struct iv *iv)
4424 {
4425 tree step = iv->step, period, type;
4426 tree pow2div;
4427
4428 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4429
4430 type = unsigned_type_for (TREE_TYPE (step));
4431 /* The period of the iv is lcm (step, type_range) / step - 1,
4432 i.e., N * type_range / step - 1. Since the type range is a power
4433 of two, N == step >> num_of_ending_zeros_binary (step),
4434 so the final result is
4435
4436 (type_range >> num_of_ending_zeros_binary (step)) - 1
4437
4438 */
4439 pow2div = num_ending_zeros (step);
4440
4441 period = build_low_bits_mask (type,
4442 (TYPE_PRECISION (type)
4443 - tree_to_uhwi (pow2div)));
4444
4445 return period;
4446 }
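
/* For example, for a 32-bit unsigned iv with step 12 (== 4 * 3),
num_ending_zeros is 2, so the period is (2^32 >> 2) - 1 == 0x3fffffff;
the iv takes period + 1 distinct values before it repeats. A sketch of
the same computation on plain integers (hypothetical helper, assuming
the shift stays below the HOST_WIDE_INT width):

unsigned HOST_WIDE_INT
iv_period_value (unsigned HOST_WIDE_INT step, unsigned precision)
{
unsigned tz = ctz_hwi (step);
return ((unsigned HOST_WIDE_INT) 1 << (precision - tz)) - 1;
}
*/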
4447
4448 /* Returns the comparison operator used when eliminating the iv USE. */
4449
4450 static enum tree_code
4451 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4452 {
4453 struct loop *loop = data->current_loop;
4454 basic_block ex_bb;
4455 edge exit;
4456
4457 ex_bb = gimple_bb (use->stmt);
4458 exit = EDGE_SUCC (ex_bb, 0);
4459 if (flow_bb_inside_loop_p (loop, exit->dest))
4460 exit = EDGE_SUCC (ex_bb, 1);
4461
4462 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4463 }
4464
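/* Strips from EXP type conversions that change neither the value nor
whether the type of the value may wrap on overflow. */
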
4465 static tree
4466 strip_wrap_conserving_type_conversions (tree exp)
4467 {
4468 while (tree_ssa_useless_type_conversion (exp)
4469 && (nowrap_type_p (TREE_TYPE (exp))
4470 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4471 exp = TREE_OPERAND (exp, 0);
4472 return exp;
4473 }
4474
4475 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4476 only check for an exact match. */
4477
4478 static bool
4479 expr_equal_p (tree e, tree what)
4480 {
4481 gimple stmt;
4482 enum tree_code code;
4483
4484 e = strip_wrap_conserving_type_conversions (e);
4485 what = strip_wrap_conserving_type_conversions (what);
4486
4487 code = TREE_CODE (what);
4488 if (TREE_TYPE (e) != TREE_TYPE (what))
4489 return false;
4490
4491 if (operand_equal_p (e, what, 0))
4492 return true;
4493
4494 if (TREE_CODE (e) != SSA_NAME)
4495 return false;
4496
4497 stmt = SSA_NAME_DEF_STMT (e);
4498 if (gimple_code (stmt) != GIMPLE_ASSIGN
4499 || gimple_assign_rhs_code (stmt) != code)
4500 return false;
4501
4502 switch (get_gimple_rhs_class (code))
4503 {
4504 case GIMPLE_BINARY_RHS:
4505 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4506 return false;
4507 /* Fallthru. */
4508
4509 case GIMPLE_UNARY_RHS:
4510 case GIMPLE_SINGLE_RHS:
4511 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4512 default:
4513 return false;
4514 }
4515 }
4516
4517 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4518 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4519 calculation is performed in non-wrapping type.
4520
4521 TODO: More generally, we could test for the situation that
4522 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4523 This would require knowing the sign of OFFSET.
4524
4525 Also, we only look for the first addition in the computation of BASE.
4526 More complex analysis would be better, but introducing it just for
4527 this optimization seems like overkill. */
4528
4529 static bool
4530 difference_cannot_overflow_p (tree base, tree offset)
4531 {
4532 enum tree_code code;
4533 tree e1, e2;
4534
4535 if (!nowrap_type_p (TREE_TYPE (base)))
4536 return false;
4537
4538 base = expand_simple_operations (base);
4539
4540 if (TREE_CODE (base) == SSA_NAME)
4541 {
4542 gimple stmt = SSA_NAME_DEF_STMT (base);
4543
4544 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4545 return false;
4546
4547 code = gimple_assign_rhs_code (stmt);
4548 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4549 return false;
4550
4551 e1 = gimple_assign_rhs1 (stmt);
4552 e2 = gimple_assign_rhs2 (stmt);
4553 }
4554 else
4555 {
4556 code = TREE_CODE (base);
4557 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4558 return false;
4559 e1 = TREE_OPERAND (base, 0);
4560 e2 = TREE_OPERAND (base, 1);
4561 }
4562
4563 /* TODO: deeper inspection may be necessary to prove the equality. */
4564 switch (code)
4565 {
4566 case PLUS_EXPR:
4567 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4568 case POINTER_PLUS_EXPR:
4569 return expr_equal_p (e2, offset);
4570
4571 default:
4572 return false;
4573 }
4574 }
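
/* For instance, if BASE is a pointer defined as p_0 = q + a
(a POINTER_PLUS_EXPR) and OFFSET is a, the POINTER_PLUS_EXPR case
matches expr_equal_p (e2, offset), and BASE - OFFSET == q cannot
overflow because the pointer arithmetic is performed in a
non-wrapping type. This is exactly the situation exploited by
iv_elimination_compare_lt below. */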
4575
4576 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4577 comparison with CAND. NITER describes the number of iterations of
4578 the loops. If successful, the comparison in COMP_P is altered accordingly.
4579
4580 We aim to handle the following situation:
4581
4582 sometype *base, *p;
4583 int a, b, i;
4584
4585 i = a;
4586 p = p_0 = base + a;
4587
4588 do
4589 {
4590 bla (*p);
4591 p++;
4592 i++;
4593 }
4594 while (i < b);
4595
4596 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4597 We aim to optimize this to
4598
4599 p = p_0 = base + a;
4600 do
4601 {
4602 bla (*p);
4603 p++;
4604 }
4605 while (p < p_0 - a + b);
4606
4607 This preserves correctness, since the pointer arithmetic does not
4608 overflow. More precisely:
4609
4610 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4611 overflow in computing it or the values of p.
4612 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4613 overflow. To prove this, we use the fact that p_0 = base + a. */
4614
4615 static bool
4616 iv_elimination_compare_lt (struct ivopts_data *data,
4617 struct iv_cand *cand, enum tree_code *comp_p,
4618 struct tree_niter_desc *niter)
4619 {
4620 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4621 struct aff_tree nit, tmpa, tmpb;
4622 enum tree_code comp;
4623 HOST_WIDE_INT step;
4624
4625 /* We need to know that the candidate induction variable does not overflow.
4626 While more complex analysis may be used to prove this, for now just
4627 check that the variable appears in the original program and that it
4628 is computed in a type that guarantees no overflows. */
4629 cand_type = TREE_TYPE (cand->iv->base);
4630 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4631 return false;
4632
4633 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4634 the calculation of the BOUND could overflow, making the comparison
4635 invalid. */
4636 if (!data->loop_single_exit_p)
4637 return false;
4638
4639 /* We need to be able to decide whether candidate is increasing or decreasing
4640 in order to choose the right comparison operator. */
4641 if (!cst_and_fits_in_hwi (cand->iv->step))
4642 return false;
4643 step = int_cst_value (cand->iv->step);
4644
4645 /* Check that the number of iterations matches the expected pattern:
4646 a + 1 > b ? 0 : b - a - 1. */
4647 mbz = niter->may_be_zero;
4648 if (TREE_CODE (mbz) == GT_EXPR)
4649 {
4650 /* Handle a + 1 > b. */
4651 tree op0 = TREE_OPERAND (mbz, 0);
4652 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4653 {
4654 a = TREE_OPERAND (op0, 0);
4655 b = TREE_OPERAND (mbz, 1);
4656 }
4657 else
4658 return false;
4659 }
4660 else if (TREE_CODE (mbz) == LT_EXPR)
4661 {
4662 tree op1 = TREE_OPERAND (mbz, 1);
4663
4664 /* Handle b < a + 1. */
4665 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4666 {
4667 a = TREE_OPERAND (op1, 0);
4668 b = TREE_OPERAND (mbz, 0);
4669 }
4670 else
4671 return false;
4672 }
4673 else
4674 return false;
4675
4676 /* Expected number of iterations is B - A - 1. Check that it matches
4677 the actual number, i.e., that B - A - NITER = 1. */
4678 tree_to_aff_combination (niter->niter, nit_type, &nit);
4679 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4680 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4681 aff_combination_scale (&nit, -1);
4682 aff_combination_scale (&tmpa, -1);
4683 aff_combination_add (&tmpb, &tmpa);
4684 aff_combination_add (&tmpb, &nit);
4685 if (tmpb.n != 0 || tmpb.offset != 1)
4686 return false;
4687
4688 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4689 overflow. */
4690 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4691 cand->iv->step,
4692 fold_convert (TREE_TYPE (cand->iv->step), a));
4693 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4694 return false;
4695
4696 /* Determine the new comparison operator. */
4697 comp = step < 0 ? GT_EXPR : LT_EXPR;
4698 if (*comp_p == NE_EXPR)
4699 *comp_p = comp;
4700 else if (*comp_p == EQ_EXPR)
4701 *comp_p = invert_tree_comparison (comp, false);
4702 else
4703 gcc_unreachable ();
4704
4705 return true;
4706 }
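
/* To see the B - A - NITER == 1 check at work: with NITER = b - a - 1
(the expected pattern), the affine arithmetic above computes
tmpb = b + (-a) + (-niter) = b - a - (b - a - 1), which has no
remaining elements (tmpb.n == 0) and offset 1, so the check succeeds;
any other iteration count leaves a nonzero element or a different
offset and is rejected. */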
4707
4708 /* Check whether it is possible to express the condition in USE by comparison
4709 of candidate CAND. If so, store the value compared with to BOUND, and the
4710 comparison operator to COMP. */
4711
4712 static bool
4713 may_eliminate_iv (struct ivopts_data *data,
4714 struct iv_use *use, struct iv_cand *cand, tree *bound,
4715 enum tree_code *comp)
4716 {
4717 basic_block ex_bb;
4718 edge exit;
4719 tree period;
4720 struct loop *loop = data->current_loop;
4721 aff_tree bnd;
4722 struct tree_niter_desc *desc = NULL;
4723
4724 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4725 return false;
4726
4727 /* For now this works only for exits that dominate the loop latch.
4728 TODO: extend to other conditions inside the loop body. */
4729 ex_bb = gimple_bb (use->stmt);
4730 if (use->stmt != last_stmt (ex_bb)
4731 || gimple_code (use->stmt) != GIMPLE_COND
4732 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4733 return false;
4734
4735 exit = EDGE_SUCC (ex_bb, 0);
4736 if (flow_bb_inside_loop_p (loop, exit->dest))
4737 exit = EDGE_SUCC (ex_bb, 1);
4738 if (flow_bb_inside_loop_p (loop, exit->dest))
4739 return false;
4740
4741 desc = niter_for_exit (data, exit);
4742 if (!desc)
4743 return false;
4744
4745 /* Determine whether we can use the variable to test the exit condition.
4746 This is the case iff the period of the induction variable is greater
4747 than the number of iterations for which the exit condition is true. */
4748 period = iv_period (cand->iv);
4749
4750 /* If the number of iterations is constant, compare against it directly. */
4751 if (TREE_CODE (desc->niter) == INTEGER_CST)
4752 {
4753 /* See cand_value_at. */
4754 if (stmt_after_increment (loop, cand, use->stmt))
4755 {
4756 if (!tree_int_cst_lt (desc->niter, period))
4757 return false;
4758 }
4759 else
4760 {
4761 if (tree_int_cst_lt (period, desc->niter))
4762 return false;
4763 }
4764 }
4765
4766 /* If not, and if this is the only possible exit of the loop, see whether
4767 we can get a conservative estimate on the number of iterations of the
4768 entire loop and compare against that instead. */
4769 else
4770 {
4771 widest_int period_value, max_niter;
4772
4773 max_niter = desc->max;
4774 if (stmt_after_increment (loop, cand, use->stmt))
4775 max_niter += 1;
4776 period_value = wi::to_widest (period);
4777 if (wi::gtu_p (max_niter, period_value))
4778 {
4779 /* See if we can take advantage of inferred loop bound information. */
4780 if (data->loop_single_exit_p)
4781 {
4782 if (!max_loop_iterations (loop, &max_niter))
4783 return false;
4784 /* The loop bound is already adjusted by adding 1. */
4785 if (wi::gtu_p (max_niter, period_value))
4786 return false;
4787 }
4788 else
4789 return false;
4790 }
4791 }
4792
4793 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4794
4795 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4796 aff_combination_to_tree (&bnd));
4797 *comp = iv_elimination_compare (data, use);
4798
4799 /* It is unlikely that computing the number of iterations using division
4800 would be more profitable than keeping the original induction variable. */
4801 if (expression_expensive_p (*bound))
4802 return false;
4803
4804 /* Sometimes it is possible to handle the situation that the number of
4805 iterations may be zero unless additional assumptions hold, by using
4806 < instead of != in the exit condition.
4807
4808 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4809 base the exit condition on it. However, that is often too
4810 expensive. */
4811 if (!integer_zerop (desc->may_be_zero))
4812 return iv_elimination_compare_lt (data, cand, comp, desc);
4813
4814 return true;
4815 }
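
/* As a hypothetical example, in

for (i = 0; i < n; i++)
a[i] = 0;

with a pointer candidate p iterating over a, the exit test on i can be
replaced by comparing p against the BOUND computed by cand_value_at,
roughly p != &a[n] (or p < &a[n] when may_be_zero forces the
iv_elimination_compare_lt path), provided the candidate's period
covers the maximal number of iterations as checked above. */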
4816
4817 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4818 be copied, if it is used in the loop body and DATA->body_includes_call. */
4819
4820 static int
4821 parm_decl_cost (struct ivopts_data *data, tree bound)
4822 {
4823 tree sbound = bound;
4824 STRIP_NOPS (sbound);
4825
4826 if (TREE_CODE (sbound) == SSA_NAME
4827 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4828 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4829 && data->body_includes_call)
4830 return COSTS_N_INSNS (1);
4831
4832 return 0;
4833 }
4834
4835 /* Determines cost of basing replacement of USE on CAND in a condition. */
4836
4837 static bool
4838 determine_use_iv_cost_condition (struct ivopts_data *data,
4839 struct iv_use *use, struct iv_cand *cand)
4840 {
4841 tree bound = NULL_TREE;
4842 struct iv *cmp_iv;
4843 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4844 comp_cost elim_cost, express_cost, cost, bound_cost;
4845 bool ok;
4846 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4847 tree *control_var, *bound_cst;
4848 enum tree_code comp = ERROR_MARK;
4849
4850 /* Only consider real candidates. */
4851 if (!cand->iv)
4852 {
4853 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4854 ERROR_MARK, -1);
4855 return false;
4856 }
4857
4858 /* Try iv elimination. */
4859 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4860 {
4861 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4862 if (elim_cost.cost == 0)
4863 elim_cost.cost = parm_decl_cost (data, bound);
4864 else if (TREE_CODE (bound) == INTEGER_CST)
4865 elim_cost.cost = 0;
4866 /* If we replace a loop condition 'i < n' with 'p < base + n',
4867 depends_on_elim will have 'base' and 'n' set, which implies
4868 that both 'base' and 'n' will be live during the loop. More likely,
4869 'base + n' will be loop invariant, resulting in only one live value
4870 during the loop. So in that case we clear depends_on_elim and set
4871 elim_inv_expr_id instead. */
4872 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4873 {
4874 elim_inv_expr_id = get_expr_id (data, bound);
4875 bitmap_clear (depends_on_elim);
4876 }
4877 /* The bound is a loop invariant, so it will only be computed
4878 once. */
4879 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4880 }
4881 else
4882 elim_cost = infinite_cost;
4883
4884 /* Try expressing the original giv. If it is compared with an invariant,
4885 note that we cannot get rid of it. */
4886 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4887 NULL, &cmp_iv);
4888 gcc_assert (ok);
4889
4890 /* When the condition is a comparison of the candidate IV against
4891 zero, prefer this IV.
4892
4893 TODO: The constant that we're subtracting from the cost should
4894 be target-dependent. This information should be added to the
4895 target costs for each backend. */
4896 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4897 && integer_zerop (*bound_cst)
4898 && (operand_equal_p (*control_var, cand->var_after, 0)
4899 || operand_equal_p (*control_var, cand->var_before, 0)))
4900 elim_cost.cost -= 1;
4901
4902 express_cost = get_computation_cost (data, use, cand, false,
4903 &depends_on_express, NULL,
4904 &express_inv_expr_id);
4905 fd_ivopts_data = data;
4906 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4907
4908 /* Count the cost of the original bound as well. */
4909 bound_cost = force_var_cost (data, *bound_cst, NULL);
4910 if (bound_cost.cost == 0)
4911 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4912 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4913 bound_cost.cost = 0;
4914 express_cost.cost += bound_cost.cost;
4915
4916 /* Choose the better approach, preferring the eliminated IV. */
4917 if (compare_costs (elim_cost, express_cost) <= 0)
4918 {
4919 cost = elim_cost;
4920 depends_on = depends_on_elim;
4921 depends_on_elim = NULL;
4922 inv_expr_id = elim_inv_expr_id;
4923 }
4924 else
4925 {
4926 cost = express_cost;
4927 depends_on = depends_on_express;
4928 depends_on_express = NULL;
4929 bound = NULL_TREE;
4930 comp = ERROR_MARK;
4931 inv_expr_id = express_inv_expr_id;
4932 }
4933
4934 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4935
4936 if (depends_on_elim)
4937 BITMAP_FREE (depends_on_elim);
4938 if (depends_on_express)
4939 BITMAP_FREE (depends_on_express);
4940
4941 return !infinite_cost_p (cost);
4942 }
4943
4944 /* Determines cost of basing replacement of USE on CAND. Returns false
4945 if USE cannot be based on CAND. */
4946
4947 static bool
4948 determine_use_iv_cost (struct ivopts_data *data,
4949 struct iv_use *use, struct iv_cand *cand)
4950 {
4951 switch (use->type)
4952 {
4953 case USE_NONLINEAR_EXPR:
4954 return determine_use_iv_cost_generic (data, use, cand);
4955
4956 case USE_ADDRESS:
4957 return determine_use_iv_cost_address (data, use, cand);
4958
4959 case USE_COMPARE:
4960 return determine_use_iv_cost_condition (data, use, cand);
4961
4962 default:
4963 gcc_unreachable ();
4964 }
4965 }
4966
4967 /* Return true if get_computation_cost indicates that autoincrement is
4968 a possibility for the pair of USE and CAND, false otherwise. */
4969
4970 static bool
4971 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4972 struct iv_cand *cand)
4973 {
4974 bitmap depends_on;
4975 bool can_autoinc;
4976 comp_cost cost;
4977
4978 if (use->type != USE_ADDRESS)
4979 return false;
4980
4981 cost = get_computation_cost (data, use, cand, true, &depends_on,
4982 &can_autoinc, NULL);
4983
4984 BITMAP_FREE (depends_on);
4985
4986 return !infinite_cost_p (cost) && can_autoinc;
4987 }
4988
4989 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4990 use that allows autoincrement, and set their AINC_USE if possible. */
4991
4992 static void
4993 set_autoinc_for_original_candidates (struct ivopts_data *data)
4994 {
4995 unsigned i, j;
4996
4997 for (i = 0; i < n_iv_cands (data); i++)
4998 {
4999 struct iv_cand *cand = iv_cand (data, i);
5000 struct iv_use *closest_before = NULL;
5001 struct iv_use *closest_after = NULL;
5002 if (cand->pos != IP_ORIGINAL)
5003 continue;
5004
5005 for (j = 0; j < n_iv_uses (data); j++)
5006 {
5007 struct iv_use *use = iv_use (data, j);
5008 unsigned uid = gimple_uid (use->stmt);
5009
5010 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5011 continue;
5012
5013 if (uid < gimple_uid (cand->incremented_at)
5014 && (closest_before == NULL
5015 || uid > gimple_uid (closest_before->stmt)))
5016 closest_before = use;
5017
5018 if (uid > gimple_uid (cand->incremented_at)
5019 && (closest_after == NULL
5020 || uid < gimple_uid (closest_after->stmt)))
5021 closest_after = use;
5022 }
5023
5024 if (closest_before != NULL
5025 && autoinc_possible_for_pair (data, closest_before, cand))
5026 cand->ainc_use = closest_before;
5027 else if (closest_after != NULL
5028 && autoinc_possible_for_pair (data, closest_after, cand))
5029 cand->ainc_use = closest_after;
5030 }
5031 }
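
/* For example, if an original biv is incremented immediately after a
memory access whose address it feeds, that access becomes
CLOSEST_BEFORE, and when autoinc_possible_for_pair confirms that the
increment can be folded into the access (e.g. via a post-increment
addressing mode on the target), the candidate's AINC_USE is set
accordingly. */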
5032
5033 /* Finds the candidates for the induction variables. */
5034
5035 static void
5036 find_iv_candidates (struct ivopts_data *data)
5037 {
5038 /* Add commonly used ivs. */
5039 add_standard_iv_candidates (data);
5040
5041 /* Add old induction variables. */
5042 add_old_ivs_candidates (data);
5043
5044 /* Add induction variables derived from uses. */
5045 add_derived_ivs_candidates (data);
5046
5047 set_autoinc_for_original_candidates (data);
5048
5049 /* Record the important candidates. */
5050 record_important_candidates (data);
5051 }
5052
5053 /* Determines costs of basing the use of the iv on an iv candidate. */
5054
5055 static void
5056 determine_use_iv_costs (struct ivopts_data *data)
5057 {
5058 unsigned i, j;
5059 struct iv_use *use;
5060 struct iv_cand *cand;
5061 bitmap to_clear = BITMAP_ALLOC (NULL);
5062
5063 alloc_use_cost_map (data);
5064
5065 for (i = 0; i < n_iv_uses (data); i++)
5066 {
5067 use = iv_use (data, i);
5068
5069 if (data->consider_all_candidates)
5070 {
5071 for (j = 0; j < n_iv_cands (data); j++)
5072 {
5073 cand = iv_cand (data, j);
5074 determine_use_iv_cost (data, use, cand);
5075 }
5076 }
5077 else
5078 {
5079 bitmap_iterator bi;
5080
5081 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5082 {
5083 cand = iv_cand (data, j);
5084 if (!determine_use_iv_cost (data, use, cand))
5085 bitmap_set_bit (to_clear, j);
5086 }
5087
5088 /* Remove the candidates for which the cost is infinite from
5089 the list of related candidates. */
5090 bitmap_and_compl_into (use->related_cands, to_clear);
5091 bitmap_clear (to_clear);
5092 }
5093 }
5094
5095 BITMAP_FREE (to_clear);
5096
5097 if (dump_file && (dump_flags & TDF_DETAILS))
5098 {
5099 fprintf (dump_file, "Use-candidate costs:\n");
5100
5101 for (i = 0; i < n_iv_uses (data); i++)
5102 {
5103 use = iv_use (data, i);
5104
5105 fprintf (dump_file, "Use %d:\n", i);
5106 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5107 for (j = 0; j < use->n_map_members; j++)
5108 {
5109 if (!use->cost_map[j].cand
5110 || infinite_cost_p (use->cost_map[j].cost))
5111 continue;
5112
5113 fprintf (dump_file, " %d\t%d\t%d\t",
5114 use->cost_map[j].cand->id,
5115 use->cost_map[j].cost.cost,
5116 use->cost_map[j].cost.complexity);
5117 if (use->cost_map[j].depends_on)
5118 bitmap_print (dump_file,
5119 use->cost_map[j].depends_on, "", "");
5120 if (use->cost_map[j].inv_expr_id != -1)
5121 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5122 fprintf (dump_file, "\n");
5123 }
5124
5125 fprintf (dump_file, "\n");
5126 }
5127 fprintf (dump_file, "\n");
5128 }
5129 }
5130
5131 /* Determines cost of the candidate CAND. */
5132
5133 static void
5134 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5135 {
5136 comp_cost cost_base;
5137 unsigned cost, cost_step;
5138 tree base;
5139
5140 if (!cand->iv)
5141 {
5142 cand->cost = 0;
5143 return;
5144 }
5145
5146 /* There are two costs associated with the candidate -- its increment
5147 and its initialization. The second is almost negligible for any loop
5148 that rolls enough, so we take it into account only very little. */
5149
5150 base = cand->iv->base;
5151 cost_base = force_var_cost (data, base, NULL);
5152 /* It would be exceptional for the iv register to happen to be initialized
5153 with the proper value at no cost. In general, there will at least be a
5154 regcopy or a const set. */
5155 if (cost_base.cost == 0)
5156 cost_base.cost = COSTS_N_INSNS (1);
5157 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5158
5159 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5160
5161 /* Prefer the original ivs unless we may gain something by replacing them.
5162 The reason is to make debugging simpler; this is not relevant for
5163 artificial ivs created by other optimization passes. */
5164 if (cand->pos != IP_ORIGINAL
5165 || !SSA_NAME_VAR (cand->var_before)
5166 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5167 cost++;
5168
5169 /* Prefer not to insert statements into the latch unless there are some
5170 already (so that we do not create unnecessary jumps). */
5171 if (cand->pos == IP_END
5172 && empty_block_p (ip_end_pos (data->current_loop)))
5173 cost++;
5174
5175 cand->cost = cost;
5176 cand->cost_step = cost_step;
5177 }
5178
5179 /* Determines costs of computation of the candidates. */
5180
5181 static void
5182 determine_iv_costs (struct ivopts_data *data)
5183 {
5184 unsigned i;
5185
5186 if (dump_file && (dump_flags & TDF_DETAILS))
5187 {
5188 fprintf (dump_file, "Candidate costs:\n");
5189 fprintf (dump_file, " cand\tcost\n");
5190 }
5191
5192 for (i = 0; i < n_iv_cands (data); i++)
5193 {
5194 struct iv_cand *cand = iv_cand (data, i);
5195
5196 determine_iv_cost (data, cand);
5197
5198 if (dump_file && (dump_flags & TDF_DETAILS))
5199 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5200 }
5201
5202 if (dump_file && (dump_flags & TDF_DETAILS))
5203 fprintf (dump_file, "\n");
5204 }
5205
5206 /* Calculates cost for having SIZE induction variables. */
5207
5208 static unsigned
5209 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5210 {
5211 /* We add size to the cost, so that we prefer eliminating ivs
5212 if possible. */
5213 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5214 data->body_includes_call);
5215 }
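
/* For instance, while size + regs_used stays comfortably below
target_avail_regs, estimate_reg_pressure_cost contributes nothing and
the result is just SIZE; as the pressure approaches and then exceeds
the available registers, the estimate grows by the target's register
cost and eventually its (much larger) spill cost per iv.
(Illustrative description only; the exact thresholds live in
estimate_reg_pressure_cost.) */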
5216
5217 /* For each size of the induction variable set determine the penalty. */
5218
5219 static void
5220 determine_set_costs (struct ivopts_data *data)
5221 {
5222 unsigned j, n;
5223 gimple phi;
5224 gimple_stmt_iterator psi;
5225 tree op;
5226 struct loop *loop = data->current_loop;
5227 bitmap_iterator bi;
5228
5229 if (dump_file && (dump_flags & TDF_DETAILS))
5230 {
5231 fprintf (dump_file, "Global costs:\n");
5232 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5233 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5234 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5235 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5236 }
5237
5238 n = 0;
5239 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5240 {
5241 phi = gsi_stmt (psi);
5242 op = PHI_RESULT (phi);
5243
5244 if (virtual_operand_p (op))
5245 continue;
5246
5247 if (get_iv (data, op))
5248 continue;
5249
5250 n++;
5251 }
5252
5253 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5254 {
5255 struct version_info *info = ver_info (data, j);
5256
5257 if (info->inv_id && info->has_nonlin_use)
5258 n++;
5259 }
5260
5261 data->regs_used = n;
5262 if (dump_file && (dump_flags & TDF_DETAILS))
5263 fprintf (dump_file, " regs_used %d\n", n);
5264
5265 if (dump_file && (dump_flags & TDF_DETAILS))
5266 {
5267 fprintf (dump_file, " cost for size:\n");
5268 fprintf (dump_file, " ivs\tcost\n");
5269 for (j = 0; j <= 2 * target_avail_regs; j++)
5270 fprintf (dump_file, " %d\t%d\n", j,
5271 ivopts_global_cost_for_size (data, j));
5272 fprintf (dump_file, "\n");
5273 }
5274 }
5275
5276 /* Returns true if A is a cheaper cost pair than B. */
5277
5278 static bool
5279 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5280 {
5281 int cmp;
5282
5283 if (!a)
5284 return false;
5285
5286 if (!b)
5287 return true;
5288
5289 cmp = compare_costs (a->cost, b->cost);
5290 if (cmp < 0)
5291 return true;
5292
5293 if (cmp > 0)
5294 return false;
5295
5296 /* In case the costs are the same, prefer the cheaper candidate. */
5297 if (a->cand->cost < b->cand->cost)
5298 return true;
5299
5300 return false;
5301 }
5302
5303
5304 /* Returns the candidate by which USE is expressed in IVS. */
5305
5306 static struct cost_pair *
5307 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5308 {
5309 return ivs->cand_for_use[use->id];
5310 }
5311
5312 /* Computes the cost field of IVS structure. */
5313
5314 static void
5315 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5316 {
5317 comp_cost cost = ivs->cand_use_cost;
5318
5319 cost.cost += ivs->cand_cost;
5320
5321 cost.cost += ivopts_global_cost_for_size (data,
5322 ivs->n_regs + ivs->num_used_inv_expr);
5323
5324 ivs->cost = cost;
5325 }
5326
5327 /* Remove the invariants in set INVS from set IVS. */
5328
5329 static void
5330 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5331 {
5332 bitmap_iterator bi;
5333 unsigned iid;
5334
5335 if (!invs)
5336 return;
5337
5338 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5339 {
5340 ivs->n_invariant_uses[iid]--;
5341 if (ivs->n_invariant_uses[iid] == 0)
5342 ivs->n_regs--;
5343 }
5344 }
5345
5346 /* Set USE not to be expressed by any candidate in IVS. */
5347
5348 static void
5349 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5350 struct iv_use *use)
5351 {
5352 unsigned uid = use->id, cid;
5353 struct cost_pair *cp;
5354
5355 cp = ivs->cand_for_use[uid];
5356 if (!cp)
5357 return;
5358 cid = cp->cand->id;
5359
5360 ivs->bad_uses++;
5361 ivs->cand_for_use[uid] = NULL;
5362 ivs->n_cand_uses[cid]--;
5363
5364 if (ivs->n_cand_uses[cid] == 0)
5365 {
5366 bitmap_clear_bit (ivs->cands, cid);
5367 /* Do not count the pseudocandidates. */
5368 if (cp->cand->iv)
5369 ivs->n_regs--;
5370 ivs->n_cands--;
5371 ivs->cand_cost -= cp->cand->cost;
5372
5373 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5374 }
5375
5376 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5377
5378 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5379
5380 if (cp->inv_expr_id != -1)
5381 {
5382 ivs->used_inv_expr[cp->inv_expr_id]--;
5383 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5384 ivs->num_used_inv_expr--;
5385 }
5386 iv_ca_recount_cost (data, ivs);
5387 }
5388
5389 /* Add invariants in set INVS to set IVS. */
5390
5391 static void
5392 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5393 {
5394 bitmap_iterator bi;
5395 unsigned iid;
5396
5397 if (!invs)
5398 return;
5399
5400 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5401 {
5402 ivs->n_invariant_uses[iid]++;
5403 if (ivs->n_invariant_uses[iid] == 1)
5404 ivs->n_regs++;
5405 }
5406 }
5407
5408 /* Set cost pair for USE in set IVS to CP. */
5409
5410 static void
5411 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5412 struct iv_use *use, struct cost_pair *cp)
5413 {
5414 unsigned uid = use->id, cid;
5415
5416 if (ivs->cand_for_use[uid] == cp)
5417 return;
5418
5419 if (ivs->cand_for_use[uid])
5420 iv_ca_set_no_cp (data, ivs, use);
5421
5422 if (cp)
5423 {
5424 cid = cp->cand->id;
5425
5426 ivs->bad_uses--;
5427 ivs->cand_for_use[uid] = cp;
5428 ivs->n_cand_uses[cid]++;
5429 if (ivs->n_cand_uses[cid] == 1)
5430 {
5431 bitmap_set_bit (ivs->cands, cid);
5432 /* Do not count the pseudocandidates. */
5433 if (cp->cand->iv)
5434 ivs->n_regs++;
5435 ivs->n_cands++;
5436 ivs->cand_cost += cp->cand->cost;
5437
5438 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5439 }
5440
5441 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5442 iv_ca_set_add_invariants (ivs, cp->depends_on);
5443
5444 if (cp->inv_expr_id != -1)
5445 {
5446 ivs->used_inv_expr[cp->inv_expr_id]++;
5447 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5448 ivs->num_used_inv_expr++;
5449 }
5450 iv_ca_recount_cost (data, ivs);
5451 }
5452 }
5453
5454 /* Extend set IVS by expressing USE by some of the candidates in it
5455 if possible. All important candidates will be considered
5456 if IMPORTANT_CANDIDATES is true. */
5457
5458 static void
5459 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5460 struct iv_use *use, bool important_candidates)
5461 {
5462 struct cost_pair *best_cp = NULL, *cp;
5463 bitmap_iterator bi;
5464 bitmap cands;
5465 unsigned i;
5466
5467 gcc_assert (ivs->upto >= use->id);
5468
5469 if (ivs->upto == use->id)
5470 {
5471 ivs->upto++;
5472 ivs->bad_uses++;
5473 }
5474
5475 cands = (important_candidates ? data->important_candidates : ivs->cands);
5476 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5477 {
5478 struct iv_cand *cand = iv_cand (data, i);
5479
5480 cp = get_use_iv_cost (data, use, cand);
5481
5482 if (cheaper_cost_pair (cp, best_cp))
5483 best_cp = cp;
5484 }
5485
5486 iv_ca_set_cp (data, ivs, use, best_cp);
5487 }
5488
5489 /* Get cost for assignment IVS. */
5490
5491 static comp_cost
5492 iv_ca_cost (struct iv_ca *ivs)
5493 {
5494 /* This was a conditional expression but it triggered a bug in
5495 Sun C 5.5. */
5496 if (ivs->bad_uses)
5497 return infinite_cost;
5498 else
5499 return ivs->cost;
5500 }
5501
5502 /* Returns true if all dependences of CP are among invariants in IVS. */
5503
5504 static bool
5505 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5506 {
5507 unsigned i;
5508 bitmap_iterator bi;
5509
5510 if (!cp->depends_on)
5511 return true;
5512
5513 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5514 {
5515 if (ivs->n_invariant_uses[i] == 0)
5516 return false;
5517 }
5518
5519 return true;
5520 }
5521
5522 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP, and
5523 chains it before NEXT_CHANGE. */
5524
5525 static struct iv_ca_delta *
5526 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5527 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5528 {
5529 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5530
5531 change->use = use;
5532 change->old_cp = old_cp;
5533 change->new_cp = new_cp;
5534 change->next_change = next_change;
5535
5536 return change;
5537 }
5538
5539 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5540 are rewritten. */
5541
5542 static struct iv_ca_delta *
5543 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5544 {
5545 struct iv_ca_delta *last;
5546
5547 if (!l2)
5548 return l1;
5549
5550 if (!l1)
5551 return l2;
5552
5553 for (last = l1; last->next_change; last = last->next_change)
5554 continue;
5555 last->next_change = l2;
5556
5557 return l1;
5558 }
5559
5560 /* Reverse the list of changes DELTA, forming the inverse to it. */
5561
5562 static struct iv_ca_delta *
5563 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5564 {
5565 struct iv_ca_delta *act, *next, *prev = NULL;
5566 struct cost_pair *tmp;
5567
5568 for (act = delta; act; act = next)
5569 {
5570 next = act->next_change;
5571 act->next_change = prev;
5572 prev = act;
5573
5574 tmp = act->old_cp;
5575 act->old_cp = act->new_cp;
5576 act->new_cp = tmp;
5577 }
5578
5579 return prev;
5580 }
5581
5582 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5583 reverted instead. */
5584
5585 static void
5586 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5587 struct iv_ca_delta *delta, bool forward)
5588 {
5589 struct cost_pair *from, *to;
5590 struct iv_ca_delta *act;
5591
5592 if (!forward)
5593 delta = iv_ca_delta_reverse (delta);
5594
5595 for (act = delta; act; act = act->next_change)
5596 {
5597 from = act->old_cp;
5598 to = act->new_cp;
5599 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5600 iv_ca_set_cp (data, ivs, act->use, to);
5601 }
5602
5603 if (!forward)
5604 iv_ca_delta_reverse (delta);
5605 }
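
/* The delta lists thus act as an undo log. The typical pattern, as
used by iv_ca_extend and iv_ca_narrow below, is to apply a delta
tentatively, measure, and revert:

iv_ca_delta_commit (data, ivs, *delta, true);
cost = iv_ca_cost (ivs);
iv_ca_delta_commit (data, ivs, *delta, false);
*/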
5606
5607 /* Returns true if CAND is used in IVS. */
5608
5609 static bool
5610 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5611 {
5612 return ivs->n_cand_uses[cand->id] > 0;
5613 }
5614
5615 /* Returns number of induction variable candidates in the set IVS. */
5616
5617 static unsigned
5618 iv_ca_n_cands (struct iv_ca *ivs)
5619 {
5620 return ivs->n_cands;
5621 }
5622
5623 /* Free the list of changes DELTA. */
5624
5625 static void
5626 iv_ca_delta_free (struct iv_ca_delta **delta)
5627 {
5628 struct iv_ca_delta *act, *next;
5629
5630 for (act = *delta; act; act = next)
5631 {
5632 next = act->next_change;
5633 free (act);
5634 }
5635
5636 *delta = NULL;
5637 }
5638
5639 /* Allocates new iv candidates assignment. */
5640
5641 static struct iv_ca *
5642 iv_ca_new (struct ivopts_data *data)
5643 {
5644 struct iv_ca *nw = XNEW (struct iv_ca);
5645
5646 nw->upto = 0;
5647 nw->bad_uses = 0;
5648 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5649 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5650 nw->cands = BITMAP_ALLOC (NULL);
5651 nw->n_cands = 0;
5652 nw->n_regs = 0;
5653 nw->cand_use_cost = no_cost;
5654 nw->cand_cost = 0;
5655 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5656 nw->cost = no_cost;
5657 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5658 nw->num_used_inv_expr = 0;
5659
5660 return nw;
5661 }
5662
5663 /* Free memory occupied by the set IVS. */
5664
5665 static void
5666 iv_ca_free (struct iv_ca **ivs)
5667 {
5668 free ((*ivs)->cand_for_use);
5669 free ((*ivs)->n_cand_uses);
5670 BITMAP_FREE ((*ivs)->cands);
5671 free ((*ivs)->n_invariant_uses);
5672 free ((*ivs)->used_inv_expr);
5673 free (*ivs);
5674 *ivs = NULL;
5675 }
5676
5677 /* Dumps IVS to FILE. */
5678
5679 static void
5680 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5681 {
5682 const char *pref = " invariants ";
5683 unsigned i;
5684 comp_cost cost = iv_ca_cost (ivs);
5685
5686 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5687 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5688 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5689 bitmap_print (file, ivs->cands, " candidates: ", "\n");
5690
5691 for (i = 0; i < ivs->upto; i++)
5692 {
5693 struct iv_use *use = iv_use (data, i);
5694 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5695 if (cp)
5696 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5697 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5698 else
5699 fprintf (file, " use:%d --> ??\n", use->id);
5700 }
5701
5702 for (i = 1; i <= data->max_inv_id; i++)
5703 if (ivs->n_invariant_uses[i])
5704 {
5705 fprintf (file, "%s%d", pref, i);
5706 pref = ", ";
5707 }
5708 fprintf (file, "\n\n");
5709 }
5710
5711 /* Try changing the candidate in IVS to CAND for each use. Return the
5712 cost of the new set, and store the differences in DELTA. The number of
5713 induction variables in the new set is stored in N_IVS. When MIN_NCAND
5714 is true, the function tries to find a solution with minimal iv candidates. */
5715
5716 static comp_cost
5717 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5718 struct iv_cand *cand, struct iv_ca_delta **delta,
5719 unsigned *n_ivs, bool min_ncand)
5720 {
5721 unsigned i;
5722 comp_cost cost;
5723 struct iv_use *use;
5724 struct cost_pair *old_cp, *new_cp;
5725
5726 *delta = NULL;
5727 for (i = 0; i < ivs->upto; i++)
5728 {
5729 use = iv_use (data, i);
5730 old_cp = iv_ca_cand_for_use (ivs, use);
5731
5732 if (old_cp
5733 && old_cp->cand == cand)
5734 continue;
5735
5736 new_cp = get_use_iv_cost (data, use, cand);
5737 if (!new_cp)
5738 continue;
5739
5740 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5741 continue;
5742
5743 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5744 continue;
5745
5746 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5747 }
5748
5749 iv_ca_delta_commit (data, ivs, *delta, true);
5750 cost = iv_ca_cost (ivs);
5751 if (n_ivs)
5752 *n_ivs = iv_ca_n_cands (ivs);
5753 iv_ca_delta_commit (data, ivs, *delta, false);
5754
5755 return cost;
5756 }
5757
5758 /* Try narrowing set IVS by removing CAND. Return the cost of
5759 the new set and store the differences in DELTA. START is
5760 the candidate with which we start narrowing. */
5761
5762 static comp_cost
5763 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5764 struct iv_cand *cand, struct iv_cand *start,
5765 struct iv_ca_delta **delta)
5766 {
5767 unsigned i, ci;
5768 struct iv_use *use;
5769 struct cost_pair *old_cp, *new_cp, *cp;
5770 bitmap_iterator bi;
5771 struct iv_cand *cnd;
5772 comp_cost cost, best_cost, acost;
5773
5774 *delta = NULL;
5775 for (i = 0; i < n_iv_uses (data); i++)
5776 {
5777 use = iv_use (data, i);
5778
5779 old_cp = iv_ca_cand_for_use (ivs, use);
5780 if (old_cp->cand != cand)
5781 continue;
5782
5783 best_cost = iv_ca_cost (ivs);
5784 /* Start narrowing with START. */
5785 new_cp = get_use_iv_cost (data, use, start);
5786
5787 if (data->consider_all_candidates)
5788 {
5789 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5790 {
5791 if (ci == cand->id || (start && ci == start->id))
5792 continue;
5793
5794 cnd = iv_cand (data, ci);
5795
5796 cp = get_use_iv_cost (data, use, cnd);
5797 if (!cp)
5798 continue;
5799
5800 iv_ca_set_cp (data, ivs, use, cp);
5801 acost = iv_ca_cost (ivs);
5802
5803 if (compare_costs (acost, best_cost) < 0)
5804 {
5805 best_cost = acost;
5806 new_cp = cp;
5807 }
5808 }
5809 }
5810 else
5811 {
5812 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5813 {
5814 if (ci == cand->id || (start && ci == start->id))
5815 continue;
5816
5817 cnd = iv_cand (data, ci);
5818
5819 cp = get_use_iv_cost (data, use, cnd);
5820 if (!cp)
5821 continue;
5822
5823 iv_ca_set_cp (data, ivs, use, cp);
5824 acost = iv_ca_cost (ivs);
5825
5826 if (compare_costs (acost, best_cost) < 0)
5827 {
5828 best_cost = acost;
5829 new_cp = cp;
5830 }
5831 }
5832 }
5833 /* Restore to old cp for use. */
5834 iv_ca_set_cp (data, ivs, use, old_cp);
5835
5836 if (!new_cp)
5837 {
5838 iv_ca_delta_free (delta);
5839 return infinite_cost;
5840 }
5841
5842 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5843 }
5844
5845 iv_ca_delta_commit (data, ivs, *delta, true);
5846 cost = iv_ca_cost (ivs);
5847 iv_ca_delta_commit (data, ivs, *delta, false);
5848
5849 return cost;
5850 }
5851
5852 /* Try optimizing the set of candidates IVS by removing candidates other
5853 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5854 differences in DELTA. */
5855
5856 static comp_cost
5857 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5858 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5859 {
5860 bitmap_iterator bi;
5861 struct iv_ca_delta *act_delta, *best_delta;
5862 unsigned i;
5863 comp_cost best_cost, acost;
5864 struct iv_cand *cand;
5865
5866 best_delta = NULL;
5867 best_cost = iv_ca_cost (ivs);
5868
5869 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5870 {
5871 cand = iv_cand (data, i);
5872
5873 if (cand == except_cand)
5874 continue;
5875
5876 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5877
5878 if (compare_costs (acost, best_cost) < 0)
5879 {
5880 best_cost = acost;
5881 iv_ca_delta_free (&best_delta);
5882 best_delta = act_delta;
5883 }
5884 else
5885 iv_ca_delta_free (&act_delta);
5886 }
5887
5888 if (!best_delta)
5889 {
5890 *delta = NULL;
5891 return best_cost;
5892 }
5893
5894 /* Recurse to possibly remove other unnecessary ivs. */
5895 iv_ca_delta_commit (data, ivs, best_delta, true);
5896 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5897 iv_ca_delta_commit (data, ivs, best_delta, false);
5898 *delta = iv_ca_delta_join (best_delta, *delta);
5899 return best_cost;
5900 }
5901
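/* For illustration, pruning is greedy and cascading: removing one
   candidate can make another removable.  Starting from a hypothetical
   set {c1, c2, c3}, one call might proceed

       {c1, c2, c3}  ->  {c1, c3}  ->  {c1}

   where dropping c3 only became profitable once c2 was gone; the
   recursive call above is what discovers such second-order removals.
   EXCEPT_CAND, when non-NULL, is never considered for removal.  */
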
5902 /* Tries to extend the set IVS in the best possible way in order
5903 to express the USE. If ORIGINALP is true, prefer candidates from
5904 the original set of IVs, otherwise favor important candidates not
5905 based on any memory object. */
5906
5907 static bool
5908 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5909 struct iv_use *use, bool originalp)
5910 {
5911 comp_cost best_cost, act_cost;
5912 unsigned i;
5913 bitmap_iterator bi;
5914 struct iv_cand *cand;
5915 struct iv_ca_delta *best_delta = NULL, *act_delta;
5916 struct cost_pair *cp;
5917
5918 iv_ca_add_use (data, ivs, use, false);
5919 best_cost = iv_ca_cost (ivs);
5920
5921 cp = iv_ca_cand_for_use (ivs, use);
5922 if (!cp)
5923 {
5924 ivs->upto--;
5925 ivs->bad_uses--;
5926 iv_ca_add_use (data, ivs, use, true);
5927 best_cost = iv_ca_cost (ivs);
5928 cp = iv_ca_cand_for_use (ivs, use);
5929 }
5930 if (cp)
5931 {
5932 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5933 iv_ca_set_no_cp (data, ivs, use);
5934 }
5935
5936 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5937 first try important candidates not based on any memory object. Only if
5938 this fails, try the specific ones. Rationale -- in loops with many
5939 variables the best choice often is to use just one generic biv. If we
5940 added many ivs specific to the uses here, the optimization algorithm later
5941 would be likely to get stuck in a local minimum, thus causing us to create
5942 too many ivs. Going from few ivs to more seems more likely to be
5943 successful -- starting from few ivs, replacing an expensive use by a
5944 specific iv should always be a win. */
5945 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5946 {
5947 cand = iv_cand (data, i);
5948
5949 if (originalp && cand->pos != IP_ORIGINAL)
5950 continue;
5951
5952 if (!originalp && cand->iv->base_object != NULL_TREE)
5953 continue;
5954
5955 if (iv_ca_cand_used_p (ivs, cand))
5956 continue;
5957
5958 cp = get_use_iv_cost (data, use, cand);
5959 if (!cp)
5960 continue;
5961
5962 iv_ca_set_cp (data, ivs, use, cp);
5963 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5964 true);
5965 iv_ca_set_no_cp (data, ivs, use);
5966 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5967
5968 if (compare_costs (act_cost, best_cost) < 0)
5969 {
5970 best_cost = act_cost;
5971
5972 iv_ca_delta_free (&best_delta);
5973 best_delta = act_delta;
5974 }
5975 else
5976 iv_ca_delta_free (&act_delta);
5977 }
5978
5979 if (infinite_cost_p (best_cost))
5980 {
5981 for (i = 0; i < use->n_map_members; i++)
5982 {
5983 cp = use->cost_map + i;
5984 cand = cp->cand;
5985 if (!cand)
5986 continue;
5987
5988 /* Already tried this. */
5989 if (cand->important)
5990 {
5991 if (originalp && cand->pos == IP_ORIGINAL)
5992 continue;
5993 if (!originalp && cand->iv->base_object == NULL_TREE)
5994 continue;
5995 }
5996
5997 if (iv_ca_cand_used_p (ivs, cand))
5998 continue;
5999
6000 act_delta = NULL;
6001 iv_ca_set_cp (data, ivs, use, cp);
6002 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6003 iv_ca_set_no_cp (data, ivs, use);
6004 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6005 cp, act_delta);
6006
6007 if (compare_costs (act_cost, best_cost) < 0)
6008 {
6009 best_cost = act_cost;
6010
6011 if (best_delta)
6012 iv_ca_delta_free (&best_delta);
6013 best_delta = act_delta;
6014 }
6015 else
6016 iv_ca_delta_free (&act_delta);
6017 }
6018 }
6019
6020 iv_ca_delta_commit (data, ivs, best_delta, true);
6021 iv_ca_delta_free (&best_delta);
6022
6023 return !infinite_cost_p (best_cost);
6024 }
6025
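/* For illustration, in a hypothetical loop with several address uses

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

   try_add_cand_for first offers every use the generic counter i (an
   important candidate with no base object) before considering per-array
   pointer ivs for a, b and c; the use-specific candidates are consulted
   only when no important candidate yields a finite cost.  */
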
6026 /* Finds an initial assignment of candidates to uses. */
6027
6028 static struct iv_ca *
6029 get_initial_solution (struct ivopts_data *data, bool originalp)
6030 {
6031 struct iv_ca *ivs = iv_ca_new (data);
6032 unsigned i;
6033
6034 for (i = 0; i < n_iv_uses (data); i++)
6035 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6036 {
6037 iv_ca_free (&ivs);
6038 return NULL;
6039 }
6040
6041 return ivs;
6042 }
6043
6044 /* Tries to improve the set of induction variables IVS. */
6045
6046 static bool
6047 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6048 {
6049 unsigned i, n_ivs;
6050 comp_cost acost, best_cost = iv_ca_cost (ivs);
6051 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6052 struct iv_cand *cand;
6053
6054 /* Try extending the set of induction variables by one. */
6055 for (i = 0; i < n_iv_cands (data); i++)
6056 {
6057 cand = iv_cand (data, i);
6058
6059 if (iv_ca_cand_used_p (ivs, cand))
6060 continue;
6061
6062 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6063 if (!act_delta)
6064 continue;
6065
6066 /* If we successfully added the candidate and the set is small enough,
6067 try optimizing it by removing other candidates. */
6068 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6069 {
6070 iv_ca_delta_commit (data, ivs, act_delta, true);
6071 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6072 iv_ca_delta_commit (data, ivs, act_delta, false);
6073 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6074 }
6075
6076 if (compare_costs (acost, best_cost) < 0)
6077 {
6078 best_cost = acost;
6079 iv_ca_delta_free (&best_delta);
6080 best_delta = act_delta;
6081 }
6082 else
6083 iv_ca_delta_free (&act_delta);
6084 }
6085
6086 if (!best_delta)
6087 {
6088 /* Try removing the candidates from the set instead. */
6089 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6090
6091 /* Nothing more we can do. */
6092 if (!best_delta)
6093 return false;
6094 }
6095
6096 iv_ca_delta_commit (data, ivs, best_delta, true);
6097 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6098 iv_ca_delta_free (&best_delta);
6099 return true;
6100 }
6101
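/* For illustration, the driver below runs this as a plain hill climb:

       while (try_improve_iv_set (data, set))
         continue;

   Each successful step commits the single cheapest one-candidate change
   (an extension, possibly followed by pruning), so the search stops at
   the first set that no such move improves -- a local minimum of the
   cost function.  */
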
6102 /* Attempts to find the optimal set of induction variables. We use a simple
6103 greedy heuristic -- we try to replace at most one candidate in the selected
6104 solution and remove the unused ivs while this improves the cost. */
6105
6106 static struct iv_ca *
6107 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6108 {
6109 struct iv_ca *set;
6110
6111 /* Get the initial solution. */
6112 set = get_initial_solution (data, originalp);
6113 if (!set)
6114 {
6115 if (dump_file && (dump_flags & TDF_DETAILS))
6116 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6117 return NULL;
6118 }
6119
6120 if (dump_file && (dump_flags & TDF_DETAILS))
6121 {
6122 fprintf (dump_file, "Initial set of candidates:\n");
6123 iv_ca_dump (data, dump_file, set);
6124 }
6125
6126 while (try_improve_iv_set (data, set))
6127 {
6128 if (dump_file && (dump_flags & TDF_DETAILS))
6129 {
6130 fprintf (dump_file, "Improved to:\n");
6131 iv_ca_dump (data, dump_file, set);
6132 }
6133 }
6134
6135 return set;
6136 }
6137
6138 static struct iv_ca *
6139 find_optimal_iv_set (struct ivopts_data *data)
6140 {
6141 unsigned i;
6142 struct iv_ca *set, *origset;
6143 struct iv_use *use;
6144 comp_cost cost, origcost;
6145
6146 /* Determine the cost using a strategy that prefers the original IVs,
6147 then try again using a strategy that prefers candidates not based
6148 on any memory object, and keep the cheaper of the two sets. */
6149 origset = find_optimal_iv_set_1 (data, true);
6150 set = find_optimal_iv_set_1 (data, false);
6151
6152 if (!origset && !set)
6153 return NULL;
6154
6155 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6156 cost = set ? iv_ca_cost (set) : infinite_cost;
6157
6158 if (dump_file && (dump_flags & TDF_DETAILS))
6159 {
6160 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6161 origcost.cost, origcost.complexity);
6162 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6163 cost.cost, cost.complexity);
6164 }
6165
6166 /* Choose the one with the best cost. */
6167 if (compare_costs (origcost, cost) <= 0)
6168 {
6169 if (set)
6170 iv_ca_free (&set);
6171 set = origset;
6172 }
6173 else if (origset)
6174 iv_ca_free (&origset);
6175
6176 for (i = 0; i < n_iv_uses (data); i++)
6177 {
6178 use = iv_use (data, i);
6179 use->selected = iv_ca_cand_for_use (set, use)->cand;
6180 }
6181
6182 return set;
6183 }
6184
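/* For illustration, the two strategies can genuinely disagree.  For a
   hypothetical loop

       for (i = 0; i < n; i++)
         a[i] = 0;

   the ORIGINALP pass tends to keep the counter i and pay for the
   a + 4 * i address computation, while the other pass may replace
   everything with a single pointer iv stepping through a.  Costing both
   candidate sets and keeping the cheaper one avoids guessing which
   shape wins on the target.  */
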
6185 /* Creates a new induction variable corresponding to CAND. */
6186
6187 static void
6188 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6189 {
6190 gimple_stmt_iterator incr_pos;
6191 tree base;
6192 bool after = false;
6193
6194 if (!cand->iv)
6195 return;
6196
6197 switch (cand->pos)
6198 {
6199 case IP_NORMAL:
6200 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6201 break;
6202
6203 case IP_END:
6204 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6205 after = true;
6206 break;
6207
6208 case IP_AFTER_USE:
6209 after = true;
6210 /* fall through */
6211 case IP_BEFORE_USE:
6212 incr_pos = gsi_for_stmt (cand->incremented_at);
6213 break;
6214
6215 case IP_ORIGINAL:
6216 /* Mark that the iv is preserved. */
6217 name_info (data, cand->var_before)->preserve_biv = true;
6218 name_info (data, cand->var_after)->preserve_biv = true;
6219
6220 /* Rewrite the increment so that it uses var_before directly. */
6221 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6222 return;
6223 }
6224
6225 gimple_add_tmp_var (cand->var_before);
6226
6227 base = unshare_expr (cand->iv->base);
6228
6229 create_iv (base, unshare_expr (cand->iv->step),
6230 cand->var_before, data->current_loop,
6231 &incr_pos, after, &cand->var_before, &cand->var_after);
6232 }
6233
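/* For illustration, roughly where the increment lands for the two
   block-based positions, sketched on a hypothetical counter i:

       IP_NORMAL                     IP_END
       loop:                         loop:
         ...uses of i...               ...uses of i...
         i += step;                    if (cond) { i += step; goto loop; }
         if (cond) goto loop;

   i.e. IP_NORMAL increments just before the exit test and IP_END on the
   latch edge only.  IP_BEFORE_USE and IP_AFTER_USE instead place the
   increment around CAND->incremented_at, and IP_ORIGINAL keeps the
   preexisting increment, merely marking the biv as preserved.  */
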
6234 /* Creates new induction variables described in SET. */
6235
6236 static void
6237 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6238 {
6239 unsigned i;
6240 struct iv_cand *cand;
6241 bitmap_iterator bi;
6242
6243 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6244 {
6245 cand = iv_cand (data, i);
6246 create_new_iv (data, cand);
6247 }
6248
6249 if (dump_file && (dump_flags & TDF_DETAILS))
6250 {
6251 fprintf (dump_file, "\nSelected IV set:\n");
6252 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6253 {
6254 cand = iv_cand (data, i);
6255 dump_cand (dump_file, cand);
6256 }
6257 fprintf (dump_file, "\n");
6258 }
6259 }
6260
6261 /* Rewrites USE (definition of iv used in a nonlinear expression)
6262 using candidate CAND. */
6263
6264 static void
6265 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6266 struct iv_use *use, struct iv_cand *cand)
6267 {
6268 tree comp;
6269 tree op, tgt;
6270 gimple ass;
6271 gimple_stmt_iterator bsi;
6272
6273 /* An important special case -- if we are asked to express the value of
6274 the original iv by itself, just exit; there is no need to
6275 introduce a new computation (that might also need casting the
6276 variable to unsigned and back). */
6277 if (cand->pos == IP_ORIGINAL
6278 && cand->incremented_at == use->stmt)
6279 {
6280 enum tree_code stmt_code;
6281
6282 gcc_assert (is_gimple_assign (use->stmt));
6283 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6284
6285 /* Check whether we may leave the computation unchanged.
6286 This is the case only if it does not rely on other
6287 computations in the loop -- otherwise, the computation
6288 we rely upon may be removed in remove_unused_ivs,
6289 thus leading to an ICE. */
6290 stmt_code = gimple_assign_rhs_code (use->stmt);
6291 if (stmt_code == PLUS_EXPR
6292 || stmt_code == MINUS_EXPR
6293 || stmt_code == POINTER_PLUS_EXPR)
6294 {
6295 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6296 op = gimple_assign_rhs2 (use->stmt);
6297 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6298 op = gimple_assign_rhs1 (use->stmt);
6299 else
6300 op = NULL_TREE;
6301 }
6302 else
6303 op = NULL_TREE;
6304
6305 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6306 return;
6307 }
6308
6309 comp = get_computation (data->current_loop, use, cand);
6310 gcc_assert (comp != NULL_TREE);
6311
6312 switch (gimple_code (use->stmt))
6313 {
6314 case GIMPLE_PHI:
6315 tgt = PHI_RESULT (use->stmt);
6316
6317 /* If we should keep the biv, do not replace it. */
6318 if (name_info (data, tgt)->preserve_biv)
6319 return;
6320
6321 bsi = gsi_after_labels (gimple_bb (use->stmt));
6322 break;
6323
6324 case GIMPLE_ASSIGN:
6325 tgt = gimple_assign_lhs (use->stmt);
6326 bsi = gsi_for_stmt (use->stmt);
6327 break;
6328
6329 default:
6330 gcc_unreachable ();
6331 }
6332
6333 if (!valid_gimple_rhs_p (comp)
6334 || (gimple_code (use->stmt) != GIMPLE_PHI
6335 /* We can't allow re-allocating the stmt as it might still be
6336 pointed to. */
6337 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6338 >= gimple_num_ops (gsi_stmt (bsi)))))
6339 {
6340 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6341 true, GSI_SAME_STMT);
6342 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6343 {
6344 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6345 /* As this isn't a plain copy we have to reset alignment
6346 information. */
6347 if (SSA_NAME_PTR_INFO (comp))
6348 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6349 }
6350 }
6351
6352 if (gimple_code (use->stmt) == GIMPLE_PHI)
6353 {
6354 ass = gimple_build_assign (tgt, comp);
6355 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6356
6357 bsi = gsi_for_stmt (use->stmt);
6358 remove_phi_node (&bsi, false);
6359 }
6360 else
6361 {
6362 gimple_assign_set_rhs_from_tree (&bsi, comp);
6363 use->stmt = gsi_stmt (bsi);
6364 }
6365 }
6366
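/* For illustration, suppose the selected candidate is a hypothetical
   pointer iv p with value a + 4 * i at the use point, and the nonlinear
   use computes x = 4 * i.  get_computation expresses the use in terms
   of p, so the rewrite above effectively turns

       x = 4 * i;     into     x = (unsigned long) p - (unsigned long) a;

   after which i itself may become dead and be cleaned up by
   remove_unused_ivs.  */
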
6367 /* Performs a peephole optimization to reorder the iv update statement with
6368 a mem ref to enable instruction combining in later phases. The mem ref uses
6369 the iv value before the update, so the reordering transformation requires
6370 adjustment of the offset. CAND is the selected IV_CAND.
6371
6372 Example:
6373
6374 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6375 iv2 = iv1 + 1;
6376
6377 if (t < val) (1)
6378 goto L;
6379 goto Head;
6380
6381
6382 directly propagating t over to (1) would introduce an overlapping live range
6383 and thus increase register pressure. This peephole transforms it into:
6384
6385
6386 iv2 = iv1 + 1;
6387 t = MEM_REF (base, iv2, 8, 8);
6388 if (t < val)
6389 goto L;
6390 goto Head;
6391 */
6392
6393 static void
6394 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6395 {
6396 tree var_after;
6397 gimple iv_update, stmt;
6398 basic_block bb;
6399 gimple_stmt_iterator gsi, gsi_iv;
6400
6401 if (cand->pos != IP_NORMAL)
6402 return;
6403
6404 var_after = cand->var_after;
6405 iv_update = SSA_NAME_DEF_STMT (var_after);
6406
6407 bb = gimple_bb (iv_update);
6408 gsi = gsi_last_nondebug_bb (bb);
6409 stmt = gsi_stmt (gsi);
6410
6411 /* Only handle conditional statements for now. */
6412 if (gimple_code (stmt) != GIMPLE_COND)
6413 return;
6414
6415 gsi_prev_nondebug (&gsi);
6416 stmt = gsi_stmt (gsi);
6417 if (stmt != iv_update)
6418 return;
6419
6420 gsi_prev_nondebug (&gsi);
6421 if (gsi_end_p (gsi))
6422 return;
6423
6424 stmt = gsi_stmt (gsi);
6425 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6426 return;
6427
6428 if (stmt != use->stmt)
6429 return;
6430
6431 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6432 return;
6433
6434 if (dump_file && (dump_flags & TDF_DETAILS))
6435 {
6436 fprintf (dump_file, "Reordering\n");
6437 print_gimple_stmt (dump_file, iv_update, 0, 0);
6438 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6439 fprintf (dump_file, "\n");
6440 }
6441
6442 gsi = gsi_for_stmt (use->stmt);
6443 gsi_iv = gsi_for_stmt (iv_update);
6444 gsi_move_before (&gsi_iv, &gsi);
6445
6446 cand->pos = IP_BEFORE_USE;
6447 cand->incremented_at = use->stmt;
6448 }
6449
6450 /* Rewrites USE (address that is an iv) using candidate CAND. */
6451
6452 static void
6453 rewrite_use_address (struct ivopts_data *data,
6454 struct iv_use *use, struct iv_cand *cand)
6455 {
6456 aff_tree aff;
6457 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6458 tree base_hint = NULL_TREE;
6459 tree ref, iv;
6460 bool ok;
6461
6462 adjust_iv_update_pos (cand, use);
6463 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6464 gcc_assert (ok);
6465 unshare_aff_combination (&aff);
6466
6467 /* To avoid undefined overflow problems, all IV candidates use unsigned
6468 integer types. The drawback is that this makes it impossible for
6469 create_mem_ref to distinguish an IV that is based on a memory object
6470 from one that represents simply an offset.
6471
6472 To work around this problem, we pass a hint to create_mem_ref that
6473 indicates which variable (if any) in aff is an IV based on a memory
6474 object. Note that we only consider the candidate. If this is not
6475 based on an object, the base of the reference is in some subexpression
6476 of the use -- but these will use pointer types, so they are recognized
6477 by the create_mem_ref heuristics anyway. */
6478 if (cand->iv->base_object)
6479 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6480
6481 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6482 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6483 reference_alias_ptr_type (*use->op_p),
6484 iv, base_hint, data->speed);
6485 copy_ref_info (ref, *use->op_p);
6486 *use->op_p = ref;
6487 }
6488
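/* For illustration, given a hypothetical candidate p stepping by 4 and
   the address use *(a + 4 * i + 16), the affine combination built above
   reduces to p + 16, and create_mem_ref emits something like

       MEM[p + 16] = ...;

   choosing whatever base/index/offset form the target's addressing
   modes support.  BASE_HINT is needed only because the candidate's
   unsigned type hides whether p is based on a memory object.  */
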
6489 /* Rewrites USE (a condition one of whose arguments is an iv) using
6490 candidate CAND. */
6491
6492 static void
6493 rewrite_use_compare (struct ivopts_data *data,
6494 struct iv_use *use, struct iv_cand *cand)
6495 {
6496 tree comp, *var_p, op, bound;
6497 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6498 enum tree_code compare;
6499 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6500 bool ok;
6501
6502 bound = cp->value;
6503 if (bound)
6504 {
6505 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6506 tree var_type = TREE_TYPE (var);
6507 gimple_seq stmts;
6508
6509 if (dump_file && (dump_flags & TDF_DETAILS))
6510 {
6511 fprintf (dump_file, "Replacing exit test: ");
6512 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6513 }
6514 compare = cp->comp;
6515 bound = unshare_expr (fold_convert (var_type, bound));
6516 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6517 if (stmts)
6518 gsi_insert_seq_on_edge_immediate (
6519 loop_preheader_edge (data->current_loop),
6520 stmts);
6521
6522 gimple_cond_set_lhs (use->stmt, var);
6523 gimple_cond_set_code (use->stmt, compare);
6524 gimple_cond_set_rhs (use->stmt, op);
6525 return;
6526 }
6527
6528 /* The induction variable elimination failed; just express the original
6529 giv. */
6530 comp = get_computation (data->current_loop, use, cand);
6531 gcc_assert (comp != NULL_TREE);
6532
6533 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6534 gcc_assert (ok);
6535
6536 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6537 true, GSI_SAME_STMT);
6538 }
6539
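/* For illustration, when elimination succeeded for a hypothetical exit
   test, the rewrite above turns

       if (i < n)     into     if (p != bound)

   where bound (e.g. base + 4 * n) is loop invariant and is computed
   once in the preheader by the gsi_insert_seq_on_edge_immediate call.
   Only when no usable bound was found is the original compare
   re-expressed via get_computation.  */
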
6540 /* Rewrites USE using candidate CAND. */
6541
6542 static void
6543 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6544 {
6545 switch (use->type)
6546 {
6547 case USE_NONLINEAR_EXPR:
6548 rewrite_use_nonlinear_expr (data, use, cand);
6549 break;
6550
6551 case USE_ADDRESS:
6552 rewrite_use_address (data, use, cand);
6553 break;
6554
6555 case USE_COMPARE:
6556 rewrite_use_compare (data, use, cand);
6557 break;
6558
6559 default:
6560 gcc_unreachable ();
6561 }
6562
6563 update_stmt (use->stmt);
6564 }
6565
6566 /* Rewrite the uses using the selected induction variables. */
6567
6568 static void
6569 rewrite_uses (struct ivopts_data *data)
6570 {
6571 unsigned i;
6572 struct iv_cand *cand;
6573 struct iv_use *use;
6574
6575 for (i = 0; i < n_iv_uses (data); i++)
6576 {
6577 use = iv_use (data, i);
6578 cand = use->selected;
6579 gcc_assert (cand);
6580
6581 rewrite_use (data, use, cand);
6582 }
6583 }
6584
6585 /* Removes the ivs that are not used after rewriting. */
6586
6587 static void
6588 remove_unused_ivs (struct ivopts_data *data)
6589 {
6590 unsigned j;
6591 bitmap_iterator bi;
6592 bitmap toremove = BITMAP_ALLOC (NULL);
6593
6594 /* Figure out an order in which to release SSA DEFs so that we don't
6595 release something that we'd have to propagate into a debug stmt
6596 afterwards. */
6597 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6598 {
6599 struct version_info *info;
6600
6601 info = ver_info (data, j);
6602 if (info->iv
6603 && !integer_zerop (info->iv->step)
6604 && !info->inv_id
6605 && !info->iv->have_use_for
6606 && !info->preserve_biv)
6607 {
6608 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6609
6610 tree def = info->iv->ssa_name;
6611
6612 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6613 {
6614 imm_use_iterator imm_iter;
6615 use_operand_p use_p;
6616 gimple stmt;
6617 int count = 0;
6618
6619 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6620 {
6621 if (!gimple_debug_bind_p (stmt))
6622 continue;
6623
6624 /* We just want to determine whether to do nothing
6625 (count == 0), to substitute the computed
6626 expression into a single use of the SSA DEF by
6627 itself (count == 1), or to use a debug temp
6628 because the SSA DEF is used multiple times or as
6629 part of a larger expression (count > 1). */
6630 count++;
6631 if (gimple_debug_bind_get_value (stmt) != def)
6632 count++;
6633
6634 if (count > 1)
6635 BREAK_FROM_IMM_USE_STMT (imm_iter);
6636 }
6637
6638 if (!count)
6639 continue;
6640
6641 struct iv_use dummy_use;
6642 struct iv_cand *best_cand = NULL, *cand;
6643 unsigned i, best_pref = 0, cand_pref;
6644
6645 memset (&dummy_use, 0, sizeof (dummy_use));
6646 dummy_use.iv = info->iv;
6647 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6648 {
6649 cand = iv_use (data, i)->selected;
6650 if (cand == best_cand)
6651 continue;
6652 cand_pref = operand_equal_p (cand->iv->step,
6653 info->iv->step, 0)
6654 ? 4 : 0;
6655 cand_pref
6656 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6657 == TYPE_MODE (TREE_TYPE (info->iv->base))
6658 ? 2 : 0;
6659 cand_pref
6660 += TREE_CODE (cand->iv->base) == INTEGER_CST
6661 ? 1 : 0;
6662 if (best_cand == NULL || best_pref < cand_pref)
6663 {
6664 best_cand = cand;
6665 best_pref = cand_pref;
6666 }
6667 }
6668
6669 if (!best_cand)
6670 continue;
6671
6672 tree comp = get_computation_at (data->current_loop,
6673 &dummy_use, best_cand,
6674 SSA_NAME_DEF_STMT (def));
6675 if (!comp)
6676 continue;
6677
6678 if (count > 1)
6679 {
6680 tree vexpr = make_node (DEBUG_EXPR_DECL);
6681 DECL_ARTIFICIAL (vexpr) = 1;
6682 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6683 if (SSA_NAME_VAR (def))
6684 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6685 else
6686 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6687 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6688 gimple_stmt_iterator gsi;
6689
6690 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6691 gsi = gsi_after_labels (gimple_bb
6692 (SSA_NAME_DEF_STMT (def)));
6693 else
6694 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6695
6696 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6697 comp = vexpr;
6698 }
6699
6700 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6701 {
6702 if (!gimple_debug_bind_p (stmt))
6703 continue;
6704
6705 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6706 SET_USE (use_p, comp);
6707
6708 update_stmt (stmt);
6709 }
6710 }
6711 }
6712 }
6713
6714 release_defs_bitset (toremove);
6715
6716 BITMAP_FREE (toremove);
6717 }
6718
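/* For illustration, for a dead iv i_5 referenced by several debug binds,
   the count > 1 path above introduces one debug temp instead of
   duplicating the replacement expression:

       # DEBUG D#1 => <i_5 expressed via the kept iv>
       # DEBUG x => D#1 + 1
       # DEBUG y => D#1 * 2

   whereas a single bind of the form "# DEBUG x => i_5" simply receives
   the computed expression directly.  D#1, x and y are hypothetical
   names.  */
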
6719 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6720 for hash_map::traverse. */
6721
6722 bool
6723 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6724 {
6725 free (value);
6726 return true;
6727 }
6728
6729 /* Frees data allocated by the optimization of a single loop. */
6730
6731 static void
6732 free_loop_data (struct ivopts_data *data)
6733 {
6734 unsigned i, j;
6735 bitmap_iterator bi;
6736 tree obj;
6737
6738 if (data->niters)
6739 {
6740 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6741 delete data->niters;
6742 data->niters = NULL;
6743 }
6744
6745 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6746 {
6747 struct version_info *info;
6748
6749 info = ver_info (data, i);
6750 free (info->iv);
6751 info->iv = NULL;
6752 info->has_nonlin_use = false;
6753 info->preserve_biv = false;
6754 info->inv_id = 0;
6755 }
6756 bitmap_clear (data->relevant);
6757 bitmap_clear (data->important_candidates);
6758
6759 for (i = 0; i < n_iv_uses (data); i++)
6760 {
6761 struct iv_use *use = iv_use (data, i);
6762
6763 free (use->iv);
6764 BITMAP_FREE (use->related_cands);
6765 for (j = 0; j < use->n_map_members; j++)
6766 if (use->cost_map[j].depends_on)
6767 BITMAP_FREE (use->cost_map[j].depends_on);
6768 free (use->cost_map);
6769 free (use);
6770 }
6771 data->iv_uses.truncate (0);
6772
6773 for (i = 0; i < n_iv_cands (data); i++)
6774 {
6775 struct iv_cand *cand = iv_cand (data, i);
6776
6777 free (cand->iv);
6778 if (cand->depends_on)
6779 BITMAP_FREE (cand->depends_on);
6780 free (cand);
6781 }
6782 data->iv_candidates.truncate (0);
6783
6784 if (data->version_info_size < num_ssa_names)
6785 {
6786 data->version_info_size = 2 * num_ssa_names;
6787 free (data->version_info);
6788 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6789 }
6790
6791 data->max_inv_id = 0;
6792
6793 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6794 SET_DECL_RTL (obj, NULL_RTX);
6795
6796 decl_rtl_to_reset.truncate (0);
6797
6798 data->inv_expr_tab->empty ();
6799 data->inv_expr_id = 0;
6800 }
6801
6802 /* Finalizes the data structures used by the iv optimization
6803 pass. */
6804
6805 static void
6806 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6807 {
6808 free_loop_data (data);
6809 free (data->version_info);
6810 BITMAP_FREE (data->relevant);
6811 BITMAP_FREE (data->important_candidates);
6812
6813 decl_rtl_to_reset.release ();
6814 data->iv_uses.release ();
6815 data->iv_candidates.release ();
6816 delete data->inv_expr_tab;
6817 data->inv_expr_tab = NULL;
6818 }
6819
6820 /* Returns true if the loop body BODY includes any function calls. */
6821
6822 static bool
6823 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6824 {
6825 gimple_stmt_iterator gsi;
6826 unsigned i;
6827
6828 for (i = 0; i < num_nodes; i++)
6829 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6830 {
6831 gimple stmt = gsi_stmt (gsi);
6832 if (is_gimple_call (stmt)
6833 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6834 return true;
6835 }
6836 return false;
6837 }
6838
6839 /* Optimizes the LOOP. Returns true if anything changed. */
6840
6841 static bool
6842 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6843 {
6844 bool changed = false;
6845 struct iv_ca *iv_ca;
6846 edge exit = single_dom_exit (loop);
6847 basic_block *body;
6848
6849 gcc_assert (!data->niters);
6850 data->current_loop = loop;
6851 data->speed = optimize_loop_for_speed_p (loop);
6852
6853 if (dump_file && (dump_flags & TDF_DETAILS))
6854 {
6855 fprintf (dump_file, "Processing loop %d\n", loop->num);
6856
6857 if (exit)
6858 {
6859 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6860 exit->src->index, exit->dest->index);
6861 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6862 fprintf (dump_file, "\n");
6863 }
6864
6865 fprintf (dump_file, "\n");
6866 }
6867
6868 body = get_loop_body (loop);
6869 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6870 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6871 free (body);
6872
6873 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6874
6875 /* For each ssa name, determines whether it behaves as an induction variable
6876 in some loop. */
6877 if (!find_induction_variables (data))
6878 goto finish;
6879
6880 /* Finds interesting uses (item 1). */
6881 find_interesting_uses (data);
6882 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6883 goto finish;
6884
6885 /* Finds candidates for the induction variables (item 2). */
6886 find_iv_candidates (data);
6887
6888 /* Calculates the costs (item 3, part 1). */
6889 determine_iv_costs (data);
6890 determine_use_iv_costs (data);
6891 determine_set_costs (data);
6892
6893 /* Find the optimal set of induction variables (item 3, part 2). */
6894 iv_ca = find_optimal_iv_set (data);
6895 if (!iv_ca)
6896 goto finish;
6897 changed = true;
6898
6899 /* Create the new induction variables (item 4, part 1). */
6900 create_new_ivs (data, iv_ca);
6901 iv_ca_free (&iv_ca);
6902
6903 /* Rewrite the uses (item 4, part 2). */
6904 rewrite_uses (data);
6905
6906 /* Remove the ivs that are unused after rewriting. */
6907 remove_unused_ivs (data);
6908
6909 /* We have changed the structure of induction variables; it might happen
6910 that definitions in the scev database refer to some of them that were
6911 eliminated. */
6912 scev_reset ();
6913
6914 finish:
6915 free_loop_data (data);
6916
6917 return changed;
6918 }
6919
6920 /* Main entry point. Optimizes induction variables in loops. */
6921
6922 void
6923 tree_ssa_iv_optimize (void)
6924 {
6925 struct loop *loop;
6926 struct ivopts_data data;
6927
6928 tree_ssa_iv_optimize_init (&data);
6929
6930 /* Optimize the loops starting with the innermost ones. */
6931 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6932 {
6933 if (dump_file && (dump_flags & TDF_DETAILS))
6934 flow_loop_dump (loop, dump_file, NULL, 1);
6935
6936 tree_ssa_iv_optimize_loop (&data, loop);
6937 }
6938
6939 tree_ssa_iv_optimize_finalize (&data);
6940 }