/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
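
/* For illustration only (a sketch of the intended effect, not literal
   output of this pass): given a source loop like

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   the addresses a[i] and b[i] and the compare i < n are the interesting
   uses; the counter i and pointer ivs stepping by the element size are
   among the candidates.  If the pointer ivs win on cost, the loop is
   rewritten roughly as

     for (p = &a[0], q = &b[0]; p != &a[n]; p++, q++)
       *p = *q + 1;

   and the original counter is eliminated as dead code.  */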

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, if we do.  */
};
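
/* Example (hypothetical values): in a loop walking an array of 4-byte
   ints through a pointer p that starts at &a[0] and advances by one
   element per iteration, BASE is &a[0], STEP is 4 and BASE_OBJECT is
   &a.  For a plain counter starting at 0 and incremented by 1, BASE is
   0, STEP is 1 and BASE_OBJECT is NULL, as the counter is not of
   pointer type.  */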

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this many ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns the memory object to which EXPR points, or NULL_TREE if we are
   able to determine that it does not point to any such object.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* If this is a pointer cast to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
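
/* E.g. (an illustrative sketch): for the pointer expression
   "&a[0] + 4 * i_3", the PLUS_EXPR case recurses into both operands;
   "4 * i_3" is not of pointer type and contributes NULL_TREE, while
   the ADDR_EXPR operand yields &a via get_base_address, so &a is
   returned as the base object.  */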

/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
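
/* E.g. (illustrative gimple, ssa names hypothetical): for the header phi

     i_1 = PHI <0 (preheader), i_2 (latch)>

   where the latch increment is "i_2 = i_1 + 1", determine_biv_step
   yields the step 1, the preheader argument 0 becomes the base, and
   the biv {base 0, step 1} is recorded for i_1.  */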

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment occurs in a subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
}
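
/* E.g. (illustrative): if i_4 is a biv with base 0 and step 1, the
   statement "off_7 = i_4 * 4" is recognized by simple_iv as the affine
   iv {base 0, step 4}, which is recorded as a giv for off_7.  */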

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, in case
     the caller has not done so already.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition *COND_P, checks whether it is a compare of an induction
   variable and an invariant.  If this is the case, CONTROL_VAR is set
   to the location of the iv, BOUND to the location of the invariant,
   IV_VAR and IV_BOUND are set to the corresponding induction variable
   descriptions, and true is returned.  If this is not the case,
   CONTROL_VAR and BOUND are set to the arguments of the condition and
   false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, tree *cond_p,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The nodes returned when COND has just one operand.  Note that you should
     not modify anything in BOUND or IV_BOUND because of this.  */
  static struct iv const_iv;
  static tree zero;
  tree cond = *cond_p;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (cond) == SSA_NAME)
    {
      op0 = cond_p;
      iv0 = get_iv (data, cond);
      ret = (iv0 && !integer_zerop (iv0->step));
      goto end;
    }

  if (!COMPARISON_CLASS_P (cond))
    {
      op0 = cond_p;
      goto end;
    }

  op0 = &TREE_OPERAND (cond, 0);
  op1 = &TREE_OPERAND (cond, 1);
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* The control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
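
/* E.g. (illustrative): for the exit test "i_4 < n_7", where i_4 is an iv
   with step 1 and n_7 is defined outside the loop (so its iv has step 0),
   *CONTROL_VAR is set to the location of i_4, *BOUND to that of n_7, and
   true is returned.  For "n_7 < i_4", the operands are swapped first,
   since the control variable may be on either side of the compare.  */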

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX: for a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  The ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      return AGGREGATE_TYPE_P (TREE_TYPE (expr))
             && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  if (may_be_nonaddressable_p (base))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
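
/* E.g. (illustrative, assuming 4-byte array elements): for the store
   "a[i_4] = 1" with i_4 an iv {base 0, step 1}, idx_find_step replaces
   the index by its initial value and accumulates the scaled step, so
   the use is recorded with the address iv {base &a[0], step 4}.  */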

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt,
                                      &GIMPLE_STMT_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt,
                                           &GIMPLE_STMT_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == PLUS_EXPR)
            expr = op1;
          else
            expr = fold_build1 (NEGATE_EXPR, type, op1);
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top-level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
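
/* E.g. (illustrative, assuming 4-byte array elements): strip_offset
   applied to "&a[i_4 + 3]" returns "&a[i_4]" and stores 12 to *OFFSET;
   the constant part of the index is peeled off and scaled by the
   element size.  */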

/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  The candidate's important flag is set
   to IMPORTANT and its position to POS.  If USE is not NULL, the candidate
   is set as related to it.  If both BASE and STEP are NULL, we add a
   pseudocandidate for the replacement of the final value of the iv by a
   direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump that might confuse other optimization passes and leave
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  The candidate's important flag is set
   to IMPORTANT.  If USE is not NULL, the candidate is set as related to it.
   The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Adds a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type, if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
2085
2086
2087 /* Adds candidates based on the old induction variable IV. */
2088
2089 static void
2090 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2091 {
2092 tree phi, def;
2093 struct iv_cand *cand;
2094
2095 add_candidate (data, iv->base, iv->step, true, NULL);
2096
2097 /* The same, but with initial value zero. */
2098 add_candidate (data,
2099 build_int_cst (TREE_TYPE (iv->base), 0),
2100 iv->step, true, NULL);
2101
2102 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2103 if (TREE_CODE (phi) == PHI_NODE)
2104 {
2105 /* Additionally record the possibility of leaving the original iv
2106 untouched. */
2107 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2108 cand = add_candidate_1 (data,
2109 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2110 SSA_NAME_DEF_STMT (def));
2111 cand->var_before = iv->ssa_name;
2112 cand->var_after = def;
2113 }
2114 }
2115
2116 /* Adds candidates based on the old induction variables. */
2117
2118 static void
2119 add_old_ivs_candidates (struct ivopts_data *data)
2120 {
2121 unsigned i;
2122 struct iv *iv;
2123 bitmap_iterator bi;
2124
2125 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2126 {
2127 iv = ver_info (data, i)->iv;
2128 if (iv && iv->biv_p && !integer_zerop (iv->step))
2129 add_old_iv_candidates (data, iv);
2130 }
2131 }
2132
2133 /* Adds candidates based on the value of the induction variable IV and USE. */
2134
2135 static void
2136 add_iv_value_candidates (struct ivopts_data *data,
2137 struct iv *iv, struct iv_use *use)
2138 {
2139 unsigned HOST_WIDE_INT offset;
2140 tree base;
2141
2142 add_candidate (data, iv->base, iv->step, false, use);
2143
2144 /* The same, but with initial value zero. Make such a variable important,
2145 since it is generic enough that possibly many uses may be based
2146 on it. */
2147 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2148 iv->step, true, use);
2149
2150 /* Third, try removing the constant offset. */
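/* Illustrative example: for iv->base == &a[0] + 16, this adds a
candidate with base &a[0], the constant 16 having moved to OFFSET. */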
2151 base = strip_offset (iv->base, &offset);
2152 if (offset)
2153 add_candidate (data, base, iv->step, false, use);
2154 }
2155
2156 /* Adds candidates based on the uses. */
2157
2158 static void
2159 add_derived_ivs_candidates (struct ivopts_data *data)
2160 {
2161 unsigned i;
2162
2163 for (i = 0; i < n_iv_uses (data); i++)
2164 {
2165 struct iv_use *use = iv_use (data, i);
2166
2167 if (!use)
2168 continue;
2169
2170 switch (use->type)
2171 {
2172 case USE_NONLINEAR_EXPR:
2173 case USE_COMPARE:
2174 case USE_ADDRESS:
2175 /* Just add the ivs based on the value of the iv used here. */
2176 add_iv_value_candidates (data, use->iv, use);
2177 break;
2178
2179 default:
2180 gcc_unreachable ();
2181 }
2182 }
2183 }
2184
2185 /* Record important candidates and add them to related_cands bitmaps
2186 if needed. */
2187
2188 static void
2189 record_important_candidates (struct ivopts_data *data)
2190 {
2191 unsigned i;
2192 struct iv_use *use;
2193
2194 for (i = 0; i < n_iv_cands (data); i++)
2195 {
2196 struct iv_cand *cand = iv_cand (data, i);
2197
2198 if (cand->important)
2199 bitmap_set_bit (data->important_candidates, i);
2200 }
2201
2202 data->consider_all_candidates = (n_iv_cands (data)
2203 <= CONSIDER_ALL_CANDIDATES_BOUND);
2204
2205 if (data->consider_all_candidates)
2206 {
2207 /* We will not need "related_cands" bitmaps in this case,
2208 so release them to decrease peak memory consumption. */
2209 for (i = 0; i < n_iv_uses (data); i++)
2210 {
2211 use = iv_use (data, i);
2212 BITMAP_FREE (use->related_cands);
2213 }
2214 }
2215 else
2216 {
2217 /* Add important candidates to the related_cands bitmaps. */
2218 for (i = 0; i < n_iv_uses (data); i++)
2219 bitmap_ior_into (iv_use (data, i)->related_cands,
2220 data->important_candidates);
2221 }
2222 }
2223
2224 /* Finds the candidates for the induction variables. */
2225
2226 static void
2227 find_iv_candidates (struct ivopts_data *data)
2228 {
2229 /* Add commonly used ivs. */
2230 add_standard_iv_candidates (data);
2231
2232 /* Add old induction variables. */
2233 add_old_ivs_candidates (data);
2234
2235 /* Add induction variables derived from uses. */
2236 add_derived_ivs_candidates (data);
2237
2238 /* Record the important candidates. */
2239 record_important_candidates (data);
2240 }
2241
2242 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2243 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2244 we allocate a small hash table for each use. */
2245
2246 static void
2247 alloc_use_cost_map (struct ivopts_data *data)
2248 {
2249 unsigned i, size, s, j;
2250
2251 for (i = 0; i < n_iv_uses (data); i++)
2252 {
2253 struct iv_use *use = iv_use (data, i);
2254 bitmap_iterator bi;
2255
2256 if (data->consider_all_candidates)
2257 size = n_iv_cands (data);
2258 else
2259 {
2260 s = 0;
2261 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2262 {
2263 s++;
2264 }
2265
2266 /* Round up to a power of two, so that taking the remainder modulo it is fast. */
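/* Illustrative example: s == 5 related candidates yield size == 8,
and s == 8 yields size == 8 as well. */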
2267 for (size = 1; size < s; size <<= 1)
2268 continue;
2269 }
2270
2271 use->n_map_members = size;
2272 use->cost_map = XCNEWVEC (struct cost_pair, size);
2273 }
2274 }
2275
2276 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2277 depends on invariants DEPENDS_ON and that the value used in expressing it
2278 is VALUE. */
2279
2280 static void
2281 set_use_iv_cost (struct ivopts_data *data,
2282 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2283 bitmap depends_on, tree value)
2284 {
2285 unsigned i, s;
2286
2287 if (cost == INFTY)
2288 {
2289 BITMAP_FREE (depends_on);
2290 return;
2291 }
2292
2293 if (data->consider_all_candidates)
2294 {
2295 use->cost_map[cand->id].cand = cand;
2296 use->cost_map[cand->id].cost = cost;
2297 use->cost_map[cand->id].depends_on = depends_on;
2298 use->cost_map[cand->id].value = value;
2299 return;
2300 }
2301
2302 /* n_map_members is a power of two, so this computes modulo. */
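/* Illustrative example: with n_map_members == 8 and cand->id == 13,
s == (13 & 7) == 5 == 13 % 8, with no division needed. */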
2303 s = cand->id & (use->n_map_members - 1);
2304 for (i = s; i < use->n_map_members; i++)
2305 if (!use->cost_map[i].cand)
2306 goto found;
2307 for (i = 0; i < s; i++)
2308 if (!use->cost_map[i].cand)
2309 goto found;
2310
2311 gcc_unreachable ();
2312
2313 found:
2314 use->cost_map[i].cand = cand;
2315 use->cost_map[i].cost = cost;
2316 use->cost_map[i].depends_on = depends_on;
2317 use->cost_map[i].value = value;
2318 }
2319
2320 /* Gets cost of (USE, CANDIDATE) pair. */
2321
2322 static struct cost_pair *
2323 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2324 struct iv_cand *cand)
2325 {
2326 unsigned i, s;
2327 struct cost_pair *ret;
2328
2329 if (!cand)
2330 return NULL;
2331
2332 if (data->consider_all_candidates)
2333 {
2334 ret = use->cost_map + cand->id;
2335 if (!ret->cand)
2336 return NULL;
2337
2338 return ret;
2339 }
2340
2341 /* n_map_members is a power of two, so this computes modulo. */
2342 s = cand->id & (use->n_map_members - 1);
2343 for (i = s; i < use->n_map_members; i++)
2344 if (use->cost_map[i].cand == cand)
2345 return use->cost_map + i;
2346
2347 for (i = 0; i < s; i++)
2348 if (use->cost_map[i].cand == cand)
2349 return use->cost_map + i;
2350
2351 return NULL;
2352 }
2353
2354 /* Returns an estimate of the cost of computing SEQ. */
2355
2356 static unsigned
2357 seq_cost (rtx seq)
2358 {
2359 unsigned cost = 0;
2360 rtx set;
2361
2362 for (; seq; seq = NEXT_INSN (seq))
2363 {
2364 set = single_set (seq);
2365 if (set)
2366 cost += rtx_cost (set, SET);
2367 else
2368 cost++;
2369 }
2370
2371 return cost;
2372 }
2373
2374 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2375 static rtx
2376 produce_memory_decl_rtl (tree obj, int *regno)
2377 {
2378 rtx x;
2379
2380 gcc_assert (obj);
2381 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2382 {
2383 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2384 x = gen_rtx_SYMBOL_REF (Pmode, name);
2385 SET_SYMBOL_REF_DECL (x, obj);
2386 x = gen_rtx_MEM (DECL_MODE (obj), x);
2387 targetm.encode_section_info (obj, x, true);
2388 }
2389 else
2390 {
2391 x = gen_raw_REG (Pmode, (*regno)++);
2392 x = gen_rtx_MEM (DECL_MODE (obj), x);
2393 }
2394
2395 return x;
2396 }
2397
2398 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2399 walk_tree. DATA contains the actual fake register number. */
2400
2401 static tree
2402 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2403 {
2404 tree obj = NULL_TREE;
2405 rtx x = NULL_RTX;
2406 int *regno = (int *) data;
2407
2408 switch (TREE_CODE (*expr_p))
2409 {
2410 case ADDR_EXPR:
2411 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2412 handled_component_p (*expr_p);
2413 expr_p = &TREE_OPERAND (*expr_p, 0))
2414 continue;
2415 obj = *expr_p;
2416 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2417 x = produce_memory_decl_rtl (obj, regno);
2418 break;
2419
2420 case SSA_NAME:
2421 *ws = 0;
2422 obj = SSA_NAME_VAR (*expr_p);
2423 if (!DECL_RTL_SET_P (obj))
2424 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2425 break;
2426
2427 case VAR_DECL:
2428 case PARM_DECL:
2429 case RESULT_DECL:
2430 *ws = 0;
2431 obj = *expr_p;
2432
2433 if (DECL_RTL_SET_P (obj))
2434 break;
2435
2436 if (DECL_MODE (obj) == BLKmode)
2437 x = produce_memory_decl_rtl (obj, regno);
2438 else
2439 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2440
2441 break;
2442
2443 default:
2444 break;
2445 }
2446
2447 if (x)
2448 {
2449 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2450 SET_DECL_RTL (obj, x);
2451 }
2452
2453 return NULL_TREE;
2454 }
2455
2456 /* Determines cost of the computation of EXPR. */
2457
2458 static unsigned
2459 computation_cost (tree expr)
2460 {
2461 rtx seq, rslt;
2462 tree type = TREE_TYPE (expr);
2463 unsigned cost;
2464 /* Avoid using hard regs in ways which may be unsupported. */
2465 int regno = LAST_VIRTUAL_REGISTER + 1;
2466
2467 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2468 start_sequence ();
2469 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2470 seq = get_insns ();
2471 end_sequence ();
2472
2473 cost = seq_cost (seq);
2474 if (MEM_P (rslt))
2475 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2476
2477 return cost;
2478 }
2479
2480 /* Returns variable containing the value of candidate CAND at statement AT. */
2481
2482 static tree
2483 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2484 {
2485 if (stmt_after_increment (loop, cand, stmt))
2486 return cand->var_after;
2487 else
2488 return cand->var_before;
2489 }
2490
2491 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2492 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
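/* Illustrative example: for an 8-bit type, the value 0x80 has its
sign bit (bit 7) set, so this returns 1. */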
2493
2494 int
2495 tree_int_cst_sign_bit (tree t)
2496 {
2497 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2498 unsigned HOST_WIDE_INT w;
2499
2500 if (bitno < HOST_BITS_PER_WIDE_INT)
2501 w = TREE_INT_CST_LOW (t);
2502 else
2503 {
2504 w = TREE_INT_CST_HIGH (t);
2505 bitno -= HOST_BITS_PER_WIDE_INT;
2506 }
2507
2508 return (w >> bitno) & 1;
2509 }
2510
2511 /* If we can prove that TOP = cst * BOT for some constant cst,
2512 store cst to MUL and return true. Otherwise return false.
2513 The returned value is always sign-extended, regardless of the
2514 signedness of TOP and BOT. */
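/* Illustrative examples: TOP == x * 6 with BOT == x gives *MUL == 6;
TOP == 12 with BOT == 4 gives *MUL == 3; TOP == 13 with BOT == 4
fails, since the division leaves a remainder. */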
2515
2516 static bool
2517 constant_multiple_of (tree top, tree bot, double_int *mul)
2518 {
2519 tree mby;
2520 enum tree_code code;
2521 double_int res, p0, p1;
2522 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2523
2524 STRIP_NOPS (top);
2525 STRIP_NOPS (bot);
2526
2527 if (operand_equal_p (top, bot, 0))
2528 {
2529 *mul = double_int_one;
2530 return true;
2531 }
2532
2533 code = TREE_CODE (top);
2534 switch (code)
2535 {
2536 case MULT_EXPR:
2537 mby = TREE_OPERAND (top, 1);
2538 if (TREE_CODE (mby) != INTEGER_CST)
2539 return false;
2540
2541 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2542 return false;
2543
2544 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2545 precision);
2546 return true;
2547
2548 case PLUS_EXPR:
2549 case MINUS_EXPR:
2550 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2551 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2552 return false;
2553
2554 if (code == MINUS_EXPR)
2555 p1 = double_int_neg (p1);
2556 *mul = double_int_sext (double_int_add (p0, p1), precision);
2557 return true;
2558
2559 case INTEGER_CST:
2560 if (TREE_CODE (bot) != INTEGER_CST)
2561 return false;
2562
2563 p0 = double_int_sext (tree_to_double_int (top), precision);
2564 p1 = double_int_sext (tree_to_double_int (bot), precision);
2565 if (double_int_zero_p (p1))
2566 return false;
2567 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2568 precision);
2569 return double_int_zero_p (res);
2570
2571 default:
2572 return false;
2573 }
2574 }
2575
2576 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have
2577 the same precision that is at least as wide as the precision of TYPE,
2578 stores BA to A and BB to B, and returns the type of BA. Otherwise,
2579 returns the type of A. */
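/* Illustrative example: for A == (unsigned int) al and
B == (unsigned int) bl, with al and bl of type long, both casts are
stripped and the type long is returned. */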
2580
2581 static tree
2582 determine_common_wider_type (tree *a, tree *b)
2583 {
2584 tree wider_type = NULL;
2585 tree suba, subb;
2586 tree atype = TREE_TYPE (*a);
2587
2588 if ((TREE_CODE (*a) == NOP_EXPR
2589 || TREE_CODE (*a) == CONVERT_EXPR))
2590 {
2591 suba = TREE_OPERAND (*a, 0);
2592 wider_type = TREE_TYPE (suba);
2593 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2594 return atype;
2595 }
2596 else
2597 return atype;
2598
2599 if ((TREE_CODE (*b) == NOP_EXPR
2600 || TREE_CODE (*b) == CONVERT_EXPR))
2601 {
2602 subb = TREE_OPERAND (*b, 0);
2603 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2604 return atype;
2605 }
2606 else
2607 return atype;
2608
2609 *a = suba;
2610 *b = subb;
2611 return wider_type;
2612 }
2613
2614 /* Determines the expression by which USE is expressed from induction variable
2615 CAND at statement AT in LOOP. The expression is stored in a decomposed
2616 form into AFF. Returns false if USE cannot be expressed using CAND. */
2617
2618 static bool
2619 get_computation_aff (struct loop *loop,
2620 struct iv_use *use, struct iv_cand *cand, tree at,
2621 struct affine_tree_combination *aff)
2622 {
2623 tree ubase = use->iv->base;
2624 tree ustep = use->iv->step;
2625 tree cbase = cand->iv->base;
2626 tree cstep = cand->iv->step, cstep_common;
2627 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2628 tree common_type, var;
2629 tree uutype;
2630 aff_tree cbase_aff, var_aff;
2631 double_int rat;
2632
2633 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2634 {
2635 /* We do not have enough precision to express the values of USE. */
2636 return false;
2637 }
2638
2639 var = var_at_stmt (loop, cand, at);
2640 uutype = unsigned_type_for (utype);
2641
2642 /* If the conversion is not a noop, perform it. */
2643 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2644 {
2645 cstep = fold_convert (uutype, cstep);
2646 cbase = fold_convert (uutype, cbase);
2647 var = fold_convert (uutype, var);
2648 }
2649
2650 if (!constant_multiple_of (ustep, cstep, &rat))
2651 return false;
2652
2653 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2654 type, we achieve better folding by computing their difference in this
2655 wider type, and casting the result to UUTYPE. We do not need to worry about
2656 overflow, as all the arithmetic will in the end be performed in UUTYPE
2657 anyway. */
2658 common_type = determine_common_wider_type (&ubase, &cbase);
2659
2660 /* use = ubase - ratio * cbase + ratio * var. */
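/* Illustrative example: for use->iv == {base 4, step 2} and
cand->iv == {base 0, step 1}, rat == 2 and the combination is
4 - 2 * 0 + 2 * var, i.e. 2 * var + 4. */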
2661 tree_to_aff_combination (ubase, common_type, aff);
2662 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2663 tree_to_aff_combination (var, uutype, &var_aff);
2664
2665 /* We need to shift the value if we are after the increment. */
2666 if (stmt_after_increment (loop, cand, at))
2667 {
2668 aff_tree cstep_aff;
2669
2670 if (common_type != uutype)
2671 cstep_common = fold_convert (common_type, cstep);
2672 else
2673 cstep_common = cstep;
2674
2675 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2676 aff_combination_add (&cbase_aff, &cstep_aff);
2677 }
2678
2679 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2680 aff_combination_add (aff, &cbase_aff);
2681 if (common_type != uutype)
2682 aff_combination_convert (aff, uutype);
2683
2684 aff_combination_scale (&var_aff, rat);
2685 aff_combination_add (aff, &var_aff);
2686
2687 return true;
2688 }
2689
2690 /* Determines the expression by which USE is expressed from induction variable
2691 CAND at statement AT in LOOP. The computation is unshared. */
2692
2693 static tree
2694 get_computation_at (struct loop *loop,
2695 struct iv_use *use, struct iv_cand *cand, tree at)
2696 {
2697 aff_tree aff;
2698 tree type = TREE_TYPE (use->iv->base);
2699
2700 if (!get_computation_aff (loop, use, cand, at, &aff))
2701 return NULL_TREE;
2702 unshare_aff_combination (&aff);
2703 return fold_convert (type, aff_combination_to_tree (&aff));
2704 }
2705
2706 /* Determines the expression by which USE is expressed from induction variable
2707 CAND in LOOP. The computation is unshared. */
2708
2709 static tree
2710 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2711 {
2712 return get_computation_at (loop, use, cand, use->stmt);
2713 }
2714
2715 /* Returns cost of addition in MODE. */
2716
2717 static unsigned
2718 add_cost (enum machine_mode mode)
2719 {
2720 static unsigned costs[NUM_MACHINE_MODES];
2721 rtx seq;
2722 unsigned cost;
2723
2724 if (costs[mode])
2725 return costs[mode];
2726
2727 start_sequence ();
2728 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2729 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2730 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2731 NULL_RTX);
2732 seq = get_insns ();
2733 end_sequence ();
2734
2735 cost = seq_cost (seq);
2736 if (!cost)
2737 cost = 1;
2738
2739 costs[mode] = cost;
2740
2741 if (dump_file && (dump_flags & TDF_DETAILS))
2742 fprintf (dump_file, "Addition in %s costs %d\n",
2743 GET_MODE_NAME (mode), cost);
2744 return cost;
2745 }
2746
2747 /* Entry in a hashtable of already known costs for multiplication. */
2748 struct mbc_entry
2749 {
2750 HOST_WIDE_INT cst; /* The constant to multiply by. */
2751 enum machine_mode mode; /* In mode. */
2752 unsigned cost; /* The cost. */
2753 };
2754
2755 /* Computes the hash value for ENTRY. */
2756
2757 static hashval_t
2758 mbc_entry_hash (const void *entry)
2759 {
2760 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2761
2762 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2763 }
2764
2765 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2766
2767 static int
2768 mbc_entry_eq (const void *entry1, const void *entry2)
2769 {
2770 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2771 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2772
2773 return (e1->mode == e2->mode
2774 && e1->cst == e2->cst);
2775 }
2776
2777 /* Returns cost of multiplication by constant CST in MODE. */
2778
2779 unsigned
2780 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2781 {
2782 static htab_t costs;
2783 struct mbc_entry **cached, act;
2784 rtx seq;
2785 unsigned cost;
2786
2787 if (!costs)
2788 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2789
2790 act.mode = mode;
2791 act.cst = cst;
2792 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2793 if (*cached)
2794 return (*cached)->cost;
2795
2796 *cached = XNEW (struct mbc_entry);
2797 (*cached)->mode = mode;
2798 (*cached)->cst = cst;
2799
2800 start_sequence ();
2801 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2802 gen_int_mode (cst, mode), NULL_RTX, 0);
2803 seq = get_insns ();
2804 end_sequence ();
2805
2806 cost = seq_cost (seq);
2807
2808 if (dump_file && (dump_flags & TDF_DETAILS))
2809 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2810 (int) cst, GET_MODE_NAME (mode), cost);
2811
2812 (*cached)->cost = cost;
2813
2814 return cost;
2815 }
2816
2817 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2818 validity for a memory reference accessing memory of mode MODE. */
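/* Illustrative note: on targets with x86-like addressing, only the
scales 1, 2, 4 and 8 would be reported as valid multipliers. */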
2819
2820 bool
2821 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2822 {
2823 #define MAX_RATIO 128
2824 static sbitmap valid_mult[MAX_MACHINE_MODE];
2825
2826 if (!valid_mult[mode])
2827 {
2828 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2829 rtx addr;
2830 HOST_WIDE_INT i;
2831
2832 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2833 sbitmap_zero (valid_mult[mode]);
2834 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2835 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2836 {
2837 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2838 if (memory_address_p (mode, addr))
2839 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2840 }
2841
2842 if (dump_file && (dump_flags & TDF_DETAILS))
2843 {
2844 fprintf (dump_file, " allowed multipliers:");
2845 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2846 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2847 fprintf (dump_file, " %d", (int) i);
2848 fprintf (dump_file, "\n");
2849 fprintf (dump_file, "\n");
2850 }
2851 }
2852
2853 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2854 return false;
2855
2856 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2857 }
2858
2859 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2860 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2861 variable is omitted. Compute the cost for a memory reference that accesses
2862 a memory location of mode MEM_MODE.
2863
2864 TODO -- there must be some better way. This all is quite crude. */
2865
2866 static unsigned
2867 get_address_cost (bool symbol_present, bool var_present,
2868 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2869 enum machine_mode mem_mode)
2870 {
2871 static bool initialized[MAX_MACHINE_MODE];
2872 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2873 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2874 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2875 unsigned cost, acost;
2876 bool offset_p, ratio_p;
2877 HOST_WIDE_INT s_offset;
2878 unsigned HOST_WIDE_INT mask;
2879 unsigned bits;
2880
2881 if (!initialized[mem_mode])
2882 {
2883 HOST_WIDE_INT i;
2884 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2885 int old_cse_not_expected;
2886 unsigned sym_p, var_p, off_p, rat_p, add_c;
2887 rtx seq, addr, base;
2888 rtx reg0, reg1;
2889
2890 initialized[mem_mode] = true;
2891
2892 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2893
2894 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2895 for (i = start; i <= 1 << 20; i <<= 1)
2896 {
2897 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2898 if (!memory_address_p (mem_mode, addr))
2899 break;
2900 }
2901 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2902 off[mem_mode] = max_offset[mem_mode];
2903
2904 for (i = start; i <= 1 << 20; i <<= 1)
2905 {
2906 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2907 if (!memory_address_p (mem_mode, addr))
2908 break;
2909 }
2910 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2911
2912 if (dump_file && (dump_flags & TDF_DETAILS))
2913 {
2914 fprintf (dump_file, "get_address_cost:\n");
2915 fprintf (dump_file, " min offset %s %d\n",
2916 GET_MODE_NAME (mem_mode),
2917 (int) min_offset[mem_mode]);
2918 fprintf (dump_file, " max offset %s %d\n",
2919 GET_MODE_NAME (mem_mode),
2920 (int) max_offset[mem_mode]);
2921 }
2922
2923 rat[mem_mode] = 1;
2924 for (i = 2; i <= MAX_RATIO; i++)
2925 if (multiplier_allowed_in_address_p (i, mem_mode))
2926 {
2927 rat[mem_mode] = i;
2928 break;
2929 }
2930
2931 /* Compute the cost of various addressing modes. */
2932 acost = 0;
2933 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2934 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2935
2936 for (i = 0; i < 16; i++)
2937 {
2938 sym_p = i & 1;
2939 var_p = (i >> 1) & 1;
2940 off_p = (i >> 2) & 1;
2941 rat_p = (i >> 3) & 1;
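/* Each bit of I enables one address component; e.g. (illustrative)
i == 13 == 0b1101 stands for sym + cst + rat * index. */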
2942
2943 addr = reg0;
2944 if (rat_p)
2945 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2946 gen_int_mode (rat[mem_mode], Pmode));
2947
2948 if (var_p)
2949 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2950
2951 if (sym_p)
2952 {
2953 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2954 /* ??? We can run into trouble with some backends by presenting
2955 it with symbols which haven't been properly passed through
2956 targetm.encode_section_info. By setting the local bit, we
2957 enhance the probability of things working. */
2958 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
2959
2960 if (off_p)
2961 base = gen_rtx_fmt_e (CONST, Pmode,
2962 gen_rtx_fmt_ee (PLUS, Pmode,
2963 base,
2964 gen_int_mode (off[mem_mode],
2965 Pmode)));
2966 }
2967 else if (off_p)
2968 base = gen_int_mode (off[mem_mode], Pmode);
2969 else
2970 base = NULL_RTX;
2971
2972 if (base)
2973 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2974
2975 start_sequence ();
2976 /* To avoid splitting addressing modes, pretend that no cse will
2977 follow. */
2978 old_cse_not_expected = cse_not_expected;
2979 cse_not_expected = true;
2980 addr = memory_address (mem_mode, addr);
2981 cse_not_expected = old_cse_not_expected;
2982 seq = get_insns ();
2983 end_sequence ();
2984
2985 acost = seq_cost (seq);
2986 acost += address_cost (addr, mem_mode);
2987
2988 if (!acost)
2989 acost = 1;
2990 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2991 }
2992
2993 /* On some targets, it is quite expensive to load a symbol into a register,
2994 which makes addresses that contain symbols look much more expensive.
2995 However, the symbol will have to be loaded in any case before the
2996 loop (and quite likely we have it in a register already), so it does not
2997 make much sense to penalize them too heavily. So make some final
2998 tweaks for the SYMBOL_PRESENT modes:
2999
3000 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3001 var is cheaper, use this mode with a small penalty.
3002 If VAR_PRESENT is true, try whether the mode with
3003 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3004 and if this is the case, use it. */
3005 add_c = add_cost (Pmode);
3006 for (i = 0; i < 8; i++)
3007 {
3008 var_p = i & 1;
3009 off_p = (i >> 1) & 1;
3010 rat_p = (i >> 2) & 1;
3011
3012 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3013 if (var_p)
3014 acost += add_c;
3015
3016 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3017 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3018 }
3019
3020 if (dump_file && (dump_flags & TDF_DETAILS))
3021 {
3022 fprintf (dump_file, "Address costs:\n");
3023
3024 for (i = 0; i < 16; i++)
3025 {
3026 sym_p = i & 1;
3027 var_p = (i >> 1) & 1;
3028 off_p = (i >> 2) & 1;
3029 rat_p = (i >> 3) & 1;
3030
3031 fprintf (dump_file, " ");
3032 if (sym_p)
3033 fprintf (dump_file, "sym + ");
3034 if (var_p)
3035 fprintf (dump_file, "var + ");
3036 if (off_p)
3037 fprintf (dump_file, "cst + ");
3038 if (rat_p)
3039 fprintf (dump_file, "rat * ");
3040
3041 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3042 fprintf (dump_file, "index costs %d\n", acost);
3043 }
3044 fprintf (dump_file, "\n");
3045 }
3046 }
3047
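/* Sign-extend OFFSET from the width of Pmode to HOST_WIDE_INT; e.g.
(illustrative) for a 32-bit Pmode, the offset 0xfffffffc becomes
s_offset == -4. */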
3048 bits = GET_MODE_BITSIZE (Pmode);
3049 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3050 offset &= mask;
3051 if ((offset >> (bits - 1) & 1))
3052 offset |= ~mask;
3053 s_offset = offset;
3054
3055 cost = 0;
3056 offset_p = (s_offset != 0
3057 && min_offset[mem_mode] <= s_offset
3058 && s_offset <= max_offset[mem_mode]);
3059 ratio_p = (ratio != 1
3060 && multiplier_allowed_in_address_p (ratio, mem_mode));
3061
3062 if (ratio != 1 && !ratio_p)
3063 cost += multiply_by_cost (ratio, Pmode);
3064
3065 if (s_offset && !offset_p && !symbol_present)
3066 cost += add_cost (Pmode);
3067
3068 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3069 return cost + acost;
3070 }
3071
3072 /* Estimates cost of forcing expression EXPR into a variable. */
3073
3074 unsigned
3075 force_expr_to_var_cost (tree expr)
3076 {
3077 static bool costs_initialized = false;
3078 static unsigned integer_cost;
3079 static unsigned symbol_cost;
3080 static unsigned address_cost;
3081 tree op0, op1;
3082 unsigned cost0, cost1, cost;
3083 enum machine_mode mode;
3084
3085 if (!costs_initialized)
3086 {
3087 tree type = build_pointer_type (integer_type_node);
3088 tree var, addr;
3089 rtx x;
3090
3091 var = create_tmp_var_raw (integer_type_node, "test_var");
3092 TREE_STATIC (var) = 1;
3093 x = produce_memory_decl_rtl (var, NULL);
3094 SET_DECL_RTL (var, x);
3095
3096 integer_cost = computation_cost (build_int_cst (integer_type_node,
3097 2000));
3098
3099 addr = build1 (ADDR_EXPR, type, var);
3100 symbol_cost = computation_cost (addr) + 1;
3101
3102 address_cost
3103 = computation_cost (build2 (PLUS_EXPR, type,
3104 addr,
3105 build_int_cst (type, 2000))) + 1;
3106 if (dump_file && (dump_flags & TDF_DETAILS))
3107 {
3108 fprintf (dump_file, "force_expr_to_var_cost:\n");
3109 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3110 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3111 fprintf (dump_file, " address %d\n", (int) address_cost);
3112 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3113 fprintf (dump_file, "\n");
3114 }
3115
3116 costs_initialized = true;
3117 }
3118
3119 STRIP_NOPS (expr);
3120
3121 if (SSA_VAR_P (expr))
3122 return 0;
3123
3124 if (TREE_INVARIANT (expr))
3125 {
3126 if (TREE_CODE (expr) == INTEGER_CST)
3127 return integer_cost;
3128
3129 if (TREE_CODE (expr) == ADDR_EXPR)
3130 {
3131 tree obj = TREE_OPERAND (expr, 0);
3132
3133 if (TREE_CODE (obj) == VAR_DECL
3134 || TREE_CODE (obj) == PARM_DECL
3135 || TREE_CODE (obj) == RESULT_DECL)
3136 return symbol_cost;
3137 }
3138
3139 return address_cost;
3140 }
3141
3142 switch (TREE_CODE (expr))
3143 {
3144 case PLUS_EXPR:
3145 case MINUS_EXPR:
3146 case MULT_EXPR:
3147 op0 = TREE_OPERAND (expr, 0);
3148 op1 = TREE_OPERAND (expr, 1);
3149 STRIP_NOPS (op0);
3150 STRIP_NOPS (op1);
3151
3152 if (is_gimple_val (op0))
3153 cost0 = 0;
3154 else
3155 cost0 = force_expr_to_var_cost (op0);
3156
3157 if (is_gimple_val (op1))
3158 cost1 = 0;
3159 else
3160 cost1 = force_expr_to_var_cost (op1);
3161
3162 break;
3163
3164 default:
3165 /* Just an arbitrary value, FIXME. */
3166 return target_spill_cost;
3167 }
3168
3169 mode = TYPE_MODE (TREE_TYPE (expr));
3170 switch (TREE_CODE (expr))
3171 {
3172 case PLUS_EXPR:
3173 case MINUS_EXPR:
3174 cost = add_cost (mode);
3175 break;
3176
3177 case MULT_EXPR:
3178 if (cst_and_fits_in_hwi (op0))
3179 cost = multiply_by_cost (int_cst_value (op0), mode);
3180 else if (cst_and_fits_in_hwi (op1))
3181 cost = multiply_by_cost (int_cst_value (op1), mode);
3182 else
3183 return target_spill_cost;
3184 break;
3185
3186 default:
3187 gcc_unreachable ();
3188 }
3189
3190 cost += cost0;
3191 cost += cost1;
3192
3193 /* Bound the cost by target_spill_cost. The parts of complicated
3194 computations often are either loop invariant or at least can
3195 be shared between several iv uses, so letting this grow without
3196 limits would not give reasonable results. */
3197 return cost < target_spill_cost ? cost : target_spill_cost;
3198 }
3199
3200 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3201 invariants the computation depends on. */
3202
3203 static unsigned
3204 force_var_cost (struct ivopts_data *data,
3205 tree expr, bitmap *depends_on)
3206 {
3207 if (depends_on)
3208 {
3209 fd_ivopts_data = data;
3210 walk_tree (&expr, find_depends, depends_on, NULL);
3211 }
3212
3213 return force_expr_to_var_cost (expr);
3214 }
3215
3216 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3217 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3218 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3219 invariants the computation depends on. */
3220
3221 static unsigned
3222 split_address_cost (struct ivopts_data *data,
3223 tree addr, bool *symbol_present, bool *var_present,
3224 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3225 {
3226 tree core;
3227 HOST_WIDE_INT bitsize;
3228 HOST_WIDE_INT bitpos;
3229 tree toffset;
3230 enum machine_mode mode;
3231 int unsignedp, volatilep;
3232
3233 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3234 &unsignedp, &volatilep, false);
3235
3236 if (toffset != 0
3237 || bitpos % BITS_PER_UNIT != 0
3238 || TREE_CODE (core) != VAR_DECL)
3239 {
3240 *symbol_present = false;
3241 *var_present = true;
3242 fd_ivopts_data = data;
3243 walk_tree (&addr, find_depends, depends_on, NULL);
3244 return target_spill_cost;
3245 }
3246
3247 *offset += bitpos / BITS_PER_UNIT;
3248 if (TREE_STATIC (core)
3249 || DECL_EXTERNAL (core))
3250 {
3251 *symbol_present = true;
3252 *var_present = false;
3253 return 0;
3254 }
3255
3256 *symbol_present = false;
3257 *var_present = true;
3258 return 0;
3259 }
3260
3261 /* Estimates cost of expressing difference of addresses E1 - E2 as
3262 var + symbol + offset. The value of offset is added to OFFSET,
3263 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3264 part is missing. DEPENDS_ON is a set of the invariants the computation
3265 depends on. */
3266
3267 static unsigned
3268 ptr_difference_cost (struct ivopts_data *data,
3269 tree e1, tree e2, bool *symbol_present, bool *var_present,
3270 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3271 {
3272 HOST_WIDE_INT diff = 0;
3273 unsigned cost;
3274
3275 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3276
3277 if (ptr_difference_const (e1, e2, &diff))
3278 {
3279 *offset += diff;
3280 *symbol_present = false;
3281 *var_present = false;
3282 return 0;
3283 }
3284
3285 if (e2 == integer_zero_node)
3286 return split_address_cost (data, TREE_OPERAND (e1, 0),
3287 symbol_present, var_present, offset, depends_on);
3288
3289 *symbol_present = false;
3290 *var_present = true;
3291
3292 cost = force_var_cost (data, e1, depends_on);
3293 cost += force_var_cost (data, e2, depends_on);
3294 cost += add_cost (Pmode);
3295
3296 return cost;
3297 }
3298
3299 /* Estimates cost of expressing difference E1 - E2 as
3300 var + symbol + offset. The value of offset is added to OFFSET,
3301 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3302 part is missing. DEPENDS_ON is a set of the invariants the computation
3303 depends on. */
3304
3305 static unsigned
3306 difference_cost (struct ivopts_data *data,
3307 tree e1, tree e2, bool *symbol_present, bool *var_present,
3308 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3309 {
3310 unsigned cost;
3311 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3312 unsigned HOST_WIDE_INT off1, off2;
3313
3314 e1 = strip_offset (e1, &off1);
3315 e2 = strip_offset (e2, &off2);
3316 *offset += off1 - off2;
3317
3318 STRIP_NOPS (e1);
3319 STRIP_NOPS (e2);
3320
3321 if (TREE_CODE (e1) == ADDR_EXPR)
3322 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3323 depends_on);
3324 *symbol_present = false;
3325
3326 if (operand_equal_p (e1, e2, 0))
3327 {
3328 *var_present = false;
3329 return 0;
3330 }
3331 *var_present = true;
3332 if (integer_zerop (e2))
3333 return force_var_cost (data, e1, depends_on);
3334
3335 if (integer_zerop (e1))
3336 {
3337 cost = force_var_cost (data, e2, depends_on);
3338 cost += multiply_by_cost (-1, mode);
3339
3340 return cost;
3341 }
3342
3343 cost = force_var_cost (data, e1, depends_on);
3344 cost += force_var_cost (data, e2, depends_on);
3345 cost += add_cost (mode);
3346
3347 return cost;
3348 }
3349
3350 /* Determines the cost of the computation by which USE is expressed
3351 from induction variable CAND. If ADDRESS_P is true, we just need
3352 to create an address from it, otherwise we want to get it into a
3353 register. A set of invariants we depend on is stored in
3354 DEPENDS_ON. AT is the statement at which the value is computed. */
3355
3356 static unsigned
3357 get_computation_cost_at (struct ivopts_data *data,
3358 struct iv_use *use, struct iv_cand *cand,
3359 bool address_p, bitmap *depends_on, tree at)
3360 {
3361 tree ubase = use->iv->base, ustep = use->iv->step;
3362 tree cbase, cstep;
3363 tree utype = TREE_TYPE (ubase), ctype;
3364 unsigned HOST_WIDE_INT cstepi, offset = 0;
3365 HOST_WIDE_INT ratio, aratio;
3366 bool var_present, symbol_present;
3367 unsigned cost = 0, n_sums;
3368 double_int rat;
3369
3370 *depends_on = NULL;
3371
3372 /* Only consider real candidates. */
3373 if (!cand->iv)
3374 return INFTY;
3375
3376 cbase = cand->iv->base;
3377 cstep = cand->iv->step;
3378 ctype = TREE_TYPE (cbase);
3379
3380 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3381 {
3382 /* We do not have enough precision to express the values of USE. */
3383 return INFTY;
3384 }
3385
3386 if (address_p)
3387 {
3388 /* Do not try to express the address of an object with a computation
3389 based on the address of a different object. This may cause problems
3390 in rtl-level alias analysis (which does not expect this to happen,
3391 as this is illegal in C), and would be unlikely to be useful
3392 anyway. */
3393 if (use->iv->base_object
3394 && cand->iv->base_object
3395 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3396 return INFTY;
3397 }
3398
3399 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3400 {
3401 /* TODO -- add direct handling of this case. */
3402 goto fallback;
3403 }
3404
3405 /* CSTEPI is removed from the offset in case the statement is after the
3406 increment. If the step is not constant, we use zero instead.
3407 This is a bit imprecise (there is the extra addition), but
3408 redundancy elimination is likely to transform the code so that
3409 it uses the value of the variable before the increment anyway,
3410 so this is not all that unrealistic. */
3411 if (cst_and_fits_in_hwi (cstep))
3412 cstepi = int_cst_value (cstep);
3413 else
3414 cstepi = 0;
3415
3416 if (!constant_multiple_of (ustep, cstep, &rat))
3417 return INFTY;
3418
3419 if (double_int_fits_in_shwi_p (rat))
3420 ratio = double_int_to_shwi (rat);
3421 else
3422 return INFTY;
3423
3424 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3425 or ratio == 1, it is better to handle this like
3426
3427 ubase - ratio * cbase + ratio * var
3428
3429 (this also holds in the case ratio == -1, TODO). */
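/* Illustrative example: with ubase == b + 4, cbase == 8 and ratio == 2,
OFFSET starts at -16, difference_cost then contributes the variable
part b and adds 4 to OFFSET, giving the shape b + (-12) + 2 * var. */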
3430
3431 if (cst_and_fits_in_hwi (cbase))
3432 {
3433 offset = - ratio * int_cst_value (cbase);
3434 cost += difference_cost (data,
3435 ubase, integer_zero_node,
3436 &symbol_present, &var_present, &offset,
3437 depends_on);
3438 }
3439 else if (ratio == 1)
3440 {
3441 cost += difference_cost (data,
3442 ubase, cbase,
3443 &symbol_present, &var_present, &offset,
3444 depends_on);
3445 }
3446 else
3447 {
3448 cost += force_var_cost (data, cbase, depends_on);
3449 cost += add_cost (TYPE_MODE (ctype));
3450 cost += difference_cost (data,
3451 ubase, integer_zero_node,
3452 &symbol_present, &var_present, &offset,
3453 depends_on);
3454 }
3455
3456 /* If we are after the increment, the value of the candidate is higher by
3457 one step. */
3458 if (stmt_after_increment (data->current_loop, cand, at))
3459 offset -= ratio * cstepi;
3460
3461 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3462 (symbol/var/const parts may be omitted). If we are looking for an address,
3463 find the cost of addressing this. */
3464 if (address_p)
3465 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3466 TYPE_MODE (TREE_TYPE (*use->op_p)));
3467
3468 /* Otherwise estimate the costs for computing the expression. */
3469 aratio = ratio > 0 ? ratio : -ratio;
3470 if (!symbol_present && !var_present && !offset)
3471 {
3472 if (ratio != 1)
3473 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3474
3475 return cost;
3476 }
3477
3478 if (aratio != 1)
3479 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3480
3481 n_sums = 1;
3482 if (var_present
3483 /* Symbol + offset should be compile-time computable. */
3484 && (symbol_present || offset))
3485 n_sums++;
3486
3487 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3488
3489 fallback:
3490 {
3491 /* Just get the expression, expand it and measure the cost. */
3492 tree comp = get_computation_at (data->current_loop, use, cand, at);
3493
3494 if (!comp)
3495 return INFTY;
3496
3497 if (address_p)
3498 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3499
3500 return computation_cost (comp);
3501 }
3502 }
3503
3504 /* Determines the cost of the computation by which USE is expressed
3505 from induction variable CAND. If ADDRESS_P is true, we just need
3506 to create an address from it, otherwise we want to get it into a
3507 register. A set of invariants we depend on is stored in
3508 DEPENDS_ON. */
3509
3510 static unsigned
3511 get_computation_cost (struct ivopts_data *data,
3512 struct iv_use *use, struct iv_cand *cand,
3513 bool address_p, bitmap *depends_on)
3514 {
3515 return get_computation_cost_at (data,
3516 use, cand, address_p, depends_on, use->stmt);
3517 }
3518
3519 /* Determines cost of basing replacement of USE on CAND in a generic
3520 expression. */
3521
3522 static bool
3523 determine_use_iv_cost_generic (struct ivopts_data *data,
3524 struct iv_use *use, struct iv_cand *cand)
3525 {
3526 bitmap depends_on;
3527 unsigned cost;
3528
3529 /* The simple case first -- if we need to express the value of the preserved
3530 original biv, the cost is 0. This also prevents us from counting the
3531 cost of the increment twice -- once at this use and once in the cost of
3532 the candidate. */
3533 if (cand->pos == IP_ORIGINAL
3534 && cand->incremented_at == use->stmt)
3535 {
3536 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3537 return true;
3538 }
3539
3540 cost = get_computation_cost (data, use, cand, false, &depends_on);
3541 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3542
3543 return cost != INFTY;
3544 }
3545
3546 /* Determines cost of basing replacement of USE on CAND in an address. */
3547
3548 static bool
3549 determine_use_iv_cost_address (struct ivopts_data *data,
3550 struct iv_use *use, struct iv_cand *cand)
3551 {
3552 bitmap depends_on;
3553 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3554
3555 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3556
3557 return cost != INFTY;
3558 }
3559
3560 /* Computes value of candidate CAND at position AT in iteration NITER, and
3561 stores it to VAL. */
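/* In other words, VAL == BASE + NITER * STEP, plus one extra STEP if AT
is already past the increment of CAND. */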
3562
3563 static void
3564 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3565 aff_tree *val)
3566 {
3567 aff_tree step, delta, nit;
3568 struct iv *iv = cand->iv;
3569 tree type = TREE_TYPE (iv->base);
3570
3571 tree_to_aff_combination (iv->step, type, &step);
3572 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3573 aff_combination_convert (&nit, type);
3574 aff_combination_mult (&nit, &step, &delta);
3575 if (stmt_after_increment (loop, cand, at))
3576 aff_combination_add (&delta, &step);
3577
3578 tree_to_aff_combination (iv->base, type, val);
3579 aff_combination_add (val, &delta);
3580 }
3581
3582 /* Returns period of induction variable iv. */
3583
3584 static tree
3585 iv_period (struct iv *iv)
3586 {
3587 tree step = iv->step, period, type;
3588 tree pow2div;
3589
3590 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3591
3592 /* The period of the iv is lcm (step, type range) / step, i.e.
3593 2^(TYPE_PRECISION - num_ending_zeros (step)); we return this value
3594 minus one, built as a mask of that many low bits. */
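/* Illustrative example: a 32-bit iv with step 12 has two ending zeros,
so the returned value is 2^30 - 1. */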
3595 pow2div = num_ending_zeros (step);
3596 type = unsigned_type_for (TREE_TYPE (step));
3597
3598 period = build_low_bits_mask (type,
3599 (TYPE_PRECISION (type)
3600 - tree_low_cst (pow2div, 1)));
3601
3602 return period;
3603 }
3604
3605 /* Returns the comparison operator used when eliminating the iv USE. */
3606
3607 static enum tree_code
3608 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3609 {
3610 struct loop *loop = data->current_loop;
3611 basic_block ex_bb;
3612 edge exit;
3613
3614 ex_bb = bb_for_stmt (use->stmt);
3615 exit = EDGE_SUCC (ex_bb, 0);
3616 if (flow_bb_inside_loop_p (loop, exit->dest))
3617 exit = EDGE_SUCC (ex_bb, 1);
3618
3619 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3620 }
3621
3622 /* Check whether it is possible to express the condition in USE by comparison
3623 of candidate CAND. If so, store the value compared with to BOUND. */
3624
3625 static bool
3626 may_eliminate_iv (struct ivopts_data *data,
3627 struct iv_use *use, struct iv_cand *cand, tree *bound)
3628 {
3629 basic_block ex_bb;
3630 edge exit;
3631 tree nit, nit_type;
3632 tree wider_type, period, per_type;
3633 struct loop *loop = data->current_loop;
3634 aff_tree bnd;
3635
3636 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3637 return false;
3638
3639 /* For now this works only for exits that dominate the loop latch. TODO --
3640 extend it to other conditions inside the loop body. */
3641 ex_bb = bb_for_stmt (use->stmt);
3642 if (use->stmt != last_stmt (ex_bb)
3643 || TREE_CODE (use->stmt) != COND_EXPR)
3644 return false;
3645 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3646 return false;
3647
3648 exit = EDGE_SUCC (ex_bb, 0);
3649 if (flow_bb_inside_loop_p (loop, exit->dest))
3650 exit = EDGE_SUCC (ex_bb, 1);
3651 if (flow_bb_inside_loop_p (loop, exit->dest))
3652 return false;
3653
3654 nit = niter_for_exit (data, exit);
3655 if (!nit)
3656 return false;
3657
3658 nit_type = TREE_TYPE (nit);
3659
3660 /* Determine whether we may use the variable to test whether niter iterations
3661 elapsed. This is the case iff the period of the induction variable is
3662 greater than the number of iterations. */
3663 period = iv_period (cand->iv);
3664 if (!period)
3665 return false;
3666 per_type = TREE_TYPE (period);
3667
3669 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3670 wider_type = per_type;
3671 else
3672 wider_type = nit_type;
3673
3674 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3675 fold_convert (wider_type, period),
3676 fold_convert (wider_type, nit))))
3677 return false;
3678
3679 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3680 *bound = aff_combination_to_tree (&bnd);
3681 return true;
3682 }
3683
3684 /* Determines cost of basing replacement of USE on CAND in a condition. */
3685
3686 static bool
3687 determine_use_iv_cost_condition (struct ivopts_data *data,
3688 struct iv_use *use, struct iv_cand *cand)
3689 {
3690 tree bound = NULL_TREE;
3691 struct iv *cmp_iv;
3692 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3693 unsigned elim_cost, express_cost, cost;
3694 bool ok;
3695
3696 /* Only consider real candidates. */
3697 if (!cand->iv)
3698 {
3699 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3700 return false;
3701 }
3702
3703 /* Try iv elimination. */
3704 if (may_eliminate_iv (data, use, cand, &bound))
3705 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3706 else
3707 elim_cost = INFTY;
3708
3709 /* Try expressing the original giv. If it is compared with an invariant,
3710 note that we cannot get rid of it. */
3711 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3712 gcc_assert (ok);
3713
3714 express_cost = get_computation_cost (data, use, cand, false,
3715 &depends_on_express);
3716 fd_ivopts_data = data;
3717 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3718
3719 /* Choose the better approach. */
3720 if (elim_cost < express_cost)
3721 {
3722 cost = elim_cost;
3723 depends_on = depends_on_elim;
3724 depends_on_elim = NULL;
3725 }
3726 else
3727 {
3728 cost = express_cost;
3729 depends_on = depends_on_express;
3730 depends_on_express = NULL;
3731 bound = NULL_TREE;
3732 }
3733
3734 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3735
3736 if (depends_on_elim)
3737 BITMAP_FREE (depends_on_elim);
3738 if (depends_on_express)
3739 BITMAP_FREE (depends_on_express);
3740
3741 return cost != INFTY;
3742 }
3743
3744 /* Determines cost of basing replacement of USE on CAND. Returns false
3745 if USE cannot be based on CAND. */
3746
3747 static bool
3748 determine_use_iv_cost (struct ivopts_data *data,
3749 struct iv_use *use, struct iv_cand *cand)
3750 {
3751 switch (use->type)
3752 {
3753 case USE_NONLINEAR_EXPR:
3754 return determine_use_iv_cost_generic (data, use, cand);
3755
3756 case USE_ADDRESS:
3757 return determine_use_iv_cost_address (data, use, cand);
3758
3759 case USE_COMPARE:
3760 return determine_use_iv_cost_condition (data, use, cand);
3761
3762 default:
3763 gcc_unreachable ();
3764 }
3765 }
3766
3767 /* Determines the costs of basing each use on each iv candidate. */
3768
3769 static void
3770 determine_use_iv_costs (struct ivopts_data *data)
3771 {
3772 unsigned i, j;
3773 struct iv_use *use;
3774 struct iv_cand *cand;
3775 bitmap to_clear = BITMAP_ALLOC (NULL);
3776
3777 alloc_use_cost_map (data);
3778
3779 for (i = 0; i < n_iv_uses (data); i++)
3780 {
3781 use = iv_use (data, i);
3782
3783 if (data->consider_all_candidates)
3784 {
3785 for (j = 0; j < n_iv_cands (data); j++)
3786 {
3787 cand = iv_cand (data, j);
3788 determine_use_iv_cost (data, use, cand);
3789 }
3790 }
3791 else
3792 {
3793 bitmap_iterator bi;
3794
3795 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3796 {
3797 cand = iv_cand (data, j);
3798 if (!determine_use_iv_cost (data, use, cand))
3799 bitmap_set_bit (to_clear, j);
3800 }
3801
3802 /* Remove the candidates for which the cost is infinite from
3803 the list of related candidates. */
3804 bitmap_and_compl_into (use->related_cands, to_clear);
3805 bitmap_clear (to_clear);
3806 }
3807 }
3808
3809 BITMAP_FREE (to_clear);
3810
3811 if (dump_file && (dump_flags & TDF_DETAILS))
3812 {
3813 fprintf (dump_file, "Use-candidate costs:\n");
3814
3815 for (i = 0; i < n_iv_uses (data); i++)
3816 {
3817 use = iv_use (data, i);
3818
3819 fprintf (dump_file, "Use %d:\n", i);
3820 fprintf (dump_file, " cand\tcost\tdepends on\n");
3821 for (j = 0; j < use->n_map_members; j++)
3822 {
3823 if (!use->cost_map[j].cand
3824 || use->cost_map[j].cost == INFTY)
3825 continue;
3826
3827 fprintf (dump_file, " %d\t%d\t",
3828 use->cost_map[j].cand->id,
3829 use->cost_map[j].cost);
3830 if (use->cost_map[j].depends_on)
3831 bitmap_print (dump_file,
3832 use->cost_map[j].depends_on, "","");
3833 fprintf (dump_file, "\n");
3834 }
3835
3836 fprintf (dump_file, "\n");
3837 }
3838 fprintf (dump_file, "\n");
3839 }
3840 }
3841
3842 /* Determines cost of the candidate CAND. */
3843
3844 static void
3845 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3846 {
3847 unsigned cost_base, cost_step;
3848 tree base;
3849
3850 if (!cand->iv)
3851 {
3852 cand->cost = 0;
3853 return;
3854 }
3855
3856 /* There are two costs associated with the candidate -- its increment
3857 and its initialization. The second is almost negligible for any loop
3858 that rolls enough, so we only take it into account a little. */
3859
3860 base = cand->iv->base;
3861 cost_base = force_var_cost (data, base, NULL);
3862 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3863
3864 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3865
3866 /* Prefer the original iv unless we may gain something by replacing it;
3867 this is not really relevant for artificial ivs created by other
3868 passes. */
3869 if (cand->pos == IP_ORIGINAL
3870 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3871 cand->cost--;
3872
3873 /* Prefer not to insert statements into the latch unless there are some
3874 already (so that we do not create unnecessary jumps). */
3875 if (cand->pos == IP_END
3876 && empty_block_p (ip_end_pos (data->current_loop)))
3877 cand->cost++;
3878 }
3879
3880 /* Determines costs of computation of the candidates. */
3881
3882 static void
3883 determine_iv_costs (struct ivopts_data *data)
3884 {
3885 unsigned i;
3886
3887 if (dump_file && (dump_flags & TDF_DETAILS))
3888 {
3889 fprintf (dump_file, "Candidate costs:\n");
3890 fprintf (dump_file, " cand\tcost\n");
3891 }
3892
3893 for (i = 0; i < n_iv_cands (data); i++)
3894 {
3895 struct iv_cand *cand = iv_cand (data, i);
3896
3897 determine_iv_cost (data, cand);
3898
3899 if (dump_file && (dump_flags & TDF_DETAILS))
3900 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3901 }
3902
3903 if (dump_file && (dump_flags & TDF_DETAILS))
3904 fprintf (dump_file, "\n");
3905 }
3906
3907 /* Calculates cost for having SIZE induction variables. */
3908
3909 static unsigned
3910 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3911 {
3912 /* We add size to the cost, so that we prefer eliminating ivs
3913 if possible. */
3914 return size + estimate_reg_pressure_cost (size, data->regs_used);
3915 }
3916
3917 /* For each size of the induction variable set determine the penalty. */
3918
3919 static void
3920 determine_set_costs (struct ivopts_data *data)
3921 {
3922 unsigned j, n;
3923 tree phi, op;
3924 struct loop *loop = data->current_loop;
3925 bitmap_iterator bi;
3926
3927 /* We use the following model (definitely improvable, especially the
3928 cost function -- TODO):
3929
3930 We estimate the number of registers available (using MD data), name it A.
3931
3932 We estimate the number of registers used by the loop, name it U. This
3933 number is obtained as the number of loop phi nodes (not counting virtual
3934 registers and bivs) + the number of variables from outside of the loop.
3935
3936 We set a reserve R (free regs that are used for temporary computations,
3937 etc.). For now the reserve is a constant 3.
3938
3939 Let I be the number of induction variables.
3940
3941 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
3942 encourage creating a lot of ivs without a reason).
3943 -- if A - R < U + I <= A, the cost is I * PRES_COST
3944 -- if U + I > A, the cost is I * PRES_COST and
3945 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
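/* A small illustrative instance: with A == 6, R == 3 and U == 2, a
single iv (I == 1) satisfies U + I + R <= A and is charged only
SMALL_COST, whereas I == 4 already falls into the I * PRES_COST
range. */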
3946
3947 if (dump_file && (dump_flags & TDF_DETAILS))
3948 {
3949 fprintf (dump_file, "Global costs:\n");
3950 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3951 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
3952 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3953 }
3954
3955 n = 0;
3956 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3957 {
3958 op = PHI_RESULT (phi);
3959
3960 if (!is_gimple_reg (op))
3961 continue;
3962
3963 if (get_iv (data, op))
3964 continue;
3965
3966 n++;
3967 }
3968
3969 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3970 {
3971 struct version_info *info = ver_info (data, j);
3972
3973 if (info->inv_id && info->has_nonlin_use)
3974 n++;
3975 }
3976
3977 data->regs_used = n;
3978 if (dump_file && (dump_flags & TDF_DETAILS))
3979 fprintf (dump_file, " regs_used %d\n", n);
3980
3981 if (dump_file && (dump_flags & TDF_DETAILS))
3982 {
3983 fprintf (dump_file, " cost for size:\n");
3984 fprintf (dump_file, " ivs\tcost\n");
3985 for (j = 0; j <= 2 * target_avail_regs; j++)
3986 fprintf (dump_file, " %d\t%d\n", j,
3987 ivopts_global_cost_for_size (data, j));
3988 fprintf (dump_file, "\n");
3989 }
3990 }
3991
3992 /* Returns true if A is a cheaper cost pair than B. */
3993
3994 static bool
3995 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3996 {
3997 if (!a)
3998 return false;
3999
4000 if (!b)
4001 return true;
4002
4003 if (a->cost < b->cost)
4004 return true;
4005
4006 if (a->cost > b->cost)
4007 return false;
4008
4009 /* In case the costs are the same, prefer the cheaper candidate. */
4010 if (a->cand->cost < b->cand->cost)
4011 return true;
4012
4013 return false;
4014 }
4015
4016 /* Computes the cost field of IVS structure. */
4017
4018 static void
4019 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4020 {
4021 unsigned cost = 0;
4022
4023 cost += ivs->cand_use_cost;
4024 cost += ivs->cand_cost;
4025 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4026
4027 ivs->cost = cost;
4028 }
4029
4030 /* Remove invariants in set INVS from set IVS. */
4031
4032 static void
4033 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4034 {
4035 bitmap_iterator bi;
4036 unsigned iid;
4037
4038 if (!invs)
4039 return;
4040
4041 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4042 {
4043 ivs->n_invariant_uses[iid]--;
4044 if (ivs->n_invariant_uses[iid] == 0)
4045 ivs->n_regs--;
4046 }
4047 }
4048
4049 /* Set USE not to be expressed by any candidate in IVS. */
4050
4051 static void
4052 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4053 struct iv_use *use)
4054 {
4055 unsigned uid = use->id, cid;
4056 struct cost_pair *cp;
4057
4058 cp = ivs->cand_for_use[uid];
4059 if (!cp)
4060 return;
4061 cid = cp->cand->id;
4062
4063 ivs->bad_uses++;
4064 ivs->cand_for_use[uid] = NULL;
4065 ivs->n_cand_uses[cid]--;
4066
4067 if (ivs->n_cand_uses[cid] == 0)
4068 {
4069 bitmap_clear_bit (ivs->cands, cid);
4070 /* Do not count the pseudocandidates. */
4071 if (cp->cand->iv)
4072 ivs->n_regs--;
4073 ivs->n_cands--;
4074 ivs->cand_cost -= cp->cand->cost;
4075
4076 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4077 }
4078
4079 ivs->cand_use_cost -= cp->cost;
4080
4081 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4082 iv_ca_recount_cost (data, ivs);
4083 }
4084
4085 /* Add invariants in set INVS to set IVS. */
4086
4087 static void
4088 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4089 {
4090 bitmap_iterator bi;
4091 unsigned iid;
4092
4093 if (!invs)
4094 return;
4095
4096 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4097 {
4098 ivs->n_invariant_uses[iid]++;
4099 if (ivs->n_invariant_uses[iid] == 1)
4100 ivs->n_regs++;
4101 }
4102 }
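
/* The reference counting used by the two functions above, as a
   freestanding sketch; a hypothetical helper, not part of the pass.
   An invariant occupies a register for as long as its use count is
   nonzero, so the register count is adjusted exactly on the 0 -> 1
   and 1 -> 0 transitions. */

static void
example_count_invariant_use (unsigned *n_uses, unsigned *n_regs,
			     unsigned iid, bool add)
{
  if (add)
    {
      if (++n_uses[iid] == 1)
	(*n_regs)++;	/* First use: the invariant now needs a register. */
    }
  else
    {
      if (--n_uses[iid] == 0)
	(*n_regs)--;	/* Last use removed: the register is released. */
    }
}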

/* Set cost pair for USE in set IVS to CP. */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates. */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible. */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS. */

static unsigned
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? INFTY : ivs->cost);
}

/* Returns true if all dependences of CP are among invariants in IVS. */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
   chains it before NEXT_CHANGE. */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2. Destructive -- the old lists
   are rewritten. */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Returns the candidate by which USE is expressed in IVS. */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Reverses the list of changes DELTA, forming its inverse. */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
   reverted instead. */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS. */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns the number of induction variable candidates in the set IVS. */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Frees the list of changes DELTA. */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
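
/* How the delta machinery above is meant to be used -- a hypothetical
   helper, not part of the pass: apply DELTA to IVS tentatively, keep it
   only if it lowers the cost, and otherwise revert and dispose of it. */

static bool
example_try_delta (struct ivopts_data *data, struct iv_ca *ivs,
		   struct iv_ca_delta **delta)
{
  unsigned old_cost = iv_ca_cost (ivs);

  /* Apply the changes and measure the resulting cost. */
  iv_ca_delta_commit (data, ivs, *delta, true);
  if (iv_ca_cost (ivs) < old_cost)
    return true;	/* An improvement -- keep the new assignment. */

  /* No improvement: undo the changes and free the delta. */
  iv_ca_delta_commit (data, ivs, *delta, false);
  iv_ca_delta_free (delta);
  return false;
}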

/* Allocates a new iv candidate assignment. */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = 0;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = 0;

  return nw;
}

/* Free memory occupied by the set IVS. */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE. */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ", "\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}

/* Try changing the candidate to CAND for each use in IVS. Return the cost
   of the new set, and store the differences in DELTA. The number of
   induction variables in the new set is stored in N_IVS. */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND. Return the cost of
   the new set and store the differences in DELTA. */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it. Return the cost of the new set, and store
   the differences in DELTA. */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs. */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express the USE. */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try the important candidates; only if that fails, try the
     use-specific ones. Rationale -- in loops with many variables the best
     choice often is to use just one generic biv. If we added many ivs
     specific to the uses here, the optimization algorithm later would be
     likely to get stuck in a local minimum, thus causing us to create too
     many ivs. Going from few ivs towards more seems more likely to
     succeed -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win. */
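  /* For instance (an illustrative scenario, not taken from this file):
     in a loop that walks ten arrays, sharing a single counter biv among
     all ten address uses is usually cheaper overall than creating ten
     per-use pointer ivs, even though each pointer iv taken alone would
     make its own use slightly cheaper. */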
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this. */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}

/* Finds an initial assignment of candidates to uses. */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS. */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one. */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates. */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead. */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do. */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables. We use a simple
   greedy heuristic: we try to replace at most one candidate in the selected
   solution and remove the unused ivs, as long as this improves the cost. */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution. */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND. */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved. */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly. */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates the new induction variables described in SET. */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}

/* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
   is true, also remove the ssa name defined by the statement. */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      remove_phi_node (stmt, NULL_TREE, including_defined_name);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi, true);
      release_defs (stmt);
    }
}

/* Rewrites USE (the definition of an iv used in a nonlinear expression)
   using candidate CAND. */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back). */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
      gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to an ICE. */
      op = GIMPLE_STMT_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR)
	{
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) == PLUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv. */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it. */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = bsi_after_labels (bb_for_stmt (use->stmt));
      break;

    case GIMPLE_MODIFY_STMT:
      tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      ass = build_gimple_modify_stmt (tgt, op);
      bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
}

/* Replaces ssa name in index IDX by its basic variable. Callback for
   for_each_index. */

static bool
idx_remove_ssa_names (tree base, tree *idx,
		      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
    }

  return true;
}

/* Unshares REF and replaces ssa names inside it by their basic variables. */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}

/* Extracts the alias analysis info for the memory reference REF. There are
   several ways this information may be stored, and its precise semantics
   depends on the type of the reference, but there is always one _DECL node
   hidden somewhere that is used to determine the set of virtual operands
   for the reference. The code below deciphers this jungle and extracts
   that single useful piece of information. */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      if (ref)
	break;
    }

  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name
	 memory tag. If it does not have one, use its symbol memory tag. */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
	return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
	return tag;

      return var;
    }
}

/* Copies the reference information from OLD_REF to NEW_REF. */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}

/* Rewrites USE (an address that is an iv) using candidate CAND. */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND. */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv. */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
}

/* Rewrites USE using candidate CAND. */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  push_stmt_changes (&use->stmt);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  pop_stmt_changes (&use->stmt);
}

/* Rewrite the uses using the selected induction variables. */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting. */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}

/* Frees data allocated by the optimization of a single loop. */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}

/* Finalizes the data structures used by the iv optimization pass. */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}

/* Optimizes the LOOP. Returns true if anything changed. */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  gcc_assert (!data->niters);
  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop. */
  if (!find_induction_variables (data))
    goto finish;

  /* Find the interesting uses (item 1). */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Find the candidates for the induction variables (item 2). */
  find_iv_candidates (data);

  /* Calculate the costs (item 3, part 1). */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2). */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1). */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2). */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting. */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated. */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point. Optimizes induction variables in loops. */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones. */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}