1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
24 #include "hash-table.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-inline.h"
33 #include "gimple-iterator.h"
34 #include "gimple-ssa.h"
36 #include "tree-phinodes.h"
37 #include "ssa-iterators.h"
38 #include "tree-ssanames.h"
39 #include "tree-ssa-loop-niter.h"
40 #include "tree-ssa-loop.h"
43 #include "tree-iterator.h"
44 #include "tree-pass.h"
45 #include "alloc-pool.h"
47 #include "langhooks.h"
48 #include "pointer-set.h"
53 #include "diagnostic-core.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_t
69 3. Optimization of the operand lists, eliminating things like a +
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
106 You want to first merge all leaves of the same rank, as much as
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or less ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
160 newstmt = mergetmp + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
181 int constants_eliminated
;
184 int pows_encountered
;
188 /* Operator, rank pair. */
189 typedef struct operand_entry
197 static alloc_pool operand_entry_pool
;
199 /* This is used to assign a unique ID to each struct operand_entry
200 so that qsort results are identical on different hosts. */
201 static int next_operand_entry_id
;
203 /* Starting rank number for a given basic block, so that we can rank
204 operations using unmovable instructions in that BB based on the bb
206 static long *bb_rank
;
208 /* Operand->rank hashtable. */
209 static struct pointer_map_t
*operand_rank
;
212 static long get_rank (tree
);
215 /* Bias amount for loop-carried phis. We want this to be larger than
216 the depth of any reassociation tree we can see, but not larger than
217 the rank difference between two blocks. */
218 #define PHI_LOOP_BIAS (1 << 15)
220 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
221 an innermost loop, and the phi has only a single use which is inside
222 the loop, then the rank is the block rank of the loop latch plus an
223 extra bias for the loop-carried dependence. This causes expressions
224 calculated into an accumulator variable to be independent for each
225 iteration of the loop. If STMT is some other phi, the rank is the
226 block rank of its containing block. */
228 phi_rank (gimple stmt
)
230 basic_block bb
= gimple_bb (stmt
);
231 struct loop
*father
= bb
->loop_father
;
237 /* We only care about real loops (those with a latch). */
239 return bb_rank
[bb
->index
];
241 /* Interesting phis must be in headers of innermost loops. */
242 if (bb
!= father
->header
244 return bb_rank
[bb
->index
];
246 /* Ignore virtual SSA_NAMEs. */
247 res
= gimple_phi_result (stmt
);
248 if (virtual_operand_p (res
))
249 return bb_rank
[bb
->index
];
251 /* The phi definition must have a single use, and that use must be
252 within the loop. Otherwise this isn't an accumulator pattern. */
253 if (!single_imm_use (res
, &use
, &use_stmt
)
254 || gimple_bb (use_stmt
)->loop_father
!= father
)
255 return bb_rank
[bb
->index
];
257 /* Look for phi arguments from within the loop. If found, bias this phi. */
258 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
260 tree arg
= gimple_phi_arg_def (stmt
, i
);
261 if (TREE_CODE (arg
) == SSA_NAME
262 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
264 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
265 if (gimple_bb (def_stmt
)->loop_father
== father
)
266 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
270 /* Must be an uninteresting phi. */
271 return bb_rank
[bb
->index
];
274 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
275 loop-carried dependence of an innermost loop, return TRUE; else
278 loop_carried_phi (tree exp
)
283 if (TREE_CODE (exp
) != SSA_NAME
284 || SSA_NAME_IS_DEFAULT_DEF (exp
))
287 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
289 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
292 /* Non-loop-carried phis have block rank. Loop-carried phis have
293 an additional bias added in. If this phi doesn't have block rank,
294 it's biased and should not be propagated. */
295 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
297 if (phi_rank (phi_stmt
) != block_rank
)
303 /* Return the maximum of RANK and the rank that should be propagated
304 from expression OP. For most operands, this is just the rank of OP.
305 For loop-carried phis, the value is zero to avoid undoing the bias
306 in favor of the phi. */
308 propagate_rank (long rank
, tree op
)
312 if (loop_carried_phi (op
))
315 op_rank
= get_rank (op
);
317 return MAX (rank
, op_rank
);
320 /* Look up the operand rank structure for expression E. */
323 find_operand_rank (tree e
)
325 void **slot
= pointer_map_contains (operand_rank
, e
);
326 return slot
? (long) (intptr_t) *slot
: -1;
329 /* Insert {E,RANK} into the operand rank hashtable. */
332 insert_operand_rank (tree e
, long rank
)
335 gcc_assert (rank
> 0);
336 slot
= pointer_map_insert (operand_rank
, e
);
338 *slot
= (void *) (intptr_t) rank
;
341 /* Given an expression E, return the rank of the expression. */
346 /* Constants have rank 0. */
347 if (is_gimple_min_invariant (e
))
350 /* SSA_NAME's have the rank of the expression they are the result
352 For globals and uninitialized values, the rank is 0.
353 For function arguments, use the pre-setup rank.
354 For PHI nodes, stores, asm statements, etc, we use the rank of
356 For simple operations, the rank is the maximum rank of any of
357 its operands, or the bb_rank, whichever is less.
358 I make no claims that this is optimal, however, it gives good
361 /* We make an exception to the normal ranking system to break
362 dependences of accumulator variables in loops. Suppose we
363 have a simple one-block loop containing:
370 As shown, each iteration of the calculation into x is fully
371 dependent upon the iteration before it. We would prefer to
372 see this in the form:
379 If the loop is unrolled, the calculations of b and c from
380 different iterations can be interleaved.
382 To obtain this result during reassociation, we bias the rank
383 of the phi definition x_1 upward, when it is recognized as an
384 accumulator pattern. The artificial rank causes it to be
385 added last, providing the desired independence. */
387 if (TREE_CODE (e
) == SSA_NAME
)
394 if (SSA_NAME_IS_DEFAULT_DEF (e
))
395 return find_operand_rank (e
);
397 stmt
= SSA_NAME_DEF_STMT (e
);
398 if (gimple_code (stmt
) == GIMPLE_PHI
)
399 return phi_rank (stmt
);
401 if (!is_gimple_assign (stmt
)
402 || gimple_vdef (stmt
))
403 return bb_rank
[gimple_bb (stmt
)->index
];
405 /* If we already have a rank for this expression, use that. */
406 rank
= find_operand_rank (e
);
410 /* Otherwise, find the maximum rank for the operands. As an
411 exception, remove the bias from loop-carried phis when propagating
412 the rank so that dependent operations are not also biased. */
414 if (gimple_assign_single_p (stmt
))
416 tree rhs
= gimple_assign_rhs1 (stmt
);
417 n
= TREE_OPERAND_LENGTH (rhs
);
419 rank
= propagate_rank (rank
, rhs
);
422 for (i
= 0; i
< n
; i
++)
424 op
= TREE_OPERAND (rhs
, i
);
427 rank
= propagate_rank (rank
, op
);
433 n
= gimple_num_ops (stmt
);
434 for (i
= 1; i
< n
; i
++)
436 op
= gimple_op (stmt
, i
);
438 rank
= propagate_rank (rank
, op
);
442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
444 fprintf (dump_file
, "Rank for ");
445 print_generic_expr (dump_file
, e
, 0);
446 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
449 /* Note the rank in the hashtable so we don't recompute it. */
450 insert_operand_rank (e
, (rank
+ 1));
454 /* Globals, etc, are rank 0 */
459 /* We want integer ones to end up last no matter what, since they are
460 the ones we can do the most with. */
461 #define INTEGER_CONST_TYPE 1 << 3
462 #define FLOAT_CONST_TYPE 1 << 2
463 #define OTHER_CONST_TYPE 1 << 1
465 /* Classify an invariant tree into integer, float, or other, so that
466 we can sort them to be near other constants of the same type. */
468 constant_type (tree t
)
470 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
471 return INTEGER_CONST_TYPE
;
472 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
473 return FLOAT_CONST_TYPE
;
475 return OTHER_CONST_TYPE
;
478 /* qsort comparison function to sort operand entries PA and PB by rank
479 so that the sorted array is ordered by rank in decreasing order. */
481 sort_by_operand_rank (const void *pa
, const void *pb
)
483 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
484 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
486 /* It's nicer for optimize_expression if constants that are likely
487 to fold when added/multiplied//whatever are put next to each
488 other. Since all constants have rank 0, order them by type. */
489 if (oeb
->rank
== 0 && oea
->rank
== 0)
491 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
492 return constant_type (oeb
->op
) - constant_type (oea
->op
);
494 /* To make sorting result stable, we use unique IDs to determine
496 return oeb
->id
- oea
->id
;
499 /* Lastly, make sure the versions that are the same go next to each
500 other. We use SSA_NAME_VERSION because it's stable. */
501 if ((oeb
->rank
- oea
->rank
== 0)
502 && TREE_CODE (oea
->op
) == SSA_NAME
503 && TREE_CODE (oeb
->op
) == SSA_NAME
)
505 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
506 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
508 return oeb
->id
- oea
->id
;
511 if (oeb
->rank
!= oea
->rank
)
512 return oeb
->rank
- oea
->rank
;
514 return oeb
->id
- oea
->id
;
517 /* Add an operand entry to *OPS for the tree operand OP. */
520 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
522 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
525 oe
->rank
= get_rank (op
);
526 oe
->id
= next_operand_entry_id
++;
531 /* Add an operand entry to *OPS for the tree operand OP with repeat
535 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
536 HOST_WIDE_INT repeat
)
538 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
541 oe
->rank
= get_rank (op
);
542 oe
->id
= next_operand_entry_id
++;
546 reassociate_stats
.pows_encountered
++;
549 /* Return true if STMT is reassociable operation containing a binary
550 operation with tree code CODE, and is inside LOOP. */
553 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
555 basic_block bb
= gimple_bb (stmt
);
557 if (gimple_bb (stmt
) == NULL
)
560 if (!flow_bb_inside_loop_p (loop
, bb
))
563 if (is_gimple_assign (stmt
)
564 && gimple_assign_rhs_code (stmt
) == code
565 && has_single_use (gimple_assign_lhs (stmt
)))
572 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
573 operand of the negate operation. Otherwise, return NULL. */
576 get_unary_op (tree name
, enum tree_code opcode
)
578 gimple stmt
= SSA_NAME_DEF_STMT (name
);
580 if (!is_gimple_assign (stmt
))
583 if (gimple_assign_rhs_code (stmt
) == opcode
)
584 return gimple_assign_rhs1 (stmt
);
588 /* If CURR and LAST are a pair of ops that OPCODE allows us to
589 eliminate through equivalences, do so, remove them from OPS, and
590 return true. Otherwise, return false. */
593 eliminate_duplicate_pair (enum tree_code opcode
,
594 vec
<operand_entry_t
> *ops
,
597 operand_entry_t curr
,
598 operand_entry_t last
)
601 /* If we have two of the same op, and the opcode is & |, min, or max,
602 we can eliminate one of them.
603 If we have two of the same op, and the opcode is ^, we can
604 eliminate both of them. */
606 if (last
&& last
->op
== curr
->op
)
614 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
616 fprintf (dump_file
, "Equivalence: ");
617 print_generic_expr (dump_file
, curr
->op
, 0);
618 fprintf (dump_file
, " [&|minmax] ");
619 print_generic_expr (dump_file
, last
->op
, 0);
620 fprintf (dump_file
, " -> ");
621 print_generic_stmt (dump_file
, last
->op
, 0);
624 ops
->ordered_remove (i
);
625 reassociate_stats
.ops_eliminated
++;
630 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
632 fprintf (dump_file
, "Equivalence: ");
633 print_generic_expr (dump_file
, curr
->op
, 0);
634 fprintf (dump_file
, " ^ ");
635 print_generic_expr (dump_file
, last
->op
, 0);
636 fprintf (dump_file
, " -> nothing\n");
639 reassociate_stats
.ops_eliminated
+= 2;
641 if (ops
->length () == 2)
644 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
649 ops
->ordered_remove (i
-1);
650 ops
->ordered_remove (i
-1);
662 static vec
<tree
> plus_negates
;
664 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
665 expression, look in OPS for a corresponding positive operation to cancel
666 it out. If we find one, remove the other from OPS, replace
667 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
671 eliminate_plus_minus_pair (enum tree_code opcode
,
672 vec
<operand_entry_t
> *ops
,
673 unsigned int currindex
,
674 operand_entry_t curr
)
681 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
684 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
685 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
686 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
689 /* Any non-negated version will have a rank that is one less than
690 the current rank. So once we hit those ranks, if we don't find
693 for (i
= currindex
+ 1;
694 ops
->iterate (i
, &oe
)
695 && oe
->rank
>= curr
->rank
- 1 ;
698 if (oe
->op
== negateop
)
701 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
703 fprintf (dump_file
, "Equivalence: ");
704 print_generic_expr (dump_file
, negateop
, 0);
705 fprintf (dump_file
, " + -");
706 print_generic_expr (dump_file
, oe
->op
, 0);
707 fprintf (dump_file
, " -> 0\n");
710 ops
->ordered_remove (i
);
711 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
712 ops
->ordered_remove (currindex
);
713 reassociate_stats
.ops_eliminated
++;
717 else if (oe
->op
== notop
)
719 tree op_type
= TREE_TYPE (oe
->op
);
721 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
723 fprintf (dump_file
, "Equivalence: ");
724 print_generic_expr (dump_file
, notop
, 0);
725 fprintf (dump_file
, " + ~");
726 print_generic_expr (dump_file
, oe
->op
, 0);
727 fprintf (dump_file
, " -> -1\n");
730 ops
->ordered_remove (i
);
731 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
732 ops
->ordered_remove (currindex
);
733 reassociate_stats
.ops_eliminated
++;
739 /* CURR->OP is a negate expr in a plus expr: save it for later
740 inspection in repropagate_negates(). */
741 if (negateop
!= NULL_TREE
)
742 plus_negates
.safe_push (curr
->op
);
747 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
748 bitwise not expression, look in OPS for a corresponding operand to
749 cancel it out. If we find one, remove the other from OPS, replace
750 OPS[CURRINDEX] with 0, and return true. Otherwise, return
754 eliminate_not_pairs (enum tree_code opcode
,
755 vec
<operand_entry_t
> *ops
,
756 unsigned int currindex
,
757 operand_entry_t curr
)
763 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
764 || TREE_CODE (curr
->op
) != SSA_NAME
)
767 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
768 if (notop
== NULL_TREE
)
771 /* Any non-not version will have a rank that is one less than
772 the current rank. So once we hit those ranks, if we don't find
775 for (i
= currindex
+ 1;
776 ops
->iterate (i
, &oe
)
777 && oe
->rank
>= curr
->rank
- 1;
782 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
784 fprintf (dump_file
, "Equivalence: ");
785 print_generic_expr (dump_file
, notop
, 0);
786 if (opcode
== BIT_AND_EXPR
)
787 fprintf (dump_file
, " & ~");
788 else if (opcode
== BIT_IOR_EXPR
)
789 fprintf (dump_file
, " | ~");
790 print_generic_expr (dump_file
, oe
->op
, 0);
791 if (opcode
== BIT_AND_EXPR
)
792 fprintf (dump_file
, " -> 0\n");
793 else if (opcode
== BIT_IOR_EXPR
)
794 fprintf (dump_file
, " -> -1\n");
797 if (opcode
== BIT_AND_EXPR
)
798 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
799 else if (opcode
== BIT_IOR_EXPR
)
800 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
801 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
803 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
805 ops
->quick_push (oe
);
813 /* Use constant value that may be present in OPS to try to eliminate
814 operands. Note that this function is only really used when we've
815 eliminated ops for other reasons, or merged constants. Across
816 single statements, fold already does all of this, plus more. There
817 is little point in duplicating logic, so I've only included the
818 identities that I could ever construct testcases to trigger. */
821 eliminate_using_constants (enum tree_code opcode
,
822 vec
<operand_entry_t
> *ops
)
824 operand_entry_t oelast
= ops
->last ();
825 tree type
= TREE_TYPE (oelast
->op
);
827 if (oelast
->rank
== 0
828 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
833 if (integer_zerop (oelast
->op
))
835 if (ops
->length () != 1)
837 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
838 fprintf (dump_file
, "Found & 0, removing all other ops\n");
840 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
843 ops
->quick_push (oelast
);
847 else if (integer_all_onesp (oelast
->op
))
849 if (ops
->length () != 1)
851 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
852 fprintf (dump_file
, "Found & -1, removing\n");
854 reassociate_stats
.ops_eliminated
++;
859 if (integer_all_onesp (oelast
->op
))
861 if (ops
->length () != 1)
863 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
864 fprintf (dump_file
, "Found | -1, removing all other ops\n");
866 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
869 ops
->quick_push (oelast
);
873 else if (integer_zerop (oelast
->op
))
875 if (ops
->length () != 1)
877 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
878 fprintf (dump_file
, "Found | 0, removing\n");
880 reassociate_stats
.ops_eliminated
++;
885 if (integer_zerop (oelast
->op
)
886 || (FLOAT_TYPE_P (type
)
887 && !HONOR_NANS (TYPE_MODE (type
))
888 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
889 && real_zerop (oelast
->op
)))
891 if (ops
->length () != 1)
893 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
894 fprintf (dump_file
, "Found * 0, removing all other ops\n");
896 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
898 ops
->quick_push (oelast
);
902 else if (integer_onep (oelast
->op
)
903 || (FLOAT_TYPE_P (type
)
904 && !HONOR_SNANS (TYPE_MODE (type
))
905 && real_onep (oelast
->op
)))
907 if (ops
->length () != 1)
909 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
910 fprintf (dump_file
, "Found * 1, removing\n");
912 reassociate_stats
.ops_eliminated
++;
920 if (integer_zerop (oelast
->op
)
921 || (FLOAT_TYPE_P (type
)
922 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
923 && fold_real_zero_addition_p (type
, oelast
->op
,
924 opcode
== MINUS_EXPR
)))
926 if (ops
->length () != 1)
928 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
929 fprintf (dump_file
, "Found [|^+] 0, removing\n");
931 reassociate_stats
.ops_eliminated
++;
943 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
946 /* Structure for tracking and counting operands. */
947 typedef struct oecount_s
{
950 enum tree_code oecode
;
955 /* The heap for the oecount hashtable and the sorted list of operands. */
956 static vec
<oecount
> cvec
;
959 /* Oecount hashtable helpers. */
961 struct oecount_hasher
: typed_noop_remove
<void>
963 /* Note that this hash table stores integers, not pointers.
964 So, observe the casting in the member functions. */
965 typedef void value_type
;
966 typedef void compare_type
;
967 static inline hashval_t
hash (const value_type
*);
968 static inline bool equal (const value_type
*, const compare_type
*);
971 /* Hash function for oecount. */
974 oecount_hasher::hash (const value_type
*p
)
976 const oecount
*c
= &cvec
[(size_t)p
- 42];
977 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
980 /* Comparison function for oecount. */
983 oecount_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
985 const oecount
*c1
= &cvec
[(size_t)p1
- 42];
986 const oecount
*c2
= &cvec
[(size_t)p2
- 42];
987 return (c1
->oecode
== c2
->oecode
988 && c1
->op
== c2
->op
);
991 /* Comparison function for qsort sorting oecount elements by count. */
994 oecount_cmp (const void *p1
, const void *p2
)
996 const oecount
*c1
= (const oecount
*)p1
;
997 const oecount
*c2
= (const oecount
*)p2
;
998 if (c1
->cnt
!= c2
->cnt
)
999 return c1
->cnt
- c2
->cnt
;
1001 /* If counts are identical, use unique IDs to stabilize qsort. */
1002 return c1
->id
- c2
->id
;
1005 /* Return TRUE iff STMT represents a builtin call that raises OP
1006 to some exponent. */
1009 stmt_is_power_of_op (gimple stmt
, tree op
)
1013 if (!is_gimple_call (stmt
))
1016 fndecl
= gimple_call_fndecl (stmt
);
1019 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1022 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1024 CASE_FLT_FN (BUILT_IN_POW
):
1025 CASE_FLT_FN (BUILT_IN_POWI
):
1026 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1033 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1034 in place and return the result. Assumes that stmt_is_power_of_op
1035 was previously called for STMT and returned TRUE. */
1037 static HOST_WIDE_INT
1038 decrement_power (gimple stmt
)
1040 REAL_VALUE_TYPE c
, cint
;
1041 HOST_WIDE_INT power
;
1044 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1046 CASE_FLT_FN (BUILT_IN_POW
):
1047 arg1
= gimple_call_arg (stmt
, 1);
1048 c
= TREE_REAL_CST (arg1
);
1049 power
= real_to_integer (&c
) - 1;
1050 real_from_integer (&cint
, VOIDmode
, power
, 0, 0);
1051 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1054 CASE_FLT_FN (BUILT_IN_POWI
):
1055 arg1
= gimple_call_arg (stmt
, 1);
1056 power
= TREE_INT_CST_LOW (arg1
) - 1;
1057 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1065 /* Find the single immediate use of STMT's LHS, and replace it
1066 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1067 replace *DEF with OP as well. */
1070 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1075 gimple_stmt_iterator gsi
;
1077 if (is_gimple_call (stmt
))
1078 lhs
= gimple_call_lhs (stmt
);
1080 lhs
= gimple_assign_lhs (stmt
);
1082 gcc_assert (has_single_use (lhs
));
1083 single_imm_use (lhs
, &use
, &use_stmt
);
1087 if (TREE_CODE (op
) != SSA_NAME
)
1088 update_stmt (use_stmt
);
1089 gsi
= gsi_for_stmt (stmt
);
1090 unlink_stmt_vdef (stmt
);
1091 gsi_remove (&gsi
, true);
1092 release_defs (stmt
);
1095 /* Walks the linear chain with result *DEF searching for an operation
1096 with operand OP and code OPCODE removing that from the chain. *DEF
1097 is updated if there is only one operand but no operation left. */
1100 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1102 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1108 if (opcode
== MULT_EXPR
1109 && stmt_is_power_of_op (stmt
, op
))
1111 if (decrement_power (stmt
) == 1)
1112 propagate_op_to_single_use (op
, stmt
, def
);
1116 name
= gimple_assign_rhs1 (stmt
);
1118 /* If this is the operation we look for and one of the operands
1119 is ours simply propagate the other operand into the stmts
1121 if (gimple_assign_rhs_code (stmt
) == opcode
1123 || gimple_assign_rhs2 (stmt
) == op
))
1126 name
= gimple_assign_rhs2 (stmt
);
1127 propagate_op_to_single_use (name
, stmt
, def
);
1131 /* We might have a multiply of two __builtin_pow* calls, and
1132 the operand might be hiding in the rightmost one. */
1133 if (opcode
== MULT_EXPR
1134 && gimple_assign_rhs_code (stmt
) == opcode
1135 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1136 && has_single_use (gimple_assign_rhs2 (stmt
)))
1138 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1139 if (stmt_is_power_of_op (stmt2
, op
))
1141 if (decrement_power (stmt2
) == 1)
1142 propagate_op_to_single_use (op
, stmt2
, def
);
1147 /* Continue walking the chain. */
1148 gcc_assert (name
!= op
1149 && TREE_CODE (name
) == SSA_NAME
);
1150 stmt
= SSA_NAME_DEF_STMT (name
);
1155 /* Returns true if statement S1 dominates statement S2. Like
1156 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1159 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1161 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1163 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1164 SSA_NAME. Assume it lives at the beginning of function and
1165 thus dominates everything. */
1166 if (!bb1
|| s1
== s2
)
1169 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1175 /* PHIs in the same basic block are assumed to be
1176 executed all in parallel, if only one stmt is a PHI,
1177 it dominates the other stmt in the same basic block. */
1178 if (gimple_code (s1
) == GIMPLE_PHI
)
1181 if (gimple_code (s2
) == GIMPLE_PHI
)
1184 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1186 if (gimple_uid (s1
) < gimple_uid (s2
))
1189 if (gimple_uid (s1
) > gimple_uid (s2
))
1192 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1193 unsigned int uid
= gimple_uid (s1
);
1194 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1196 gimple s
= gsi_stmt (gsi
);
1197 if (gimple_uid (s
) != uid
)
1206 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1209 /* Insert STMT after INSERT_POINT. */
1212 insert_stmt_after (gimple stmt
, gimple insert_point
)
1214 gimple_stmt_iterator gsi
;
1217 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1218 bb
= gimple_bb (insert_point
);
1219 else if (!stmt_ends_bb_p (insert_point
))
1221 gsi
= gsi_for_stmt (insert_point
);
1222 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1223 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1227 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1228 thus if it must end a basic block, it should be a call that can
1229 throw, or some assignment that can throw. If it throws, the LHS
1230 of it will not be initialized though, so only valid places using
1231 the SSA_NAME should be dominated by the fallthru edge. */
1232 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1233 gsi
= gsi_after_labels (bb
);
1234 if (gsi_end_p (gsi
))
1236 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1237 gimple_set_uid (stmt
,
1238 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1241 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1242 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1245 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1246 the result. Places the statement after the definition of either
1247 OP1 or OP2. Returns the new statement. */
1250 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1252 gimple op1def
= NULL
, op2def
= NULL
;
1253 gimple_stmt_iterator gsi
;
1257 /* Create the addition statement. */
1258 op
= make_ssa_name (type
, NULL
);
1259 sum
= gimple_build_assign_with_ops (opcode
, op
, op1
, op2
);
1261 /* Find an insertion place and insert. */
1262 if (TREE_CODE (op1
) == SSA_NAME
)
1263 op1def
= SSA_NAME_DEF_STMT (op1
);
1264 if (TREE_CODE (op2
) == SSA_NAME
)
1265 op2def
= SSA_NAME_DEF_STMT (op2
);
1266 if ((!op1def
|| gimple_nop_p (op1def
))
1267 && (!op2def
|| gimple_nop_p (op2def
)))
1269 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR
));
1270 if (gsi_end_p (gsi
))
1272 gimple_stmt_iterator gsi2
1273 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR
));
1274 gimple_set_uid (sum
,
1275 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1278 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1279 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1283 gimple insert_point
;
1284 if ((!op1def
|| gimple_nop_p (op1def
))
1285 || (op2def
&& !gimple_nop_p (op2def
)
1286 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1287 insert_point
= op2def
;
1289 insert_point
= op1def
;
1290 insert_stmt_after (sum
, insert_point
);
1297 /* Perform un-distribution of divisions and multiplications.
1298 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1299 to (A + B) / X for real X.
1301 The algorithm is organized as follows.
1303 - First we walk the addition chain *OPS looking for summands that
1304 are defined by a multiplication or a real division. This results
1305 in the candidates bitmap with relevant indices into *OPS.
1307 - Second we build the chains of multiplications or divisions for
1308 these candidates, counting the number of occurrences of (operand, code)
1309 pairs in all of the candidates chains.
1311 - Third we sort the (operand, code) pairs by number of occurrence and
1312 process them starting with the pair with the most uses.
1314 * For each such pair we walk the candidates again to build a
1315 second candidate bitmap noting all multiplication/division chains
1316 that have at least one occurrence of (operand, code).
1318 * We build an alternate addition chain only covering these
1319 candidates with one (operand, code) operation removed from their
1320 multiplication/division chain.
1322 * The first candidate gets replaced by the alternate addition chain
1323 multiplied/divided by the operand.
1325 * All candidate chains get disabled for further processing and
1326 processing of (operand, code) pairs continues.
1328 The alternate addition chains built are re-processed by the main
1329 reassociation algorithm which allows optimizing a * x * y + b * y * x
1330 to (a + b) * x * y in one invocation of the reassociation pass. */
1333 undistribute_ops_list (enum tree_code opcode
,
1334 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1336 unsigned int length
= ops
->length ();
1337 operand_entry_t oe1
;
1339 sbitmap candidates
, candidates2
;
1340 unsigned nr_candidates
, nr_candidates2
;
1341 sbitmap_iterator sbi0
;
1342 vec
<operand_entry_t
> *subops
;
1343 hash_table
<oecount_hasher
> ctable
;
1344 bool changed
= false;
1345 int next_oecount_id
= 0;
1348 || opcode
!= PLUS_EXPR
)
1351 /* Build a list of candidates to process. */
1352 candidates
= sbitmap_alloc (length
);
1353 bitmap_clear (candidates
);
1355 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1357 enum tree_code dcode
;
1360 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1362 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1363 if (!is_gimple_assign (oe1def
))
1365 dcode
= gimple_assign_rhs_code (oe1def
);
1366 if ((dcode
!= MULT_EXPR
1367 && dcode
!= RDIV_EXPR
)
1368 || !is_reassociable_op (oe1def
, dcode
, loop
))
1371 bitmap_set_bit (candidates
, i
);
1375 if (nr_candidates
< 2)
1377 sbitmap_free (candidates
);
1381 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1383 fprintf (dump_file
, "searching for un-distribute opportunities ");
1384 print_generic_expr (dump_file
,
1385 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1386 fprintf (dump_file
, " %d\n", nr_candidates
);
1389 /* Build linearized sub-operand lists and the counting table. */
1392 /* ??? Macro arguments cannot have multi-argument template types in
1393 them. This typedef is needed to workaround that limitation. */
1394 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1395 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1396 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1399 enum tree_code oecode
;
1402 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1403 oecode
= gimple_assign_rhs_code (oedef
);
1404 linearize_expr_tree (&subops
[i
], oedef
,
1405 associative_tree_code (oecode
), false);
1407 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1414 c
.id
= next_oecount_id
++;
1417 idx
= cvec
.length () + 41;
1418 slot
= ctable
.find_slot ((void *)idx
, INSERT
);
1421 *slot
= (void *)idx
;
1426 cvec
[(size_t)*slot
- 42].cnt
++;
1432 /* Sort the counting table. */
1433 cvec
.qsort (oecount_cmp
);
1435 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1438 fprintf (dump_file
, "Candidates:\n");
1439 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1441 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1442 c
->oecode
== MULT_EXPR
1443 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1444 print_generic_expr (dump_file
, c
->op
, 0);
1445 fprintf (dump_file
, "\n");
1449 /* Process the (operand, code) pairs in order of most occurrence. */
1450 candidates2
= sbitmap_alloc (length
);
1451 while (!cvec
.is_empty ())
1453 oecount
*c
= &cvec
.last ();
1457 /* Now collect the operands in the outer chain that contain
1458 the common operand in their inner chain. */
1459 bitmap_clear (candidates2
);
1461 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1464 enum tree_code oecode
;
1466 tree op
= (*ops
)[i
]->op
;
1468 /* If we undistributed in this chain already this may be a constant. */
1470 if (TREE_CODE (op
) != SSA_NAME
)
1473 oedef
= SSA_NAME_DEF_STMT (op
);
1474 oecode
= gimple_assign_rhs_code (oedef
);
1475 if (oecode
!= c
->oecode
)
1478 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1480 if (oe1
->op
== c
->op
)
1482 bitmap_set_bit (candidates2
, i
);
1489 if (nr_candidates2
>= 2)
1491 operand_entry_t oe1
, oe2
;
1493 int first
= bitmap_first_set_bit (candidates2
);
1495 /* Build the new addition chain. */
1496 oe1
= (*ops
)[first
];
1497 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1499 fprintf (dump_file
, "Building (");
1500 print_generic_expr (dump_file
, oe1
->op
, 0);
1502 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1503 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1507 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1509 fprintf (dump_file
, " + ");
1510 print_generic_expr (dump_file
, oe2
->op
, 0);
1512 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1513 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1514 oe1
->op
, oe2
->op
, opcode
);
1515 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1517 oe1
->op
= gimple_get_lhs (sum
);
1520 /* Apply the multiplication/division. */
1521 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1522 oe1
->op
, c
->op
, c
->oecode
);
1523 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1525 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1526 print_generic_expr (dump_file
, c
->op
, 0);
1527 fprintf (dump_file
, "\n");
1530 /* Record it in the addition chain and disable further
1531 undistribution with this op. */
1532 oe1
->op
= gimple_assign_lhs (prod
);
1533 oe1
->rank
= get_rank (oe1
->op
);
1534 subops
[first
].release ();
1542 for (i
= 0; i
< ops
->length (); ++i
)
1543 subops
[i
].release ();
1546 sbitmap_free (candidates
);
1547 sbitmap_free (candidates2
);
1552 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1553 expression, examine the other OPS to see if any of them are comparisons
1554 of the same values, which we may be able to combine or eliminate.
1555 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1558 eliminate_redundant_comparison (enum tree_code opcode
,
1559 vec
<operand_entry_t
> *ops
,
1560 unsigned int currindex
,
1561 operand_entry_t curr
)
1564 enum tree_code lcode
, rcode
;
1569 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1572 /* Check that CURR is a comparison. */
1573 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1575 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1576 if (!is_gimple_assign (def1
))
1578 lcode
= gimple_assign_rhs_code (def1
);
1579 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1581 op1
= gimple_assign_rhs1 (def1
);
1582 op2
= gimple_assign_rhs2 (def1
);
1584 /* Now look for a similar comparison in the remaining OPS. */
1585 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1589 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1591 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1592 if (!is_gimple_assign (def2
))
1594 rcode
= gimple_assign_rhs_code (def2
);
1595 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1598 /* If we got here, we have a match. See if we can combine the two comparisons. */
1600 if (opcode
== BIT_IOR_EXPR
)
1601 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1602 rcode
, gimple_assign_rhs1 (def2
),
1603 gimple_assign_rhs2 (def2
));
1605 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1606 rcode
, gimple_assign_rhs1 (def2
),
1607 gimple_assign_rhs2 (def2
));
1611 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1612 always give us a boolean_type_node value back. If the original
1613 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1614 we need to convert. */
1615 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1616 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1618 if (TREE_CODE (t
) != INTEGER_CST
1619 && !operand_equal_p (t
, curr
->op
, 0))
1621 enum tree_code subcode
;
1622 tree newop1
, newop2
;
1623 if (!COMPARISON_CLASS_P (t
))
1625 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1626 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1627 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1628 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1632 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1634 fprintf (dump_file
, "Equivalence: ");
1635 print_generic_expr (dump_file
, curr
->op
, 0);
1636 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1637 print_generic_expr (dump_file
, oe
->op
, 0);
1638 fprintf (dump_file
, " -> ");
1639 print_generic_expr (dump_file
, t
, 0);
1640 fprintf (dump_file
, "\n");
1643 /* Now we can delete oe, as it has been subsumed by the new, combined expression t. */
1645 ops
->ordered_remove (i
);
1646 reassociate_stats
.ops_eliminated
++;
1648 /* If t is the same as curr->op, we're done. Otherwise we must
1649 replace curr->op with t. Special case is if we got a constant
1650 back, in which case we add it to the end instead of in place of
1651 the current entry. */
1652 if (TREE_CODE (t
) == INTEGER_CST
)
1654 ops
->ordered_remove (currindex
);
1655 add_to_ops_vec (ops
, t
);
1657 else if (!operand_equal_p (t
, curr
->op
, 0))
1660 enum tree_code subcode
;
1663 gcc_assert (COMPARISON_CLASS_P (t
));
1664 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1665 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1666 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1667 gcc_checking_assert (is_gimple_val (newop1
)
1668 && is_gimple_val (newop2
));
1669 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1670 curr
->op
= gimple_get_lhs (sum
);
1678 /* Perform various identities and other optimizations on the list of
1679 operand entries, stored in OPS. The tree code for the binary
1680 operation between all the operands is OPCODE. */
1683 optimize_ops_list (enum tree_code opcode
,
1684 vec
<operand_entry_t
> *ops
)
1686 unsigned int length
= ops
->length ();
1689 operand_entry_t oelast
= NULL
;
1690 bool iterate
= false;
1695 oelast
= ops
->last ();
1697 /* If the last two are constants, pop the constants off, merge them
1698 and try the next two. */
1699 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1701 operand_entry_t oelm1
= (*ops
)[length
- 2];
1703 if (oelm1
->rank
== 0
1704 && is_gimple_min_invariant (oelm1
->op
)
1705 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1706 TREE_TYPE (oelast
->op
)))
1708 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1709 oelm1
->op
, oelast
->op
);
1711 if (folded
&& is_gimple_min_invariant (folded
))
1713 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1714 fprintf (dump_file
, "Merging constants\n");
1719 add_to_ops_vec (ops
, folded
);
1720 reassociate_stats
.constants_eliminated
++;
1722 optimize_ops_list (opcode
, ops
);
1728 eliminate_using_constants (opcode
, ops
);
1731 for (i
= 0; ops
->iterate (i
, &oe
);)
1735 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1737 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1738 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1739 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1751 length
= ops
->length ();
1752 oelast
= ops
->last ();
1755 optimize_ops_list (opcode
, ops
);
1758 /* The following functions are subroutines to optimize_range_tests and allow
1759 it to try to change a logical combination of comparisons into a range
1763 X == 2 || X == 5 || X == 3 || X == 4
1767 (unsigned) (X - 2) <= 3
1769 For more information see comments above fold_test_range in fold-const.c,
1770 this implementation is for GIMPLE. */
1778 bool strict_overflow_p
;
1779 unsigned int idx
, next
;
1782 /* This is similar to make_range in fold-const.c, but on top of
1783 GIMPLE instead of trees. If EXP is non-NULL, it should be
1784 an SSA_NAME and STMT argument is ignored, otherwise STMT
1785 argument should be a GIMPLE_COND. */
1788 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1792 bool is_bool
, strict_overflow_p
;
1796 r
->strict_overflow_p
= false;
1798 r
->high
= NULL_TREE
;
1799 if (exp
!= NULL_TREE
1800 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1803 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1804 and see if we can refine the range. Some of the cases below may not
1805 happen, but it doesn't seem worth worrying about this. We "continue"
1806 the outer loop when we've changed something; otherwise we "break"
1807 the switch, which will "break" the while. */
1808 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1811 strict_overflow_p
= false;
1813 if (exp
== NULL_TREE
)
1815 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1817 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1822 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1827 enum tree_code code
;
1828 tree arg0
, arg1
, exp_type
;
1832 if (exp
!= NULL_TREE
)
1834 if (TREE_CODE (exp
) != SSA_NAME
)
1837 stmt
= SSA_NAME_DEF_STMT (exp
);
1838 if (!is_gimple_assign (stmt
))
1841 code
= gimple_assign_rhs_code (stmt
);
1842 arg0
= gimple_assign_rhs1 (stmt
);
1843 arg1
= gimple_assign_rhs2 (stmt
);
1844 exp_type
= TREE_TYPE (exp
);
1848 code
= gimple_cond_code (stmt
);
1849 arg0
= gimple_cond_lhs (stmt
);
1850 arg1
= gimple_cond_rhs (stmt
);
1851 exp_type
= boolean_type_node
;
1854 if (TREE_CODE (arg0
) != SSA_NAME
)
1856 loc
= gimple_location (stmt
);
1860 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1861 /* Ensure the range is either +[-,0], +[0,0],
1862 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1863 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1864 or similar expression of unconditional true or
1865 false, it should not be negated. */
1866 && ((high
&& integer_zerop (high
))
1867 || (low
&& integer_onep (low
))))
1880 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1882 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1887 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1902 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1904 &strict_overflow_p
);
1905 if (nexp
!= NULL_TREE
)
1908 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1921 r
->strict_overflow_p
= strict_overflow_p
;
1925 /* Comparison function for qsort. Sort entries
1926 without SSA_NAME exp first, then with SSA_NAMEs sorted
1927 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1928 by increasing ->low and if ->low is the same, by increasing
1929 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1933 range_entry_cmp (const void *a
, const void *b
)
1935 const struct range_entry
*p
= (const struct range_entry
*) a
;
1936 const struct range_entry
*q
= (const struct range_entry
*) b
;
1938 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1940 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1942 /* Group range_entries for the same SSA_NAME together. */
1943 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1945 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1947 /* If ->low is different, NULL low goes first, then by increasing ->low. */
1949 if (p
->low
!= NULL_TREE
)
1951 if (q
->low
!= NULL_TREE
)
1953 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1955 if (tem
&& integer_onep (tem
))
1957 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1959 if (tem
&& integer_onep (tem
))
1965 else if (q
->low
!= NULL_TREE
)
1967 /* If ->high is different, NULL high goes last, before that by increasing ->high. */
1969 if (p
->high
!= NULL_TREE
)
1971 if (q
->high
!= NULL_TREE
)
1973 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1975 if (tem
&& integer_onep (tem
))
1977 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1979 if (tem
&& integer_onep (tem
))
1985 else if (p
->high
!= NULL_TREE
)
1987 /* If both ranges are the same, sort below by ascending idx. */
1992 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1995 if (p
->idx
< q
->idx
)
1999 gcc_checking_assert (p
->idx
> q
->idx
);
2004 /* Helper routine of optimize_range_test.
2005 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2006 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2007 OPCODE and OPS are arguments of optimize_range_tests. Return
2008 true if the range merge has been successful.
2009 If OPCODE is ERROR_MARK, this is called from within
2010 maybe_optimize_range_tests and is performing inter-bb range optimization.
2011 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2015 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2016 unsigned int count
, enum tree_code opcode
,
2017 vec
<operand_entry_t
> *ops
, tree exp
, bool in_p
,
2018 tree low
, tree high
, bool strict_overflow_p
)
2020 operand_entry_t oe
= (*ops
)[range
->idx
];
2022 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) : last_stmt (BASIC_BLOCK (oe
->id
));
2023 location_t loc
= gimple_location (stmt
);
2024 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2025 tree tem
= build_range_check (loc
, optype
, exp
, in_p
, low
, high
);
2026 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2027 gimple_stmt_iterator gsi
;
2029 if (tem
== NULL_TREE
)
2032 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2033 warning_at (loc
, OPT_Wstrict_overflow
,
2034 "assuming signed overflow does not occur "
2035 "when simplifying range test");
2037 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2039 struct range_entry
*r
;
2040 fprintf (dump_file
, "Optimizing range tests ");
2041 print_generic_expr (dump_file
, range
->exp
, 0);
2042 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2043 print_generic_expr (dump_file
, range
->low
, 0);
2044 fprintf (dump_file
, ", ");
2045 print_generic_expr (dump_file
, range
->high
, 0);
2046 fprintf (dump_file
, "]");
2047 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
2049 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2050 print_generic_expr (dump_file
, r
->low
, 0);
2051 fprintf (dump_file
, ", ");
2052 print_generic_expr (dump_file
, r
->high
, 0);
2053 fprintf (dump_file
, "]");
2055 fprintf (dump_file
, "\n into ");
2056 print_generic_expr (dump_file
, tem
, 0);
2057 fprintf (dump_file
, "\n");
2060 if (opcode
== BIT_IOR_EXPR
2061 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2062 tem
= invert_truthvalue_loc (loc
, tem
);
2064 tem
= fold_convert_loc (loc
, optype
, tem
);
2065 gsi
= gsi_for_stmt (stmt
);
2066 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2068 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
2069 if (gimple_uid (gsi_stmt (gsi
)))
2072 gimple_set_uid (gsi_stmt (gsi
), gimple_uid (stmt
));
2079 range
->strict_overflow_p
= false;
2081 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
2083 oe
= (*ops
)[range
->idx
];
2084 /* Now change all the other range test immediate uses, so that
2085 those tests will be optimized away. */
2086 if (opcode
== ERROR_MARK
)
2089 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2090 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2092 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2093 ? boolean_false_node
: boolean_true_node
);
2096 oe
->op
= error_mark_node
;
2097 range
->exp
= NULL_TREE
;
2102 /* Optimize X == CST1 || X == CST2
2103 if popcount (CST1 ^ CST2) == 1 into
2104 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2105 Similarly for ranges. E.g.
2106 X != 2 && X != 3 && X != 10 && X != 11
2107 will be transformed by the previous optimization into
2108 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2109 and this loop can transform that into
2110 !(((X & ~8) - 2U) <= 1U). */
2113 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2114 tree lowi
, tree lowj
, tree highi
, tree highj
,
2115 vec
<operand_entry_t
> *ops
,
2116 struct range_entry
*rangei
,
2117 struct range_entry
*rangej
)
2119 tree lowxor
, highxor
, tem
, exp
;
2120 /* Check highi ^ lowi == highj ^ lowj and
2121 popcount (highi ^ lowi) == 1. */
2122 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2123 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2125 if (tree_log2 (lowxor
) < 0)
2127 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2128 if (!tree_int_cst_equal (lowxor
, highxor
))
2131 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2132 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2133 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2134 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2135 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, exp
,
2136 rangei
->in_p
, lowj
, highj
,
2137 rangei
->strict_overflow_p
2138 || rangej
->strict_overflow_p
))
2143 /* Optimize X == CST1 || X == CST2
2144 if popcount (CST2 - CST1) == 1 into
2145 ((X - CST1) & ~(CST2 - CST1)) == 0.
2146 Similarly for ranges. E.g.
2147 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2148 || X == 75 || X == 45
2149 will be transformed by the previous optimization into
2150 (X - 43U) <= 3U || (X - 75U) <= 3U
2151 and this loop can transform that into
2152 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2154 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2155 tree lowi
, tree lowj
, tree highi
, tree highj
,
2156 vec
<operand_entry_t
> *ops
,
2157 struct range_entry
*rangei
,
2158 struct range_entry
*rangej
)
2160 tree tem1
, tem2
, mask
;
2161 /* Check highi - lowi == highj - lowj. */
2162 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2163 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2165 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2166 if (!tree_int_cst_equal (tem1
, tem2
))
2168 /* Check popcount (lowj - lowi) == 1. */
2169 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2170 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2172 if (tree_log2 (tem1
) < 0)
2175 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2176 tem1
= fold_binary (MINUS_EXPR
, type
, rangei
->exp
, lowi
);
2177 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2178 lowj
= build_int_cst (type
, 0);
2179 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, tem1
,
2180 rangei
->in_p
, lowj
, tem2
,
2181 rangei
->strict_overflow_p
2182 || rangej
->strict_overflow_p
))
2187 /* It does some common checks for function optimize_range_tests_xor and
2188 optimize_range_tests_diff.
2189 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2190 Else it calls optimize_range_tests_diff. */
2193 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2194 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2195 struct range_entry
*ranges
)
2198 bool any_changes
= false;
2199 for (i
= first
; i
< length
; i
++)
2201 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2203 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2205 type
= TREE_TYPE (ranges
[i
].exp
);
2206 if (!INTEGRAL_TYPE_P (type
))
2208 lowi
= ranges
[i
].low
;
2209 if (lowi
== NULL_TREE
)
2210 lowi
= TYPE_MIN_VALUE (type
);
2211 highi
= ranges
[i
].high
;
2212 if (highi
== NULL_TREE
)
2214 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2217 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2219 lowj
= ranges
[j
].low
;
2220 if (lowj
== NULL_TREE
)
2222 highj
= ranges
[j
].high
;
2223 if (highj
== NULL_TREE
)
2224 highj
= TYPE_MAX_VALUE (type
);
2225 /* Check lowj > highi. */
2226 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2228 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2231 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2233 ranges
+ i
, ranges
+ j
);
2235 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2237 ranges
+ i
, ranges
+ j
);
2248 /* Optimize range tests, similarly how fold_range_test optimizes
2249 it on trees. The tree code for the binary
2250 operation between all the operands is OPCODE.
2251 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2252 maybe_optimize_range_tests for inter-bb range optimization.
2253 In that case if oe->op is NULL, oe->id is bb->index whose
2254 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2255 the actual opcode. */
2258 optimize_range_tests (enum tree_code opcode
,
2259 vec
<operand_entry_t
> *ops
)
2261 unsigned int length
= ops
->length (), i
, j
, first
;
2263 struct range_entry
*ranges
;
2264 bool any_changes
= false;
2269 ranges
= XNEWVEC (struct range_entry
, length
);
2270 for (i
= 0; i
< length
; i
++)
2274 init_range_entry (ranges
+ i
, oe
->op
,
2275 oe
->op
? NULL
: last_stmt (BASIC_BLOCK (oe
->id
)));
2276 /* For | invert it now, we will invert it again before emitting
2277 the optimized expression. */
2278 if (opcode
== BIT_IOR_EXPR
2279 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2280 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2283 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2284 for (i
= 0; i
< length
; i
++)
2285 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2288 /* Try to merge ranges. */
2289 for (first
= i
; i
< length
; i
++)
2291 tree low
= ranges
[i
].low
;
2292 tree high
= ranges
[i
].high
;
2293 int in_p
= ranges
[i
].in_p
;
2294 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2295 int update_fail_count
= 0;
2297 for (j
= i
+ 1; j
< length
; j
++)
2299 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2301 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2302 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2304 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2310 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, j
- i
- 1, opcode
,
2311 ops
, ranges
[i
].exp
, in_p
, low
, high
,
2317 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2318 while update_range_test would fail. */
2319 else if (update_fail_count
== 64)
2322 ++update_fail_count
;
2325 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2328 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2329 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2332 if (any_changes
&& opcode
!= ERROR_MARK
)
2335 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2337 if (oe
->op
== error_mark_node
)
2346 XDELETEVEC (ranges
);
2350 /* Return true if STMT is a cast like:
2356 # _345 = PHI <_123(N), 1(...), 1(...)>
2357 where _234 has bool type, _123 has single use and
2358 bb N has a single successor M. This is commonly used in
2359 the last block of a range test. */
2362 final_range_test_p (gimple stmt
)
2364 basic_block bb
, rhs_bb
;
2367 use_operand_p use_p
;
2370 if (!gimple_assign_cast_p (stmt
))
2372 bb
= gimple_bb (stmt
);
2373 if (!single_succ_p (bb
))
2375 e
= single_succ_edge (bb
);
2376 if (e
->flags
& EDGE_COMPLEX
)
2379 lhs
= gimple_assign_lhs (stmt
);
2380 rhs
= gimple_assign_rhs1 (stmt
);
2381 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2382 || TREE_CODE (rhs
) != SSA_NAME
2383 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2386 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2387 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2390 if (gimple_code (use_stmt
) != GIMPLE_PHI
2391 || gimple_bb (use_stmt
) != e
->dest
)
2394 /* And that the rhs is defined in the same loop. */
2395 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2397 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2403 /* Return true if BB is suitable basic block for inter-bb range test
2404 optimization. If BACKWARD is true, BB should be the only predecessor
2405 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2406 or compared with to find a common basic block to which all conditions
2407 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2408 be the only predecessor of BB. */
2411 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2414 edge_iterator ei
, ei2
;
2417 gimple_stmt_iterator gsi
;
2418 bool other_edge_seen
= false;
2423 /* Check last stmt first. */
2424 stmt
= last_stmt (bb
);
2426 || (gimple_code (stmt
) != GIMPLE_COND
2427 && (backward
|| !final_range_test_p (stmt
)))
2428 || gimple_visited_p (stmt
)
2429 || stmt_could_throw_p (stmt
)
2432 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2435 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2436 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2437 to *OTHER_BB (if not set yet, try to find it out). */
2438 if (EDGE_COUNT (bb
->succs
) != 2)
2440 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2442 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2444 if (e
->dest
== test_bb
)
2453 if (*other_bb
== NULL
)
2455 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2456 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2458 else if (e
->dest
== e2
->dest
)
2459 *other_bb
= e
->dest
;
2460 if (*other_bb
== NULL
)
2463 if (e
->dest
== *other_bb
)
2464 other_edge_seen
= true;
2468 if (*other_bb
== NULL
|| !other_edge_seen
)
2471 else if (single_succ (bb
) != *other_bb
)
2474 /* Now check all PHIs of *OTHER_BB. */
2475 e
= find_edge (bb
, *other_bb
);
2476 e2
= find_edge (test_bb
, *other_bb
);
2477 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2479 gimple phi
= gsi_stmt (gsi
);
2480 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2481 corresponding to BB and TEST_BB predecessor must be the same. */
2482 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2483 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2485 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2486 one of the PHIs should have the lhs of the last stmt in
2487 that block as PHI arg and that PHI should have 0 or 1
2488 corresponding to it in all other range test basic blocks
2492 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2493 == gimple_assign_lhs (stmt
)
2494 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2495 || integer_onep (gimple_phi_arg_def (phi
,
2501 gimple test_last
= last_stmt (test_bb
);
2502 if (gimple_code (test_last
) != GIMPLE_COND
2503 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2504 == gimple_assign_lhs (test_last
)
2505 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2506 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2516 /* Return true if BB doesn't have side-effects that would disallow
2517 range test optimization, all SSA_NAMEs set in the bb are consumed
2518 in the bb and there are no PHIs. */
2521 no_side_effect_bb (basic_block bb
)
2523 gimple_stmt_iterator gsi
;
2526 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2528 last
= last_stmt (bb
);
2529 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2531 gimple stmt
= gsi_stmt (gsi
);
2533 imm_use_iterator imm_iter
;
2534 use_operand_p use_p
;
2536 if (is_gimple_debug (stmt
))
2538 if (gimple_has_side_effects (stmt
))
2542 if (!is_gimple_assign (stmt
))
2544 lhs
= gimple_assign_lhs (stmt
);
2545 if (TREE_CODE (lhs
) != SSA_NAME
)
2547 if (gimple_assign_rhs_could_trap_p (stmt
))
2549 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2551 gimple use_stmt
= USE_STMT (use_p
);
2552 if (is_gimple_debug (use_stmt
))
2554 if (gimple_bb (use_stmt
) != bb
)
2561 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2562 return true and fill in *OPS recursively. */
2565 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2568 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2572 if (!is_reassociable_op (stmt
, code
, loop
))
2575 rhs
[0] = gimple_assign_rhs1 (stmt
);
2576 rhs
[1] = gimple_assign_rhs2 (stmt
);
2577 gimple_set_visited (stmt
, true);
2578 for (i
= 0; i
< 2; i
++)
2579 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2580 && !get_ops (rhs
[i
], code
, ops
, loop
)
2581 && has_single_use (rhs
[i
]))
2583 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2589 ops
->safe_push (oe
);
2594 /* Find the ops that were added by get_ops starting from VAR, see if
2595 they were changed during update_range_test and if yes, create new
2599 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2600 unsigned int *pidx
, struct loop
*loop
)
2602 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2606 if (!is_reassociable_op (stmt
, code
, loop
))
2609 rhs
[0] = gimple_assign_rhs1 (stmt
);
2610 rhs
[1] = gimple_assign_rhs2 (stmt
);
2613 for (i
= 0; i
< 2; i
++)
2614 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2616 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2617 if (rhs
[2 + i
] == NULL_TREE
)
2619 if (has_single_use (rhs
[i
]))
2620 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2622 rhs
[2 + i
] = rhs
[i
];
2625 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2626 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
2628 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
2629 var
= make_ssa_name (TREE_TYPE (var
), NULL
);
2630 gimple g
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
2631 var
, rhs
[2], rhs
[3]);
2632 gimple_set_uid (g
, gimple_uid (stmt
));
2633 gimple_set_visited (g
, true);
2634 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2639 /* Structure to track the initial value passed to get_ops and
2640 the range in the ops vector for each basic block. */
2642 struct inter_bb_range_test_entry
2645 unsigned int first_idx
, last_idx
;
2648 /* Inter-bb range test optimization. */
2651 maybe_optimize_range_tests (gimple stmt
)
2653 basic_block first_bb
= gimple_bb (stmt
);
2654 basic_block last_bb
= first_bb
;
2655 basic_block other_bb
= NULL
;
2659 vec
<operand_entry_t
> ops
= vNULL
;
2660 vec
<inter_bb_range_test_entry
> bbinfo
= vNULL
;
2661 bool any_changes
= false;
2663 /* Consider only basic blocks that end with GIMPLE_COND or
2664 a cast statement satisfying final_range_test_p. All
2665 but the last bb in the first_bb .. last_bb range
2666 should end with GIMPLE_COND. */
2667 if (gimple_code (stmt
) == GIMPLE_COND
)
2669 if (EDGE_COUNT (first_bb
->succs
) != 2)
2672 else if (final_range_test_p (stmt
))
2673 other_bb
= single_succ (first_bb
);
2677 if (stmt_could_throw_p (stmt
))
2680 /* As relative ordering of post-dominator sons isn't fixed,
2681 maybe_optimize_range_tests can be called first on any
2682 bb in the range we want to optimize. So, start searching
2683 backwards, if first_bb can be set to a predecessor. */
2684 while (single_pred_p (first_bb
))
2686 basic_block pred_bb
= single_pred (first_bb
);
2687 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
2689 if (!no_side_effect_bb (first_bb
))
2693 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2694 Before starting forward search in last_bb successors, find
2695 out the other_bb. */
2696 if (first_bb
== last_bb
)
2699 /* As non-GIMPLE_COND last stmt always terminates the range,
2700 if forward search didn't discover anything, just give up. */
2701 if (gimple_code (stmt
) != GIMPLE_COND
)
2703 /* Look at both successors. Either it ends with a GIMPLE_COND
2704 and satisfies suitable_cond_bb, or ends with a cast and
2705 other_bb is that cast's successor. */
2706 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
2707 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
2708 || e
->dest
== first_bb
)
2710 else if (single_pred_p (e
->dest
))
2712 stmt
= last_stmt (e
->dest
);
2714 && gimple_code (stmt
) == GIMPLE_COND
2715 && EDGE_COUNT (e
->dest
->succs
) == 2)
2717 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
2723 && final_range_test_p (stmt
)
2724 && find_edge (first_bb
, single_succ (e
->dest
)))
2726 other_bb
= single_succ (e
->dest
);
2727 if (other_bb
== first_bb
)
2731 if (other_bb
== NULL
)
2734 /* Now do the forward search, moving last_bb to successor bbs
2735 that aren't other_bb. */
2736 while (EDGE_COUNT (last_bb
->succs
) == 2)
2738 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
2739 if (e
->dest
!= other_bb
)
2743 if (!single_pred_p (e
->dest
))
2745 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
2747 if (!no_side_effect_bb (e
->dest
))
2751 if (first_bb
== last_bb
)
2753 /* Here basic blocks first_bb through last_bb's predecessor
2754 end with GIMPLE_COND, all of them have one of the edges to
2755 other_bb and another to another block in the range,
2756 all blocks except first_bb don't have side-effects and
2757 last_bb ends with either GIMPLE_COND, or cast satisfying
2758 final_range_test_p. */
2759 for (bb
= last_bb
; ; bb
= single_pred (bb
))
2761 enum tree_code code
;
2763 inter_bb_range_test_entry bb_ent
;
2765 bb_ent
.op
= NULL_TREE
;
2766 bb_ent
.first_idx
= ops
.length ();
2767 bb_ent
.last_idx
= bb_ent
.first_idx
;
2768 e
= find_edge (bb
, other_bb
);
2769 stmt
= last_stmt (bb
);
2770 gimple_set_visited (stmt
, true);
2771 if (gimple_code (stmt
) != GIMPLE_COND
)
2773 use_operand_p use_p
;
2778 lhs
= gimple_assign_lhs (stmt
);
2779 rhs
= gimple_assign_rhs1 (stmt
);
2780 gcc_assert (bb
== last_bb
);
2787 # _345 = PHI <_123(N), 1(...), 1(...)>
2789 or 0 instead of 1. If it is 0, the _234
2790 range test is anded together with all the
2791 other range tests, if it is 1, it is ored with
2793 single_imm_use (lhs
, &use_p
, &phi
);
2794 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
2795 e2
= find_edge (first_bb
, other_bb
);
2797 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
2798 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
2799 code
= BIT_AND_EXPR
;
2802 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
2803 code
= BIT_IOR_EXPR
;
2806 /* If _234 SSA_NAME_DEF_STMT is
2808 (or &, corresponding to 1/0 in the phi arguments,
2809 push into ops the individual range test arguments
2810 of the bitwise or resp. and, recursively. */
2811 if (!get_ops (rhs
, code
, &ops
,
2812 loop_containing_stmt (stmt
))
2813 && has_single_use (rhs
))
2815 /* Otherwise, push the _234 range test itself. */
2817 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2827 bb_ent
.last_idx
= ops
.length ();
2829 bbinfo
.safe_push (bb_ent
);
2832 /* Otherwise stmt is GIMPLE_COND. */
2833 code
= gimple_cond_code (stmt
);
2834 lhs
= gimple_cond_lhs (stmt
);
2835 rhs
= gimple_cond_rhs (stmt
);
2836 if (TREE_CODE (lhs
) == SSA_NAME
2837 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2838 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
2839 || rhs
!= boolean_false_node
2840 /* Either push into ops the individual bitwise
2841 or resp. and operands, depending on which
2842 edge is other_bb. */
2843 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
2844 ^ (code
== EQ_EXPR
))
2845 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
2846 loop_containing_stmt (stmt
))))
2848 /* Or push the GIMPLE_COND stmt itself. */
2850 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2853 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
2854 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
2855 /* oe->op = NULL signs that there is no SSA_NAME
2856 for the range test, and oe->id instead is the
2857 basic block number, at which's end the GIMPLE_COND
2865 else if (ops
.length () > bb_ent
.first_idx
)
2868 bb_ent
.last_idx
= ops
.length ();
2870 bbinfo
.safe_push (bb_ent
);
2874 if (ops
.length () > 1)
2875 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
2879 /* update_ops relies on has_single_use predicates returning the
2880 same values as it did during get_ops earlier. Additionally it
2881 never removes statements, only adds new ones and it should walk
2882 from the single imm use and check the predicate already before
2883 making those changes.
2884 On the other side, the handling of GIMPLE_COND directly can turn
2885 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2886 it needs to be done in a separate loop afterwards. */
2887 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2889 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2890 && bbinfo
[idx
].op
!= NULL_TREE
)
2894 stmt
= last_stmt (bb
);
2895 new_op
= update_ops (bbinfo
[idx
].op
,
2897 ops
[bbinfo
[idx
].first_idx
]->rank
,
2898 ops
, &bbinfo
[idx
].first_idx
,
2899 loop_containing_stmt (stmt
));
2900 if (new_op
== NULL_TREE
)
2902 gcc_assert (bb
== last_bb
);
2903 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
2905 if (bbinfo
[idx
].op
!= new_op
)
2907 imm_use_iterator iter
;
2908 use_operand_p use_p
;
2909 gimple use_stmt
, cast_stmt
= NULL
;
2911 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
2912 if (is_gimple_debug (use_stmt
))
2914 else if (gimple_code (use_stmt
) == GIMPLE_COND
2915 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2916 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2917 SET_USE (use_p
, new_op
);
2918 else if (gimple_assign_cast_p (use_stmt
))
2919 cast_stmt
= use_stmt
;
2924 gcc_assert (bb
== last_bb
);
2925 tree lhs
= gimple_assign_lhs (cast_stmt
);
2926 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
2927 enum tree_code rhs_code
2928 = gimple_assign_rhs_code (cast_stmt
);
2930 = gimple_build_assign_with_ops (rhs_code
, new_lhs
,
2932 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
2933 gimple_set_uid (g
, gimple_uid (cast_stmt
));
2934 gimple_set_visited (g
, true);
2935 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2936 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2937 if (is_gimple_debug (use_stmt
))
2939 else if (gimple_code (use_stmt
) == GIMPLE_COND
2940 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2941 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2942 SET_USE (use_p
, new_lhs
);
2951 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2953 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2954 && bbinfo
[idx
].op
== NULL_TREE
2955 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
2957 stmt
= last_stmt (bb
);
2958 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
2959 gimple_cond_make_false (stmt
);
2960 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
2961 gimple_cond_make_true (stmt
);
2964 gimple_cond_set_code (stmt
, NE_EXPR
);
2965 gimple_cond_set_lhs (stmt
, ops
[bbinfo
[idx
].first_idx
]->op
);
2966 gimple_cond_set_rhs (stmt
, boolean_false_node
);
2978 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2979 of STMT in it's operands. This is also known as a "destructive
2980 update" operation. */
2983 is_phi_for_stmt (gimple stmt
, tree operand
)
2987 use_operand_p arg_p
;
2990 if (TREE_CODE (operand
) != SSA_NAME
)
2993 lhs
= gimple_assign_lhs (stmt
);
2995 def_stmt
= SSA_NAME_DEF_STMT (operand
);
2996 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
2999 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
3000 if (lhs
== USE_FROM_PTR (arg_p
))
3005 /* Remove def stmt of VAR if VAR has zero uses and recurse
3006 on rhs1 operand if so. */
3009 remove_visited_stmt_chain (tree var
)
3012 gimple_stmt_iterator gsi
;
3016 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
3018 stmt
= SSA_NAME_DEF_STMT (var
);
3019 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
3021 var
= gimple_assign_rhs1 (stmt
);
3022 gsi
= gsi_for_stmt (stmt
);
3023 gsi_remove (&gsi
, true);
3024 release_defs (stmt
);
3031 /* This function checks three consequtive operands in
3032 passed operands vector OPS starting from OPINDEX and
3033 swaps two operands if it is profitable for binary operation
3034 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3036 We pair ops with the same rank if possible.
3038 The alternative we try is to see if STMT is a destructive
3039 update style statement, which is like:
3042 In that case, we want to use the destructive update form to
3043 expose the possible vectorizer sum reduction opportunity.
3044 In that case, the third operand will be the phi node. This
3045 check is not performed if STMT is null.
3047 We could, of course, try to be better as noted above, and do a
3048 lot of work to try to find these opportunities in >3 operand
3049 cases, but it is unlikely to be worth it. */
3052 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3053 unsigned int opindex
, gimple stmt
)
3055 operand_entry_t oe1
, oe2
, oe3
;
3058 oe2
= ops
[opindex
+ 1];
3059 oe3
= ops
[opindex
+ 2];
3061 if ((oe1
->rank
== oe2
->rank
3062 && oe2
->rank
!= oe3
->rank
)
3063 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3064 && !is_phi_for_stmt (stmt
, oe1
->op
)
3065 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3067 struct operand_entry temp
= *oe3
;
3069 oe3
->rank
= oe1
->rank
;
3071 oe1
->rank
= temp
.rank
;
3073 else if ((oe1
->rank
== oe3
->rank
3074 && oe2
->rank
!= oe3
->rank
)
3075 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3076 && !is_phi_for_stmt (stmt
, oe1
->op
)
3077 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3079 struct operand_entry temp
= *oe2
;
3081 oe2
->rank
= oe1
->rank
;
3083 oe1
->rank
= temp
.rank
;
3087 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3088 two definitions, otherwise return STMT. */
3090 static inline gimple
3091 find_insert_point (gimple stmt
, tree rhs1
, tree rhs2
)
3093 if (TREE_CODE (rhs1
) == SSA_NAME
3094 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
3095 stmt
= SSA_NAME_DEF_STMT (rhs1
);
3096 if (TREE_CODE (rhs2
) == SSA_NAME
3097 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
3098 stmt
= SSA_NAME_DEF_STMT (rhs2
);
3102 /* Recursively rewrite our linearized statements so that the operators
3103 match those in OPS[OPINDEX], putting the computation in rank
3104 order. Return new lhs. */
3107 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3108 vec
<operand_entry_t
> ops
, bool changed
)
3110 tree rhs1
= gimple_assign_rhs1 (stmt
);
3111 tree rhs2
= gimple_assign_rhs2 (stmt
);
3112 tree lhs
= gimple_assign_lhs (stmt
);
3115 /* The final recursion case for this function is that you have
3116 exactly two operations left.
3117 If we had one exactly one op in the entire list to start with, we
3118 would have never called this function, and the tail recursion
3119 rewrites them one at a time. */
3120 if (opindex
+ 2 == ops
.length ())
3122 operand_entry_t oe1
, oe2
;
3125 oe2
= ops
[opindex
+ 1];
3127 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3129 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3130 unsigned int uid
= gimple_uid (stmt
);
3132 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3134 fprintf (dump_file
, "Transforming ");
3135 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3140 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3141 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3143 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3144 lhs
, oe1
->op
, oe2
->op
);
3145 gimple_set_uid (stmt
, uid
);
3146 gimple_set_visited (stmt
, true);
3147 if (insert_point
== gsi_stmt (gsi
))
3148 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3150 insert_stmt_after (stmt
, insert_point
);
3154 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3156 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3157 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3161 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3162 remove_visited_stmt_chain (rhs1
);
3164 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3166 fprintf (dump_file
, " into ");
3167 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3173 /* If we hit here, we should have 3 or more ops left. */
3174 gcc_assert (opindex
+ 2 < ops
.length ());
3176 /* Rewrite the next operator. */
3179 /* Recurse on the LHS of the binary operator, which is guaranteed to
3180 be the non-leaf side. */
3182 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3183 changed
|| oe
->op
!= rhs2
);
3185 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3187 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3189 fprintf (dump_file
, "Transforming ");
3190 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3193 /* If changed is false, this is either opindex == 0
3194 or all outer rhs2's were equal to corresponding oe->op,
3195 and powi_result is NULL.
3196 That means lhs is equivalent before and after reassociation.
3197 Otherwise ensure the old lhs SSA_NAME is not reused and
3198 create a new stmt as well, so that any debug stmts will be
3199 properly adjusted. */
3202 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3203 unsigned int uid
= gimple_uid (stmt
);
3204 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3206 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3207 stmt
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3208 lhs
, new_rhs1
, oe
->op
);
3209 gimple_set_uid (stmt
, uid
);
3210 gimple_set_visited (stmt
, true);
3211 if (insert_point
== gsi_stmt (gsi
))
3212 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3214 insert_stmt_after (stmt
, insert_point
);
3218 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3220 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3221 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3225 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3227 fprintf (dump_file
, " into ");
3228 print_gimple_stmt (dump_file
, stmt
, 0, 0);
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
3262 /* Returns an optimal number of registers to use for computation of
3263 given statements. */
3266 get_reassociation_width (int ops_num
, enum tree_code opc
,
3267 enum machine_mode mode
)
3269 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3274 if (param_width
> 0)
3275 width
= param_width
;
3277 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3282 /* Get the minimal time required for sequence computation. */
3283 cycles_best
= get_required_cycles (ops_num
, width
);
3285 /* Check if we may use less width and still compute sequence for
3286 the same time. It will allow us to reduce registers usage.
3287 get_required_cycles is monotonically increasing with lower width
3288 so we can perform a binary search for the minimal width that still
3289 results in the optimal cycle count. */
3291 while (width
> width_min
)
3293 int width_mid
= (width
+ width_min
) / 2;
3295 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3297 else if (width_min
< width_mid
)
3298 width_min
= width_mid
;
3306 /* Recursively rewrite our linearized statements so that the operators
3307 match those in OPS[OPINDEX], putting the computation in rank
3308 order and trying to allow operations to be executed in
3312 rewrite_expr_tree_parallel (gimple stmt
, int width
,
3313 vec
<operand_entry_t
> ops
)
3315 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3316 int op_num
= ops
.length ();
3317 int stmt_num
= op_num
- 1;
3318 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3319 int op_index
= op_num
- 1;
3321 int ready_stmts_end
= 0;
3323 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3325 /* We start expression rewriting from the top statements.
3326 So, in this loop we create a full list of statements
3327 we will work with. */
3328 stmts
[stmt_num
- 1] = stmt
;
3329 for (i
= stmt_num
- 2; i
>= 0; i
--)
3330 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3332 for (i
= 0; i
< stmt_num
; i
++)
3336 /* Determine whether we should use results of
3337 already handled statements or not. */
3338 if (ready_stmts_end
== 0
3339 && (i
- stmt_index
>= width
|| op_index
< 1))
3340 ready_stmts_end
= i
;
3342 /* Now we choose operands for the next statement. Non zero
3343 value in ready_stmts_end means here that we should use
3344 the result of already generated statements as new operand. */
3345 if (ready_stmts_end
> 0)
3347 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3348 if (ready_stmts_end
> stmt_index
)
3349 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3350 else if (op_index
>= 0)
3351 op2
= ops
[op_index
--]->op
;
3354 gcc_assert (stmt_index
< i
);
3355 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3358 if (stmt_index
>= ready_stmts_end
)
3359 ready_stmts_end
= 0;
3364 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3365 op2
= ops
[op_index
--]->op
;
3366 op1
= ops
[op_index
--]->op
;
3369 /* If we emit the last statement then we should put
3370 operands into the last statement. It will also
3372 if (op_index
< 0 && stmt_index
== i
)
3375 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3377 fprintf (dump_file
, "Transforming ");
3378 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3381 /* We keep original statement only for the last one. All
3382 others are recreated. */
3383 if (i
== stmt_num
- 1)
3385 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3386 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3387 update_stmt (stmts
[i
]);
3390 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3392 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3394 fprintf (dump_file
, " into ");
3395 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3399 remove_visited_stmt_chain (last_rhs1
);
3402 /* Transform STMT, which is really (A +B) + (C + D) into the left
3403 linear form, ((A+B)+C)+D.
3404 Recurse on D if necessary. */
3407 linearize_expr (gimple stmt
)
3409 gimple_stmt_iterator gsi
;
3410 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3411 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3412 gimple oldbinrhs
= binrhs
;
3413 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3414 gimple newbinrhs
= NULL
;
3415 struct loop
*loop
= loop_containing_stmt (stmt
);
3416 tree lhs
= gimple_assign_lhs (stmt
);
3418 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3419 && is_reassociable_op (binrhs
, rhscode
, loop
));
3421 gsi
= gsi_for_stmt (stmt
);
3423 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3424 binrhs
= gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs
),
3425 make_ssa_name (TREE_TYPE (lhs
), NULL
),
3426 gimple_assign_lhs (binlhs
),
3427 gimple_assign_rhs2 (binrhs
));
3428 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3429 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3430 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3432 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3433 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3435 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3437 fprintf (dump_file
, "Linearized: ");
3438 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3441 reassociate_stats
.linearized
++;
3444 gsi
= gsi_for_stmt (oldbinrhs
);
3445 gsi_remove (&gsi
, true);
3446 release_defs (oldbinrhs
);
3448 gimple_set_visited (stmt
, true);
3449 gimple_set_visited (binlhs
, true);
3450 gimple_set_visited (binrhs
, true);
3452 /* Tail recurse on the new rhs if it still needs reassociation. */
3453 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3454 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3455 want to change the algorithm while converting to tuples. */
3456 linearize_expr (stmt
);
3459 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3460 it. Otherwise, return NULL. */
3463 get_single_immediate_use (tree lhs
)
3465 use_operand_p immuse
;
3468 if (TREE_CODE (lhs
) == SSA_NAME
3469 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3470 && is_gimple_assign (immusestmt
))
3476 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3477 representing the negated value. Insertions of any necessary
3478 instructions go before GSI.
3479 This function is recursive in that, if you hand it "a_5" as the
3480 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3481 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3484 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3486 gimple negatedefstmt
= NULL
;
3487 tree resultofnegate
;
3488 gimple_stmt_iterator gsi
;
3491 /* If we are trying to negate a name, defined by an add, negate the
3492 add operands instead. */
3493 if (TREE_CODE (tonegate
) == SSA_NAME
)
3494 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3495 if (TREE_CODE (tonegate
) == SSA_NAME
3496 && is_gimple_assign (negatedefstmt
)
3497 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3498 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3499 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3501 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3502 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3503 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3506 gsi
= gsi_for_stmt (negatedefstmt
);
3507 rhs1
= negate_value (rhs1
, &gsi
);
3509 gsi
= gsi_for_stmt (negatedefstmt
);
3510 rhs2
= negate_value (rhs2
, &gsi
);
3512 gsi
= gsi_for_stmt (negatedefstmt
);
3513 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3514 gimple_set_visited (negatedefstmt
, true);
3515 g
= gimple_build_assign_with_ops (PLUS_EXPR
, lhs
, rhs1
, rhs2
);
3516 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3517 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3521 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3522 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3523 NULL_TREE
, true, GSI_SAME_STMT
);
3525 uid
= gimple_uid (gsi_stmt (gsi
));
3526 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3528 gimple stmt
= gsi_stmt (gsi
);
3529 if (gimple_uid (stmt
) != 0)
3531 gimple_set_uid (stmt
, uid
);
3533 return resultofnegate
;
3536 /* Return true if we should break up the subtract in STMT into an add
3537 with negate. This is true when we the subtract operands are really
3538 adds, or the subtract itself is used in an add expression. In
3539 either case, breaking up the subtract into an add with negate
3540 exposes the adds to reassociation. */
3543 should_break_up_subtract (gimple stmt
)
3545 tree lhs
= gimple_assign_lhs (stmt
);
3546 tree binlhs
= gimple_assign_rhs1 (stmt
);
3547 tree binrhs
= gimple_assign_rhs2 (stmt
);
3549 struct loop
*loop
= loop_containing_stmt (stmt
);
3551 if (TREE_CODE (binlhs
) == SSA_NAME
3552 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3555 if (TREE_CODE (binrhs
) == SSA_NAME
3556 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3559 if (TREE_CODE (lhs
) == SSA_NAME
3560 && (immusestmt
= get_single_immediate_use (lhs
))
3561 && is_gimple_assign (immusestmt
)
3562 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3563 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3568 /* Transform STMT from A - B into A + -B. */
3571 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3573 tree rhs1
= gimple_assign_rhs1 (stmt
);
3574 tree rhs2
= gimple_assign_rhs2 (stmt
);
3576 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3578 fprintf (dump_file
, "Breaking up subtract ");
3579 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3582 rhs2
= negate_value (rhs2
, gsip
);
3583 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3587 /* Determine whether STMT is a builtin call that raises an SSA name
3588 to an integer power and has only one use. If so, and this is early
3589 reassociation and unsafe math optimizations are permitted, place
3590 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3591 If any of these conditions does not hold, return FALSE. */
3594 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3597 REAL_VALUE_TYPE c
, cint
;
3599 if (!first_pass_instance
3600 || !flag_unsafe_math_optimizations
3601 || !is_gimple_call (stmt
)
3602 || !has_single_use (gimple_call_lhs (stmt
)))
3605 fndecl
= gimple_call_fndecl (stmt
);
3608 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3611 switch (DECL_FUNCTION_CODE (fndecl
))
3613 CASE_FLT_FN (BUILT_IN_POW
):
3614 *base
= gimple_call_arg (stmt
, 0);
3615 arg1
= gimple_call_arg (stmt
, 1);
3617 if (TREE_CODE (arg1
) != REAL_CST
)
3620 c
= TREE_REAL_CST (arg1
);
3622 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
3625 *exponent
= real_to_integer (&c
);
3626 real_from_integer (&cint
, VOIDmode
, *exponent
,
3627 *exponent
< 0 ? -1 : 0, 0);
3628 if (!real_identical (&c
, &cint
))
3633 CASE_FLT_FN (BUILT_IN_POWI
):
3634 *base
= gimple_call_arg (stmt
, 0);
3635 arg1
= gimple_call_arg (stmt
, 1);
3637 if (!host_integerp (arg1
, 0))
3640 *exponent
= TREE_INT_CST_LOW (arg1
);
3647 /* Expanding negative exponents is generally unproductive, so we don't
3648 complicate matters with those. Exponents of zero and one should
3649 have been handled by expression folding. */
3650 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
3656 /* Recursively linearize a binary expression that is the RHS of STMT.
3657 Place the operands of the expression tree in the vector named OPS. */
3660 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
3661 bool is_associative
, bool set_visited
)
3663 tree binlhs
= gimple_assign_rhs1 (stmt
);
3664 tree binrhs
= gimple_assign_rhs2 (stmt
);
3665 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
3666 bool binlhsisreassoc
= false;
3667 bool binrhsisreassoc
= false;
3668 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3669 struct loop
*loop
= loop_containing_stmt (stmt
);
3670 tree base
= NULL_TREE
;
3671 HOST_WIDE_INT exponent
= 0;
3674 gimple_set_visited (stmt
, true);
3676 if (TREE_CODE (binlhs
) == SSA_NAME
)
3678 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
3679 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
3680 && !stmt_could_throw_p (binlhsdef
));
3683 if (TREE_CODE (binrhs
) == SSA_NAME
)
3685 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
3686 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
3687 && !stmt_could_throw_p (binrhsdef
));
3690 /* If the LHS is not reassociable, but the RHS is, we need to swap
3691 them. If neither is reassociable, there is nothing we can do, so
3692 just put them in the ops vector. If the LHS is reassociable,
3693 linearize it. If both are reassociable, then linearize the RHS
3696 if (!binlhsisreassoc
)
3700 /* If this is not a associative operation like division, give up. */
3701 if (!is_associative
)
3703 add_to_ops_vec (ops
, binrhs
);
3707 if (!binrhsisreassoc
)
3709 if (rhscode
== MULT_EXPR
3710 && TREE_CODE (binrhs
) == SSA_NAME
3711 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
3713 add_repeat_to_ops_vec (ops
, base
, exponent
);
3714 gimple_set_visited (binrhsdef
, true);
3717 add_to_ops_vec (ops
, binrhs
);
3719 if (rhscode
== MULT_EXPR
3720 && TREE_CODE (binlhs
) == SSA_NAME
3721 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
3723 add_repeat_to_ops_vec (ops
, base
, exponent
);
3724 gimple_set_visited (binlhsdef
, true);
3727 add_to_ops_vec (ops
, binlhs
);
3732 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3734 fprintf (dump_file
, "swapping operands of ");
3735 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3738 swap_ssa_operands (stmt
,
3739 gimple_assign_rhs1_ptr (stmt
),
3740 gimple_assign_rhs2_ptr (stmt
));
3743 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3745 fprintf (dump_file
, " is now ");
3746 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3749 /* We want to make it so the lhs is always the reassociative op,
3755 else if (binrhsisreassoc
)
3757 linearize_expr (stmt
);
3758 binlhs
= gimple_assign_rhs1 (stmt
);
3759 binrhs
= gimple_assign_rhs2 (stmt
);
3762 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
3763 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
3765 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
3766 is_associative
, set_visited
);
3768 if (rhscode
== MULT_EXPR
3769 && TREE_CODE (binrhs
) == SSA_NAME
3770 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
3772 add_repeat_to_ops_vec (ops
, base
, exponent
);
3773 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
3776 add_to_ops_vec (ops
, binrhs
);
3779 /* Repropagate the negates back into subtracts, since no other pass
3780 currently does it. */
3783 repropagate_negates (void)
3788 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
3790 gimple user
= get_single_immediate_use (negate
);
3792 if (!user
|| !is_gimple_assign (user
))
3795 /* The negate operand can be either operand of a PLUS_EXPR
3796 (it can be the LHS if the RHS is a constant for example).
3798 Force the negate operand to the RHS of the PLUS_EXPR, then
3799 transform the PLUS_EXPR into a MINUS_EXPR. */
3800 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
3802 /* If the negated operand appears on the LHS of the
3803 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3804 to force the negated operand to the RHS of the PLUS_EXPR. */
3805 if (gimple_assign_rhs1 (user
) == negate
)
3807 swap_ssa_operands (user
,
3808 gimple_assign_rhs1_ptr (user
),
3809 gimple_assign_rhs2_ptr (user
));
3812 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3813 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3814 if (gimple_assign_rhs2 (user
) == negate
)
3816 tree rhs1
= gimple_assign_rhs1 (user
);
3817 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
3818 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3819 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
3823 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
3825 if (gimple_assign_rhs1 (user
) == negate
)
3830 which we transform into
3833 This pushes down the negate which we possibly can merge
3834 into some other operation, hence insert it into the
3835 plus_negates vector. */
3836 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3837 tree a
= gimple_assign_rhs1 (feed
);
3838 tree b
= gimple_assign_rhs2 (user
);
3839 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
3840 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
3841 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)), NULL
);
3842 gimple g
= gimple_build_assign_with_ops (PLUS_EXPR
, x
, a
, b
);
3843 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
3844 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
, NULL
);
3845 user
= gsi_stmt (gsi2
);
3847 gsi_remove (&gsi
, true);
3848 release_defs (feed
);
3849 plus_negates
.safe_push (gimple_assign_lhs (user
));
3853 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3854 rid of one operation. */
3855 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3856 tree a
= gimple_assign_rhs1 (feed
);
3857 tree rhs1
= gimple_assign_rhs1 (user
);
3858 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3859 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
3860 update_stmt (gsi_stmt (gsi
));
3866 /* Returns true if OP is of a type for which we can do reassociation.
3867 That is for integral or non-saturating fixed-point types, and for
3868 floating point type when associative-math is enabled. */
3871 can_reassociate_p (tree op
)
3873 tree type
= TREE_TYPE (op
);
3874 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
3875 || NON_SAT_FIXED_POINT_TYPE_P (type
)
3876 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
3881 /* Break up subtract operations in block BB.
3883 We do this top down because we don't know whether the subtract is
3884 part of a possible chain of reassociation except at the top.
3893 we want to break up k = t - q, but we won't until we've transformed q
3894 = b - r, which won't be broken up until we transform b = c - d.
3896 En passant, clear the GIMPLE visited flag on every statement
3897 and set UIDs within each basic block. */
3900 break_up_subtract_bb (basic_block bb
)
3902 gimple_stmt_iterator gsi
;
3904 unsigned int uid
= 1;
3906 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3908 gimple stmt
= gsi_stmt (gsi
);
3909 gimple_set_visited (stmt
, false);
3910 gimple_set_uid (stmt
, uid
++);
3912 if (!is_gimple_assign (stmt
)
3913 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
3916 /* Look for simple gimple subtract operations. */
3917 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
3919 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
3920 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
3923 /* Check for a subtract used only in an addition. If this
3924 is the case, transform it into add of a negate for better
3925 reassociation. IE transform C = A-B into C = A + -B if C
3926 is only used in an addition. */
3927 if (should_break_up_subtract (stmt
))
3928 break_up_subtract (stmt
, &gsi
);
3930 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
3931 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
3932 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
3934 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
3936 son
= next_dom_son (CDI_DOMINATORS
, son
))
3937 break_up_subtract_bb (son
);
3940 /* Used for repeated factor analysis. */
3941 struct repeat_factor_d
3943 /* An SSA name that occurs in a multiply chain. */
3946 /* Cached rank of the factor. */
3949 /* Number of occurrences of the factor in the chain. */
3950 HOST_WIDE_INT count
;
3952 /* An SSA name representing the product of this factor and
3953 all factors appearing later in the repeated factor vector. */
3957 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
3958 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
3961 static vec
<repeat_factor
> repeat_factor_vec
;
3963 /* Used for sorting the repeat factor vector. Sort primarily by
3964 ascending occurrence count, secondarily by descending rank. */
3967 compare_repeat_factors (const void *x1
, const void *x2
)
3969 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
3970 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
3972 if (rf1
->count
!= rf2
->count
)
3973 return rf1
->count
- rf2
->count
;
3975 return rf2
->rank
- rf1
->rank
;
3978 /* Look for repeated operands in OPS in the multiply tree rooted at
3979 STMT. Replace them with an optimal sequence of multiplies and powi
3980 builtin calls, and remove the used operands from OPS. Return an
3981 SSA name representing the value of the replacement sequence. */
3984 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
3986 unsigned i
, j
, vec_len
;
3989 repeat_factor_t rf1
, rf2
;
3990 repeat_factor rfnew
;
3991 tree result
= NULL_TREE
;
3992 tree target_ssa
, iter_result
;
3993 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3994 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
3995 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3996 gimple mul_stmt
, pow_stmt
;
3998 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4003 /* Allocate the repeated factor vector. */
4004 repeat_factor_vec
.create (10);
4006 /* Scan the OPS vector for all SSA names in the product and build
4007 up a vector of occurrence counts for each factor. */
4008 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4010 if (TREE_CODE (oe
->op
) == SSA_NAME
)
4012 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4014 if (rf1
->factor
== oe
->op
)
4016 rf1
->count
+= oe
->count
;
4021 if (j
>= repeat_factor_vec
.length ())
4023 rfnew
.factor
= oe
->op
;
4024 rfnew
.rank
= oe
->rank
;
4025 rfnew
.count
= oe
->count
;
4026 rfnew
.repr
= NULL_TREE
;
4027 repeat_factor_vec
.safe_push (rfnew
);
4032 /* Sort the repeated factor vector by (a) increasing occurrence count,
4033 and (b) decreasing rank. */
4034 repeat_factor_vec
.qsort (compare_repeat_factors
);
4036 /* It is generally best to combine as many base factors as possible
4037 into a product before applying __builtin_powi to the result.
4038 However, the sort order chosen for the repeated factor vector
4039 allows us to cache partial results for the product of the base
4040 factors for subsequent use. When we already have a cached partial
4041 result from a previous iteration, it is best to make use of it
4042 before looking for another __builtin_pow opportunity.
4044 As an example, consider x * x * y * y * y * z * z * z * z.
4045 We want to first compose the product x * y * z, raise it to the
4046 second power, then multiply this by y * z, and finally multiply
4047 by z. This can be done in 5 multiplies provided we cache y * z
4048 for use in both expressions:
4056 If we instead ignored the cached y * z and first multiplied by
4057 the __builtin_pow opportunity z * z, we would get the inferior:
4066 vec_len
= repeat_factor_vec
.length ();
4068 /* Repeatedly look for opportunities to create a builtin_powi call. */
4071 HOST_WIDE_INT power
;
4073 /* First look for the largest cached product of factors from
4074 preceding iterations. If found, create a builtin_powi for
4075 it if the minimum occurrence count for its factors is at
4076 least 2, or just use this cached product as our next
4077 multiplicand if the minimum occurrence count is 1. */
4078 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4080 if (rf1
->repr
&& rf1
->count
> 0)
4090 iter_result
= rf1
->repr
;
4092 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4096 fputs ("Multiplying by cached product ", dump_file
);
4097 for (elt
= j
; elt
< vec_len
; elt
++)
4099 rf
= &repeat_factor_vec
[elt
];
4100 print_generic_expr (dump_file
, rf
->factor
, 0);
4101 if (elt
< vec_len
- 1)
4102 fputs (" * ", dump_file
);
4104 fputs ("\n", dump_file
);
4109 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4110 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4111 build_int_cst (integer_type_node
,
4113 gimple_call_set_lhs (pow_stmt
, iter_result
);
4114 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4115 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4117 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4121 fputs ("Building __builtin_pow call for cached product (",
4123 for (elt
= j
; elt
< vec_len
; elt
++)
4125 rf
= &repeat_factor_vec
[elt
];
4126 print_generic_expr (dump_file
, rf
->factor
, 0);
4127 if (elt
< vec_len
- 1)
4128 fputs (" * ", dump_file
);
4130 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
4137 /* Otherwise, find the first factor in the repeated factor
4138 vector whose occurrence count is at least 2. If no such
4139 factor exists, there are no builtin_powi opportunities
4141 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4143 if (rf1
->count
>= 2)
4152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4156 fputs ("Building __builtin_pow call for (", dump_file
);
4157 for (elt
= j
; elt
< vec_len
; elt
++)
4159 rf
= &repeat_factor_vec
[elt
];
4160 print_generic_expr (dump_file
, rf
->factor
, 0);
4161 if (elt
< vec_len
- 1)
4162 fputs (" * ", dump_file
);
4164 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4167 reassociate_stats
.pows_created
++;
4169 /* Visit each element of the vector in reverse order (so that
4170 high-occurrence elements are visited first, and within the
4171 same occurrence count, lower-ranked elements are visited
4172 first). Form a linear product of all elements in this order
4173 whose occurrencce count is at least that of element J.
4174 Record the SSA name representing the product of each element
4175 with all subsequent elements in the vector. */
4176 if (j
== vec_len
- 1)
4177 rf1
->repr
= rf1
->factor
;
4180 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4184 rf1
= &repeat_factor_vec
[ii
];
4185 rf2
= &repeat_factor_vec
[ii
+ 1];
4187 /* Init the last factor's representative to be itself. */
4189 rf2
->repr
= rf2
->factor
;
4194 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4195 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
4198 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4199 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4200 rf1
->repr
= target_ssa
;
4202 /* Don't reprocess the multiply we just introduced. */
4203 gimple_set_visited (mul_stmt
, true);
4207 /* Form a call to __builtin_powi for the maximum product
4208 just formed, raised to the power obtained earlier. */
4209 rf1
= &repeat_factor_vec
[j
];
4210 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4211 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4212 build_int_cst (integer_type_node
,
4214 gimple_call_set_lhs (pow_stmt
, iter_result
);
4215 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4216 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4219 /* If we previously formed at least one other builtin_powi call,
4220 form the product of this one and those others. */
4223 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4224 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
4225 result
, iter_result
);
4226 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4227 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4228 gimple_set_visited (mul_stmt
, true);
4229 result
= new_result
;
4232 result
= iter_result
;
4234 /* Decrement the occurrence count of each element in the product
4235 by the count found above, and remove this many copies of each
4237 for (i
= j
; i
< vec_len
; i
++)
4242 rf1
= &repeat_factor_vec
[i
];
4243 rf1
->count
-= power
;
4245 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
4247 if (oe
->op
== rf1
->factor
)
4251 ops
->ordered_remove (n
);
4267 /* At this point all elements in the repeated factor vector have a
4268 remaining occurrence count of 0 or 1, and those with a count of 1
4269 don't have cached representatives. Re-sort the ops vector and
4271 ops
->qsort (sort_by_operand_rank
);
4272 repeat_factor_vec
.release ();
4274 /* Return the final product computed herein. Note that there may
4275 still be some elements with single occurrence count left in OPS;
4276 those will be handled by the normal reassociation logic. */
4280 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4283 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
4287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4289 fprintf (dump_file
, "Transforming ");
4290 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4293 rhs1
= gimple_assign_rhs1 (stmt
);
4294 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
4296 remove_visited_stmt_chain (rhs1
);
4298 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4300 fprintf (dump_file
, " into ");
4301 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4305 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4308 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
4309 tree rhs1
, tree rhs2
)
4311 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4313 fprintf (dump_file
, "Transforming ");
4314 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4317 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
4318 update_stmt (gsi_stmt (*gsi
));
4319 remove_visited_stmt_chain (rhs1
);
4321 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4323 fprintf (dump_file
, " into ");
4324 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4328 /* Reassociate expressions in basic block BB and its post-dominator as
4332 reassociate_bb (basic_block bb
)
4334 gimple_stmt_iterator gsi
;
4336 gimple stmt
= last_stmt (bb
);
4338 if (stmt
&& !gimple_visited_p (stmt
))
4339 maybe_optimize_range_tests (stmt
);
4341 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4343 stmt
= gsi_stmt (gsi
);
4345 if (is_gimple_assign (stmt
)
4346 && !stmt_could_throw_p (stmt
))
4348 tree lhs
, rhs1
, rhs2
;
4349 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4351 /* If this is not a gimple binary expression, there is
4352 nothing for us to do with it. */
4353 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4356 /* If this was part of an already processed statement,
4357 we don't need to touch it again. */
4358 if (gimple_visited_p (stmt
))
4360 /* This statement might have become dead because of previous
4362 if (has_zero_uses (gimple_get_lhs (stmt
)))
4364 gsi_remove (&gsi
, true);
4365 release_defs (stmt
);
4366 /* We might end up removing the last stmt above which
4367 places the iterator to the end of the sequence.
4368 Reset it to the last stmt in this case which might
4369 be the end of the sequence as well if we removed
4370 the last statement of the sequence. In which case
4371 we need to bail out. */
4372 if (gsi_end_p (gsi
))
4374 gsi
= gsi_last_bb (bb
);
4375 if (gsi_end_p (gsi
))
4382 lhs
= gimple_assign_lhs (stmt
);
4383 rhs1
= gimple_assign_rhs1 (stmt
);
4384 rhs2
= gimple_assign_rhs2 (stmt
);
4386 /* For non-bit or min/max operations we can't associate
4387 all types. Verify that here. */
4388 if (rhs_code
!= BIT_IOR_EXPR
4389 && rhs_code
!= BIT_AND_EXPR
4390 && rhs_code
!= BIT_XOR_EXPR
4391 && rhs_code
!= MIN_EXPR
4392 && rhs_code
!= MAX_EXPR
4393 && (!can_reassociate_p (lhs
)
4394 || !can_reassociate_p (rhs1
)
4395 || !can_reassociate_p (rhs2
)))
4398 if (associative_tree_code (rhs_code
))
4400 vec
<operand_entry_t
> ops
= vNULL
;
4401 tree powi_result
= NULL_TREE
;
4403 /* There may be no immediate uses left by the time we
4404 get here because we may have eliminated them all. */
4405 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4408 gimple_set_visited (stmt
, true);
4409 linearize_expr_tree (&ops
, stmt
, true, true);
4410 ops
.qsort (sort_by_operand_rank
);
4411 optimize_ops_list (rhs_code
, &ops
);
4412 if (undistribute_ops_list (rhs_code
, &ops
,
4413 loop_containing_stmt (stmt
)))
4415 ops
.qsort (sort_by_operand_rank
);
4416 optimize_ops_list (rhs_code
, &ops
);
4419 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4420 optimize_range_tests (rhs_code
, &ops
);
4422 if (first_pass_instance
4423 && rhs_code
== MULT_EXPR
4424 && flag_unsafe_math_optimizations
)
4425 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4427 /* If the operand vector is now empty, all operands were
4428 consumed by the __builtin_powi optimization. */
4429 if (ops
.length () == 0)
4430 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4431 else if (ops
.length () == 1)
4433 tree last_op
= ops
.last ()->op
;
4436 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4439 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4443 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4444 int ops_num
= ops
.length ();
4445 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4448 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4450 "Width = %d was chosen for reassociation\n", width
);
4453 && ops
.length () > 3)
4454 rewrite_expr_tree_parallel (stmt
, width
, ops
);
4457 /* When there are three operands left, we want
4458 to make sure the ones that get the double
4459 binary op are chosen wisely. */
4460 int len
= ops
.length ();
4462 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
4464 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
4465 powi_result
!= NULL
);
4468 /* If we combined some repeated factors into a
4469 __builtin_powi call, multiply that result by the
4470 reassociated operands. */
4473 gimple mul_stmt
, lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
4474 tree type
= TREE_TYPE (lhs
);
4475 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4477 gimple_set_lhs (lhs_stmt
, target_ssa
);
4478 update_stmt (lhs_stmt
);
4480 target_ssa
= new_lhs
;
4481 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
4484 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4485 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
4493 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4495 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4496 reassociate_bb (son
);
4499 void dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
);
4500 void debug_ops_vector (vec
<operand_entry_t
> ops
);
4502 /* Dump the operand entry vector OPS to FILE. */
4505 dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
)
4510 FOR_EACH_VEC_ELT (ops
, i
, oe
)
4512 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
4513 print_generic_expr (file
, oe
->op
, 0);
4517 /* Dump the operand entry vector OPS to STDERR. */
4520 debug_ops_vector (vec
<operand_entry_t
> ops
)
4522 dump_ops_vector (stderr
, ops
);
4528 break_up_subtract_bb (ENTRY_BLOCK_PTR
);
4529 reassociate_bb (EXIT_BLOCK_PTR
);
4532 /* Initialize the reassociation pass. */
4539 int *bbs
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
4541 /* Find the loops, so that we can prevent moving calculations in
4543 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
4545 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
4547 operand_entry_pool
= create_alloc_pool ("operand entry pool",
4548 sizeof (struct operand_entry
), 30);
4549 next_operand_entry_id
= 0;
4551 /* Reverse RPO (Reverse Post Order) will give us something where
4552 deeper loops come later. */
4553 pre_and_rev_post_order_compute (NULL
, bbs
, false);
4554 bb_rank
= XCNEWVEC (long, last_basic_block
);
4555 operand_rank
= pointer_map_create ();
4557 /* Give each default definition a distinct rank. This includes
4558 parameters and the static chain. Walk backwards over all
4559 SSA names so that we get proper rank ordering according
4560 to tree_swap_operands_p. */
4561 for (i
= num_ssa_names
- 1; i
> 0; --i
)
4563 tree name
= ssa_name (i
);
4564 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
4565 insert_operand_rank (name
, ++rank
);
4568 /* Set up rank for each BB */
4569 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
4570 bb_rank
[bbs
[i
]] = ++rank
<< 16;
4573 calculate_dominance_info (CDI_POST_DOMINATORS
);
4574 plus_negates
= vNULL
;
4577 /* Cleanup after the reassociation pass, and print stats if
4583 statistics_counter_event (cfun
, "Linearized",
4584 reassociate_stats
.linearized
);
4585 statistics_counter_event (cfun
, "Constants eliminated",
4586 reassociate_stats
.constants_eliminated
);
4587 statistics_counter_event (cfun
, "Ops eliminated",
4588 reassociate_stats
.ops_eliminated
);
4589 statistics_counter_event (cfun
, "Statements rewritten",
4590 reassociate_stats
.rewritten
);
4591 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
4592 reassociate_stats
.pows_encountered
);
4593 statistics_counter_event (cfun
, "Built-in powi calls created",
4594 reassociate_stats
.pows_created
);
4596 pointer_map_destroy (operand_rank
);
4597 free_alloc_pool (operand_entry_pool
);
4599 plus_negates
.release ();
4600 free_dominance_info (CDI_POST_DOMINATORS
);
4601 loop_optimizer_finalize ();
4604 /* Gate and execute functions for Reassociation. */
4607 execute_reassoc (void)
4612 repropagate_negates ();
4619 gate_tree_ssa_reassoc (void)
4621 return flag_tree_reassoc
!= 0;
4626 const pass_data pass_data_reassoc
=
4628 GIMPLE_PASS
, /* type */
4629 "reassoc", /* name */
4630 OPTGROUP_NONE
, /* optinfo_flags */
4631 true, /* has_gate */
4632 true, /* has_execute */
4633 TV_TREE_REASSOC
, /* tv_id */
4634 ( PROP_cfg
| PROP_ssa
), /* properties_required */
4635 0, /* properties_provided */
4636 0, /* properties_destroyed */
4637 0, /* todo_flags_start */
4639 | TODO_update_ssa_only_virtuals
4640 | TODO_verify_flow
), /* todo_flags_finish */
4643 class pass_reassoc
: public gimple_opt_pass
4646 pass_reassoc (gcc::context
*ctxt
)
4647 : gimple_opt_pass (pass_data_reassoc
, ctxt
)
4650 /* opt_pass methods: */
4651 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
4652 bool gate () { return gate_tree_ssa_reassoc (); }
4653 unsigned int execute () { return execute_reassoc (); }
4655 }; // class pass_reassoc
4660 make_pass_reassoc (gcc::context
*ctxt
)
4662 return new pass_reassoc (ctxt
);