gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2019 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
55
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
59
60 It consists of five steps:
61
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
64
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
69
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
72
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
76
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
79
80 5. Repropagate negates, as nothing else will clean them up ATM.
81
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
84
85 We could do this much more nicely in theory, but don't (for reasons
86 explained after the description of the theoretically nice way :P).
87
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
92
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
97
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
100
101 a (1), b (1), c (1), d (2), e (2)
102
103
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
106
107 You want to first merge all leaves of the same rank, as much as
108 possible.
109
110 So first build a binary op of
111
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
116
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
121
122 Then build a binary op of d + e
123 mergetmp2 = d + e
124
125 and put mergetmp2 on the merge worklist.
126
127 so merge worklist = {mergetmp, c, mergetmp2}
128
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
131
132 So we have
133
134 build binary op
135 mergetmp3 = mergetmp + c
136
137 worklist = {mergetmp2, mergetmp3}
138
139 mergetmp4 = mergetmp2 + mergetmp3
140
141 worklist = {mergetmp4}
142
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
145
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
148
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
151
152 So why don't we do this?
153
154 Because it's expensive, and will rarely help. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
159
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
162
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
166
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
170
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
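/* As a compact illustration of steps 1-4 (a hypothetical input, not a
   testcase): for

     t1 = a - b;  t2 = t1 + c;  t3 = t2 + b;

   step 1 rewrites t1 as a + (-b) when that promotes reassociation, step 2
   linearizes the chain into the operand list {a, -b, c, b}, step 3's pair
   elimination cancels -b against b (leaving a zero that is in turn dropped
   by the constant handling), and step 4 rewrites the remaining operands in
   rank order, so t3 ends up computed as a + c.  */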
176
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
180
181 /* Statistics */
182 static struct
183 {
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
191
192 /* Operator, rank pair. */
193 struct operand_entry
194 {
195 unsigned int rank;
196 unsigned int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
200 };
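/* Within the operand vectors built by linearization, RANK drives the sort
   order and ID keeps that sort stable across hosts; COUNT is the repeat
   count of the operand (greater than one only for repeated factors that
   may become a __builtin_powi call); STMT_TO_INSERT, when nonnull, is a
   statement computing OP that has not been inserted into the IL yet.  */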
201
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
204
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
208
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
213
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
216
217 /* Vector of SSA_NAMEs for which, after reassociate_bb has processed
218 all basic blocks, the CFG should be adjusted: the basic block is
219 split right after that SSA_NAME's definition statement and before
220 its only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
222
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
229
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 {
233 gimple *stmt = gsi_stmt (*gsi);
234
235 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
237
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 {
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
253 }
254 return ret;
255 }
256
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
261
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
271 {
272 basic_block bb = gimple_bb (stmt);
273 class loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
278
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
282
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
287
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
292
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
298
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 {
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 {
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
309 }
310 }
311
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
314 }
315
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
321 {
322 gimple *phi_stmt;
323 long block_rank;
324
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
328
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
330
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
333
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
341
342 return false;
343 }
344
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
351 {
352 long op_rank;
353
354 if (loop_carried_phi (op))
355 return rank;
356
357 op_rank = get_rank (op);
358
359 return MAX (rank, op_rank);
360 }
361
362 /* Look up the operand rank structure for expression E. */
363
364 static inline long
365 find_operand_rank (tree e)
366 {
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
369 }
370
371 /* Insert {E,RANK} into the operand rank hashtable. */
372
373 static inline void
374 insert_operand_rank (tree e, long rank)
375 {
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
378 }
379
380 /* Given an expression E, return the rank of the expression. */
381
382 static long
383 get_rank (tree e)
384 {
385 /* SSA_NAMEs have the rank of the expression they are the result
386 of.
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal; however, it gives good
394 results. */
395
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
399
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
404
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
408
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
413
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
416
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
421
422 if (TREE_CODE (e) == SSA_NAME)
423 {
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
428
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
431
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
435
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
438
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
443
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
453
454 if (dump_file && (dump_flags & TDF_DETAILS))
455 {
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
459 }
460
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
464 }
465
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
468 }
469
470
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE (1 << 4)
474 #define FLOAT_ONE_CONST_TYPE (1 << 3)
475 #define FLOAT_CONST_TYPE (1 << 2)
476 #define OTHER_CONST_TYPE (1 << 1)
477
478 /* Classify an invariant tree into integer, float, or other, so that
479 we can sort them to be near other constants of the same type. */
480 static inline int
481 constant_type (tree t)
482 {
483 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484 return INTEGER_CONST_TYPE;
485 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486 {
487 /* Sort -1.0 and 1.0 constants last: while in some cases
488 const_binop can't optimize some inexact operations, multiplication
489 by -1.0 or 1.0 can always be merged with others. */
490 if (real_onep (t) || real_minus_onep (t))
491 return FLOAT_ONE_CONST_TYPE;
492 return FLOAT_CONST_TYPE;
493 }
494 else
495 return OTHER_CONST_TYPE;
496 }
497
498 /* qsort comparison function to sort operand entries PA and PB by rank
499 so that the sorted array is ordered by rank in decreasing order. */
500 static int
501 sort_by_operand_rank (const void *pa, const void *pb)
502 {
503 const operand_entry *oea = *(const operand_entry *const *)pa;
504 const operand_entry *oeb = *(const operand_entry *const *)pb;
505
506 if (oeb->rank != oea->rank)
507 return oeb->rank > oea->rank ? 1 : -1;
508
509 /* It's nicer for optimize_expression if constants that are likely
510 to fold when added/multiplied/whatever are put next to each
511 other. Since all constants have rank 0, order them by type. */
512 if (oea->rank == 0)
513 {
514 if (constant_type (oeb->op) != constant_type (oea->op))
515 return constant_type (oea->op) - constant_type (oeb->op);
516 else
517 /* To make sorting result stable, we use unique IDs to determine
518 order. */
519 return oeb->id > oea->id ? 1 : -1;
520 }
521
522 if (TREE_CODE (oea->op) != SSA_NAME)
523 {
524 if (TREE_CODE (oeb->op) != SSA_NAME)
525 return oeb->id > oea->id ? 1 : -1;
526 else
527 return 1;
528 }
529 else if (TREE_CODE (oeb->op) != SSA_NAME)
530 return -1;
531
532 /* Lastly, make sure the versions that are the same go next to each
533 other. */
534 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
535 {
536 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
537 versions of removed SSA_NAMEs, so if possible, prefer to sort
538 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
539 See PR60418. */
540 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
541 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
542 basic_block bba = gimple_bb (stmta);
543 basic_block bbb = gimple_bb (stmtb);
544 if (bbb != bba)
545 {
546 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 but the other might not be. */
548 if (!bba)
549 return 1;
550 if (!bbb)
551 return -1;
552 /* If neither is, compare bb_rank. */
553 if (bb_rank[bbb->index] != bb_rank[bba->index])
554 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
555 }
556
557 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
558 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
559 if (da != db)
560 return da ? 1 : -1;
561
562 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
563 }
564
565 return oeb->id > oea->id ? 1 : -1;
566 }
567
568 /* Add an operand entry to *OPS for the tree operand OP. */
569
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
572 {
573 operand_entry *oe = operand_entry_pool.allocate ();
574
575 oe->op = op;
576 oe->rank = get_rank (op);
577 oe->id = next_operand_entry_id++;
578 oe->count = 1;
579 oe->stmt_to_insert = stmt_to_insert;
580 ops->safe_push (oe);
581 }
582
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584 count REPEAT. */
585
586 static void
587 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
588 HOST_WIDE_INT repeat)
589 {
590 operand_entry *oe = operand_entry_pool.allocate ();
591
592 oe->op = op;
593 oe->rank = get_rank (op);
594 oe->id = next_operand_entry_id++;
595 oe->count = repeat;
596 oe->stmt_to_insert = NULL;
597 ops->safe_push (oe);
598
599 reassociate_stats.pows_encountered++;
600 }
601
602 /* Return true if STMT is a reassociable operation containing a binary
603 operation with tree code CODE, and is inside LOOP. */
604
605 static bool
606 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
607 {
608 basic_block bb = gimple_bb (stmt);
609
610 if (gimple_bb (stmt) == NULL)
611 return false;
612
613 if (!flow_bb_inside_loop_p (loop, bb))
614 return false;
615
616 if (is_gimple_assign (stmt)
617 && gimple_assign_rhs_code (stmt) == code
618 && has_single_use (gimple_assign_lhs (stmt)))
619 {
620 tree rhs1 = gimple_assign_rhs1 (stmt);
621 tree rhs2 = gimple_assign_rhs2 (stmt);
622 if (TREE_CODE (rhs1) == SSA_NAME
623 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624 return false;
625 if (rhs2
626 && TREE_CODE (rhs2) == SSA_NAME
627 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628 return false;
629 return true;
630 }
631
632 return false;
633 }
634
635
636 /* Return true if STMT is a nop-conversion. */
637
638 static bool
639 gimple_nop_conversion_p (gimple *stmt)
640 {
641 if (gassign *ass = dyn_cast <gassign *> (stmt))
642 {
643 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645 TREE_TYPE (gimple_assign_rhs1 (ass))))
646 return true;
647 }
648 return false;
649 }
650
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652 operand of that operation. Otherwise, return NULL_TREE. */
653
654 static tree
655 get_unary_op (tree name, enum tree_code opcode)
656 {
657 gimple *stmt = SSA_NAME_DEF_STMT (name);
658
659 /* Look through nop conversions (sign changes). */
660 if (gimple_nop_conversion_p (stmt)
661 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
663
664 if (!is_gimple_assign (stmt))
665 return NULL_TREE;
666
667 if (gimple_assign_rhs_code (stmt) == opcode)
668 return gimple_assign_rhs1 (stmt);
669 return NULL_TREE;
670 }
671
672 /* Return true if OP1 and OP2 have the same value if cast to either type. */
673
674 static bool
675 ops_equal_values_p (tree op1, tree op2)
676 {
677 if (op1 == op2)
678 return true;
679
680 tree orig_op1 = op1;
681 if (TREE_CODE (op1) == SSA_NAME)
682 {
683 gimple *stmt = SSA_NAME_DEF_STMT (op1);
684 if (gimple_nop_conversion_p (stmt))
685 {
686 op1 = gimple_assign_rhs1 (stmt);
687 if (op1 == op2)
688 return true;
689 }
690 }
691
692 if (TREE_CODE (op2) == SSA_NAME)
693 {
694 gimple *stmt = SSA_NAME_DEF_STMT (op2);
695 if (gimple_nop_conversion_p (stmt))
696 {
697 op2 = gimple_assign_rhs1 (stmt);
698 if (op1 == op2
699 || orig_op1 == op2)
700 return true;
701 }
702 }
703
704 return false;
705 }
706
707
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709 eliminate through equivalences, do so, remove them from OPS, and
710 return true. Otherwise, return false. */
711
712 static bool
713 eliminate_duplicate_pair (enum tree_code opcode,
714 vec<operand_entry *> *ops,
715 bool *all_done,
716 unsigned int i,
717 operand_entry *curr,
718 operand_entry *last)
719 {
720
721 /* If we have two of the same op, and the opcode is &, |, min, or max,
722 we can eliminate one of them.
723 If we have two of the same op, and the opcode is ^, we can
724 eliminate both of them. */
725
726 if (last && last->op == curr->op)
727 {
728 switch (opcode)
729 {
730 case MAX_EXPR:
731 case MIN_EXPR:
732 case BIT_IOR_EXPR:
733 case BIT_AND_EXPR:
734 if (dump_file && (dump_flags & TDF_DETAILS))
735 {
736 fprintf (dump_file, "Equivalence: ");
737 print_generic_expr (dump_file, curr->op);
738 fprintf (dump_file, " [&|minmax] ");
739 print_generic_expr (dump_file, last->op);
740 fprintf (dump_file, " -> ");
741 print_generic_stmt (dump_file, last->op);
742 }
743
744 ops->ordered_remove (i);
745 reassociate_stats.ops_eliminated ++;
746
747 return true;
748
749 case BIT_XOR_EXPR:
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 {
752 fprintf (dump_file, "Equivalence: ");
753 print_generic_expr (dump_file, curr->op);
754 fprintf (dump_file, " ^ ");
755 print_generic_expr (dump_file, last->op);
756 fprintf (dump_file, " -> nothing\n");
757 }
758
759 reassociate_stats.ops_eliminated += 2;
760
761 if (ops->length () == 2)
762 {
763 ops->truncate (0);
764 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
765 *all_done = true;
766 }
767 else
768 {
769 ops->ordered_remove (i-1);
770 ops->ordered_remove (i-1);
771 }
772
773 return true;
774
775 default:
776 break;
777 }
778 }
779 return false;
780 }
781
782 static vec<tree> plus_negates;
783
784 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
785 expression, look in OPS for a corresponding positive operation to cancel
786 it out. If we find one, remove the other from OPS, replace
787 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
788 return false. */
789
790 static bool
791 eliminate_plus_minus_pair (enum tree_code opcode,
792 vec<operand_entry *> *ops,
793 unsigned int currindex,
794 operand_entry *curr)
795 {
796 tree negateop;
797 tree notop;
798 unsigned int i;
799 operand_entry *oe;
800
801 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
802 return false;
803
804 negateop = get_unary_op (curr->op, NEGATE_EXPR);
805 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
806 if (negateop == NULL_TREE && notop == NULL_TREE)
807 return false;
808
809 /* Any non-negated version will have a rank that is one less than
810 the current rank. So once we hit those ranks, if we don't find
811 one, we can stop. */
812
813 for (i = currindex + 1;
814 ops->iterate (i, &oe)
815 && oe->rank >= curr->rank - 1 ;
816 i++)
817 {
818 if (negateop
819 && ops_equal_values_p (oe->op, negateop))
820 {
821 if (dump_file && (dump_flags & TDF_DETAILS))
822 {
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, negateop);
825 fprintf (dump_file, " + -");
826 print_generic_expr (dump_file, oe->op);
827 fprintf (dump_file, " -> 0\n");
828 }
829
830 ops->ordered_remove (i);
831 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
832 ops->ordered_remove (currindex);
833 reassociate_stats.ops_eliminated ++;
834
835 return true;
836 }
837 else if (notop
838 && ops_equal_values_p (oe->op, notop))
839 {
840 tree op_type = TREE_TYPE (oe->op);
841
842 if (dump_file && (dump_flags & TDF_DETAILS))
843 {
844 fprintf (dump_file, "Equivalence: ");
845 print_generic_expr (dump_file, notop);
846 fprintf (dump_file, " + ~");
847 print_generic_expr (dump_file, oe->op);
848 fprintf (dump_file, " -> -1\n");
849 }
850
851 ops->ordered_remove (i);
852 add_to_ops_vec (ops, build_all_ones_cst (op_type));
853 ops->ordered_remove (currindex);
854 reassociate_stats.ops_eliminated ++;
855
856 return true;
857 }
858 }
859
860 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861 save it for later inspection in repropagate_negates(). */
862 if (negateop != NULL_TREE
863 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
864 plus_negates.safe_push (curr->op);
865
866 return false;
867 }
868
869 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
870 bitwise not expression, look in OPS for a corresponding operand to
871 cancel it out. If we find one, reduce OPS to the single constant
872 0 (for AND) or -1 (for IOR), and return true. Otherwise, return
873 false. */
874
875 static bool
876 eliminate_not_pairs (enum tree_code opcode,
877 vec<operand_entry *> *ops,
878 unsigned int currindex,
879 operand_entry *curr)
880 {
881 tree notop;
882 unsigned int i;
883 operand_entry *oe;
884
885 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
886 || TREE_CODE (curr->op) != SSA_NAME)
887 return false;
888
889 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
890 if (notop == NULL_TREE)
891 return false;
892
893 /* Any non-not version will have a rank that is one less than
894 the current rank. So once we hit those ranks, if we don't find
895 one, we can stop. */
896
897 for (i = currindex + 1;
898 ops->iterate (i, &oe)
899 && oe->rank >= curr->rank - 1;
900 i++)
901 {
902 if (oe->op == notop)
903 {
904 if (dump_file && (dump_flags & TDF_DETAILS))
905 {
906 fprintf (dump_file, "Equivalence: ");
907 print_generic_expr (dump_file, notop);
908 if (opcode == BIT_AND_EXPR)
909 fprintf (dump_file, " & ~");
910 else if (opcode == BIT_IOR_EXPR)
911 fprintf (dump_file, " | ~");
912 print_generic_expr (dump_file, oe->op);
913 if (opcode == BIT_AND_EXPR)
914 fprintf (dump_file, " -> 0\n");
915 else if (opcode == BIT_IOR_EXPR)
916 fprintf (dump_file, " -> -1\n");
917 }
918
919 if (opcode == BIT_AND_EXPR)
920 oe->op = build_zero_cst (TREE_TYPE (oe->op));
921 else if (opcode == BIT_IOR_EXPR)
922 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
923
924 reassociate_stats.ops_eliminated += ops->length () - 1;
925 ops->truncate (0);
926 ops->quick_push (oe);
927 return true;
928 }
929 }
930
931 return false;
932 }
933
934 /* Use constant value that may be present in OPS to try to eliminate
935 operands. Note that this function is only really used when we've
936 eliminated ops for other reasons, or merged constants. Across
937 single statements, fold already does all of this, plus more. There
938 is little point in duplicating logic, so I've only included the
939 identities that I could ever construct testcases to trigger. */
940
941 static void
942 eliminate_using_constants (enum tree_code opcode,
943 vec<operand_entry *> *ops)
944 {
945 operand_entry *oelast = ops->last ();
946 tree type = TREE_TYPE (oelast->op);
947
948 if (oelast->rank == 0
949 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
950 {
951 switch (opcode)
952 {
953 case BIT_AND_EXPR:
954 if (integer_zerop (oelast->op))
955 {
956 if (ops->length () != 1)
957 {
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found & 0, removing all other ops\n");
960
961 reassociate_stats.ops_eliminated += ops->length () - 1;
962
963 ops->truncate (0);
964 ops->quick_push (oelast);
965 return;
966 }
967 }
968 else if (integer_all_onesp (oelast->op))
969 {
970 if (ops->length () != 1)
971 {
972 if (dump_file && (dump_flags & TDF_DETAILS))
973 fprintf (dump_file, "Found & -1, removing\n");
974 ops->pop ();
975 reassociate_stats.ops_eliminated++;
976 }
977 }
978 break;
979 case BIT_IOR_EXPR:
980 if (integer_all_onesp (oelast->op))
981 {
982 if (ops->length () != 1)
983 {
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 fprintf (dump_file, "Found | -1, removing all other ops\n");
986
987 reassociate_stats.ops_eliminated += ops->length () - 1;
988
989 ops->truncate (0);
990 ops->quick_push (oelast);
991 return;
992 }
993 }
994 else if (integer_zerop (oelast->op))
995 {
996 if (ops->length () != 1)
997 {
998 if (dump_file && (dump_flags & TDF_DETAILS))
999 fprintf (dump_file, "Found | 0, removing\n");
1000 ops->pop ();
1001 reassociate_stats.ops_eliminated++;
1002 }
1003 }
1004 break;
1005 case MULT_EXPR:
1006 if (integer_zerop (oelast->op)
1007 || (FLOAT_TYPE_P (type)
1008 && !HONOR_NANS (type)
1009 && !HONOR_SIGNED_ZEROS (type)
1010 && real_zerop (oelast->op)))
1011 {
1012 if (ops->length () != 1)
1013 {
1014 if (dump_file && (dump_flags & TDF_DETAILS))
1015 fprintf (dump_file, "Found * 0, removing all other ops\n");
1016
1017 reassociate_stats.ops_eliminated += ops->length () - 1;
1018 ops->truncate (0);
1019 ops->quick_push (oelast);
1020 return;
1021 }
1022 }
1023 else if (integer_onep (oelast->op)
1024 || (FLOAT_TYPE_P (type)
1025 && !HONOR_SNANS (type)
1026 && real_onep (oelast->op)))
1027 {
1028 if (ops->length () != 1)
1029 {
1030 if (dump_file && (dump_flags & TDF_DETAILS))
1031 fprintf (dump_file, "Found * 1, removing\n");
1032 ops->pop ();
1033 reassociate_stats.ops_eliminated++;
1034 return;
1035 }
1036 }
1037 break;
1038 case BIT_XOR_EXPR:
1039 case PLUS_EXPR:
1040 case MINUS_EXPR:
1041 if (integer_zerop (oelast->op)
1042 || (FLOAT_TYPE_P (type)
1043 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1044 && fold_real_zero_addition_p (type, oelast->op,
1045 opcode == MINUS_EXPR)))
1046 {
1047 if (ops->length () != 1)
1048 {
1049 if (dump_file && (dump_flags & TDF_DETAILS))
1050 fprintf (dump_file, "Found [|^+] 0, removing\n");
1051 ops->pop ();
1052 reassociate_stats.ops_eliminated++;
1053 return;
1054 }
1055 }
1056 break;
1057 default:
1058 break;
1059 }
1060 }
1061 }
1062
1063
1064 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1065 bool, bool);
1066
1067 /* Structure for tracking and counting operands. */
1068 struct oecount {
1069 unsigned int cnt;
1070 unsigned int id;
1071 enum tree_code oecode;
1072 tree op;
1073 };
1074
1075
1076 /* The heap for the oecount hashtable and the sorted list of operands. */
1077 static vec<oecount> cvec;
1078
1079
1080 /* Oecount hashtable helpers. */
1081
1082 struct oecount_hasher : int_hash <int, 0, 1>
1083 {
1084 static inline hashval_t hash (int);
1085 static inline bool equal (int, int);
1086 };
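/* Note that the values stored in this hash table are indices into CVEC
   biased by 42, so they can never collide with the empty (0) and deleted
   (1) markers of int_hash; the hash and equality routines below undo the
   bias.  */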
1087
1088 /* Hash function for oecount. */
1089
1090 inline hashval_t
1091 oecount_hasher::hash (int p)
1092 {
1093 const oecount *c = &cvec[p - 42];
1094 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1095 }
1096
1097 /* Comparison function for oecount. */
1098
1099 inline bool
1100 oecount_hasher::equal (int p1, int p2)
1101 {
1102 const oecount *c1 = &cvec[p1 - 42];
1103 const oecount *c2 = &cvec[p2 - 42];
1104 return c1->oecode == c2->oecode && c1->op == c2->op;
1105 }
1106
1107 /* Comparison function for qsort sorting oecount elements by count. */
1108
1109 static int
1110 oecount_cmp (const void *p1, const void *p2)
1111 {
1112 const oecount *c1 = (const oecount *)p1;
1113 const oecount *c2 = (const oecount *)p2;
1114 if (c1->cnt != c2->cnt)
1115 return c1->cnt > c2->cnt ? 1 : -1;
1116 else
1117 /* If counts are identical, use unique IDs to stabilize qsort. */
1118 return c1->id > c2->id ? 1 : -1;
1119 }
1120
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122 to some exponent. */
1123
1124 static bool
1125 stmt_is_power_of_op (gimple *stmt, tree op)
1126 {
1127 if (!is_gimple_call (stmt))
1128 return false;
1129
1130 switch (gimple_call_combined_fn (stmt))
1131 {
1132 CASE_CFN_POW:
1133 CASE_CFN_POWI:
1134 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1135
1136 default:
1137 return false;
1138 }
1139 }
1140
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142 in place and return the result. Assumes that stmt_is_power_of_op
1143 was previously called for STMT and returned TRUE. */
1144
1145 static HOST_WIDE_INT
1146 decrement_power (gimple *stmt)
1147 {
1148 REAL_VALUE_TYPE c, cint;
1149 HOST_WIDE_INT power;
1150 tree arg1;
1151
1152 switch (gimple_call_combined_fn (stmt))
1153 {
1154 CASE_CFN_POW:
1155 arg1 = gimple_call_arg (stmt, 1);
1156 c = TREE_REAL_CST (arg1);
1157 power = real_to_integer (&c) - 1;
1158 real_from_integer (&cint, VOIDmode, power, SIGNED);
1159 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1160 return power;
1161
1162 CASE_CFN_POWI:
1163 arg1 = gimple_call_arg (stmt, 1);
1164 power = TREE_INT_CST_LOW (arg1) - 1;
1165 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1166 return power;
1167
1168 default:
1169 gcc_unreachable ();
1170 }
1171 }
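/* For example, a call __builtin_pow (x, 4.0) is rewritten in place to
   __builtin_pow (x, 3.0) and 3 is returned; the caller is responsible for
   accounting for the factor of x that has just been peeled off.  */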
1172
1173 /* Create a new SSA name to replace the one defined by STMT, replace
1174 all uses of the old SSA name with the new one, and return the new SSA. */
1175
1176 static tree
1177 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1178 {
1179 gimple *use_stmt;
1180 use_operand_p use;
1181 imm_use_iterator iter;
1182 tree new_lhs, new_debug_lhs = NULL_TREE;
1183 tree lhs = gimple_get_lhs (stmt);
1184
1185 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1186 gimple_set_lhs (stmt, new_lhs);
1187
1188 /* Also need to update GIMPLE_DEBUGs. */
1189 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1190 {
1191 tree repl = new_lhs;
1192 if (is_gimple_debug (use_stmt))
1193 {
1194 if (new_debug_lhs == NULL_TREE)
1195 {
1196 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1197 gdebug *def_temp
1198 = gimple_build_debug_bind (new_debug_lhs,
1199 build2 (opcode, TREE_TYPE (lhs),
1200 new_lhs, op),
1201 stmt);
1202 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1203 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1204 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1205 gimple_set_uid (def_temp, gimple_uid (stmt));
1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1208 }
1209 repl = new_debug_lhs;
1210 }
1211 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1212 SET_USE (use, repl);
1213 update_stmt (use_stmt);
1214 }
1215 return new_lhs;
1216 }
1217
1218 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1219 uses with new SSA names. Also do this for the stmt that defines
1220 *DEF if *DEF is not OP. */
1221
1222 static void
1223 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1224 vec<gimple *> &stmts_to_fix)
1225 {
1226 unsigned i;
1227 gimple *stmt;
1228
1229 if (*def != op
1230 && TREE_CODE (*def) == SSA_NAME
1231 && (stmt = SSA_NAME_DEF_STMT (*def))
1232 && gimple_code (stmt) != GIMPLE_NOP)
1233 *def = make_new_ssa_for_def (stmt, opcode, op);
1234
1235 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236 make_new_ssa_for_def (stmt, opcode, op);
1237 }
1238
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1241 replace *DEF with OP as well. */
1242
1243 static void
1244 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1245 {
1246 tree lhs;
1247 gimple *use_stmt;
1248 use_operand_p use;
1249 gimple_stmt_iterator gsi;
1250
1251 if (is_gimple_call (stmt))
1252 lhs = gimple_call_lhs (stmt);
1253 else
1254 lhs = gimple_assign_lhs (stmt);
1255
1256 gcc_assert (has_single_use (lhs));
1257 single_imm_use (lhs, &use, &use_stmt);
1258 if (lhs == *def)
1259 *def = op;
1260 SET_USE (use, op);
1261 if (TREE_CODE (op) != SSA_NAME)
1262 update_stmt (use_stmt);
1263 gsi = gsi_for_stmt (stmt);
1264 unlink_stmt_vdef (stmt);
1265 reassoc_remove_stmt (&gsi);
1266 release_defs (stmt);
1267 }
1268
1269 /* Walk the linear chain with result *DEF, searching for an operation
1270 with operand OP and code OPCODE, and remove it from the chain. *DEF
1271 is updated if there is only one operand but no operation left. */
1272
1273 static void
1274 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1275 {
1276 tree orig_def = *def;
1277 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1278 /* PR72835 - Record the stmt chain that has to be updated such that
1279 we don't use the same LHS when the values computed are different. */
1280 auto_vec<gimple *, 64> stmts_to_fix;
1281
1282 do
1283 {
1284 tree name;
1285
1286 if (opcode == MULT_EXPR)
1287 {
1288 if (stmt_is_power_of_op (stmt, op))
1289 {
1290 if (decrement_power (stmt) == 1)
1291 {
1292 if (stmts_to_fix.length () > 0)
1293 stmts_to_fix.pop ();
1294 propagate_op_to_single_use (op, stmt, def);
1295 }
1296 break;
1297 }
1298 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1299 {
1300 if (gimple_assign_rhs1 (stmt) == op)
1301 {
1302 tree cst = build_minus_one_cst (TREE_TYPE (op));
1303 if (stmts_to_fix.length () > 0)
1304 stmts_to_fix.pop ();
1305 propagate_op_to_single_use (cst, stmt, def);
1306 break;
1307 }
1308 else if (integer_minus_onep (op)
1309 || real_minus_onep (op))
1310 {
1311 gimple_assign_set_rhs_code
1312 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1313 break;
1314 }
1315 }
1316 }
1317
1318 name = gimple_assign_rhs1 (stmt);
1319
1320 /* If this is the operation we look for and one of the operands
1321 is ours, simply propagate the other operand into the stmt's
1322 single use. */
1323 if (gimple_assign_rhs_code (stmt) == opcode
1324 && (name == op
1325 || gimple_assign_rhs2 (stmt) == op))
1326 {
1327 if (name == op)
1328 name = gimple_assign_rhs2 (stmt);
1329 if (stmts_to_fix.length () > 0)
1330 stmts_to_fix.pop ();
1331 propagate_op_to_single_use (name, stmt, def);
1332 break;
1333 }
1334
1335 /* We might have a multiply of two __builtin_pow* calls, and
1336 the operand might be hiding in the rightmost one. Likewise
1337 this can happen for a negate. */
1338 if (opcode == MULT_EXPR
1339 && gimple_assign_rhs_code (stmt) == opcode
1340 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1341 && has_single_use (gimple_assign_rhs2 (stmt)))
1342 {
1343 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1344 if (stmt_is_power_of_op (stmt2, op))
1345 {
1346 if (decrement_power (stmt2) == 1)
1347 propagate_op_to_single_use (op, stmt2, def);
1348 else
1349 stmts_to_fix.safe_push (stmt2);
1350 break;
1351 }
1352 else if (is_gimple_assign (stmt2)
1353 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1354 {
1355 if (gimple_assign_rhs1 (stmt2) == op)
1356 {
1357 tree cst = build_minus_one_cst (TREE_TYPE (op));
1358 propagate_op_to_single_use (cst, stmt2, def);
1359 break;
1360 }
1361 else if (integer_minus_onep (op)
1362 || real_minus_onep (op))
1363 {
1364 stmts_to_fix.safe_push (stmt2);
1365 gimple_assign_set_rhs_code
1366 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1367 break;
1368 }
1369 }
1370 }
1371
1372 /* Continue walking the chain. */
1373 gcc_assert (name != op
1374 && TREE_CODE (name) == SSA_NAME);
1375 stmt = SSA_NAME_DEF_STMT (name);
1376 stmts_to_fix.safe_push (stmt);
1377 }
1378 while (1);
1379
1380 if (stmts_to_fix.length () > 0 || *def == orig_def)
1381 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1382 }
1383
1384 /* Returns true if statement S1 dominates statement S2. Like
1385 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1386
1387 static bool
1388 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1389 {
1390 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1391
1392 /* If bb1 is NULL, s1 should be the GIMPLE_NOP def stmt of a
1393 default-definition SSA_NAME. Assume it lives at the beginning of
1394 the function and thus dominates everything. */
1395 if (!bb1 || s1 == s2)
1396 return true;
1397
1398 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1399 if (!bb2)
1400 return false;
1401
1402 if (bb1 == bb2)
1403 {
1404 /* PHIs in the same basic block are assumed to be
1405 executed all in parallel, if only one stmt is a PHI,
1406 it dominates the other stmt in the same basic block. */
1407 if (gimple_code (s1) == GIMPLE_PHI)
1408 return true;
1409
1410 if (gimple_code (s2) == GIMPLE_PHI)
1411 return false;
1412
1413 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1414
1415 if (gimple_uid (s1) < gimple_uid (s2))
1416 return true;
1417
1418 if (gimple_uid (s1) > gimple_uid (s2))
1419 return false;
1420
1421 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1422 unsigned int uid = gimple_uid (s1);
1423 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1424 {
1425 gimple *s = gsi_stmt (gsi);
1426 if (gimple_uid (s) != uid)
1427 break;
1428 if (s == s2)
1429 return true;
1430 }
1431
1432 return false;
1433 }
1434
1435 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1436 }
1437
1438 /* Insert STMT after INSERT_POINT. */
1439
1440 static void
1441 insert_stmt_after (gimple *stmt, gimple *insert_point)
1442 {
1443 gimple_stmt_iterator gsi;
1444 basic_block bb;
1445
1446 if (gimple_code (insert_point) == GIMPLE_PHI)
1447 bb = gimple_bb (insert_point);
1448 else if (!stmt_ends_bb_p (insert_point))
1449 {
1450 gsi = gsi_for_stmt (insert_point);
1451 gimple_set_uid (stmt, gimple_uid (insert_point));
1452 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453 return;
1454 }
1455 else
1456 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457 thus if it must end a basic block, it should be a call that can
1458 throw, or some assignment that can throw. If it throws, the LHS
1459 of it will not be initialized though, so only valid places using
1460 the SSA_NAME should be dominated by the fallthru edge. */
1461 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462 gsi = gsi_after_labels (bb);
1463 if (gsi_end_p (gsi))
1464 {
1465 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466 gimple_set_uid (stmt,
1467 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1468 }
1469 else
1470 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1472 }
1473
1474 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1475 the result. Places the statement after the definition of either
1476 OP1 or OP2. Returns the new statement. */
1477
1478 static gimple *
1479 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1480 {
1481 gimple *op1def = NULL, *op2def = NULL;
1482 gimple_stmt_iterator gsi;
1483 tree op;
1484 gassign *sum;
1485
1486 /* Create the addition statement. */
1487 op = make_ssa_name (type);
1488 sum = gimple_build_assign (op, opcode, op1, op2);
1489
1490 /* Find an insertion place and insert. */
1491 if (TREE_CODE (op1) == SSA_NAME)
1492 op1def = SSA_NAME_DEF_STMT (op1);
1493 if (TREE_CODE (op2) == SSA_NAME)
1494 op2def = SSA_NAME_DEF_STMT (op2);
1495 if ((!op1def || gimple_nop_p (op1def))
1496 && (!op2def || gimple_nop_p (op2def)))
1497 {
1498 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1499 if (gsi_end_p (gsi))
1500 {
1501 gimple_stmt_iterator gsi2
1502 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1503 gimple_set_uid (sum,
1504 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1505 }
1506 else
1507 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1509 }
1510 else
1511 {
1512 gimple *insert_point;
1513 if ((!op1def || gimple_nop_p (op1def))
1514 || (op2def && !gimple_nop_p (op2def)
1515 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516 insert_point = op2def;
1517 else
1518 insert_point = op1def;
1519 insert_stmt_after (sum, insert_point);
1520 }
1521 update_stmt (sum);
1522
1523 return sum;
1524 }
1525
1526 /* Perform un-distribution of divisions and multiplications.
1527 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528 to (A + B) / X for real X.
1529
1530 The algorithm is organized as follows.
1531
1532 - First we walk the addition chain *OPS looking for summands that
1533 are defined by a multiplication or a real division. This results
1534 in the candidates bitmap with relevant indices into *OPS.
1535
1536 - Second we build the chains of multiplications or divisions for
1537 these candidates, counting the number of occurrences of (operand, code)
1538 pairs in all of the candidates chains.
1539
1540 - Third we sort the (operand, code) pairs by number of occurrence and
1541 process them starting with the pair with the most uses.
1542
1543 * For each such pair we walk the candidates again to build a
1544 second candidate bitmap noting all multiplication/division chains
1545 that have at least one occurrence of (operand, code).
1546
1547 * We build an alternate addition chain only covering these
1548 candidates with one (operand, code) operation removed from their
1549 multiplication/division chain.
1550
1551 * The first candidate gets replaced by the alternate addition chain
1552 multiplied/divided by the operand.
1553
1554 * All candidate chains get disabled for further processing and
1555 processing of (operand, code) pairs continues.
1556
1557 The alternate addition chains built are re-processed by the main
1558 reassociation algorithm, which allows optimizing a * x * y + b * y * x
1559 to (a + b) * x * y in one invocation of the reassociation pass. */
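/* A sketch of the bookkeeping with hypothetical operands: for the chain
   {a*x, b*x, c} the candidates bitmap selects a*x and b*x, the pair
   (x, MULT_EXPR) reaches a count of 2, zero_one_operation strips x from
   both chains, build_and_add_sum forms t = a + b and then t * x, and the
   operand list becomes {t*x, 0, c} for the main algorithm to rewrite.  */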
1560
1561 static bool
1562 undistribute_ops_list (enum tree_code opcode,
1563 vec<operand_entry *> *ops, class loop *loop)
1564 {
1565 unsigned int length = ops->length ();
1566 operand_entry *oe1;
1567 unsigned i, j;
1568 unsigned nr_candidates, nr_candidates2;
1569 sbitmap_iterator sbi0;
1570 vec<operand_entry *> *subops;
1571 bool changed = false;
1572 unsigned int next_oecount_id = 0;
1573
1574 if (length <= 1
1575 || opcode != PLUS_EXPR)
1576 return false;
1577
1578 /* Build a list of candidates to process. */
1579 auto_sbitmap candidates (length);
1580 bitmap_clear (candidates);
1581 nr_candidates = 0;
1582 FOR_EACH_VEC_ELT (*ops, i, oe1)
1583 {
1584 enum tree_code dcode;
1585 gimple *oe1def;
1586
1587 if (TREE_CODE (oe1->op) != SSA_NAME)
1588 continue;
1589 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1590 if (!is_gimple_assign (oe1def))
1591 continue;
1592 dcode = gimple_assign_rhs_code (oe1def);
1593 if ((dcode != MULT_EXPR
1594 && dcode != RDIV_EXPR)
1595 || !is_reassociable_op (oe1def, dcode, loop))
1596 continue;
1597
1598 bitmap_set_bit (candidates, i);
1599 nr_candidates++;
1600 }
1601
1602 if (nr_candidates < 2)
1603 return false;
1604
1605 if (dump_file && (dump_flags & TDF_DETAILS))
1606 {
1607 fprintf (dump_file, "searching for un-distribute opportunities ");
1608 print_generic_expr (dump_file,
1609 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1610 fprintf (dump_file, " %d\n", nr_candidates);
1611 }
1612
1613 /* Build linearized sub-operand lists and the counting table. */
1614 cvec.create (0);
1615
1616 hash_table<oecount_hasher> ctable (15);
1617
1618 /* ??? Macro arguments cannot have multi-argument template types in
1619 them. This typedef is needed to work around that limitation. */
1620 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1621 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1622 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1623 {
1624 gimple *oedef;
1625 enum tree_code oecode;
1626 unsigned j;
1627
1628 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1629 oecode = gimple_assign_rhs_code (oedef);
1630 linearize_expr_tree (&subops[i], oedef,
1631 associative_tree_code (oecode), false);
1632
1633 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1634 {
1635 oecount c;
1636 int *slot;
1637 int idx;
1638 c.oecode = oecode;
1639 c.cnt = 1;
1640 c.id = next_oecount_id++;
1641 c.op = oe1->op;
1642 cvec.safe_push (c);
1643 idx = cvec.length () + 41;
1644 slot = ctable.find_slot (idx, INSERT);
1645 if (!*slot)
1646 {
1647 *slot = idx;
1648 }
1649 else
1650 {
1651 cvec.pop ();
1652 cvec[*slot - 42].cnt++;
1653 }
1654 }
1655 }
1656
1657 /* Sort the counting table. */
1658 cvec.qsort (oecount_cmp);
1659
1660 if (dump_file && (dump_flags & TDF_DETAILS))
1661 {
1662 oecount *c;
1663 fprintf (dump_file, "Candidates:\n");
1664 FOR_EACH_VEC_ELT (cvec, j, c)
1665 {
1666 fprintf (dump_file, " %u %s: ", c->cnt,
1667 c->oecode == MULT_EXPR
1668 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1669 print_generic_expr (dump_file, c->op);
1670 fprintf (dump_file, "\n");
1671 }
1672 }
1673
1674 /* Process the (operand, code) pairs in order of most occurrence. */
1675 auto_sbitmap candidates2 (length);
1676 while (!cvec.is_empty ())
1677 {
1678 oecount *c = &cvec.last ();
1679 if (c->cnt < 2)
1680 break;
1681
1682 /* Now collect the operands in the outer chain that contain
1683 the common operand in their inner chain. */
1684 bitmap_clear (candidates2);
1685 nr_candidates2 = 0;
1686 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1687 {
1688 gimple *oedef;
1689 enum tree_code oecode;
1690 unsigned j;
1691 tree op = (*ops)[i]->op;
1692
1693 /* If we undistributed in this chain already this may be
1694 a constant. */
1695 if (TREE_CODE (op) != SSA_NAME)
1696 continue;
1697
1698 oedef = SSA_NAME_DEF_STMT (op);
1699 oecode = gimple_assign_rhs_code (oedef);
1700 if (oecode != c->oecode)
1701 continue;
1702
1703 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1704 {
1705 if (oe1->op == c->op)
1706 {
1707 bitmap_set_bit (candidates2, i);
1708 ++nr_candidates2;
1709 break;
1710 }
1711 }
1712 }
1713
1714 if (nr_candidates2 >= 2)
1715 {
1716 operand_entry *oe1, *oe2;
1717 gimple *prod;
1718 int first = bitmap_first_set_bit (candidates2);
1719
1720 /* Build the new addition chain. */
1721 oe1 = (*ops)[first];
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 {
1724 fprintf (dump_file, "Building (");
1725 print_generic_expr (dump_file, oe1->op);
1726 }
1727 zero_one_operation (&oe1->op, c->oecode, c->op);
1728 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1729 {
1730 gimple *sum;
1731 oe2 = (*ops)[i];
1732 if (dump_file && (dump_flags & TDF_DETAILS))
1733 {
1734 fprintf (dump_file, " + ");
1735 print_generic_expr (dump_file, oe2->op);
1736 }
1737 zero_one_operation (&oe2->op, c->oecode, c->op);
1738 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1739 oe1->op, oe2->op, opcode);
1740 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1741 oe2->rank = 0;
1742 oe1->op = gimple_get_lhs (sum);
1743 }
1744
1745 /* Apply the multiplication/division. */
1746 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1747 oe1->op, c->op, c->oecode);
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1749 {
1750 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1751 print_generic_expr (dump_file, c->op);
1752 fprintf (dump_file, "\n");
1753 }
1754
1755 /* Record it in the addition chain and disable further
1756 undistribution with this op. */
1757 oe1->op = gimple_assign_lhs (prod);
1758 oe1->rank = get_rank (oe1->op);
1759 subops[first].release ();
1760
1761 changed = true;
1762 }
1763
1764 cvec.pop ();
1765 }
1766
1767 for (i = 0; i < ops->length (); ++i)
1768 subops[i].release ();
1769 free (subops);
1770 cvec.release ();
1771
1772 return changed;
1773 }
1774
1775 /* Pair holding the information for one relevant BIT_FIELD_REF of a specific
1776 VECTOR_TYPE SSA_NAME: first is the element index within the vector;
1777 second is the index of the corresponding entry in the *OPS vector. */
1778 typedef std::pair<unsigned, unsigned> v_info_elem;
1779 typedef auto_vec<v_info_elem, 32> v_info;
1780 typedef v_info *v_info_ptr;
1781
1782 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1783 static int
1784 sort_by_mach_mode (const void *p_i, const void *p_j)
1785 {
1786 const tree tr1 = *((const tree *) p_i);
1787 const tree tr2 = *((const tree *) p_j);
1788 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1789 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1790 if (mode1 > mode2)
1791 return 1;
1792 else if (mode1 < mode2)
1793 return -1;
1794 else
1795 return 0;
1796 }
1797
1798 /* Cleanup hash map for VECTOR information. */
1799 static void
1800 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1801 {
1802 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1803 it != info_map.end (); ++it)
1804 {
1805 v_info_ptr info = (*it).second;
1806 delete info;
1807 (*it).second = NULL;
1808 }
1809 }
1810
1811 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1812 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1813 is transformed to
1814 Vs = (V1 + V2 + ... + Vn)
1815 Vs[0] + Vs[1] + ... + Vs[k]
1816
1817 The basic steps are listed below:
1818
1819 1) Scan the addition chain *OPS looking for summands that come from a
1820 BIT_FIELD_REF on a VECTOR type. Put the information into
1821 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1822
1823 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1824 contiguous and cover the whole VECTOR perfectly, without any holes.
1825 Collect the VECTORs that qualify into a candidate list.
1826
1827 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1828 group of candidates with the same mode, build the addition statements
1829 for them and generate BIT_FIELD_REFs accordingly.
1830
1831 TODO:
1832 The current implementation requires that each VECTOR be fully
1833 covered. It could be extended to support partial coverage (adjacent
1834 elements that do not fill the whole VECTOR), but that would need some
1835 cost model to decide when the transformation is worthwhile.
1836 */
1837 static bool
1838 undistribute_bitref_for_vector (enum tree_code opcode,
1839 vec<operand_entry *> *ops, struct loop *loop)
1840 {
1841 if (ops->length () <= 1)
1842 return false;
1843
1844 if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR
1845 && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1846 return false;
1847
1848 hash_map<tree, v_info_ptr> v_info_map;
1849 operand_entry *oe1;
1850 unsigned i;
1851
1852 /* Find the summands in the addition chain that come from a VECTOR
1853 BIT_FIELD_REF and record the information in the map. */
1854 FOR_EACH_VEC_ELT (*ops, i, oe1)
1855 {
1856 enum tree_code dcode;
1857 gimple *oe1def;
1858
1859 if (TREE_CODE (oe1->op) != SSA_NAME)
1860 continue;
1861 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1862 if (!is_gimple_assign (oe1def))
1863 continue;
1864 dcode = gimple_assign_rhs_code (oe1def);
1865 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1866 continue;
1867
1868 tree rhs = gimple_assign_rhs1 (oe1def);
1869 tree vec = TREE_OPERAND (rhs, 0);
1870 tree vec_type = TREE_TYPE (vec);
1871
1872 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1873 continue;
1874
1875 /* Ignore it if target machine can't support this VECTOR type. */
1876 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1877 continue;
1878
1879 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1880 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1881 continue;
1882
1883 tree elem_type = TREE_TYPE (vec_type);
1884 unsigned HOST_WIDE_INT elem_size
1885 = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
1886 if (maybe_ne (bit_field_size (rhs), elem_size))
1887 continue;
1888
1889 unsigned idx;
1890 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1891 continue;
1892
1893 /* Ignore it if target machine can't support this type of VECTOR
1894 operation. */
1895 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1896 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1897 continue;
1898
1899 bool existed;
1900 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1901 if (!existed)
1902 info = new v_info;
1903 info->safe_push (std::make_pair (idx, i));
1904 }
1905
1906 /* We need at least two VECTORs to combine. */
1907 if (v_info_map.elements () <= 1)
1908 {
1909 cleanup_vinfo_map (v_info_map);
1910 return false;
1911 }
1912
1913 /* Verify each VECTOR candidate by checking two conditions:
1914 1) its recorded offsets are pairwise distinct (no duplicates), and
1915 2) together they cover the whole VECTOR with no holes.
1916 Add the valid candidates to a vector for further handling. */
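/* For instance (a sketch), for a 4-element VECTOR the recorded offsets
   { 0, 1, 2, 3 } in any order clear every bit of HOLES and the VECTOR
   qualifies, whereas { 0, 1, 1, 3 } hits an already-cleared bit and is
   rejected as a duplicate.  */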
1917 auto_vec<tree> valid_vecs (v_info_map.elements ());
1918 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1919 it != v_info_map.end (); ++it)
1920 {
1921 tree cand_vec = (*it).first;
1922 v_info_ptr cand_info = (*it).second;
1923 unsigned int num_elems = VECTOR_CST_NELTS (cand_vec).to_constant ();
1924 if (cand_info->length () != num_elems)
1925 continue;
1926 sbitmap holes = sbitmap_alloc (num_elems);
1927 bitmap_ones (holes);
1928 bool valid = true;
1929 v_info_elem *curr;
1930 FOR_EACH_VEC_ELT (*cand_info, i, curr)
1931 {
1932 if (!bitmap_bit_p (holes, curr->first))
1933 {
1934 valid = false;
1935 break;
1936 }
1937 else
1938 bitmap_clear_bit (holes, curr->first);
1939 }
1940 if (valid && bitmap_empty_p (holes))
1941 valid_vecs.quick_push (cand_vec);
1942 sbitmap_free (holes);
1943 }
1944
1945 /* We need at least two VECTORs to combine. */
1946 if (valid_vecs.length () <= 1)
1947 {
1948 cleanup_vinfo_map (v_info_map);
1949 return false;
1950 }
1951
1952 valid_vecs.qsort (sort_by_mach_mode);
1953 /* Go through all candidates in machine mode order; a mode that has
1954 only a single candidate is skipped. */
1955 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
1956 {
1957 tree tvec = valid_vecs[i];
1958 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
1959
1960 /* Skip modes with only a single candidate. */
1961 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
1962 continue;
1963
1964 unsigned int idx, j;
1965 gimple *sum = NULL;
1966 v_info_ptr info_ptr;
1967 tree sum_vec = tvec;
1968 v_info_elem *elem;
1969
1970 /* Build the sum for all candidates with same mode. */
1971 do
1972 {
1973 sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
1974 valid_vecs[i + 1], opcode);
1975 sum_vec = gimple_get_lhs (sum);
1976 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
1977 /* Update those related ops of current candidate VECTOR. */
1978 FOR_EACH_VEC_ELT (*info_ptr, j, elem)
1979 {
1980 idx = elem->second;
1981 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
1982 /* Set this so that the op definition will get DCEd later. */
1983 gimple_set_visited (def, true);
1984 if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
1985 || opcode == BIT_IOR_EXPR)
1986 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
1987 else if (opcode == MULT_EXPR)
1988 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
1989 else
1990 {
1991 gcc_assert (opcode == BIT_AND_EXPR);
1992 (*ops)[idx]->op
1993 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
1994 }
1995 (*ops)[idx]->rank = 0;
1996 }
1997 if (dump_file && (dump_flags & TDF_DETAILS))
1998 {
1999 fprintf (dump_file, "Generating addition -> ");
2000 print_gimple_stmt (dump_file, sum, 0);
2001 }
2002 i++;
2003 }
2004 while ((i < valid_vecs.length () - 1)
2005 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2006
2007 /* Referring to the first valid VECTOR with this mode, generate the
2008 BIT_FIELD_REF statements accordingly. */
2009 info_ptr = *(v_info_map.get (tvec));
2010 gcc_assert (sum);
2011 tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
2012 FOR_EACH_VEC_ELT (*info_ptr, j, elem)
2013 {
2014 idx = elem->second;
2015 tree dst = make_ssa_name (elem_type);
2016 gimple *gs = gimple_build_assign (
2017 dst, BIT_FIELD_REF,
2018 build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
2019 bitsize_int (elem->first
2020 * tree_to_uhwi (TYPE_SIZE (elem_type)))));
2021 insert_stmt_after (gs, sum);
2022 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2023 /* Set this so that the op definition will get DCEd later. */
2024 gimple_set_visited (def, true);
2025 (*ops)[idx]->op = gimple_assign_lhs (gs);
2026 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2027 if (dump_file && (dump_flags & TDF_DETAILS))
2028 {
2029 fprintf (dump_file, "Generating bit_field_ref -> ");
2030 print_gimple_stmt (dump_file, gs, 0);
2031 }
2032 }
2033 }
2034
2035 if (dump_file && (dump_flags & TDF_DETAILS))
2036 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2037
2038 cleanup_vinfo_map (v_info_map);
2039
2040 return true;
2041 }
2042
2043 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2044 expression, examine the other OPS to see if any of them are comparisons
2045 of the same values, which we may be able to combine or eliminate.
2046 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2047
2048 static bool
2049 eliminate_redundant_comparison (enum tree_code opcode,
2050 vec<operand_entry *> *ops,
2051 unsigned int currindex,
2052 operand_entry *curr)
2053 {
2054 tree op1, op2;
2055 enum tree_code lcode, rcode;
2056 gimple *def1, *def2;
2057 int i;
2058 operand_entry *oe;
2059
2060 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2061 return false;
2062
2063 /* Check that CURR is a comparison. */
2064 if (TREE_CODE (curr->op) != SSA_NAME)
2065 return false;
2066 def1 = SSA_NAME_DEF_STMT (curr->op);
2067 if (!is_gimple_assign (def1))
2068 return false;
2069 lcode = gimple_assign_rhs_code (def1);
2070 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2071 return false;
2072 op1 = gimple_assign_rhs1 (def1);
2073 op2 = gimple_assign_rhs2 (def1);
2074
2075 /* Now look for a similar comparison in the remaining OPS. */
2076 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2077 {
2078 tree t;
2079
2080 if (TREE_CODE (oe->op) != SSA_NAME)
2081 continue;
2082 def2 = SSA_NAME_DEF_STMT (oe->op);
2083 if (!is_gimple_assign (def2))
2084 continue;
2085 rcode = gimple_assign_rhs_code (def2);
2086 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2087 continue;
2088
2089 /* If we got here, we have a match. See if we can combine the
2090 two comparisons. */
2091 if (opcode == BIT_IOR_EXPR)
2092 t = maybe_fold_or_comparisons (lcode, op1, op2,
2093 rcode, gimple_assign_rhs1 (def2),
2094 gimple_assign_rhs2 (def2));
2095 else
2096 t = maybe_fold_and_comparisons (lcode, op1, op2,
2097 rcode, gimple_assign_rhs1 (def2),
2098 gimple_assign_rhs2 (def2));
2099 if (!t)
2100 continue;
2101
2102 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2103 always give us a boolean_type_node value back. If the original
2104 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2105 we need to convert. */
2106 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2107 t = fold_convert (TREE_TYPE (curr->op), t);
2108
2109 if (TREE_CODE (t) != INTEGER_CST
2110 && !operand_equal_p (t, curr->op, 0))
2111 {
2112 enum tree_code subcode;
2113 tree newop1, newop2;
2114 if (!COMPARISON_CLASS_P (t))
2115 continue;
2116 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2117 STRIP_USELESS_TYPE_CONVERSION (newop1);
2118 STRIP_USELESS_TYPE_CONVERSION (newop2);
2119 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2120 continue;
2121 }
2122
2123 if (dump_file && (dump_flags & TDF_DETAILS))
2124 {
2125 fprintf (dump_file, "Equivalence: ");
2126 print_generic_expr (dump_file, curr->op);
2127 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2128 print_generic_expr (dump_file, oe->op);
2129 fprintf (dump_file, " -> ");
2130 print_generic_expr (dump_file, t);
2131 fprintf (dump_file, "\n");
2132 }
2133
2134 /* Now we can delete oe, as it has been subsumed by the new combined
2135 expression t. */
2136 ops->ordered_remove (i);
2137 reassociate_stats.ops_eliminated ++;
2138
2139 /* If t is the same as curr->op, we're done. Otherwise we must
2140 replace curr->op with t. The special case is when we get a constant
2141 back, in which case we add it to the end instead of in place of
2142 the current entry. */
2143 if (TREE_CODE (t) == INTEGER_CST)
2144 {
2145 ops->ordered_remove (currindex);
2146 add_to_ops_vec (ops, t);
2147 }
2148 else if (!operand_equal_p (t, curr->op, 0))
2149 {
2150 gimple *sum;
2151 enum tree_code subcode;
2152 tree newop1;
2153 tree newop2;
2154 gcc_assert (COMPARISON_CLASS_P (t));
2155 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2156 STRIP_USELESS_TYPE_CONVERSION (newop1);
2157 STRIP_USELESS_TYPE_CONVERSION (newop2);
2158 gcc_checking_assert (is_gimple_val (newop1)
2159 && is_gimple_val (newop2));
2160 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2161 curr->op = gimple_get_lhs (sum);
2162 }
2163 return true;
2164 }
2165
2166 return false;
2167 }
2168
2169
2170 /* Transform repeated addition of the same value into a multiplication
2171 by a constant. */
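/* For example (a sketch with hypothetical names), a PLUS_EXPR operand
   list { x_1, x_1, x_1, y_2 } becomes { t_3, y_2 } together with a new
   statement t_3 = x_1 * 3.  For floating-point operands this is only
   done under -funsafe-math-optimizations, as checked below.  */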
2172 static bool
2173 transform_add_to_multiply (vec<operand_entry *> *ops)
2174 {
2175 operand_entry *oe;
2176 tree op = NULL_TREE;
2177 int j;
2178 int i, start = -1, end = 0, count = 0;
2179 auto_vec<std::pair <int, int> > indxs;
2180 bool changed = false;
2181
2182 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2183 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2184 || !flag_unsafe_math_optimizations))
2185 return false;
2186
2187 /* Look for repeated operands. */
2188 FOR_EACH_VEC_ELT (*ops, i, oe)
2189 {
2190 if (start == -1)
2191 {
2192 count = 1;
2193 op = oe->op;
2194 start = i;
2195 }
2196 else if (operand_equal_p (oe->op, op, 0))
2197 {
2198 count++;
2199 end = i;
2200 }
2201 else
2202 {
2203 if (count > 1)
2204 indxs.safe_push (std::make_pair (start, end));
2205 count = 1;
2206 op = oe->op;
2207 start = i;
2208 }
2209 }
2210
2211 if (count > 1)
2212 indxs.safe_push (std::make_pair (start, end));
2213
2214 for (j = indxs.length () - 1; j >= 0; --j)
2215 {
2216 /* Convert repeated operand addition to multiplication. */
2217 start = indxs[j].first;
2218 end = indxs[j].second;
2219 op = (*ops)[start]->op;
2220 count = end - start + 1;
2221 for (i = end; i >= start; --i)
2222 ops->unordered_remove (i);
2223 tree tmp = make_ssa_name (TREE_TYPE (op));
2224 tree cst = build_int_cst (integer_type_node, count);
2225 gassign *mul_stmt
2226 = gimple_build_assign (tmp, MULT_EXPR,
2227 op, fold_convert (TREE_TYPE (op), cst));
2228 gimple_set_visited (mul_stmt, true);
2229 add_to_ops_vec (ops, tmp, mul_stmt);
2230 changed = true;
2231 }
2232
2233 return changed;
2234 }
2235
2236
2237 /* Perform various identities and other optimizations on the list of
2238 operand entries, stored in OPS. The tree code for the binary
2239 operation between all the operands is OPCODE. */
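/* For instance (a sketch), with OPCODE PLUS_EXPR and OPS { a_1, 5, 7 }
   the two trailing constants are folded into 12, giving { a_1, 12 };
   the list is then reprocessed so that pair eliminations such as
   a + -a or the redundant-comparison folding can fire on the updated
   entries.  */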
2240
2241 static void
2242 optimize_ops_list (enum tree_code opcode,
2243 vec<operand_entry *> *ops)
2244 {
2245 unsigned int length = ops->length ();
2246 unsigned int i;
2247 operand_entry *oe;
2248 operand_entry *oelast = NULL;
2249 bool iterate = false;
2250
2251 if (length == 1)
2252 return;
2253
2254 oelast = ops->last ();
2255
2256 /* If the last two are constants, pop the constants off, merge them
2257 and try the next two. */
2258 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2259 {
2260 operand_entry *oelm1 = (*ops)[length - 2];
2261
2262 if (oelm1->rank == 0
2263 && is_gimple_min_invariant (oelm1->op)
2264 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2265 TREE_TYPE (oelast->op)))
2266 {
2267 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2268 oelm1->op, oelast->op);
2269
2270 if (folded && is_gimple_min_invariant (folded))
2271 {
2272 if (dump_file && (dump_flags & TDF_DETAILS))
2273 fprintf (dump_file, "Merging constants\n");
2274
2275 ops->pop ();
2276 ops->pop ();
2277
2278 add_to_ops_vec (ops, folded);
2279 reassociate_stats.constants_eliminated++;
2280
2281 optimize_ops_list (opcode, ops);
2282 return;
2283 }
2284 }
2285 }
2286
2287 eliminate_using_constants (opcode, ops);
2288 oelast = NULL;
2289
2290 for (i = 0; ops->iterate (i, &oe);)
2291 {
2292 bool done = false;
2293
2294 if (eliminate_not_pairs (opcode, ops, i, oe))
2295 return;
2296 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2297 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2298 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2299 {
2300 if (done)
2301 return;
2302 iterate = true;
2303 oelast = NULL;
2304 continue;
2305 }
2306 oelast = oe;
2307 i++;
2308 }
2309
2310 if (iterate)
2311 optimize_ops_list (opcode, ops);
2312 }
2313
2314 /* The following functions are subroutines to optimize_range_tests and allow
2315 it to try to change a logical combination of comparisons into a range
2316 test.
2317
2318 For example, both
2319 X == 2 || X == 5 || X == 3 || X == 4
2320 and
2321 X >= 2 && X <= 5
2322 are converted to
2323 (unsigned) (X - 2) <= 3
2324
2325 For more information see the comments above fold_range_test in
2326 fold-const.c; this implementation is for GIMPLE. */
2327
2328 struct range_entry
2329 {
2330 tree exp;
2331 tree low;
2332 tree high;
2333 bool in_p;
2334 bool strict_overflow_p;
2335 unsigned int idx, next;
2336 };
2337
2338 /* This is similar to make_range in fold-const.c, but on top of
2339 GIMPLE instead of trees. If EXP is non-NULL, it should be
2340 an SSA_NAME and STMT argument is ignored, otherwise STMT
2341 argument should be a GIMPLE_COND. */
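/* For example (a sketch), if EXP is the SSA_NAME holding the result of
   x_1 == 3, the entry describes x_1 with range +[3, 3]; for x_1 != 3 it
   records -[3, 3], using the +/-[low, high] notation that the dumps
   below also use.  */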
2342
2343 static void
2344 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2345 {
2346 int in_p;
2347 tree low, high;
2348 bool is_bool, strict_overflow_p;
2349
2350 r->exp = NULL_TREE;
2351 r->in_p = false;
2352 r->strict_overflow_p = false;
2353 r->low = NULL_TREE;
2354 r->high = NULL_TREE;
2355 if (exp != NULL_TREE
2356 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2357 return;
2358
2359 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2360 and see if we can refine the range. Some of the cases below may not
2361 happen, but it doesn't seem worth worrying about this. We "continue"
2362 the outer loop when we've changed something; otherwise we "break"
2363 the switch, which will "break" the while. */
2364 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2365 high = low;
2366 in_p = 0;
2367 strict_overflow_p = false;
2368 is_bool = false;
2369 if (exp == NULL_TREE)
2370 is_bool = true;
2371 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2372 {
2373 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2374 is_bool = true;
2375 else
2376 return;
2377 }
2378 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2379 is_bool = true;
2380
2381 while (1)
2382 {
2383 enum tree_code code;
2384 tree arg0, arg1, exp_type;
2385 tree nexp;
2386 location_t loc;
2387
2388 if (exp != NULL_TREE)
2389 {
2390 if (TREE_CODE (exp) != SSA_NAME
2391 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2392 break;
2393
2394 stmt = SSA_NAME_DEF_STMT (exp);
2395 if (!is_gimple_assign (stmt))
2396 break;
2397
2398 code = gimple_assign_rhs_code (stmt);
2399 arg0 = gimple_assign_rhs1 (stmt);
2400 arg1 = gimple_assign_rhs2 (stmt);
2401 exp_type = TREE_TYPE (exp);
2402 }
2403 else
2404 {
2405 code = gimple_cond_code (stmt);
2406 arg0 = gimple_cond_lhs (stmt);
2407 arg1 = gimple_cond_rhs (stmt);
2408 exp_type = boolean_type_node;
2409 }
2410
2411 if (TREE_CODE (arg0) != SSA_NAME
2412 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2413 break;
2414 loc = gimple_location (stmt);
2415 switch (code)
2416 {
2417 case BIT_NOT_EXPR:
2418 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2419 /* Ensure the range is either +[-,0], +[0,0],
2420 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2421 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2422 or similar expression of unconditional true or
2423 false, it should not be negated. */
2424 && ((high && integer_zerop (high))
2425 || (low && integer_onep (low))))
2426 {
2427 in_p = !in_p;
2428 exp = arg0;
2429 continue;
2430 }
2431 break;
2432 case SSA_NAME:
2433 exp = arg0;
2434 continue;
2435 CASE_CONVERT:
2436 if (is_bool)
2437 {
2438 if ((TYPE_PRECISION (exp_type) == 1
2439 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2440 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2441 return;
2442 }
2443 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2444 {
2445 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2446 is_bool = true;
2447 else
2448 return;
2449 }
2450 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2451 is_bool = true;
2452 goto do_default;
2453 case EQ_EXPR:
2454 case NE_EXPR:
2455 case LT_EXPR:
2456 case LE_EXPR:
2457 case GE_EXPR:
2458 case GT_EXPR:
2459 is_bool = true;
2460 /* FALLTHRU */
2461 default:
2462 if (!is_bool)
2463 return;
2464 do_default:
2465 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2466 &low, &high, &in_p,
2467 &strict_overflow_p);
2468 if (nexp != NULL_TREE)
2469 {
2470 exp = nexp;
2471 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2472 continue;
2473 }
2474 break;
2475 }
2476 break;
2477 }
2478 if (is_bool)
2479 {
2480 r->exp = exp;
2481 r->in_p = in_p;
2482 r->low = low;
2483 r->high = high;
2484 r->strict_overflow_p = strict_overflow_p;
2485 }
2486 }
2487
2488 /* Comparison function for qsort. Sort entries
2489 without SSA_NAME exp first, then with SSA_NAMEs sorted
2490 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2491 by increasing ->low and if ->low is the same, by increasing
2492 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2493 maximum. */
2494
2495 static int
2496 range_entry_cmp (const void *a, const void *b)
2497 {
2498 const struct range_entry *p = (const struct range_entry *) a;
2499 const struct range_entry *q = (const struct range_entry *) b;
2500
2501 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2502 {
2503 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2504 {
2505 /* Group range_entries for the same SSA_NAME together. */
2506 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2507 return -1;
2508 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2509 return 1;
2510 /* If ->low is different, NULL low goes first, then by
2511 ascending low. */
2512 if (p->low != NULL_TREE)
2513 {
2514 if (q->low != NULL_TREE)
2515 {
2516 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2517 p->low, q->low);
2518 if (tem && integer_onep (tem))
2519 return -1;
2520 tem = fold_binary (GT_EXPR, boolean_type_node,
2521 p->low, q->low);
2522 if (tem && integer_onep (tem))
2523 return 1;
2524 }
2525 else
2526 return 1;
2527 }
2528 else if (q->low != NULL_TREE)
2529 return -1;
2530 /* If ->high is different, NULL high goes last, before that by
2531 ascending high. */
2532 if (p->high != NULL_TREE)
2533 {
2534 if (q->high != NULL_TREE)
2535 {
2536 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2537 p->high, q->high);
2538 if (tem && integer_onep (tem))
2539 return -1;
2540 tem = fold_binary (GT_EXPR, boolean_type_node,
2541 p->high, q->high);
2542 if (tem && integer_onep (tem))
2543 return 1;
2544 }
2545 else
2546 return -1;
2547 }
2548 else if (q->high != NULL_TREE)
2549 return 1;
2550 /* If both ranges are the same, sort below by ascending idx. */
2551 }
2552 else
2553 return 1;
2554 }
2555 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2556 return -1;
2557
2558 if (p->idx < q->idx)
2559 return -1;
2560 else
2561 {
2562 gcc_checking_assert (p->idx > q->idx);
2563 return 1;
2564 }
2565 }
2566
2567 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2568 inserting any needed statements before or after GSI, as selected by BEFORE. */
2569
2570 static tree
2571 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2572 {
2573 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2574 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2575 if (TREE_CODE (ret) != SSA_NAME)
2576 {
2577 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2578 if (before)
2579 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2580 else
2581 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2582 ret = gimple_assign_lhs (g);
2583 }
2584 return ret;
2585 }
2586
2587 /* Helper routine of optimize_range_tests.
2588 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2589 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2590 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2591 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2592 an array of COUNT pointers to other ranges. Return
2593 true if the range merge has been successful.
2594 If OPCODE is ERROR_MARK, this is called from within
2595 maybe_optimize_range_tests and is performing inter-bb range optimization.
2596 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2597 oe->rank. */
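/* For example (a sketch), when the entries for x_1 == 2 and x_1 == 3
   have been merged into a single [2, 3] range, the replacement test
   emitted here is equivalent to (unsigned) (x_1 - 2) <= 1; it replaces
   the operand entry of RANGE, while the entries of the merged ranges
   are overwritten so that they are dropped or neutralized later.  */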
2598
2599 static bool
2600 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2601 struct range_entry **otherrangep,
2602 unsigned int count, enum tree_code opcode,
2603 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2604 bool in_p, tree low, tree high, bool strict_overflow_p)
2605 {
2606 operand_entry *oe = (*ops)[range->idx];
2607 tree op = oe->op;
2608 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2609 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2610 location_t loc = gimple_location (stmt);
2611 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2612 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2613 in_p, low, high);
2614 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2615 gimple_stmt_iterator gsi;
2616 unsigned int i, uid;
2617
2618 if (tem == NULL_TREE)
2619 return false;
2620
2621 /* If op is default def SSA_NAME, there is no place to insert the
2622 new comparison. Give up, unless we can use OP itself as the
2623 range test. */
2624 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2625 {
2626 if (op == range->exp
2627 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2628 || TREE_CODE (optype) == BOOLEAN_TYPE)
2629 && (op == tem
2630 || (TREE_CODE (tem) == EQ_EXPR
2631 && TREE_OPERAND (tem, 0) == op
2632 && integer_onep (TREE_OPERAND (tem, 1))))
2633 && opcode != BIT_IOR_EXPR
2634 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2635 {
2636 stmt = NULL;
2637 tem = op;
2638 }
2639 else
2640 return false;
2641 }
2642
2643 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2644 warning_at (loc, OPT_Wstrict_overflow,
2645 "assuming signed overflow does not occur "
2646 "when simplifying range test");
2647
2648 if (dump_file && (dump_flags & TDF_DETAILS))
2649 {
2650 struct range_entry *r;
2651 fprintf (dump_file, "Optimizing range tests ");
2652 print_generic_expr (dump_file, range->exp);
2653 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2654 print_generic_expr (dump_file, range->low);
2655 fprintf (dump_file, ", ");
2656 print_generic_expr (dump_file, range->high);
2657 fprintf (dump_file, "]");
2658 for (i = 0; i < count; i++)
2659 {
2660 if (otherrange)
2661 r = otherrange + i;
2662 else
2663 r = otherrangep[i];
2664 if (r->exp
2665 && r->exp != range->exp
2666 && TREE_CODE (r->exp) == SSA_NAME)
2667 {
2668 fprintf (dump_file, " and ");
2669 print_generic_expr (dump_file, r->exp);
2670 }
2671 else
2672 fprintf (dump_file, " and");
2673 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2674 print_generic_expr (dump_file, r->low);
2675 fprintf (dump_file, ", ");
2676 print_generic_expr (dump_file, r->high);
2677 fprintf (dump_file, "]");
2678 }
2679 fprintf (dump_file, "\n into ");
2680 print_generic_expr (dump_file, tem);
2681 fprintf (dump_file, "\n");
2682 }
2683
2684 if (opcode == BIT_IOR_EXPR
2685 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2686 tem = invert_truthvalue_loc (loc, tem);
2687
2688 tem = fold_convert_loc (loc, optype, tem);
2689 if (stmt)
2690 {
2691 gsi = gsi_for_stmt (stmt);
2692 uid = gimple_uid (stmt);
2693 }
2694 else
2695 {
2696 gsi = gsi_none ();
2697 uid = 0;
2698 }
2699 if (stmt == NULL)
2700 gcc_checking_assert (tem == op);
2701 /* In rare cases range->exp can be equal to lhs of stmt.
2702 In that case we have to insert after the stmt rather than before
2703 it. If stmt is a PHI, insert it at the start of the basic block. */
2704 else if (op != range->exp)
2705 {
2706 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2707 tem = force_into_ssa_name (&gsi, tem, true);
2708 gsi_prev (&gsi);
2709 }
2710 else if (gimple_code (stmt) != GIMPLE_PHI)
2711 {
2712 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2713 tem = force_into_ssa_name (&gsi, tem, false);
2714 }
2715 else
2716 {
2717 gsi = gsi_after_labels (gimple_bb (stmt));
2718 if (!gsi_end_p (gsi))
2719 uid = gimple_uid (gsi_stmt (gsi));
2720 else
2721 {
2722 gsi = gsi_start_bb (gimple_bb (stmt));
2723 uid = 1;
2724 while (!gsi_end_p (gsi))
2725 {
2726 uid = gimple_uid (gsi_stmt (gsi));
2727 gsi_next (&gsi);
2728 }
2729 }
2730 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2731 tem = force_into_ssa_name (&gsi, tem, true);
2732 if (gsi_end_p (gsi))
2733 gsi = gsi_last_bb (gimple_bb (stmt));
2734 else
2735 gsi_prev (&gsi);
2736 }
2737 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2738 if (gimple_uid (gsi_stmt (gsi)))
2739 break;
2740 else
2741 gimple_set_uid (gsi_stmt (gsi), uid);
2742
2743 oe->op = tem;
2744 range->exp = exp;
2745 range->low = low;
2746 range->high = high;
2747 range->in_p = in_p;
2748 range->strict_overflow_p = false;
2749
2750 for (i = 0; i < count; i++)
2751 {
2752 if (otherrange)
2753 range = otherrange + i;
2754 else
2755 range = otherrangep[i];
2756 oe = (*ops)[range->idx];
2757 /* Now change all the other range test immediate uses, so that
2758 those tests will be optimized away. */
2759 if (opcode == ERROR_MARK)
2760 {
2761 if (oe->op)
2762 oe->op = build_int_cst (TREE_TYPE (oe->op),
2763 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2764 else
2765 oe->op = (oe->rank == BIT_IOR_EXPR
2766 ? boolean_false_node : boolean_true_node);
2767 }
2768 else
2769 oe->op = error_mark_node;
2770 range->exp = NULL_TREE;
2771 range->low = NULL_TREE;
2772 range->high = NULL_TREE;
2773 }
2774 return true;
2775 }
2776
2777 /* Optimize X == CST1 || X == CST2
2778 if popcount (CST1 ^ CST2) == 1 into
2779 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2780 Similarly for ranges. E.g.
2781 X != 2 && X != 3 && X != 10 && X != 11
2782 will be transformed by the previous optimization into
2783 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2784 and this loop can transform that into
2785 !(((X & ~8) - 2U) <= 1U). */
2786
2787 static bool
2788 optimize_range_tests_xor (enum tree_code opcode, tree type,
2789 tree lowi, tree lowj, tree highi, tree highj,
2790 vec<operand_entry *> *ops,
2791 struct range_entry *rangei,
2792 struct range_entry *rangej)
2793 {
2794 tree lowxor, highxor, tem, exp;
2795 /* Check lowi ^ lowj == highi ^ highj and
2796 popcount (lowi ^ lowj) == 1. */
2797 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2798 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2799 return false;
2800 if (!integer_pow2p (lowxor))
2801 return false;
2802 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2803 if (!tree_int_cst_equal (lowxor, highxor))
2804 return false;
2805
2806 exp = rangei->exp;
2807 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2808 int prec = GET_MODE_PRECISION (mode);
2809 if (TYPE_PRECISION (type) < prec
2810 || (wi::to_wide (TYPE_MIN_VALUE (type))
2811 != wi::min_value (prec, TYPE_SIGN (type)))
2812 || (wi::to_wide (TYPE_MAX_VALUE (type))
2813 != wi::max_value (prec, TYPE_SIGN (type))))
2814 {
2815 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2816 exp = fold_convert (type, exp);
2817 lowxor = fold_convert (type, lowxor);
2818 lowi = fold_convert (type, lowi);
2819 highi = fold_convert (type, highi);
2820 }
2821 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2822 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2823 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2824 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2825 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2826 NULL, rangei->in_p, lowj, highj,
2827 rangei->strict_overflow_p
2828 || rangej->strict_overflow_p))
2829 return true;
2830 return false;
2831 }
2832
2833 /* Optimize X == CST1 || X == CST2
2834 if popcount (CST2 - CST1) == 1 into
2835 ((X - CST1) & ~(CST2 - CST1)) == 0.
2836 Similarly for ranges. E.g.
2837 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2838 || X == 75 || X == 45
2839 will be transformed by the previous optimization into
2840 (X - 43U) <= 3U || (X - 75U) <= 3U
2841 and this loop can transform that into
2842 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2843 static bool
2844 optimize_range_tests_diff (enum tree_code opcode, tree type,
2845 tree lowi, tree lowj, tree highi, tree highj,
2846 vec<operand_entry *> *ops,
2847 struct range_entry *rangei,
2848 struct range_entry *rangej)
2849 {
2850 tree tem1, tem2, mask;
2851 /* Check highi - lowi == highj - lowj. */
2852 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2853 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2854 return false;
2855 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2856 if (!tree_int_cst_equal (tem1, tem2))
2857 return false;
2858 /* Check popcount (lowj - lowi) == 1. */
2859 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2860 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2861 return false;
2862 if (!integer_pow2p (tem1))
2863 return false;
2864
2865 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2866 int prec = GET_MODE_PRECISION (mode);
2867 if (TYPE_PRECISION (type) < prec
2868 || (wi::to_wide (TYPE_MIN_VALUE (type))
2869 != wi::min_value (prec, TYPE_SIGN (type)))
2870 || (wi::to_wide (TYPE_MAX_VALUE (type))
2871 != wi::max_value (prec, TYPE_SIGN (type))))
2872 type = build_nonstandard_integer_type (prec, 1);
2873 else
2874 type = unsigned_type_for (type);
2875 tem1 = fold_convert (type, tem1);
2876 tem2 = fold_convert (type, tem2);
2877 lowi = fold_convert (type, lowi);
2878 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2879 tem1 = fold_build2 (MINUS_EXPR, type,
2880 fold_convert (type, rangei->exp), lowi);
2881 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2882 lowj = build_int_cst (type, 0);
2883 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2884 NULL, rangei->in_p, lowj, tem2,
2885 rangei->strict_overflow_p
2886 || rangej->strict_overflow_p))
2887 return true;
2888 return false;
2889 }
2890
2891 /* Perform the common checks shared by optimize_range_tests_xor and
2892 optimize_range_tests_diff.
2893 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2894 otherwise call optimize_range_tests_diff. */
2895
2896 static bool
2897 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2898 bool optimize_xor, vec<operand_entry *> *ops,
2899 struct range_entry *ranges)
2900 {
2901 int i, j;
2902 bool any_changes = false;
2903 for (i = first; i < length; i++)
2904 {
2905 tree lowi, highi, lowj, highj, type, tem;
2906
2907 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2908 continue;
2909 type = TREE_TYPE (ranges[i].exp);
2910 if (!INTEGRAL_TYPE_P (type))
2911 continue;
2912 lowi = ranges[i].low;
2913 if (lowi == NULL_TREE)
2914 lowi = TYPE_MIN_VALUE (type);
2915 highi = ranges[i].high;
2916 if (highi == NULL_TREE)
2917 continue;
2918 for (j = i + 1; j < length && j < i + 64; j++)
2919 {
2920 bool changes;
2921 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2922 continue;
2923 lowj = ranges[j].low;
2924 if (lowj == NULL_TREE)
2925 continue;
2926 highj = ranges[j].high;
2927 if (highj == NULL_TREE)
2928 highj = TYPE_MAX_VALUE (type);
2929 /* Check lowj > highi. */
2930 tem = fold_binary (GT_EXPR, boolean_type_node,
2931 lowj, highi);
2932 if (tem == NULL_TREE || !integer_onep (tem))
2933 continue;
2934 if (optimize_xor)
2935 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2936 highi, highj, ops,
2937 ranges + i, ranges + j);
2938 else
2939 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2940 highi, highj, ops,
2941 ranges + i, ranges + j);
2942 if (changes)
2943 {
2944 any_changes = true;
2945 break;
2946 }
2947 }
2948 }
2949 return any_changes;
2950 }
2951
2952 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2953 range, EXP, LOW, HIGH, compute the bit mask of bits to test in *MASK
2954 and return EXP on success, NULL otherwise. */
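/* For example (a sketch), with PREC 64, LOW 43 and HIGH 46 the computed
   mask covers bits 0..3; for a later range with LOW 75 and HIGH 78
   relative to TOTALLOW 43, the same four-bit mask is shifted left by
   75 - 43 = 32 before being ORed in by the caller.  */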
2955
2956 static tree
2957 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2958 wide_int *mask, tree *totallowp)
2959 {
2960 tree tem = int_const_binop (MINUS_EXPR, high, low);
2961 if (tem == NULL_TREE
2962 || TREE_CODE (tem) != INTEGER_CST
2963 || TREE_OVERFLOW (tem)
2964 || tree_int_cst_sgn (tem) == -1
2965 || compare_tree_int (tem, prec) != -1)
2966 return NULL_TREE;
2967
2968 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2969 *mask = wi::shifted_mask (0, max, false, prec);
2970 if (TREE_CODE (exp) == BIT_AND_EXPR
2971 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2972 {
2973 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2974 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2975 if (wi::popcount (msk) == 1
2976 && wi::ltu_p (msk, prec - max))
2977 {
2978 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2979 max += msk.to_uhwi ();
2980 exp = TREE_OPERAND (exp, 0);
2981 if (integer_zerop (low)
2982 && TREE_CODE (exp) == PLUS_EXPR
2983 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2984 {
2985 tree ret = TREE_OPERAND (exp, 0);
2986 STRIP_NOPS (ret);
2987 widest_int bias
2988 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2989 TYPE_PRECISION (TREE_TYPE (low))));
2990 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2991 if (totallowp)
2992 {
2993 *totallowp = tbias;
2994 return ret;
2995 }
2996 else if (!tree_int_cst_lt (totallow, tbias))
2997 return NULL_TREE;
2998 bias = wi::to_widest (tbias);
2999 bias -= wi::to_widest (totallow);
3000 if (bias >= 0 && bias < prec - max)
3001 {
3002 *mask = wi::lshift (*mask, bias);
3003 return ret;
3004 }
3005 }
3006 }
3007 }
3008 if (totallowp)
3009 return exp;
3010 if (!tree_int_cst_lt (totallow, low))
3011 return exp;
3012 tem = int_const_binop (MINUS_EXPR, low, totallow);
3013 if (tem == NULL_TREE
3014 || TREE_CODE (tem) != INTEGER_CST
3015 || TREE_OVERFLOW (tem)
3016 || compare_tree_int (tem, prec - max) == 1)
3017 return NULL_TREE;
3018
3019 *mask = wi::lshift (*mask, wi::to_widest (tem));
3020 return exp;
3021 }
3022
3023 /* Attempt to optimize small range tests using bit test.
3024 E.g.
3025 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3026 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3027 has been optimized by earlier optimizations into:
3028 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3029 As all the 43 through 82 range is less than 64 numbers,
3030 for 64-bit word targets optimize that into:
3031 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3032
3033 static bool
3034 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3035 vec<operand_entry *> *ops,
3036 struct range_entry *ranges)
3037 {
3038 int i, j;
3039 bool any_changes = false;
3040 int prec = GET_MODE_BITSIZE (word_mode);
3041 auto_vec<struct range_entry *, 64> candidates;
3042
3043 for (i = first; i < length - 2; i++)
3044 {
3045 tree lowi, highi, lowj, highj, type;
3046
3047 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3048 continue;
3049 type = TREE_TYPE (ranges[i].exp);
3050 if (!INTEGRAL_TYPE_P (type))
3051 continue;
3052 lowi = ranges[i].low;
3053 if (lowi == NULL_TREE)
3054 lowi = TYPE_MIN_VALUE (type);
3055 highi = ranges[i].high;
3056 if (highi == NULL_TREE)
3057 continue;
3058 wide_int mask;
3059 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3060 highi, &mask, &lowi);
3061 if (exp == NULL_TREE)
3062 continue;
3063 bool strict_overflow_p = ranges[i].strict_overflow_p;
3064 candidates.truncate (0);
3065 int end = MIN (i + 64, length);
3066 for (j = i + 1; j < end; j++)
3067 {
3068 tree exp2;
3069 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3070 continue;
3071 if (ranges[j].exp == exp)
3072 ;
3073 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3074 {
3075 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3076 if (exp2 == exp)
3077 ;
3078 else if (TREE_CODE (exp2) == PLUS_EXPR)
3079 {
3080 exp2 = TREE_OPERAND (exp2, 0);
3081 STRIP_NOPS (exp2);
3082 if (exp2 != exp)
3083 continue;
3084 }
3085 else
3086 continue;
3087 }
3088 else
3089 continue;
3090 lowj = ranges[j].low;
3091 if (lowj == NULL_TREE)
3092 continue;
3093 highj = ranges[j].high;
3094 if (highj == NULL_TREE)
3095 highj = TYPE_MAX_VALUE (type);
3096 wide_int mask2;
3097 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3098 highj, &mask2, NULL);
3099 if (exp2 != exp)
3100 continue;
3101 mask |= mask2;
3102 strict_overflow_p |= ranges[j].strict_overflow_p;
3103 candidates.safe_push (&ranges[j]);
3104 }
3105
3106 /* If we would otherwise need 3 or more comparisons, use a bit test. */
3107 if (candidates.length () >= 2)
3108 {
3109 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3110 wi::to_widest (lowi)
3111 + prec - 1 - wi::clz (mask));
3112 operand_entry *oe = (*ops)[ranges[i].idx];
3113 tree op = oe->op;
3114 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3115 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3116 location_t loc = gimple_location (stmt);
3117 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3118
3119 /* See whether it is cheaper to pretend the minimum value of the
3120 range is 0, if the maximum value is small enough.
3121 We can then avoid the subtraction of the minimum value, but the
3122 mask constant could perhaps be more expensive. */
3123 if (compare_tree_int (lowi, 0) > 0
3124 && compare_tree_int (high, prec) < 0)
3125 {
3126 int cost_diff;
3127 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3128 rtx reg = gen_raw_REG (word_mode, 10000);
3129 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3130 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
3131 GEN_INT (-m)), speed_p);
3132 rtx r = immed_wide_int_const (mask, word_mode);
3133 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3134 word_mode, speed_p);
3135 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3136 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3137 word_mode, speed_p);
3138 if (cost_diff > 0)
3139 {
3140 mask = wi::lshift (mask, m);
3141 lowi = build_zero_cst (TREE_TYPE (lowi));
3142 }
3143 }
3144
3145 tree tem = build_range_check (loc, optype, unshare_expr (exp),
3146 false, lowi, high);
3147 if (tem == NULL_TREE || is_gimple_val (tem))
3148 continue;
3149 tree etype = unsigned_type_for (TREE_TYPE (exp));
3150 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3151 fold_convert_loc (loc, etype, exp),
3152 fold_convert_loc (loc, etype, lowi));
3153 exp = fold_convert_loc (loc, integer_type_node, exp);
3154 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3155 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3156 build_int_cst (word_type, 1), exp);
3157 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3158 wide_int_to_tree (word_type, mask));
3159 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3160 build_zero_cst (word_type));
3161 if (is_gimple_val (exp))
3162 continue;
3163
3164 /* The shift might have undefined behavior if TEM is true,
3165 but reassociate_bb isn't prepared to have basic blocks
3166 split when it is running. So, temporarily emit a code
3167 with BIT_IOR_EXPR instead of &&, and fix it up in
3168 branch_fixup. */
3169 gimple_seq seq;
3170 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3171 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3172 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3173 gimple_seq seq2;
3174 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3175 gimple_seq_add_seq_without_update (&seq, seq2);
3176 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3177 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3178 gimple *g = gimple_build_assign (make_ssa_name (optype),
3179 BIT_IOR_EXPR, tem, exp);
3180 gimple_set_location (g, loc);
3181 gimple_seq_add_stmt_without_update (&seq, g);
3182 exp = gimple_assign_lhs (g);
3183 tree val = build_zero_cst (optype);
3184 if (update_range_test (&ranges[i], NULL, candidates.address (),
3185 candidates.length (), opcode, ops, exp,
3186 seq, false, val, val, strict_overflow_p))
3187 {
3188 any_changes = true;
3189 reassoc_branch_fixups.safe_push (tem);
3190 }
3191 else
3192 gimple_seq_discard (seq);
3193 }
3194 }
3195 return any_changes;
3196 }
3197
3198 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3199 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1. */
3200
3201 static bool
3202 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3203 vec<operand_entry *> *ops,
3204 struct range_entry *ranges)
3205 {
3206 int i;
3207 unsigned int b;
3208 bool any_changes = false;
3209 auto_vec<int, 128> buckets;
3210 auto_vec<int, 32> chains;
3211 auto_vec<struct range_entry *, 32> candidates;
3212
3213 for (i = first; i < length; i++)
3214 {
3215 if (ranges[i].exp == NULL_TREE
3216 || TREE_CODE (ranges[i].exp) != SSA_NAME
3217 || !ranges[i].in_p
3218 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3219 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3220 || ranges[i].low == NULL_TREE
3221 || ranges[i].low != ranges[i].high)
3222 continue;
3223
3224 bool zero_p = integer_zerop (ranges[i].low);
3225 if (!zero_p && !integer_all_onesp (ranges[i].low))
3226 continue;
3227
3228 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3229 if (buckets.length () <= b)
3230 buckets.safe_grow_cleared (b + 1);
3231 if (chains.length () <= (unsigned) i)
3232 chains.safe_grow (i + 1);
3233 chains[i] = buckets[b];
3234 buckets[b] = i + 1;
3235 }
3236
3237 FOR_EACH_VEC_ELT (buckets, b, i)
3238 if (i && chains[i - 1])
3239 {
3240 int j, k = i;
3241 for (j = chains[i - 1]; j; j = chains[j - 1])
3242 {
3243 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3244 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3245 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3246 k = j;
3247 }
3248 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3249 tree type2 = NULL_TREE;
3250 bool strict_overflow_p = false;
3251 candidates.truncate (0);
3252 for (j = i; j; j = chains[j - 1])
3253 {
3254 tree type = TREE_TYPE (ranges[j - 1].exp);
3255 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3256 if (j == k
3257 || useless_type_conversion_p (type1, type))
3258 ;
3259 else if (type2 == NULL_TREE
3260 || useless_type_conversion_p (type2, type))
3261 {
3262 if (type2 == NULL_TREE)
3263 type2 = type;
3264 candidates.safe_push (&ranges[j - 1]);
3265 }
3266 }
3267 unsigned l = candidates.length ();
3268 for (j = i; j; j = chains[j - 1])
3269 {
3270 tree type = TREE_TYPE (ranges[j - 1].exp);
3271 if (j == k)
3272 continue;
3273 if (useless_type_conversion_p (type1, type))
3274 ;
3275 else if (type2 == NULL_TREE
3276 || useless_type_conversion_p (type2, type))
3277 continue;
3278 candidates.safe_push (&ranges[j - 1]);
3279 }
3280 gimple_seq seq = NULL;
3281 tree op = NULL_TREE;
3282 unsigned int id;
3283 struct range_entry *r;
3284 candidates.safe_push (&ranges[k - 1]);
3285 FOR_EACH_VEC_ELT (candidates, id, r)
3286 {
3287 gimple *g;
3288 if (id == 0)
3289 {
3290 op = r->exp;
3291 continue;
3292 }
3293 if (id == l)
3294 {
3295 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3296 gimple_seq_add_stmt_without_update (&seq, g);
3297 op = gimple_assign_lhs (g);
3298 }
3299 tree type = TREE_TYPE (r->exp);
3300 tree exp = r->exp;
3301 if (id >= l && !useless_type_conversion_p (type1, type))
3302 {
3303 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3304 gimple_seq_add_stmt_without_update (&seq, g);
3305 exp = gimple_assign_lhs (g);
3306 }
3307 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3308 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3309 op, exp);
3310 gimple_seq_add_stmt_without_update (&seq, g);
3311 op = gimple_assign_lhs (g);
3312 }
3313 candidates.pop ();
3314 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3315 candidates.length (), opcode, ops, op,
3316 seq, true, ranges[k - 1].low,
3317 ranges[k - 1].low, strict_overflow_p))
3318 any_changes = true;
3319 else
3320 gimple_seq_discard (seq);
3321 }
3322
3323 return any_changes;
3324 }
3325
3326 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3327 a >= 0 && a < b into (unsigned) a < (unsigned) b
3328 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
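/* E.g. (a sketch), for a signed index i_1 and a length n_2 known to be
   non-negative, the guard i_1 >= 0 && i_1 < n_2 collapses to the single
   test (unsigned) i_1 < (unsigned) n_2: a negative i_1 wraps around to
   a large unsigned value and still fails the comparison.  */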
3329
3330 static bool
3331 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3332 vec<operand_entry *> *ops,
3333 struct range_entry *ranges,
3334 basic_block first_bb)
3335 {
3336 int i;
3337 bool any_changes = false;
3338 hash_map<tree, int> *map = NULL;
3339
3340 for (i = first; i < length; i++)
3341 {
3342 if (ranges[i].exp == NULL_TREE
3343 || TREE_CODE (ranges[i].exp) != SSA_NAME
3344 || !ranges[i].in_p)
3345 continue;
3346
3347 tree type = TREE_TYPE (ranges[i].exp);
3348 if (!INTEGRAL_TYPE_P (type)
3349 || TYPE_UNSIGNED (type)
3350 || ranges[i].low == NULL_TREE
3351 || !integer_zerop (ranges[i].low)
3352 || ranges[i].high != NULL_TREE)
3353 continue;
3354 /* EXP >= 0 here. */
3355 if (map == NULL)
3356 map = new hash_map <tree, int>;
3357 map->put (ranges[i].exp, i);
3358 }
3359
3360 if (map == NULL)
3361 return false;
3362
3363 for (i = 0; i < length; i++)
3364 {
3365 bool in_p = ranges[i].in_p;
3366 if (ranges[i].low == NULL_TREE
3367 || ranges[i].high == NULL_TREE)
3368 continue;
3369 if (!integer_zerop (ranges[i].low)
3370 || !integer_zerop (ranges[i].high))
3371 {
3372 if (ranges[i].exp
3373 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3374 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3375 && integer_onep (ranges[i].low)
3376 && integer_onep (ranges[i].high))
3377 in_p = !in_p;
3378 else
3379 continue;
3380 }
3381
3382 gimple *stmt;
3383 tree_code ccode;
3384 tree rhs1, rhs2;
3385 if (ranges[i].exp)
3386 {
3387 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3388 continue;
3389 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3390 if (!is_gimple_assign (stmt))
3391 continue;
3392 ccode = gimple_assign_rhs_code (stmt);
3393 rhs1 = gimple_assign_rhs1 (stmt);
3394 rhs2 = gimple_assign_rhs2 (stmt);
3395 }
3396 else
3397 {
3398 operand_entry *oe = (*ops)[ranges[i].idx];
3399 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3400 if (gimple_code (stmt) != GIMPLE_COND)
3401 continue;
3402 ccode = gimple_cond_code (stmt);
3403 rhs1 = gimple_cond_lhs (stmt);
3404 rhs2 = gimple_cond_rhs (stmt);
3405 }
3406
3407 if (TREE_CODE (rhs1) != SSA_NAME
3408 || rhs2 == NULL_TREE
3409 || TREE_CODE (rhs2) != SSA_NAME)
3410 continue;
3411
3412 switch (ccode)
3413 {
3414 case GT_EXPR:
3415 case GE_EXPR:
3416 case LT_EXPR:
3417 case LE_EXPR:
3418 break;
3419 default:
3420 continue;
3421 }
3422 if (in_p)
3423 ccode = invert_tree_comparison (ccode, false);
3424 switch (ccode)
3425 {
3426 case GT_EXPR:
3427 case GE_EXPR:
3428 std::swap (rhs1, rhs2);
3429 ccode = swap_tree_comparison (ccode);
3430 break;
3431 case LT_EXPR:
3432 case LE_EXPR:
3433 break;
3434 default:
3435 gcc_unreachable ();
3436 }
3437
3438 int *idx = map->get (rhs1);
3439 if (idx == NULL)
3440 continue;
3441
3442 /* maybe_optimize_range_tests allows statements without side-effects
3443 in the basic blocks as long as they are consumed in the same bb.
3444 Make sure rhs2's def stmt is not among them, otherwise we can't
3445 safely use get_nonzero_bits on it. E.g. in:
3446 # RANGE [-83, 1] NONZERO 173
3447 # k_32 = PHI <k_47(13), k_12(9)>
3448 ...
3449 if (k_32 >= 0)
3450 goto <bb 5>; [26.46%]
3451 else
3452 goto <bb 9>; [73.54%]
3453
3454 <bb 5> [local count: 140323371]:
3455 # RANGE [0, 1] NONZERO 1
3456 _5 = (int) k_32;
3457 # RANGE [0, 4] NONZERO 4
3458 _21 = _5 << 2;
3459 # RANGE [0, 4] NONZERO 4
3460 iftmp.0_44 = (char) _21;
3461 if (k_32 < iftmp.0_44)
3462 goto <bb 6>; [84.48%]
3463 else
3464 goto <bb 9>; [15.52%]
3465 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3466 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3467 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3468 those stmts even for negative k_32 and the value ranges would no
3469 longer be guaranteed, so the optimization would be invalid. */
3470 while (opcode == ERROR_MARK)
3471 {
3472 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3473 basic_block bb2 = gimple_bb (g);
3474 if (bb2
3475 && bb2 != first_bb
3476 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3477 {
3478 /* As an exception, handle a few common cases. */
3479 if (gimple_assign_cast_p (g)
3480 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3481 {
3482 tree op0 = gimple_assign_rhs1 (g);
3483 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3484 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3485 > TYPE_PRECISION (TREE_TYPE (op0))))
3486 /* Zero-extension is always ok. */
3487 break;
3488 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3489 == TYPE_PRECISION (TREE_TYPE (op0))
3490 && TREE_CODE (op0) == SSA_NAME)
3491 {
3492 /* Cast from signed to unsigned or vice versa. Retry
3493 with the op0 as new rhs2. */
3494 rhs2 = op0;
3495 continue;
3496 }
3497 }
3498 else if (is_gimple_assign (g)
3499 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3500 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3501 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3502 /* Masking with INTEGER_CST with MSB clear is always ok
3503 too. */
3504 break;
3505 rhs2 = NULL_TREE;
3506 }
3507 break;
3508 }
3509 if (rhs2 == NULL_TREE)
3510 continue;
3511
3512 wide_int nz = get_nonzero_bits (rhs2);
3513 if (wi::neg_p (nz))
3514 continue;
3515
3516 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3517 and RHS2 is known to be >= 0. */
3518 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3519
3520 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3521 if ((ranges[*idx].strict_overflow_p
3522 || ranges[i].strict_overflow_p)
3523 && issue_strict_overflow_warning (wc))
3524 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3525 "assuming signed overflow does not occur "
3526 "when simplifying range test");
3527
3528 if (dump_file && (dump_flags & TDF_DETAILS))
3529 {
3530 struct range_entry *r = &ranges[*idx];
3531 fprintf (dump_file, "Optimizing range test ");
3532 print_generic_expr (dump_file, r->exp);
3533 fprintf (dump_file, " +[");
3534 print_generic_expr (dump_file, r->low);
3535 fprintf (dump_file, ", ");
3536 print_generic_expr (dump_file, r->high);
3537 fprintf (dump_file, "] and comparison ");
3538 print_generic_expr (dump_file, rhs1);
3539 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3540 print_generic_expr (dump_file, rhs2);
3541 fprintf (dump_file, "\n into (");
3542 print_generic_expr (dump_file, utype);
3543 fprintf (dump_file, ") ");
3544 print_generic_expr (dump_file, rhs1);
3545 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3546 print_generic_expr (dump_file, utype);
3547 fprintf (dump_file, ") ");
3548 print_generic_expr (dump_file, rhs2);
3549 fprintf (dump_file, "\n");
3550 }
3551
3552 operand_entry *oe = (*ops)[ranges[i].idx];
3553 ranges[i].in_p = 0;
3554 if (opcode == BIT_IOR_EXPR
3555 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3556 {
3557 ranges[i].in_p = 1;
3558 ccode = invert_tree_comparison (ccode, false);
3559 }
3560
3561 unsigned int uid = gimple_uid (stmt);
3562 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3563 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3564 gimple_set_uid (g, uid);
3565 rhs1 = gimple_assign_lhs (g);
3566 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3567 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3568 {
3569 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3570 gimple_set_uid (g, uid);
3571 rhs2 = gimple_assign_lhs (g);
3572 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3573 }
3574 if (tree_swap_operands_p (rhs1, rhs2))
3575 {
3576 std::swap (rhs1, rhs2);
3577 ccode = swap_tree_comparison (ccode);
3578 }
3579 if (gimple_code (stmt) == GIMPLE_COND)
3580 {
3581 gcond *c = as_a <gcond *> (stmt);
3582 gimple_cond_set_code (c, ccode);
3583 gimple_cond_set_lhs (c, rhs1);
3584 gimple_cond_set_rhs (c, rhs2);
3585 update_stmt (stmt);
3586 }
3587 else
3588 {
3589 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3590 if (!INTEGRAL_TYPE_P (ctype)
3591 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3592 && TYPE_PRECISION (ctype) != 1))
3593 ctype = boolean_type_node;
3594 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3595 gimple_set_uid (g, uid);
3596 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3597 if (oe->op && ctype != TREE_TYPE (oe->op))
3598 {
3599 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3600 NOP_EXPR, gimple_assign_lhs (g));
3601 gimple_set_uid (g, uid);
3602 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3603 }
3604 ranges[i].exp = gimple_assign_lhs (g);
3605 oe->op = ranges[i].exp;
3606 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3607 ranges[i].high = ranges[i].low;
3608 }
3609 ranges[i].strict_overflow_p = false;
3610 oe = (*ops)[ranges[*idx].idx];
3611 /* Now change all the other range test immediate uses, so that
3612 those tests will be optimized away. */
3613 if (opcode == ERROR_MARK)
3614 {
3615 if (oe->op)
3616 oe->op = build_int_cst (TREE_TYPE (oe->op),
3617 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3618 else
3619 oe->op = (oe->rank == BIT_IOR_EXPR
3620 ? boolean_false_node : boolean_true_node);
3621 }
3622 else
3623 oe->op = error_mark_node;
3624 ranges[*idx].exp = NULL_TREE;
3625 ranges[*idx].low = NULL_TREE;
3626 ranges[*idx].high = NULL_TREE;
3627 any_changes = true;
3628 }
3629
3630 delete map;
3631 return any_changes;
3632 }
3633
3634 /* Optimize range tests, similarly to how fold_range_test optimizes
3635 them on trees. The tree code for the binary
3636 operation between all the operands is OPCODE.
3637 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3638 maybe_optimize_range_tests for inter-bb range optimization.
3639 In that case if oe->op is NULL, oe->id is bb->index whose
3640 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3641 the actual opcode.
3642 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
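/* For instance (a sketch), for x_1 == 2 | x_1 == 5 | x_1 == 3 | x_1 == 4
   each entry is inverted (because of |), the qsort below brings the
   ranges on x_1 next to each other, merge_ranges combines them into one
   [2, 5] range, and update_range_test re-emits it as the single test
   (unsigned) (x_1 - 2) <= 3 while the subsumed entries are truncated
   out of OPS at the end.  */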
3643
3644 static bool
3645 optimize_range_tests (enum tree_code opcode,
3646 vec<operand_entry *> *ops, basic_block first_bb)
3647 {
3648 unsigned int length = ops->length (), i, j, first;
3649 operand_entry *oe;
3650 struct range_entry *ranges;
3651 bool any_changes = false;
3652
3653 if (length == 1)
3654 return false;
3655
3656 ranges = XNEWVEC (struct range_entry, length);
3657 for (i = 0; i < length; i++)
3658 {
3659 oe = (*ops)[i];
3660 ranges[i].idx = i;
3661 init_range_entry (ranges + i, oe->op,
3662 oe->op
3663 ? NULL
3664 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3665 /* For | invert it now, we will invert it again before emitting
3666 the optimized expression. */
3667 if (opcode == BIT_IOR_EXPR
3668 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3669 ranges[i].in_p = !ranges[i].in_p;
3670 }
3671
3672 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3673 for (i = 0; i < length; i++)
3674 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3675 break;
3676
3677 /* Try to merge ranges. */
3678 for (first = i; i < length; i++)
3679 {
3680 tree low = ranges[i].low;
3681 tree high = ranges[i].high;
3682 int in_p = ranges[i].in_p;
3683 bool strict_overflow_p = ranges[i].strict_overflow_p;
3684 int update_fail_count = 0;
3685
3686 for (j = i + 1; j < length; j++)
3687 {
3688 if (ranges[i].exp != ranges[j].exp)
3689 break;
3690 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3691 ranges[j].in_p, ranges[j].low, ranges[j].high))
3692 break;
3693 strict_overflow_p |= ranges[j].strict_overflow_p;
3694 }
3695
3696 if (j == i + 1)
3697 continue;
3698
3699 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3700 opcode, ops, ranges[i].exp, NULL, in_p,
3701 low, high, strict_overflow_p))
3702 {
3703 i = j - 1;
3704 any_changes = true;
3705 }
3706 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3707 while update_range_test would fail. */
3708 else if (update_fail_count == 64)
3709 i = j - 1;
3710 else
3711 ++update_fail_count;
3712 }
3713
3714 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3715 ops, ranges);
3716
3717 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3718 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3719 ops, ranges);
3720 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3721 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3722 ops, ranges);
3723 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3724 ops, ranges);
3725 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3726 ranges, first_bb);
3727
3728 if (any_changes && opcode != ERROR_MARK)
3729 {
3730 j = 0;
3731 FOR_EACH_VEC_ELT (*ops, i, oe)
3732 {
3733 if (oe->op == error_mark_node)
3734 continue;
3735 else if (i != j)
3736 (*ops)[j] = oe;
3737 j++;
3738 }
3739 ops->truncate (j);
3740 }
3741
3742 XDELETEVEC (ranges);
3743 return any_changes;
3744 }
3745
3746 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3747 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3748 otherwise the comparison code. */
3749
3750 static tree_code
3751 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3752 {
3753 if (TREE_CODE (var) != SSA_NAME)
3754 return ERROR_MARK;
3755
3756 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3757 if (stmt == NULL)
3758 return ERROR_MARK;
3759
3760 /* ??? If we start creating more COND_EXPR, we could perform
3761 this same optimization with them. For now, simplify. */
3762 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3763 return ERROR_MARK;
3764
3765 tree cond = gimple_assign_rhs1 (stmt);
3766 tree_code cmp = TREE_CODE (cond);
3767 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3768 return ERROR_MARK;
3769
3770 /* ??? For now, allow only canonical true and false result vectors.
3771 We could expand this to other constants should the need arise,
3772 but at the moment we don't create them. */
3773 tree t = gimple_assign_rhs2 (stmt);
3774 tree f = gimple_assign_rhs3 (stmt);
3775 bool inv;
3776 if (integer_all_onesp (t))
3777 inv = false;
3778 else if (integer_all_onesp (f))
3779 {
3780 cmp = invert_tree_comparison (cmp, false);
3781 inv = true;
3782 }
3783 else
3784 return ERROR_MARK;
3785 if (!integer_zerop (f))
3786 return ERROR_MARK;
3787
3788 /* Success! */
3789 if (rets)
3790 *rets = stmt;
3791 if (reti)
3792 *reti = inv;
3793 return cmp;
3794 }
3795
3796 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3797 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
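/* Editorial illustration (not part of the original sources): given

     c1_1 = VEC_COND_EXPR <a_2 <= b_3, { -1, ... }, { 0, ... }>;
     c2_4 = VEC_COND_EXPR <a_2 >= b_3, { -1, ... }, { 0, ... }>;
     r_5 = c1_1 & c2_4;

   maybe_fold_and_comparisons can combine the two comparisons into
   a_2 == b_3, so the first VEC_COND_EXPR is rewritten to test the
   combined condition and c2_4's operand entry is dropped.  */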
3798
3799 static bool
3800 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3801 {
3802 unsigned int length = ops->length (), i, j;
3803 bool any_changes = false;
3804
3805 if (length == 1)
3806 return false;
3807
3808 for (i = 0; i < length; ++i)
3809 {
3810 tree elt0 = (*ops)[i]->op;
3811
3812 gassign *stmt0;
3813 bool invert;
3814 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3815 if (cmp0 == ERROR_MARK)
3816 continue;
3817
3818 for (j = i + 1; j < length; ++j)
3819 {
3820 tree &elt1 = (*ops)[j]->op;
3821
3822 gassign *stmt1;
3823 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3824 if (cmp1 == ERROR_MARK)
3825 continue;
3826
3827 tree cond0 = gimple_assign_rhs1 (stmt0);
3828 tree x0 = TREE_OPERAND (cond0, 0);
3829 tree y0 = TREE_OPERAND (cond0, 1);
3830
3831 tree cond1 = gimple_assign_rhs1 (stmt1);
3832 tree x1 = TREE_OPERAND (cond1, 0);
3833 tree y1 = TREE_OPERAND (cond1, 1);
3834
3835 tree comb;
3836 if (opcode == BIT_AND_EXPR)
3837 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3838 else if (opcode == BIT_IOR_EXPR)
3839 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3840 else
3841 gcc_unreachable ();
3842 if (comb == NULL)
3843 continue;
3844
3845 /* Success! */
3846 if (dump_file && (dump_flags & TDF_DETAILS))
3847 {
3848 fprintf (dump_file, "Transforming ");
3849 print_generic_expr (dump_file, cond0);
3850 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3851 print_generic_expr (dump_file, cond1);
3852 fprintf (dump_file, " into ");
3853 print_generic_expr (dump_file, comb);
3854 fputc ('\n', dump_file);
3855 }
3856
3857 gimple_assign_set_rhs1 (stmt0, comb);
3858 if (invert)
3859 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3860 *gimple_assign_rhs3_ptr (stmt0));
3861 update_stmt (stmt0);
3862
3863 elt1 = error_mark_node;
3864 any_changes = true;
3865 }
3866 }
3867
3868 if (any_changes)
3869 {
3870 operand_entry *oe;
3871 j = 0;
3872 FOR_EACH_VEC_ELT (*ops, i, oe)
3873 {
3874 if (oe->op == error_mark_node)
3875 continue;
3876 else if (i != j)
3877 (*ops)[j] = oe;
3878 j++;
3879 }
3880 ops->truncate (j);
3881 }
3882
3883 return any_changes;
3884 }
3885
3886 /* Return true if STMT is a cast like:
3887 <bb N>:
3888 ...
3889 _123 = (int) _234;
3890
3891 <bb M>:
3892 # _345 = PHI <_123(N), 1(...), 1(...)>
3893 where _234 has bool type, _123 has a single use and
3894 bb N has a single successor M. This is commonly used in
3895 the last block of a range test.
3896
3897 Also return true if STMT is a tcc_comparison like:
3898 <bb N>:
3899 ...
3900 _234 = a_2(D) == 2;
3901
3902 <bb M>:
3903 # _345 = PHI <_234(N), 1(...), 1(...)>
3904 _346 = (int) _345;
3905 where _234 has bool type, a single use and
3906 bb N has a single successor M. This is commonly used in
3907 the last block of a range test. */
3908
3909 static bool
3910 final_range_test_p (gimple *stmt)
3911 {
3912 basic_block bb, rhs_bb, lhs_bb;
3913 edge e;
3914 tree lhs, rhs;
3915 use_operand_p use_p;
3916 gimple *use_stmt;
3917
3918 if (!gimple_assign_cast_p (stmt)
3919 && (!is_gimple_assign (stmt)
3920 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3921 != tcc_comparison)))
3922 return false;
3923 bb = gimple_bb (stmt);
3924 if (!single_succ_p (bb))
3925 return false;
3926 e = single_succ_edge (bb);
3927 if (e->flags & EDGE_COMPLEX)
3928 return false;
3929
3930 lhs = gimple_assign_lhs (stmt);
3931 rhs = gimple_assign_rhs1 (stmt);
3932 if (gimple_assign_cast_p (stmt)
3933 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3934 || TREE_CODE (rhs) != SSA_NAME
3935 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3936 return false;
3937
3938 if (!gimple_assign_cast_p (stmt)
3939 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3940 return false;
3941
3942 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3943 if (!single_imm_use (lhs, &use_p, &use_stmt))
3944 return false;
3945
3946 if (gimple_code (use_stmt) != GIMPLE_PHI
3947 || gimple_bb (use_stmt) != e->dest)
3948 return false;
3949
3950 /* And that the rhs is defined in the same loop. */
3951 if (gimple_assign_cast_p (stmt))
3952 {
3953 if (TREE_CODE (rhs) != SSA_NAME
3954 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3955 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3956 return false;
3957 }
3958 else
3959 {
3960 if (TREE_CODE (lhs) != SSA_NAME
3961 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3962 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3963 return false;
3964 }
3965
3966 return true;
3967 }
3968
3969 /* Return true if BB is a suitable basic block for inter-bb range test
3970 optimization. If BACKWARD is true, BB should be the only predecessor
3971 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3972 or compared against to find a common basic block to which all
3973 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
3974 should be the only predecessor of BB. */
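/* Editorial illustration of the BACKWARD case (not part of the
   original sources):

     <bb 2>:  if (a_1 > 10) goto <bb 4>; else goto <bb 3>;
     <bb 3>:  if (b_2 > 10) goto <bb 4>; else goto <bb 5>;

   Here BB is <bb 2>, TEST_BB is <bb 3> and *OTHER_BB is <bb 4>:
   BB is the only predecessor of TEST_BB and both blocks have an
   edge to the common destination <bb 4>.  */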
3975
3976 static bool
3977 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3978 bool backward)
3979 {
3980 edge_iterator ei, ei2;
3981 edge e, e2;
3982 gimple *stmt;
3983 gphi_iterator gsi;
3984 bool other_edge_seen = false;
3985 bool is_cond;
3986
3987 if (test_bb == bb)
3988 return false;
3989 /* Check last stmt first. */
3990 stmt = last_stmt (bb);
3991 if (stmt == NULL
3992 || (gimple_code (stmt) != GIMPLE_COND
3993 && (backward || !final_range_test_p (stmt)))
3994 || gimple_visited_p (stmt)
3995 || stmt_could_throw_p (cfun, stmt)
3996 || *other_bb == bb)
3997 return false;
3998 is_cond = gimple_code (stmt) == GIMPLE_COND;
3999 if (is_cond)
4000 {
4001 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4002 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4003 to *OTHER_BB (if not set yet, try to find it out). */
4004 if (EDGE_COUNT (bb->succs) != 2)
4005 return false;
4006 FOR_EACH_EDGE (e, ei, bb->succs)
4007 {
4008 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4009 return false;
4010 if (e->dest == test_bb)
4011 {
4012 if (backward)
4013 continue;
4014 else
4015 return false;
4016 }
4017 if (e->dest == bb)
4018 return false;
4019 if (*other_bb == NULL)
4020 {
4021 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4022 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4023 return false;
4024 else if (e->dest == e2->dest)
4025 *other_bb = e->dest;
4026 if (*other_bb == NULL)
4027 return false;
4028 }
4029 if (e->dest == *other_bb)
4030 other_edge_seen = true;
4031 else if (backward)
4032 return false;
4033 }
4034 if (*other_bb == NULL || !other_edge_seen)
4035 return false;
4036 }
4037 else if (single_succ (bb) != *other_bb)
4038 return false;
4039
4040 /* Now check all PHIs of *OTHER_BB. */
4041 e = find_edge (bb, *other_bb);
4042 e2 = find_edge (test_bb, *other_bb);
4043 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4044 {
4045 gphi *phi = gsi.phi ();
4046 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4047 corresponding to the BB and TEST_BB predecessors must be the same. */
4048 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4049 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4050 {
4051 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4052 one of the PHIs should have the lhs of the last stmt in
4053 that block as PHI arg and that PHI should have 0 or 1
4054 corresponding to it in all other range test basic blocks
4055 considered. */
4056 if (!is_cond)
4057 {
4058 if (gimple_phi_arg_def (phi, e->dest_idx)
4059 == gimple_assign_lhs (stmt)
4060 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4061 || integer_onep (gimple_phi_arg_def (phi,
4062 e2->dest_idx))))
4063 continue;
4064 }
4065 else
4066 {
4067 gimple *test_last = last_stmt (test_bb);
4068 if (gimple_code (test_last) != GIMPLE_COND
4069 && gimple_phi_arg_def (phi, e2->dest_idx)
4070 == gimple_assign_lhs (test_last)
4071 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
4072 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
4073 continue;
4074 }
4075
4076 return false;
4077 }
4078 }
4079 return true;
4080 }
4081
4082 /* Return true if BB doesn't have side-effects that would disallow
4083 range test optimization, all SSA_NAMEs set in the bb are consumed
4084 in the bb and there are no PHIs. */
4085
4086 static bool
4087 no_side_effect_bb (basic_block bb)
4088 {
4089 gimple_stmt_iterator gsi;
4090 gimple *last;
4091
4092 if (!gimple_seq_empty_p (phi_nodes (bb)))
4093 return false;
4094 last = last_stmt (bb);
4095 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4096 {
4097 gimple *stmt = gsi_stmt (gsi);
4098 tree lhs;
4099 imm_use_iterator imm_iter;
4100 use_operand_p use_p;
4101
4102 if (is_gimple_debug (stmt))
4103 continue;
4104 if (gimple_has_side_effects (stmt))
4105 return false;
4106 if (stmt == last)
4107 return true;
4108 if (!is_gimple_assign (stmt))
4109 return false;
4110 lhs = gimple_assign_lhs (stmt);
4111 if (TREE_CODE (lhs) != SSA_NAME)
4112 return false;
4113 if (gimple_assign_rhs_could_trap_p (stmt))
4114 return false;
4115 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4116 {
4117 gimple *use_stmt = USE_STMT (use_p);
4118 if (is_gimple_debug (use_stmt))
4119 continue;
4120 if (gimple_bb (use_stmt) != bb)
4121 return false;
4122 }
4123 }
4124 return false;
4125 }
4126
4127 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4128 return true and fill in *OPS recursively. */
4129
4130 static bool
4131 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4132 class loop *loop)
4133 {
4134 gimple *stmt = SSA_NAME_DEF_STMT (var);
4135 tree rhs[2];
4136 int i;
4137
4138 if (!is_reassociable_op (stmt, code, loop))
4139 return false;
4140
4141 rhs[0] = gimple_assign_rhs1 (stmt);
4142 rhs[1] = gimple_assign_rhs2 (stmt);
4143 gimple_set_visited (stmt, true);
4144 for (i = 0; i < 2; i++)
4145 if (TREE_CODE (rhs[i]) == SSA_NAME
4146 && !get_ops (rhs[i], code, ops, loop)
4147 && has_single_use (rhs[i]))
4148 {
4149 operand_entry *oe = operand_entry_pool.allocate ();
4150
4151 oe->op = rhs[i];
4152 oe->rank = code;
4153 oe->id = 0;
4154 oe->count = 1;
4155 oe->stmt_to_insert = NULL;
4156 ops->safe_push (oe);
4157 }
4158 return true;
4159 }
4160
4161 /* Find the ops that were added by get_ops starting from VAR, see if
4162 they were changed during update_range_test and if yes, create new
4163 stmts. */
4164
4165 static tree
4166 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4167 unsigned int *pidx, class loop *loop)
4168 {
4169 gimple *stmt = SSA_NAME_DEF_STMT (var);
4170 tree rhs[4];
4171 int i;
4172
4173 if (!is_reassociable_op (stmt, code, loop))
4174 return NULL;
4175
4176 rhs[0] = gimple_assign_rhs1 (stmt);
4177 rhs[1] = gimple_assign_rhs2 (stmt);
4178 rhs[2] = rhs[0];
4179 rhs[3] = rhs[1];
4180 for (i = 0; i < 2; i++)
4181 if (TREE_CODE (rhs[i]) == SSA_NAME)
4182 {
4183 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4184 if (rhs[2 + i] == NULL_TREE)
4185 {
4186 if (has_single_use (rhs[i]))
4187 rhs[2 + i] = ops[(*pidx)++]->op;
4188 else
4189 rhs[2 + i] = rhs[i];
4190 }
4191 }
4192 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4193 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4194 {
4195 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4196 var = make_ssa_name (TREE_TYPE (var));
4197 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4198 rhs[2], rhs[3]);
4199 gimple_set_uid (g, gimple_uid (stmt));
4200 gimple_set_visited (g, true);
4201 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4202 }
4203 return var;
4204 }
4205
4206 /* Structure to track the initial value passed to get_ops and
4207 the range in the ops vector for each basic block. */
4208
4209 struct inter_bb_range_test_entry
4210 {
4211 tree op;
4212 unsigned int first_idx, last_idx;
4213 };
4214
4215 /* Inter-bb range test optimization.
4216
4217 Returns TRUE if a gimple conditional is optimized to a constant
4218 true/false, otherwise returns FALSE.
4219
4220 This indicates to the caller that it should run a CFG cleanup pass
4221 once reassociation is completed. */
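/* Editorial illustration (not part of the original sources): for a
   chain of blocks such as

     <bb 2>:  if (a_1 == 3) goto <bb 5>; else goto <bb 3>;
     <bb 3>:  if (a_1 == 4) goto <bb 5>; else goto <bb 4>;
     <bb 4>:  if (a_1 == 5) goto <bb 5>; else goto <bb 6>;

   the three GIMPLE_CONDs are collected into one ops vector,
   optimize_range_tests can merge them into a single test such as
   a_1 - 3 <= 2 in an unsigned type, and the conditions made
   redundant become constant true/false conditions that a later
   CFG cleanup removes.  */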
4222
4223 static bool
4224 maybe_optimize_range_tests (gimple *stmt)
4225 {
4226 basic_block first_bb = gimple_bb (stmt);
4227 basic_block last_bb = first_bb;
4228 basic_block other_bb = NULL;
4229 basic_block bb;
4230 edge_iterator ei;
4231 edge e;
4232 auto_vec<operand_entry *> ops;
4233 auto_vec<inter_bb_range_test_entry> bbinfo;
4234 bool any_changes = false;
4235 bool cfg_cleanup_needed = false;
4236
4237 /* Consider only basic blocks that end with GIMPLE_COND or
4238 a cast statement satisfying final_range_test_p. All
4239 but the last bb in the first_bb .. last_bb range
4240 should end with GIMPLE_COND. */
4241 if (gimple_code (stmt) == GIMPLE_COND)
4242 {
4243 if (EDGE_COUNT (first_bb->succs) != 2)
4244 return cfg_cleanup_needed;
4245 }
4246 else if (final_range_test_p (stmt))
4247 other_bb = single_succ (first_bb);
4248 else
4249 return cfg_cleanup_needed;
4250
4251 if (stmt_could_throw_p (cfun, stmt))
4252 return cfg_cleanup_needed;
4253
4254 /* As the relative ordering of post-dominator sons isn't fixed,
4255 maybe_optimize_range_tests can be called first on any
4256 bb in the range we want to optimize. So, start searching
4257 backwards, moving first_bb to a suitable predecessor while possible. */
4258 while (single_pred_p (first_bb))
4259 {
4260 basic_block pred_bb = single_pred (first_bb);
4261 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
4262 break;
4263 if (!no_side_effect_bb (first_bb))
4264 break;
4265 first_bb = pred_bb;
4266 }
4267 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4268 Before starting forward search in last_bb successors, find
4269 out the other_bb. */
4270 if (first_bb == last_bb)
4271 {
4272 other_bb = NULL;
4273 /* As a non-GIMPLE_COND last stmt always terminates the range, the
4274 forward search below could not discover anything either; give up. */
4275 if (gimple_code (stmt) != GIMPLE_COND)
4276 return cfg_cleanup_needed;
4277 /* Look at both successors. Either the successor ends with a
4278 GIMPLE_COND and satisfies suitable_cond_bb, or it ends with a cast
4279 and other_bb is that cast bb's successor. */
4280 FOR_EACH_EDGE (e, ei, first_bb->succs)
4281 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4282 || e->dest == first_bb)
4283 return cfg_cleanup_needed;
4284 else if (single_pred_p (e->dest))
4285 {
4286 stmt = last_stmt (e->dest);
4287 if (stmt
4288 && gimple_code (stmt) == GIMPLE_COND
4289 && EDGE_COUNT (e->dest->succs) == 2)
4290 {
4291 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4292 break;
4293 else
4294 other_bb = NULL;
4295 }
4296 else if (stmt
4297 && final_range_test_p (stmt)
4298 && find_edge (first_bb, single_succ (e->dest)))
4299 {
4300 other_bb = single_succ (e->dest);
4301 if (other_bb == first_bb)
4302 other_bb = NULL;
4303 }
4304 }
4305 if (other_bb == NULL)
4306 return cfg_cleanup_needed;
4307 }
4308 /* Now do the forward search, moving last_bb to successor bbs
4309 that aren't other_bb. */
4310 while (EDGE_COUNT (last_bb->succs) == 2)
4311 {
4312 FOR_EACH_EDGE (e, ei, last_bb->succs)
4313 if (e->dest != other_bb)
4314 break;
4315 if (e == NULL)
4316 break;
4317 if (!single_pred_p (e->dest))
4318 break;
4319 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4320 break;
4321 if (!no_side_effect_bb (e->dest))
4322 break;
4323 last_bb = e->dest;
4324 }
4325 if (first_bb == last_bb)
4326 return cfg_cleanup_needed;
4327 /* Here basic blocks first_bb through last_bb's predecessor
4328 end with GIMPLE_COND, all of them have one of the edges to
4329 other_bb and another to another block in the range,
4330 all blocks except first_bb don't have side-effects and
4331 last_bb ends with either GIMPLE_COND, or cast satisfying
4332 final_range_test_p. */
4333 for (bb = last_bb; ; bb = single_pred (bb))
4334 {
4335 enum tree_code code;
4336 tree lhs, rhs;
4337 inter_bb_range_test_entry bb_ent;
4338
4339 bb_ent.op = NULL_TREE;
4340 bb_ent.first_idx = ops.length ();
4341 bb_ent.last_idx = bb_ent.first_idx;
4342 e = find_edge (bb, other_bb);
4343 stmt = last_stmt (bb);
4344 gimple_set_visited (stmt, true);
4345 if (gimple_code (stmt) != GIMPLE_COND)
4346 {
4347 use_operand_p use_p;
4348 gimple *phi;
4349 edge e2;
4350 unsigned int d;
4351
4352 lhs = gimple_assign_lhs (stmt);
4353 rhs = gimple_assign_rhs1 (stmt);
4354 gcc_assert (bb == last_bb);
4355
4356 /* stmt is
4357 _123 = (int) _234;
4358 OR
4359 _234 = a_2(D) == 2;
4360
4361 followed by:
4362 <bb M>:
4363 # _345 = PHI <_123(N), 1(...), 1(...)>
4364
4365 or 0 instead of 1. If it is 0, the _234
4366 range test is anded together with all the
4367 other range tests, if it is 1, it is ored with
4368 them. */
4369 single_imm_use (lhs, &use_p, &phi);
4370 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4371 e2 = find_edge (first_bb, other_bb);
4372 d = e2->dest_idx;
4373 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4374 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4375 code = BIT_AND_EXPR;
4376 else
4377 {
4378 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4379 code = BIT_IOR_EXPR;
4380 }
4381
4382 /* If _234 SSA_NAME_DEF_STMT is
4383 _234 = _567 | _789;
4384 (or &, corresponding to 1/0 in the phi arguments),
4385 push into ops the individual range test arguments
4386 of the bitwise or resp. and, recursively. */
4387 if (TREE_CODE (rhs) == SSA_NAME
4388 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4389 != tcc_comparison)
4390 && !get_ops (rhs, code, &ops,
4391 loop_containing_stmt (stmt))
4392 && has_single_use (rhs))
4393 {
4394 /* Otherwise, push the _234 range test itself. */
4395 operand_entry *oe = operand_entry_pool.allocate ();
4396
4397 oe->op = rhs;
4398 oe->rank = code;
4399 oe->id = 0;
4400 oe->count = 1;
4401 oe->stmt_to_insert = NULL;
4402 ops.safe_push (oe);
4403 bb_ent.last_idx++;
4404 bb_ent.op = rhs;
4405 }
4406 else if (is_gimple_assign (stmt)
4407 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4408 == tcc_comparison)
4409 && !get_ops (lhs, code, &ops,
4410 loop_containing_stmt (stmt))
4411 && has_single_use (lhs))
4412 {
4413 operand_entry *oe = operand_entry_pool.allocate ();
4414 oe->op = lhs;
4415 oe->rank = code;
4416 oe->id = 0;
4417 oe->count = 1;
4418 ops.safe_push (oe);
4419 bb_ent.last_idx++;
4420 bb_ent.op = lhs;
4421 }
4422 else
4423 {
4424 bb_ent.last_idx = ops.length ();
4425 bb_ent.op = rhs;
4426 }
4427 bbinfo.safe_push (bb_ent);
4428 continue;
4429 }
4430 /* Otherwise stmt is GIMPLE_COND. */
4431 code = gimple_cond_code (stmt);
4432 lhs = gimple_cond_lhs (stmt);
4433 rhs = gimple_cond_rhs (stmt);
4434 if (TREE_CODE (lhs) == SSA_NAME
4435 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4436 && ((code != EQ_EXPR && code != NE_EXPR)
4437 || rhs != boolean_false_node
4438 /* Either push into ops the individual bitwise
4439 or resp. and operands, depending on which
4440 edge is other_bb. */
4441 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4442 ^ (code == EQ_EXPR))
4443 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4444 loop_containing_stmt (stmt))))
4445 {
4446 /* Or push the GIMPLE_COND stmt itself. */
4447 operand_entry *oe = operand_entry_pool.allocate ();
4448
4449 oe->op = NULL;
4450 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4451 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4452 /* oe->op = NULL signals that there is no SSA_NAME
4453 for the range test, and oe->id instead is the
4454 number of the basic block at whose end the
4455 GIMPLE_COND is. */
4456 oe->id = bb->index;
4457 oe->count = 1;
4458 oe->stmt_to_insert = NULL;
4459 ops.safe_push (oe);
4460 bb_ent.op = NULL;
4461 bb_ent.last_idx++;
4462 }
4463 else if (ops.length () > bb_ent.first_idx)
4464 {
4465 bb_ent.op = lhs;
4466 bb_ent.last_idx = ops.length ();
4467 }
4468 bbinfo.safe_push (bb_ent);
4469 if (bb == first_bb)
4470 break;
4471 }
4472 if (ops.length () > 1)
4473 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4474 if (any_changes)
4475 {
4476 unsigned int idx, max_idx = 0;
4477 /* update_ops relies on the has_single_use predicates returning the
4478 same values as they did during get_ops earlier. Additionally it
4479 never removes statements, only adds new ones, and it should walk
4480 from the single imm use and check the predicate before
4481 making those changes.
4482 On the other hand, handling GIMPLE_COND directly can turn
4483 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4484 it needs to be done in a separate loop afterwards. */
4485 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4486 {
4487 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4488 && bbinfo[idx].op != NULL_TREE)
4489 {
4490 tree new_op;
4491
4492 max_idx = idx;
4493 stmt = last_stmt (bb);
4494 new_op = update_ops (bbinfo[idx].op,
4495 (enum tree_code)
4496 ops[bbinfo[idx].first_idx]->rank,
4497 ops, &bbinfo[idx].first_idx,
4498 loop_containing_stmt (stmt));
4499 if (new_op == NULL_TREE)
4500 {
4501 gcc_assert (bb == last_bb);
4502 new_op = ops[bbinfo[idx].first_idx++]->op;
4503 }
4504 if (bbinfo[idx].op != new_op)
4505 {
4506 imm_use_iterator iter;
4507 use_operand_p use_p;
4508 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4509
4510 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4511 if (is_gimple_debug (use_stmt))
4512 continue;
4513 else if (gimple_code (use_stmt) == GIMPLE_COND
4514 || gimple_code (use_stmt) == GIMPLE_PHI)
4515 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4516 SET_USE (use_p, new_op);
4517 else if ((is_gimple_assign (use_stmt)
4518 && (TREE_CODE_CLASS
4519 (gimple_assign_rhs_code (use_stmt))
4520 == tcc_comparison)))
4521 cast_or_tcc_cmp_stmt = use_stmt;
4522 else if (gimple_assign_cast_p (use_stmt))
4523 cast_or_tcc_cmp_stmt = use_stmt;
4524 else
4525 gcc_unreachable ();
4526
4527 if (cast_or_tcc_cmp_stmt)
4528 {
4529 gcc_assert (bb == last_bb);
4530 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4531 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4532 enum tree_code rhs_code
4533 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4534 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4535 : CONVERT_EXPR;
4536 gassign *g;
4537 if (is_gimple_min_invariant (new_op))
4538 {
4539 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4540 g = gimple_build_assign (new_lhs, new_op);
4541 }
4542 else
4543 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4544 gimple_stmt_iterator gsi
4545 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4546 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4547 gimple_set_visited (g, true);
4548 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4549 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4550 if (is_gimple_debug (use_stmt))
4551 continue;
4552 else if (gimple_code (use_stmt) == GIMPLE_COND
4553 || gimple_code (use_stmt) == GIMPLE_PHI)
4554 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4555 SET_USE (use_p, new_lhs);
4556 else
4557 gcc_unreachable ();
4558 }
4559 }
4560 }
4561 if (bb == first_bb)
4562 break;
4563 }
4564 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4565 {
4566 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4567 && bbinfo[idx].op == NULL_TREE
4568 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4569 {
4570 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4571
4572 if (idx > max_idx)
4573 max_idx = idx;
4574
4575 /* If we collapse the conditional to a true/false
4576 condition, then bubble that knowledge up to our caller. */
4577 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4578 {
4579 gimple_cond_make_false (cond_stmt);
4580 cfg_cleanup_needed = true;
4581 }
4582 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4583 {
4584 gimple_cond_make_true (cond_stmt);
4585 cfg_cleanup_needed = true;
4586 }
4587 else
4588 {
4589 gimple_cond_set_code (cond_stmt, NE_EXPR);
4590 gimple_cond_set_lhs (cond_stmt,
4591 ops[bbinfo[idx].first_idx]->op);
4592 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4593 }
4594 update_stmt (cond_stmt);
4595 }
4596 if (bb == first_bb)
4597 break;
4598 }
4599
4600 /* The above changes could cause basic blocks after the first
4601 modified one, up to and including last_bb, to be executed even if
4602 they would not be in the original program. If the value ranges of
4603 assignment lhs' in those bbs were dependent on the conditions
4604 guarding those basic blocks, which can now change, the VRs might
4605 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4606 are only used within the same bb, it should not be a big deal if
4607 we just reset all the VRs in those bbs. See PR68671. */
4608 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4609 reset_flow_sensitive_info_in_bb (bb);
4610 }
4611 return cfg_cleanup_needed;
4612 }
4613
4614 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4615 of STMT in its operands. This is also known as a "destructive
4616 update" operation. */
4617
4618 static bool
4619 is_phi_for_stmt (gimple *stmt, tree operand)
4620 {
4621 gimple *def_stmt;
4622 gphi *def_phi;
4623 tree lhs;
4624 use_operand_p arg_p;
4625 ssa_op_iter i;
4626
4627 if (TREE_CODE (operand) != SSA_NAME)
4628 return false;
4629
4630 lhs = gimple_assign_lhs (stmt);
4631
4632 def_stmt = SSA_NAME_DEF_STMT (operand);
4633 def_phi = dyn_cast <gphi *> (def_stmt);
4634 if (!def_phi)
4635 return false;
4636
4637 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4638 if (lhs == USE_FROM_PTR (arg_p))
4639 return true;
4640 return false;
4641 }
4642
4643 /* Remove def stmt of VAR if VAR has zero uses and recurse
4644 on rhs1 operand if so. */
4645
4646 static void
4647 remove_visited_stmt_chain (tree var)
4648 {
4649 gimple *stmt;
4650 gimple_stmt_iterator gsi;
4651
4652 while (1)
4653 {
4654 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4655 return;
4656 stmt = SSA_NAME_DEF_STMT (var);
4657 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4658 {
4659 var = gimple_assign_rhs1 (stmt);
4660 gsi = gsi_for_stmt (stmt);
4661 reassoc_remove_stmt (&gsi);
4662 release_defs (stmt);
4663 }
4664 else
4665 return;
4666 }
4667 }
4668
4669 /* This function checks three consecutive operands in
4670 the passed operands vector OPS starting from OPINDEX and
4671 swaps two of them if it is profitable for the binary operation
4672 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4673
4674 We pair ops with the same rank if possible.
4675
4676 The alternative we try is to see if STMT is a destructive
4677 update style statement, which is like:
4678 b = phi (a, ...)
4679 a = c + b;
4680 In that case, we want to use the destructive update form to
4681 expose the possible vectorizer sum reduction opportunity.
4682 In that case, the third operand will be the phi node. This
4683 check is not performed if STMT is null.
4684
4685 We could, of course, try to be better as noted above, and do a
4686 lot of work to try to find these opportunities in >3 operand
4687 cases, but it is unlikely to be worth it. */
4688
4689 static void
4690 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4691 unsigned int opindex, gimple *stmt)
4692 {
4693 operand_entry *oe1, *oe2, *oe3;
4694
4695 oe1 = ops[opindex];
4696 oe2 = ops[opindex + 1];
4697 oe3 = ops[opindex + 2];
4698
4699 if ((oe1->rank == oe2->rank
4700 && oe2->rank != oe3->rank)
4701 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4702 && !is_phi_for_stmt (stmt, oe1->op)
4703 && !is_phi_for_stmt (stmt, oe2->op)))
4704 std::swap (*oe1, *oe3);
4705 else if ((oe1->rank == oe3->rank
4706 && oe2->rank != oe3->rank)
4707 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4708 && !is_phi_for_stmt (stmt, oe1->op)
4709 && !is_phi_for_stmt (stmt, oe3->op)))
4710 std::swap (*oe1, *oe2);
4711 }
4712
4713 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4714 two definitions, otherwise return STMT. */
4715
4716 static inline gimple *
4717 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4718 {
4719 if (TREE_CODE (rhs1) == SSA_NAME
4720 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4721 stmt = SSA_NAME_DEF_STMT (rhs1);
4722 if (TREE_CODE (rhs2) == SSA_NAME
4723 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4724 stmt = SSA_NAME_DEF_STMT (rhs2);
4725 return stmt;
4726 }
4727
4728 /* If the stmt that defines operand has to be inserted, insert it
4729 before the use. */
4730 static void
4731 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4732 {
4733 gcc_assert (is_gimple_assign (stmt_to_insert));
4734 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4735 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4736 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4737 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4738 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4739
4740 /* If the insert point is not stmt, then insert_point would be
4741 the point where operand rhs1 or rhs2 is defined. In this case,
4742 stmt_to_insert has to be inserted afterwards. This would
4743 only happen when the stmt insertion point is flexible. */
4744 if (stmt == insert_point)
4745 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4746 else
4747 insert_stmt_after (stmt_to_insert, insert_point);
4748 }
4749
4750
4751 /* Recursively rewrite our linearized statements so that the operators
4752 match those in OPS[OPINDEX], putting the computation in rank
4753 order. Return new lhs.
4754 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4755 the current stmt and during recursive invocations.
4756 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4757 recursive invocations. */
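/* A concrete editorial illustration of the recursion (not part of
   the original sources): for the linearized chain

     _1 = d_4 + e_5;
     _2 = _1 + f_6;
     x_7 = _2 + g_8;	<- STMT, called with OPINDEX == 0

   and ops = { o0, o1, o2, o3 }, the deepest statement is rewritten
   using o2 and o3 (the "opindex + 2 == ops.length ()" case), the
   middle one gets o1 as its second operand and the root keeps o0,
   yielding ((o2 + o3) + o1) + o0 in rank order.  */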
4758
4759 static tree
4760 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4761 vec<operand_entry *> ops, bool changed, bool next_changed)
4762 {
4763 tree rhs1 = gimple_assign_rhs1 (stmt);
4764 tree rhs2 = gimple_assign_rhs2 (stmt);
4765 tree lhs = gimple_assign_lhs (stmt);
4766 operand_entry *oe;
4767
4768 /* The final recursion case for this function is that you have
4769 exactly two operations left.
4770 If we had exactly one op in the entire list to start with, we
4771 would have never called this function, and the tail recursion
4772 rewrites them one at a time. */
4773 if (opindex + 2 == ops.length ())
4774 {
4775 operand_entry *oe1, *oe2;
4776
4777 oe1 = ops[opindex];
4778 oe2 = ops[opindex + 1];
4779
4780 if (rhs1 != oe1->op || rhs2 != oe2->op)
4781 {
4782 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4783 unsigned int uid = gimple_uid (stmt);
4784
4785 if (dump_file && (dump_flags & TDF_DETAILS))
4786 {
4787 fprintf (dump_file, "Transforming ");
4788 print_gimple_stmt (dump_file, stmt, 0);
4789 }
4790
4791 /* If the stmt that defines operand has to be inserted, insert it
4792 before the use. */
4793 if (oe1->stmt_to_insert)
4794 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4795 if (oe2->stmt_to_insert)
4796 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4797 /* Even when changed is false, reassociation could have e.g. removed
4798 some redundant operations, so unless we are just swapping the
4799 arguments or unless there is no change at all (then we just
4800 return lhs), force creation of a new SSA_NAME. */
4801 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4802 {
4803 gimple *insert_point
4804 = find_insert_point (stmt, oe1->op, oe2->op);
4805 lhs = make_ssa_name (TREE_TYPE (lhs));
4806 stmt
4807 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4808 oe1->op, oe2->op);
4809 gimple_set_uid (stmt, uid);
4810 gimple_set_visited (stmt, true);
4811 if (insert_point == gsi_stmt (gsi))
4812 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4813 else
4814 insert_stmt_after (stmt, insert_point);
4815 }
4816 else
4817 {
4818 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4819 == stmt);
4820 gimple_assign_set_rhs1 (stmt, oe1->op);
4821 gimple_assign_set_rhs2 (stmt, oe2->op);
4822 update_stmt (stmt);
4823 }
4824
4825 if (rhs1 != oe1->op && rhs1 != oe2->op)
4826 remove_visited_stmt_chain (rhs1);
4827
4828 if (dump_file && (dump_flags & TDF_DETAILS))
4829 {
4830 fprintf (dump_file, " into ");
4831 print_gimple_stmt (dump_file, stmt, 0);
4832 }
4833 }
4834 return lhs;
4835 }
4836
4837 /* If we hit here, we should have 3 or more ops left. */
4838 gcc_assert (opindex + 2 < ops.length ());
4839
4840 /* Rewrite the next operator. */
4841 oe = ops[opindex];
4842
4843 /* If the stmt that defines operand has to be inserted, insert it
4844 before the use. */
4845 if (oe->stmt_to_insert)
4846 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4847
4848 /* Recurse on the LHS of the binary operator, which is guaranteed to
4849 be the non-leaf side. */
4850 tree new_rhs1
4851 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4852 changed || oe->op != rhs2 || next_changed,
4853 false);
4854
4855 if (oe->op != rhs2 || new_rhs1 != rhs1)
4856 {
4857 if (dump_file && (dump_flags & TDF_DETAILS))
4858 {
4859 fprintf (dump_file, "Transforming ");
4860 print_gimple_stmt (dump_file, stmt, 0);
4861 }
4862
4863 /* If changed is false, this is either opindex == 0
4864 or all outer rhs2's were equal to corresponding oe->op,
4865 and powi_result is NULL.
4866 That means lhs is equivalent before and after reassociation.
4867 Otherwise ensure the old lhs SSA_NAME is not reused and
4868 create a new stmt as well, so that any debug stmts will be
4869 properly adjusted. */
4870 if (changed)
4871 {
4872 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4873 unsigned int uid = gimple_uid (stmt);
4874 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4875
4876 lhs = make_ssa_name (TREE_TYPE (lhs));
4877 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4878 new_rhs1, oe->op);
4879 gimple_set_uid (stmt, uid);
4880 gimple_set_visited (stmt, true);
4881 if (insert_point == gsi_stmt (gsi))
4882 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4883 else
4884 insert_stmt_after (stmt, insert_point);
4885 }
4886 else
4887 {
4888 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4889 == stmt);
4890 gimple_assign_set_rhs1 (stmt, new_rhs1);
4891 gimple_assign_set_rhs2 (stmt, oe->op);
4892 update_stmt (stmt);
4893 }
4894
4895 if (dump_file && (dump_flags & TDF_DETAILS))
4896 {
4897 fprintf (dump_file, " into ");
4898 print_gimple_stmt (dump_file, stmt, 0);
4899 }
4900 }
4901 return lhs;
4902 }
4903
4904 /* Find out how many cycles we need to compute a statements chain.
4905 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
4906 maximum number of independent statements we may execute per cycle. */
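/* Editorial worked example: with ops_num == 8 and cpu_width == 2,
   the division below yields 8 / (2 * 2) == 2 cycles, the remaining
   8 - 2 * 2 == 4 operands need exact_log2 (4) == 2 more cycles,
   so the result is 4.  */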
4907
4908 static int
4909 get_required_cycles (int ops_num, int cpu_width)
4910 {
4911 int res;
4912 int elog;
4913 unsigned int rest;
4914
4915 /* While we have more than 2 * cpu_width operands
4916 we may reduce the number of operands by cpu_width
4917 per cycle. */
4918 res = ops_num / (2 * cpu_width);
4919
4920 /* The remaining operand count may be halved each cycle
4921 until we have only one operand. */
4922 rest = (unsigned)(ops_num - res * cpu_width);
4923 elog = exact_log2 (rest);
4924 if (elog >= 0)
4925 res += elog;
4926 else
4927 res += floor_log2 (rest) + 1;
4928
4929 return res;
4930 }
4931
4932 /* Returns an optimal number of registers to use for computation of
4933 the given statements. */
4934
4935 static int
4936 get_reassociation_width (int ops_num, enum tree_code opc,
4937 machine_mode mode)
4938 {
4939 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4940 int width;
4941 int width_min;
4942 int cycles_best;
4943
4944 if (param_width > 0)
4945 width = param_width;
4946 else
4947 width = targetm.sched.reassociation_width (opc, mode);
4948
4949 if (width == 1)
4950 return width;
4951
4952 /* Get the minimal time required for sequence computation. */
4953 cycles_best = get_required_cycles (ops_num, width);
4954
4955 /* Check if we may use less width and still compute the sequence in
4956 the same number of cycles. That would allow us to reduce register
4957 usage. get_required_cycles is monotonically increasing as the width
4958 decreases, so we can perform a binary search for the minimal width
4959 that still results in the optimal cycle count. */
4960 width_min = 1;
4961 while (width > width_min)
4962 {
4963 int width_mid = (width + width_min) / 2;
4964
4965 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4966 width = width_mid;
4967 else if (width_min < width_mid)
4968 width_min = width_mid;
4969 else
4970 break;
4971 }
4972
4973 return width;
4974 }
4975
4976 /* Recursively rewrite our linearized statements so that the operators
4977 match those in OPS[OPINDEX], putting the computation in rank
4978 order and trying to allow operations to be executed in
4979 parallel. */
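/* Editorial illustration (not part of the original sources): with
   WIDTH == 2 and four operands { a, b, c, d }, instead of the purely
   sequential

     _1 = a + b;  _2 = _1 + c;  x = _2 + d;

   the chain is emitted as two independent partial sums combined in
   the final statement:

     _1 = c + d;  _2 = a + b;  x = _1 + _2;

   so _1 and _2 can execute in the same cycle.  */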
4980
4981 static void
4982 rewrite_expr_tree_parallel (gassign *stmt, int width,
4983 vec<operand_entry *> ops)
4984 {
4985 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4986 int op_num = ops.length ();
4987 gcc_assert (op_num > 0);
4988 int stmt_num = op_num - 1;
4989 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4990 int op_index = op_num - 1;
4991 int stmt_index = 0;
4992 int ready_stmts_end = 0;
4993 int i = 0;
4994 gimple *stmt1 = NULL, *stmt2 = NULL;
4995 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4996
4997 /* We start expression rewriting from the top statements.
4998 So, in this loop we create a full list of statements
4999 we will work with. */
5000 stmts[stmt_num - 1] = stmt;
5001 for (i = stmt_num - 2; i >= 0; i--)
5002 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5003
5004 for (i = 0; i < stmt_num; i++)
5005 {
5006 tree op1, op2;
5007
5008 /* Determine whether we should use results of
5009 already handled statements or not. */
5010 if (ready_stmts_end == 0
5011 && (i - stmt_index >= width || op_index < 1))
5012 ready_stmts_end = i;
5013
5014 /* Now we choose operands for the next statement. A nonzero
5015 value in ready_stmts_end means that we should use
5016 the results of already generated statements as new operands. */
5017 if (ready_stmts_end > 0)
5018 {
5019 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5020 if (ready_stmts_end > stmt_index)
5021 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5022 else if (op_index >= 0)
5023 {
5024 operand_entry *oe = ops[op_index--];
5025 stmt2 = oe->stmt_to_insert;
5026 op2 = oe->op;
5027 }
5028 else
5029 {
5030 gcc_assert (stmt_index < i);
5031 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5032 }
5033
5034 if (stmt_index >= ready_stmts_end)
5035 ready_stmts_end = 0;
5036 }
5037 else
5038 {
5039 if (op_index > 1)
5040 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5041 operand_entry *oe2 = ops[op_index--];
5042 operand_entry *oe1 = ops[op_index--];
5043 op2 = oe2->op;
5044 stmt2 = oe2->stmt_to_insert;
5045 op1 = oe1->op;
5046 stmt1 = oe1->stmt_to_insert;
5047 }
5048
5049 /* If we emit the last statement then we should put
5050 operands into the last statement. It will also
5051 break the loop. */
5052 if (op_index < 0 && stmt_index == i)
5053 i = stmt_num - 1;
5054
5055 if (dump_file && (dump_flags & TDF_DETAILS))
5056 {
5057 fprintf (dump_file, "Transforming ");
5058 print_gimple_stmt (dump_file, stmts[i], 0);
5059 }
5060
5061 /* If the stmt that defines operand has to be inserted, insert it
5062 before the use. */
5063 if (stmt1)
5064 insert_stmt_before_use (stmts[i], stmt1);
5065 if (stmt2)
5066 insert_stmt_before_use (stmts[i], stmt2);
5067 stmt1 = stmt2 = NULL;
5068
5069 /* We keep the original statement only for the last one. All
5070 others are recreated. */
5071 if (i == stmt_num - 1)
5072 {
5073 gimple_assign_set_rhs1 (stmts[i], op1);
5074 gimple_assign_set_rhs2 (stmts[i], op2);
5075 update_stmt (stmts[i]);
5076 }
5077 else
5078 {
5079 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5080 gimple_set_visited (stmts[i], true);
5081 }
5082 if (dump_file && (dump_flags & TDF_DETAILS))
5083 {
5084 fprintf (dump_file, " into ");
5085 print_gimple_stmt (dump_file, stmts[i], 0);
5086 }
5087 }
5088
5089 remove_visited_stmt_chain (last_rhs1);
5090 }
5091
5092 /* Transform STMT, which is really (A + B) + (C + D), into the left
5093 linear form, ((A + B) + C) + D.
5094 Recurse on D if necessary. */
5095
5096 static void
5097 linearize_expr (gimple *stmt)
5098 {
5099 gimple_stmt_iterator gsi;
5100 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5101 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5102 gimple *oldbinrhs = binrhs;
5103 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5104 gimple *newbinrhs = NULL;
5105 class loop *loop = loop_containing_stmt (stmt);
5106 tree lhs = gimple_assign_lhs (stmt);
5107
5108 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5109 && is_reassociable_op (binrhs, rhscode, loop));
5110
5111 gsi = gsi_for_stmt (stmt);
5112
5113 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5114 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5115 gimple_assign_rhs_code (binrhs),
5116 gimple_assign_lhs (binlhs),
5117 gimple_assign_rhs2 (binrhs));
5118 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5119 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5120 gimple_set_uid (binrhs, gimple_uid (stmt));
5121
5122 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5123 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5124
5125 if (dump_file && (dump_flags & TDF_DETAILS))
5126 {
5127 fprintf (dump_file, "Linearized: ");
5128 print_gimple_stmt (dump_file, stmt, 0);
5129 }
5130
5131 reassociate_stats.linearized++;
5132 update_stmt (stmt);
5133
5134 gsi = gsi_for_stmt (oldbinrhs);
5135 reassoc_remove_stmt (&gsi);
5136 release_defs (oldbinrhs);
5137
5138 gimple_set_visited (stmt, true);
5139 gimple_set_visited (binlhs, true);
5140 gimple_set_visited (binrhs, true);
5141
5142 /* Tail recurse on the new rhs if it still needs reassociation. */
5143 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5144 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5145 want to change the algorithm while converting to tuples. */
5146 linearize_expr (stmt);
5147 }
5148
5149 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5150 it. Otherwise, return NULL. */
5151
5152 static gimple *
5153 get_single_immediate_use (tree lhs)
5154 {
5155 use_operand_p immuse;
5156 gimple *immusestmt;
5157
5158 if (TREE_CODE (lhs) == SSA_NAME
5159 && single_imm_use (lhs, &immuse, &immusestmt)
5160 && is_gimple_assign (immusestmt))
5161 return immusestmt;
5162
5163 return NULL;
5164 }
5165
5166 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5167 representing the negated value. Insertions of any necessary
5168 instructions go before GSI.
5169 This function is recursive in that, if you hand it "a_5" as the
5170 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5171 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5172
5173 static tree
5174 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5175 {
5176 gimple *negatedefstmt = NULL;
5177 tree resultofnegate;
5178 gimple_stmt_iterator gsi;
5179 unsigned int uid;
5180
5181 /* If we are trying to negate a name defined by an add, negate the
5182 add operands instead. */
5183 if (TREE_CODE (tonegate) == SSA_NAME)
5184 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5185 if (TREE_CODE (tonegate) == SSA_NAME
5186 && is_gimple_assign (negatedefstmt)
5187 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5188 && has_single_use (gimple_assign_lhs (negatedefstmt))
5189 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5190 {
5191 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5192 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5193 tree lhs = gimple_assign_lhs (negatedefstmt);
5194 gimple *g;
5195
5196 gsi = gsi_for_stmt (negatedefstmt);
5197 rhs1 = negate_value (rhs1, &gsi);
5198
5199 gsi = gsi_for_stmt (negatedefstmt);
5200 rhs2 = negate_value (rhs2, &gsi);
5201
5202 gsi = gsi_for_stmt (negatedefstmt);
5203 lhs = make_ssa_name (TREE_TYPE (lhs));
5204 gimple_set_visited (negatedefstmt, true);
5205 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5206 gimple_set_uid (g, gimple_uid (negatedefstmt));
5207 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5208 return lhs;
5209 }
5210
5211 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5212 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5213 NULL_TREE, true, GSI_SAME_STMT);
5214 gsi = *gsip;
5215 uid = gimple_uid (gsi_stmt (gsi));
5216 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5217 {
5218 gimple *stmt = gsi_stmt (gsi);
5219 if (gimple_uid (stmt) != 0)
5220 break;
5221 gimple_set_uid (stmt, uid);
5222 }
5223 return resultofnegate;
5224 }
5225
5226 /* Return true if we should break up the subtract in STMT into an add
5227 with negate. This is true when the subtract operands are really
5228 adds, or the subtract itself is used in an add expression. In
5229 either case, breaking up the subtract into an add with negate
5230 exposes the adds to reassociation. */
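/* Editorial illustration (not part of the original sources): in

     c_1 = a_2 - b_3;
     d_4 = c_1 + e_5;

   the subtract feeds an addition, so break_up_subtract rewrites it
   via negate_value into

     t_6 = -b_3;
     c_1 = a_2 + t_6;
     d_4 = c_1 + e_5;

   exposing the addends a_2, t_6 and e_5 to reassociation.  */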
5231
5232 static bool
5233 should_break_up_subtract (gimple *stmt)
5234 {
5235 tree lhs = gimple_assign_lhs (stmt);
5236 tree binlhs = gimple_assign_rhs1 (stmt);
5237 tree binrhs = gimple_assign_rhs2 (stmt);
5238 gimple *immusestmt;
5239 class loop *loop = loop_containing_stmt (stmt);
5240
5241 if (TREE_CODE (binlhs) == SSA_NAME
5242 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5243 return true;
5244
5245 if (TREE_CODE (binrhs) == SSA_NAME
5246 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5247 return true;
5248
5249 if (TREE_CODE (lhs) == SSA_NAME
5250 && (immusestmt = get_single_immediate_use (lhs))
5251 && is_gimple_assign (immusestmt)
5252 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5253 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5254 && gimple_assign_rhs1 (immusestmt) == lhs)
5255 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5256 return true;
5257 return false;
5258 }
5259
5260 /* Transform STMT from A - B into A + -B. */
5261
5262 static void
5263 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5264 {
5265 tree rhs1 = gimple_assign_rhs1 (stmt);
5266 tree rhs2 = gimple_assign_rhs2 (stmt);
5267
5268 if (dump_file && (dump_flags & TDF_DETAILS))
5269 {
5270 fprintf (dump_file, "Breaking up subtract ");
5271 print_gimple_stmt (dump_file, stmt, 0);
5272 }
5273
5274 rhs2 = negate_value (rhs2, gsip);
5275 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5276 update_stmt (stmt);
5277 }
5278
5279 /* Determine whether STMT is a builtin call that raises an SSA name
5280 to an integer power and has only one use. If so, and this is early
5281 reassociation and unsafe math optimizations are permitted, place
5282 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5283 If any of these conditions does not hold, return FALSE. */
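/* Editorial example: with -fno-math-errno, a call pow (x_1, 3.0)
   yields *BASE == x_1 and *EXPONENT == 3, whereas pow (x_1, 2.5) is
   rejected because the exponent is not an exact integer.  */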
5284
5285 static bool
5286 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5287 {
5288 tree arg1;
5289 REAL_VALUE_TYPE c, cint;
5290
5291 switch (gimple_call_combined_fn (stmt))
5292 {
5293 CASE_CFN_POW:
5294 if (flag_errno_math)
5295 return false;
5296
5297 *base = gimple_call_arg (stmt, 0);
5298 arg1 = gimple_call_arg (stmt, 1);
5299
5300 if (TREE_CODE (arg1) != REAL_CST)
5301 return false;
5302
5303 c = TREE_REAL_CST (arg1);
5304
5305 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5306 return false;
5307
5308 *exponent = real_to_integer (&c);
5309 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5310 if (!real_identical (&c, &cint))
5311 return false;
5312
5313 break;
5314
5315 CASE_CFN_POWI:
5316 *base = gimple_call_arg (stmt, 0);
5317 arg1 = gimple_call_arg (stmt, 1);
5318
5319 if (!tree_fits_shwi_p (arg1))
5320 return false;
5321
5322 *exponent = tree_to_shwi (arg1);
5323 break;
5324
5325 default:
5326 return false;
5327 }
5328
5329 /* Expanding negative exponents is generally unproductive, so we don't
5330 complicate matters with those. Exponents of zero and one should
5331 have been handled by expression folding. */
5332 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5333 return false;
5334
5335 return true;
5336 }
5337
5338 /* Try to derive and add operand entry for OP to *OPS. Return false if
5339 unsuccessful. */
5340
5341 static bool
5342 try_special_add_to_ops (vec<operand_entry *> *ops,
5343 enum tree_code code,
5344 tree op, gimple* def_stmt)
5345 {
5346 tree base = NULL_TREE;
5347 HOST_WIDE_INT exponent = 0;
5348
5349 if (TREE_CODE (op) != SSA_NAME
5350 || ! has_single_use (op))
5351 return false;
5352
5353 if (code == MULT_EXPR
5354 && reassoc_insert_powi_p
5355 && flag_unsafe_math_optimizations
5356 && is_gimple_call (def_stmt)
5357 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5358 {
5359 add_repeat_to_ops_vec (ops, base, exponent);
5360 gimple_set_visited (def_stmt, true);
5361 return true;
5362 }
5363 else if (code == MULT_EXPR
5364 && is_gimple_assign (def_stmt)
5365 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5366 && !HONOR_SNANS (TREE_TYPE (op))
5367 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5368 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5369 {
5370 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5371 tree cst = build_minus_one_cst (TREE_TYPE (op));
5372 add_to_ops_vec (ops, rhs1);
5373 add_to_ops_vec (ops, cst);
5374 gimple_set_visited (def_stmt, true);
5375 return true;
5376 }
5377
5378 return false;
5379 }
5380
5381 /* Recursively linearize a binary expression that is the RHS of STMT.
5382 Place the operands of the expression tree in the vector named OPS. */
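/* Editorial illustration (not part of the original sources): for

     _1 = a_2 + b_3;
     _2 = c_4 + d_5;
     x_6 = _1 + _2;

   the RHS of x_6 is first re-linearized (see linearize_expr above)
   and the leaves a_2, b_3, c_4 and d_5 end up as entries of *OPS.  */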
5383
5384 static void
5385 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5386 bool is_associative, bool set_visited)
5387 {
5388 tree binlhs = gimple_assign_rhs1 (stmt);
5389 tree binrhs = gimple_assign_rhs2 (stmt);
5390 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5391 bool binlhsisreassoc = false;
5392 bool binrhsisreassoc = false;
5393 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5394 class loop *loop = loop_containing_stmt (stmt);
5395
5396 if (set_visited)
5397 gimple_set_visited (stmt, true);
5398
5399 if (TREE_CODE (binlhs) == SSA_NAME)
5400 {
5401 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5402 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5403 && !stmt_could_throw_p (cfun, binlhsdef));
5404 }
5405
5406 if (TREE_CODE (binrhs) == SSA_NAME)
5407 {
5408 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5409 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5410 && !stmt_could_throw_p (cfun, binrhsdef));
5411 }
5412
5413 /* If the LHS is not reassociable, but the RHS is, we need to swap
5414 them. If neither is reassociable, there is nothing we can do, so
5415 just put them in the ops vector. If the LHS is reassociable,
5416 linearize it. If both are reassociable, then linearize the RHS
5417 and the LHS. */
5418
5419 if (!binlhsisreassoc)
5420 {
5421 /* If this is not an associative operation, like division, give up. */
5422 if (!is_associative)
5423 {
5424 add_to_ops_vec (ops, binrhs);
5425 return;
5426 }
5427
5428 if (!binrhsisreassoc)
5429 {
5430 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5431 add_to_ops_vec (ops, binrhs);
5432
5433 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5434 add_to_ops_vec (ops, binlhs);
5435
5436 return;
5437 }
5438
5439 if (dump_file && (dump_flags & TDF_DETAILS))
5440 {
5441 fprintf (dump_file, "swapping operands of ");
5442 print_gimple_stmt (dump_file, stmt, 0);
5443 }
5444
5445 swap_ssa_operands (stmt,
5446 gimple_assign_rhs1_ptr (stmt),
5447 gimple_assign_rhs2_ptr (stmt));
5448 update_stmt (stmt);
5449
5450 if (dump_file && (dump_flags & TDF_DETAILS))
5451 {
5452 fprintf (dump_file, " is now ");
5453 print_gimple_stmt (dump_file, stmt, 0);
5454 }
5455
5456 /* We want to make it so the lhs is always the reassociative op,
5457 so swap. */
5458 std::swap (binlhs, binrhs);
5459 }
5460 else if (binrhsisreassoc)
5461 {
5462 linearize_expr (stmt);
5463 binlhs = gimple_assign_rhs1 (stmt);
5464 binrhs = gimple_assign_rhs2 (stmt);
5465 }
5466
5467 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5468 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5469 rhscode, loop));
5470 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5471 is_associative, set_visited);
5472
5473 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5474 add_to_ops_vec (ops, binrhs);
5475 }
5476
5477 /* Repropagate the negates back into subtracts, since no other pass
5478 currently does it. */
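/* Editorial illustration (not part of the original sources): the
   entries of plus_negates are negates such as x_1 = -a_2 introduced
   by break_up_subtract; if x_1 is only used in y_3 = b_4 + x_1, that
   use is folded back below into y_3 = b_4 - a_2.  */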
5479
5480 static void
5481 repropagate_negates (void)
5482 {
5483 unsigned int i = 0;
5484 tree negate;
5485
5486 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5487 {
5488 gimple *user = get_single_immediate_use (negate);
5489
5490 if (!user || !is_gimple_assign (user))
5491 continue;
5492
5493 /* The negate operand can be either operand of a PLUS_EXPR
5494 (it can be the LHS if the RHS is a constant for example).
5495
5496 Force the negate operand to the RHS of the PLUS_EXPR, then
5497 transform the PLUS_EXPR into a MINUS_EXPR. */
5498 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5499 {
5500 /* If the negated operand appears on the LHS of the
5501 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5502 to force the negated operand to the RHS of the PLUS_EXPR. */
5503 if (gimple_assign_rhs1 (user) == negate)
5504 {
5505 swap_ssa_operands (user,
5506 gimple_assign_rhs1_ptr (user),
5507 gimple_assign_rhs2_ptr (user));
5508 }
5509
5510 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5511 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5512 if (gimple_assign_rhs2 (user) == negate)
5513 {
5514 tree rhs1 = gimple_assign_rhs1 (user);
5515 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5516 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5517 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5518 update_stmt (user);
5519 }
5520 }
5521 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5522 {
5523 if (gimple_assign_rhs1 (user) == negate)
5524 {
5525 /* We have
5526 x = -a
5527 y = x - b
5528 which we transform into
5529 x = a + b
5530 y = -x .
5531 This pushes down the negate which we possibly can merge
5532 into some other operation, hence insert it into the
5533 plus_negates vector. */
5534 gimple *feed = SSA_NAME_DEF_STMT (negate);
5535 tree a = gimple_assign_rhs1 (feed);
5536 tree b = gimple_assign_rhs2 (user);
5537 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5538 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5539 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5540 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5541 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5542 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5543 user = gsi_stmt (gsi2);
5544 update_stmt (user);
5545 reassoc_remove_stmt (&gsi);
5546 release_defs (feed);
5547 plus_negates.safe_push (gimple_assign_lhs (user));
5548 }
5549 else
5550 {
5551 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5552 rid of one operation. */
5553 gimple *feed = SSA_NAME_DEF_STMT (negate);
5554 tree a = gimple_assign_rhs1 (feed);
5555 tree rhs1 = gimple_assign_rhs1 (user);
5556 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5557 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5558 update_stmt (gsi_stmt (gsi));
5559 }
5560 }
5561 }
5562 }
5563
5564 /* Returns true if OP is of a type for which we can do reassociation.
5565 That is for integral types whose overflow wraps, for non-saturating
5566 fixed-point types, and for floating-point types when -fassociative-math is enabled. */
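
/* For example (illustrative only): unsigned integer arithmetic normally
   qualifies, signed integers qualify only when their overflow wraps
   (e.g. under -fwrapv), and double qualifies only when -fassociative-math
   is in effect. */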
5567
5568 static bool
5569 can_reassociate_p (tree op)
5570 {
5571 tree type = TREE_TYPE (op);
5572 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5573 return false;
5574 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5575 || NON_SAT_FIXED_POINT_TYPE_P (type)
5576 || (flag_associative_math && FLOAT_TYPE_P (type)))
5577 return true;
5578 return false;
5579 }
5580
5581 /* Break up subtract operations in block BB.
5582
5583 We do this top down because we don't know whether the subtract is
5584 part of a possible chain of reassociation except at the top.
5585
5586 IE given
5587 d = f + g
5588 c = a + e
5589 b = c - d
5590 q = b - r
5591 k = t - q
5592
5593 we want to break up k = t - q, but we won't until we've transformed q
5594 = b - r, which won't be broken up until we transform b = c - d.
5595
5596 En passant, clear the GIMPLE visited flag on every statement
5597 and set UIDs within each basic block. */
5598
5599 static void
5600 break_up_subtract_bb (basic_block bb)
5601 {
5602 gimple_stmt_iterator gsi;
5603 basic_block son;
5604 unsigned int uid = 1;
5605
5606 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5607 {
5608 gimple *stmt = gsi_stmt (gsi);
5609 gimple_set_visited (stmt, false);
5610 gimple_set_uid (stmt, uid++);
5611
5612 if (!is_gimple_assign (stmt)
5613 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5614 continue;
5615
5616 /* Look for simple gimple subtract operations. */
5617 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5618 {
5619 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5620 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5621 continue;
5622
5623 /* Check for a subtract used only in an addition. If this
5624 is the case, transform it into add of a negate for better
5625 reassociation. IE transform C = A-B into C = A + -B if C
5626 is only used in an addition. */
5627 if (should_break_up_subtract (stmt))
5628 break_up_subtract (stmt, &gsi);
5629 }
5630 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5631 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5632 plus_negates.safe_push (gimple_assign_lhs (stmt));
5633 }
5634 for (son = first_dom_son (CDI_DOMINATORS, bb);
5635 son;
5636 son = next_dom_son (CDI_DOMINATORS, son))
5637 break_up_subtract_bb (son);
5638 }
5639
5640 /* Used for repeated factor analysis. */
5641 struct repeat_factor
5642 {
5643 /* An SSA name that occurs in a multiply chain. */
5644 tree factor;
5645
5646 /* Cached rank of the factor. */
5647 unsigned rank;
5648
5649 /* Number of occurrences of the factor in the chain. */
5650 HOST_WIDE_INT count;
5651
5652 /* An SSA name representing the product of this factor and
5653 all factors appearing later in the repeated factor vector. */
5654 tree repr;
5655 };
5656
5657
5658 static vec<repeat_factor> repeat_factor_vec;
5659
5660 /* Used for sorting the repeat factor vector. Sort primarily by
5661 ascending occurrence count, secondarily by descending rank. */
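
/* For example (illustrative values): factors with (count, rank) of
   x:(3,9), y:(2,4) and z:(2,7) sort as z, y, x; the count-2 entries come
   first, and within equal counts the higher-ranked factor comes first. */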
5662
5663 static int
5664 compare_repeat_factors (const void *x1, const void *x2)
5665 {
5666 const repeat_factor *rf1 = (const repeat_factor *) x1;
5667 const repeat_factor *rf2 = (const repeat_factor *) x2;
5668
5669 if (rf1->count != rf2->count)
5670 return rf1->count - rf2->count;
5671
5672 return rf2->rank - rf1->rank;
5673 }
5674
5675 /* Look for repeated operands in OPS in the multiply tree rooted at
5676 STMT. Replace them with an optimal sequence of multiplies and powi
5677 builtin calls, and remove the used operands from OPS. Return an
5678 SSA name representing the value of the replacement sequence. */
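
/* For example (illustrative only), if OPS represents x * x * x * y * y
   this emits

     t = x * y
     p = __builtin_powi (t, 2)
     result = p * x

   and removes all five operands from OPS. */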
5679
5680 static tree
5681 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5682 {
5683 unsigned i, j, vec_len;
5684 int ii;
5685 operand_entry *oe;
5686 repeat_factor *rf1, *rf2;
5687 repeat_factor rfnew;
5688 tree result = NULL_TREE;
5689 tree target_ssa, iter_result;
5690 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5691 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5692 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5693 gimple *mul_stmt, *pow_stmt;
5694
5695 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5696 target. */
5697 if (!powi_fndecl)
5698 return NULL_TREE;
5699
5700 /* Allocate the repeated factor vector. */
5701 repeat_factor_vec.create (10);
5702
5703 /* Scan the OPS vector for all SSA names in the product and build
5704 up a vector of occurrence counts for each factor. */
5705 FOR_EACH_VEC_ELT (*ops, i, oe)
5706 {
5707 if (TREE_CODE (oe->op) == SSA_NAME)
5708 {
5709 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5710 {
5711 if (rf1->factor == oe->op)
5712 {
5713 rf1->count += oe->count;
5714 break;
5715 }
5716 }
5717
5718 if (j >= repeat_factor_vec.length ())
5719 {
5720 rfnew.factor = oe->op;
5721 rfnew.rank = oe->rank;
5722 rfnew.count = oe->count;
5723 rfnew.repr = NULL_TREE;
5724 repeat_factor_vec.safe_push (rfnew);
5725 }
5726 }
5727 }
5728
5729 /* Sort the repeated factor vector by (a) increasing occurrence count,
5730 and (b) decreasing rank. */
5731 repeat_factor_vec.qsort (compare_repeat_factors);
5732
5733 /* It is generally best to combine as many base factors as possible
5734 into a product before applying __builtin_powi to the result.
5735 However, the sort order chosen for the repeated factor vector
5736 allows us to cache partial results for the product of the base
5737 factors for subsequent use. When we already have a cached partial
5738 result from a previous iteration, it is best to make use of it
5739 before looking for another __builtin_powi opportunity.
5740
5741 As an example, consider x * x * y * y * y * z * z * z * z.
5742 We want to first compose the product x * y * z, raise it to the
5743 second power, then multiply this by y * z, and finally multiply
5744 by z. This can be done in 5 multiplies provided we cache y * z
5745 for use in both expressions:
5746
5747 t1 = y * z
5748 t2 = t1 * x
5749 t3 = t2 * t2
5750 t4 = t1 * t3
5751 result = t4 * z
5752
5753 If we instead ignored the cached y * z and first multiplied by
5754 the __builtin_powi opportunity z * z, we would get the inferior:
5755
5756 t1 = y * z
5757 t2 = t1 * x
5758 t3 = t2 * t2
5759 t4 = z * z
5760 t5 = t3 * t4
5761 result = t5 * y */
5762
5763 vec_len = repeat_factor_vec.length ();
5764
5765 /* Repeatedly look for opportunities to create a builtin_powi call. */
5766 while (true)
5767 {
5768 HOST_WIDE_INT power;
5769
5770 /* First look for the largest cached product of factors from
5771 preceding iterations. If found, create a builtin_powi for
5772 it if the minimum occurrence count for its factors is at
5773 least 2, or just use this cached product as our next
5774 multiplicand if the minimum occurrence count is 1. */
5775 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5776 {
5777 if (rf1->repr && rf1->count > 0)
5778 break;
5779 }
5780
5781 if (j < vec_len)
5782 {
5783 power = rf1->count;
5784
5785 if (power == 1)
5786 {
5787 iter_result = rf1->repr;
5788
5789 if (dump_file && (dump_flags & TDF_DETAILS))
5790 {
5791 unsigned elt;
5792 repeat_factor *rf;
5793 fputs ("Multiplying by cached product ", dump_file);
5794 for (elt = j; elt < vec_len; elt++)
5795 {
5796 rf = &repeat_factor_vec[elt];
5797 print_generic_expr (dump_file, rf->factor);
5798 if (elt < vec_len - 1)
5799 fputs (" * ", dump_file);
5800 }
5801 fputs ("\n", dump_file);
5802 }
5803 }
5804 else
5805 {
5806 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5807 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5808 build_int_cst (integer_type_node,
5809 power));
5810 gimple_call_set_lhs (pow_stmt, iter_result);
5811 gimple_set_location (pow_stmt, gimple_location (stmt));
5812 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5813 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5814
5815 if (dump_file && (dump_flags & TDF_DETAILS))
5816 {
5817 unsigned elt;
5818 repeat_factor *rf;
5819 fputs ("Building __builtin_pow call for cached product (",
5820 dump_file);
5821 for (elt = j; elt < vec_len; elt++)
5822 {
5823 rf = &repeat_factor_vec[elt];
5824 print_generic_expr (dump_file, rf->factor);
5825 if (elt < vec_len - 1)
5826 fputs (" * ", dump_file);
5827 }
5828 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5829 power);
5830 }
5831 }
5832 }
5833 else
5834 {
5835 /* Otherwise, find the first factor in the repeated factor
5836 vector whose occurrence count is at least 2. If no such
5837 factor exists, there are no builtin_powi opportunities
5838 remaining. */
5839 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5840 {
5841 if (rf1->count >= 2)
5842 break;
5843 }
5844
5845 if (j >= vec_len)
5846 break;
5847
5848 power = rf1->count;
5849
5850 if (dump_file && (dump_flags & TDF_DETAILS))
5851 {
5852 unsigned elt;
5853 repeat_factor *rf;
5854 fputs ("Building __builtin_pow call for (", dump_file);
5855 for (elt = j; elt < vec_len; elt++)
5856 {
5857 rf = &repeat_factor_vec[elt];
5858 print_generic_expr (dump_file, rf->factor);
5859 if (elt < vec_len - 1)
5860 fputs (" * ", dump_file);
5861 }
5862 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5863 }
5864
5865 reassociate_stats.pows_created++;
5866
5867 /* Visit each element of the vector in reverse order (so that
5868 high-occurrence elements are visited first, and within the
5869 same occurrence count, lower-ranked elements are visited
5870 first). Form a linear product of all elements in this order
5871 whose occurrence count is at least that of element J.
5872 Record the SSA name representing the product of each element
5873 with all subsequent elements in the vector. */
5874 if (j == vec_len - 1)
5875 rf1->repr = rf1->factor;
5876 else
5877 {
5878 for (ii = vec_len - 2; ii >= (int)j; ii--)
5879 {
5880 tree op1, op2;
5881
5882 rf1 = &repeat_factor_vec[ii];
5883 rf2 = &repeat_factor_vec[ii + 1];
5884
5885 /* Init the last factor's representative to be itself. */
5886 if (!rf2->repr)
5887 rf2->repr = rf2->factor;
5888
5889 op1 = rf1->factor;
5890 op2 = rf2->repr;
5891
5892 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5893 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5894 op1, op2);
5895 gimple_set_location (mul_stmt, gimple_location (stmt));
5896 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5897 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5898 rf1->repr = target_ssa;
5899
5900 /* Don't reprocess the multiply we just introduced. */
5901 gimple_set_visited (mul_stmt, true);
5902 }
5903 }
5904
5905 /* Form a call to __builtin_powi for the maximum product
5906 just formed, raised to the power obtained earlier. */
5907 rf1 = &repeat_factor_vec[j];
5908 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5909 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5910 build_int_cst (integer_type_node,
5911 power));
5912 gimple_call_set_lhs (pow_stmt, iter_result);
5913 gimple_set_location (pow_stmt, gimple_location (stmt));
5914 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5915 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5916 }
5917
5918 /* If we previously formed at least one other builtin_powi call,
5919 form the product of this one and those others. */
5920 if (result)
5921 {
5922 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5923 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5924 result, iter_result);
5925 gimple_set_location (mul_stmt, gimple_location (stmt));
5926 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5927 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5928 gimple_set_visited (mul_stmt, true);
5929 result = new_result;
5930 }
5931 else
5932 result = iter_result;
5933
5934 /* Decrement the occurrence count of each element in the product
5935 by the count found above, and remove this many copies of each
5936 factor from OPS. */
5937 for (i = j; i < vec_len; i++)
5938 {
5939 unsigned k = power;
5940 unsigned n;
5941
5942 rf1 = &repeat_factor_vec[i];
5943 rf1->count -= power;
5944
5945 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5946 {
5947 if (oe->op == rf1->factor)
5948 {
5949 if (oe->count <= k)
5950 {
5951 ops->ordered_remove (n);
5952 k -= oe->count;
5953
5954 if (k == 0)
5955 break;
5956 }
5957 else
5958 {
5959 oe->count -= k;
5960 break;
5961 }
5962 }
5963 }
5964 }
5965 }
5966
5967 /* At this point all elements in the repeated factor vector have a
5968 remaining occurrence count of 0 or 1, and those with a count of 1
5969 don't have cached representatives. Re-sort the ops vector and
5970 clean up. */
5971 ops->qsort (sort_by_operand_rank);
5972 repeat_factor_vec.release ();
5973
5974 /* Return the final product computed herein. Note that there may
5975 still be some elements with single occurrence count left in OPS;
5976 those will be handled by the normal reassociation logic. */
5977 return result;
5978 }
5979
5980 /* Attempt to optimize
5981 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5982 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
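
/* A concrete instance (illustrative values): 2.0 * copysign (3.0, y)
   becomes copysign (6.0, y); the rebuilt copysign call replaces the
   original operand in OPS and the multiplying constant is popped. */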
5983
5984 static void
5985 attempt_builtin_copysign (vec<operand_entry *> *ops)
5986 {
5987 operand_entry *oe;
5988 unsigned int i;
5989 unsigned int length = ops->length ();
5990 tree cst = ops->last ()->op;
5991
5992 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5993 return;
5994
5995 FOR_EACH_VEC_ELT (*ops, i, oe)
5996 {
5997 if (TREE_CODE (oe->op) == SSA_NAME
5998 && has_single_use (oe->op))
5999 {
6000 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6001 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6002 {
6003 tree arg0, arg1;
6004 switch (gimple_call_combined_fn (old_call))
6005 {
6006 CASE_CFN_COPYSIGN:
6007 CASE_CFN_COPYSIGN_FN:
6008 arg0 = gimple_call_arg (old_call, 0);
6009 arg1 = gimple_call_arg (old_call, 1);
6010 /* The first argument of copysign must be a constant,
6011 otherwise there's nothing to do. */
6012 if (TREE_CODE (arg0) == REAL_CST)
6013 {
6014 tree type = TREE_TYPE (arg0);
6015 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6016 /* If we couldn't fold to a single constant, skip it.
6017 That happens e.g. for inexact multiplication when
6018 -frounding-math. */
6019 if (mul == NULL_TREE)
6020 break;
6021 /* Instead of adjusting OLD_CALL, let's build a new
6022 call to not leak the LHS and prevent keeping bogus
6023 debug statements. DCE will clean up the old call. */
6024 gcall *new_call;
6025 if (gimple_call_internal_p (old_call))
6026 new_call = gimple_build_call_internal
6027 (IFN_COPYSIGN, 2, mul, arg1);
6028 else
6029 new_call = gimple_build_call
6030 (gimple_call_fndecl (old_call), 2, mul, arg1);
6031 tree lhs = make_ssa_name (type);
6032 gimple_call_set_lhs (new_call, lhs);
6033 gimple_set_location (new_call,
6034 gimple_location (old_call));
6035 insert_stmt_after (new_call, old_call);
6036 /* We've used the constant, get rid of it. */
6037 ops->pop ();
6038 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6039 /* Handle the CST1 < 0 case by negating the result. */
6040 if (cst1_neg)
6041 {
6042 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6043 gimple *negate_stmt
6044 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6045 insert_stmt_after (negate_stmt, new_call);
6046 oe->op = negrhs;
6047 }
6048 else
6049 oe->op = lhs;
6050 if (dump_file && (dump_flags & TDF_DETAILS))
6051 {
6052 fprintf (dump_file, "Optimizing copysign: ");
6053 print_generic_expr (dump_file, cst);
6054 fprintf (dump_file, " * COPYSIGN (");
6055 print_generic_expr (dump_file, arg0);
6056 fprintf (dump_file, ", ");
6057 print_generic_expr (dump_file, arg1);
6058 fprintf (dump_file, ") into %sCOPYSIGN (",
6059 cst1_neg ? "-" : "");
6060 print_generic_expr (dump_file, mul);
6061 fprintf (dump_file, ", ");
6062 print_generic_expr (dump_file, arg1);
6063 fprintf (dump_file, ")\n");
6064 }
6065 return;
6066 }
6067 break;
6068 default:
6069 break;
6070 }
6071 }
6072 }
6073 }
6074 }
6075
6076 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6077
6078 static void
6079 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6080 {
6081 tree rhs1;
6082
6083 if (dump_file && (dump_flags & TDF_DETAILS))
6084 {
6085 fprintf (dump_file, "Transforming ");
6086 print_gimple_stmt (dump_file, stmt, 0);
6087 }
6088
6089 rhs1 = gimple_assign_rhs1 (stmt);
6090 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6091 update_stmt (stmt);
6092 remove_visited_stmt_chain (rhs1);
6093
6094 if (dump_file && (dump_flags & TDF_DETAILS))
6095 {
6096 fprintf (dump_file, " into ");
6097 print_gimple_stmt (dump_file, stmt, 0);
6098 }
6099 }
6100
6101 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6102
6103 static void
6104 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6105 tree rhs1, tree rhs2)
6106 {
6107 if (dump_file && (dump_flags & TDF_DETAILS))
6108 {
6109 fprintf (dump_file, "Transforming ");
6110 print_gimple_stmt (dump_file, stmt, 0);
6111 }
6112
6113 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6114 update_stmt (gsi_stmt (*gsi));
6115 remove_visited_stmt_chain (rhs1);
6116
6117 if (dump_file && (dump_flags & TDF_DETAILS))
6118 {
6119 fprintf (dump_file, " into ");
6120 print_gimple_stmt (dump_file, stmt, 0);
6121 }
6122 }
6123
6124 /* Reassociate expressions in basic block BB and its children in the
6125 post-dominator tree.
6126
6127 Bubble up return status from maybe_optimize_range_tests. */
6128
6129 static bool
6130 reassociate_bb (basic_block bb)
6131 {
6132 gimple_stmt_iterator gsi;
6133 basic_block son;
6134 gimple *stmt = last_stmt (bb);
6135 bool cfg_cleanup_needed = false;
6136
6137 if (stmt && !gimple_visited_p (stmt))
6138 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6139
6140 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
6141 {
6142 stmt = gsi_stmt (gsi);
6143
6144 if (is_gimple_assign (stmt)
6145 && !stmt_could_throw_p (cfun, stmt))
6146 {
6147 tree lhs, rhs1, rhs2;
6148 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6149
6150 /* If this was part of an already processed statement,
6151 we don't need to touch it again. */
6152 if (gimple_visited_p (stmt))
6153 {
6154 /* This statement might have become dead because of previous
6155 reassociations. */
6156 if (has_zero_uses (gimple_get_lhs (stmt)))
6157 {
6158 reassoc_remove_stmt (&gsi);
6159 release_defs (stmt);
6160 /* Removing the stmt above may have left the iterator
6161 at the end of the sequence. In that case reset it
6162 to the last stmt of this block; that might itself
6163 still be the end of the sequence if we removed the
6164 block's last statement, in which case we need to
6165 bail out. */
6166 if (gsi_end_p (gsi))
6167 {
6168 gsi = gsi_last_bb (bb);
6169 if (gsi_end_p (gsi))
6170 break;
6171 }
6172 }
6173 continue;
6174 }
6175
6176 /* If this is not a gimple binary expression, there is
6177 nothing for us to do with it. */
6178 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6179 continue;
6180
6181 lhs = gimple_assign_lhs (stmt);
6182 rhs1 = gimple_assign_rhs1 (stmt);
6183 rhs2 = gimple_assign_rhs2 (stmt);
6184
6185 /* For operations other than bitwise and min/max ones we
6186 cannot associate all types. Verify that here. */
6187 if (rhs_code != BIT_IOR_EXPR
6188 && rhs_code != BIT_AND_EXPR
6189 && rhs_code != BIT_XOR_EXPR
6190 && rhs_code != MIN_EXPR
6191 && rhs_code != MAX_EXPR
6192 && (!can_reassociate_p (lhs)
6193 || !can_reassociate_p (rhs1)
6194 || !can_reassociate_p (rhs2)))
6195 continue;
6196
6197 if (associative_tree_code (rhs_code))
6198 {
6199 auto_vec<operand_entry *> ops;
6200 tree powi_result = NULL_TREE;
6201 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6202
6203 /* There may be no immediate uses left by the time we
6204 get here because we may have eliminated them all. */
6205 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6206 continue;
6207
6208 gimple_set_visited (stmt, true);
6209 linearize_expr_tree (&ops, stmt, true, true);
6210 ops.qsort (sort_by_operand_rank);
6211 int orig_len = ops.length ();
6212 optimize_ops_list (rhs_code, &ops);
6213 if (undistribute_ops_list (rhs_code, &ops,
6214 loop_containing_stmt (stmt)))
6215 {
6216 ops.qsort (sort_by_operand_rank);
6217 optimize_ops_list (rhs_code, &ops);
6218 }
6219 if (undistribute_bitref_for_vector (rhs_code, &ops,
6220 loop_containing_stmt (stmt)))
6221 {
6222 ops.qsort (sort_by_operand_rank);
6223 optimize_ops_list (rhs_code, &ops);
6224 }
6225 if (rhs_code == PLUS_EXPR
6226 && transform_add_to_multiply (&ops))
6227 ops.qsort (sort_by_operand_rank);
6228
6229 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6230 {
6231 if (is_vector)
6232 optimize_vec_cond_expr (rhs_code, &ops);
6233 else
6234 optimize_range_tests (rhs_code, &ops, NULL);
6235 }
6236
6237 if (rhs_code == MULT_EXPR && !is_vector)
6238 {
6239 attempt_builtin_copysign (&ops);
6240
6241 if (reassoc_insert_powi_p
6242 && flag_unsafe_math_optimizations)
6243 powi_result = attempt_builtin_powi (stmt, &ops);
6244 }
6245
6246 operand_entry *last;
6247 bool negate_result = false;
6248 if (ops.length () > 1
6249 && rhs_code == MULT_EXPR)
6250 {
6251 last = ops.last ();
6252 if ((integer_minus_onep (last->op)
6253 || real_minus_onep (last->op))
6254 && !HONOR_SNANS (TREE_TYPE (lhs))
6255 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6256 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6257 {
6258 ops.pop ();
6259 negate_result = true;
6260 }
6261 }
6262
6263 tree new_lhs = lhs;
6264 /* If the operand vector is now empty, all operands were
6265 consumed by the __builtin_powi optimization. */
6266 if (ops.length () == 0)
6267 transform_stmt_to_copy (&gsi, stmt, powi_result);
6268 else if (ops.length () == 1)
6269 {
6270 tree last_op = ops.last ()->op;
6271
6272 /* If the stmt that defines operand has to be inserted, insert it
6273 before the use. */
6274 if (ops.last ()->stmt_to_insert)
6275 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6276 if (powi_result)
6277 transform_stmt_to_multiply (&gsi, stmt, last_op,
6278 powi_result);
6279 else
6280 transform_stmt_to_copy (&gsi, stmt, last_op);
6281 }
6282 else
6283 {
6284 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6285 int ops_num = ops.length ();
6286 int width;
6287
6288 /* For binary bit operations, if there are at least 3
6289 operands and the last operand in OPS is a constant,
6290 move it to the front. This helps ensure that we generate
6291 (X & Y) & C rather than (X & C) & Y. The former will
6292 often match a canonical bit test when we get to RTL. */
6293 if (ops.length () > 2
6294 && (rhs_code == BIT_AND_EXPR
6295 || rhs_code == BIT_IOR_EXPR
6296 || rhs_code == BIT_XOR_EXPR)
6297 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6298 std::swap (*ops[0], *ops[ops_num - 1]);
6299
6300 /* Only rewrite the expression tree into a parallel form in
6301 the last reassoc pass, to avoid useless back-and-forth
6302 work with the initial linearization. */
6303 if (!reassoc_insert_powi_p
6304 && ops.length () > 3
6305 && (width = get_reassociation_width (ops_num, rhs_code,
6306 mode)) > 1)
6307 {
6308 if (dump_file && (dump_flags & TDF_DETAILS))
6309 fprintf (dump_file,
6310 "Width = %d was chosen for reassociation\n",
6311 width);
6312 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6313 width, ops);
6314 }
6315 else
6316 {
6317 /* When there are three operands left, we want
6318 to make sure the ones that get the double
6319 binary op are chosen wisely. */
6320 int len = ops.length ();
6321 if (len >= 3)
6322 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6323
6324 new_lhs = rewrite_expr_tree (stmt, 0, ops,
6325 powi_result != NULL
6326 || negate_result,
6327 len != orig_len);
6328 }
6329
6330 /* If we combined some repeated factors into a
6331 __builtin_powi call, multiply that result by the
6332 reassociated operands. */
6333 if (powi_result)
6334 {
6335 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6336 tree type = TREE_TYPE (lhs);
6337 tree target_ssa = make_temp_ssa_name (type, NULL,
6338 "reassocpow");
6339 gimple_set_lhs (lhs_stmt, target_ssa);
6340 update_stmt (lhs_stmt);
6341 if (lhs != new_lhs)
6342 {
6343 target_ssa = new_lhs;
6344 new_lhs = lhs;
6345 }
6346 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6347 powi_result, target_ssa);
6348 gimple_set_location (mul_stmt, gimple_location (stmt));
6349 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6350 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6351 }
6352 }
6353
6354 if (negate_result)
6355 {
6356 stmt = SSA_NAME_DEF_STMT (lhs);
6357 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6358 gimple_set_lhs (stmt, tmp);
6359 if (lhs != new_lhs)
6360 tmp = new_lhs;
6361 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6362 tmp);
6363 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6364 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6365 update_stmt (stmt);
6366 }
6367 }
6368 }
6369 }
6370 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6371 son;
6372 son = next_dom_son (CDI_POST_DOMINATORS, son))
6373 cfg_cleanup_needed |= reassociate_bb (son);
6374
6375 return cfg_cleanup_needed;
6376 }
6377
6378 /* Add jumps around shifts for range tests turned into bit tests.
6379 For each SSA_NAME VAR we have code like:
6380 VAR = ...; // final stmt of range comparison
6381 // bit test here...;
6382 OTHERVAR = ...; // final stmt of the bit test sequence
6383 RES = VAR | OTHERVAR;
6384 Turn the above into:
6385 VAR = ...;
6386 if (VAR != 0)
6387 goto <l3>;
6388 else
6389 goto <l2>;
6390 <l2>:
6391 // bit test here...;
6392 OTHERVAR = ...;
6393 <l3>:
6394 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6395
6396 static void
6397 branch_fixup (void)
6398 {
6399 tree var;
6400 unsigned int i;
6401
6402 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6403 {
6404 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6405 gimple *use_stmt;
6406 use_operand_p use;
6407 bool ok = single_imm_use (var, &use, &use_stmt);
6408 gcc_assert (ok
6409 && is_gimple_assign (use_stmt)
6410 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6411 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6412
6413 basic_block cond_bb = gimple_bb (def_stmt);
6414 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6415 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6416
6417 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6418 gimple *g = gimple_build_cond (NE_EXPR, var,
6419 build_zero_cst (TREE_TYPE (var)),
6420 NULL_TREE, NULL_TREE);
6421 location_t loc = gimple_location (use_stmt);
6422 gimple_set_location (g, loc);
6423 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6424
6425 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6426 etrue->probability = profile_probability::even ();
6427 edge efalse = find_edge (cond_bb, then_bb);
6428 efalse->flags = EDGE_FALSE_VALUE;
6429 efalse->probability -= etrue->probability;
6430 then_bb->count -= etrue->count ();
6431
6432 tree othervar = NULL_TREE;
6433 if (gimple_assign_rhs1 (use_stmt) == var)
6434 othervar = gimple_assign_rhs2 (use_stmt);
6435 else if (gimple_assign_rhs2 (use_stmt) == var)
6436 othervar = gimple_assign_rhs1 (use_stmt);
6437 else
6438 gcc_unreachable ();
6439 tree lhs = gimple_assign_lhs (use_stmt);
6440 gphi *phi = create_phi_node (lhs, merge_bb);
6441 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6442 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6443 gsi = gsi_for_stmt (use_stmt);
6444 gsi_remove (&gsi, true);
6445
6446 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6447 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6448 }
6449 reassoc_branch_fixups.release ();
6450 }
6451
6452 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6453 void debug_ops_vector (vec<operand_entry *> ops);
6454
6455 /* Dump the operand entry vector OPS to FILE. */
6456
6457 void
6458 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6459 {
6460 operand_entry *oe;
6461 unsigned int i;
6462
6463 FOR_EACH_VEC_ELT (ops, i, oe)
6464 {
6465 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6466 print_generic_expr (file, oe->op);
6467 fprintf (file, "\n");
6468 }
6469 }
6470
6471 /* Dump the operand entry vector OPS to STDERR. */
6472
6473 DEBUG_FUNCTION void
6474 debug_ops_vector (vec<operand_entry *> ops)
6475 {
6476 dump_ops_vector (stderr, ops);
6477 }
6478
6479 /* Bubble up return status from reassociate_bb. */
6480
6481 static bool
6482 do_reassoc (void)
6483 {
6484 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6485 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6486 }
6487
6488 /* Initialize the reassociation pass. */
6489
6490 static void
6491 init_reassoc (void)
6492 {
6493 int i;
6494 long rank = 2;
6495 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6496
6497 /* Find the loops, so that we can prevent moving calculations in
6498 them. */
6499 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6500
6501 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6502
6503 next_operand_entry_id = 0;
6504
6505 /* A reverse post-order (RPO) walk gives us an ordering in which
6506 deeper loops come later. */
6507 pre_and_rev_post_order_compute (NULL, bbs, false);
6508 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6509 operand_rank = new hash_map<tree, long>;
6510
6511 /* Give each default definition a distinct rank. This includes
6512 parameters and the static chain. Walk backwards over all
6513 SSA names so that we get proper rank ordering according
6514 to tree_swap_operands_p. */
6515 for (i = num_ssa_names - 1; i > 0; --i)
6516 {
6517 tree name = ssa_name (i);
6518 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6519 insert_operand_rank (name, ++rank);
6520 }
6521
6522 /* Set up rank for each BB */
6523 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6524 bb_rank[bbs[i]] = ++rank << 16;
6525
6526 free (bbs);
6527 calculate_dominance_info (CDI_POST_DOMINATORS);
6528 plus_negates = vNULL;
6529 }
6530
6531 /* Cleanup after the reassociation pass, and print stats if
6532 requested. */
6533
6534 static void
6535 fini_reassoc (void)
6536 {
6537 statistics_counter_event (cfun, "Linearized",
6538 reassociate_stats.linearized);
6539 statistics_counter_event (cfun, "Constants eliminated",
6540 reassociate_stats.constants_eliminated);
6541 statistics_counter_event (cfun, "Ops eliminated",
6542 reassociate_stats.ops_eliminated);
6543 statistics_counter_event (cfun, "Statements rewritten",
6544 reassociate_stats.rewritten);
6545 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6546 reassociate_stats.pows_encountered);
6547 statistics_counter_event (cfun, "Built-in powi calls created",
6548 reassociate_stats.pows_created);
6549
6550 delete operand_rank;
6551 operand_entry_pool.release ();
6552 free (bb_rank);
6553 plus_negates.release ();
6554 free_dominance_info (CDI_POST_DOMINATORS);
6555 loop_optimizer_finalize ();
6556 }
6557
6558 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6559 insertion of __builtin_powi calls.
6560
6561 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6562 optimization of a gimple conditional. Otherwise returns zero. */
6563
6564 static unsigned int
6565 execute_reassoc (bool insert_powi_p)
6566 {
6567 reassoc_insert_powi_p = insert_powi_p;
6568
6569 init_reassoc ();
6570
6571 bool cfg_cleanup_needed;
6572 cfg_cleanup_needed = do_reassoc ();
6573 repropagate_negates ();
6574 branch_fixup ();
6575
6576 fini_reassoc ();
6577 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6578 }
6579
6580 namespace {
6581
6582 const pass_data pass_data_reassoc =
6583 {
6584 GIMPLE_PASS, /* type */
6585 "reassoc", /* name */
6586 OPTGROUP_NONE, /* optinfo_flags */
6587 TV_TREE_REASSOC, /* tv_id */
6588 ( PROP_cfg | PROP_ssa ), /* properties_required */
6589 0, /* properties_provided */
6590 0, /* properties_destroyed */
6591 0, /* todo_flags_start */
6592 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6593 };
6594
6595 class pass_reassoc : public gimple_opt_pass
6596 {
6597 public:
6598 pass_reassoc (gcc::context *ctxt)
6599 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6600 {}
6601
6602 /* opt_pass methods: */
6603 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6604 void set_pass_param (unsigned int n, bool param)
6605 {
6606 gcc_assert (n == 0);
6607 insert_powi_p = param;
6608 }
6609 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6610 virtual unsigned int execute (function *)
6611 { return execute_reassoc (insert_powi_p); }
6612
6613 private:
6614 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6615 point 3a in the pass header comment. */
6616 bool insert_powi_p;
6617 }; // class pass_reassoc
6618
6619 } // anon namespace
6620
6621 gimple_opt_pass *
6622 make_pass_reassoc (gcc::context *ctxt)
6623 {
6624 return new pass_reassoc (ctxt);
6625 }