1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* The first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43 First of all, experiments showed that the transformation is not
44 always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and their common dominators,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.cc, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
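/* As an illustrative sketch (not part of the code below), assuming
   -freciprocal-math and that the target's min_divisions_for_recip_mul
   threshold N is met, a function such as

     float f (float a, float b, float d)
     {
       return a / d + b / d;
     }

   is rewritten by the reciprocal mini-pass roughly into

     float f (float a, float b, float d)
     {
       float rd = 1.0f / d;
       return a * rd + b * rd;
     }

   trading two divisions for one division and two multiplications.  */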
86
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-iterator.h"
104 #include "gimple-fold.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
119
120 /* This structure represents one basic block that either computes a
121 division, or is a common dominator for basic blocks that compute a
122 division. */
123 struct occurrence {
124 /* The basic block represented by this structure. */
125 basic_block bb = basic_block();
126
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 inserted in BB. */
129 tree recip_def = tree();
130
131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def = tree();
134
135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 was inserted in BB. */
137 gimple *recip_def_stmt = nullptr;
138
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 by BB. */
141 struct occurrence *children = nullptr;
142
143 /* Pointer to the next "struct occurrence" in the list of blocks
144 sharing a common dominator. */
145 struct occurrence *next = nullptr;
146
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
149 compute_merit. */
150 int num_divisions = 0;
151
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division = false;
156
157 /* Construct a struct occurrence for basic block BB, and whose
158 children list is headed by CHILDREN. */
159 occurrence (basic_block bb, struct occurrence *children)
160 : bb (bb), children (children)
161 {
162 bb->aux = this;
163 }
164
165 /* Destroy a struct occurrence and remove it from its basic block. */
166 ~occurrence ()
167 {
168 bb->aux = nullptr;
169 }
170
171 /* Allocate memory for a struct occurrence from OCC_POOL. */
172 static void* operator new (size_t);
173
174 /* Return memory for a struct occurrence to OCC_POOL. */
175 static void operator delete (void*, size_t);
176 };
177
178 static struct
179 {
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
182
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
186
187 static struct
188 {
189 /* Number of cexpi calls inserted. */
190 int inserted;
191
192 /* Number of conversions removed. */
193 int conv_removed;
194
195 } sincos_stats;
196
197 static struct
198 {
199 /* Number of widening multiplication ops inserted. */
200 int widen_mults_inserted;
201
202 /* Number of integer multiply-and-accumulate ops inserted. */
203 int maccs_inserted;
204
205 /* Number of fp fused multiply-add ops inserted. */
206 int fmas_inserted;
207
208 /* Number of divmod calls inserted. */
209 int divmod_calls_inserted;
210
211 /* Number of highpart multiplication ops inserted. */
212 int highpart_mults_inserted;
213 } widen_mul_stats;
214
215 /* The instance of "struct occurrence" representing the highest
216 interesting block in the dominator tree. */
217 static struct occurrence *occ_head;
218
219 /* Allocation pool for getting instances of "struct occurrence". */
220 static object_allocator<occurrence> *occ_pool;
221
222 void* occurrence::operator new (size_t n)
223 {
224 gcc_assert (n == sizeof(occurrence));
225 return occ_pool->allocate_raw ();
226 }
227
228 void occurrence::operator delete (void *occ, size_t n)
229 {
230 gcc_assert (n == sizeof(occurrence));
231 occ_pool->remove_raw (occ);
232 }
233
234 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
235 list of "struct occurrence"s, one per basic block, having IDOM as
236 their common dominator.
237
238 We try to insert NEW_OCC as deep as possible in the tree, and we also
239 insert any other block that is a common dominator for BB and one
240 block already in the tree. */
241
242 static void
243 insert_bb (struct occurrence *new_occ, basic_block idom,
244 struct occurrence **p_head)
245 {
246 struct occurrence *occ, **p_occ;
247
248 for (p_occ = p_head; (occ = *p_occ) != NULL; )
249 {
250 basic_block bb = new_occ->bb, occ_bb = occ->bb;
251 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252 if (dom == bb)
253 {
254 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
255 from its list. */
256 *p_occ = occ->next;
257 occ->next = new_occ->children;
258 new_occ->children = occ;
259
260 /* Try the next block (it may also be dominated by BB). */
261 }
262
263 else if (dom == occ_bb)
264 {
265 /* OCC_BB dominates BB. Tail recurse to look deeper. */
266 insert_bb (new_occ, dom, &occ->children);
267 return;
268 }
269
270 else if (dom != idom)
271 {
272 gcc_assert (!dom->aux);
273
274 /* There is a dominator between IDOM and BB, add it and make
275 two children out of NEW_OCC and OCC. First, remove OCC from
276 its list. */
277 *p_occ = occ->next;
278 new_occ->next = occ;
279 occ->next = NULL;
280
281 /* None of the previous blocks has DOM as a dominator: if we tail
282 recursed, we would reexamine them uselessly. Just switch BB with
283 DOM, and go on looking for blocks dominated by DOM. */
284 new_occ = new occurrence (dom, new_occ);
285 }
286
287 else
288 {
289 /* Nothing special, go on with the next element. */
290 p_occ = &occ->next;
291 }
292 }
293
294 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
295 new_occ->next = *p_head;
296 *p_head = new_occ;
297 }
298
299 /* Register that we found a division in BB.
300 IMPORTANCE is a measure of how much weighting to give
301 that division. Use IMPORTANCE = 2 to register a single
302 division. If the division is going to be found multiple
303 times use 1 (as it is with squares). */
304
305 static inline void
306 register_division_in (basic_block bb, int importance)
307 {
308 struct occurrence *occ;
309
310 occ = (struct occurrence *) bb->aux;
311 if (!occ)
312 {
313 occ = new occurrence (bb, NULL);
314 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
315 }
316
317 occ->bb_has_division = true;
318 occ->num_divisions += importance;
319 }
320
321
322 /* Compute the number of divisions that postdominate each block in OCC and
323 its children. */
324
325 static void
326 compute_merit (struct occurrence *occ)
327 {
328 struct occurrence *occ_child;
329 basic_block dom = occ->bb;
330
331 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
332 {
333 basic_block bb;
334 if (occ_child->children)
335 compute_merit (occ_child);
336
337 if (flag_exceptions)
338 bb = single_noncomplex_succ (dom);
339 else
340 bb = dom;
341
342 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343 occ->num_divisions += occ_child->num_divisions;
344 }
345 }
346
347
348 /* Return whether USE_STMT is a floating-point division by DEF. */
349 static inline bool
350 is_division_by (gimple *use_stmt, tree def)
351 {
352 return is_gimple_assign (use_stmt)
353 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354 && gimple_assign_rhs2 (use_stmt) == def
355 /* Do not recognize x / x as valid division, as we are getting
356 confused later by replacing all immediate uses of x in such
357 a stmt. */
358 && gimple_assign_rhs1 (use_stmt) != def
359 && !stmt_can_throw_internal (cfun, use_stmt);
360 }
361
362 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
363 static inline bool
364 is_mult_by (gimple *use_stmt, tree def, tree a)
365 {
366 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
368 {
369 tree op0 = gimple_assign_rhs1 (use_stmt);
370 tree op1 = gimple_assign_rhs2 (use_stmt);
371
372 return (op0 == def && op1 == a)
373 || (op0 == a && op1 == def);
374 }
375 return 0;
376 }
377
378 /* Return whether USE_STMT is DEF * DEF. */
379 static inline bool
380 is_square_of (gimple *use_stmt, tree def)
381 {
382 return is_mult_by (use_stmt, def, def);
383 }
384
385 /* Return whether USE_STMT is a floating-point division by
386 DEF * DEF. */
387 static inline bool
388 is_division_by_square (gimple *use_stmt, tree def)
389 {
390 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393 && !stmt_can_throw_internal (cfun, use_stmt))
394 {
395 tree denominator = gimple_assign_rhs2 (use_stmt);
396 if (TREE_CODE (denominator) == SSA_NAME)
397 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
398 }
399 return 0;
400 }
401
402 /* Walk the subset of the dominator tree rooted at OCC, setting the
403 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404 the given basic block. The field may be left NULL, of course,
405 if it is not possible or profitable to do the optimization.
406
407 DEF_BSI is an iterator pointing at the statement defining DEF.
408 If RECIP_DEF is set, a dominator already has a computation that can
409 be used.
410
411 If should_insert_square_recip is set, then this also inserts
412 the square of the reciprocal immediately after the definition
413 of the reciprocal. */
414
415 static void
416 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417 tree def, tree recip_def, tree square_recip_def,
418 int should_insert_square_recip, int threshold)
419 {
420 tree type;
421 gassign *new_stmt, *new_square_stmt;
422 gimple_stmt_iterator gsi;
423 struct occurrence *occ_child;
424
425 if (!recip_def
426 && (occ->bb_has_division || !flag_trapping_math)
427 /* Divide by two as all divisions are counted twice in
428 the costing loop. */
429 && occ->num_divisions / 2 >= threshold)
430 {
431 /* Make a variable with the replacement and substitute it. */
432 type = TREE_TYPE (def);
433 recip_def = create_tmp_reg (type, "reciptmp");
434 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435 build_one_cst (type), def);
436
437 if (should_insert_square_recip)
438 {
439 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441 recip_def, recip_def);
442 }
443
444 if (occ->bb_has_division)
445 {
446 /* Case 1: insert before an existing division. */
447 gsi = gsi_after_labels (occ->bb);
448 while (!gsi_end_p (gsi)
449 && (!is_division_by (gsi_stmt (gsi), def))
450 && (!is_division_by_square (gsi_stmt (gsi), def)))
451 gsi_next (&gsi);
452
453 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454 if (should_insert_square_recip)
455 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
456 }
457 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
458 {
459 /* Case 2: insert right after the definition. Note that this will
460 never happen if the definition statement can throw, because in
461 that case the sole successor of the statement's basic block will
462 dominate all the uses as well. */
463 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464 if (should_insert_square_recip)
465 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
466 }
467 else
468 {
469 /* Case 3: insert in a basic block not containing defs/uses. */
470 gsi = gsi_after_labels (occ->bb);
471 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472 if (should_insert_square_recip)
473 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
474 }
475
476 reciprocal_stats.rdivs_inserted++;
477
478 occ->recip_def_stmt = new_stmt;
479 }
480
481 occ->recip_def = recip_def;
482 occ->square_recip_def = square_recip_def;
483 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484 insert_reciprocals (def_gsi, occ_child, def, recip_def,
485 square_recip_def, should_insert_square_recip,
486 threshold);
487 }
488
489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490 Take as argument the use for (x * x). */
491 static inline void
492 replace_reciprocal_squares (use_operand_p use_p)
493 {
494 gimple *use_stmt = USE_STMT (use_p);
495 basic_block bb = gimple_bb (use_stmt);
496 struct occurrence *occ = (struct occurrence *) bb->aux;
497
498 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499 && occ->recip_def)
500 {
501 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504 SET_USE (use_p, occ->square_recip_def);
505 fold_stmt_inplace (&gsi);
506 update_stmt (use_stmt);
507 }
508 }
509
510
511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
512 possible. */
513
514 static inline void
515 replace_reciprocal (use_operand_p use_p)
516 {
517 gimple *use_stmt = USE_STMT (use_p);
518 basic_block bb = gimple_bb (use_stmt);
519 struct occurrence *occ = (struct occurrence *) bb->aux;
520
521 if (optimize_bb_for_speed_p (bb)
522 && occ->recip_def && use_stmt != occ->recip_def_stmt)
523 {
524 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526 SET_USE (use_p, occ->recip_def);
527 fold_stmt_inplace (&gsi);
528 update_stmt (use_stmt);
529 }
530 }
531
532
533 /* Free OCC and return one more "struct occurrence" to be freed. */
534
535 static struct occurrence *
536 free_bb (struct occurrence *occ)
537 {
538 struct occurrence *child, *next;
539
540 /* First get the two pointers hanging off OCC. */
541 next = occ->next;
542 child = occ->children;
543 delete occ;
544
545 /* Now ensure that we don't recurse unless it is necessary. */
546 if (!child)
547 return next;
548 else
549 {
550 while (next)
551 next = free_bb (next);
552
553 return child;
554 }
555 }
556
557 /* Transform sequences like
558 t = sqrt (a)
559 x = 1.0 / t;
560 r1 = x * x;
561 r2 = a * x;
562 into:
563 t = sqrt (a)
564 r1 = 1.0 / a;
565 r2 = t;
566 x = r1 * r2;
567 depending on the uses of x, r1, r2. This removes one multiplication and
568 allows the sqrt and division operations to execute in parallel.
569 DEF_GSI is the gsi of the initial division by sqrt that defines
570 DEF (x in the example above). */
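/* The rewrite rests on the unsafe-math identities
     x * x == (1 / sqrt (a)) * (1 / sqrt (a)) == 1 / a
     a * x == a / sqrt (a) == sqrt (a)
   so, as an illustrative source-level sketch (valid only under
   -funsafe-math-optimizations):

     float t = sqrtf (a);
     float x = 1.0f / t;
     float r1 = x * x;        r1 becomes 1.0f / a
     float r2 = a * x;        r2 becomes t

   which lets the division 1.0f / a and the sqrtf call execute
   independently of each other.  */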
571
572 static void
573 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
574 {
575 gimple *use_stmt;
576 imm_use_iterator use_iter;
577 gimple *stmt = gsi_stmt (*def_gsi);
578 tree x = def;
579 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580 tree div_rhs1 = gimple_assign_rhs1 (stmt);
581
582 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583 || TREE_CODE (div_rhs1) != REAL_CST
584 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585 return;
586
587 gcall *sqrt_stmt
588 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
589
590 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591 return;
592
593 switch (gimple_call_combined_fn (sqrt_stmt))
594 {
595 CASE_CFN_SQRT:
596 CASE_CFN_SQRT_FN:
597 break;
598
599 default:
600 return;
601 }
602 tree a = gimple_call_arg (sqrt_stmt, 0);
603
604 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
605
606 /* Statements that use x in x * x. */
607 auto_vec<gimple *> sqr_stmts;
608 /* Statements that use x in a * x. */
609 auto_vec<gimple *> mult_stmts;
610 bool has_other_use = false;
611 bool mult_on_main_path = false;
612
613 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
614 {
615 if (is_gimple_debug (use_stmt))
616 continue;
617 if (is_square_of (use_stmt, x))
618 {
619 sqr_stmts.safe_push (use_stmt);
620 if (gimple_bb (use_stmt) == gimple_bb (stmt))
621 mult_on_main_path = true;
622 }
623 else if (is_mult_by (use_stmt, x, a))
624 {
625 mult_stmts.safe_push (use_stmt);
626 if (gimple_bb (use_stmt) == gimple_bb (stmt))
627 mult_on_main_path = true;
628 }
629 else
630 has_other_use = true;
631 }
632
633 /* In the x * x and a * x cases we just rewire stmt operands or
634 remove multiplications. In the has_other_use case we introduce
635 a multiplication so make sure we don't introduce a multiplication
636 on a path where there was none. */
637 if (has_other_use && !mult_on_main_path)
638 return;
639
640 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641 return;
642
643 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 to be able to compose it from the sqr and mult cases. */
645 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646 return;
647
648 if (dump_file)
649 {
650 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653 fprintf (dump_file, "\n");
654 }
655
656 bool delete_div = !has_other_use;
657 tree sqr_ssa_name = NULL_TREE;
658 if (!sqr_stmts.is_empty ())
659 {
660 /* r1 = x * x. Transform the original
661 x = 1.0 / t
662 into
663 tmp1 = 1.0 / a
664 r1 = tmp1. */
665
666 sqr_ssa_name
667 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
668
669 if (dump_file)
670 {
671 fprintf (dump_file, "Replacing original division\n");
672 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673 fprintf (dump_file, "with new division\n");
674 }
675 stmt
676 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677 gimple_assign_rhs1 (stmt), a);
678 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679 gsi_remove (def_gsi, true);
680 *def_gsi = gsi_for_stmt (stmt);
681 fold_stmt_inplace (def_gsi);
682 update_stmt (stmt);
683
684 if (dump_file)
685 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
686
687 delete_div = false;
688 gimple *sqr_stmt;
689 unsigned int i;
690 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
691 {
692 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694 update_stmt (sqr_stmt);
695 }
696 }
697 if (!mult_stmts.is_empty ())
698 {
699 /* r2 = a * x. Transform this into:
700 r2 = t (The original sqrt (a)). */
701 unsigned int i;
702 gimple *mult_stmt = NULL;
703 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
704 {
705 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
706
707 if (dump_file)
708 {
709 fprintf (dump_file, "Replacing squaring multiplication\n");
710 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711 fprintf (dump_file, "with assignment\n");
712 }
713 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714 fold_stmt_inplace (&gsi2);
715 update_stmt (mult_stmt);
716 if (dump_file)
717 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
718 }
719 }
720
721 if (has_other_use)
722 {
723 /* Using the two temporaries tmp1, tmp2 from above
724 the original x is now:
725 x = tmp1 * tmp2. */
726 gcc_assert (orig_sqrt_ssa_name);
727 gcc_assert (sqr_ssa_name);
728
729 gimple *new_stmt
730 = gimple_build_assign (x, MULT_EXPR,
731 orig_sqrt_ssa_name, sqr_ssa_name);
732 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733 update_stmt (stmt);
734 }
735 else if (delete_div)
736 {
737 /* Remove the original division. */
738 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739 gsi_remove (&gsi2, true);
740 release_defs (stmt);
741 }
742 else
743 release_ssa_name (x);
744 }
745
746 /* Look for floating-point divisions among DEF's uses, and try to
747 replace them by multiplications with the reciprocal. Add
748 as many statements computing the reciprocal as needed.
749
750 DEF must be a GIMPLE register of a floating-point type. */
751
752 static void
753 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
754 {
755 use_operand_p use_p, square_use_p;
756 imm_use_iterator use_iter, square_use_iter;
757 tree square_def;
758 struct occurrence *occ;
759 int count = 0;
760 int threshold;
761 int square_recip_count = 0;
762 int sqrt_recip_count = 0;
763
764 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
766
767 /* If DEF is a square (x * x), count the number of divisions by x.
768 If there are more divisions by x than by (DEF * DEF), prefer to optimize
769 the reciprocal of x instead of DEF. This improves cases like:
770 def = x * x
771 t0 = a / def
772 t1 = b / def
773 t2 = c / x
774 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
775 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
776
777 if (is_gimple_assign (def_stmt)
778 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
781 {
782 tree op0 = gimple_assign_rhs1 (def_stmt);
783
784 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
785 {
786 gimple *use_stmt = USE_STMT (use_p);
787 if (is_division_by (use_stmt, op0))
788 sqrt_recip_count++;
789 }
790 }
791
792 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
793 {
794 gimple *use_stmt = USE_STMT (use_p);
795 if (is_division_by (use_stmt, def))
796 {
797 register_division_in (gimple_bb (use_stmt), 2);
798 count++;
799 }
800
801 if (is_square_of (use_stmt, def))
802 {
803 square_def = gimple_assign_lhs (use_stmt);
804 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
805 {
806 gimple *square_use_stmt = USE_STMT (square_use_p);
807 if (is_division_by (square_use_stmt, square_def))
808 {
809 /* This is executed twice for each division by a square. */
810 register_division_in (gimple_bb (square_use_stmt), 1);
811 square_recip_count++;
812 }
813 }
814 }
815 }
816
817 /* Square reciprocals were counted twice above. */
818 square_recip_count /= 2;
819
820 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 if (sqrt_recip_count > square_recip_count)
822 goto out;
823
824 /* Do the expensive part only if we can hope to optimize something. */
825 if (count + square_recip_count >= threshold && count >= 1)
826 {
827 gimple *use_stmt;
828 for (occ = occ_head; occ; occ = occ->next)
829 {
830 compute_merit (occ);
831 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832 square_recip_count, threshold);
833 }
834
835 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
836 {
837 if (is_division_by (use_stmt, def))
838 {
839 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840 replace_reciprocal (use_p);
841 }
842 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
843 {
844 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
845 {
846 /* Find all uses of the square that are divisions and
847 replace them by multiplications with the inverse. */
848 imm_use_iterator square_iterator;
849 gimple *powmult_use_stmt = USE_STMT (use_p);
850 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
851
852 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853 square_iterator, powmult_def_name)
854 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
855 {
856 gimple *powmult_use_stmt = USE_STMT (square_use_p);
857 if (is_division_by (powmult_use_stmt, powmult_def_name))
858 replace_reciprocal_squares (square_use_p);
859 }
860 }
861 }
862 }
863 }
864
865 out:
866 for (occ = occ_head; occ; )
867 occ = free_bb (occ);
868
869 occ_head = NULL;
870 }
871
872 /* Return an internal function that implements the reciprocal of CALL,
873 or IFN_LAST if there is no such function that the target supports. */
874
875 internal_fn
876 internal_fn_reciprocal (gcall *call)
877 {
878 internal_fn ifn;
879
880 switch (gimple_call_combined_fn (call))
881 {
882 CASE_CFN_SQRT:
883 CASE_CFN_SQRT_FN:
884 ifn = IFN_RSQRT;
885 break;
886
887 default:
888 return IFN_LAST;
889 }
890
891 tree_pair types = direct_internal_fn_types (ifn, call);
892 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893 return IFN_LAST;
894
895 return ifn;
896 }
897
898 /* Go through all the floating-point SSA_NAMEs, and call
899 execute_cse_reciprocals_1 on each of them. */
900 namespace {
901
902 const pass_data pass_data_cse_reciprocals =
903 {
904 GIMPLE_PASS, /* type */
905 "recip", /* name */
906 OPTGROUP_NONE, /* optinfo_flags */
907 TV_TREE_RECIP, /* tv_id */
908 PROP_ssa, /* properties_required */
909 0, /* properties_provided */
910 0, /* properties_destroyed */
911 0, /* todo_flags_start */
912 TODO_update_ssa, /* todo_flags_finish */
913 };
914
915 class pass_cse_reciprocals : public gimple_opt_pass
916 {
917 public:
918 pass_cse_reciprocals (gcc::context *ctxt)
919 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
920 {}
921
922 /* opt_pass methods: */
923 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
924 virtual unsigned int execute (function *);
925
926 }; // class pass_cse_reciprocals
927
928 unsigned int
929 pass_cse_reciprocals::execute (function *fun)
930 {
931 basic_block bb;
932 tree arg;
933
934 occ_pool = new object_allocator<occurrence> ("dominators for recip");
935
936 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
937 calculate_dominance_info (CDI_DOMINATORS);
938 calculate_dominance_info (CDI_POST_DOMINATORS);
939
940 if (flag_checking)
941 FOR_EACH_BB_FN (bb, fun)
942 gcc_assert (!bb->aux);
943
944 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
945 if (FLOAT_TYPE_P (TREE_TYPE (arg))
946 && is_gimple_reg (arg))
947 {
948 tree name = ssa_default_def (fun, arg);
949 if (name)
950 execute_cse_reciprocals_1 (NULL, name);
951 }
952
953 FOR_EACH_BB_FN (bb, fun)
954 {
955 tree def;
956
957 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
958 gsi_next (&gsi))
959 {
960 gphi *phi = gsi.phi ();
961 def = PHI_RESULT (phi);
962 if (! virtual_operand_p (def)
963 && FLOAT_TYPE_P (TREE_TYPE (def)))
964 execute_cse_reciprocals_1 (NULL, def);
965 }
966
967 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
968 gsi_next (&gsi))
969 {
970 gimple *stmt = gsi_stmt (gsi);
971
972 if (gimple_has_lhs (stmt)
973 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
974 && FLOAT_TYPE_P (TREE_TYPE (def))
975 && TREE_CODE (def) == SSA_NAME)
976 {
977 execute_cse_reciprocals_1 (&gsi, def);
978 stmt = gsi_stmt (gsi);
979 if (flag_unsafe_math_optimizations
980 && is_gimple_assign (stmt)
981 && gimple_assign_lhs (stmt) == def
982 && !stmt_can_throw_internal (cfun, stmt)
983 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
984 optimize_recip_sqrt (&gsi, def);
985 }
986 }
987
988 if (optimize_bb_for_size_p (bb))
989 continue;
990
991 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
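      /* For example, when the target supports IFN_RSQRT (or provides a
	 reciprocal through targetm.builtin_reciprocal), a sequence like
	   t_1 = sqrtf (b);  x_2 = a / t_1;
	 is rewritten below so that t_1 is redefined as the reciprocal
	 square root of b and the division becomes x_2 = a * t_1
	 (illustrative GIMPLE, SSA names made up).  */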
992 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
993 gsi_next (&gsi))
994 {
995 gimple *stmt = gsi_stmt (gsi);
996
997 if (is_gimple_assign (stmt)
998 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
999 {
1000 tree arg1 = gimple_assign_rhs2 (stmt);
1001 gimple *stmt1;
1002
1003 if (TREE_CODE (arg1) != SSA_NAME)
1004 continue;
1005
1006 stmt1 = SSA_NAME_DEF_STMT (arg1);
1007
1008 if (is_gimple_call (stmt1)
1009 && gimple_call_lhs (stmt1))
1010 {
1011 bool fail;
1012 imm_use_iterator ui;
1013 use_operand_p use_p;
1014 tree fndecl = NULL_TREE;
1015
1016 gcall *call = as_a <gcall *> (stmt1);
1017 internal_fn ifn = internal_fn_reciprocal (call);
1018 if (ifn == IFN_LAST)
1019 {
1020 fndecl = gimple_call_fndecl (call);
1021 if (!fndecl
1022 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1023 continue;
1024 fndecl = targetm.builtin_reciprocal (fndecl);
1025 if (!fndecl)
1026 continue;
1027 }
1028
1029 /* Check that all uses of the SSA name are divisions,
1030 otherwise replacing the defining statement will do
1031 the wrong thing. */
1032 fail = false;
1033 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1034 {
1035 gimple *stmt2 = USE_STMT (use_p);
1036 if (is_gimple_debug (stmt2))
1037 continue;
1038 if (!is_gimple_assign (stmt2)
1039 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1040 || gimple_assign_rhs1 (stmt2) == arg1
1041 || gimple_assign_rhs2 (stmt2) != arg1)
1042 {
1043 fail = true;
1044 break;
1045 }
1046 }
1047 if (fail)
1048 continue;
1049
1050 gimple_replace_ssa_lhs (call, arg1);
1051 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1052 {
1053 auto_vec<tree, 4> args;
1054 for (unsigned int i = 0;
1055 i < gimple_call_num_args (call); i++)
1056 args.safe_push (gimple_call_arg (call, i));
1057 gcall *stmt2;
1058 if (ifn == IFN_LAST)
1059 stmt2 = gimple_build_call_vec (fndecl, args);
1060 else
1061 stmt2 = gimple_build_call_internal_vec (ifn, args);
1062 gimple_call_set_lhs (stmt2, arg1);
1063 gimple_move_vops (stmt2, call);
1064 gimple_call_set_nothrow (stmt2,
1065 gimple_call_nothrow_p (call));
1066 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1067 gsi_replace (&gsi2, stmt2, true);
1068 }
1069 else
1070 {
1071 if (ifn == IFN_LAST)
1072 gimple_call_set_fndecl (call, fndecl);
1073 else
1074 gimple_call_set_internal_fn (call, ifn);
1075 update_stmt (call);
1076 }
1077 reciprocal_stats.rfuncs_inserted++;
1078
1079 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1080 {
1081 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1082 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1083 fold_stmt_inplace (&gsi);
1084 update_stmt (stmt);
1085 }
1086 }
1087 }
1088 }
1089 }
1090
1091 statistics_counter_event (fun, "reciprocal divs inserted",
1092 reciprocal_stats.rdivs_inserted);
1093 statistics_counter_event (fun, "reciprocal functions inserted",
1094 reciprocal_stats.rfuncs_inserted);
1095
1096 free_dominance_info (CDI_DOMINATORS);
1097 free_dominance_info (CDI_POST_DOMINATORS);
1098 delete occ_pool;
1099 return 0;
1100 }
1101
1102 } // anon namespace
1103
1104 gimple_opt_pass *
1105 make_pass_cse_reciprocals (gcc::context *ctxt)
1106 {
1107 return new pass_cse_reciprocals (ctxt);
1108 }
1109
1110 /* If NAME is the result of a type conversion, look for other
1111 equivalent dominating or dominated conversions, and replace all
1112 uses with the earliest dominating name, removing the redundant
1113 conversions. Return the prevailing name. */
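/* As an illustrative GIMPLE sketch (SSA names made up):

     a_2 = (double) x_1;
     ...
     b_3 = (double) x_1;
     use (b_3);

   becomes

     a_2 = (double) x_1;
     ...
     use (a_2);

   i.e. the dominated duplicate conversion is removed and its uses are
   redirected to the prevailing name.  */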
1114
1115 static tree
1116 execute_cse_conv_1 (tree name, bool *cfg_changed)
1117 {
1118 if (SSA_NAME_IS_DEFAULT_DEF (name)
1119 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1120 return name;
1121
1122 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1123
1124 if (!gimple_assign_cast_p (def_stmt))
1125 return name;
1126
1127 tree src = gimple_assign_rhs1 (def_stmt);
1128
1129 if (TREE_CODE (src) != SSA_NAME)
1130 return name;
1131
1132 imm_use_iterator use_iter;
1133 gimple *use_stmt;
1134
1135 /* Find the earliest dominating def. */
1136 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1137 {
1138 if (use_stmt == def_stmt
1139 || !gimple_assign_cast_p (use_stmt))
1140 continue;
1141
1142 tree lhs = gimple_assign_lhs (use_stmt);
1143
1144 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1145 || (gimple_assign_rhs1 (use_stmt)
1146 != gimple_assign_rhs1 (def_stmt))
1147 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1148 continue;
1149
1150 bool use_dominates;
1151 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1152 {
1153 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1154 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1155 gsi_next (&gsi);
1156 use_dominates = !gsi_end_p (gsi);
1157 }
1158 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1159 gimple_bb (def_stmt)))
1160 use_dominates = false;
1161 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1162 gimple_bb (use_stmt)))
1163 use_dominates = true;
1164 else
1165 continue;
1166
1167 if (use_dominates)
1168 {
1169 std::swap (name, lhs);
1170 std::swap (def_stmt, use_stmt);
1171 }
1172 }
1173
1174 /* Now go through all uses of SRC again, replacing the equivalent
1175 dominated conversions. We may replace defs that were not
1176 dominated by the then-prevailing defs when we first visited
1177 them. */
1178 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1179 {
1180 if (use_stmt == def_stmt
1181 || !gimple_assign_cast_p (use_stmt))
1182 continue;
1183
1184 tree lhs = gimple_assign_lhs (use_stmt);
1185
1186 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1187 || (gimple_assign_rhs1 (use_stmt)
1188 != gimple_assign_rhs1 (def_stmt))
1189 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1190 continue;
1191
1192 basic_block use_bb = gimple_bb (use_stmt);
1193 if (gimple_bb (def_stmt) == use_bb
1194 || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1195 {
1196 sincos_stats.conv_removed++;
1197
1198 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1199 replace_uses_by (lhs, name);
1200 if (gsi_remove (&gsi, true)
1201 && gimple_purge_dead_eh_edges (use_bb))
1202 *cfg_changed = true;
1203 release_defs (use_stmt);
1204 }
1205 }
1206
1207 return name;
1208 }
1209
1210 /* Records an occurrence at statement USE_STMT in the vector of trees
1211 STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB is not
1212 yet initialized. Returns true if the occurrence was pushed on
1213 the vector. Adjusts *TOP_BB to be the basic block dominating all
1214 statements in the vector. */
1215
1216 static bool
1217 maybe_record_sincos (vec<gimple *> *stmts,
1218 basic_block *top_bb, gimple *use_stmt)
1219 {
1220 basic_block use_bb = gimple_bb (use_stmt);
1221 if (*top_bb
1222 && (*top_bb == use_bb
1223 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1224 stmts->safe_push (use_stmt);
1225 else if (!*top_bb
1226 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1227 {
1228 stmts->safe_push (use_stmt);
1229 *top_bb = use_bb;
1230 }
1231 else
1232 return false;
1233
1234 return true;
1235 }
1236
1237 /* Look for sin, cos and cexpi calls with the same argument NAME and
1238 create a single call to cexpi CSEing the result in this case.
1239 We first walk over all immediate uses of the argument collecting
1240 statements that we can CSE in a vector and in a second pass replace
1241 the statement rhs with a REALPART or IMAGPART expression on the
1242 result of the cexpi call we insert before the use statement that
1243 dominates all other candidates. */
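/* Illustrative source-level effect, assuming a cexpi built-in is available
   for the type (cexpi (x) == cos (x) + i * sin (x)):

     c = cosf (x);
     s = sinf (x);

   is CSEd into something like

     tmp = __builtin_cexpif (x);
     c = __real__ tmp;
     s = __imag__ tmp;  */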
1244
1245 static bool
1246 execute_cse_sincos_1 (tree name)
1247 {
1248 gimple_stmt_iterator gsi;
1249 imm_use_iterator use_iter;
1250 tree fndecl, res, type = NULL_TREE;
1251 gimple *def_stmt, *use_stmt, *stmt;
1252 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1253 auto_vec<gimple *> stmts;
1254 basic_block top_bb = NULL;
1255 int i;
1256 bool cfg_changed = false;
1257
1258 name = execute_cse_conv_1 (name, &cfg_changed);
1259
1260 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1261 {
1262 if (gimple_code (use_stmt) != GIMPLE_CALL
1263 || !gimple_call_lhs (use_stmt))
1264 continue;
1265
1266 switch (gimple_call_combined_fn (use_stmt))
1267 {
1268 CASE_CFN_COS:
1269 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1270 break;
1271
1272 CASE_CFN_SIN:
1273 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1274 break;
1275
1276 CASE_CFN_CEXPI:
1277 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1278 break;
1279
1280 default:;
1281 continue;
1282 }
1283
1284 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1285 if (!type)
1286 {
1287 type = t;
1288 t = TREE_TYPE (name);
1289 }
1290 /* This checks that NAME has the right type in the first round,
1291 and, in subsequent rounds, that the built_in type is the same
1292 type, or a compatible type. */
1293 if (type != t && !types_compatible_p (type, t))
1294 return false;
1295 }
1296 if (seen_cos + seen_sin + seen_cexpi <= 1)
1297 return false;
1298
1299 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1300 the name def statement. */
1301 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1302 if (!fndecl)
1303 return false;
1304 stmt = gimple_build_call (fndecl, 1, name);
1305 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1306 gimple_call_set_lhs (stmt, res);
1307
1308 def_stmt = SSA_NAME_DEF_STMT (name);
1309 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1310 && gimple_code (def_stmt) != GIMPLE_PHI
1311 && gimple_bb (def_stmt) == top_bb)
1312 {
1313 gsi = gsi_for_stmt (def_stmt);
1314 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1315 }
1316 else
1317 {
1318 gsi = gsi_after_labels (top_bb);
1319 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1320 }
1321 sincos_stats.inserted++;
1322
1323 /* And adjust the recorded old call sites. */
1324 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1325 {
1326 tree rhs = NULL;
1327
1328 switch (gimple_call_combined_fn (use_stmt))
1329 {
1330 CASE_CFN_COS:
1331 rhs = fold_build1 (REALPART_EXPR, type, res);
1332 break;
1333
1334 CASE_CFN_SIN:
1335 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1336 break;
1337
1338 CASE_CFN_CEXPI:
1339 rhs = res;
1340 break;
1341
1342 default:;
1343 gcc_unreachable ();
1344 }
1345
1346 /* Replace call with a copy. */
1347 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1348
1349 gsi = gsi_for_stmt (use_stmt);
1350 gsi_replace (&gsi, stmt, true);
1351 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1352 cfg_changed = true;
1353 }
1354
1355 return cfg_changed;
1356 }
1357
1358 /* To evaluate powi(x,n), the floating point value x raised to the
1359 constant integer exponent n, we use a hybrid algorithm that
1360 combines the "window method" with look-up tables. For an
1361 introduction to exponentiation algorithms and "addition chains",
1362 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1363 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1364 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1365 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1366
1367 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1368 multiplications to inline before calling the system library's pow
1369 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1370 so this default never requires calling pow, powf or powl. */
1371
1372 #ifndef POWI_MAX_MULTS
1373 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1374 #endif
1375
1376 /* The size of the "optimal power tree" lookup table. All
1377 exponents less than this value are simply looked up in the
1378 powi_table below. This threshold is also used to size the
1379 cache of pseudo registers that hold intermediate results. */
1380 #define POWI_TABLE_SIZE 256
1381
1382 /* The size, in bits, of the window used in the "window method"
1383 exponentiation algorithm. This is equivalent to a radix of
1384 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1385 #define POWI_WINDOW_SIZE 3
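/* Sketch of the window method for exponents of at least POWI_TABLE_SIZE
   (with the defaults above): powi_as_mults_1 peels a POWI_WINDOW_SIZE-bit
   digit off an odd exponent and halves an even one, e.g. for n = 259:

     259 is odd, digit = 259 & 7 = 3:  x**259 = x**256 * x**3
     256 is even:                      x**256 = (x**128) ** 2

   after which the exponents 128 and 3 are below POWI_TABLE_SIZE and are
   handled through the look-up table that follows.  */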
1386
1387 /* The following table is an efficient representation of an
1388 "optimal power tree". For each value, i, the corresponding
1389 value, j, in the table states that an optimal evaluation
1390 sequence for calculating pow(x,i) can be found by evaluating
1391 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1392 100 integers is given in Knuth's "Seminumerical algorithms". */
1393
1394 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1395 {
1396 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1397 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1398 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1399 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1400 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1401 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1402 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1403 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1404 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1405 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1406 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1407 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1408 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1409 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1410 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1411 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1412 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1413 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1414 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1415 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1416 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1417 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1418 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1419 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1420 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1421 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1422 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1423 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1424 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1425 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1426 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1427 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1428 };
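/* Worked example of the table: for i = 15 the entry is 9, so
   pow (x, 15) = pow (x, 9) * pow (x, 6); following the entries
   9 -> 6, 6 -> 3, 3 -> 2 and 2 -> 1 yields the chain

     x2 = x * x;  x3 = x2 * x;  x6 = x3 * x3;
     x9 = x6 * x3;  x15 = x9 * x6;

   i.e. five multiplications, one fewer than plain binary
   square-and-multiply needs for this exponent.  */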
1429
1430
1431 /* Return the number of multiplications required to calculate
1432 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1433 subroutine of powi_cost. CACHE is an array indicating
1434 which exponents have already been calculated. */
1435
1436 static int
1437 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1438 {
1439 /* If we've already calculated this exponent, then this evaluation
1440 doesn't require any additional multiplications. */
1441 if (cache[n])
1442 return 0;
1443
1444 cache[n] = true;
1445 return powi_lookup_cost (n - powi_table[n], cache)
1446 + powi_lookup_cost (powi_table[n], cache) + 1;
1447 }
1448
1449 /* Return the number of multiplications required to calculate
1450 powi(x,n) for an arbitrary x, given the exponent N. This
1451 function needs to be kept in sync with powi_as_mults below. */
1452
1453 static int
1454 powi_cost (HOST_WIDE_INT n)
1455 {
1456 bool cache[POWI_TABLE_SIZE];
1457 unsigned HOST_WIDE_INT digit;
1458 unsigned HOST_WIDE_INT val;
1459 int result;
1460
1461 if (n == 0)
1462 return 0;
1463
1464 /* Ignore the reciprocal when calculating the cost. */
1465 val = absu_hwi (n);
1466
1467 /* Initialize the exponent cache. */
1468 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1469 cache[1] = true;
1470
1471 result = 0;
1472
1473 while (val >= POWI_TABLE_SIZE)
1474 {
1475 if (val & 1)
1476 {
1477 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1478 result += powi_lookup_cost (digit, cache)
1479 + POWI_WINDOW_SIZE + 1;
1480 val >>= POWI_WINDOW_SIZE;
1481 }
1482 else
1483 {
1484 val >>= 1;
1485 result++;
1486 }
1487 }
1488
1489 return result + powi_lookup_cost (val, cache);
1490 }
1491
1492 /* Recursive subroutine of powi_as_mults. This function takes the
1493 array, CACHE, of already calculated exponents and an exponent N and
1494 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1495
1496 static tree
1497 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1498 unsigned HOST_WIDE_INT n, tree *cache)
1499 {
1500 tree op0, op1, ssa_target;
1501 unsigned HOST_WIDE_INT digit;
1502 gassign *mult_stmt;
1503
1504 if (n < POWI_TABLE_SIZE && cache[n])
1505 return cache[n];
1506
1507 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1508
1509 if (n < POWI_TABLE_SIZE)
1510 {
1511 cache[n] = ssa_target;
1512 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1513 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1514 }
1515 else if (n & 1)
1516 {
1517 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1518 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1519 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1520 }
1521 else
1522 {
1523 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1524 op1 = op0;
1525 }
1526
1527 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1528 gimple_set_location (mult_stmt, loc);
1529 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1530
1531 return ssa_target;
1532 }
1533
1534 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1535 This function needs to be kept in sync with powi_cost above. */
1536
1537 tree
1538 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1539 tree arg0, HOST_WIDE_INT n)
1540 {
1541 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1542 gassign *div_stmt;
1543 tree target;
1544
1545 if (n == 0)
1546 return build_one_cst (type);
1547
1548 memset (cache, 0, sizeof (cache));
1549 cache[1] = arg0;
1550
1551 result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1552 if (n >= 0)
1553 return result;
1554
1555 /* If the original exponent was negative, reciprocate the result. */
1556 target = make_temp_ssa_name (type, NULL, "powmult");
1557 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1558 build_real (type, dconst1), result);
1559 gimple_set_location (div_stmt, loc);
1560 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1561
1562 return target;
1563 }
1564
1565 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1566 location info LOC. If the arguments are appropriate, create an
1567 equivalent sequence of statements prior to GSI using an optimal
1568 number of multiplications, and return an expression holding the
1569 result. */
1570
1571 static tree
1572 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1573 tree arg0, HOST_WIDE_INT n)
1574 {
1575 if ((n >= -1 && n <= 2)
1576 || (optimize_function_for_speed_p (cfun)
1577 && powi_cost (n) <= POWI_MAX_MULTS))
1578 return powi_as_mults (gsi, loc, arg0, n);
1579
1580 return NULL_TREE;
1581 }
1582
1583 /* Build a gimple call statement that calls FN with argument ARG.
1584 Set the lhs of the call statement to a fresh SSA name. Insert the
1585 statement prior to GSI's current position, and return the fresh
1586 SSA name. */
1587
1588 static tree
1589 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1590 tree fn, tree arg)
1591 {
1592 gcall *call_stmt;
1593 tree ssa_target;
1594
1595 call_stmt = gimple_build_call (fn, 1, arg);
1596 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1597 gimple_set_lhs (call_stmt, ssa_target);
1598 gimple_set_location (call_stmt, loc);
1599 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1600
1601 return ssa_target;
1602 }
1603
1604 /* Build a gimple binary operation with the given CODE and arguments
1605 ARG0, ARG1, assigning the result to a new SSA name based on NAME.
1606 Insert the statement prior to GSI's current position, and
1607 return the fresh SSA name.  */
1608
1609 static tree
1610 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1611 const char *name, enum tree_code code,
1612 tree arg0, tree arg1)
1613 {
1614 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1615 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1616 gimple_set_location (stmt, loc);
1617 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1618 return result;
1619 }
1620
1621 /* Build a gimple reference operation with the given CODE and argument
1622 ARG0, assigning the result to a new SSA name of TYPE with NAME.
1623 Insert the statement prior to GSI's current position, and return
1624 the fresh SSA name. */
1625
1626 static inline tree
1627 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1628 const char *name, enum tree_code code, tree arg0)
1629 {
1630 tree result = make_temp_ssa_name (type, NULL, name);
1631 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1632 gimple_set_location (stmt, loc);
1633 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1634 return result;
1635 }
1636
1637 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1638 prior to GSI's current position, and return the fresh SSA name. */
1639
1640 static tree
1641 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1642 tree type, tree val)
1643 {
1644 tree result = make_ssa_name (type);
1645 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1646 gimple_set_location (stmt, loc);
1647 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1648 return result;
1649 }
1650
1651 struct pow_synth_sqrt_info
1652 {
1653 bool *factors;
1654 unsigned int deepest;
1655 unsigned int num_mults;
1656 };
1657
1658 /* Return true iff the real value C can be represented as a
1659 sum of powers of 0.5 up to N. That is:
1660 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1661 Record in INFO the various parameters of the synthesis algorithm such
1662 as the factors a[i], the maximum 0.5 power and the number of
1663 multiplications that will be required. */
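/* Worked example: for C == 0.625 and N >= 3 the loop below finds
     0.625 = 0.5 + 0.125 = 0.5**1 + 0.5**3
   so factors[] == {1, 0, 1}, deepest == 3 and num_mults == 1,
   corresponding to the product sqrt (x) * sqrt (sqrt (sqrt (x)))
   when synthesizing x**0.625.  */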
1664
1665 bool
1666 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1667 struct pow_synth_sqrt_info *info)
1668 {
1669 REAL_VALUE_TYPE factor = dconsthalf;
1670 REAL_VALUE_TYPE remainder = c;
1671
1672 info->deepest = 0;
1673 info->num_mults = 0;
1674 memset (info->factors, 0, n * sizeof (bool));
1675
1676 for (unsigned i = 0; i < n; i++)
1677 {
1678 REAL_VALUE_TYPE res;
1679
1680 /* If something inexact happened bail out now. */
1681 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1682 return false;
1683
1684 /* We have hit zero. The number is representable as a sum
1685 of powers of 0.5. */
1686 if (real_equal (&res, &dconst0))
1687 {
1688 info->factors[i] = true;
1689 info->deepest = i + 1;
1690 return true;
1691 }
1692 else if (!REAL_VALUE_NEGATIVE (res))
1693 {
1694 remainder = res;
1695 info->factors[i] = true;
1696 info->num_mults++;
1697 }
1698 else
1699 info->factors[i] = false;
1700
1701 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1702 }
1703 return false;
1704 }
1705
1706 /* Return the tree corresponding to FN being applied
1707 to ARG N times at GSI and LOC.
1708 Look up previous results from CACHE if need be.
1709 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1710
1711 static tree
1712 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1713 tree fn, location_t loc, tree *cache)
1714 {
1715 tree res = cache[n];
1716 if (!res)
1717 {
1718 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1719 res = build_and_insert_call (gsi, loc, fn, prev);
1720 cache[n] = res;
1721 }
1722
1723 return res;
1724 }
1725
1726 /* Print to STREAM the repeated application of function FNAME to ARG
1727 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1728 "foo (foo (x))". */
1729
1730 static void
1731 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1732 unsigned int n)
1733 {
1734 if (n == 0)
1735 fprintf (stream, "%s", arg);
1736 else
1737 {
1738 fprintf (stream, "%s (", fname);
1739 print_nested_fn (stream, fname, arg, n - 1);
1740 fprintf (stream, ")");
1741 }
1742 }
1743
1744 /* Print to STREAM the fractional sequence of sqrt chains
1745 applied to ARG, described by INFO. Used for the dump file. */
1746
1747 static void
1748 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1749 struct pow_synth_sqrt_info *info)
1750 {
1751 for (unsigned int i = 0; i < info->deepest; i++)
1752 {
1753 bool is_set = info->factors[i];
1754 if (is_set)
1755 {
1756 print_nested_fn (stream, "sqrt", arg, i + 1);
1757 if (i != info->deepest - 1)
1758 fprintf (stream, " * ");
1759 }
1760 }
1761 }
1762
1763 /* Print to STREAM a representation of raising ARG to an integer
1764 power N. Used for the dump file. */
1765
1766 static void
1767 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1768 {
1769 if (n > 1)
1770 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1771 else if (n == 1)
1772 fprintf (stream, "%s", arg);
1773 }
1774
1775 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1776 square roots. Place at GSI and LOC. Limit the maximum depth
1777 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1778 result of the expanded sequence or NULL_TREE if the expansion failed.
1779
1780 This routine assumes that ARG1 is a real number with a fractional part
1781 (the integer exponent case will have been handled earlier in
1782 gimple_expand_builtin_pow).
1783
1784 For ARG1 > 0.0:
1785 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1786 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1787 FRAC_PART == ARG1 - WHOLE_PART:
1788 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1789 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1790 if it can be expressed as such, that is if FRAC_PART satisfies:
1791 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1792 where integer a[i] is either 0 or 1.
1793
1794 Example:
1795 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1796 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1797
1798 For ARG1 < 0.0 there are two approaches:
1799 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1800 is calculated as above.
1801
1802 Example:
1803 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1804 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1805
1806 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1807 FRAC_PART := ARG1 - WHOLE_PART
1808 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1809 Example:
1810 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1811 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1812
1813 For ARG1 < 0.0 we choose between (A) and (B) depending on
1814 how many multiplications we'd have to do.
1815 So, for the example in (B): POW (x, -5.875), if we were to
1816 follow algorithm (A) we would produce:
1817 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1818 which contains more multiplications than approach (B).
1819
1820 Hopefully, this approach will eliminate potentially expensive POW library
1821 calls when unsafe floating point math is enabled and allow the compiler to
1822 further optimise the multiplies, square roots and divides produced by this
1823 function. */
1824
1825 static tree
1826 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1827 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1828 {
1829 tree type = TREE_TYPE (arg0);
1830 machine_mode mode = TYPE_MODE (type);
1831 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1832 bool one_over = true;
1833
1834 if (!sqrtfn)
1835 return NULL_TREE;
1836
1837 if (TREE_CODE (arg1) != REAL_CST)
1838 return NULL_TREE;
1839
1840 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1841
1842 gcc_assert (max_depth > 0);
1843 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1844
1845 struct pow_synth_sqrt_info synth_info;
1846 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1847 synth_info.deepest = 0;
1848 synth_info.num_mults = 0;
1849
1850 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1851 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1852
1853 /* The whole and fractional parts of exp. */
1854 REAL_VALUE_TYPE whole_part;
1855 REAL_VALUE_TYPE frac_part;
1856
1857 real_floor (&whole_part, mode, &exp);
1858 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1859
1860
1861 REAL_VALUE_TYPE ceil_whole = dconst0;
1862 REAL_VALUE_TYPE ceil_fract = dconst0;
1863
1864 if (neg_exp)
1865 {
1866 real_ceil (&ceil_whole, mode, &exp);
1867 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1868 }
1869
1870 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1871 return NULL_TREE;
1872
1873 /* Check whether it's more profitable to not use 1.0 / ... */
1874 if (neg_exp)
1875 {
1876 struct pow_synth_sqrt_info alt_synth_info;
1877 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1878 alt_synth_info.deepest = 0;
1879 alt_synth_info.num_mults = 0;
1880
1881 if (representable_as_half_series_p (ceil_fract, max_depth,
1882 &alt_synth_info)
1883 && alt_synth_info.deepest <= synth_info.deepest
1884 && alt_synth_info.num_mults < synth_info.num_mults)
1885 {
1886 whole_part = ceil_whole;
1887 frac_part = ceil_fract;
1888 synth_info.deepest = alt_synth_info.deepest;
1889 synth_info.num_mults = alt_synth_info.num_mults;
1890 memcpy (synth_info.factors, alt_synth_info.factors,
1891 (max_depth + 1) * sizeof (bool));
1892 one_over = false;
1893 }
1894 }
1895
1896 HOST_WIDE_INT n = real_to_integer (&whole_part);
1897 REAL_VALUE_TYPE cint;
1898 real_from_integer (&cint, VOIDmode, n, SIGNED);
1899
1900 if (!real_identical (&whole_part, &cint))
1901 return NULL_TREE;
1902
1903 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1904 return NULL_TREE;
1905
1906 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1907
1908 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1909
1910 /* Calculate the integer part of the exponent. */
1911 if (n > 1)
1912 {
1913 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1914 if (!integer_res)
1915 return NULL_TREE;
1916 }
1917
1918 if (dump_file)
1919 {
1920 char string[64];
1921
1922 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1923 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1924
1925 if (neg_exp)
1926 {
1927 if (one_over)
1928 {
1929 fprintf (dump_file, "1.0 / (");
1930 dump_integer_part (dump_file, "x", n);
1931 if (n > 0)
1932 fprintf (dump_file, " * ");
1933 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1934 fprintf (dump_file, ")");
1935 }
1936 else
1937 {
1938 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1939 fprintf (dump_file, " / (");
1940 dump_integer_part (dump_file, "x", n);
1941 fprintf (dump_file, ")");
1942 }
1943 }
1944 else
1945 {
1946 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1947 if (n > 0)
1948 fprintf (dump_file, " * ");
1949 dump_integer_part (dump_file, "x", n);
1950 }
1951
1952 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1953 }
1954
1955
1956 tree fract_res = NULL_TREE;
1957 cache[0] = arg0;
1958
1959 /* Calculate the fractional part of the exponent. */
1960 for (unsigned i = 0; i < synth_info.deepest; i++)
1961 {
1962 if (synth_info.factors[i])
1963 {
1964 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1965
1966 if (!fract_res)
1967 fract_res = sqrt_chain;
1968
1969 else
1970 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1971 fract_res, sqrt_chain);
1972 }
1973 }
1974
1975 tree res = NULL_TREE;
1976
1977 if (neg_exp)
1978 {
1979 if (one_over)
1980 {
1981 if (n > 0)
1982 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1983 fract_res, integer_res);
1984 else
1985 res = fract_res;
1986
1987 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1988 build_real (type, dconst1), res);
1989 }
1990 else
1991 {
1992 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1993 fract_res, integer_res);
1994 }
1995 }
1996 else
1997 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1998 fract_res, integer_res);
1999 return res;
2000 }
2001
2002 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2003 with location info LOC. If possible, create an equivalent and
2004 less expensive sequence of statements prior to GSI, and return an
2005 expression holding the result. */
2006
2007 static tree
2008 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2009 tree arg0, tree arg1)
2010 {
2011 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2012 REAL_VALUE_TYPE c2, dconst3;
2013 HOST_WIDE_INT n;
2014 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2015 machine_mode mode;
2016 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2017 bool hw_sqrt_exists, c_is_int, c2_is_int;
2018
2019 dconst1_4 = dconst1;
2020 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2021
2022 /* If the exponent isn't a constant, there's nothing of interest
2023 to be done. */
2024 if (TREE_CODE (arg1) != REAL_CST)
2025 return NULL_TREE;
2026
2027 /* Don't perform the operation if flag_signaling_nans is on
2028 and the operand is a signaling NaN. */
2029 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2030 && ((TREE_CODE (arg0) == REAL_CST
2031 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2032 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2033 return NULL_TREE;
2034
2035 /* If the exponent is equivalent to an integer, expand to an optimal
2036 multiplication sequence when profitable. */
2037 c = TREE_REAL_CST (arg1);
2038 n = real_to_integer (&c);
2039 real_from_integer (&cint, VOIDmode, n, SIGNED);
2040 c_is_int = real_identical (&c, &cint);
2041
2042 if (c_is_int
2043 && ((n >= -1 && n <= 2)
2044 || (flag_unsafe_math_optimizations
2045 && speed_p
2046 && powi_cost (n) <= POWI_MAX_MULTS)))
2047 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2048
2049 /* Attempt various optimizations using sqrt and cbrt. */
2050 type = TREE_TYPE (arg0);
2051 mode = TYPE_MODE (type);
2052 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2053
2054 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2055 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2056 sqrt(-0) = -0. */
2057 if (sqrtfn
2058 && real_equal (&c, &dconsthalf)
2059 && !HONOR_SIGNED_ZEROS (mode))
2060 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2061
2062 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2063
2064 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2065 optimizations since 1./3. is not exactly representable. If x
2066 is negative and finite, the correct value of pow(x,1./3.) is
2067 a NaN with the "invalid" exception raised, because the value
2068 of 1./3. actually has an even denominator. The correct value
2069 of cbrt(x) is a negative real value. */
2070 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2071 dconst1_3 = real_value_truncate (mode, dconst_third ());
2072
2073 if (flag_unsafe_math_optimizations
2074 && cbrtfn
2075 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2076 && real_equal (&c, &dconst1_3))
2077 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2078
2079 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2080 if we don't have a hardware sqrt insn. */
2081 dconst1_6 = dconst1_3;
2082 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2083
2084 if (flag_unsafe_math_optimizations
2085 && sqrtfn
2086 && cbrtfn
2087 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2088 && speed_p
2089 && hw_sqrt_exists
2090 && real_equal (&c, &dconst1_6))
2091 {
2092 /* sqrt(x) */
2093 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2094
2095 /* cbrt(sqrt(x)) */
2096 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2097 }
2098
2099
2100 /* Attempt to expand the POW as a product of square root chains.
2101 Expand the 0.25 case even when optimising for size. */
2102 if (flag_unsafe_math_optimizations
2103 && sqrtfn
2104 && hw_sqrt_exists
2105 && (speed_p || real_equal (&c, &dconst1_4))
2106 && !HONOR_SIGNED_ZEROS (mode))
2107 {
2108 unsigned int max_depth = speed_p
2109 ? param_max_pow_sqrt_depth
2110 : 2;
2111
2112 tree expand_with_sqrts
2113 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2114
2115 if (expand_with_sqrts)
2116 return expand_with_sqrts;
2117 }
2118
2119 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2120 n = real_to_integer (&c2);
2121 real_from_integer (&cint, VOIDmode, n, SIGNED);
2122 c2_is_int = real_identical (&c2, &cint);
2123
2124 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2125
2126 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2127 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2128
2129 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2130 different from pow(x, 1./3.) due to rounding and behavior with
2131 negative x, we need to constrain this transformation to unsafe
2132 math and positive x or finite math. */
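/* For example, for c == 5./3. this yields n == 5 and, if profitable, the
   expansion powi (x, 1) * (cbrt (x) * cbrt (x)), i.e.
   x * cbrt (x) * cbrt (x); for c == -5./3. the product is additionally
   reciprocated.  */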
2133 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2134 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2135 real_round (&c2, mode, &c2);
2136 n = real_to_integer (&c2);
2137 real_from_integer (&cint, VOIDmode, n, SIGNED);
2138 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2139 real_convert (&c2, mode, &c2);
2140
2141 if (flag_unsafe_math_optimizations
2142 && cbrtfn
2143 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2144 && real_identical (&c2, &c)
2145 && !c2_is_int
2146 && optimize_function_for_speed_p (cfun)
2147 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2148 {
2149 tree powi_x_ndiv3 = NULL_TREE;
2150
2151 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2152 possible or profitable, give up. Skip the degenerate case when
2153 abs(n) < 3, where the result is always 1. */
2154 if (absu_hwi (n) >= 3)
2155 {
2156 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2157 abs_hwi (n / 3));
2158 if (!powi_x_ndiv3)
2159 return NULL_TREE;
2160 }
2161
2162 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2163 as that creates an unnecessary variable. Instead, just produce
2164 either cbrt(x) or cbrt(x) * cbrt(x). */
2165 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2166
2167 if (absu_hwi (n) % 3 == 1)
2168 powi_cbrt_x = cbrt_x;
2169 else
2170 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2171 cbrt_x, cbrt_x);
2172
2173 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2174 if (absu_hwi (n) < 3)
2175 result = powi_cbrt_x;
2176 else
2177 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2178 powi_x_ndiv3, powi_cbrt_x);
2179
2180 /* If n is negative, reciprocate the result. */
2181 if (n < 0)
2182 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2183 build_real (type, dconst1), result);
2184
2185 return result;
2186 }
2187
2188 /* No optimizations succeeded. */
2189 return NULL_TREE;
2190 }
2191
2192 /* ARG is the argument to a cabs builtin call in GSI with location info
2193 LOC. Create a sequence of statements prior to GSI that calculates
2194 sqrt(R*R + I*I), where R and I are the real and imaginary components
2195 of ARG, respectively. Return an expression holding the result. */
2196
2197 static tree
2198 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2199 {
2200 tree real_part, imag_part, addend1, addend2, sum, result;
2201 tree type = TREE_TYPE (TREE_TYPE (arg));
2202 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2203 machine_mode mode = TYPE_MODE (type);
2204
2205 if (!flag_unsafe_math_optimizations
2206 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2207 || !sqrtfn
2208 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2209 return NULL_TREE;
2210
2211 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2212 REALPART_EXPR, arg);
2213 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2214 real_part, real_part);
2215 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2216 IMAGPART_EXPR, arg);
2217 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2218 imag_part, imag_part);
2219 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2220 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2221
2222 return result;
2223 }
2224
2225 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2226 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2227 an optimal number of multiplies, when n is a constant. */
2228
2229 namespace {
2230
2231 const pass_data pass_data_cse_sincos =
2232 {
2233 GIMPLE_PASS, /* type */
2234 "sincos", /* name */
2235 OPTGROUP_NONE, /* optinfo_flags */
2236 TV_TREE_SINCOS, /* tv_id */
2237 PROP_ssa, /* properties_required */
2238 PROP_gimple_opt_math, /* properties_provided */
2239 0, /* properties_destroyed */
2240 0, /* todo_flags_start */
2241 TODO_update_ssa, /* todo_flags_finish */
2242 };
2243
2244 class pass_cse_sincos : public gimple_opt_pass
2245 {
2246 public:
2247 pass_cse_sincos (gcc::context *ctxt)
2248 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2249 {}
2250
2251 /* opt_pass methods: */
2252 virtual bool gate (function *)
2253 {
2254 /* We no longer require either sincos or cexp, since powi expansion
2255 piggybacks on this pass. */
2256 return optimize;
2257 }
2258
2259 virtual unsigned int execute (function *);
2260
2261 }; // class pass_cse_sincos
2262
2263 unsigned int
2264 pass_cse_sincos::execute (function *fun)
2265 {
2266 basic_block bb;
2267 bool cfg_changed = false;
2268
2269 calculate_dominance_info (CDI_DOMINATORS);
2270 memset (&sincos_stats, 0, sizeof (sincos_stats));
2271
2272 FOR_EACH_BB_FN (bb, fun)
2273 {
2274 gimple_stmt_iterator gsi;
2275 bool cleanup_eh = false;
2276
2277 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278 {
2279 gimple *stmt = gsi_stmt (gsi);
2280
2281 /* Only the last stmt in a bb could throw, no need to call
2282 gimple_purge_dead_eh_edges if we change something in the middle
2283 of a basic block. */
2284 cleanup_eh = false;
2285
2286 if (is_gimple_call (stmt)
2287 && gimple_call_lhs (stmt))
2288 {
2289 tree arg, arg0, arg1, result;
2290 HOST_WIDE_INT n;
2291 location_t loc;
2292
2293 switch (gimple_call_combined_fn (stmt))
2294 {
2295 CASE_CFN_COS:
2296 CASE_CFN_SIN:
2297 CASE_CFN_CEXPI:
2298 arg = gimple_call_arg (stmt, 0);
2299 /* Make sure we have either sincos or cexp. */
2300 if (!targetm.libc_has_function (function_c99_math_complex,
2301 TREE_TYPE (arg))
2302 && !targetm.libc_has_function (function_sincos,
2303 TREE_TYPE (arg)))
2304 break;
2305
2306 if (TREE_CODE (arg) == SSA_NAME)
2307 cfg_changed |= execute_cse_sincos_1 (arg);
2308 break;
2309
2310 CASE_CFN_POW:
2311 arg0 = gimple_call_arg (stmt, 0);
2312 arg1 = gimple_call_arg (stmt, 1);
2313
2314 loc = gimple_location (stmt);
2315 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2316
2317 if (result)
2318 {
2319 tree lhs = gimple_get_lhs (stmt);
2320 gassign *new_stmt = gimple_build_assign (lhs, result);
2321 gimple_set_location (new_stmt, loc);
2322 unlink_stmt_vdef (stmt);
2323 gsi_replace (&gsi, new_stmt, true);
2324 cleanup_eh = true;
2325 if (gimple_vdef (stmt))
2326 release_ssa_name (gimple_vdef (stmt));
2327 }
2328 break;
2329
2330 CASE_CFN_POWI:
2331 arg0 = gimple_call_arg (stmt, 0);
2332 arg1 = gimple_call_arg (stmt, 1);
2333 loc = gimple_location (stmt);
2334
2335 if (real_minus_onep (arg0))
2336 {
2337 tree t0, t1, cond, one, minus_one;
2338 gassign *stmt;
2339
2340 t0 = TREE_TYPE (arg0);
2341 t1 = TREE_TYPE (arg1);
2342 one = build_real (t0, dconst1);
2343 minus_one = build_real (t0, dconstm1);
2344
2345 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2346 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2347 arg1, build_int_cst (t1, 1));
2348 gimple_set_location (stmt, loc);
2349 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2350
2351 result = make_temp_ssa_name (t0, NULL, "powi");
2352 stmt = gimple_build_assign (result, COND_EXPR, cond,
2353 minus_one, one);
2354 gimple_set_location (stmt, loc);
2355 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2356 }
2357 else
2358 {
2359 if (!tree_fits_shwi_p (arg1))
2360 break;
2361
2362 n = tree_to_shwi (arg1);
2363 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2364 }
2365
2366 if (result)
2367 {
2368 tree lhs = gimple_get_lhs (stmt);
2369 gassign *new_stmt = gimple_build_assign (lhs, result);
2370 gimple_set_location (new_stmt, loc);
2371 unlink_stmt_vdef (stmt);
2372 gsi_replace (&gsi, new_stmt, true);
2373 cleanup_eh = true;
2374 if (gimple_vdef (stmt))
2375 release_ssa_name (gimple_vdef (stmt));
2376 }
2377 break;
2378
2379 CASE_CFN_CABS:
2380 arg0 = gimple_call_arg (stmt, 0);
2381 loc = gimple_location (stmt);
2382 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2383
2384 if (result)
2385 {
2386 tree lhs = gimple_get_lhs (stmt);
2387 gassign *new_stmt = gimple_build_assign (lhs, result);
2388 gimple_set_location (new_stmt, loc);
2389 unlink_stmt_vdef (stmt);
2390 gsi_replace (&gsi, new_stmt, true);
2391 cleanup_eh = true;
2392 if (gimple_vdef (stmt))
2393 release_ssa_name (gimple_vdef (stmt));
2394 }
2395 break;
2396
2397 default:;
2398 }
2399 }
2400 }
2401 if (cleanup_eh)
2402 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2403 }
2404
2405 statistics_counter_event (fun, "sincos statements inserted",
2406 sincos_stats.inserted);
2407 statistics_counter_event (fun, "conv statements removed",
2408 sincos_stats.conv_removed);
2409
2410 return cfg_changed ? TODO_cleanup_cfg : 0;
2411 }
2412
2413 } // anon namespace
2414
2415 gimple_opt_pass *
2416 make_pass_cse_sincos (gcc::context *ctxt)
2417 {
2418 return new pass_cse_sincos (ctxt);
2419 }
2420
2421 /* Return true if stmt is a type conversion operation that can be stripped
2422 when used in a widening multiply operation. */
2423 static bool
2424 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2425 {
2426 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2427
2428 if (TREE_CODE (result_type) == INTEGER_TYPE)
2429 {
2430 tree op_type;
2431 tree inner_op_type;
2432
2433 if (!CONVERT_EXPR_CODE_P (rhs_code))
2434 return false;
2435
2436 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2437
2438 /* If the type of OP has the same precision as the result, then
2439 we can strip this conversion. The multiply operation will be
2440 selected to create the correct extension as a by-product. */
2441 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2442 return true;
2443
2444 /* We can also strip a conversion if it preserves the signed-ness of
2445 the operation and doesn't narrow the range. */
2446 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2447
2448 /* If the inner-most type is unsigned, then we can strip any
2449 intermediate widening operation. If it's signed, then the
2450 intermediate widening operation must also be signed. */
2451 if ((TYPE_UNSIGNED (inner_op_type)
2452 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2453 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2454 return true;
2455
2456 return false;
2457 }
2458
2459 return rhs_code == FIXED_CONVERT_EXPR;
2460 }
2461
2462 /* Return true if RHS is a suitable operand for a widening multiplication,
2463 assuming a target type of TYPE.
2464 There are two cases:
2465
2466 - RHS makes some value at least twice as wide. Store that value
2467 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2468
2469 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2470 but leave *TYPE_OUT untouched. */
2471
2472 static bool
2473 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2474 tree *new_rhs_out)
2475 {
2476 gimple *stmt;
2477 tree type1, rhs1;
2478
2479 if (TREE_CODE (rhs) == SSA_NAME)
2480 {
2481 stmt = SSA_NAME_DEF_STMT (rhs);
2482 if (is_gimple_assign (stmt))
2483 {
2484 if (! widening_mult_conversion_strippable_p (type, stmt))
2485 rhs1 = rhs;
2486 else
2487 {
2488 rhs1 = gimple_assign_rhs1 (stmt);
2489
2490 if (TREE_CODE (rhs1) == INTEGER_CST)
2491 {
2492 *new_rhs_out = rhs1;
2493 *type_out = NULL;
2494 return true;
2495 }
2496 }
2497 }
2498 else
2499 rhs1 = rhs;
2500
2501 type1 = TREE_TYPE (rhs1);
2502
2503 if (TREE_CODE (type1) != TREE_CODE (type)
2504 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2505 return false;
2506
2507 *new_rhs_out = rhs1;
2508 *type_out = type1;
2509 return true;
2510 }
2511
2512 if (TREE_CODE (rhs) == INTEGER_CST)
2513 {
2514 *new_rhs_out = rhs;
2515 *type_out = NULL;
2516 return true;
2517 }
2518
2519 return false;
2520 }
2521
2522 /* Return true if STMT performs a widening multiplication, assuming the
2523 output type is TYPE. If so, store the unwidened types of the operands
2524 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2525 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2526 and *TYPE2_OUT would give the operands of the multiplication. */
2527
2528 static bool
2529 is_widening_mult_p (gimple *stmt,
2530 tree *type1_out, tree *rhs1_out,
2531 tree *type2_out, tree *rhs2_out)
2532 {
2533 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2534
2535 if (TREE_CODE (type) == INTEGER_TYPE)
2536 {
2537 if (TYPE_OVERFLOW_TRAPS (type))
2538 return false;
2539 }
2540 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2541 return false;
2542
2543 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2544 rhs1_out))
2545 return false;
2546
2547 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2548 rhs2_out))
2549 return false;
2550
2551 if (*type1_out == NULL)
2552 {
2553 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2554 return false;
2555 *type1_out = *type2_out;
2556 }
2557
2558 if (*type2_out == NULL)
2559 {
2560 if (!int_fits_type_p (*rhs2_out, *type1_out))
2561 return false;
2562 *type2_out = *type1_out;
2563 }
2564
2565 /* Ensure that the larger of the two operands comes first. */
2566 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2567 {
2568 std::swap (*type1_out, *type2_out);
2569 std::swap (*rhs1_out, *rhs2_out);
2570 }
2571
2572 return true;
2573 }
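/* For example (using illustrative SSA names), for the multiplication
   "lres_1 = (long) ia_2 * (long) ib_3", with long at least twice as wide
   as int, the conversions are stripped and this returns true with
   *TYPE1_OUT == *TYPE2_OUT == int, *RHS1_OUT == ia_2 and *RHS2_OUT == ib_3.  */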
2574
2575 /* Check to see if the CALL statement is an invocation of copysign
2576 with 1. being the first argument. */
2577 static bool
2578 is_copysign_call_with_1 (gimple *call)
2579 {
2580 gcall *c = dyn_cast <gcall *> (call);
2581 if (! c)
2582 return false;
2583
2584 enum combined_fn code = gimple_call_combined_fn (c);
2585
2586 if (code == CFN_LAST)
2587 return false;
2588
2589 if (builtin_fn_p (code))
2590 {
2591 switch (as_builtin_fn (code))
2592 {
2593 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2594 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2595 return real_onep (gimple_call_arg (c, 0));
2596 default:
2597 return false;
2598 }
2599 }
2600
2601 if (internal_fn_p (code))
2602 {
2603 switch (as_internal_fn (code))
2604 {
2605 case IFN_COPYSIGN:
2606 return real_onep (gimple_call_arg (c, 0));
2607 default:
2608 return false;
2609 }
2610 }
2611
2612 return false;
2613 }
2614
2615 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2616 This only happens when the xorsign optab is defined; if the
2617 pattern is not an xorsign pattern or if expansion fails, FALSE is
2618 returned, otherwise TRUE is returned. */
2619 static bool
2620 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2621 {
2622 tree treeop0, treeop1, lhs, type;
2623 location_t loc = gimple_location (stmt);
2624 lhs = gimple_assign_lhs (stmt);
2625 treeop0 = gimple_assign_rhs1 (stmt);
2626 treeop1 = gimple_assign_rhs2 (stmt);
2627 type = TREE_TYPE (lhs);
2628 machine_mode mode = TYPE_MODE (type);
2629
2630 if (HONOR_SNANS (type))
2631 return false;
2632
2633 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2634 {
2635 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2636 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2637 {
2638 call0 = SSA_NAME_DEF_STMT (treeop1);
2639 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2640 return false;
2641
2642 treeop1 = treeop0;
2643 }
2644 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2645 return false;
2646
2647 gcall *c = as_a<gcall*> (call0);
2648 treeop0 = gimple_call_arg (c, 1);
2649
2650 gcall *call_stmt
2651 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2652 gimple_set_lhs (call_stmt, lhs);
2653 gimple_set_location (call_stmt, loc);
2654 gsi_replace (gsi, call_stmt, true);
2655 return true;
2656 }
2657
2658 return false;
2659 }
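/* For example (using illustrative names), for
     t_1 = __builtin_copysign (1.0, y_2);
     z_3 = x_4 * t_1;
   the multiplication is replaced above by "z_3 = .XORSIGN (x_4, y_2)",
   provided t_1 has no other uses and the xorsign optab exists for the mode.  */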
2660
2661 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2662 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2663 value is true iff we converted the statement. */
2664
2665 static bool
2666 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2667 {
2668 tree lhs, rhs1, rhs2, type, type1, type2;
2669 enum insn_code handler;
2670 scalar_int_mode to_mode, from_mode, actual_mode;
2671 optab op;
2672 int actual_precision;
2673 location_t loc = gimple_location (stmt);
2674 bool from_unsigned1, from_unsigned2;
2675
2676 lhs = gimple_assign_lhs (stmt);
2677 type = TREE_TYPE (lhs);
2678 if (TREE_CODE (type) != INTEGER_TYPE)
2679 return false;
2680
2681 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2682 return false;
2683
2684 to_mode = SCALAR_INT_TYPE_MODE (type);
2685 from_mode = SCALAR_INT_TYPE_MODE (type1);
2686 if (to_mode == from_mode)
2687 return false;
2688
2689 from_unsigned1 = TYPE_UNSIGNED (type1);
2690 from_unsigned2 = TYPE_UNSIGNED (type2);
2691
2692 if (from_unsigned1 && from_unsigned2)
2693 op = umul_widen_optab;
2694 else if (!from_unsigned1 && !from_unsigned2)
2695 op = smul_widen_optab;
2696 else
2697 op = usmul_widen_optab;
2698
2699 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2700 &actual_mode);
2701
2702 if (handler == CODE_FOR_nothing)
2703 {
2704 if (op != smul_widen_optab)
2705 {
2706 /* We can use a signed multiply with unsigned types as long as
2707 there is a wider mode to use, or it is the smaller of the two
2708 types that is unsigned. Note that type1 >= type2, always. */
2709 if ((TYPE_UNSIGNED (type1)
2710 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2711 || (TYPE_UNSIGNED (type2)
2712 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2713 {
2714 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2715 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2716 return false;
2717 }
2718
2719 op = smul_widen_optab;
2720 handler = find_widening_optab_handler_and_mode (op, to_mode,
2721 from_mode,
2722 &actual_mode);
2723
2724 if (handler == CODE_FOR_nothing)
2725 return false;
2726
2727 from_unsigned1 = from_unsigned2 = false;
2728 }
2729 else
2730 {
2731 /* Expand can synthesize smul_widen_optab if the target
2732 supports umul_widen_optab. */
2733 op = umul_widen_optab;
2734 handler = find_widening_optab_handler_and_mode (op, to_mode,
2735 from_mode,
2736 &actual_mode);
2737 if (handler == CODE_FOR_nothing)
2738 return false;
2739 }
2740 }
2741
2742 /* Ensure that the inputs to the handler are in the correct precision
2743 for the opcode. This will be the full mode size. */
2744 actual_precision = GET_MODE_PRECISION (actual_mode);
2745 if (2 * actual_precision > TYPE_PRECISION (type))
2746 return false;
2747 if (actual_precision != TYPE_PRECISION (type1)
2748 || from_unsigned1 != TYPE_UNSIGNED (type1))
2749 rhs1 = build_and_insert_cast (gsi, loc,
2750 build_nonstandard_integer_type
2751 (actual_precision, from_unsigned1), rhs1);
2752 if (actual_precision != TYPE_PRECISION (type2)
2753 || from_unsigned2 != TYPE_UNSIGNED (type2))
2754 rhs2 = build_and_insert_cast (gsi, loc,
2755 build_nonstandard_integer_type
2756 (actual_precision, from_unsigned2), rhs2);
2757
2758 /* Handle constants. */
2759 if (TREE_CODE (rhs1) == INTEGER_CST)
2760 rhs1 = fold_convert (type1, rhs1);
2761 if (TREE_CODE (rhs2) == INTEGER_CST)
2762 rhs2 = fold_convert (type2, rhs2);
2763
2764 gimple_assign_set_rhs1 (stmt, rhs1);
2765 gimple_assign_set_rhs2 (stmt, rhs2);
2766 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2767 update_stmt (stmt);
2768 widen_mul_stats.widen_mults_inserted++;
2769 return true;
2770 }
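/* For example (using illustrative names), on a target with a
   16-bit x 16-bit -> 32-bit multiply pattern, the statement
   "ires_1 = (int) sa_2 * (int) sb_3" is rewritten above into a
   WIDEN_MULT_EXPR of sa_2 and sb_3, so the separate extensions and the
   full-width multiply are no longer needed.  */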
2771
2772 /* Process a single gimple statement STMT, which is found at the
2773 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2774 rhs (given by CODE), and try to convert it into a
2775 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2776 is true iff we converted the statement. */
2777
2778 static bool
2779 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2780 enum tree_code code)
2781 {
2782 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2783 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2784 tree type, type1, type2, optype;
2785 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2786 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2787 optab this_optab;
2788 enum tree_code wmult_code;
2789 enum insn_code handler;
2790 scalar_mode to_mode, from_mode, actual_mode;
2791 location_t loc = gimple_location (stmt);
2792 int actual_precision;
2793 bool from_unsigned1, from_unsigned2;
2794
2795 lhs = gimple_assign_lhs (stmt);
2796 type = TREE_TYPE (lhs);
2797 if (TREE_CODE (type) != INTEGER_TYPE
2798 && TREE_CODE (type) != FIXED_POINT_TYPE)
2799 return false;
2800
2801 if (code == MINUS_EXPR)
2802 wmult_code = WIDEN_MULT_MINUS_EXPR;
2803 else
2804 wmult_code = WIDEN_MULT_PLUS_EXPR;
2805
2806 rhs1 = gimple_assign_rhs1 (stmt);
2807 rhs2 = gimple_assign_rhs2 (stmt);
2808
2809 if (TREE_CODE (rhs1) == SSA_NAME)
2810 {
2811 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2812 if (is_gimple_assign (rhs1_stmt))
2813 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2814 }
2815
2816 if (TREE_CODE (rhs2) == SSA_NAME)
2817 {
2818 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2819 if (is_gimple_assign (rhs2_stmt))
2820 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2821 }
2822
2823 /* Allow for one conversion statement between the multiply
2824 and addition/subtraction statement. If there is more than
2825 one conversion then we assume they would invalidate this
2826 transformation. If that's not the case then they should have
2827 been folded before now. */
2828 if (CONVERT_EXPR_CODE_P (rhs1_code))
2829 {
2830 conv1_stmt = rhs1_stmt;
2831 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2832 if (TREE_CODE (rhs1) == SSA_NAME)
2833 {
2834 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2835 if (is_gimple_assign (rhs1_stmt))
2836 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2837 }
2838 else
2839 return false;
2840 }
2841 if (CONVERT_EXPR_CODE_P (rhs2_code))
2842 {
2843 conv2_stmt = rhs2_stmt;
2844 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2845 if (TREE_CODE (rhs2) == SSA_NAME)
2846 {
2847 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2848 if (is_gimple_assign (rhs2_stmt))
2849 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2850 }
2851 else
2852 return false;
2853 }
2854
2855 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2856 is_widening_mult_p, but we still need the rhs returns.
2857
2858 It might also appear that it would be sufficient to use the existing
2859 operands of the widening multiply, but that would limit the choice of
2860 multiply-and-accumulate instructions.
2861
2862 If the widened-multiplication result has more than one uses, it is
2863 probably wiser not to do the conversion. Also restrict this operation
2864 to single basic block to avoid moving the multiply to a different block
2865 with a higher execution frequency. */
2866 if (code == PLUS_EXPR
2867 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2868 {
2869 if (!has_single_use (rhs1)
2870 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2871 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2872 &type2, &mult_rhs2))
2873 return false;
2874 add_rhs = rhs2;
2875 conv_stmt = conv1_stmt;
2876 }
2877 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2878 {
2879 if (!has_single_use (rhs2)
2880 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2881 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2882 &type2, &mult_rhs2))
2883 return false;
2884 add_rhs = rhs1;
2885 conv_stmt = conv2_stmt;
2886 }
2887 else
2888 return false;
2889
2890 to_mode = SCALAR_TYPE_MODE (type);
2891 from_mode = SCALAR_TYPE_MODE (type1);
2892 if (to_mode == from_mode)
2893 return false;
2894
2895 from_unsigned1 = TYPE_UNSIGNED (type1);
2896 from_unsigned2 = TYPE_UNSIGNED (type2);
2897 optype = type1;
2898
2899 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2900 if (from_unsigned1 != from_unsigned2)
2901 {
2902 if (!INTEGRAL_TYPE_P (type))
2903 return false;
2904 /* We can use a signed multiply with unsigned types as long as
2905 there is a wider mode to use, or it is the smaller of the two
2906 types that is unsigned. Note that type1 >= type2, always. */
2907 if ((from_unsigned1
2908 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2909 || (from_unsigned2
2910 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2911 {
2912 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2913 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2914 return false;
2915 }
2916
2917 from_unsigned1 = from_unsigned2 = false;
2918 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2919 false);
2920 }
2921
2922 /* If there was a conversion between the multiply and addition
2923 then we need to make sure it fits a multiply-and-accumulate.
2924 There should be a single mode change which does not change the
2925 value. */
2926 if (conv_stmt)
2927 {
2928 /* We use the original, unmodified data types for this. */
2929 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2930 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2931 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2932 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2933
2934 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2935 {
2936 /* Conversion is a truncate. */
2937 if (TYPE_PRECISION (to_type) < data_size)
2938 return false;
2939 }
2940 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2941 {
2942 /* Conversion is an extend. Check it's the right sort. */
2943 if (TYPE_UNSIGNED (from_type) != is_unsigned
2944 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2945 return false;
2946 }
2947 /* else convert is a no-op for our purposes. */
2948 }
2949
2950 /* Verify that the machine can perform a widening multiply
2951 accumulate in this mode/signedness combination, otherwise
2952 this transformation is likely to pessimize code. */
2953 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2954 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2955 from_mode, &actual_mode);
2956
2957 if (handler == CODE_FOR_nothing)
2958 return false;
2959
2960 /* Ensure that the inputs to the handler are in the correct precision
2961 for the opcode. This will be the full mode size. */
2962 actual_precision = GET_MODE_PRECISION (actual_mode);
2963 if (actual_precision != TYPE_PRECISION (type1)
2964 || from_unsigned1 != TYPE_UNSIGNED (type1))
2965 mult_rhs1 = build_and_insert_cast (gsi, loc,
2966 build_nonstandard_integer_type
2967 (actual_precision, from_unsigned1),
2968 mult_rhs1);
2969 if (actual_precision != TYPE_PRECISION (type2)
2970 || from_unsigned2 != TYPE_UNSIGNED (type2))
2971 mult_rhs2 = build_and_insert_cast (gsi, loc,
2972 build_nonstandard_integer_type
2973 (actual_precision, from_unsigned2),
2974 mult_rhs2);
2975
2976 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2977 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2978
2979 /* Handle constants. */
2980 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2981 mult_rhs1 = fold_convert (type1, mult_rhs1);
2982 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2983 mult_rhs2 = fold_convert (type2, mult_rhs2);
2984
2985 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2986 add_rhs);
2987 update_stmt (gsi_stmt (*gsi));
2988 widen_mul_stats.maccs_inserted++;
2989 return true;
2990 }
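/* For example (using illustrative names), for
     tmp_1 = (int) sa_2 * (int) sb_3;
     res_4 = tmp_1 + acc_5;
   the addition is rewritten above into
   "res_4 = WIDEN_MULT_PLUS_EXPR <sa_2, sb_3, acc_5>" when the target has a
   suitable widening multiply-accumulate pattern and tmp_1 has no other
   uses.  */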
2991
2992 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2993 OP2 and which we know is used in statements that can be, together with the
2994 multiplication, converted to FMAs, perform the transformation. */
2995
2996 static void
2997 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2998 {
2999 tree type = TREE_TYPE (mul_result);
3000 gimple *use_stmt;
3001 imm_use_iterator imm_iter;
3002 gcall *fma_stmt;
3003
3004 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3005 {
3006 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3007 tree addop, mulop1 = op1, result = mul_result;
3008 bool negate_p = false;
3009 gimple_seq seq = NULL;
3010
3011 if (is_gimple_debug (use_stmt))
3012 continue;
3013
3014 if (is_gimple_assign (use_stmt)
3015 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3016 {
3017 result = gimple_assign_lhs (use_stmt);
3018 use_operand_p use_p;
3019 gimple *neguse_stmt;
3020 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3021 gsi_remove (&gsi, true);
3022 release_defs (use_stmt);
3023
3024 use_stmt = neguse_stmt;
3025 gsi = gsi_for_stmt (use_stmt);
3026 negate_p = true;
3027 }
3028
3029 tree cond, else_value, ops[3];
3030 tree_code code;
3031 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3032 ops, &else_value))
3033 gcc_unreachable ();
3034 addop = ops[0] == result ? ops[1] : ops[0];
3035
3036 if (code == MINUS_EXPR)
3037 {
3038 if (ops[0] == result)
3039 /* a * b - c -> a * b + (-c) */
3040 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3041 else
3042 /* a - b * c -> (-b) * c + a */
3043 negate_p = !negate_p;
3044 }
3045
3046 if (negate_p)
3047 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3048
3049 if (seq)
3050 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3051
3052 if (cond)
3053 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3054 op2, addop, else_value);
3055 else
3056 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3057 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3058 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3059 use_stmt));
3060 gsi_replace (&gsi, fma_stmt, true);
3061 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3062 regardless of where the negation occurs. */
3063 gimple *orig_stmt = gsi_stmt (gsi);
3064 if (fold_stmt (&gsi, follow_all_ssa_edges))
3065 {
3066 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3067 gcc_unreachable ();
3068 update_stmt (gsi_stmt (gsi));
3069 }
3070
3071 if (dump_file && (dump_flags & TDF_DETAILS))
3072 {
3073 fprintf (dump_file, "Generated FMA ");
3074 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3075 fprintf (dump_file, "\n");
3076 }
3077
3078 /* If the FMA result is negated in a single use, fold the negation
3079 too. */
3080 orig_stmt = gsi_stmt (gsi);
3081 use_operand_p use_p;
3082 gimple *neg_stmt;
3083 if (is_gimple_call (orig_stmt)
3084 && gimple_call_internal_p (orig_stmt)
3085 && gimple_call_lhs (orig_stmt)
3086 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3087 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3088 && is_gimple_assign (neg_stmt)
3089 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3090 && !stmt_could_throw_p (cfun, neg_stmt))
3091 {
3092 gsi = gsi_for_stmt (neg_stmt);
3093 if (fold_stmt (&gsi, follow_all_ssa_edges))
3094 {
3095 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3096 gcc_unreachable ();
3097 update_stmt (gsi_stmt (gsi));
3098 if (dump_file && (dump_flags & TDF_DETAILS))
3099 {
3100 fprintf (dump_file, "Folded FMA negation ");
3101 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3102 fprintf (dump_file, "\n");
3103 }
3104 }
3105 }
3106
3107 widen_mul_stats.fmas_inserted++;
3108 }
3109 }
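/* For example, if MUL_RESULT is _1 from "_1 = a_2 * b_3" and one of its uses
   is "x_4 = _1 + c_5", that use is replaced above by
   "x_4 = .FMA (a_2, b_3, c_5)"; a use such as "x_4 = c_5 - _1" first becomes
   an FMA with a negated multiplicand, which fold_stmt may then turn into
   .FNMA (a_2, b_3, c_5) where the target supports it.  */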
3110
3111 /* Data necessary to perform the actual transformation from a multiplication
3112 and an addition to an FMA after decision is taken it should be done and to
3113 then delete the multiplication statement from the function IL. */
3114
3115 struct fma_transformation_info
3116 {
3117 gimple *mul_stmt;
3118 tree mul_result;
3119 tree op1;
3120 tree op2;
3121 };
3122
3123 /* Structure containing the current state of FMA deferring, i.e. whether we are
3124 deferring, whether to continue deferring, and all data necessary to come
3125 back and perform all deferred transformations. */
3126
3127 class fma_deferring_state
3128 {
3129 public:
3130 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3131 do any deferring. */
3132
3133 fma_deferring_state (bool perform_deferring)
3134 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3135 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3136
3137 /* List of FMA candidates for which the transformation has been determined
3138 possible but which at this point in the BB analysis we do not consider
3139 beneficial. */
3140 auto_vec<fma_transformation_info, 8> m_candidates;
3141
3142 /* Set of results of multiplication that are part of an already deferred FMA
3143 candidates. */
3144 hash_set<tree> m_mul_result_set;
3145
3146 /* The PHI that supposedly feeds back result of a FMA to another over loop
3147 boundary. */
3148 gphi *m_initial_phi;
3149
3150 /* Result of the last produced FMA candidate or NULL if there has not been
3151 one. */
3152 tree m_last_result;
3153
3154 /* If true, deferring might still be profitable. If false, transform all
3155 candidates and no longer defer. */
3156 bool m_deferring_p;
3157 };
3158
3159 /* Transform all deferred FMA candidates and mark STATE as no longer
3160 deferring. */
3161
3162 static void
3163 cancel_fma_deferring (fma_deferring_state *state)
3164 {
3165 if (!state->m_deferring_p)
3166 return;
3167
3168 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3169 {
3170 if (dump_file && (dump_flags & TDF_DETAILS))
3171 fprintf (dump_file, "Generating deferred FMA\n");
3172
3173 const fma_transformation_info &fti = state->m_candidates[i];
3174 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3175
3176 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3177 gsi_remove (&gsi, true);
3178 release_defs (fti.mul_stmt);
3179 }
3180 state->m_deferring_p = false;
3181 }
3182
3183 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3184 Otherwise return NULL. */
3185
3186 static gphi *
3187 result_of_phi (tree op)
3188 {
3189 if (TREE_CODE (op) != SSA_NAME)
3190 return NULL;
3191
3192 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3193 }
3194
3195 /* After processing statements of a BB and recording STATE, return true if the
3196 initial phi is fed by the last FMA candidate result or one such result from
3197 previously processed BBs marked in LAST_RESULT_SET. */
3198
3199 static bool
3200 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3201 hash_set<tree> *last_result_set)
3202 {
3203 ssa_op_iter iter;
3204 use_operand_p use;
3205 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3206 {
3207 tree t = USE_FROM_PTR (use);
3208 if (t == state->m_last_result
3209 || last_result_set->contains (t))
3210 return true;
3211 }
3212
3213 return false;
3214 }
3215
3216 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3217 with uses in additions and subtractions to form fused multiply-add
3218 operations. Returns true if successful and MUL_STMT should be removed.
3219 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3220 on MUL_COND, otherwise it is unconditional.
3221
3222 If STATE indicates that we are deferring FMA transformation, that means
3223 that we do not produce FMAs for basic blocks which look like:
3224
3225 <bb 6>
3226 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3227 _65 = _14 * _16;
3228 accumulator_66 = _65 + accumulator_111;
3229
3230 or its unrolled version, i.e. with several FMA candidates that feed result
3231 of one into the addend of another. Instead, we add them to a list in STATE
3232 and if we later discover an FMA candidate that is not part of such a chain,
3233 we go back and perform all deferred past candidates. */
3234
3235 static bool
3236 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3237 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3238 {
3239 tree mul_result = gimple_get_lhs (mul_stmt);
3240 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3241 if the statement was left just for the side-effects. */
3242 if (!mul_result)
3243 return false;
3244 tree type = TREE_TYPE (mul_result);
3245 gimple *use_stmt, *neguse_stmt;
3246 use_operand_p use_p;
3247 imm_use_iterator imm_iter;
3248
3249 if (FLOAT_TYPE_P (type)
3250 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3251 return false;
3252
3253 /* We don't want to do bitfield reduction ops. */
3254 if (INTEGRAL_TYPE_P (type)
3255 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3256 return false;
3257
3258 /* If the target doesn't support it, don't generate it. We assume that
3259 if fma isn't available then fms, fnma or fnms are not either. */
3260 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3261 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3262 return false;
3263
3264 /* If the multiplication has zero uses, it is kept around probably because
3265 of -fnon-call-exceptions. Don't optimize it away in that case,
3266 it is DCE's job. */
3267 if (has_zero_uses (mul_result))
3268 return false;
3269
3270 bool check_defer
3271 = (state->m_deferring_p
3272 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3273 param_avoid_fma_max_bits));
3274 bool defer = check_defer;
3275 bool seen_negate_p = false;
3276 /* Make sure that the multiplication statement becomes dead after
3277 the transformation, thus that all uses are transformed to FMAs.
3278 This means we assume that an FMA operation has the same cost
3279 as an addition. */
3280 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3281 {
3282 tree result = mul_result;
3283 bool negate_p = false;
3284
3285 use_stmt = USE_STMT (use_p);
3286
3287 if (is_gimple_debug (use_stmt))
3288 continue;
3289
3290 /* For now restrict this operation to single basic blocks. In theory
3291 we would want to support sinking the multiplication in
3292 m = a*b;
3293 if ()
3294 ma = m + c;
3295 else
3296 d = m;
3297 to form a fma in the then block and sink the multiplication to the
3298 else block. */
3299 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3300 return false;
3301
3302 /* A negate on the multiplication leads to FNMA. */
3303 if (is_gimple_assign (use_stmt)
3304 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3305 {
3306 ssa_op_iter iter;
3307 use_operand_p usep;
3308
3309 /* If (due to earlier missed optimizations) we have two
3310 negates of the same value, treat them as equivalent
3311 to a single negate with multiple uses. */
3312 if (seen_negate_p)
3313 return false;
3314
3315 result = gimple_assign_lhs (use_stmt);
3316
3317 /* Make sure the negate statement becomes dead with this
3318 single transformation. */
3319 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3320 &use_p, &neguse_stmt))
3321 return false;
3322
3323 /* Make sure the multiplication isn't also used on that stmt. */
3324 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3325 if (USE_FROM_PTR (usep) == mul_result)
3326 return false;
3327
3328 /* Re-validate. */
3329 use_stmt = neguse_stmt;
3330 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3331 return false;
3332
3333 negate_p = seen_negate_p = true;
3334 }
3335
3336 tree cond, else_value, ops[3];
3337 tree_code code;
3338 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3339 &else_value))
3340 return false;
3341
3342 switch (code)
3343 {
3344 case MINUS_EXPR:
3345 if (ops[1] == result)
3346 negate_p = !negate_p;
3347 break;
3348 case PLUS_EXPR:
3349 break;
3350 default:
3351 /* FMA can only be formed from PLUS and MINUS. */
3352 return false;
3353 }
3354
3355 if (mul_cond && cond != mul_cond)
3356 return false;
3357
3358 if (cond)
3359 {
3360 if (cond == result || else_value == result)
3361 return false;
3362 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3363 return false;
3364 }
3365
3366 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3367 we'll visit later, we might be able to get a more profitable
3368 match with fnma.
3369 OTOH, if we don't, a negate / fma pair has likely lower latency
3370 than a mult / subtract pair. */
3371 if (code == MINUS_EXPR
3372 && !negate_p
3373 && ops[0] == result
3374 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3375 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3376 && TREE_CODE (ops[1]) == SSA_NAME
3377 && has_single_use (ops[1]))
3378 {
3379 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3380 if (is_gimple_assign (stmt2)
3381 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3382 return false;
3383 }
3384
3385 /* We can't handle a * b + a * b. */
3386 if (ops[0] == ops[1])
3387 return false;
3388 /* If deferring, make sure we are not looking at an instruction that
3389 wouldn't have existed if we were not. */
3390 if (state->m_deferring_p
3391 && (state->m_mul_result_set.contains (ops[0])
3392 || state->m_mul_result_set.contains (ops[1])))
3393 return false;
3394
3395 if (check_defer)
3396 {
3397 tree use_lhs = gimple_get_lhs (use_stmt);
3398 if (state->m_last_result)
3399 {
3400 if (ops[1] == state->m_last_result
3401 || ops[0] == state->m_last_result)
3402 defer = true;
3403 else
3404 defer = false;
3405 }
3406 else
3407 {
3408 gcc_checking_assert (!state->m_initial_phi);
3409 gphi *phi;
3410 if (ops[0] == result)
3411 phi = result_of_phi (ops[1]);
3412 else
3413 {
3414 gcc_assert (ops[1] == result);
3415 phi = result_of_phi (ops[0]);
3416 }
3417
3418 if (phi)
3419 {
3420 state->m_initial_phi = phi;
3421 defer = true;
3422 }
3423 else
3424 defer = false;
3425 }
3426
3427 state->m_last_result = use_lhs;
3428 check_defer = false;
3429 }
3430 else
3431 defer = false;
3432
3433 /* While it is possible to validate whether or not the exact form that
3434 we've recognized is available in the backend, the assumption is that
3435 if the deferring logic above did not trigger, the transformation is
3436 never a loss. For instance, suppose the target only has the plain FMA
3437 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3438 MUL+SUB for FMA+NEG, which is still two operations. Consider
3439 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3440 form the two NEGs are independent and could be run in parallel. */
3441 }
3442
3443 if (defer)
3444 {
3445 fma_transformation_info fti;
3446 fti.mul_stmt = mul_stmt;
3447 fti.mul_result = mul_result;
3448 fti.op1 = op1;
3449 fti.op2 = op2;
3450 state->m_candidates.safe_push (fti);
3451 state->m_mul_result_set.add (mul_result);
3452
3453 if (dump_file && (dump_flags & TDF_DETAILS))
3454 {
3455 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3456 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3457 fprintf (dump_file, "\n");
3458 }
3459
3460 return false;
3461 }
3462 else
3463 {
3464 if (state->m_deferring_p)
3465 cancel_fma_deferring (state);
3466 convert_mult_to_fma_1 (mul_result, op1, op2);
3467 return true;
3468 }
3469 }
3470
3471
3472 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3473 a check for non-zero like:
3474 _1 = x_4(D) * y_5(D);
3475 *res_7(D) = _1;
3476 if (x_4(D) != 0)
3477 goto <bb 3>; [50.00%]
3478 else
3479 goto <bb 4>; [50.00%]
3480
3481 <bb 3> [local count: 536870913]:
3482 _2 = _1 / x_4(D);
3483 _9 = _2 != y_5(D);
3484 _10 = (int) _9;
3485
3486 <bb 4> [local count: 1073741824]:
3487 # iftmp.0_3 = PHI <_10(3), 0(2)>
3488 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3489 optimize the x_4(D) != 0 condition to 1. */
3490
3491 static void
3492 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3493 gimple *div_stmt, bool *cfg_changed)
3494 {
3495 basic_block bb = gimple_bb (cond_stmt);
3496 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3497 return;
3498 edge pred_edge = single_pred_edge (bb);
3499 basic_block pred_bb = pred_edge->src;
3500 if (EDGE_COUNT (pred_bb->succs) != 2)
3501 return;
3502 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3503 edge other_succ_edge = NULL;
3504 if (gimple_code (cond_stmt) == GIMPLE_COND)
3505 {
3506 if (EDGE_COUNT (bb->succs) != 2)
3507 return;
3508 other_succ_edge = EDGE_SUCC (bb, 0);
3509 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3510 {
3511 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3512 other_succ_edge = EDGE_SUCC (bb, 1);
3513 }
3514 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3515 other_succ_edge = EDGE_SUCC (bb, 0);
3516 if (other_edge->dest != other_succ_edge->dest)
3517 return;
3518 }
3519 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3520 return;
3521 gimple *zero_cond = last_stmt (pred_bb);
3522 if (zero_cond == NULL
3523 || gimple_code (zero_cond) != GIMPLE_COND
3524 || (gimple_cond_code (zero_cond)
3525 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3526 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3527 return;
3528 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3529 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3530 return;
3531 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3532 {
3533 /* Allow the divisor to be the result of a same precision cast
3534 from zero_cond_lhs. */
3535 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3536 if (TREE_CODE (rhs2) != SSA_NAME)
3537 return;
3538 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3539 if (!gimple_assign_cast_p (g)
3540 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3541 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3542 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3543 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3544 return;
3545 }
3546 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3547 mul_stmts.quick_push (div_stmt);
3548 if (is_gimple_debug (gsi_stmt (gsi)))
3549 gsi_next_nondebug (&gsi);
3550 unsigned cast_count = 0;
3551 while (gsi_stmt (gsi) != cond_stmt)
3552 {
3553 /* If the original mul_stmt has a single use, allow it in the same bb;
3554 we are then looking just at __builtin_mul_overflow_p.
3555 Though, in that case the original mul_stmt will be replaced
3556 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3557 gimple *mul_stmt;
3558 unsigned int i;
3559 bool ok = false;
3560 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3561 {
3562 if (gsi_stmt (gsi) == mul_stmt)
3563 {
3564 ok = true;
3565 break;
3566 }
3567 }
3568 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3569 ok = true;
3570 if (!ok)
3571 return;
3572 gsi_next_nondebug (&gsi);
3573 }
3574 if (gimple_code (cond_stmt) == GIMPLE_COND)
3575 {
3576 basic_block succ_bb = other_edge->dest;
3577 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3578 gsi_next (&gpi))
3579 {
3580 gphi *phi = gpi.phi ();
3581 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3582 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3583 if (!operand_equal_p (v1, v2, 0))
3584 return;
3585 }
3586 }
3587 else
3588 {
3589 tree lhs = gimple_assign_lhs (cond_stmt);
3590 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3591 return;
3592 gsi_next_nondebug (&gsi);
3593 if (!gsi_end_p (gsi))
3594 {
3595 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3596 return;
3597 gimple *cast_stmt = gsi_stmt (gsi);
3598 if (!gimple_assign_cast_p (cast_stmt))
3599 return;
3600 tree new_lhs = gimple_assign_lhs (cast_stmt);
3601 gsi_next_nondebug (&gsi);
3602 if (!gsi_end_p (gsi)
3603 || !new_lhs
3604 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3605 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3606 return;
3607 lhs = new_lhs;
3608 }
3609 edge succ_edge = single_succ_edge (bb);
3610 basic_block succ_bb = succ_edge->dest;
3611 gsi = gsi_start_phis (succ_bb);
3612 if (gsi_end_p (gsi))
3613 return;
3614 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3615 gsi_next (&gsi);
3616 if (!gsi_end_p (gsi))
3617 return;
3618 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3619 return;
3620 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3621 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3622 {
3623 tree cond = gimple_assign_rhs1 (cond_stmt);
3624 if (TREE_CODE (cond) == NE_EXPR)
3625 {
3626 if (!operand_equal_p (other_val,
3627 gimple_assign_rhs3 (cond_stmt), 0))
3628 return;
3629 }
3630 else if (!operand_equal_p (other_val,
3631 gimple_assign_rhs2 (cond_stmt), 0))
3632 return;
3633 }
3634 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3635 {
3636 if (!integer_zerop (other_val))
3637 return;
3638 }
3639 else if (!integer_onep (other_val))
3640 return;
3641 }
3642 gcond *zero_gcond = as_a <gcond *> (zero_cond);
3643 if (pred_edge->flags & EDGE_TRUE_VALUE)
3644 gimple_cond_make_true (zero_gcond);
3645 else
3646 gimple_cond_make_false (zero_gcond);
3647 update_stmt (zero_cond);
3648 *cfg_changed = true;
3649 }
3650
3651 /* Helper function for arith_overflow_check_p. Return true
3652 if VAL1 is equal to VAL2 cast to the corresponding integral type
3653 with the other signedness, or vice versa. */
3654
3655 static bool
3656 arith_cast_equal_p (tree val1, tree val2)
3657 {
3658 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3659 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3660 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3661 return false;
3662 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3663 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3664 return true;
3665 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3666 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3667 return true;
3668 return false;
3669 }
3670
3671 /* Helper function of match_arith_overflow. Return 1
3672 if USE_STMT is an unsigned overflow check of the form ovf != 0
3673 for STMT, -1 if it is an unsigned overflow check of the form
3674 ovf == 0, and 0 otherwise. */
3675
3676 static int
3677 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3678 tree maxval, tree *other)
3679 {
3680 enum tree_code ccode = ERROR_MARK;
3681 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3682 enum tree_code code = gimple_assign_rhs_code (stmt);
3683 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3684 tree rhs1 = gimple_assign_rhs1 (stmt);
3685 tree rhs2 = gimple_assign_rhs2 (stmt);
3686 tree multop = NULL_TREE, divlhs = NULL_TREE;
3687 gimple *cur_use_stmt = use_stmt;
3688
3689 if (code == MULT_EXPR)
3690 {
3691 if (!is_gimple_assign (use_stmt))
3692 return 0;
3693 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3694 return 0;
3695 if (gimple_assign_rhs1 (use_stmt) != lhs)
3696 return 0;
3697 if (cast_stmt)
3698 {
3699 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3700 multop = rhs2;
3701 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3702 multop = rhs1;
3703 else
3704 return 0;
3705 }
3706 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3707 multop = rhs2;
3708 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3709 multop = rhs1;
3710 else
3711 return 0;
3712 if (stmt_ends_bb_p (use_stmt))
3713 return 0;
3714 divlhs = gimple_assign_lhs (use_stmt);
3715 if (!divlhs)
3716 return 0;
3717 use_operand_p use;
3718 if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3719 return 0;
3720 }
3721 if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3722 {
3723 ccode = gimple_cond_code (cur_use_stmt);
3724 crhs1 = gimple_cond_lhs (cur_use_stmt);
3725 crhs2 = gimple_cond_rhs (cur_use_stmt);
3726 }
3727 else if (is_gimple_assign (cur_use_stmt))
3728 {
3729 if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3730 {
3731 ccode = gimple_assign_rhs_code (cur_use_stmt);
3732 crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3733 crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3734 }
3735 else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3736 {
3737 tree cond = gimple_assign_rhs1 (cur_use_stmt);
3738 if (COMPARISON_CLASS_P (cond))
3739 {
3740 ccode = TREE_CODE (cond);
3741 crhs1 = TREE_OPERAND (cond, 0);
3742 crhs2 = TREE_OPERAND (cond, 1);
3743 }
3744 else
3745 return 0;
3746 }
3747 else
3748 return 0;
3749 }
3750 else
3751 return 0;
3752
3753 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3754 return 0;
3755
3756 switch (ccode)
3757 {
3758 case GT_EXPR:
3759 case LE_EXPR:
3760 if (maxval)
3761 {
3762 /* r = a + b; r > maxval or r <= maxval */
3763 if (crhs1 == lhs
3764 && TREE_CODE (crhs2) == INTEGER_CST
3765 && tree_int_cst_equal (crhs2, maxval))
3766 return ccode == GT_EXPR ? 1 : -1;
3767 break;
3768 }
3769 /* r = a - b; r > a or r <= a
3770 r = a + b; a > r or a <= r or b > r or b <= r. */
3771 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3772 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3773 && crhs2 == lhs))
3774 return ccode == GT_EXPR ? 1 : -1;
3775 /* r = ~a; b > r or b <= r. */
3776 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3777 {
3778 if (other)
3779 *other = crhs1;
3780 return ccode == GT_EXPR ? 1 : -1;
3781 }
3782 break;
3783 case LT_EXPR:
3784 case GE_EXPR:
3785 if (maxval)
3786 break;
3787 /* r = a - b; a < r or a >= r
3788 r = a + b; r < a or r >= a or r < b or r >= b. */
3789 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3790 || (code == PLUS_EXPR && crhs1 == lhs
3791 && (crhs2 == rhs1 || crhs2 == rhs2)))
3792 return ccode == LT_EXPR ? 1 : -1;
3793 /* r = ~a; r < b or r >= b. */
3794 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3795 {
3796 if (other)
3797 *other = crhs2;
3798 return ccode == LT_EXPR ? 1 : -1;
3799 }
3800 break;
3801 case EQ_EXPR:
3802 case NE_EXPR:
3803 /* r = a * b; _1 = r / a; _1 == b
3804 r = a * b; _1 = r / b; _1 == a
3805 r = a * b; _1 = r / a; _1 != b
3806 r = a * b; _1 = r / b; _1 != a. */
3807 if (code == MULT_EXPR)
3808 {
3809 if (cast_stmt)
3810 {
3811 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3812 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3813 {
3814 use_stmt = cur_use_stmt;
3815 return ccode == NE_EXPR ? 1 : -1;
3816 }
3817 }
3818 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3819 || (crhs2 == divlhs && crhs1 == multop))
3820 {
3821 use_stmt = cur_use_stmt;
3822 return ccode == NE_EXPR ? 1 : -1;
3823 }
3824 }
3825 break;
3826 default:
3827 break;
3828 }
3829 return 0;
3830 }
3831
3832 /* Recognize for unsigned x
3833 x = y - z;
3834 if (x > y)
3835 where there are other uses of x and replace it with
3836 _7 = .SUB_OVERFLOW (y, z);
3837 x = REALPART_EXPR <_7>;
3838 _8 = IMAGPART_EXPR <_7>;
3839 if (_8)
3840 and similarly for addition.
3841
3842 Also recognize:
3843 yc = (type) y;
3844 zc = (type) z;
3845 x = yc + zc;
3846 if (x > max)
3847 where y and z have unsigned types with maximum value max,
3848 there are other uses of x, and all of those cast x
3849 back to that unsigned type; again replace it with
3850 _7 = .ADD_OVERFLOW (y, z);
3851 _9 = REALPART_EXPR <_7>;
3852 _8 = IMAGPART_EXPR <_7>;
3853 if (_8)
3854 and replace (utype) x with _9.
3855
3856 Also recognize:
3857 x = ~z;
3858 if (y > x)
3859 and replace it with
3860 _7 = .ADD_OVERFLOW (y, z);
3861 _8 = IMAGPART_EXPR <_7>;
3862 if (_8)
3863
3864 And also recognize:
3865 z = x * y;
3866 if (x != 0)
3867 goto <bb 3>; [50.00%]
3868 else
3869 goto <bb 4>; [50.00%]
3870
3871 <bb 3> [local count: 536870913]:
3872 _2 = z / x;
3873 _9 = _2 != y;
3874 _10 = (int) _9;
3875
3876 <bb 4> [local count: 1073741824]:
3877 # iftmp.0_3 = PHI <_10(3), 0(2)>
3878 and replace it with
3879 _7 = .MUL_OVERFLOW (x, y);
3880 z = REALPART_EXPR <_7>;
3881 _8 = IMAGPART_EXPR <_7>;
3882 _9 = _8 != 0;
3883 iftmp.0_3 = (int) _9; */
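/* Editorial illustration (hypothetical helpers; the matcher only inspects
   the GIMPLE form, not source syntax): C code that typically produces the
   patterns described above looks like

     unsigned
     sub_chk (unsigned y, unsigned z, int *ovf)
     {
       unsigned x = y - z;
       *ovf = x > y;	// becomes IMAGPART_EXPR of .SUB_OVERFLOW
       return x;
     }

     unsigned
     add_chk (unsigned y, unsigned z, int *ovf)
     {
       unsigned x = y + z;
       *ovf = x < y;	// becomes IMAGPART_EXPR of .ADD_OVERFLOW
       return x;
     }

   Any source producing the same compare of the result against one of the
   operands is handled the same way.  */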
3884
3885 static bool
3886 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3887 enum tree_code code, bool *cfg_changed)
3888 {
3889 tree lhs = gimple_assign_lhs (stmt);
3890 tree type = TREE_TYPE (lhs);
3891 use_operand_p use_p;
3892 imm_use_iterator iter;
3893 bool use_seen = false;
3894 bool ovf_use_seen = false;
3895 gimple *use_stmt;
3896 gimple *add_stmt = NULL;
3897 bool add_first = false;
3898 gimple *cond_stmt = NULL;
3899 gimple *cast_stmt = NULL;
3900 tree cast_lhs = NULL_TREE;
3901
3902 gcc_checking_assert (code == PLUS_EXPR
3903 || code == MINUS_EXPR
3904 || code == MULT_EXPR
3905 || code == BIT_NOT_EXPR);
3906 if (!INTEGRAL_TYPE_P (type)
3907 || !TYPE_UNSIGNED (type)
3908 || has_zero_uses (lhs)
3909 || (code != PLUS_EXPR
3910 && code != MULT_EXPR
3911 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3912 TYPE_MODE (type)) == CODE_FOR_nothing))
3913 return false;
3914
3915 tree rhs1 = gimple_assign_rhs1 (stmt);
3916 tree rhs2 = gimple_assign_rhs2 (stmt);
3917 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3918 {
3919 use_stmt = USE_STMT (use_p);
3920 if (is_gimple_debug (use_stmt))
3921 continue;
3922
3923 tree other = NULL_TREE;
3924 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3925 {
3926 if (code == BIT_NOT_EXPR)
3927 {
3928 gcc_assert (other);
3929 if (TREE_CODE (other) != SSA_NAME)
3930 return false;
3931 if (rhs2 == NULL)
3932 rhs2 = other;
3933 else
3934 return false;
3935 cond_stmt = use_stmt;
3936 }
3937 ovf_use_seen = true;
3938 }
3939 else
3940 {
3941 use_seen = true;
3942 if (code == MULT_EXPR
3943 && cast_stmt == NULL
3944 && gimple_assign_cast_p (use_stmt))
3945 {
3946 cast_lhs = gimple_assign_lhs (use_stmt);
3947 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3948 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3949 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3950 == TYPE_PRECISION (TREE_TYPE (lhs))))
3951 cast_stmt = use_stmt;
3952 else
3953 cast_lhs = NULL_TREE;
3954 }
3955 }
3956 if (ovf_use_seen && use_seen)
3957 break;
3958 }
3959
3960 if (!ovf_use_seen
3961 && code == MULT_EXPR
3962 && cast_stmt)
3963 {
3964 if (TREE_CODE (rhs1) != SSA_NAME
3965 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3966 return false;
3967 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3968 {
3969 use_stmt = USE_STMT (use_p);
3970 if (is_gimple_debug (use_stmt))
3971 continue;
3972
3973 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3974 NULL_TREE, NULL))
3975 ovf_use_seen = true;
3976 }
3977 }
3978 else
3979 {
3980 cast_stmt = NULL;
3981 cast_lhs = NULL_TREE;
3982 }
3983
3984 tree maxval = NULL_TREE;
3985 if (!ovf_use_seen
3986 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3987 || (code == PLUS_EXPR
3988 && optab_handler (uaddv4_optab,
3989 TYPE_MODE (type)) == CODE_FOR_nothing)
3990 || (code == MULT_EXPR
3991 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3992 TYPE_MODE (type)) == CODE_FOR_nothing))
3993 {
3994 if (code != PLUS_EXPR)
3995 return false;
3996 if (TREE_CODE (rhs1) != SSA_NAME
3997 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3998 return false;
3999 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4000 tree type1 = TREE_TYPE (rhs1);
4001 if (!INTEGRAL_TYPE_P (type1)
4002 || !TYPE_UNSIGNED (type1)
4003 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4004 || (TYPE_PRECISION (type1)
4005 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4006 return false;
4007 if (TREE_CODE (rhs2) == INTEGER_CST)
4008 {
4009 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4010 TYPE_PRECISION (type1),
4011 UNSIGNED), 0))
4012 return false;
4013 rhs2 = fold_convert (type1, rhs2);
4014 }
4015 else
4016 {
4017 if (TREE_CODE (rhs2) != SSA_NAME
4018 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4019 return false;
4020 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4021 tree type2 = TREE_TYPE (rhs2);
4022 if (!INTEGRAL_TYPE_P (type2)
4023 || !TYPE_UNSIGNED (type2)
4024 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4025 || (TYPE_PRECISION (type2)
4026 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4027 return false;
4028 }
4029 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4030 type = type1;
4031 else
4032 type = TREE_TYPE (rhs2);
4033
4034 if (TREE_CODE (type) != INTEGER_TYPE
4035 || optab_handler (uaddv4_optab,
4036 TYPE_MODE (type)) == CODE_FOR_nothing)
4037 return false;
4038
4039 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4040 UNSIGNED));
4041 ovf_use_seen = false;
4042 use_seen = false;
4043 basic_block use_bb = NULL;
4044 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4045 {
4046 use_stmt = USE_STMT (use_p);
4047 if (is_gimple_debug (use_stmt))
4048 continue;
4049
4050 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4051 {
4052 ovf_use_seen = true;
4053 use_bb = gimple_bb (use_stmt);
4054 }
4055 else
4056 {
4057 if (!gimple_assign_cast_p (use_stmt)
4058 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4059 return false;
4060 tree use_lhs = gimple_assign_lhs (use_stmt);
4061 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4062 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4063 > TYPE_PRECISION (type)))
4064 return false;
4065 use_seen = true;
4066 }
4067 }
4068 if (!ovf_use_seen)
4069 return false;
4070 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4071 {
4072 if (!use_seen)
4073 return false;
4074 tree new_rhs1 = make_ssa_name (type);
4075 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4076 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4077 rhs1 = new_rhs1;
4078 }
4079 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4080 {
4081 if (!use_seen)
4082 return false;
4083 tree new_rhs2 = make_ssa_name (type);
4084 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4085 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4086 rhs2 = new_rhs2;
4087 }
4088 else if (!use_seen)
4089 {
4090 /* If there are no uses of the wider addition, check whether
4091 forwprop has created a narrower addition instead.
4092 Require it to be in the same bb as the overflow check. */
4093 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4094 {
4095 use_stmt = USE_STMT (use_p);
4096 if (is_gimple_debug (use_stmt))
4097 continue;
4098
4099 if (use_stmt == stmt)
4100 continue;
4101
4102 if (!is_gimple_assign (use_stmt)
4103 || gimple_bb (use_stmt) != use_bb
4104 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4105 continue;
4106
4107 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4108 {
4109 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4110 rhs2, 0))
4111 continue;
4112 }
4113 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4114 {
4115 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4116 continue;
4117 }
4118 else
4119 continue;
4120
4121 add_stmt = use_stmt;
4122 break;
4123 }
4124 if (add_stmt == NULL)
4125 return false;
4126
4127 /* If stmt and add_stmt are in the same bb, we need to find out
4128 which one is earlier. If they are in different bbs, we've
4129 checked add_stmt is in the same bb as one of the uses of the
4130 stmt lhs, so stmt needs to dominate add_stmt too. */
4131 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4132 {
4133 gimple_stmt_iterator gsif = *gsi;
4134 gimple_stmt_iterator gsib = *gsi;
4135 int i;
4136 /* Search both forward and backward from stmt, with a small
4137 upper bound on the number of statements inspected. */
4138 for (i = 0; i < 128; i++)
4139 {
4140 if (!gsi_end_p (gsib))
4141 {
4142 gsi_prev_nondebug (&gsib);
4143 if (gsi_stmt (gsib) == add_stmt)
4144 {
4145 add_first = true;
4146 break;
4147 }
4148 }
4149 else if (gsi_end_p (gsif))
4150 break;
4151 if (!gsi_end_p (gsif))
4152 {
4153 gsi_next_nondebug (&gsif);
4154 if (gsi_stmt (gsif) == add_stmt)
4155 break;
4156 }
4157 }
4158 if (i == 128)
4159 return false;
4160 if (add_first)
4161 *gsi = gsi_for_stmt (add_stmt);
4162 }
4163 }
4164 }
4165
4166 if (code == BIT_NOT_EXPR)
4167 *gsi = gsi_for_stmt (cond_stmt);
4168
4169 auto_vec<gimple *, 8> mul_stmts;
4170 if (code == MULT_EXPR && cast_stmt)
4171 {
4172 type = TREE_TYPE (cast_lhs);
4173 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4174 if (gimple_assign_cast_p (g)
4175 && useless_type_conversion_p (type,
4176 TREE_TYPE (gimple_assign_rhs1 (g)))
4177 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4178 rhs1 = gimple_assign_rhs1 (g);
4179 else
4180 {
4181 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4182 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4183 rhs1 = gimple_assign_lhs (g);
4184 mul_stmts.quick_push (g);
4185 }
4186 if (TREE_CODE (rhs2) == INTEGER_CST)
4187 rhs2 = fold_convert (type, rhs2);
4188 else
4189 {
4190 g = SSA_NAME_DEF_STMT (rhs2);
4191 if (gimple_assign_cast_p (g)
4192 && useless_type_conversion_p (type,
4193 TREE_TYPE (gimple_assign_rhs1 (g)))
4194 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4195 rhs2 = gimple_assign_rhs1 (g);
4196 else
4197 {
4198 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4199 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4200 rhs2 = gimple_assign_lhs (g);
4201 mul_stmts.quick_push (g);
4202 }
4203 }
4204 }
4205 tree ctype = build_complex_type (type);
4206 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4207 ? IFN_MUL_OVERFLOW
4208 : code != MINUS_EXPR
4209 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4210 2, rhs1, rhs2);
4211 tree ctmp = make_ssa_name (ctype);
4212 gimple_call_set_lhs (g, ctmp);
4213 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4214 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4215 gassign *g2;
4216 if (code != BIT_NOT_EXPR)
4217 {
4218 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4219 build1 (REALPART_EXPR, type, ctmp));
4220 if (maxval || cast_stmt)
4221 {
4222 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4223 if (add_first)
4224 *gsi = gsi_for_stmt (stmt);
4225 }
4226 else
4227 gsi_replace (gsi, g2, true);
4228 if (code == MULT_EXPR)
4229 {
4230 mul_stmts.quick_push (g);
4231 mul_stmts.quick_push (g2);
4232 if (cast_stmt)
4233 {
4234 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4235 gsi_replace (gsi, g2, true);
4236 mul_stmts.quick_push (g2);
4237 }
4238 }
4239 }
4240 tree ovf = make_ssa_name (type);
4241 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4242 build1 (IMAGPART_EXPR, type, ctmp));
4243 if (code != BIT_NOT_EXPR)
4244 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4245 else
4246 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4247 if (code == MULT_EXPR)
4248 mul_stmts.quick_push (g2);
4249
4250 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4251 {
4252 if (is_gimple_debug (use_stmt))
4253 continue;
4254
4255 gimple *orig_use_stmt = use_stmt;
4256 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4257 maxval, NULL);
4258 if (ovf_use == 0)
4259 {
4260 gcc_assert (code != BIT_NOT_EXPR);
4261 if (maxval)
4262 {
4263 tree use_lhs = gimple_assign_lhs (use_stmt);
4264 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4265 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4266 TREE_TYPE (new_lhs)))
4267 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4268 update_stmt (use_stmt);
4269 }
4270 continue;
4271 }
4272 if (gimple_code (use_stmt) == GIMPLE_COND)
4273 {
4274 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4275 gimple_cond_set_lhs (cond_stmt, ovf);
4276 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4277 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4278 }
4279 else
4280 {
4281 gcc_checking_assert (is_gimple_assign (use_stmt));
4282 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4283 {
4284 gimple_assign_set_rhs1 (use_stmt, ovf);
4285 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4286 gimple_assign_set_rhs_code (use_stmt,
4287 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4288 }
4289 else
4290 {
4291 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4292 == COND_EXPR);
4293 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4294 boolean_type_node, ovf,
4295 build_int_cst (type, 0));
4296 gimple_assign_set_rhs1 (use_stmt, cond);
4297 }
4298 }
4299 update_stmt (use_stmt);
4300 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4301 {
4302 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4303 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4304 cfg_changed);
4305 gsi_remove (&gsi2, true);
4306 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4307 }
4308 }
4309 if (maxval)
4310 {
4311 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4312 gsi_remove (&gsi2, true);
4313 if (add_stmt)
4314 {
4315 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4316 new_lhs);
4317 gsi2 = gsi_for_stmt (add_stmt);
4318 gsi_replace (&gsi2, g, true);
4319 }
4320 }
4321 else if (code == BIT_NOT_EXPR)
4322 {
4323 *gsi = gsi_for_stmt (stmt);
4324 gsi_remove (gsi, true);
4325 release_ssa_name (lhs);
4326 return true;
4327 }
4328 return false;
4329 }
4330
4331 /* Return true if target has support for divmod. */
4332
4333 static bool
4334 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4335 {
4336 /* If target supports hardware divmod insn, use it for divmod. */
4337 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4338 return true;
4339
4340 /* Check if libfunc for divmod is available. */
4341 rtx libfunc = optab_libfunc (divmod_optab, mode);
4342 if (libfunc != NULL_RTX)
4343 {
4344 /* If an optab_handler exists for div_optab, perhaps in a wider mode,
4345 we don't want to use the libfunc even if it exists for the given mode. */
4346 machine_mode div_mode;
4347 FOR_EACH_MODE_FROM (div_mode, mode)
4348 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4349 return false;
4350
4351 return targetm.expand_divmod_libfunc != NULL;
4352 }
4353
4354 return false;
4355 }
4356
4357 /* Check if STMT is a candidate for the divmod transform. */
4358
4359 static bool
4360 divmod_candidate_p (gassign *stmt)
4361 {
4362 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4363 machine_mode mode = TYPE_MODE (type);
4364 optab divmod_optab, div_optab;
4365
4366 if (TYPE_UNSIGNED (type))
4367 {
4368 divmod_optab = udivmod_optab;
4369 div_optab = udiv_optab;
4370 }
4371 else
4372 {
4373 divmod_optab = sdivmod_optab;
4374 div_optab = sdiv_optab;
4375 }
4376
4377 tree op1 = gimple_assign_rhs1 (stmt);
4378 tree op2 = gimple_assign_rhs2 (stmt);
4379
4380 /* Disable the transform if either is a constant, since division-by-constant
4381 may have specialized expansion. */
4382 if (CONSTANT_CLASS_P (op1))
4383 return false;
4384
4385 if (CONSTANT_CLASS_P (op2))
4386 {
4387 if (integer_pow2p (op2))
4388 return false;
4389
4390 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4391 && TYPE_PRECISION (type) <= BITS_PER_WORD)
4392 return false;
4393
4394 /* If the divisor is not a power of 2 and the precision is wider than
4395 a HWI, expand_divmod punts on that, so in that case it is better
4396 to use the divmod optab or libfunc. Similarly if choose_multiplier
4397 might need pre/post shifts of BITS_PER_WORD or more. */
4398 }
4399
4400 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4401 expand using the [su]divv optabs. */
4402 if (TYPE_OVERFLOW_TRAPS (type))
4403 return false;
4404
4405 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4406 return false;
4407
4408 return true;
4409 }
4410
4411 /* This function looks for:
4412 t1 = a TRUNC_DIV_EXPR b;
4413 t2 = a TRUNC_MOD_EXPR b;
4414 and transforms it to the following sequence:
4415 complex_tmp = DIVMOD (a, b);
4416 t1 = REALPART_EXPR(complex_tmp);
4417 t2 = IMAGPART_EXPR(complex_tmp);
4418 For conditions enabling the transform see divmod_candidate_p().
4419
4420 The pass has three parts:
4421 1) Find top_stmt, which is a trunc_div or trunc_mod stmt that dominates
4422 all other trunc_div_expr and trunc_mod_expr stmts.
4423 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4424 to the stmts vector.
4425 3) Insert the DIVMOD call just before top_stmt and update entries in the
4426 stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
4427 IMAGPART_EXPR for mod). */
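/* Editorial illustration (hypothetical function name; the precise
   conditions live in divmod_candidate_p and depend on the target):

     void
     quot_rem (int a, int b, int *q, int *r)
     {
       *q = a / b;	// TRUNC_DIV_EXPR
       *r = a % b;	// TRUNC_MOD_EXPR with the same operands
     }

   When the transform applies, both statements end up reading the
   REALPART_EXPR resp. IMAGPART_EXPR of a single DIVMOD internal call.  */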
4428
4429 static bool
4430 convert_to_divmod (gassign *stmt)
4431 {
4432 if (stmt_can_throw_internal (cfun, stmt)
4433 || !divmod_candidate_p (stmt))
4434 return false;
4435
4436 tree op1 = gimple_assign_rhs1 (stmt);
4437 tree op2 = gimple_assign_rhs2 (stmt);
4438
4439 imm_use_iterator use_iter;
4440 gimple *use_stmt;
4441 auto_vec<gimple *> stmts;
4442
4443 gimple *top_stmt = stmt;
4444 basic_block top_bb = gimple_bb (stmt);
4445
4446 /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4447 at least stmt and possibly other trunc_div/trunc_mod stmts
4448 having the same operands as stmt. */
4449
4450 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4451 {
4452 if (is_gimple_assign (use_stmt)
4453 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4454 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4455 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4456 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4457 {
4458 if (stmt_can_throw_internal (cfun, use_stmt))
4459 continue;
4460
4461 basic_block bb = gimple_bb (use_stmt);
4462
4463 if (bb == top_bb)
4464 {
4465 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4466 top_stmt = use_stmt;
4467 }
4468 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4469 {
4470 top_bb = bb;
4471 top_stmt = use_stmt;
4472 }
4473 }
4474 }
4475
4476 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4477 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4478
4479 stmts.safe_push (top_stmt);
4480 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4481
4482 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4483 to the stmts vector. This second loop always adds stmt to the stmts
4484 vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so it
4485 ends up adding at least a single trunc_mod_expr stmt. */
4486
4487 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4488 {
4489 if (is_gimple_assign (use_stmt)
4490 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4491 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4492 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4493 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4494 {
4495 if (use_stmt == top_stmt
4496 || stmt_can_throw_internal (cfun, use_stmt)
4497 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4498 continue;
4499
4500 stmts.safe_push (use_stmt);
4501 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4502 div_seen = true;
4503 }
4504 }
4505
4506 if (!div_seen)
4507 return false;
4508
4509 /* Part 3: Create a call to the internal fn DIVMOD:
4510 divmod_tmp = DIVMOD (op1, op2). */
4511
4512 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4513 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4514 call_stmt, "divmod_tmp");
4515 gimple_call_set_lhs (call_stmt, res);
4516 /* We rejected throwing statements above. */
4517 gimple_call_set_nothrow (call_stmt, true);
4518
4519 /* Insert the call before top_stmt. */
4520 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4521 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4522
4523 widen_mul_stats.divmod_calls_inserted++;
4524
4525 /* Update all statements in stmts vector:
4526 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4527 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4528
4529 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4530 {
4531 tree new_rhs;
4532
4533 switch (gimple_assign_rhs_code (use_stmt))
4534 {
4535 case TRUNC_DIV_EXPR:
4536 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4537 break;
4538
4539 case TRUNC_MOD_EXPR:
4540 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4541 break;
4542
4543 default:
4544 gcc_unreachable ();
4545 }
4546
4547 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4548 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4549 update_stmt (use_stmt);
4550 }
4551
4552 return true;
4553 }
4554
4555 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4556 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
4557 value is true iff we converted the statement. */
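/* Editorial illustration (assumes 32-bit unsigned int, 64-bit
   unsigned long long and a target with a highpart multiply pattern):

     unsigned int
     mulhi (unsigned int a, unsigned int b)
     {
       return ((unsigned long long) a * b) >> 32;
     }

   The widening multiply feeding a right shift by at least the narrow
   precision is what gets rewritten to MULT_HIGHPART_EXPR.  */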
4558
4559 static bool
4560 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
4561 {
4562 tree lhs = gimple_assign_lhs (stmt);
4563 tree stype = TREE_TYPE (lhs);
4564 tree sarg0 = gimple_assign_rhs1 (stmt);
4565 tree sarg1 = gimple_assign_rhs2 (stmt);
4566
4567 if (TREE_CODE (stype) != INTEGER_TYPE
4568 || TREE_CODE (sarg1) != INTEGER_CST
4569 || TREE_CODE (sarg0) != SSA_NAME
4570 || !tree_fits_uhwi_p (sarg1)
4571 || !has_single_use (sarg0))
4572 return false;
4573
4574 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
4575 if (!def)
4576 return false;
4577
4578 enum tree_code mcode = gimple_assign_rhs_code (def);
4579 if (mcode == NOP_EXPR)
4580 {
4581 tree tmp = gimple_assign_rhs1 (def);
4582 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
4583 return false;
4584 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
4585 if (!def)
4586 return false;
4587 mcode = gimple_assign_rhs_code (def);
4588 }
4589
4590 if (mcode != WIDEN_MULT_EXPR
4591 || gimple_bb (def) != gimple_bb (stmt))
4592 return false;
4593 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
4594 if (TREE_CODE (mtype) != INTEGER_TYPE
4595 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
4596 return false;
4597
4598 tree mop1 = gimple_assign_rhs1 (def);
4599 tree mop2 = gimple_assign_rhs2 (def);
4600 tree optype = TREE_TYPE (mop1);
4601 bool unsignedp = TYPE_UNSIGNED (optype);
4602 unsigned int prec = TYPE_PRECISION (optype);
4603
4604 if (unsignedp != TYPE_UNSIGNED (mtype)
4605 || TYPE_PRECISION (mtype) != 2 * prec)
4606 return false;
4607
4608 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
4609 if (bits < prec || bits >= 2 * prec)
4610 return false;
4611
4612 /* For the time being, require the operands to have the same signedness. */
4613 if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
4614 return false;
4615
4616 machine_mode mode = TYPE_MODE (optype);
4617 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
4618 if (optab_handler (tab, mode) == CODE_FOR_nothing)
4619 return false;
4620
4621 location_t loc = gimple_location (stmt);
4622 tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
4623 MULT_HIGHPART_EXPR, mop1, mop2);
4624 tree highpart2 = highpart1;
4625 tree ntype = optype;
4626
4627 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
4628 {
4629 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
4630 : signed_type_for (optype);
4631 highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
4632 }
4633 if (bits > prec)
4634 highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
4635 RSHIFT_EXPR, highpart2,
4636 build_int_cst (ntype, bits - prec));
4637
4638 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
4639 gsi_replace (gsi, new_stmt, true);
4640
4641 widen_mul_stats.highpart_mults_inserted++;
4642 return true;
4643 }
4644
4645 /* If the target has a spaceship<MODE>3 expander, pattern recognize
4646 <bb 2> [local count: 1073741824]:
4647 if (a_2(D) == b_3(D))
4648 goto <bb 6>; [34.00%]
4649 else
4650 goto <bb 3>; [66.00%]
4651
4652 <bb 3> [local count: 708669601]:
4653 if (a_2(D) < b_3(D))
4654 goto <bb 6>; [1.04%]
4655 else
4656 goto <bb 4>; [98.96%]
4657
4658 <bb 4> [local count: 701299439]:
4659 if (a_2(D) > b_3(D))
4660 goto <bb 5>; [48.89%]
4661 else
4662 goto <bb 6>; [51.11%]
4663
4664 <bb 5> [local count: 342865295]:
4665
4666 <bb 6> [local count: 1073741824]:
4667 and turn it into:
4668 <bb 2> [local count: 1073741824]:
4669 _1 = .SPACESHIP (a_2(D), b_3(D));
4670 if (_1 == 0)
4671 goto <bb 6>; [34.00%]
4672 else
4673 goto <bb 3>; [66.00%]
4674
4675 <bb 3> [local count: 708669601]:
4676 if (_1 == -1)
4677 goto <bb 6>; [1.04%]
4678 else
4679 goto <bb 4>; [98.96%]
4680
4681 <bb 4> [local count: 701299439]:
4682 if (_1 == 1)
4683 goto <bb 5>; [48.89%]
4684 else
4685 goto <bb 6>; [51.11%]
4686
4687 <bb 5> [local count: 342865295]:
4688
4689 <bb 6> [local count: 1073741824]:
4690 so that the backend can emit an optimal comparison and
4691 conditional jump sequence. */
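/* Editorial illustration (one of several ways such a CFG can arise, e.g.
   when C++ operator<=> on floating-point operands is expanded):

     int
     three_way (double a, double b)
     {
       if (a == b) return 0;
       if (a < b) return -1;
       if (a > b) return 1;
       return 2;	// unordered, e.g. a NaN operand
     }

   The three chained comparisons of the same operands correspond to the
   bb 2 / bb 3 / bb 4 structure shown above.  */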
4692
4693 static void
4694 optimize_spaceship (gimple *stmt)
4695 {
4696 enum tree_code code = gimple_cond_code (stmt);
4697 if (code != EQ_EXPR && code != NE_EXPR)
4698 return;
4699 tree arg1 = gimple_cond_lhs (stmt);
4700 tree arg2 = gimple_cond_rhs (stmt);
4701 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
4702 || optab_handler (spaceship_optab,
4703 TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
4704 || operand_equal_p (arg1, arg2, 0))
4705 return;
4706
4707 basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
4708 edge em1 = NULL, e1 = NULL, e2 = NULL;
4709 bb1 = EDGE_SUCC (bb0, 1)->dest;
4710 if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
4711 bb1 = EDGE_SUCC (bb0, 0)->dest;
4712
4713 gimple *g = last_stmt (bb1);
4714 if (g == NULL
4715 || gimple_code (g) != GIMPLE_COND
4716 || !single_pred_p (bb1)
4717 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4718 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4719 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4720 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4721 || !cond_only_block_p (bb1))
4722 return;
4723
4724 enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4725 ? LT_EXPR : GT_EXPR);
4726 switch (gimple_cond_code (g))
4727 {
4728 case LT_EXPR:
4729 case LE_EXPR:
4730 break;
4731 case GT_EXPR:
4732 case GE_EXPR:
4733 ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
4734 break;
4735 default:
4736 return;
4737 }
4738
4739 for (int i = 0; i < 2; ++i)
4740 {
4741 /* With NaNs, </<=/>/>= are false, so we need to look for the
4742 third comparison on the false edge from whatever non-equality
4743 comparison the second comparison is. */
4744 if (HONOR_NANS (TREE_TYPE (arg1))
4745 && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
4746 continue;
4747
4748 bb2 = EDGE_SUCC (bb1, i)->dest;
4749 g = last_stmt (bb2);
4750 if (g == NULL
4751 || gimple_code (g) != GIMPLE_COND
4752 || !single_pred_p (bb2)
4753 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4754 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4755 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4756 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4757 || !cond_only_block_p (bb2)
4758 || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
4759 continue;
4760
4761 enum tree_code ccode2
4762 = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
4763 switch (gimple_cond_code (g))
4764 {
4765 case LT_EXPR:
4766 case LE_EXPR:
4767 break;
4768 case GT_EXPR:
4769 case GE_EXPR:
4770 ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
4771 break;
4772 default:
4773 continue;
4774 }
4775 if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
4776 continue;
4777
4778 if ((ccode == LT_EXPR)
4779 ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
4780 {
4781 em1 = EDGE_SUCC (bb1, 1 - i);
4782 e1 = EDGE_SUCC (bb2, 0);
4783 e2 = EDGE_SUCC (bb2, 1);
4784 if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
4785 std::swap (e1, e2);
4786 }
4787 else
4788 {
4789 e1 = EDGE_SUCC (bb1, 1 - i);
4790 em1 = EDGE_SUCC (bb2, 0);
4791 e2 = EDGE_SUCC (bb2, 1);
4792 if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
4793 std::swap (em1, e2);
4794 }
4795 break;
4796 }
4797
4798 if (em1 == NULL)
4799 {
4800 if ((ccode == LT_EXPR)
4801 ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
4802 {
4803 em1 = EDGE_SUCC (bb1, 1);
4804 e1 = EDGE_SUCC (bb1, 0);
4805 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4806 }
4807 else
4808 {
4809 em1 = EDGE_SUCC (bb1, 0);
4810 e1 = EDGE_SUCC (bb1, 1);
4811 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4812 }
4813 }
4814
4815 g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
4816 tree lhs = make_ssa_name (integer_type_node);
4817 gimple_call_set_lhs (g, lhs);
4818 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4819 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4820
4821 gcond *cond = as_a <gcond *> (stmt);
4822 gimple_cond_set_lhs (cond, lhs);
4823 gimple_cond_set_rhs (cond, integer_zero_node);
4824 update_stmt (stmt);
4825
4826 g = last_stmt (bb1);
4827 cond = as_a <gcond *> (g);
4828 gimple_cond_set_lhs (cond, lhs);
4829 if (em1->src == bb1 && e2 != em1)
4830 {
4831 gimple_cond_set_rhs (cond, integer_minus_one_node);
4832 gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
4833 ? EQ_EXPR : NE_EXPR);
4834 }
4835 else
4836 {
4837 gcc_assert (e1->src == bb1 && e2 != e1);
4838 gimple_cond_set_rhs (cond, integer_one_node);
4839 gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
4840 ? EQ_EXPR : NE_EXPR);
4841 }
4842 update_stmt (g);
4843
4844 if (e2 != e1 && e2 != em1)
4845 {
4846 g = last_stmt (bb2);
4847 cond = as_a <gcond *> (g);
4848 gimple_cond_set_lhs (cond, lhs);
4849 if (em1->src == bb2)
4850 gimple_cond_set_rhs (cond, integer_minus_one_node);
4851 else
4852 {
4853 gcc_assert (e1->src == bb2);
4854 gimple_cond_set_rhs (cond, integer_one_node);
4855 }
4856 gimple_cond_set_code (cond,
4857 (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
4858 update_stmt (g);
4859 }
4860
4861 wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
4862 wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
4863 value_range vr (TREE_TYPE (lhs), wm1, w2);
4864 set_range_info (lhs, vr);
4865 }
4866
4867
4868 /* Find integer multiplications where the operands are extended from
4869 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4870 or MULT_HIGHPART_EXPR where appropriate. */
4871
4872 namespace {
4873
4874 const pass_data pass_data_optimize_widening_mul =
4875 {
4876 GIMPLE_PASS, /* type */
4877 "widening_mul", /* name */
4878 OPTGROUP_NONE, /* optinfo_flags */
4879 TV_TREE_WIDEN_MUL, /* tv_id */
4880 PROP_ssa, /* properties_required */
4881 0, /* properties_provided */
4882 0, /* properties_destroyed */
4883 0, /* todo_flags_start */
4884 TODO_update_ssa, /* todo_flags_finish */
4885 };
4886
4887 class pass_optimize_widening_mul : public gimple_opt_pass
4888 {
4889 public:
4890 pass_optimize_widening_mul (gcc::context *ctxt)
4891 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4892 {}
4893
4894 /* opt_pass methods: */
4895 virtual bool gate (function *)
4896 {
4897 return flag_expensive_optimizations && optimize;
4898 }
4899
4900 virtual unsigned int execute (function *);
4901
4902 }; // class pass_optimize_widening_mul
4903
4904 /* Walker class to perform the transformation in reverse dominance order. */
4905
4906 class math_opts_dom_walker : public dom_walker
4907 {
4908 public:
4909 /* Constructor. CFG_CHANGED_P is a pointer to a boolean flag that will be
4910 set if walking modifies the CFG. */
4911
4912 math_opts_dom_walker (bool *cfg_changed_p)
4913 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4914 m_cfg_changed_p (cfg_changed_p) {}
4915
4916 /* The actual actions performed in the walk. */
4917
4918 virtual void after_dom_children (basic_block);
4919
4920 /* Set of results of chains of multiply and add statement combinations that
4921 were not transformed into FMAs because of active deferring. */
4922 hash_set<tree> m_last_result_set;
4923
4924 /* Pointer to a flag of the user that needs to be set if CFG has been
4925 modified. */
4926 bool *m_cfg_changed_p;
4927 };
4928
4929 void
4930 math_opts_dom_walker::after_dom_children (basic_block bb)
4931 {
4932 gimple_stmt_iterator gsi;
4933
4934 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4935
4936 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4937 {
4938 gimple *stmt = gsi_stmt (gsi);
4939 enum tree_code code;
4940
4941 if (is_gimple_assign (stmt))
4942 {
4943 code = gimple_assign_rhs_code (stmt);
4944 switch (code)
4945 {
4946 case MULT_EXPR:
4947 if (!convert_mult_to_widen (stmt, &gsi)
4948 && !convert_expand_mult_copysign (stmt, &gsi)
4949 && convert_mult_to_fma (stmt,
4950 gimple_assign_rhs1 (stmt),
4951 gimple_assign_rhs2 (stmt),
4952 &fma_state))
4953 {
4954 gsi_remove (&gsi, true);
4955 release_defs (stmt);
4956 continue;
4957 }
4958 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4959 break;
4960
4961 case PLUS_EXPR:
4962 case MINUS_EXPR:
4963 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4964 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4965 break;
4966
4967 case BIT_NOT_EXPR:
4968 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4969 continue;
4970 break;
4971
4972 case TRUNC_MOD_EXPR:
4973 convert_to_divmod (as_a<gassign *> (stmt));
4974 break;
4975
4976 case RSHIFT_EXPR:
4977 convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
4978 break;
4979
4980 default:;
4981 }
4982 }
4983 else if (is_gimple_call (stmt))
4984 {
4985 switch (gimple_call_combined_fn (stmt))
4986 {
4987 CASE_CFN_POW:
4988 if (gimple_call_lhs (stmt)
4989 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4990 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4991 &dconst2)
4992 && convert_mult_to_fma (stmt,
4993 gimple_call_arg (stmt, 0),
4994 gimple_call_arg (stmt, 0),
4995 &fma_state))
4996 {
4997 unlink_stmt_vdef (stmt);
4998 if (gsi_remove (&gsi, true)
4999 && gimple_purge_dead_eh_edges (bb))
5000 *m_cfg_changed_p = true;
5001 release_defs (stmt);
5002 continue;
5003 }
5004 break;
5005
5006 case CFN_COND_MUL:
5007 if (convert_mult_to_fma (stmt,
5008 gimple_call_arg (stmt, 1),
5009 gimple_call_arg (stmt, 2),
5010 &fma_state,
5011 gimple_call_arg (stmt, 0)))
5012
5013 {
5014 gsi_remove (&gsi, true);
5015 release_defs (stmt);
5016 continue;
5017 }
5018 break;
5019
5020 case CFN_LAST:
5021 cancel_fma_deferring (&fma_state);
5022 break;
5023
5024 default:
5025 break;
5026 }
5027 }
5028 else if (gimple_code (stmt) == GIMPLE_COND)
5029 optimize_spaceship (stmt);
5030 gsi_next (&gsi);
5031 }
5032 if (fma_state.m_deferring_p
5033 && fma_state.m_initial_phi)
5034 {
5035 gcc_checking_assert (fma_state.m_last_result);
5036 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
5037 &m_last_result_set))
5038 cancel_fma_deferring (&fma_state);
5039 else
5040 m_last_result_set.add (fma_state.m_last_result);
5041 }
5042 }
5043
5044
5045 unsigned int
5046 pass_optimize_widening_mul::execute (function *fun)
5047 {
5048 bool cfg_changed = false;
5049
5050 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5051 calculate_dominance_info (CDI_DOMINATORS);
5052 renumber_gimple_stmt_uids (cfun);
5053
5054 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5055
5056 statistics_counter_event (fun, "widening multiplications inserted",
5057 widen_mul_stats.widen_mults_inserted);
5058 statistics_counter_event (fun, "widening maccs inserted",
5059 widen_mul_stats.maccs_inserted);
5060 statistics_counter_event (fun, "fused multiply-adds inserted",
5061 widen_mul_stats.fmas_inserted);
5062 statistics_counter_event (fun, "divmod calls inserted",
5063 widen_mul_stats.divmod_calls_inserted);
5064 statistics_counter_event (fun, "highpart multiplications inserted",
5065 widen_mul_stats.highpart_mults_inserted);
5066
5067 return cfg_changed ? TODO_cleanup_cfg : 0;
5068 }
5069
5070 } // anon namespace
5071
5072 gimple_opt_pass *
5073 make_pass_optimize_widening_mul (gcc::context *ctxt)
5074 {
5075 return new pass_optimize_widening_mul (ctxt);
5076 }