1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* The first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43     First of all, experiments have shown that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
62     blocks containing the divisions and their common dominators,
63     and walks it twice.  The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70     worst case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
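/* As an illustrative sketch (derived from register_division_in and
   insert_reciprocals below, not taken from any particular testcase): if a
   single block contains

     t0 = a / d;  t1 = b / d;  t2 = c / d;

   each division registers an importance of 2, so the block's occurrence
   ends up with num_divisions == 6.  Because every division is counted
   twice, insert_reciprocals compares 6 / 2 == 3 against the target's
   min_divisions_for_recip_mul threshold N and, if 3 >= N, emits

     reciptmp = 1.0 / d;

   before the first division, after which replace_reciprocal turns each
   division into a multiplication by reciptmp.  */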
86
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118
119 /* This structure represents one basic block that either computes a
120    division, or is a common dominator for basic blocks that compute a
121 division. */
122 struct occurrence {
123 /* The basic block represented by this structure. */
124 basic_block bb = basic_block();
125
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
127 inserted in BB. */
128 tree recip_def = tree();
129
130 /* If non-NULL, the SSA_NAME holding the definition for a squared
131 reciprocal inserted in BB. */
132 tree square_recip_def = tree();
133
134 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
135 was inserted in BB. */
136 gimple *recip_def_stmt = nullptr;
137
138 /* Pointer to a list of "struct occurrence"s for blocks dominated
139 by BB. */
140 struct occurrence *children = nullptr;
141
142   /* Pointer to the next "struct occurrence" in the list of blocks
143 sharing a common dominator. */
144 struct occurrence *next = nullptr;
145
146 /* The number of divisions that are in BB before compute_merit. The
147 number of divisions that are in BB or post-dominate it after
148 compute_merit. */
149 int num_divisions = 0;
150
151 /* True if the basic block has a division, false if it is a common
152 dominator for basic blocks that do. If it is false and trapping
153 math is active, BB is not a candidate for inserting a reciprocal. */
154 bool bb_has_division = false;
155
156   /* Construct a struct occurrence for basic block BB, whose
157 children list is headed by CHILDREN. */
158 occurrence (basic_block bb, struct occurrence *children)
159 : bb (bb), children (children)
160 {
161 bb->aux = this;
162 }
163
164 /* Destroy a struct occurrence and remove it from its basic block. */
165 ~occurrence ()
166 {
167 bb->aux = nullptr;
168 }
169
170 /* Allocate memory for a struct occurrence from OCC_POOL. */
171 static void* operator new (size_t);
172
173 /* Return memory for a struct occurrence to OCC_POOL. */
174 static void operator delete (void*, size_t);
175 };
176
177 static struct
178 {
179 /* Number of 1.0/X ops inserted. */
180 int rdivs_inserted;
181
182 /* Number of 1.0/FUNC ops inserted. */
183 int rfuncs_inserted;
184 } reciprocal_stats;
185
186 static struct
187 {
188 /* Number of cexpi calls inserted. */
189 int inserted;
190
191 /* Number of conversions removed. */
192 int conv_removed;
193
194 } sincos_stats;
195
196 static struct
197 {
198 /* Number of widening multiplication ops inserted. */
199 int widen_mults_inserted;
200
201 /* Number of integer multiply-and-accumulate ops inserted. */
202 int maccs_inserted;
203
204 /* Number of fp fused multiply-add ops inserted. */
205 int fmas_inserted;
206
207 /* Number of divmod calls inserted. */
208 int divmod_calls_inserted;
209 } widen_mul_stats;
210
211 /* The instance of "struct occurrence" representing the highest
212 interesting block in the dominator tree. */
213 static struct occurrence *occ_head;
214
215 /* Allocation pool for getting instances of "struct occurrence". */
216 static object_allocator<occurrence> *occ_pool;
217
218 void* occurrence::operator new (size_t n)
219 {
220 gcc_assert (n == sizeof(occurrence));
221 return occ_pool->allocate_raw ();
222 }
223
224 void occurrence::operator delete (void *occ, size_t n)
225 {
226 gcc_assert (n == sizeof(occurrence));
227 occ_pool->remove_raw (occ);
228 }
229
230 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
231 list of "struct occurrence"s, one per basic block, having IDOM as
232 their common dominator.
233
234 We try to insert NEW_OCC as deep as possible in the tree, and we also
235 insert any other block that is a common dominator for BB and one
236 block already in the tree. */
237
238 static void
239 insert_bb (struct occurrence *new_occ, basic_block idom,
240 struct occurrence **p_head)
241 {
242 struct occurrence *occ, **p_occ;
243
244 for (p_occ = p_head; (occ = *p_occ) != NULL; )
245 {
246 basic_block bb = new_occ->bb, occ_bb = occ->bb;
247 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
248 if (dom == bb)
249 {
250 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
251 from its list. */
252 *p_occ = occ->next;
253 occ->next = new_occ->children;
254 new_occ->children = occ;
255
256 /* Try the next block (it may as well be dominated by BB). */
257 }
258
259 else if (dom == occ_bb)
260 {
261 /* OCC_BB dominates BB. Tail recurse to look deeper. */
262 insert_bb (new_occ, dom, &occ->children);
263 return;
264 }
265
266 else if (dom != idom)
267 {
268 gcc_assert (!dom->aux);
269
270 /* There is a dominator between IDOM and BB, add it and make
271 two children out of NEW_OCC and OCC. First, remove OCC from
272 its list. */
273 *p_occ = occ->next;
274 new_occ->next = occ;
275 occ->next = NULL;
276
277 /* None of the previous blocks has DOM as a dominator: if we tail
278 recursed, we would reexamine them uselessly. Just switch BB with
279 DOM, and go on looking for blocks dominated by DOM. */
280 new_occ = new occurrence (dom, new_occ);
281 }
282
283 else
284 {
285 /* Nothing special, go on with the next element. */
286 p_occ = &occ->next;
287 }
288 }
289
290   /* No place was found deeper under IDOM.  Make BB a direct child of IDOM. */
291 new_occ->next = *p_head;
292 *p_head = new_occ;
293 }
294
295 /* Register that we found a division in BB.
296 IMPORTANCE is a measure of how much weighting to give
297 that division. Use IMPORTANCE = 2 to register a single
298 division. If the division is going to be found multiple
299 times use 1 (as it is with squares). */
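/* For example (a sketch based on execute_cse_reciprocals_1 below): a plain
   division a / x is registered once with IMPORTANCE == 2, while a division
   a / (x * x) is registered twice with IMPORTANCE == 1, so both contribute
   the same total weight of 2 to their basic block.  */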
300
301 static inline void
302 register_division_in (basic_block bb, int importance)
303 {
304 struct occurrence *occ;
305
306 occ = (struct occurrence *) bb->aux;
307 if (!occ)
308 {
309 occ = new occurrence (bb, NULL);
310 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
311 }
312
313 occ->bb_has_division = true;
314 occ->num_divisions += importance;
315 }
316
317
318 /* Compute the number of divisions that postdominate each block in OCC and
319 its children. */
320
321 static void
322 compute_merit (struct occurrence *occ)
323 {
324 struct occurrence *occ_child;
325 basic_block dom = occ->bb;
326
327 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
328 {
329 basic_block bb;
330 if (occ_child->children)
331 compute_merit (occ_child);
332
333 if (flag_exceptions)
334 bb = single_noncomplex_succ (dom);
335 else
336 bb = dom;
337
338 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
339 occ->num_divisions += occ_child->num_divisions;
340 }
341 }
342
343
344 /* Return whether USE_STMT is a floating-point division by DEF. */
345 static inline bool
346 is_division_by (gimple *use_stmt, tree def)
347 {
348 return is_gimple_assign (use_stmt)
349 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
350 && gimple_assign_rhs2 (use_stmt) == def
351 /* Do not recognize x / x as valid division, as we are getting
352            confused later by replacing all immediate uses of x in such
353 a stmt. */
354 && gimple_assign_rhs1 (use_stmt) != def
355 && !stmt_can_throw_internal (cfun, use_stmt);
356 }
357
358 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
359 static inline bool
360 is_mult_by (gimple *use_stmt, tree def, tree a)
361 {
362 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
363 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
364 {
365 tree op0 = gimple_assign_rhs1 (use_stmt);
366 tree op1 = gimple_assign_rhs2 (use_stmt);
367
368 return (op0 == def && op1 == a)
369 || (op0 == a && op1 == def);
370 }
371   return false;
372 }
373
374 /* Return whether USE_STMT is DEF * DEF. */
375 static inline bool
376 is_square_of (gimple *use_stmt, tree def)
377 {
378 return is_mult_by (use_stmt, def, def);
379 }
380
381 /* Return whether USE_STMT is a floating-point division by
382 DEF * DEF. */
383 static inline bool
384 is_division_by_square (gimple *use_stmt, tree def)
385 {
386 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
387 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
388 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
389 && !stmt_can_throw_internal (cfun, use_stmt))
390 {
391 tree denominator = gimple_assign_rhs2 (use_stmt);
392 if (TREE_CODE (denominator) == SSA_NAME)
393 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
394 }
395   return false;
396 }
397
398 /* Walk the subset of the dominator tree rooted at OCC, setting the
399 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
400 the given basic block. The field may be left NULL, of course,
401 if it is not possible or profitable to do the optimization.
402
403 DEF_BSI is an iterator pointing at the statement defining DEF.
404 If RECIP_DEF is set, a dominator already has a computation that can
405 be used.
406
407 If should_insert_square_recip is set, then this also inserts
408 the square of the reciprocal immediately after the definition
409 of the reciprocal. */
410
411 static void
412 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
413 tree def, tree recip_def, tree square_recip_def,
414 int should_insert_square_recip, int threshold)
415 {
416 tree type;
417 gassign *new_stmt, *new_square_stmt;
418 gimple_stmt_iterator gsi;
419 struct occurrence *occ_child;
420
421 if (!recip_def
422 && (occ->bb_has_division || !flag_trapping_math)
423 /* Divide by two as all divisions are counted twice in
424 the costing loop. */
425 && occ->num_divisions / 2 >= threshold)
426 {
427 /* Make a variable with the replacement and substitute it. */
428 type = TREE_TYPE (def);
429 recip_def = create_tmp_reg (type, "reciptmp");
430 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
431 build_one_cst (type), def);
432
433 if (should_insert_square_recip)
434 {
435 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
436 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
437 recip_def, recip_def);
438 }
439
440 if (occ->bb_has_division)
441 {
442 /* Case 1: insert before an existing division. */
443 gsi = gsi_after_labels (occ->bb);
444 while (!gsi_end_p (gsi)
445 && (!is_division_by (gsi_stmt (gsi), def))
446 && (!is_division_by_square (gsi_stmt (gsi), def)))
447 gsi_next (&gsi);
448
449 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
450 if (should_insert_square_recip)
451 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
452 }
453 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
454 {
455 /* Case 2: insert right after the definition. Note that this will
456 never happen if the definition statement can throw, because in
457 that case the sole successor of the statement's basic block will
458 dominate all the uses as well. */
459 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
460 if (should_insert_square_recip)
461 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
462 }
463 else
464 {
465 /* Case 3: insert in a basic block not containing defs/uses. */
466 gsi = gsi_after_labels (occ->bb);
467 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
468 if (should_insert_square_recip)
469 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
470 }
471
472 reciprocal_stats.rdivs_inserted++;
473
474 occ->recip_def_stmt = new_stmt;
475 }
476
477 occ->recip_def = recip_def;
478 occ->square_recip_def = square_recip_def;
479 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
480 insert_reciprocals (def_gsi, occ_child, def, recip_def,
481 square_recip_def, should_insert_square_recip,
482 threshold);
483 }
484
485 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
486 Take as argument the use for (x * x). */
487 static inline void
488 replace_reciprocal_squares (use_operand_p use_p)
489 {
490 gimple *use_stmt = USE_STMT (use_p);
491 basic_block bb = gimple_bb (use_stmt);
492 struct occurrence *occ = (struct occurrence *) bb->aux;
493
494 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
495 && occ->recip_def)
496 {
497 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
498 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
499 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
500 SET_USE (use_p, occ->square_recip_def);
501 fold_stmt_inplace (&gsi);
502 update_stmt (use_stmt);
503 }
504 }
505
506
507 /* Replace the division at USE_P with a multiplication by the reciprocal, if
508 possible. */
509
510 static inline void
511 replace_reciprocal (use_operand_p use_p)
512 {
513 gimple *use_stmt = USE_STMT (use_p);
514 basic_block bb = gimple_bb (use_stmt);
515 struct occurrence *occ = (struct occurrence *) bb->aux;
516
517 if (optimize_bb_for_speed_p (bb)
518 && occ->recip_def && use_stmt != occ->recip_def_stmt)
519 {
520 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
521 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
522 SET_USE (use_p, occ->recip_def);
523 fold_stmt_inplace (&gsi);
524 update_stmt (use_stmt);
525 }
526 }
527
528
529 /* Free OCC and return one more "struct occurrence" to be freed. */
530
531 static struct occurrence *
532 free_bb (struct occurrence *occ)
533 {
534 struct occurrence *child, *next;
535
536 /* First get the two pointers hanging off OCC. */
537 next = occ->next;
538 child = occ->children;
539 delete occ;
540
541 /* Now ensure that we don't recurse unless it is necessary. */
542 if (!child)
543 return next;
544 else
545 {
546 while (next)
547 next = free_bb (next);
548
549 return child;
550 }
551 }
552
553 /* Transform sequences like
554 t = sqrt (a)
555 x = 1.0 / t;
556 r1 = x * x;
557 r2 = a * x;
558 into:
559 t = sqrt (a)
560 r1 = 1.0 / a;
561 r2 = t;
562 x = r1 * r2;
563 depending on the uses of x, r1, r2. This removes one multiplication and
564 allows the sqrt and division operations to execute in parallel.
565 DEF_GSI is the gsi of the initial division by sqrt that defines
566 DEF (x in the example above). */
567
568 static void
569 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
570 {
571 gimple *use_stmt;
572 imm_use_iterator use_iter;
573 gimple *stmt = gsi_stmt (*def_gsi);
574 tree x = def;
575 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
576 tree div_rhs1 = gimple_assign_rhs1 (stmt);
577
578 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
579 || TREE_CODE (div_rhs1) != REAL_CST
580 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
581 return;
582
583 gcall *sqrt_stmt
584 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
585
586 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
587 return;
588
589 switch (gimple_call_combined_fn (sqrt_stmt))
590 {
591 CASE_CFN_SQRT:
592 CASE_CFN_SQRT_FN:
593 break;
594
595 default:
596 return;
597 }
598 tree a = gimple_call_arg (sqrt_stmt, 0);
599
600 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
601
602 /* Statements that use x in x * x. */
603 auto_vec<gimple *> sqr_stmts;
604 /* Statements that use x in a * x. */
605 auto_vec<gimple *> mult_stmts;
606 bool has_other_use = false;
607 bool mult_on_main_path = false;
608
609 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
610 {
611 if (is_gimple_debug (use_stmt))
612 continue;
613 if (is_square_of (use_stmt, x))
614 {
615 sqr_stmts.safe_push (use_stmt);
616 if (gimple_bb (use_stmt) == gimple_bb (stmt))
617 mult_on_main_path = true;
618 }
619 else if (is_mult_by (use_stmt, x, a))
620 {
621 mult_stmts.safe_push (use_stmt);
622 if (gimple_bb (use_stmt) == gimple_bb (stmt))
623 mult_on_main_path = true;
624 }
625 else
626 has_other_use = true;
627 }
628
629 /* In the x * x and a * x cases we just rewire stmt operands or
630 remove multiplications. In the has_other_use case we introduce
631 a multiplication so make sure we don't introduce a multiplication
632 on a path where there was none. */
633 if (has_other_use && !mult_on_main_path)
634 return;
635
636 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
637 return;
638
639 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
640 to be able to compose it from the sqr and mult cases. */
641 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
642 return;
643
644 if (dump_file)
645 {
646 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
647 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
648 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
649 fprintf (dump_file, "\n");
650 }
651
652 bool delete_div = !has_other_use;
653 tree sqr_ssa_name = NULL_TREE;
654 if (!sqr_stmts.is_empty ())
655 {
656 /* r1 = x * x. Transform the original
657 x = 1.0 / t
658 into
659 tmp1 = 1.0 / a
660 r1 = tmp1. */
661
662 sqr_ssa_name
663 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
664
665 if (dump_file)
666 {
667 fprintf (dump_file, "Replacing original division\n");
668 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
669 fprintf (dump_file, "with new division\n");
670 }
671 stmt
672 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
673 gimple_assign_rhs1 (stmt), a);
674 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
675 gsi_remove (def_gsi, true);
676 *def_gsi = gsi_for_stmt (stmt);
677 fold_stmt_inplace (def_gsi);
678 update_stmt (stmt);
679
680 if (dump_file)
681 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
682
683 delete_div = false;
684 gimple *sqr_stmt;
685 unsigned int i;
686 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
687 {
688 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
689 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
690 update_stmt (sqr_stmt);
691 }
692 }
693 if (!mult_stmts.is_empty ())
694 {
695 /* r2 = a * x. Transform this into:
696 r2 = t (The original sqrt (a)). */
697 unsigned int i;
698 gimple *mult_stmt = NULL;
699 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
700 {
701 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
702
703 if (dump_file)
704 {
705 fprintf (dump_file, "Replacing squaring multiplication\n");
706 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
707 fprintf (dump_file, "with assignment\n");
708 }
709 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
710 fold_stmt_inplace (&gsi2);
711 update_stmt (mult_stmt);
712 if (dump_file)
713 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
714 }
715 }
716
717 if (has_other_use)
718 {
719 /* Using the two temporaries tmp1, tmp2 from above
720 the original x is now:
721 x = tmp1 * tmp2. */
722 gcc_assert (orig_sqrt_ssa_name);
723 gcc_assert (sqr_ssa_name);
724
725 gimple *new_stmt
726 = gimple_build_assign (x, MULT_EXPR,
727 orig_sqrt_ssa_name, sqr_ssa_name);
728 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
729 update_stmt (stmt);
730 }
731 else if (delete_div)
732 {
733 /* Remove the original division. */
734 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
735 gsi_remove (&gsi2, true);
736 release_defs (stmt);
737 }
738 else
739 release_ssa_name (x);
740 }
741
742 /* Look for floating-point divisions among DEF's uses, and try to
743 replace them by multiplications with the reciprocal. Add
744 as many statements computing the reciprocal as needed.
745
746 DEF must be a GIMPLE register of a floating-point type. */
747
748 static void
749 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
750 {
751 use_operand_p use_p, square_use_p;
752 imm_use_iterator use_iter, square_use_iter;
753 tree square_def;
754 struct occurrence *occ;
755 int count = 0;
756 int threshold;
757 int square_recip_count = 0;
758 int sqrt_recip_count = 0;
759
760 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
761 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
762
763 /* If DEF is a square (x * x), count the number of divisions by x.
764 If there are more divisions by x than by (DEF * DEF), prefer to optimize
765 the reciprocal of x instead of DEF. This improves cases like:
766 def = x * x
767 t0 = a / def
768 t1 = b / def
769 t2 = c / x
770 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
771 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
772
773 if (is_gimple_assign (def_stmt)
774 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
775 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
776 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
777 {
778 tree op0 = gimple_assign_rhs1 (def_stmt);
779
780 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
781 {
782 gimple *use_stmt = USE_STMT (use_p);
783 if (is_division_by (use_stmt, op0))
784 sqrt_recip_count++;
785 }
786 }
787
788 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
789 {
790 gimple *use_stmt = USE_STMT (use_p);
791 if (is_division_by (use_stmt, def))
792 {
793 register_division_in (gimple_bb (use_stmt), 2);
794 count++;
795 }
796
797 if (is_square_of (use_stmt, def))
798 {
799 square_def = gimple_assign_lhs (use_stmt);
800 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
801 {
802 gimple *square_use_stmt = USE_STMT (square_use_p);
803 if (is_division_by (square_use_stmt, square_def))
804 {
805 /* This is executed twice for each division by a square. */
806 register_division_in (gimple_bb (square_use_stmt), 1);
807 square_recip_count++;
808 }
809 }
810 }
811 }
812
813 /* Square reciprocals were counted twice above. */
814 square_recip_count /= 2;
815
816 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
817 if (sqrt_recip_count > square_recip_count)
818 goto out;
819
820 /* Do the expensive part only if we can hope to optimize something. */
821 if (count + square_recip_count >= threshold && count >= 1)
822 {
823 gimple *use_stmt;
824 for (occ = occ_head; occ; occ = occ->next)
825 {
826 compute_merit (occ);
827 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
828 square_recip_count, threshold);
829 }
830
831 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
832 {
833 if (is_division_by (use_stmt, def))
834 {
835 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
836 replace_reciprocal (use_p);
837 }
838 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
839 {
840 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
841 {
842 /* Find all uses of the square that are divisions and
843                  replace them by multiplications with the inverse. */
844 imm_use_iterator square_iterator;
845 gimple *powmult_use_stmt = USE_STMT (use_p);
846 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
847
848 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
849 square_iterator, powmult_def_name)
850 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
851 {
852 gimple *powmult_use_stmt = USE_STMT (square_use_p);
853 if (is_division_by (powmult_use_stmt, powmult_def_name))
854 replace_reciprocal_squares (square_use_p);
855 }
856 }
857 }
858 }
859 }
860
861 out:
862 for (occ = occ_head; occ; )
863 occ = free_bb (occ);
864
865 occ_head = NULL;
866 }
867
868 /* Return an internal function that implements the reciprocal of CALL,
869 or IFN_LAST if there is no such function that the target supports. */
870
871 internal_fn
872 internal_fn_reciprocal (gcall *call)
873 {
874 internal_fn ifn;
875
876 switch (gimple_call_combined_fn (call))
877 {
878 CASE_CFN_SQRT:
879 CASE_CFN_SQRT_FN:
880 ifn = IFN_RSQRT;
881 break;
882
883 default:
884 return IFN_LAST;
885 }
886
887 tree_pair types = direct_internal_fn_types (ifn, call);
888 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
889 return IFN_LAST;
890
891 return ifn;
892 }
893
894 /* Go through all the floating-point SSA_NAMEs, and call
895 execute_cse_reciprocals_1 on each of them. */
896 namespace {
897
898 const pass_data pass_data_cse_reciprocals =
899 {
900 GIMPLE_PASS, /* type */
901 "recip", /* name */
902 OPTGROUP_NONE, /* optinfo_flags */
903 TV_TREE_RECIP, /* tv_id */
904 PROP_ssa, /* properties_required */
905 0, /* properties_provided */
906 0, /* properties_destroyed */
907 0, /* todo_flags_start */
908 TODO_update_ssa, /* todo_flags_finish */
909 };
910
911 class pass_cse_reciprocals : public gimple_opt_pass
912 {
913 public:
914 pass_cse_reciprocals (gcc::context *ctxt)
915 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
916 {}
917
918 /* opt_pass methods: */
919 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
920 virtual unsigned int execute (function *);
921
922 }; // class pass_cse_reciprocals
923
924 unsigned int
925 pass_cse_reciprocals::execute (function *fun)
926 {
927 basic_block bb;
928 tree arg;
929
930 occ_pool = new object_allocator<occurrence> ("dominators for recip");
931
932 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
933 calculate_dominance_info (CDI_DOMINATORS);
934 calculate_dominance_info (CDI_POST_DOMINATORS);
935
936 if (flag_checking)
937 FOR_EACH_BB_FN (bb, fun)
938 gcc_assert (!bb->aux);
939
940 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
941 if (FLOAT_TYPE_P (TREE_TYPE (arg))
942 && is_gimple_reg (arg))
943 {
944 tree name = ssa_default_def (fun, arg);
945 if (name)
946 execute_cse_reciprocals_1 (NULL, name);
947 }
948
949 FOR_EACH_BB_FN (bb, fun)
950 {
951 tree def;
952
953 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
954 gsi_next (&gsi))
955 {
956 gphi *phi = gsi.phi ();
957 def = PHI_RESULT (phi);
958 if (! virtual_operand_p (def)
959 && FLOAT_TYPE_P (TREE_TYPE (def)))
960 execute_cse_reciprocals_1 (NULL, def);
961 }
962
963 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
964 gsi_next (&gsi))
965 {
966 gimple *stmt = gsi_stmt (gsi);
967
968 if (gimple_has_lhs (stmt)
969 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
970 && FLOAT_TYPE_P (TREE_TYPE (def))
971 && TREE_CODE (def) == SSA_NAME)
972 {
973 execute_cse_reciprocals_1 (&gsi, def);
974 stmt = gsi_stmt (gsi);
975 if (flag_unsafe_math_optimizations
976 && is_gimple_assign (stmt)
977 && gimple_assign_lhs (stmt) == def
978 && !stmt_can_throw_internal (cfun, stmt)
979 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
980 optimize_recip_sqrt (&gsi, def);
981 }
982 }
983
984 if (optimize_bb_for_size_p (bb))
985 continue;
986
987 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
988 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
989 gsi_next (&gsi))
990 {
991 gimple *stmt = gsi_stmt (gsi);
992
993 if (is_gimple_assign (stmt)
994 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
995 {
996 tree arg1 = gimple_assign_rhs2 (stmt);
997 gimple *stmt1;
998
999 if (TREE_CODE (arg1) != SSA_NAME)
1000 continue;
1001
1002 stmt1 = SSA_NAME_DEF_STMT (arg1);
1003
1004 if (is_gimple_call (stmt1)
1005 && gimple_call_lhs (stmt1))
1006 {
1007 bool fail;
1008 imm_use_iterator ui;
1009 use_operand_p use_p;
1010 tree fndecl = NULL_TREE;
1011
1012 gcall *call = as_a <gcall *> (stmt1);
1013 internal_fn ifn = internal_fn_reciprocal (call);
1014 if (ifn == IFN_LAST)
1015 {
1016 fndecl = gimple_call_fndecl (call);
1017 if (!fndecl
1018 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1019 continue;
1020 fndecl = targetm.builtin_reciprocal (fndecl);
1021 if (!fndecl)
1022 continue;
1023 }
1024
1025 /* Check that all uses of the SSA name are divisions,
1026 otherwise replacing the defining statement will do
1027 the wrong thing. */
1028 fail = false;
1029 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1030 {
1031 gimple *stmt2 = USE_STMT (use_p);
1032 if (is_gimple_debug (stmt2))
1033 continue;
1034 if (!is_gimple_assign (stmt2)
1035 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1036 || gimple_assign_rhs1 (stmt2) == arg1
1037 || gimple_assign_rhs2 (stmt2) != arg1)
1038 {
1039 fail = true;
1040 break;
1041 }
1042 }
1043 if (fail)
1044 continue;
1045
1046 gimple_replace_ssa_lhs (call, arg1);
1047 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1048 {
1049 auto_vec<tree, 4> args;
1050 for (unsigned int i = 0;
1051 i < gimple_call_num_args (call); i++)
1052 args.safe_push (gimple_call_arg (call, i));
1053 gcall *stmt2;
1054 if (ifn == IFN_LAST)
1055 stmt2 = gimple_build_call_vec (fndecl, args);
1056 else
1057 stmt2 = gimple_build_call_internal_vec (ifn, args);
1058 gimple_call_set_lhs (stmt2, arg1);
1059 gimple_move_vops (stmt2, call);
1060 gimple_call_set_nothrow (stmt2,
1061 gimple_call_nothrow_p (call));
1062 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1063 gsi_replace (&gsi2, stmt2, true);
1064 }
1065 else
1066 {
1067 if (ifn == IFN_LAST)
1068 gimple_call_set_fndecl (call, fndecl);
1069 else
1070 gimple_call_set_internal_fn (call, ifn);
1071 update_stmt (call);
1072 }
1073 reciprocal_stats.rfuncs_inserted++;
1074
1075 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1076 {
1077 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1078 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1079 fold_stmt_inplace (&gsi);
1080 update_stmt (stmt);
1081 }
1082 }
1083 }
1084 }
1085 }
1086
1087 statistics_counter_event (fun, "reciprocal divs inserted",
1088 reciprocal_stats.rdivs_inserted);
1089 statistics_counter_event (fun, "reciprocal functions inserted",
1090 reciprocal_stats.rfuncs_inserted);
1091
1092 free_dominance_info (CDI_DOMINATORS);
1093 free_dominance_info (CDI_POST_DOMINATORS);
1094 delete occ_pool;
1095 return 0;
1096 }
1097
1098 } // anon namespace
1099
1100 gimple_opt_pass *
1101 make_pass_cse_reciprocals (gcc::context *ctxt)
1102 {
1103 return new pass_cse_reciprocals (ctxt);
1104 }
1105
1106 /* If NAME is the result of a type conversion, look for other
1107 equivalent dominating or dominated conversions, and replace all
1108 uses with the earliest dominating name, removing the redundant
1109 conversions. Return the prevailing name. */
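/* A rough example of the effect of execute_cse_conv_1 (illustrative only):
   given two equivalent casts of the same SSA name

     y1 = (float) x;   (in a block that dominates the next one)
     ...
     y2 = (float) x;

   all uses of y2 are replaced by y1 and the redundant conversion is
   removed, so the sin/cos/cexpi CSE below sees a single argument name.  */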
1110
1111 static tree
1112 execute_cse_conv_1 (tree name)
1113 {
1114 if (SSA_NAME_IS_DEFAULT_DEF (name)
1115 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1116 return name;
1117
1118 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1119
1120 if (!gimple_assign_cast_p (def_stmt))
1121 return name;
1122
1123 tree src = gimple_assign_rhs1 (def_stmt);
1124
1125 if (TREE_CODE (src) != SSA_NAME)
1126 return name;
1127
1128 imm_use_iterator use_iter;
1129 gimple *use_stmt;
1130
1131 /* Find the earliest dominating def. */
1132 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1133 {
1134 if (use_stmt == def_stmt
1135 || !gimple_assign_cast_p (use_stmt))
1136 continue;
1137
1138 tree lhs = gimple_assign_lhs (use_stmt);
1139
1140 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1141 || (gimple_assign_rhs1 (use_stmt)
1142 != gimple_assign_rhs1 (def_stmt))
1143 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1144 continue;
1145
1146 bool use_dominates;
1147 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1148 {
1149 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1150 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1151 gsi_next (&gsi);
1152 use_dominates = !gsi_end_p (gsi);
1153 }
1154 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1155 gimple_bb (def_stmt)))
1156 use_dominates = false;
1157 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1158 gimple_bb (use_stmt)))
1159 use_dominates = true;
1160 else
1161 continue;
1162
1163 if (use_dominates)
1164 {
1165 std::swap (name, lhs);
1166 std::swap (def_stmt, use_stmt);
1167 }
1168 }
1169
1170 /* Now go through all uses of SRC again, replacing the equivalent
1171 dominated conversions. We may replace defs that were not
1172 dominated by the then-prevailing defs when we first visited
1173 them. */
1174 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1175 {
1176 if (use_stmt == def_stmt
1177 || !gimple_assign_cast_p (use_stmt))
1178 continue;
1179
1180 tree lhs = gimple_assign_lhs (use_stmt);
1181
1182 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1183 || (gimple_assign_rhs1 (use_stmt)
1184 != gimple_assign_rhs1 (def_stmt))
1185 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1186 continue;
1187
1188 if (gimple_bb (def_stmt) == gimple_bb (use_stmt)
1189 || dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1190 gimple_bb (def_stmt)))
1191 {
1192 sincos_stats.conv_removed++;
1193
1194 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1195 replace_uses_by (lhs, name);
1196 gsi_remove (&gsi, true);
1197 }
1198 }
1199
1200 return name;
1201 }
1202
1203 /* Records an occurrence at statement USE_STMT in the vector of statements
1204    STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB is not
1205    yet initialized.  Returns true if the occurrence was pushed on
1206 the vector. Adjusts *TOP_BB to be the basic block dominating all
1207 statements in the vector. */
1208
1209 static bool
1210 maybe_record_sincos (vec<gimple *> *stmts,
1211 basic_block *top_bb, gimple *use_stmt)
1212 {
1213 basic_block use_bb = gimple_bb (use_stmt);
1214 if (*top_bb
1215 && (*top_bb == use_bb
1216 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1217 stmts->safe_push (use_stmt);
1218 else if (!*top_bb
1219 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1220 {
1221 stmts->safe_push (use_stmt);
1222 *top_bb = use_bb;
1223 }
1224 else
1225 return false;
1226
1227 return true;
1228 }
1229
1230 /* Look for sin, cos and cexpi calls with the same argument NAME and
1231 create a single call to cexpi CSEing the result in this case.
1232 We first walk over all immediate uses of the argument collecting
1233 statements that we can CSE in a vector and in a second pass replace
1234 the statement rhs with a REALPART or IMAGPART expression on the
1235 result of the cexpi call we insert before the use statement that
1236 dominates all other candidates. */
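/* For instance (a sketch of the rewrite this function performs):

     s = sin (x);
     c = cos (x);

   becomes

     sincostmp = cexpi (x);
     s = IMAGPART_EXPR <sincostmp>;
     c = REALPART_EXPR <sincostmp>;

   with the cexpi call inserted in the block that dominates all the
   recorded uses.  */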
1237
1238 static bool
1239 execute_cse_sincos_1 (tree name)
1240 {
1241 gimple_stmt_iterator gsi;
1242 imm_use_iterator use_iter;
1243 tree fndecl, res, type = NULL_TREE;
1244 gimple *def_stmt, *use_stmt, *stmt;
1245 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1246 auto_vec<gimple *> stmts;
1247 basic_block top_bb = NULL;
1248 int i;
1249 bool cfg_changed = false;
1250
1251 name = execute_cse_conv_1 (name);
1252
1253 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1254 {
1255 if (gimple_code (use_stmt) != GIMPLE_CALL
1256 || !gimple_call_lhs (use_stmt))
1257 continue;
1258
1259 switch (gimple_call_combined_fn (use_stmt))
1260 {
1261 CASE_CFN_COS:
1262 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1263 break;
1264
1265 CASE_CFN_SIN:
1266 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1267 break;
1268
1269 CASE_CFN_CEXPI:
1270 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1271 break;
1272
1273 default:;
1274 continue;
1275 }
1276
1277 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1278 if (!type)
1279 {
1280 type = t;
1281 t = TREE_TYPE (name);
1282 }
1283 /* This checks that NAME has the right type in the first round,
1284 and, in subsequent rounds, that the built_in type is the same
1285 type, or a compatible type. */
1286 if (type != t && !types_compatible_p (type, t))
1287 RETURN_FROM_IMM_USE_STMT (use_iter, false);
1288 }
1289 if (seen_cos + seen_sin + seen_cexpi <= 1)
1290 return false;
1291
1292 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1293 the name def statement. */
1294 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1295 if (!fndecl)
1296 return false;
1297 stmt = gimple_build_call (fndecl, 1, name);
1298 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1299 gimple_call_set_lhs (stmt, res);
1300
1301 def_stmt = SSA_NAME_DEF_STMT (name);
1302 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1303 && gimple_code (def_stmt) != GIMPLE_PHI
1304 && gimple_bb (def_stmt) == top_bb)
1305 {
1306 gsi = gsi_for_stmt (def_stmt);
1307 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1308 }
1309 else
1310 {
1311 gsi = gsi_after_labels (top_bb);
1312 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1313 }
1314 sincos_stats.inserted++;
1315
1316 /* And adjust the recorded old call sites. */
1317 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1318 {
1319 tree rhs = NULL;
1320
1321 switch (gimple_call_combined_fn (use_stmt))
1322 {
1323 CASE_CFN_COS:
1324 rhs = fold_build1 (REALPART_EXPR, type, res);
1325 break;
1326
1327 CASE_CFN_SIN:
1328 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1329 break;
1330
1331 CASE_CFN_CEXPI:
1332 rhs = res;
1333 break;
1334
1335 default:;
1336 gcc_unreachable ();
1337 }
1338
1339 /* Replace call with a copy. */
1340 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1341
1342 gsi = gsi_for_stmt (use_stmt);
1343 gsi_replace (&gsi, stmt, true);
1344 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1345 cfg_changed = true;
1346 }
1347
1348 return cfg_changed;
1349 }
1350
1351 /* To evaluate powi(x,n), the floating point value x raised to the
1352 constant integer exponent n, we use a hybrid algorithm that
1353 combines the "window method" with look-up tables. For an
1354 introduction to exponentiation algorithms and "addition chains",
1355 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1356 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1357 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1358 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1359
1360 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1361 multiplications to inline before calling the system library's pow
1362 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1363 so this default never requires calling pow, powf or powl. */
1364
1365 #ifndef POWI_MAX_MULTS
1366 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1367 #endif
1368
1369 /* The size of the "optimal power tree" lookup table. All
1370 exponents less than this value are simply looked up in the
1371 powi_table below. This threshold is also used to size the
1372 cache of pseudo registers that hold intermediate results. */
1373 #define POWI_TABLE_SIZE 256
1374
1375 /* The size, in bits of the window, used in the "window method"
1376 exponentiation algorithm. This is equivalent to a radix of
1377 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1378 #define POWI_WINDOW_SIZE 3
1379
1380 /* The following table is an efficient representation of an
1381 "optimal power tree". For each value, i, the corresponding
1382    value, j, in the table states that an optimal evaluation
1383 sequence for calculating pow(x,i) can be found by evaluating
1384 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1385 100 integers is given in Knuth's "Seminumerical algorithms". */
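/* Worked example: powi_table[13] == 10, so x**13 is evaluated as
   x**10 * x**3; recursing through the table, powi_table[10] == 5,
   powi_table[5] == 3, powi_table[3] == 2 and powi_table[2] == 1, giving
   the chain x**2, x**3, x**5, x**10, x**13 in five multiplications.  */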
1386
1387 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1388 {
1389 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1390 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1391 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1392 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1393 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1394 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1395 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1396 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1397 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1398 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1399 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1400 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1401 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1402 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1403 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1404 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1405 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1406 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1407 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1408 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1409 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1410 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1411 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1412 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1413 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1414 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1415 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1416 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1417 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1418 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1419 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1420 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1421 };
1422
1423
1424 /* Return the number of multiplications required to calculate
1425 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1426 subroutine of powi_cost. CACHE is an array indicating
1427 which exponents have already been calculated. */
1428
1429 static int
1430 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1431 {
1432 /* If we've already calculated this exponent, then this evaluation
1433 doesn't require any additional multiplications. */
1434 if (cache[n])
1435 return 0;
1436
1437 cache[n] = true;
1438 return powi_lookup_cost (n - powi_table[n], cache)
1439 + powi_lookup_cost (powi_table[n], cache) + 1;
1440 }
1441
1442 /* Return the number of multiplications required to calculate
1443 powi(x,n) for an arbitrary x, given the exponent N. This
1444 function needs to be kept in sync with powi_as_mults below. */
1445
1446 static int
1447 powi_cost (HOST_WIDE_INT n)
1448 {
1449 bool cache[POWI_TABLE_SIZE];
1450 unsigned HOST_WIDE_INT digit;
1451 unsigned HOST_WIDE_INT val;
1452 int result;
1453
1454 if (n == 0)
1455 return 0;
1456
1457 /* Ignore the reciprocal when calculating the cost. */
1458 val = (n < 0) ? -n : n;
1459
1460 /* Initialize the exponent cache. */
1461 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1462 cache[1] = true;
1463
1464 result = 0;
1465
1466 while (val >= POWI_TABLE_SIZE)
1467 {
1468 if (val & 1)
1469 {
1470 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1471 result += powi_lookup_cost (digit, cache)
1472 + POWI_WINDOW_SIZE + 1;
1473 val >>= POWI_WINDOW_SIZE;
1474 }
1475 else
1476 {
1477 val >>= 1;
1478 result++;
1479 }
1480 }
1481
1482 return result + powi_lookup_cost (val, cache);
1483 }
1484
1485 /* Recursive subroutine of powi_as_mults. This function takes the
1486 array, CACHE, of already calculated exponents and an exponent N and
1487 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
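/* Illustrative trace (not from the sources): for N == 300, which is
   >= POWI_TABLE_SIZE and even, the result is built as x**150 squared,
   and 150 is then small enough to be handled through powi_table.  For an
   odd N >= POWI_TABLE_SIZE, a POWI_WINDOW_SIZE-bit digit is peeled off
   and handled through the table instead.  */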
1488
1489 static tree
1490 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1491 HOST_WIDE_INT n, tree *cache)
1492 {
1493 tree op0, op1, ssa_target;
1494 unsigned HOST_WIDE_INT digit;
1495 gassign *mult_stmt;
1496
1497 if (n < POWI_TABLE_SIZE && cache[n])
1498 return cache[n];
1499
1500 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1501
1502 if (n < POWI_TABLE_SIZE)
1503 {
1504 cache[n] = ssa_target;
1505 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1506 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1507 }
1508 else if (n & 1)
1509 {
1510 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1511 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1512 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1513 }
1514 else
1515 {
1516 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1517 op1 = op0;
1518 }
1519
1520 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1521 gimple_set_location (mult_stmt, loc);
1522 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1523
1524 return ssa_target;
1525 }
1526
1527 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1528 This function needs to be kept in sync with powi_cost above. */
1529
1530 static tree
1531 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1532 tree arg0, HOST_WIDE_INT n)
1533 {
1534 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1535 gassign *div_stmt;
1536 tree target;
1537
1538 if (n == 0)
1539 return build_real (type, dconst1);
1540
1541 memset (cache, 0, sizeof (cache));
1542 cache[1] = arg0;
1543
1544 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1545 if (n >= 0)
1546 return result;
1547
1548 /* If the original exponent was negative, reciprocate the result. */
1549 target = make_temp_ssa_name (type, NULL, "powmult");
1550 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1551 build_real (type, dconst1), result);
1552 gimple_set_location (div_stmt, loc);
1553 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1554
1555 return target;
1556 }
1557
1558 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1559 location info LOC. If the arguments are appropriate, create an
1560 equivalent sequence of statements prior to GSI using an optimal
1561    number of multiplications, and return an expression holding the
1562 result. */
1563
1564 static tree
1565 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1566 tree arg0, HOST_WIDE_INT n)
1567 {
1568 /* Avoid largest negative number. */
1569 if (n != -n
1570 && ((n >= -1 && n <= 2)
1571 || (optimize_function_for_speed_p (cfun)
1572 && powi_cost (n) <= POWI_MAX_MULTS)))
1573 return powi_as_mults (gsi, loc, arg0, n);
1574
1575 return NULL_TREE;
1576 }
1577
1578 /* Build a gimple call statement that calls FN with argument ARG.
1579 Set the lhs of the call statement to a fresh SSA name. Insert the
1580 statement prior to GSI's current position, and return the fresh
1581 SSA name. */
1582
1583 static tree
1584 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1585 tree fn, tree arg)
1586 {
1587 gcall *call_stmt;
1588 tree ssa_target;
1589
1590 call_stmt = gimple_build_call (fn, 1, arg);
1591 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1592 gimple_set_lhs (call_stmt, ssa_target);
1593 gimple_set_location (call_stmt, loc);
1594 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1595
1596 return ssa_target;
1597 }
1598
1599 /* Build a gimple binary operation with the given CODE and arguments
1600 ARG0, ARG1, assigning the result to a new SSA name for variable
1601 TARGET. Insert the statement prior to GSI's current position, and
1602    return the fresh SSA name. */
1603
1604 static tree
1605 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1606 const char *name, enum tree_code code,
1607 tree arg0, tree arg1)
1608 {
1609 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1610 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1611 gimple_set_location (stmt, loc);
1612 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1613 return result;
1614 }
1615
1616 /* Build a gimple reference operation with the given CODE and argument
1617 ARG, assigning the result to a new SSA name of TYPE with NAME.
1618 Insert the statement prior to GSI's current position, and return
1619 the fresh SSA name. */
1620
1621 static inline tree
1622 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1623 const char *name, enum tree_code code, tree arg0)
1624 {
1625 tree result = make_temp_ssa_name (type, NULL, name);
1626 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1627 gimple_set_location (stmt, loc);
1628 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1629 return result;
1630 }
1631
1632 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1633 prior to GSI's current position, and return the fresh SSA name. */
1634
1635 static tree
1636 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1637 tree type, tree val)
1638 {
1639 tree result = make_ssa_name (type);
1640 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1641 gimple_set_location (stmt, loc);
1642 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1643 return result;
1644 }
1645
1646 struct pow_synth_sqrt_info
1647 {
1648 bool *factors;
1649 unsigned int deepest;
1650 unsigned int num_mults;
1651 };
1652
1653 /* Return true iff the real value C can be represented as a
1654 sum of powers of 0.5 up to N. That is:
1655 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1656 Record in INFO the various parameters of the synthesis algorithm such
1657 as the factors a[i], the maximum 0.5 power and the number of
1658 multiplications that will be required. */
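/* Worked example (following the loop below): for C == 0.625 and N >= 3,
   0.625 == 0.5 + 0.125, so factors[] == {1, 0, 1}, deepest == 3 and
   num_mults == 1, corresponding to sqrt (x) * sqrt (sqrt (sqrt (x))).  */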
1659
1660 bool
1661 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1662 struct pow_synth_sqrt_info *info)
1663 {
1664 REAL_VALUE_TYPE factor = dconsthalf;
1665 REAL_VALUE_TYPE remainder = c;
1666
1667 info->deepest = 0;
1668 info->num_mults = 0;
1669 memset (info->factors, 0, n * sizeof (bool));
1670
1671 for (unsigned i = 0; i < n; i++)
1672 {
1673 REAL_VALUE_TYPE res;
1674
1675 /* If something inexact happened bail out now. */
1676 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1677 return false;
1678
1679 /* We have hit zero. The number is representable as a sum
1680 of powers of 0.5. */
1681 if (real_equal (&res, &dconst0))
1682 {
1683 info->factors[i] = true;
1684 info->deepest = i + 1;
1685 return true;
1686 }
1687 else if (!REAL_VALUE_NEGATIVE (res))
1688 {
1689 remainder = res;
1690 info->factors[i] = true;
1691 info->num_mults++;
1692 }
1693 else
1694 info->factors[i] = false;
1695
1696 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1697 }
1698 return false;
1699 }
1700
1701 /* Return the tree corresponding to FN being applied
1702 to ARG N times at GSI and LOC.
1703 Look up previous results from CACHE if need be.
1704 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1705
1706 static tree
1707 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1708 tree fn, location_t loc, tree *cache)
1709 {
1710 tree res = cache[n];
1711 if (!res)
1712 {
1713 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1714 res = build_and_insert_call (gsi, loc, fn, prev);
1715 cache[n] = res;
1716 }
1717
1718 return res;
1719 }
1720
1721 /* Print to STREAM the repeated application of function FNAME to ARG
1722 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1723 "foo (foo (x))". */
1724
1725 static void
1726 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1727 unsigned int n)
1728 {
1729 if (n == 0)
1730 fprintf (stream, "%s", arg);
1731 else
1732 {
1733 fprintf (stream, "%s (", fname);
1734 print_nested_fn (stream, fname, arg, n - 1);
1735 fprintf (stream, ")");
1736 }
1737 }
1738
1739 /* Print to STREAM the fractional sequence of sqrt chains
1740 applied to ARG, described by INFO. Used for the dump file. */
1741
1742 static void
1743 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1744 struct pow_synth_sqrt_info *info)
1745 {
1746 for (unsigned int i = 0; i < info->deepest; i++)
1747 {
1748 bool is_set = info->factors[i];
1749 if (is_set)
1750 {
1751 print_nested_fn (stream, "sqrt", arg, i + 1);
1752 if (i != info->deepest - 1)
1753 fprintf (stream, " * ");
1754 }
1755 }
1756 }
1757
1758 /* Print to STREAM a representation of raising ARG to an integer
1759 power N. Used for the dump file. */
1760
1761 static void
1762 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1763 {
1764 if (n > 1)
1765 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1766 else if (n == 1)
1767 fprintf (stream, "%s", arg);
1768 }
1769
1770 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1771 square roots. Place at GSI and LOC. Limit the maximum depth
1772 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1773 result of the expanded sequence or NULL_TREE if the expansion failed.
1774
1775 This routine assumes that ARG1 is a real number with a fractional part
1776 (the integer exponent case will have been handled earlier in
1777 gimple_expand_builtin_pow).
1778
1779 For ARG1 > 0.0:
1780 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1781 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1782 FRAC_PART == ARG1 - WHOLE_PART:
1783 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1784 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1785 if it can be expressed as such, that is if FRAC_PART satisfies:
1786 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1787 where integer a[i] is either 0 or 1.
1788
1789 Example:
1790 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1791 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1792
1793 For ARG1 < 0.0 there are two approaches:
1794 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1795 is calculated as above.
1796
1797 Example:
1798 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1799 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1800
1801 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1802 FRAC_PART := ARG1 - WHOLE_PART
1803 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1804 Example:
1805 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1806 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1807
1808 For ARG1 < 0.0 we choose between (A) and (B) depending on
1809 how many multiplications we'd have to do.
1810 So, for the example in (B): POW (x, -5.875), if we were to
1811 follow algorithm (A) we would produce:
1812 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1813 which contains more multiplications than approach (B).
1814
1815 Hopefully, this approach will eliminate potentially expensive POW library
1816 calls when unsafe floating point math is enabled and allow the compiler to
1817 further optimise the multiplies, square roots and divides produced by this
1818 function. */
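/* As an illustration of the half-series matching performed by
   representable_as_half_series_p: FRAC_PART == 0.625 is 0.101 in binary, so
   a[1] = 1, a[2] = 0, a[3] = 1 and the fractional factor expands to
   SQRT (x) * SQRT (SQRT (SQRT (x))).  */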
1819
1820 static tree
1821 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1822 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1823 {
1824 tree type = TREE_TYPE (arg0);
1825 machine_mode mode = TYPE_MODE (type);
1826 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1827 bool one_over = true;
1828
1829 if (!sqrtfn)
1830 return NULL_TREE;
1831
1832 if (TREE_CODE (arg1) != REAL_CST)
1833 return NULL_TREE;
1834
1835 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1836
1837 gcc_assert (max_depth > 0);
1838 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1839
1840 struct pow_synth_sqrt_info synth_info;
1841 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1842 synth_info.deepest = 0;
1843 synth_info.num_mults = 0;
1844
1845 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1846 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1847
1848 /* The whole and fractional parts of exp. */
1849 REAL_VALUE_TYPE whole_part;
1850 REAL_VALUE_TYPE frac_part;
1851
1852 real_floor (&whole_part, mode, &exp);
1853 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1854
1855
1856 REAL_VALUE_TYPE ceil_whole = dconst0;
1857 REAL_VALUE_TYPE ceil_fract = dconst0;
1858
1859 if (neg_exp)
1860 {
1861 real_ceil (&ceil_whole, mode, &exp);
1862 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1863 }
1864
1865 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1866 return NULL_TREE;
1867
1868 /* Check whether it's more profitable to not use 1.0 / ... */
1869 if (neg_exp)
1870 {
1871 struct pow_synth_sqrt_info alt_synth_info;
1872 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1873 alt_synth_info.deepest = 0;
1874 alt_synth_info.num_mults = 0;
1875
1876 if (representable_as_half_series_p (ceil_fract, max_depth,
1877 &alt_synth_info)
1878 && alt_synth_info.deepest <= synth_info.deepest
1879 && alt_synth_info.num_mults < synth_info.num_mults)
1880 {
1881 whole_part = ceil_whole;
1882 frac_part = ceil_fract;
1883 synth_info.deepest = alt_synth_info.deepest;
1884 synth_info.num_mults = alt_synth_info.num_mults;
1885 memcpy (synth_info.factors, alt_synth_info.factors,
1886 (max_depth + 1) * sizeof (bool));
1887 one_over = false;
1888 }
1889 }
1890
1891 HOST_WIDE_INT n = real_to_integer (&whole_part);
1892 REAL_VALUE_TYPE cint;
1893 real_from_integer (&cint, VOIDmode, n, SIGNED);
1894
1895 if (!real_identical (&whole_part, &cint))
1896 return NULL_TREE;
1897
1898 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1899 return NULL_TREE;
1900
1901 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1902
1903 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1904
1905 /* Calculate the integer part of the exponent. */
1906 if (n > 1)
1907 {
1908 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1909 if (!integer_res)
1910 return NULL_TREE;
1911 }
1912
1913 if (dump_file)
1914 {
1915 char string[64];
1916
1917 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1918 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1919
1920 if (neg_exp)
1921 {
1922 if (one_over)
1923 {
1924 fprintf (dump_file, "1.0 / (");
1925 dump_integer_part (dump_file, "x", n);
1926 if (n > 0)
1927 fprintf (dump_file, " * ");
1928 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1929 fprintf (dump_file, ")");
1930 }
1931 else
1932 {
1933 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1934 fprintf (dump_file, " / (");
1935 dump_integer_part (dump_file, "x", n);
1936 fprintf (dump_file, ")");
1937 }
1938 }
1939 else
1940 {
1941 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1942 if (n > 0)
1943 fprintf (dump_file, " * ");
1944 dump_integer_part (dump_file, "x", n);
1945 }
1946
1947 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1948 }
1949
1950
1951 tree fract_res = NULL_TREE;
1952 cache[0] = arg0;
1953
1954 /* Calculate the fractional part of the exponent. */
1955 for (unsigned i = 0; i < synth_info.deepest; i++)
1956 {
1957 if (synth_info.factors[i])
1958 {
1959 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1960
1961 if (!fract_res)
1962 fract_res = sqrt_chain;
1963
1964 else
1965 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1966 fract_res, sqrt_chain);
1967 }
1968 }
1969
1970 tree res = NULL_TREE;
1971
1972 if (neg_exp)
1973 {
1974 if (one_over)
1975 {
1976 if (n > 0)
1977 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1978 fract_res, integer_res);
1979 else
1980 res = fract_res;
1981
1982 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1983 build_real (type, dconst1), res);
1984 }
1985 else
1986 {
1987 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1988 fract_res, integer_res);
1989 }
1990 }
1991 else
1992 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1993 fract_res, integer_res);
1994 return res;
1995 }
1996
1997 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1998 with location info LOC. If possible, create an equivalent and
1999 less expensive sequence of statements prior to GSI, and return an
2000 expression holding the result. */
2001
2002 static tree
2003 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2004 tree arg0, tree arg1)
2005 {
2006 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2007 REAL_VALUE_TYPE c2, dconst3;
2008 HOST_WIDE_INT n;
2009 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2010 machine_mode mode;
2011 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2012 bool hw_sqrt_exists, c_is_int, c2_is_int;
2013
2014 dconst1_4 = dconst1;
2015 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2016
2017 /* If the exponent isn't a constant, there's nothing of interest
2018 to be done. */
2019 if (TREE_CODE (arg1) != REAL_CST)
2020 return NULL_TREE;
2021
2022 /* Don't perform the operation if flag_signaling_nans is on
2023 and the operand is a signaling NaN. */
2024 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2025 && ((TREE_CODE (arg0) == REAL_CST
2026 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2027 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2028 return NULL_TREE;
2029
2030 /* If the exponent is equivalent to an integer, expand to an optimal
2031 multiplication sequence when profitable. */
2032 c = TREE_REAL_CST (arg1);
2033 n = real_to_integer (&c);
2034 real_from_integer (&cint, VOIDmode, n, SIGNED);
2035 c_is_int = real_identical (&c, &cint);
2036
2037 if (c_is_int
2038 && ((n >= -1 && n <= 2)
2039 || (flag_unsafe_math_optimizations
2040 && speed_p
2041 && powi_cost (n) <= POWI_MAX_MULTS)))
2042 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2043
2044 /* Attempt various optimizations using sqrt and cbrt. */
2045 type = TREE_TYPE (arg0);
2046 mode = TYPE_MODE (type);
2047 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2048
2049 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2050 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2051 sqrt(-0) = -0. */
2052 if (sqrtfn
2053 && real_equal (&c, &dconsthalf)
2054 && !HONOR_SIGNED_ZEROS (mode))
2055 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2056
2057 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2058
2059 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2060 optimizations since 1./3. is not exactly representable. If x
2061 is negative and finite, the correct value of pow(x,1./3.) is
2062 a NaN with the "invalid" exception raised, because the value
2063 of 1./3. actually has an even denominator. The correct value
2064 of cbrt(x) is a negative real value. */
2065 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2066 dconst1_3 = real_value_truncate (mode, dconst_third ());
2067
2068 if (flag_unsafe_math_optimizations
2069 && cbrtfn
2070 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2071 && real_equal (&c, &dconst1_3))
2072 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2073
2074 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2075 if we don't have a hardware sqrt insn. */
2076 dconst1_6 = dconst1_3;
2077 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2078
2079 if (flag_unsafe_math_optimizations
2080 && sqrtfn
2081 && cbrtfn
2082 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2083 && speed_p
2084 && hw_sqrt_exists
2085 && real_equal (&c, &dconst1_6))
2086 {
2087 /* sqrt(x) */
2088 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2089
2090 /* cbrt(sqrt(x)) */
2091 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2092 }
2093
2094
2095 /* Attempt to expand the POW as a product of square root chains.
2096 Expand the 0.25 case even when optimising for size. */
2097 if (flag_unsafe_math_optimizations
2098 && sqrtfn
2099 && hw_sqrt_exists
2100 && (speed_p || real_equal (&c, &dconst1_4))
2101 && !HONOR_SIGNED_ZEROS (mode))
2102 {
2103 unsigned int max_depth = speed_p
2104 ? param_max_pow_sqrt_depth
2105 : 2;
2106
2107 tree expand_with_sqrts
2108 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2109
2110 if (expand_with_sqrts)
2111 return expand_with_sqrts;
2112 }
2113
2114 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2115 n = real_to_integer (&c2);
2116 real_from_integer (&cint, VOIDmode, n, SIGNED);
2117 c2_is_int = real_identical (&c2, &cint);
2118
2119 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2120
2121 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2122 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2123
2124 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2125 different from pow(x, 1./3.) due to rounding and behavior with
2126 negative x, we need to constrain this transformation to unsafe
2127 math and positive x or finite math. */
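/* Worked example (a sketch, assuming unsafe math and nonnegative x):
   for pow (x, 5./3.) we have 3c = 5, so n = 5 and the result is
   powi (x, 5/3) * powi (cbrt (x), 5%3), i.e. x * cbrt (x) * cbrt (x).  */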
2128 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2129 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2130 real_round (&c2, mode, &c2);
2131 n = real_to_integer (&c2);
2132 real_from_integer (&cint, VOIDmode, n, SIGNED);
2133 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2134 real_convert (&c2, mode, &c2);
2135
2136 if (flag_unsafe_math_optimizations
2137 && cbrtfn
2138 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2139 && real_identical (&c2, &c)
2140 && !c2_is_int
2141 && optimize_function_for_speed_p (cfun)
2142 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2143 {
2144 tree powi_x_ndiv3 = NULL_TREE;
2145
2146 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2147 possible or profitable, give up. Skip the degenerate case when
2148 abs(n) < 3, where the result is always 1. */
2149 if (absu_hwi (n) >= 3)
2150 {
2151 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2152 abs_hwi (n / 3));
2153 if (!powi_x_ndiv3)
2154 return NULL_TREE;
2155 }
2156
2157 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2158 as that creates an unnecessary variable. Instead, just produce
2159 either cbrt(x) or cbrt(x) * cbrt(x). */
2160 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2161
2162 if (absu_hwi (n) % 3 == 1)
2163 powi_cbrt_x = cbrt_x;
2164 else
2165 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2166 cbrt_x, cbrt_x);
2167
2168 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2169 if (absu_hwi (n) < 3)
2170 result = powi_cbrt_x;
2171 else
2172 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2173 powi_x_ndiv3, powi_cbrt_x);
2174
2175 /* If n is negative, reciprocate the result. */
2176 if (n < 0)
2177 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2178 build_real (type, dconst1), result);
2179
2180 return result;
2181 }
2182
2183 /* No optimizations succeeded. */
2184 return NULL_TREE;
2185 }
2186
2187 /* ARG is the argument to a cabs builtin call in GSI with location info
2188 LOC. Create a sequence of statements prior to GSI that calculates
2189 sqrt(R*R + I*I), where R and I are the real and imaginary components
2190 of ARG, respectively. Return an expression holding the result. */
2191
2192 static tree
2193 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2194 {
2195 tree real_part, imag_part, addend1, addend2, sum, result;
2196 tree type = TREE_TYPE (TREE_TYPE (arg));
2197 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2198 machine_mode mode = TYPE_MODE (type);
2199
2200 if (!flag_unsafe_math_optimizations
2201 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2202 || !sqrtfn
2203 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2204 return NULL_TREE;
2205
2206 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2207 REALPART_EXPR, arg);
2208 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2209 real_part, real_part);
2210 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2211 IMAGPART_EXPR, arg);
2212 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2213 imag_part, imag_part);
2214 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2215 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2216
2217 return result;
2218 }
2219
2220 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2221 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2222 an optimal number of multiplies, when n is a constant. */
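/* Illustration of the powi expansion (one optimal chain; the actual sequence
   is chosen by gimple_expand_builtin_powi): powi (x, 5) can be computed with
   three multiplies as t1 = x * x; t2 = t1 * t1; result = t2 * x;  */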
2223
2224 namespace {
2225
2226 const pass_data pass_data_cse_sincos =
2227 {
2228 GIMPLE_PASS, /* type */
2229 "sincos", /* name */
2230 OPTGROUP_NONE, /* optinfo_flags */
2231 TV_TREE_SINCOS, /* tv_id */
2232 PROP_ssa, /* properties_required */
2233 PROP_gimple_opt_math, /* properties_provided */
2234 0, /* properties_destroyed */
2235 0, /* todo_flags_start */
2236 TODO_update_ssa, /* todo_flags_finish */
2237 };
2238
2239 class pass_cse_sincos : public gimple_opt_pass
2240 {
2241 public:
2242 pass_cse_sincos (gcc::context *ctxt)
2243 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2244 {}
2245
2246 /* opt_pass methods: */
2247 virtual bool gate (function *)
2248 {
2249 /* We no longer require either sincos or cexp, since powi expansion
2250 piggybacks on this pass. */
2251 return optimize;
2252 }
2253
2254 virtual unsigned int execute (function *);
2255
2256 }; // class pass_cse_sincos
2257
2258 unsigned int
2259 pass_cse_sincos::execute (function *fun)
2260 {
2261 basic_block bb;
2262 bool cfg_changed = false;
2263
2264 calculate_dominance_info (CDI_DOMINATORS);
2265 memset (&sincos_stats, 0, sizeof (sincos_stats));
2266
2267 FOR_EACH_BB_FN (bb, fun)
2268 {
2269 gimple_stmt_iterator gsi;
2270 bool cleanup_eh = false;
2271
2272 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2273 {
2274 gimple *stmt = gsi_stmt (gsi);
2275
2276 /* Only the last stmt in a bb could throw, no need to call
2277 gimple_purge_dead_eh_edges if we change something in the middle
2278 of a basic block. */
2279 cleanup_eh = false;
2280
2281 if (is_gimple_call (stmt)
2282 && gimple_call_lhs (stmt))
2283 {
2284 tree arg, arg0, arg1, result;
2285 HOST_WIDE_INT n;
2286 location_t loc;
2287
2288 switch (gimple_call_combined_fn (stmt))
2289 {
2290 CASE_CFN_COS:
2291 CASE_CFN_SIN:
2292 CASE_CFN_CEXPI:
2293 arg = gimple_call_arg (stmt, 0);
2294 /* Make sure we have either sincos or cexp. */
2295 if (!targetm.libc_has_function (function_c99_math_complex,
2296 TREE_TYPE (arg))
2297 && !targetm.libc_has_function (function_sincos,
2298 TREE_TYPE (arg)))
2299 break;
2300
2301 if (TREE_CODE (arg) == SSA_NAME)
2302 cfg_changed |= execute_cse_sincos_1 (arg);
2303 break;
2304
2305 CASE_CFN_POW:
2306 arg0 = gimple_call_arg (stmt, 0);
2307 arg1 = gimple_call_arg (stmt, 1);
2308
2309 loc = gimple_location (stmt);
2310 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2311
2312 if (result)
2313 {
2314 tree lhs = gimple_get_lhs (stmt);
2315 gassign *new_stmt = gimple_build_assign (lhs, result);
2316 gimple_set_location (new_stmt, loc);
2317 unlink_stmt_vdef (stmt);
2318 gsi_replace (&gsi, new_stmt, true);
2319 cleanup_eh = true;
2320 if (gimple_vdef (stmt))
2321 release_ssa_name (gimple_vdef (stmt));
2322 }
2323 break;
2324
2325 CASE_CFN_POWI:
2326 arg0 = gimple_call_arg (stmt, 0);
2327 arg1 = gimple_call_arg (stmt, 1);
2328 loc = gimple_location (stmt);
2329
2330 if (real_minus_onep (arg0))
2331 {
2332 tree t0, t1, cond, one, minus_one;
2333 gassign *stmt;
2334
2335 t0 = TREE_TYPE (arg0);
2336 t1 = TREE_TYPE (arg1);
2337 one = build_real (t0, dconst1);
2338 minus_one = build_real (t0, dconstm1);
2339
2340 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2341 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2342 arg1, build_int_cst (t1, 1));
2343 gimple_set_location (stmt, loc);
2344 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2345
2346 result = make_temp_ssa_name (t0, NULL, "powi");
2347 stmt = gimple_build_assign (result, COND_EXPR, cond,
2348 minus_one, one);
2349 gimple_set_location (stmt, loc);
2350 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2351 }
2352 else
2353 {
2354 if (!tree_fits_shwi_p (arg1))
2355 break;
2356
2357 n = tree_to_shwi (arg1);
2358 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2359 }
2360
2361 if (result)
2362 {
2363 tree lhs = gimple_get_lhs (stmt);
2364 gassign *new_stmt = gimple_build_assign (lhs, result);
2365 gimple_set_location (new_stmt, loc);
2366 unlink_stmt_vdef (stmt);
2367 gsi_replace (&gsi, new_stmt, true);
2368 cleanup_eh = true;
2369 if (gimple_vdef (stmt))
2370 release_ssa_name (gimple_vdef (stmt));
2371 }
2372 break;
2373
2374 CASE_CFN_CABS:
2375 arg0 = gimple_call_arg (stmt, 0);
2376 loc = gimple_location (stmt);
2377 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2378
2379 if (result)
2380 {
2381 tree lhs = gimple_get_lhs (stmt);
2382 gassign *new_stmt = gimple_build_assign (lhs, result);
2383 gimple_set_location (new_stmt, loc);
2384 unlink_stmt_vdef (stmt);
2385 gsi_replace (&gsi, new_stmt, true);
2386 cleanup_eh = true;
2387 if (gimple_vdef (stmt))
2388 release_ssa_name (gimple_vdef (stmt));
2389 }
2390 break;
2391
2392 default:;
2393 }
2394 }
2395 }
2396 if (cleanup_eh)
2397 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2398 }
2399
2400 statistics_counter_event (fun, "sincos statements inserted",
2401 sincos_stats.inserted);
2402 statistics_counter_event (fun, "conv statements removed",
2403 sincos_stats.conv_removed);
2404
2405 return cfg_changed ? TODO_cleanup_cfg : 0;
2406 }
2407
2408 } // anon namespace
2409
2410 gimple_opt_pass *
2411 make_pass_cse_sincos (gcc::context *ctxt)
2412 {
2413 return new pass_cse_sincos (ctxt);
2414 }
2415
2416 /* Return true if stmt is a type conversion operation that can be stripped
2417 when used in a widening multiply operation. */
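/* For example (hypothetical types): the conversion in
   'unsigned long long t = (unsigned long long) u' with a 32-bit unsigned u
   can be stripped when RESULT_TYPE is the 64-bit type, because the widening
   multiply itself supplies the required zero-extension.  */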
2418 static bool
2419 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2420 {
2421 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2422
2423 if (TREE_CODE (result_type) == INTEGER_TYPE)
2424 {
2425 tree op_type;
2426 tree inner_op_type;
2427
2428 if (!CONVERT_EXPR_CODE_P (rhs_code))
2429 return false;
2430
2431 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2432
2433 /* If the type of OP has the same precision as the result, then
2434 we can strip this conversion. The multiply operation will be
2435 selected to create the correct extension as a by-product. */
2436 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2437 return true;
2438
2439 /* We can also strip a conversion if it preserves the signed-ness of
2440 the operation and doesn't narrow the range. */
2441 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2442
2443 /* If the inner-most type is unsigned, then we can strip any
2444 intermediate widening operation. If it's signed, then the
2445 intermediate widening operation must also be signed. */
2446 if ((TYPE_UNSIGNED (inner_op_type)
2447 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2448 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2449 return true;
2450
2451 return false;
2452 }
2453
2454 return rhs_code == FIXED_CONVERT_EXPR;
2455 }
2456
2457 /* Return true if RHS is a suitable operand for a widening multiplication,
2458 assuming a target type of TYPE.
2459 There are two cases:
2460
2461 - RHS makes some value at least twice as wide. Store that value
2462 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2463
2464 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2465 and store NULL in *TYPE_OUT. */
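/* Illustrative example (hypothetical types): if TYPE is a 64-bit integer
   type and RHS is an SSA name defined by a cast from a 32-bit value, the
   32-bit value is stored in *NEW_RHS_OUT and its type in *TYPE_OUT.  A plain
   INTEGER_CST RHS is stored in *NEW_RHS_OUT with *TYPE_OUT set to NULL so
   that the caller can default it from the other operand.  */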
2466
2467 static bool
2468 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2469 tree *new_rhs_out)
2470 {
2471 gimple *stmt;
2472 tree type1, rhs1;
2473
2474 if (TREE_CODE (rhs) == SSA_NAME)
2475 {
2476 stmt = SSA_NAME_DEF_STMT (rhs);
2477 if (is_gimple_assign (stmt))
2478 {
2479 if (! widening_mult_conversion_strippable_p (type, stmt))
2480 rhs1 = rhs;
2481 else
2482 {
2483 rhs1 = gimple_assign_rhs1 (stmt);
2484
2485 if (TREE_CODE (rhs1) == INTEGER_CST)
2486 {
2487 *new_rhs_out = rhs1;
2488 *type_out = NULL;
2489 return true;
2490 }
2491 }
2492 }
2493 else
2494 rhs1 = rhs;
2495
2496 type1 = TREE_TYPE (rhs1);
2497
2498 if (TREE_CODE (type1) != TREE_CODE (type)
2499 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2500 return false;
2501
2502 *new_rhs_out = rhs1;
2503 *type_out = type1;
2504 return true;
2505 }
2506
2507 if (TREE_CODE (rhs) == INTEGER_CST)
2508 {
2509 *new_rhs_out = rhs;
2510 *type_out = NULL;
2511 return true;
2512 }
2513
2514 return false;
2515 }
2516
2517 /* Return true if STMT performs a widening multiplication, assuming the
2518 output type is TYPE. If so, store the unwidened types of the operands
2519 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2520 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2521 and *TYPE2_OUT would give the operands of the multiplication. */
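/* A sketch (hypothetical 16-bit/32-bit types): for
   'unsigned int lhs = (unsigned int) a * (unsigned int) b' with 16-bit a and
   b, *TYPE1_OUT and *TYPE2_OUT become the 16-bit type and *RHS1_OUT and
   *RHS2_OUT the uncast a and b.  */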
2522
2523 static bool
2524 is_widening_mult_p (gimple *stmt,
2525 tree *type1_out, tree *rhs1_out,
2526 tree *type2_out, tree *rhs2_out)
2527 {
2528 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2529
2530 if (TREE_CODE (type) == INTEGER_TYPE)
2531 {
2532 if (TYPE_OVERFLOW_TRAPS (type))
2533 return false;
2534 }
2535 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2536 return false;
2537
2538 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2539 rhs1_out))
2540 return false;
2541
2542 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2543 rhs2_out))
2544 return false;
2545
2546 if (*type1_out == NULL)
2547 {
2548 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2549 return false;
2550 *type1_out = *type2_out;
2551 }
2552
2553 if (*type2_out == NULL)
2554 {
2555 if (!int_fits_type_p (*rhs2_out, *type1_out))
2556 return false;
2557 *type2_out = *type1_out;
2558 }
2559
2560 /* Ensure that the larger of the two operands comes first. */
2561 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2562 {
2563 std::swap (*type1_out, *type2_out);
2564 std::swap (*rhs1_out, *rhs2_out);
2565 }
2566
2567 return true;
2568 }
2569
2570 /* Check to see if the CALL statement is an invocation of copysign
2571 with 1. being the first argument. */
2572 static bool
2573 is_copysign_call_with_1 (gimple *call)
2574 {
2575 gcall *c = dyn_cast <gcall *> (call);
2576 if (! c)
2577 return false;
2578
2579 enum combined_fn code = gimple_call_combined_fn (c);
2580
2581 if (code == CFN_LAST)
2582 return false;
2583
2584 if (builtin_fn_p (code))
2585 {
2586 switch (as_builtin_fn (code))
2587 {
2588 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2589 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2590 return real_onep (gimple_call_arg (c, 0));
2591 default:
2592 return false;
2593 }
2594 }
2595
2596 if (internal_fn_p (code))
2597 {
2598 switch (as_internal_fn (code))
2599 {
2600 case IFN_COPYSIGN:
2601 return real_onep (gimple_call_arg (c, 0));
2602 default:
2603 return false;
2604 }
2605 }
2606
2607 return false;
2608 }
2609
2610 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2611 This only happens when the xorsign optab is defined.  If the
2612 pattern is not an xorsign pattern or if expansion fails, FALSE is
2613 returned; otherwise TRUE is returned. */
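/* A sketch of the rewrite this performs when the xorsign optab exists:
   t_1 = __builtin_copysign (1.0, y_2);
   z_3 = x_4 * t_1;
   becomes
   z_3 = .XORSIGN (x_4, y_2);  */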
2614 static bool
2615 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2616 {
2617 tree treeop0, treeop1, lhs, type;
2618 location_t loc = gimple_location (stmt);
2619 lhs = gimple_assign_lhs (stmt);
2620 treeop0 = gimple_assign_rhs1 (stmt);
2621 treeop1 = gimple_assign_rhs2 (stmt);
2622 type = TREE_TYPE (lhs);
2623 machine_mode mode = TYPE_MODE (type);
2624
2625 if (HONOR_SNANS (type))
2626 return false;
2627
2628 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2629 {
2630 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2631 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2632 {
2633 call0 = SSA_NAME_DEF_STMT (treeop1);
2634 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2635 return false;
2636
2637 treeop1 = treeop0;
2638 }
2639 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2640 return false;
2641
2642 gcall *c = as_a<gcall*> (call0);
2643 treeop0 = gimple_call_arg (c, 1);
2644
2645 gcall *call_stmt
2646 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2647 gimple_set_lhs (call_stmt, lhs);
2648 gimple_set_location (call_stmt, loc);
2649 gsi_replace (gsi, call_stmt, true);
2650 return true;
2651 }
2652
2653 return false;
2654 }
2655
2656 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2657 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2658 value is true iff we converted the statement. */
2659
2660 static bool
2661 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2662 {
2663 tree lhs, rhs1, rhs2, type, type1, type2;
2664 enum insn_code handler;
2665 scalar_int_mode to_mode, from_mode, actual_mode;
2666 optab op;
2667 int actual_precision;
2668 location_t loc = gimple_location (stmt);
2669 bool from_unsigned1, from_unsigned2;
2670
2671 lhs = gimple_assign_lhs (stmt);
2672 type = TREE_TYPE (lhs);
2673 if (TREE_CODE (type) != INTEGER_TYPE)
2674 return false;
2675
2676 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2677 return false;
2678
2679 to_mode = SCALAR_INT_TYPE_MODE (type);
2680 from_mode = SCALAR_INT_TYPE_MODE (type1);
2681 if (to_mode == from_mode)
2682 return false;
2683
2684 from_unsigned1 = TYPE_UNSIGNED (type1);
2685 from_unsigned2 = TYPE_UNSIGNED (type2);
2686
2687 if (from_unsigned1 && from_unsigned2)
2688 op = umul_widen_optab;
2689 else if (!from_unsigned1 && !from_unsigned2)
2690 op = smul_widen_optab;
2691 else
2692 op = usmul_widen_optab;
2693
2694 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2695 &actual_mode);
2696
2697 if (handler == CODE_FOR_nothing)
2698 {
2699 if (op != smul_widen_optab)
2700 {
2701 /* We can use a signed multiply with unsigned types as long as
2702 there is a wider mode to use, or it is the smaller of the two
2703 types that is unsigned. Note that type1 >= type2, always. */
2704 if ((TYPE_UNSIGNED (type1)
2705 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2706 || (TYPE_UNSIGNED (type2)
2707 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2708 {
2709 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2710 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2711 return false;
2712 }
2713
2714 op = smul_widen_optab;
2715 handler = find_widening_optab_handler_and_mode (op, to_mode,
2716 from_mode,
2717 &actual_mode);
2718
2719 if (handler == CODE_FOR_nothing)
2720 return false;
2721
2722 from_unsigned1 = from_unsigned2 = false;
2723 }
2724 else
2725 return false;
2726 }
2727
2728 /* Ensure that the inputs to the handler are in the correct precision
2729 for the opcode. This will be the full mode size. */
2730 actual_precision = GET_MODE_PRECISION (actual_mode);
2731 if (2 * actual_precision > TYPE_PRECISION (type))
2732 return false;
2733 if (actual_precision != TYPE_PRECISION (type1)
2734 || from_unsigned1 != TYPE_UNSIGNED (type1))
2735 rhs1 = build_and_insert_cast (gsi, loc,
2736 build_nonstandard_integer_type
2737 (actual_precision, from_unsigned1), rhs1);
2738 if (actual_precision != TYPE_PRECISION (type2)
2739 || from_unsigned2 != TYPE_UNSIGNED (type2))
2740 rhs2 = build_and_insert_cast (gsi, loc,
2741 build_nonstandard_integer_type
2742 (actual_precision, from_unsigned2), rhs2);
2743
2744 /* Handle constants. */
2745 if (TREE_CODE (rhs1) == INTEGER_CST)
2746 rhs1 = fold_convert (type1, rhs1);
2747 if (TREE_CODE (rhs2) == INTEGER_CST)
2748 rhs2 = fold_convert (type2, rhs2);
2749
2750 gimple_assign_set_rhs1 (stmt, rhs1);
2751 gimple_assign_set_rhs2 (stmt, rhs2);
2752 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2753 update_stmt (stmt);
2754 widen_mul_stats.widen_mults_inserted++;
2755 return true;
2756 }
2757
2758 /* Process a single gimple statement STMT, which is found at the
2759 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2760 rhs (given by CODE), and try to convert it into a
2761 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2762 is true iff we converted the statement. */
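/* As an illustration (assuming the target provides a suitable
   multiply-and-accumulate pattern): with 32-bit a and b and a 64-bit
   accumulator acc,
   _1 = (long long) a * (long long) b;
   acc_2 = _1 + acc_3;
   can be rewritten as acc_2 = WIDEN_MULT_PLUS_EXPR <a, b, acc_3>.  */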
2763
2764 static bool
2765 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2766 enum tree_code code)
2767 {
2768 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2769 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2770 tree type, type1, type2, optype;
2771 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2772 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2773 optab this_optab;
2774 enum tree_code wmult_code;
2775 enum insn_code handler;
2776 scalar_mode to_mode, from_mode, actual_mode;
2777 location_t loc = gimple_location (stmt);
2778 int actual_precision;
2779 bool from_unsigned1, from_unsigned2;
2780
2781 lhs = gimple_assign_lhs (stmt);
2782 type = TREE_TYPE (lhs);
2783 if (TREE_CODE (type) != INTEGER_TYPE
2784 && TREE_CODE (type) != FIXED_POINT_TYPE)
2785 return false;
2786
2787 if (code == MINUS_EXPR)
2788 wmult_code = WIDEN_MULT_MINUS_EXPR;
2789 else
2790 wmult_code = WIDEN_MULT_PLUS_EXPR;
2791
2792 rhs1 = gimple_assign_rhs1 (stmt);
2793 rhs2 = gimple_assign_rhs2 (stmt);
2794
2795 if (TREE_CODE (rhs1) == SSA_NAME)
2796 {
2797 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2798 if (is_gimple_assign (rhs1_stmt))
2799 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2800 }
2801
2802 if (TREE_CODE (rhs2) == SSA_NAME)
2803 {
2804 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2805 if (is_gimple_assign (rhs2_stmt))
2806 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2807 }
2808
2809 /* Allow for one conversion statement between the multiply
2810 and addition/subtraction statement.  If there are multiple
2811 conversions then we assume they would invalidate this
2812 transformation. If that's not the case then they should have
2813 been folded before now. */
2814 if (CONVERT_EXPR_CODE_P (rhs1_code))
2815 {
2816 conv1_stmt = rhs1_stmt;
2817 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2818 if (TREE_CODE (rhs1) == SSA_NAME)
2819 {
2820 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2821 if (is_gimple_assign (rhs1_stmt))
2822 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2823 }
2824 else
2825 return false;
2826 }
2827 if (CONVERT_EXPR_CODE_P (rhs2_code))
2828 {
2829 conv2_stmt = rhs2_stmt;
2830 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2831 if (TREE_CODE (rhs2) == SSA_NAME)
2832 {
2833 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2834 if (is_gimple_assign (rhs2_stmt))
2835 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2836 }
2837 else
2838 return false;
2839 }
2840
2841 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2842 is_widening_mult_p, but we still need the operands it returns.
2843
2844 It might also appear that it would be sufficient to use the existing
2845 operands of the widening multiply, but that would limit the choice of
2846 multiply-and-accumulate instructions.
2847
2848 If the widened-multiplication result has more than one use, it is
2849 probably wiser not to do the conversion.  Also restrict this operation
2850 to a single basic block to avoid moving the multiply to a different block
2851 with a higher execution frequency. */
2852 if (code == PLUS_EXPR
2853 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2854 {
2855 if (!has_single_use (rhs1)
2856 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2857 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2858 &type2, &mult_rhs2))
2859 return false;
2860 add_rhs = rhs2;
2861 conv_stmt = conv1_stmt;
2862 }
2863 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2864 {
2865 if (!has_single_use (rhs2)
2866 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2867 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2868 &type2, &mult_rhs2))
2869 return false;
2870 add_rhs = rhs1;
2871 conv_stmt = conv2_stmt;
2872 }
2873 else
2874 return false;
2875
2876 to_mode = SCALAR_TYPE_MODE (type);
2877 from_mode = SCALAR_TYPE_MODE (type1);
2878 if (to_mode == from_mode)
2879 return false;
2880
2881 from_unsigned1 = TYPE_UNSIGNED (type1);
2882 from_unsigned2 = TYPE_UNSIGNED (type2);
2883 optype = type1;
2884
2885 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2886 if (from_unsigned1 != from_unsigned2)
2887 {
2888 if (!INTEGRAL_TYPE_P (type))
2889 return false;
2890 /* We can use a signed multiply with unsigned types as long as
2891 there is a wider mode to use, or it is the smaller of the two
2892 types that is unsigned. Note that type1 >= type2, always. */
2893 if ((from_unsigned1
2894 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2895 || (from_unsigned2
2896 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2897 {
2898 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2899 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2900 return false;
2901 }
2902
2903 from_unsigned1 = from_unsigned2 = false;
2904 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2905 false);
2906 }
2907
2908 /* If there was a conversion between the multiply and addition
2909 then we need to make sure it fits a multiply-and-accumulate.
2910 There should be a single mode change which does not change the
2911 value. */
2912 if (conv_stmt)
2913 {
2914 /* We use the original, unmodified data types for this. */
2915 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2916 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2917 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2918 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2919
2920 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2921 {
2922 /* Conversion is a truncate. */
2923 if (TYPE_PRECISION (to_type) < data_size)
2924 return false;
2925 }
2926 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2927 {
2928 /* Conversion is an extend. Check it's the right sort. */
2929 if (TYPE_UNSIGNED (from_type) != is_unsigned
2930 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2931 return false;
2932 }
2933 /* else convert is a no-op for our purposes. */
2934 }
2935
2936 /* Verify that the machine can perform a widening multiply
2937 accumulate in this mode/signedness combination, otherwise
2938 this transformation is likely to pessimize code. */
2939 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2940 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2941 from_mode, &actual_mode);
2942
2943 if (handler == CODE_FOR_nothing)
2944 return false;
2945
2946 /* Ensure that the inputs to the handler are in the correct precision
2947 for the opcode. This will be the full mode size. */
2948 actual_precision = GET_MODE_PRECISION (actual_mode);
2949 if (actual_precision != TYPE_PRECISION (type1)
2950 || from_unsigned1 != TYPE_UNSIGNED (type1))
2951 mult_rhs1 = build_and_insert_cast (gsi, loc,
2952 build_nonstandard_integer_type
2953 (actual_precision, from_unsigned1),
2954 mult_rhs1);
2955 if (actual_precision != TYPE_PRECISION (type2)
2956 || from_unsigned2 != TYPE_UNSIGNED (type2))
2957 mult_rhs2 = build_and_insert_cast (gsi, loc,
2958 build_nonstandard_integer_type
2959 (actual_precision, from_unsigned2),
2960 mult_rhs2);
2961
2962 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2963 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2964
2965 /* Handle constants. */
2966 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2967 mult_rhs1 = fold_convert (type1, mult_rhs1);
2968 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2969 mult_rhs2 = fold_convert (type2, mult_rhs2);
2970
2971 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2972 add_rhs);
2973 update_stmt (gsi_stmt (*gsi));
2974 widen_mul_stats.maccs_inserted++;
2975 return true;
2976 }
2977
2978 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2979 OP2 and which we know is used in statements that can be, together with the
2980 multiplication, converted to FMAs, perform the transformation. */
2981
2982 static void
2983 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2984 {
2985 tree type = TREE_TYPE (mul_result);
2986 gimple *use_stmt;
2987 imm_use_iterator imm_iter;
2988 gcall *fma_stmt;
2989
2990 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2991 {
2992 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2993 tree addop, mulop1 = op1, result = mul_result;
2994 bool negate_p = false;
2995 gimple_seq seq = NULL;
2996
2997 if (is_gimple_debug (use_stmt))
2998 continue;
2999
3000 if (is_gimple_assign (use_stmt)
3001 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3002 {
3003 result = gimple_assign_lhs (use_stmt);
3004 use_operand_p use_p;
3005 gimple *neguse_stmt;
3006 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3007 gsi_remove (&gsi, true);
3008 release_defs (use_stmt);
3009
3010 use_stmt = neguse_stmt;
3011 gsi = gsi_for_stmt (use_stmt);
3012 negate_p = true;
3013 }
3014
3015 tree cond, else_value, ops[3];
3016 tree_code code;
3017 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3018 ops, &else_value))
3019 gcc_unreachable ();
3020 addop = ops[0] == result ? ops[1] : ops[0];
3021
3022 if (code == MINUS_EXPR)
3023 {
3024 if (ops[0] == result)
3025 /* a * b - c -> a * b + (-c) */
3026 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3027 else
3028 /* a - b * c -> (-b) * c + a */
3029 negate_p = !negate_p;
3030 }
3031
3032 if (negate_p)
3033 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3034
3035 if (seq)
3036 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3037
3038 if (cond)
3039 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3040 op2, addop, else_value);
3041 else
3042 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3043 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3044 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3045 use_stmt));
3046 gsi_replace (&gsi, fma_stmt, true);
3047 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3048 regardless of where the negation occurs. */
3049 gimple *orig_stmt = gsi_stmt (gsi);
3050 if (fold_stmt (&gsi, follow_all_ssa_edges))
3051 {
3052 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3053 gcc_unreachable ();
3054 update_stmt (gsi_stmt (gsi));
3055 }
3056
3057 if (dump_file && (dump_flags & TDF_DETAILS))
3058 {
3059 fprintf (dump_file, "Generated FMA ");
3060 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3061 fprintf (dump_file, "\n");
3062 }
3063
3064 /* If the FMA result is negated in a single use, fold the negation
3065 too. */
3066 orig_stmt = gsi_stmt (gsi);
3067 use_operand_p use_p;
3068 gimple *neg_stmt;
3069 if (is_gimple_call (orig_stmt)
3070 && gimple_call_internal_p (orig_stmt)
3071 && gimple_call_lhs (orig_stmt)
3072 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3073 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3074 && is_gimple_assign (neg_stmt)
3075 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3076 && !stmt_could_throw_p (cfun, neg_stmt))
3077 {
3078 gsi = gsi_for_stmt (neg_stmt);
3079 if (fold_stmt (&gsi, follow_all_ssa_edges))
3080 {
3081 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3082 gcc_unreachable ();
3083 update_stmt (gsi_stmt (gsi));
3084 if (dump_file && (dump_flags & TDF_DETAILS))
3085 {
3086 fprintf (dump_file, "Folded FMA negation ");
3087 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3088 fprintf (dump_file, "\n");
3089 }
3090 }
3091 }
3092
3093 widen_mul_stats.fmas_inserted++;
3094 }
3095 }
3096
3097 /* Data necessary to perform the actual transformation from a multiplication
3098 and an addition to an FMA once the decision has been made to do so, and
3099 to then delete the multiplication statement from the function IL. */
3100
3101 struct fma_transformation_info
3102 {
3103 gimple *mul_stmt;
3104 tree mul_result;
3105 tree op1;
3106 tree op2;
3107 };
3108
3109 /* Structure containing the current state of FMA deferring, i.e. whether we are
3110 deferring, whether to continue deferring, and all data necessary to come
3111 back and perform all deferred transformations. */
3112
3113 class fma_deferring_state
3114 {
3115 public:
3116 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3117 do any deferring. */
3118
3119 fma_deferring_state (bool perform_deferring)
3120 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3121 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3122
3123 /* List of FMA candidates for which the transformation has been determined
3124 to be possible but which, at this point in the BB analysis, we do not
3125 consider beneficial. */
3126 auto_vec<fma_transformation_info, 8> m_candidates;
3127
3128 /* Set of results of multiplications that are part of already deferred FMA
3129 candidates. */
3130 hash_set<tree> m_mul_result_set;
3131
3132 /* The PHI that supposedly feeds back the result of one FMA to another over
3133 a loop boundary. */
3134 gphi *m_initial_phi;
3135
3136 /* Result of the last produced FMA candidate or NULL if there has not been
3137 one. */
3138 tree m_last_result;
3139
3140 /* If true, deferring might still be profitable. If false, transform all
3141 candidates and no longer defer. */
3142 bool m_deferring_p;
3143 };
3144
3145 /* Transform all deferred FMA candidates and mark STATE as no longer
3146 deferring. */
3147
3148 static void
3149 cancel_fma_deferring (fma_deferring_state *state)
3150 {
3151 if (!state->m_deferring_p)
3152 return;
3153
3154 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3155 {
3156 if (dump_file && (dump_flags & TDF_DETAILS))
3157 fprintf (dump_file, "Generating deferred FMA\n");
3158
3159 const fma_transformation_info &fti = state->m_candidates[i];
3160 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3161
3162 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3163 gsi_remove (&gsi, true);
3164 release_defs (fti.mul_stmt);
3165 }
3166 state->m_deferring_p = false;
3167 }
3168
3169 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3170 Otherwise return NULL. */
3171
3172 static gphi *
3173 result_of_phi (tree op)
3174 {
3175 if (TREE_CODE (op) != SSA_NAME)
3176 return NULL;
3177
3178 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3179 }
3180
3181 /* After processing statements of a BB and recording STATE, return true if the
3182 initial phi is fed by the last FMA candidate result or one such result from
3183 previously processed BBs marked in LAST_RESULT_SET. */
3184
3185 static bool
3186 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3187 hash_set<tree> *last_result_set)
3188 {
3189 ssa_op_iter iter;
3190 use_operand_p use;
3191 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3192 {
3193 tree t = USE_FROM_PTR (use);
3194 if (t == state->m_last_result
3195 || last_result_set->contains (t))
3196 return true;
3197 }
3198
3199 return false;
3200 }
3201
3202 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3203 with uses in additions and subtractions to form fused multiply-add
3204 operations. Returns true if successful and MUL_STMT should be removed.
3205 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3206 on MUL_COND, otherwise it is unconditional.
3207
3208 If STATE indicates that we are deferring FMA transformation, that means
3209 that we do not produce FMAs for basic blocks which look like:
3210
3211 <bb 6>
3212 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3213 _65 = _14 * _16;
3214 accumulator_66 = _65 + accumulator_111;
3215
3216 or its unrolled version, i.e. with several FMA candidates that feed the result
3217 of one into the addend of another. Instead, we add them to a list in STATE
3218 and if we later discover an FMA candidate that is not part of such a chain,
3219 we go back and perform all deferred past candidates. */
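/* For the example block above, the multiply/add pair would be pushed onto
   STATE->m_candidates rather than converted immediately; once deferring
   turns out not to be profitable, cancel_fma_deferring (above) walks that
   list and performs all the deferred conversions.  */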
3220
3221 static bool
3222 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3223 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3224 {
3225 tree mul_result = gimple_get_lhs (mul_stmt);
3226 tree type = TREE_TYPE (mul_result);
3227 gimple *use_stmt, *neguse_stmt;
3228 use_operand_p use_p;
3229 imm_use_iterator imm_iter;
3230
3231 if (FLOAT_TYPE_P (type)
3232 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3233 return false;
3234
3235 /* We don't want to do bitfield reduction ops. */
3236 if (INTEGRAL_TYPE_P (type)
3237 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3238 return false;
3239
3240 /* If the target doesn't support it, don't generate it. We assume that
3241 if fma isn't available then fms, fnma or fnms are not either. */
3242 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3243 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3244 return false;
3245
3246 /* If the multiplication has zero uses, it is probably kept around because
3247 of -fnon-call-exceptions.  Don't optimize it away in that case;
3248 that is DCE's job. */
3249 if (has_zero_uses (mul_result))
3250 return false;
3251
3252 bool check_defer
3253 = (state->m_deferring_p
3254 && (tree_to_shwi (TYPE_SIZE (type))
3255 <= param_avoid_fma_max_bits));
3256 bool defer = check_defer;
3257 bool seen_negate_p = false;
3258 /* Make sure that the multiplication statement becomes dead after
3259 the transformation, thus that all uses are transformed to FMAs.
3260 This means we assume that an FMA operation has the same cost
3261 as an addition. */
3262 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3263 {
3264 tree result = mul_result;
3265 bool negate_p = false;
3266
3267 use_stmt = USE_STMT (use_p);
3268
3269 if (is_gimple_debug (use_stmt))
3270 continue;
3271
3272 /* For now restrict these operations to single basic blocks.  In theory
3273 we would want to support sinking the multiplication in
3274 m = a*b;
3275 if ()
3276 ma = m + c;
3277 else
3278 d = m;
3279 to form a fma in the then block and sink the multiplication to the
3280 else block. */
3281 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3282 return false;
3283
3284 /* A negate on the multiplication leads to FNMA. */
3285 if (is_gimple_assign (use_stmt)
3286 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3287 {
3288 ssa_op_iter iter;
3289 use_operand_p usep;
3290
3291 /* If (due to earlier missed optimizations) we have two
3292 negates of the same value, treat them as equivalent
3293 to a single negate with multiple uses. */
3294 if (seen_negate_p)
3295 return false;
3296
3297 result = gimple_assign_lhs (use_stmt);
3298
3299 /* Make sure the negate statement becomes dead with this
3300 single transformation. */
3301 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3302 &use_p, &neguse_stmt))
3303 return false;
3304
3305 /* Make sure the multiplication isn't also used on that stmt. */
3306 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3307 if (USE_FROM_PTR (usep) == mul_result)
3308 return false;
3309
3310 /* Re-validate. */
3311 use_stmt = neguse_stmt;
3312 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3313 return false;
3314
3315 negate_p = seen_negate_p = true;
3316 }
3317
3318 tree cond, else_value, ops[3];
3319 tree_code code;
3320 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3321 &else_value))
3322 return false;
3323
3324 switch (code)
3325 {
3326 case MINUS_EXPR:
3327 if (ops[1] == result)
3328 negate_p = !negate_p;
3329 break;
3330 case PLUS_EXPR:
3331 break;
3332 default:
3333 /* FMA can only be formed from PLUS and MINUS. */
3334 return false;
3335 }
3336
3337 if (mul_cond && cond != mul_cond)
3338 return false;
3339
3340 if (cond)
3341 {
3342 if (cond == result || else_value == result)
3343 return false;
3344 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3345 return false;
3346 }
3347
3348 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3349 we'll visit later, we might be able to get a more profitable
3350 match with fnma.
3351 OTOH, if we don't, a negate / fma pair has likely lower latency
3352 than a mult / subtract pair. */
3353 if (code == MINUS_EXPR
3354 && !negate_p
3355 && ops[0] == result
3356 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3357 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3358 && TREE_CODE (ops[1]) == SSA_NAME
3359 && has_single_use (ops[1]))
3360 {
3361 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3362 if (is_gimple_assign (stmt2)
3363 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3364 return false;
3365 }
3366
3367 /* We can't handle a * b + a * b. */
3368 if (ops[0] == ops[1])
3369 return false;
3370 /* If deferring, make sure we are not looking at an instruction that
3371 wouldn't have existed if we were not. */
3372 if (state->m_deferring_p
3373 && (state->m_mul_result_set.contains (ops[0])
3374 || state->m_mul_result_set.contains (ops[1])))
3375 return false;
3376
3377 if (check_defer)
3378 {
3379 tree use_lhs = gimple_get_lhs (use_stmt);
3380 if (state->m_last_result)
3381 {
3382 if (ops[1] == state->m_last_result
3383 || ops[0] == state->m_last_result)
3384 defer = true;
3385 else
3386 defer = false;
3387 }
3388 else
3389 {
3390 gcc_checking_assert (!state->m_initial_phi);
3391 gphi *phi;
3392 if (ops[0] == result)
3393 phi = result_of_phi (ops[1]);
3394 else
3395 {
3396 gcc_assert (ops[1] == result);
3397 phi = result_of_phi (ops[0]);
3398 }
3399
3400 if (phi)
3401 {
3402 state->m_initial_phi = phi;
3403 defer = true;
3404 }
3405 else
3406 defer = false;
3407 }
3408
3409 state->m_last_result = use_lhs;
3410 check_defer = false;
3411 }
3412 else
3413 defer = false;
3414
3415 /* While it is possible to validate whether or not the exact form that
3416 we've recognized is available in the backend, the assumption is that
3417 if the deferring logic above did not trigger, the transformation is
3418 never a loss. For instance, suppose the target only has the plain FMA
3419 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3420 MUL+SUB for FMA+NEG, which is still two operations. Consider
3421 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3422 form the two NEGs are independent and could be run in parallel. */
3423 }
3424
3425 if (defer)
3426 {
3427 fma_transformation_info fti;
3428 fti.mul_stmt = mul_stmt;
3429 fti.mul_result = mul_result;
3430 fti.op1 = op1;
3431 fti.op2 = op2;
3432 state->m_candidates.safe_push (fti);
3433 state->m_mul_result_set.add (mul_result);
3434
3435 if (dump_file && (dump_flags & TDF_DETAILS))
3436 {
3437 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3438 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3439 fprintf (dump_file, "\n");
3440 }
3441
3442 return false;
3443 }
3444 else
3445 {
3446 if (state->m_deferring_p)
3447 cancel_fma_deferring (state);
3448 convert_mult_to_fma_1 (mul_result, op1, op2);
3449 return true;
3450 }
3451 }
3452
3453
3454 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3455 a check for non-zero like:
3456 _1 = x_4(D) * y_5(D);
3457 *res_7(D) = _1;
3458 if (x_4(D) != 0)
3459 goto <bb 3>; [50.00%]
3460 else
3461 goto <bb 4>; [50.00%]
3462
3463 <bb 3> [local count: 536870913]:
3464 _2 = _1 / x_4(D);
3465 _9 = _2 != y_5(D);
3466 _10 = (int) _9;
3467
3468 <bb 4> [local count: 1073741824]:
3469 # iftmp.0_3 = PHI <_10(3), 0(2)>
3470 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3471 optimize the x_4(D) != 0 condition to 1. */
3472
3473 static void
3474 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3475 gimple *div_stmt, bool *cfg_changed)
3476 {
3477 basic_block bb = gimple_bb (cond_stmt);
3478 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3479 return;
3480 edge pred_edge = single_pred_edge (bb);
3481 basic_block pred_bb = pred_edge->src;
3482 if (EDGE_COUNT (pred_bb->succs) != 2)
3483 return;
3484 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3485 edge other_succ_edge = NULL;
3486 if (gimple_code (cond_stmt) == GIMPLE_COND)
3487 {
3488 if (EDGE_COUNT (bb->succs) != 2)
3489 return;
3490 other_succ_edge = EDGE_SUCC (bb, 0);
3491 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3492 {
3493 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3494 other_succ_edge = EDGE_SUCC (bb, 1);
3495 }
3496 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3497 other_succ_edge = EDGE_SUCC (bb, 0);
3498 if (other_edge->dest != other_succ_edge->dest)
3499 return;
3500 }
3501 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3502 return;
3503 gimple *zero_cond = last_stmt (pred_bb);
3504 if (zero_cond == NULL
3505 || gimple_code (zero_cond) != GIMPLE_COND
3506 || (gimple_cond_code (zero_cond)
3507 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3508 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3509 return;
3510 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3511 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3512 return;
3513 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3514 {
3515 /* Allow the divisor to be the result of a same-precision cast
3516 from zero_cond_lhs. */
3517 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3518 if (TREE_CODE (rhs2) != SSA_NAME)
3519 return;
3520 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3521 if (!gimple_assign_cast_p (g)
3522 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3523 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3524 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3525 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3526 return;
3527 }
3528 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3529 mul_stmts.quick_push (div_stmt);
3530 if (is_gimple_debug (gsi_stmt (gsi)))
3531 gsi_next_nondebug (&gsi);
3532 unsigned cast_count = 0;
3533 while (gsi_stmt (gsi) != cond_stmt)
3534 {
3535 /* If the original mul_stmt has a single use, allow it in the same bb;
3536 we are then looking just at __builtin_mul_overflow_p.
3537 In that case the original mul_stmt will be replaced
3538 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3539 gimple *mul_stmt;
3540 unsigned int i;
3541 bool ok = false;
3542 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3543 {
3544 if (gsi_stmt (gsi) == mul_stmt)
3545 {
3546 ok = true;
3547 break;
3548 }
3549 }
3550 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3551 ok = true;
3552 if (!ok)
3553 return;
3554 gsi_next_nondebug (&gsi);
3555 }
3556 if (gimple_code (cond_stmt) == GIMPLE_COND)
3557 {
3558 basic_block succ_bb = other_edge->dest;
3559 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3560 gsi_next (&gpi))
3561 {
3562 gphi *phi = gpi.phi ();
3563 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3564 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3565 if (!operand_equal_p (v1, v2, 0))
3566 return;
3567 }
3568 }
3569 else
3570 {
3571 tree lhs = gimple_assign_lhs (cond_stmt);
3572 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3573 return;
3574 gsi_next_nondebug (&gsi);
3575 if (!gsi_end_p (gsi))
3576 {
3577 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3578 return;
3579 gimple *cast_stmt = gsi_stmt (gsi);
3580 if (!gimple_assign_cast_p (cast_stmt))
3581 return;
3582 tree new_lhs = gimple_assign_lhs (cast_stmt);
3583 gsi_next_nondebug (&gsi);
3584 if (!gsi_end_p (gsi)
3585 || !new_lhs
3586 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3587 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3588 return;
3589 lhs = new_lhs;
3590 }
3591 edge succ_edge = single_succ_edge (bb);
3592 basic_block succ_bb = succ_edge->dest;
3593 gsi = gsi_start_phis (succ_bb);
3594 if (gsi_end_p (gsi))
3595 return;
3596 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3597 gsi_next (&gsi);
3598 if (!gsi_end_p (gsi))
3599 return;
3600 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3601 return;
3602 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3603 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3604 {
3605 tree cond = gimple_assign_rhs1 (cond_stmt);
3606 if (TREE_CODE (cond) == NE_EXPR)
3607 {
3608 if (!operand_equal_p (other_val,
3609 gimple_assign_rhs3 (cond_stmt), 0))
3610 return;
3611 }
3612 else if (!operand_equal_p (other_val,
3613 gimple_assign_rhs2 (cond_stmt), 0))
3614 return;
3615 }
3616 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3617 {
3618 if (!integer_zerop (other_val))
3619 return;
3620 }
3621 else if (!integer_onep (other_val))
3622 return;
3623 }
3624 gcond *zero_gcond = as_a <gcond *> (zero_cond);
3625 if (pred_edge->flags & EDGE_TRUE_VALUE)
3626 gimple_cond_make_true (zero_gcond);
3627 else
3628 gimple_cond_make_false (zero_gcond);
3629 update_stmt (zero_cond);
3630 *cfg_changed = true;
3631 }
3632
3633 /* Helper function for arith_overflow_check_p. Return true
3634 if VAL1 is equal to VAL2 cast to the corresponding integral type
3635 with the other signedness, or vice versa. */
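/* Illustrative example (not from this file): given

     unsigned int u_1 = ...;
     int s_2 = (int) u_1;

   both arith_cast_equal_p (s_2, u_1) and arith_cast_equal_p (u_1, s_2)
   return true, because one SSA name is defined as a cast of the other;
   the callers ensure the cast only changes signedness.  */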
3636
3637 static bool
3638 arith_cast_equal_p (tree val1, tree val2)
3639 {
3640 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3641 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3642 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3643 return false;
3644 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3645 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3646 return true;
3647 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3648 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3649 return true;
3650 return false;
3651 }
3652
3653 /* Helper function of match_arith_overflow. Return 1
3654 if USE_STMT is an unsigned overflow check ovf != 0 for
3655 STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0,
3656 and 0 otherwise. */
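/* Illustrative examples (not from this file), for STMT "r = a + b" in an
   unsigned type:

     if (r < a) ...    returns 1: the condition is true on overflow
     if (a <= r) ...   returns -1: the condition is true when no overflow

   and for STMT "r = x * y" followed by "d = r / x", a use "d != y"
   returns 1 while "d == y" returns -1.  */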
3657
3658 static int
3659 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3660 tree maxval, tree *other)
3661 {
3662 enum tree_code ccode = ERROR_MARK;
3663 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3664 enum tree_code code = gimple_assign_rhs_code (stmt);
3665 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3666 tree rhs1 = gimple_assign_rhs1 (stmt);
3667 tree rhs2 = gimple_assign_rhs2 (stmt);
3668 tree multop = NULL_TREE, divlhs = NULL_TREE;
3669
3670 if (code == MULT_EXPR)
3671 {
3672 if (!is_gimple_assign (use_stmt))
3673 return 0;
3674 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3675 return 0;
3676 if (gimple_assign_rhs1 (use_stmt) != lhs)
3677 return 0;
3678 if (cast_stmt)
3679 {
3680 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3681 multop = rhs2;
3682 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3683 multop = rhs1;
3684 else
3685 return 0;
3686 }
3687 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3688 multop = rhs2;
3689 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3690 multop = rhs1;
3691 else
3692 return 0;
3693 if (stmt_ends_bb_p (use_stmt))
3694 return 0;
3695 divlhs = gimple_assign_lhs (use_stmt);
3696 if (!divlhs)
3697 return 0;
3698 use_operand_p use;
3699 if (!single_imm_use (divlhs, &use, &use_stmt))
3700 return 0;
3701 }
3702 if (gimple_code (use_stmt) == GIMPLE_COND)
3703 {
3704 ccode = gimple_cond_code (use_stmt);
3705 crhs1 = gimple_cond_lhs (use_stmt);
3706 crhs2 = gimple_cond_rhs (use_stmt);
3707 }
3708 else if (is_gimple_assign (use_stmt))
3709 {
3710 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3711 {
3712 ccode = gimple_assign_rhs_code (use_stmt);
3713 crhs1 = gimple_assign_rhs1 (use_stmt);
3714 crhs2 = gimple_assign_rhs2 (use_stmt);
3715 }
3716 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3717 {
3718 tree cond = gimple_assign_rhs1 (use_stmt);
3719 if (COMPARISON_CLASS_P (cond))
3720 {
3721 ccode = TREE_CODE (cond);
3722 crhs1 = TREE_OPERAND (cond, 0);
3723 crhs2 = TREE_OPERAND (cond, 1);
3724 }
3725 else
3726 return 0;
3727 }
3728 else
3729 return 0;
3730 }
3731 else
3732 return 0;
3733
3734 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3735 return 0;
3736
3737 switch (ccode)
3738 {
3739 case GT_EXPR:
3740 case LE_EXPR:
3741 if (maxval)
3742 {
3743 /* r = a + b; r > maxval or r <= maxval */
3744 if (crhs1 == lhs
3745 && TREE_CODE (crhs2) == INTEGER_CST
3746 && tree_int_cst_equal (crhs2, maxval))
3747 return ccode == GT_EXPR ? 1 : -1;
3748 break;
3749 }
3750 /* r = a - b; r > a or r <= a
3751 r = a + b; a > r or a <= r or b > r or b <= r. */
3752 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3753 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3754 && crhs2 == lhs))
3755 return ccode == GT_EXPR ? 1 : -1;
3756 /* r = ~a; b > r or b <= r. */
3757 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3758 {
3759 if (other)
3760 *other = crhs1;
3761 return ccode == GT_EXPR ? 1 : -1;
3762 }
3763 break;
3764 case LT_EXPR:
3765 case GE_EXPR:
3766 if (maxval)
3767 break;
3768 /* r = a - b; a < r or a >= r
3769 r = a + b; r < a or r >= a or r < b or r >= b. */
3770 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3771 || (code == PLUS_EXPR && crhs1 == lhs
3772 && (crhs2 == rhs1 || crhs2 == rhs2)))
3773 return ccode == LT_EXPR ? 1 : -1;
3774 /* r = ~a; r < b or r >= b. */
3775 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3776 {
3777 if (other)
3778 *other = crhs2;
3779 return ccode == LT_EXPR ? 1 : -1;
3780 }
3781 break;
3782 case EQ_EXPR:
3783 case NE_EXPR:
3784 /* r = a * b; _1 = r / a; _1 == b
3785 r = a * b; _1 = r / b; _1 == a
3786 r = a * b; _1 = r / a; _1 != b
3787 r = a * b; _1 = r / b; _1 != a. */
3788 if (code == MULT_EXPR)
3789 {
3790 if (cast_stmt)
3791 {
3792 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3793 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3794 return ccode == NE_EXPR ? 1 : -1;
3795 }
3796 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3797 || (crhs2 == divlhs && crhs1 == multop))
3798 return ccode == NE_EXPR ? 1 : -1;
3799 }
3800 break;
3801 default:
3802 break;
3803 }
3804 return 0;
3805 }
3806
3807 /* Recognize for unsigned x
3808 x = y - z;
3809 if (x > y)
3810 where there are other uses of x and replace it with
3811 _7 = .SUB_OVERFLOW (y, z);
3812 x = REALPART_EXPR <_7>;
3813 _8 = IMAGPART_EXPR <_7>;
3814 if (_8)
3815 and similarly for addition.
3816
3817 Also recognize:
3818 yc = (type) y;
3819 zc = (type) z;
3820 x = yc + zc;
3821 if (x > max)
3822 where y and z have unsigned types with maximum max
3823 and there are other uses of x and all of those cast x
3824 back to that unsigned type and again replace it with
3825 _7 = .ADD_OVERFLOW (y, z);
3826 _9 = REALPART_EXPR <_7>;
3827 _8 = IMAGPART_EXPR <_7>;
3828 if (_8)
3829 and replace (utype) x with _9.
3830
3831 Also recognize:
3832 x = ~z;
3833 if (y > x)
3834 and replace it with
3835 _7 = .ADD_OVERFLOW (y, z);
3836 _8 = IMAGPART_EXPR <_7>;
3837 if (_8)
3838
3839 And also recognize:
3840 z = x * y;
3841 if (x != 0)
3842 goto <bb 3>; [50.00%]
3843 else
3844 goto <bb 4>; [50.00%]
3845
3846 <bb 3> [local count: 536870913]:
3847 _2 = z / x;
3848 _9 = _2 != y;
3849 _10 = (int) _9;
3850
3851 <bb 4> [local count: 1073741824]:
3852 # iftmp.0_3 = PHI <_10(3), 0(2)>
3853 and replace it with
3854 _7 = .MUL_OVERFLOW (x, y);
3855 z = REALPART_EXPR <_7>;
3856 _8 = IMAGPART_EXPR <_7>;
3857 _9 = _8 != 0;
3858 iftmp.0_3 = (int) _9; */
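/* Illustrative sketch (not part of this file): typical source code whose
   GIMPLE matches the patterns above, assuming unsigned int and a target
   with a usable uaddv4 pattern:

     unsigned int
     sat_add (unsigned int y, unsigned int z)
     {
       unsigned int x = y + z;
       if (x < y)    // overflow check, becomes a test of IMAGPART_EXPR
	 x = ~0U;
       return x;
     }

   The addition is rewritten to a single .ADD_OVERFLOW call; x is taken
   from its REALPART_EXPR and the branch tests its IMAGPART_EXPR.  */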
3859
3860 static bool
3861 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3862 enum tree_code code, bool *cfg_changed)
3863 {
3864 tree lhs = gimple_assign_lhs (stmt);
3865 tree type = TREE_TYPE (lhs);
3866 use_operand_p use_p;
3867 imm_use_iterator iter;
3868 bool use_seen = false;
3869 bool ovf_use_seen = false;
3870 gimple *use_stmt;
3871 gimple *add_stmt = NULL;
3872 bool add_first = false;
3873 gimple *cond_stmt = NULL;
3874 gimple *cast_stmt = NULL;
3875 tree cast_lhs = NULL_TREE;
3876
3877 gcc_checking_assert (code == PLUS_EXPR
3878 || code == MINUS_EXPR
3879 || code == MULT_EXPR
3880 || code == BIT_NOT_EXPR);
3881 if (!INTEGRAL_TYPE_P (type)
3882 || !TYPE_UNSIGNED (type)
3883 || has_zero_uses (lhs)
3884 || (code != PLUS_EXPR
3885 && code != MULT_EXPR
3886 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3887 TYPE_MODE (type)) == CODE_FOR_nothing))
3888 return false;
3889
3890 tree rhs1 = gimple_assign_rhs1 (stmt);
3891 tree rhs2 = gimple_assign_rhs2 (stmt);
3892 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3893 {
3894 use_stmt = USE_STMT (use_p);
3895 if (is_gimple_debug (use_stmt))
3896 continue;
3897
3898 tree other = NULL_TREE;
3899 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3900 {
3901 if (code == BIT_NOT_EXPR)
3902 {
3903 gcc_assert (other);
3904 if (TREE_CODE (other) != SSA_NAME)
3905 return false;
3906 if (rhs2 == NULL)
3907 rhs2 = other;
3908 else
3909 return false;
3910 cond_stmt = use_stmt;
3911 }
3912 ovf_use_seen = true;
3913 }
3914 else
3915 {
3916 use_seen = true;
3917 if (code == MULT_EXPR
3918 && cast_stmt == NULL
3919 && gimple_assign_cast_p (use_stmt))
3920 {
3921 cast_lhs = gimple_assign_lhs (use_stmt);
3922 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3923 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3924 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3925 == TYPE_PRECISION (TREE_TYPE (lhs))))
3926 cast_stmt = use_stmt;
3927 else
3928 cast_lhs = NULL_TREE;
3929 }
3930 }
3931 if (ovf_use_seen && use_seen)
3932 break;
3933 }
3934
3935 if (!ovf_use_seen
3936 && code == MULT_EXPR
3937 && cast_stmt)
3938 {
3939 if (TREE_CODE (rhs1) != SSA_NAME
3940 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3941 return false;
3942 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3943 {
3944 use_stmt = USE_STMT (use_p);
3945 if (is_gimple_debug (use_stmt))
3946 continue;
3947
3948 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3949 NULL_TREE, NULL))
3950 ovf_use_seen = true;
3951 }
3952 }
3953 else
3954 {
3955 cast_stmt = NULL;
3956 cast_lhs = NULL_TREE;
3957 }
3958
3959 tree maxval = NULL_TREE;
3960 if (!ovf_use_seen
3961 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3962 || (code == PLUS_EXPR
3963 && optab_handler (uaddv4_optab,
3964 TYPE_MODE (type)) == CODE_FOR_nothing)
3965 || (code == MULT_EXPR
3966 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3967 TYPE_MODE (type)) == CODE_FOR_nothing))
3968 {
3969 if (code != PLUS_EXPR)
3970 return false;
3971 if (TREE_CODE (rhs1) != SSA_NAME
3972 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3973 return false;
3974 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
3975 tree type1 = TREE_TYPE (rhs1);
3976 if (!INTEGRAL_TYPE_P (type1)
3977 || !TYPE_UNSIGNED (type1)
3978 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
3979 || (TYPE_PRECISION (type1)
3980 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
3981 return false;
3982 if (TREE_CODE (rhs2) == INTEGER_CST)
3983 {
3984 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
3985 TYPE_PRECISION (type1),
3986 UNSIGNED), 0))
3987 return false;
3988 rhs2 = fold_convert (type1, rhs2);
3989 }
3990 else
3991 {
3992 if (TREE_CODE (rhs2) != SSA_NAME
3993 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
3994 return false;
3995 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
3996 tree type2 = TREE_TYPE (rhs2);
3997 if (!INTEGRAL_TYPE_P (type2)
3998 || !TYPE_UNSIGNED (type2)
3999 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4000 || (TYPE_PRECISION (type2)
4001 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4002 return false;
4003 }
4004 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4005 type = type1;
4006 else
4007 type = TREE_TYPE (rhs2);
4008
4009 if (TREE_CODE (type) != INTEGER_TYPE
4010 || optab_handler (uaddv4_optab,
4011 TYPE_MODE (type)) == CODE_FOR_nothing)
4012 return false;
4013
4014 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4015 UNSIGNED));
4016 ovf_use_seen = false;
4017 use_seen = false;
4018 basic_block use_bb = NULL;
4019 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4020 {
4021 use_stmt = USE_STMT (use_p);
4022 if (is_gimple_debug (use_stmt))
4023 continue;
4024
4025 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4026 {
4027 ovf_use_seen = true;
4028 use_bb = gimple_bb (use_stmt);
4029 }
4030 else
4031 {
4032 if (!gimple_assign_cast_p (use_stmt)
4033 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4034 return false;
4035 tree use_lhs = gimple_assign_lhs (use_stmt);
4036 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4037 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4038 > TYPE_PRECISION (type)))
4039 return false;
4040 use_seen = true;
4041 }
4042 }
4043 if (!ovf_use_seen)
4044 return false;
4045 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4046 {
4047 if (!use_seen)
4048 return false;
4049 tree new_rhs1 = make_ssa_name (type);
4050 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4051 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4052 rhs1 = new_rhs1;
4053 }
4054 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4055 {
4056 if (!use_seen)
4057 return false;
4058 tree new_rhs2 = make_ssa_name (type);
4059 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4060 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4061 rhs2 = new_rhs2;
4062 }
4063 else if (!use_seen)
4064 {
4065 /* If there are no uses of the wider addition, check whether
4066 forwprop has created a narrower addition.
4067 Require it to be in the same bb as the overflow check. */
4068 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4069 {
4070 use_stmt = USE_STMT (use_p);
4071 if (is_gimple_debug (use_stmt))
4072 continue;
4073
4074 if (use_stmt == stmt)
4075 continue;
4076
4077 if (!is_gimple_assign (use_stmt)
4078 || gimple_bb (use_stmt) != use_bb
4079 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4080 continue;
4081
4082 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4083 {
4084 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4085 rhs2, 0))
4086 continue;
4087 }
4088 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4089 {
4090 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4091 continue;
4092 }
4093 else
4094 continue;
4095
4096 add_stmt = use_stmt;
4097 break;
4098 }
4099 if (add_stmt == NULL)
4100 return false;
4101
4102 /* If stmt and add_stmt are in the same bb, we need to find out
4103 which one is earlier. If they are in different bbs, we've
4104 checked add_stmt is in the same bb as one of the uses of the
4105 stmt lhs, so stmt needs to dominate add_stmt too. */
4106 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4107 {
4108 gimple_stmt_iterator gsif = *gsi;
4109 gimple_stmt_iterator gsib = *gsi;
4110 int i;
4111 /* Search both forward and backward from stmt and have a small
4112 upper bound. */
4113 for (i = 0; i < 128; i++)
4114 {
4115 if (!gsi_end_p (gsib))
4116 {
4117 gsi_prev_nondebug (&gsib);
4118 if (gsi_stmt (gsib) == add_stmt)
4119 {
4120 add_first = true;
4121 break;
4122 }
4123 }
4124 else if (gsi_end_p (gsif))
4125 break;
4126 if (!gsi_end_p (gsif))
4127 {
4128 gsi_next_nondebug (&gsif);
4129 if (gsi_stmt (gsif) == add_stmt)
4130 break;
4131 }
4132 }
4133 if (i == 128)
4134 return false;
4135 if (add_first)
4136 *gsi = gsi_for_stmt (add_stmt);
4137 }
4138 }
4139 }
4140
4141 if (code == BIT_NOT_EXPR)
4142 *gsi = gsi_for_stmt (cond_stmt);
4143
4144 auto_vec<gimple *, 8> mul_stmts;
4145 if (code == MULT_EXPR && cast_stmt)
4146 {
4147 type = TREE_TYPE (cast_lhs);
4148 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4149 if (gimple_assign_cast_p (g)
4150 && useless_type_conversion_p (type,
4151 TREE_TYPE (gimple_assign_rhs1 (g)))
4152 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4153 rhs1 = gimple_assign_rhs1 (g);
4154 else
4155 {
4156 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4157 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4158 rhs1 = gimple_assign_lhs (g);
4159 mul_stmts.quick_push (g);
4160 }
4161 if (TREE_CODE (rhs2) == INTEGER_CST)
4162 rhs2 = fold_convert (type, rhs2);
4163 else
4164 {
/* Look at the defining statement of rhs2 here, not the stmt left over
   from handling rhs1 above. */
g = SSA_NAME_DEF_STMT (rhs2);
4165 if (gimple_assign_cast_p (g)
4166 && useless_type_conversion_p (type,
4167 TREE_TYPE (gimple_assign_rhs1 (g)))
4168 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4169 rhs2 = gimple_assign_rhs1 (g);
4170 else
4171 {
4172 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4173 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4174 rhs2 = gimple_assign_lhs (g);
4175 mul_stmts.quick_push (g);
4176 }
4177 }
4178 }
4179 tree ctype = build_complex_type (type);
4180 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4181 ? IFN_MUL_OVERFLOW
4182 : code != MINUS_EXPR
4183 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4184 2, rhs1, rhs2);
4185 tree ctmp = make_ssa_name (ctype);
4186 gimple_call_set_lhs (g, ctmp);
4187 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4188 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4189 gassign *g2;
4190 if (code != BIT_NOT_EXPR)
4191 {
4192 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4193 build1 (REALPART_EXPR, type, ctmp));
4194 if (maxval || cast_stmt)
4195 {
4196 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4197 if (add_first)
4198 *gsi = gsi_for_stmt (stmt);
4199 }
4200 else
4201 gsi_replace (gsi, g2, true);
4202 if (code == MULT_EXPR)
4203 {
4204 mul_stmts.quick_push (g);
4205 mul_stmts.quick_push (g2);
4206 if (cast_stmt)
4207 {
4208 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4209 gsi_replace (gsi, g2, true);
4210 mul_stmts.quick_push (g2);
4211 }
4212 }
4213 }
4214 tree ovf = make_ssa_name (type);
4215 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4216 build1 (IMAGPART_EXPR, type, ctmp));
4217 if (code != BIT_NOT_EXPR)
4218 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4219 else
4220 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4221 if (code == MULT_EXPR)
4222 mul_stmts.quick_push (g2);
4223
4224 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4225 {
4226 if (is_gimple_debug (use_stmt))
4227 continue;
4228
4229 gimple *orig_use_stmt = use_stmt;
4230 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4231 maxval, NULL);
4232 if (ovf_use == 0)
4233 {
4234 gcc_assert (code != BIT_NOT_EXPR);
4235 if (maxval)
4236 {
4237 tree use_lhs = gimple_assign_lhs (use_stmt);
4238 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4239 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4240 TREE_TYPE (new_lhs)))
4241 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4242 update_stmt (use_stmt);
4243 }
4244 continue;
4245 }
4246 if (gimple_code (use_stmt) == GIMPLE_COND)
4247 {
4248 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4249 gimple_cond_set_lhs (cond_stmt, ovf);
4250 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4251 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4252 }
4253 else
4254 {
4255 gcc_checking_assert (is_gimple_assign (use_stmt));
4256 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4257 {
4258 gimple_assign_set_rhs1 (use_stmt, ovf);
4259 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4260 gimple_assign_set_rhs_code (use_stmt,
4261 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4262 }
4263 else
4264 {
4265 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4266 == COND_EXPR);
4267 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4268 boolean_type_node, ovf,
4269 build_int_cst (type, 0));
4270 gimple_assign_set_rhs1 (use_stmt, cond);
4271 }
4272 }
4273 update_stmt (use_stmt);
4274 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4275 {
4276 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4277 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4278 cfg_changed);
4279 gsi_remove (&gsi2, true);
4280 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4281 }
4282 }
4283 if (maxval)
4284 {
4285 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4286 gsi_remove (&gsi2, true);
4287 if (add_stmt)
4288 {
4289 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4290 new_lhs);
4291 gsi2 = gsi_for_stmt (add_stmt);
4292 gsi_replace (&gsi2, g, true);
4293 }
4294 }
4295 else if (code == BIT_NOT_EXPR)
4296 {
4297 *gsi = gsi_for_stmt (stmt);
4298 gsi_remove (gsi, true);
4299 release_ssa_name (lhs);
4300 return true;
4301 }
4302 return false;
4303 }
4304
4305 /* Return true if the target has support for divmod. */
4306
4307 static bool
4308 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4309 {
4310 /* If target supports hardware divmod insn, use it for divmod. */
4311 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4312 return true;
4313
4314 /* Check if libfunc for divmod is available. */
4315 rtx libfunc = optab_libfunc (divmod_optab, mode);
4316 if (libfunc != NULL_RTX)
4317 {
4318 /* If optab_handler exists for div_optab, perhaps in a wider mode,
4319 we don't want to use the libfunc even if it exists for the given mode. */
4320 machine_mode div_mode;
4321 FOR_EACH_MODE_FROM (div_mode, mode)
4322 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4323 return false;
4324
4325 return targetm.expand_divmod_libfunc != NULL;
4326 }
4327
4328 return false;
4329 }
4330
4331 /* Check if stmt is a candidate for the divmod transform. */
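/* Illustrative examples (not from this file), assuming 32-bit int and
   32-bit words:

     r = a % b;    candidate, if the target supports divmod for SImode
     r = a % 16;   rejected: power-of-two divisor
     r = a % 7;    rejected: a constant divisor in a word-sized type has
		   its own specialized expansion.  */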
4332
4333 static bool
4334 divmod_candidate_p (gassign *stmt)
4335 {
4336 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4337 machine_mode mode = TYPE_MODE (type);
4338 optab divmod_optab, div_optab;
4339
4340 if (TYPE_UNSIGNED (type))
4341 {
4342 divmod_optab = udivmod_optab;
4343 div_optab = udiv_optab;
4344 }
4345 else
4346 {
4347 divmod_optab = sdivmod_optab;
4348 div_optab = sdiv_optab;
4349 }
4350
4351 tree op1 = gimple_assign_rhs1 (stmt);
4352 tree op2 = gimple_assign_rhs2 (stmt);
4353
4354 /* Disable the transform if either is a constant, since division-by-constant
4355 may have specialized expansion. */
4356 if (CONSTANT_CLASS_P (op1))
4357 return false;
4358
4359 if (CONSTANT_CLASS_P (op2))
4360 {
4361 if (integer_pow2p (op2))
4362 return false;
4363
4364 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4365 && TYPE_PRECISION (type) <= BITS_PER_WORD)
4366 return false;
4367
4368 /* If the divisor is not a power of 2 and the precision is wider than
4369 HWI, expand_divmod punts on that, so in that case it is better
4370 to use the divmod optab or libfunc. Similarly if choose_multiplier
4371 might need pre/post shifts of BITS_PER_WORD or more. */
4372 }
4373
4374 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4375 expand using the [su]divv optabs. */
4376 if (TYPE_OVERFLOW_TRAPS (type))
4377 return false;
4378
4379 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4380 return false;
4381
4382 return true;
4383 }
4384
4385 /* This function looks for:
4386 t1 = a TRUNC_DIV_EXPR b;
4387 t2 = a TRUNC_MOD_EXPR b;
4388 and transforms it to the following sequence:
4389 complex_tmp = DIVMOD (a, b);
4390 t1 = REALPART_EXPR (complex_tmp);
4391 t2 = IMAGPART_EXPR (complex_tmp);
4392 For conditions enabling the transform see divmod_candidate_p().
4393
4394 The pass has three parts:
4395 1) Find top_stmt, a trunc_div or trunc_mod stmt that dominates all
4396 other trunc_div_expr and trunc_mod_expr stmts.
4397 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4398 to stmts vector.
4399 3) Insert DIVMOD call just before top_stmt and update entries in
4400 stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
4401 IMAGPART_EXPR for mod). */
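/* Illustrative sketch (not part of this file): source code such as

     void
     div_and_mod (int a, int b, int *q, int *r)
     {
       *q = a / b;
       *r = a % b;
     }

   is rewritten, when divmod_candidate_p accepts the statements, so that
   both results are read from a single DIVMOD internal-fn call via
   REALPART_EXPR and IMAGPART_EXPR.  */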
4402
4403 static bool
4404 convert_to_divmod (gassign *stmt)
4405 {
4406 if (stmt_can_throw_internal (cfun, stmt)
4407 || !divmod_candidate_p (stmt))
4408 return false;
4409
4410 tree op1 = gimple_assign_rhs1 (stmt);
4411 tree op2 = gimple_assign_rhs2 (stmt);
4412
4413 imm_use_iterator use_iter;
4414 gimple *use_stmt;
4415 auto_vec<gimple *> stmts;
4416
4417 gimple *top_stmt = stmt;
4418 basic_block top_bb = gimple_bb (stmt);
4419
4420 /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4421 at least stmt and possibly other trunc_div/trunc_mod stmts
4422 having the same operands as stmt. */
4423
4424 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4425 {
4426 if (is_gimple_assign (use_stmt)
4427 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4428 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4429 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4430 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4431 {
4432 if (stmt_can_throw_internal (cfun, use_stmt))
4433 continue;
4434
4435 basic_block bb = gimple_bb (use_stmt);
4436
4437 if (bb == top_bb)
4438 {
4439 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4440 top_stmt = use_stmt;
4441 }
4442 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4443 {
4444 top_bb = bb;
4445 top_stmt = use_stmt;
4446 }
4447 }
4448 }
4449
4450 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4451 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4452
4453 stmts.safe_push (top_stmt);
4454 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4455
4456 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4457 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
4458 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4459 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
4460
4461 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4462 {
4463 if (is_gimple_assign (use_stmt)
4464 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4465 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4466 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4467 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4468 {
4469 if (use_stmt == top_stmt
4470 || stmt_can_throw_internal (cfun, use_stmt)
4471 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4472 continue;
4473
4474 stmts.safe_push (use_stmt);
4475 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4476 div_seen = true;
4477 }
4478 }
4479
4480 if (!div_seen)
4481 return false;
4482
4483 /* Part 3: Create libcall to internal fn DIVMOD:
4484 divmod_tmp = DIVMOD (op1, op2). */
4485
4486 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4487 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4488 call_stmt, "divmod_tmp");
4489 gimple_call_set_lhs (call_stmt, res);
4490 /* We rejected throwing statements above. */
4491 gimple_call_set_nothrow (call_stmt, true);
4492
4493 /* Insert the call before top_stmt. */
4494 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4495 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4496
4497 widen_mul_stats.divmod_calls_inserted++;
4498
4499 /* Update all statements in stmts vector:
4500 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4501 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4502
4503 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4504 {
4505 tree new_rhs;
4506
4507 switch (gimple_assign_rhs_code (use_stmt))
4508 {
4509 case TRUNC_DIV_EXPR:
4510 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4511 break;
4512
4513 case TRUNC_MOD_EXPR:
4514 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4515 break;
4516
4517 default:
4518 gcc_unreachable ();
4519 }
4520
4521 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4522 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4523 update_stmt (use_stmt);
4524 }
4525
4526 return true;
4527 }
4528
4529 /* Find integer multiplications where the operands are extended from
4530 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4531 where appropriate. */
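/* Illustrative sketch (not part of this file): the canonical source form is

     long long
     widen_mul (int a, int b)
     {
       return (long long) a * b;
     }

   where both operands of the 64-bit MULT_EXPR are sign-extended from
   32 bits, so convert_mult_to_widen can use WIDEN_MULT_EXPR when the
   target has a widening multiply instruction for these modes.  */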
4532
4533 namespace {
4534
4535 const pass_data pass_data_optimize_widening_mul =
4536 {
4537 GIMPLE_PASS, /* type */
4538 "widening_mul", /* name */
4539 OPTGROUP_NONE, /* optinfo_flags */
4540 TV_TREE_WIDEN_MUL, /* tv_id */
4541 PROP_ssa, /* properties_required */
4542 0, /* properties_provided */
4543 0, /* properties_destroyed */
4544 0, /* todo_flags_start */
4545 TODO_update_ssa, /* todo_flags_finish */
4546 };
4547
4548 class pass_optimize_widening_mul : public gimple_opt_pass
4549 {
4550 public:
4551 pass_optimize_widening_mul (gcc::context *ctxt)
4552 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4553 {}
4554
4555 /* opt_pass methods: */
4556 virtual bool gate (function *)
4557 {
4558 return flag_expensive_optimizations && optimize;
4559 }
4560
4561 virtual unsigned int execute (function *);
4562
4563 }; // class pass_optimize_widening_mul
4564
4565 /* Walker class to perform the transformation in reverse dominance order. */
4566
4567 class math_opts_dom_walker : public dom_walker
4568 {
4569 public:
4570 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4571 if walking modifies the CFG. */
4572
4573 math_opts_dom_walker (bool *cfg_changed_p)
4574 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4575 m_cfg_changed_p (cfg_changed_p) {}
4576
4577 /* The actual actions performed in the walk. */
4578
4579 virtual void after_dom_children (basic_block);
4580
4581 /* Set of results of chains of multiply and add statement combinations that
4582 were not transformed into FMAs because of active deferring. */
4583 hash_set<tree> m_last_result_set;
4584
4585 /* Pointer to a flag of the user that needs to be set if CFG has been
4586 modified. */
4587 bool *m_cfg_changed_p;
4588 };
4589
4590 void
4591 math_opts_dom_walker::after_dom_children (basic_block bb)
4592 {
4593 gimple_stmt_iterator gsi;
4594
4595 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4596
4597 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4598 {
4599 gimple *stmt = gsi_stmt (gsi);
4600 enum tree_code code;
4601
4602 if (is_gimple_assign (stmt))
4603 {
4604 code = gimple_assign_rhs_code (stmt);
4605 switch (code)
4606 {
4607 case MULT_EXPR:
4608 if (!convert_mult_to_widen (stmt, &gsi)
4609 && !convert_expand_mult_copysign (stmt, &gsi)
4610 && convert_mult_to_fma (stmt,
4611 gimple_assign_rhs1 (stmt),
4612 gimple_assign_rhs2 (stmt),
4613 &fma_state))
4614 {
4615 gsi_remove (&gsi, true);
4616 release_defs (stmt);
4617 continue;
4618 }
4619 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4620 break;
4621
4622 case PLUS_EXPR:
4623 case MINUS_EXPR:
4624 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4625 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4626 break;
4627
4628 case BIT_NOT_EXPR:
4629 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4630 continue;
4631 break;
4632
4633 case TRUNC_MOD_EXPR:
4634 convert_to_divmod (as_a<gassign *> (stmt));
4635 break;
4636
4637 default:;
4638 }
4639 }
4640 else if (is_gimple_call (stmt))
4641 {
4642 switch (gimple_call_combined_fn (stmt))
4643 {
4644 CASE_CFN_POW:
4645 if (gimple_call_lhs (stmt)
4646 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4647 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4648 &dconst2)
4649 && convert_mult_to_fma (stmt,
4650 gimple_call_arg (stmt, 0),
4651 gimple_call_arg (stmt, 0),
4652 &fma_state))
4653 {
4654 unlink_stmt_vdef (stmt);
4655 if (gsi_remove (&gsi, true)
4656 && gimple_purge_dead_eh_edges (bb))
4657 *m_cfg_changed_p = true;
4658 release_defs (stmt);
4659 continue;
4660 }
4661 break;
4662
4663 case CFN_COND_MUL:
4664 if (convert_mult_to_fma (stmt,
4665 gimple_call_arg (stmt, 1),
4666 gimple_call_arg (stmt, 2),
4667 &fma_state,
4668 gimple_call_arg (stmt, 0)))
4669
4670 {
4671 gsi_remove (&gsi, true);
4672 release_defs (stmt);
4673 continue;
4674 }
4675 break;
4676
4677 case CFN_LAST:
4678 cancel_fma_deferring (&fma_state);
4679 break;
4680
4681 default:
4682 break;
4683 }
4684 }
4685 gsi_next (&gsi);
4686 }
4687 if (fma_state.m_deferring_p
4688 && fma_state.m_initial_phi)
4689 {
4690 gcc_checking_assert (fma_state.m_last_result);
4691 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
4692 &m_last_result_set))
4693 cancel_fma_deferring (&fma_state);
4694 else
4695 m_last_result_set.add (fma_state.m_last_result);
4696 }
4697 }
4698
4699
4700 unsigned int
4701 pass_optimize_widening_mul::execute (function *fun)
4702 {
4703 bool cfg_changed = false;
4704
4705 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4706 calculate_dominance_info (CDI_DOMINATORS);
4707 renumber_gimple_stmt_uids (cfun);
4708
4709 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4710
4711 statistics_counter_event (fun, "widening multiplications inserted",
4712 widen_mul_stats.widen_mults_inserted);
4713 statistics_counter_event (fun, "widening maccs inserted",
4714 widen_mul_stats.maccs_inserted);
4715 statistics_counter_event (fun, "fused multiply-adds inserted",
4716 widen_mul_stats.fmas_inserted);
4717 statistics_counter_event (fun, "divmod calls inserted",
4718 widen_mul_stats.divmod_calls_inserted);
4719
4720 return cfg_changed ? TODO_cleanup_cfg : 0;
4721 }
4722
4723 } // anon namespace
4724
4725 gimple_opt_pass *
4726 make_pass_optimize_widening_mul (gcc::context *ctxt)
4727 {
4728 return new pass_optimize_widening_mul (ctxt);
4729 }