gcc/tree-ssa-math-opts.c
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* The first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43 First of all, experiments showed that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has fewer than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and their common dominators,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however, this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
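
/* Schematically (hypothetical SSA names, assuming the target's
   min_divisions_for_recip_mul threshold is met), a block containing

       x_4 = a_1 / d_3;
       y_5 = b_2 / d_3;

   has the reciprocal inserted once, right before the first division by
   d_3, and both divisions rewritten as multiplications:

       reciptmp = 1.0 / d_3;
       x_4 = a_1 * reciptmp;
       y_5 = b_2 * reciptmp;

   Inserting in this block is always allowed because it already contains
   a division; hoisting into a dominator additionally requires
   -fno-trapping-math, as described above.  */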
86
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118
119 /* This structure represents one basic block that either computes a
120 division, or is a common dominator for basic blocks that compute a
121 division. */
122 struct occurrence {
123 /* The basic block represented by this structure. */
124 basic_block bb;
125
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
127 inserted in BB. */
128 tree recip_def;
129
130 /* If non-NULL, the SSA_NAME holding the definition for a squared
131 reciprocal inserted in BB. */
132 tree square_recip_def;
133
134 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
135 was inserted in BB. */
136 gimple *recip_def_stmt;
137
138 /* Pointer to a list of "struct occurrence"s for blocks dominated
139 by BB. */
140 struct occurrence *children;
141
142 /* Pointer to the next "struct occurrence" in the list of blocks
143 sharing a common dominator. */
144 struct occurrence *next;
145
146 /* The number of divisions that are in BB before compute_merit. The
147 number of divisions that are in BB or post-dominate it after
148 compute_merit. */
149 int num_divisions;
150
151 /* True if the basic block has a division, false if it is a common
152 dominator for basic blocks that do. If it is false and trapping
153 math is active, BB is not a candidate for inserting a reciprocal. */
154 bool bb_has_division;
155 };
156
157 static struct
158 {
159 /* Number of 1.0/X ops inserted. */
160 int rdivs_inserted;
161
162 /* Number of 1.0/FUNC ops inserted. */
163 int rfuncs_inserted;
164 } reciprocal_stats;
165
166 static struct
167 {
168 /* Number of cexpi calls inserted. */
169 int inserted;
170 } sincos_stats;
171
172 static struct
173 {
174 /* Number of widening multiplication ops inserted. */
175 int widen_mults_inserted;
176
177 /* Number of integer multiply-and-accumulate ops inserted. */
178 int maccs_inserted;
179
180 /* Number of fp fused multiply-add ops inserted. */
181 int fmas_inserted;
182
183 /* Number of divmod calls inserted. */
184 int divmod_calls_inserted;
185 } widen_mul_stats;
186
187 /* The instance of "struct occurrence" representing the highest
188 interesting block in the dominator tree. */
189 static struct occurrence *occ_head;
190
191 /* Allocation pool for getting instances of "struct occurrence". */
192 static object_allocator<occurrence> *occ_pool;
193
194
195
196 /* Allocate and return a new struct occurrence for basic block BB,
197 whose children list is headed by CHILDREN. */
198 static struct occurrence *
199 occ_new (basic_block bb, struct occurrence *children)
200 {
201 struct occurrence *occ;
202
203 bb->aux = occ = occ_pool->allocate ();
204 memset (occ, 0, sizeof (struct occurrence));
205
206 occ->bb = bb;
207 occ->children = children;
208 return occ;
209 }
210
211
212 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
213 list of "struct occurrence"s, one per basic block, having IDOM as
214 their common dominator.
215
216 We try to insert NEW_OCC as deep as possible in the tree, and we also
217 insert any other block that is a common dominator for BB and one
218 block already in the tree. */
219
220 static void
221 insert_bb (struct occurrence *new_occ, basic_block idom,
222 struct occurrence **p_head)
223 {
224 struct occurrence *occ, **p_occ;
225
226 for (p_occ = p_head; (occ = *p_occ) != NULL; )
227 {
228 basic_block bb = new_occ->bb, occ_bb = occ->bb;
229 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
230 if (dom == bb)
231 {
232 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
233 from its list. */
234 *p_occ = occ->next;
235 occ->next = new_occ->children;
236 new_occ->children = occ;
237
238 /* Try the next block (it may also be dominated by BB). */
239 }
240
241 else if (dom == occ_bb)
242 {
243 /* OCC_BB dominates BB. Tail recurse to look deeper. */
244 insert_bb (new_occ, dom, &occ->children);
245 return;
246 }
247
248 else if (dom != idom)
249 {
250 gcc_assert (!dom->aux);
251
252 /* There is a dominator between IDOM and BB, add it and make
253 two children out of NEW_OCC and OCC. First, remove OCC from
254 its list. */
255 *p_occ = occ->next;
256 new_occ->next = occ;
257 occ->next = NULL;
258
259 /* None of the previous blocks has DOM as a dominator: if we tail
260 recursed, we would reexamine them uselessly. Just switch BB with
261 DOM, and go on looking for blocks dominated by DOM. */
262 new_occ = occ_new (dom, new_occ);
263 }
264
265 else
266 {
267 /* Nothing special, go on with the next element. */
268 p_occ = &occ->next;
269 }
270 }
271
272 /* No deeper place was found. Add BB to *P_HEAD, making it a child of IDOM. */
273 new_occ->next = *p_head;
274 *p_head = new_occ;
275 }
276
277 /* Register that we found a division in BB.
278 IMPORTANCE is a measure of how much weighting to give
279 that division. Use IMPORTANCE = 2 to register a single
280 division. If the division is going to be found multiple
281 times use 1 (as it is with squares). */
282
283 static inline void
284 register_division_in (basic_block bb, int importance)
285 {
286 struct occurrence *occ;
287
288 occ = (struct occurrence *) bb->aux;
289 if (!occ)
290 {
291 occ = occ_new (bb, NULL);
292 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
293 }
294
295 occ->bb_has_division = true;
296 occ->num_divisions += importance;
297 }
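
/* For instance (hypothetical names): a_2 / x_1 registers its block once
   with IMPORTANCE 2, while b_3 / (x_1 * x_1) is reached twice when
   walking the immediate uses of x_1 (x_1 appears as both operands of
   the square) and is therefore registered twice with IMPORTANCE 1,
   yielding the same total weight.  The accumulated count is halved
   again in insert_reciprocals before being compared to the target's
   threshold.  */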
298
299
300 /* Compute the number of divisions that postdominate each block in OCC and
301 its children. */
302
303 static void
304 compute_merit (struct occurrence *occ)
305 {
306 struct occurrence *occ_child;
307 basic_block dom = occ->bb;
308
309 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
310 {
311 basic_block bb;
312 if (occ_child->children)
313 compute_merit (occ_child);
314
315 if (flag_exceptions)
316 bb = single_noncomplex_succ (dom);
317 else
318 bb = dom;
319
320 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
321 occ->num_divisions += occ_child->num_divisions;
322 }
323 }
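
/* In other words, a child's divisions count towards its dominator only
   when the child's block post-dominates it (or its single non-EH
   successor when exceptions are enabled), e.g. the join block of a
   conditional, since only then is a reciprocal computed in the
   dominator guaranteed to be used.  Divisions in a block that lies on
   just one arm of a conditional are not credited to the dominator.  */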
324
325
326 /* Return whether USE_STMT is a floating-point division by DEF. */
327 static inline bool
328 is_division_by (gimple *use_stmt, tree def)
329 {
330 return is_gimple_assign (use_stmt)
331 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
332 && gimple_assign_rhs2 (use_stmt) == def
333 /* Do not recognize x / x as valid division, as we are getting
334 confused later by replacing all immediate uses of x in such
335 a stmt. */
336 && gimple_assign_rhs1 (use_stmt) != def
337 && !stmt_can_throw_internal (cfun, use_stmt);
338 }
339
340 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
341 static inline bool
342 is_mult_by (gimple *use_stmt, tree def, tree a)
343 {
344 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
345 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
346 {
347 tree op0 = gimple_assign_rhs1 (use_stmt);
348 tree op1 = gimple_assign_rhs2 (use_stmt);
349
350 return (op0 == def && op1 == a)
351 || (op0 == a && op1 == def);
352 }
353 return 0;
354 }
355
356 /* Return whether USE_STMT is DEF * DEF. */
357 static inline bool
358 is_square_of (gimple *use_stmt, tree def)
359 {
360 return is_mult_by (use_stmt, def, def);
361 }
362
363 /* Return whether USE_STMT is a floating-point division by
364 DEF * DEF. */
365 static inline bool
366 is_division_by_square (gimple *use_stmt, tree def)
367 {
368 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
369 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
370 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
371 && !stmt_can_throw_internal (cfun, use_stmt))
372 {
373 tree denominator = gimple_assign_rhs2 (use_stmt);
374 if (TREE_CODE (denominator) == SSA_NAME)
375 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
376 }
377 return 0;
378 }
379
380 /* Walk the subset of the dominator tree rooted at OCC, setting the
381 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
382 the given basic block. The field may be left NULL, of course,
383 if it is not possible or profitable to do the optimization.
384
385 DEF_BSI is an iterator pointing at the statement defining DEF.
386 If RECIP_DEF is set, a dominator already has a computation that can
387 be used.
388
389 If should_insert_square_recip is set, then this also inserts
390 the square of the reciprocal immediately after the definition
391 of the reciprocal. */
392
393 static void
394 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
395 tree def, tree recip_def, tree square_recip_def,
396 int should_insert_square_recip, int threshold)
397 {
398 tree type;
399 gassign *new_stmt, *new_square_stmt;
400 gimple_stmt_iterator gsi;
401 struct occurrence *occ_child;
402
403 if (!recip_def
404 && (occ->bb_has_division || !flag_trapping_math)
405 /* Divide by two as all divisions are counted twice in
406 the costing loop. */
407 && occ->num_divisions / 2 >= threshold)
408 {
409 /* Make a variable with the replacement and substitute it. */
410 type = TREE_TYPE (def);
411 recip_def = create_tmp_reg (type, "reciptmp");
412 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
413 build_one_cst (type), def);
414
415 if (should_insert_square_recip)
416 {
417 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
418 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
419 recip_def, recip_def);
420 }
421
422 if (occ->bb_has_division)
423 {
424 /* Case 1: insert before an existing division. */
425 gsi = gsi_after_labels (occ->bb);
426 while (!gsi_end_p (gsi)
427 && (!is_division_by (gsi_stmt (gsi), def))
428 && (!is_division_by_square (gsi_stmt (gsi), def)))
429 gsi_next (&gsi);
430
431 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
432 if (should_insert_square_recip)
433 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
434 }
435 else if (def_gsi && occ->bb == def_gsi->bb)
436 {
437 /* Case 2: insert right after the definition. Note that this will
438 never happen if the definition statement can throw, because in
439 that case the sole successor of the statement's basic block will
440 dominate all the uses as well. */
441 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
442 if (should_insert_square_recip)
443 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
444 }
445 else
446 {
447 /* Case 3: insert in a basic block not containing defs/uses. */
448 gsi = gsi_after_labels (occ->bb);
449 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
450 if (should_insert_square_recip)
451 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
452 }
453
454 reciprocal_stats.rdivs_inserted++;
455
456 occ->recip_def_stmt = new_stmt;
457 }
458
459 occ->recip_def = recip_def;
460 occ->square_recip_def = square_recip_def;
461 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
462 insert_reciprocals (def_gsi, occ_child, def, recip_def,
463 square_recip_def, should_insert_square_recip,
464 threshold);
465 }
466
467 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
468 Take as argument the use for (x * x). */
469 static inline void
470 replace_reciprocal_squares (use_operand_p use_p)
471 {
472 gimple *use_stmt = USE_STMT (use_p);
473 basic_block bb = gimple_bb (use_stmt);
474 struct occurrence *occ = (struct occurrence *) bb->aux;
475
476 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
477 && occ->recip_def)
478 {
479 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
480 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
481 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
482 SET_USE (use_p, occ->square_recip_def);
483 fold_stmt_inplace (&gsi);
484 update_stmt (use_stmt);
485 }
486 }
487
488
489 /* Replace the division at USE_P with a multiplication by the reciprocal, if
490 possible. */
491
492 static inline void
493 replace_reciprocal (use_operand_p use_p)
494 {
495 gimple *use_stmt = USE_STMT (use_p);
496 basic_block bb = gimple_bb (use_stmt);
497 struct occurrence *occ = (struct occurrence *) bb->aux;
498
499 if (optimize_bb_for_speed_p (bb)
500 && occ->recip_def && use_stmt != occ->recip_def_stmt)
501 {
502 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
503 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
504 SET_USE (use_p, occ->recip_def);
505 fold_stmt_inplace (&gsi);
506 update_stmt (use_stmt);
507 }
508 }
509
510
511 /* Free OCC and return one more "struct occurrence" to be freed. */
512
513 static struct occurrence *
514 free_bb (struct occurrence *occ)
515 {
516 struct occurrence *child, *next;
517
518 /* First get the two pointers hanging off OCC. */
519 next = occ->next;
520 child = occ->children;
521 occ->bb->aux = NULL;
522 occ_pool->remove (occ);
523
524 /* Now ensure that we don't recurse unless it is necessary. */
525 if (!child)
526 return next;
527 else
528 {
529 while (next)
530 next = free_bb (next);
531
532 return child;
533 }
534 }
535
536 /* Transform sequences like
537 t = sqrt (a)
538 x = 1.0 / t;
539 r1 = x * x;
540 r2 = a * x;
541 into:
542 t = sqrt (a)
543 r1 = 1.0 / a;
544 r2 = t;
545 x = r1 * r2;
546 depending on the uses of x, r1, r2. This removes one multiplication and
547 allows the sqrt and division operations to execute in parallel.
548 DEF_GSI is the gsi of the initial division by sqrt that defines
549 DEF (x in the example above). */
550
551 static void
552 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
553 {
554 gimple *use_stmt;
555 imm_use_iterator use_iter;
556 gimple *stmt = gsi_stmt (*def_gsi);
557 tree x = def;
558 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
559 tree div_rhs1 = gimple_assign_rhs1 (stmt);
560
561 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
562 || TREE_CODE (div_rhs1) != REAL_CST
563 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
564 return;
565
566 gcall *sqrt_stmt
567 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
568
569 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
570 return;
571
572 switch (gimple_call_combined_fn (sqrt_stmt))
573 {
574 CASE_CFN_SQRT:
575 CASE_CFN_SQRT_FN:
576 break;
577
578 default:
579 return;
580 }
581 tree a = gimple_call_arg (sqrt_stmt, 0);
582
583 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
584
585 /* Statements that use x in x * x. */
586 auto_vec<gimple *> sqr_stmts;
587 /* Statements that use x in a * x. */
588 auto_vec<gimple *> mult_stmts;
589 bool has_other_use = false;
590 bool mult_on_main_path = false;
591
592 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
593 {
594 if (is_gimple_debug (use_stmt))
595 continue;
596 if (is_square_of (use_stmt, x))
597 {
598 sqr_stmts.safe_push (use_stmt);
599 if (gimple_bb (use_stmt) == gimple_bb (stmt))
600 mult_on_main_path = true;
601 }
602 else if (is_mult_by (use_stmt, x, a))
603 {
604 mult_stmts.safe_push (use_stmt);
605 if (gimple_bb (use_stmt) == gimple_bb (stmt))
606 mult_on_main_path = true;
607 }
608 else
609 has_other_use = true;
610 }
611
612 /* In the x * x and a * x cases we just rewire stmt operands or
613 remove multiplications. In the has_other_use case we introduce
614 a multiplication, so make sure we don't introduce a multiplication
615 on a path where there was none. */
616 if (has_other_use && !mult_on_main_path)
617 return;
618
619 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
620 return;
621
622 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
623 to be able to compose it from the sqr and mult cases. */
624 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
625 return;
626
627 if (dump_file)
628 {
629 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
630 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
631 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
632 fprintf (dump_file, "\n");
633 }
634
635 bool delete_div = !has_other_use;
636 tree sqr_ssa_name = NULL_TREE;
637 if (!sqr_stmts.is_empty ())
638 {
639 /* r1 = x * x. Transform the original
640 x = 1.0 / t
641 into
642 tmp1 = 1.0 / a
643 r1 = tmp1. */
644
645 sqr_ssa_name
646 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
647
648 if (dump_file)
649 {
650 fprintf (dump_file, "Replacing original division\n");
651 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
652 fprintf (dump_file, "with new division\n");
653 }
654 stmt
655 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
656 gimple_assign_rhs1 (stmt), a);
657 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
658 gsi_remove (def_gsi, true);
659 *def_gsi = gsi_for_stmt (stmt);
660 fold_stmt_inplace (def_gsi);
661 update_stmt (stmt);
662
663 if (dump_file)
664 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
665
666 delete_div = false;
667 gimple *sqr_stmt;
668 unsigned int i;
669 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
670 {
671 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
672 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
673 update_stmt (sqr_stmt);
674 }
675 }
676 if (!mult_stmts.is_empty ())
677 {
678 /* r2 = a * x. Transform this into:
679 r2 = t (The original sqrt (a)). */
680 unsigned int i;
681 gimple *mult_stmt = NULL;
682 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
683 {
684 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
685
686 if (dump_file)
687 {
688 fprintf (dump_file, "Replacing squaring multiplication\n");
689 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
690 fprintf (dump_file, "with assignment\n");
691 }
692 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
693 fold_stmt_inplace (&gsi2);
694 update_stmt (mult_stmt);
695 if (dump_file)
696 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
697 }
698 }
699
700 if (has_other_use)
701 {
702 /* Using the two temporaries tmp1, tmp2 from above
703 the original x is now:
704 x = tmp1 * tmp2. */
705 gcc_assert (orig_sqrt_ssa_name);
706 gcc_assert (sqr_ssa_name);
707
708 gimple *new_stmt
709 = gimple_build_assign (x, MULT_EXPR,
710 orig_sqrt_ssa_name, sqr_ssa_name);
711 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
712 update_stmt (stmt);
713 }
714 else if (delete_div)
715 {
716 /* Remove the original division. */
717 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
718 gsi_remove (&gsi2, true);
719 release_defs (stmt);
720 }
721 else
722 release_ssa_name (x);
723 }
724
725 /* Look for floating-point divisions among DEF's uses, and try to
726 replace them by multiplications with the reciprocal. Add
727 as many statements computing the reciprocal as needed.
728
729 DEF must be a GIMPLE register of a floating-point type. */
730
731 static void
732 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
733 {
734 use_operand_p use_p, square_use_p;
735 imm_use_iterator use_iter, square_use_iter;
736 tree square_def;
737 struct occurrence *occ;
738 int count = 0;
739 int threshold;
740 int square_recip_count = 0;
741 int sqrt_recip_count = 0;
742
743 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
744 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
745
746 /* If DEF is a square (x * x), count the number of divisions by x.
747 If there are more divisions by x than by (DEF * DEF), prefer to optimize
748 the reciprocal of x instead of DEF. This improves cases like:
749 def = x * x
750 t0 = a / def
751 t1 = b / def
752 t2 = c / x
753 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
754 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
755
756 if (is_gimple_assign (def_stmt)
757 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
758 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
759 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
760 {
761 tree op0 = gimple_assign_rhs1 (def_stmt);
762
763 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
764 {
765 gimple *use_stmt = USE_STMT (use_p);
766 if (is_division_by (use_stmt, op0))
767 sqrt_recip_count++;
768 }
769 }
770
771 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
772 {
773 gimple *use_stmt = USE_STMT (use_p);
774 if (is_division_by (use_stmt, def))
775 {
776 register_division_in (gimple_bb (use_stmt), 2);
777 count++;
778 }
779
780 if (is_square_of (use_stmt, def))
781 {
782 square_def = gimple_assign_lhs (use_stmt);
783 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
784 {
785 gimple *square_use_stmt = USE_STMT (square_use_p);
786 if (is_division_by (square_use_stmt, square_def))
787 {
788 /* This is executed twice for each division by a square. */
789 register_division_in (gimple_bb (square_use_stmt), 1);
790 square_recip_count++;
791 }
792 }
793 }
794 }
795
796 /* Square reciprocals were counted twice above. */
797 square_recip_count /= 2;
798
799 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
800 if (sqrt_recip_count > square_recip_count)
801 goto out;
802
803 /* Do the expensive part only if we can hope to optimize something. */
804 if (count + square_recip_count >= threshold && count >= 1)
805 {
806 gimple *use_stmt;
807 for (occ = occ_head; occ; occ = occ->next)
808 {
809 compute_merit (occ);
810 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
811 square_recip_count, threshold);
812 }
813
814 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
815 {
816 if (is_division_by (use_stmt, def))
817 {
818 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
819 replace_reciprocal (use_p);
820 }
821 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
822 {
823 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
824 {
825 /* Find all uses of the square that are divisions and
826 * replace them by multiplications with the inverse. */
827 imm_use_iterator square_iterator;
828 gimple *powmult_use_stmt = USE_STMT (use_p);
829 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
830
831 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
832 square_iterator, powmult_def_name)
833 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
834 {
835 gimple *powmult_use_stmt = USE_STMT (square_use_p);
836 if (is_division_by (powmult_use_stmt, powmult_def_name))
837 replace_reciprocal_squares (square_use_p);
838 }
839 }
840 }
841 }
842 }
843
844 out:
845 for (occ = occ_head; occ; )
846 occ = free_bb (occ);
847
848 occ_head = NULL;
849 }
850
851 /* Return an internal function that implements the reciprocal of CALL,
852 or IFN_LAST if there is no such function that the target supports. */
853
854 internal_fn
855 internal_fn_reciprocal (gcall *call)
856 {
857 internal_fn ifn;
858
859 switch (gimple_call_combined_fn (call))
860 {
861 CASE_CFN_SQRT:
862 CASE_CFN_SQRT_FN:
863 ifn = IFN_RSQRT;
864 break;
865
866 default:
867 return IFN_LAST;
868 }
869
870 tree_pair types = direct_internal_fn_types (ifn, call);
871 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
872 return IFN_LAST;
873
874 return ifn;
875 }
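
/* For example, on a target that supports IFN_RSQRT for the type, the
   scan in pass_cse_reciprocals::execute below uses this to rewrite
   (hypothetical SSA names)

       t_2 = sqrtf (b_1);
       x_3 = a_1 / t_2;

   into a reciprocal square root and a multiplication

       t_2 = IFN_RSQRT (b_1);
       x_3 = a_1 * t_2;

   provided every non-debug use of t_2 is a division by t_2.  */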
876
877 /* Go through all the floating-point SSA_NAMEs, and call
878 execute_cse_reciprocals_1 on each of them. */
879 namespace {
880
881 const pass_data pass_data_cse_reciprocals =
882 {
883 GIMPLE_PASS, /* type */
884 "recip", /* name */
885 OPTGROUP_NONE, /* optinfo_flags */
886 TV_TREE_RECIP, /* tv_id */
887 PROP_ssa, /* properties_required */
888 0, /* properties_provided */
889 0, /* properties_destroyed */
890 0, /* todo_flags_start */
891 TODO_update_ssa, /* todo_flags_finish */
892 };
893
894 class pass_cse_reciprocals : public gimple_opt_pass
895 {
896 public:
897 pass_cse_reciprocals (gcc::context *ctxt)
898 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
899 {}
900
901 /* opt_pass methods: */
902 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
903 virtual unsigned int execute (function *);
904
905 }; // class pass_cse_reciprocals
906
907 unsigned int
908 pass_cse_reciprocals::execute (function *fun)
909 {
910 basic_block bb;
911 tree arg;
912
913 occ_pool = new object_allocator<occurrence> ("dominators for recip");
914
915 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
916 calculate_dominance_info (CDI_DOMINATORS);
917 calculate_dominance_info (CDI_POST_DOMINATORS);
918
919 if (flag_checking)
920 FOR_EACH_BB_FN (bb, fun)
921 gcc_assert (!bb->aux);
922
923 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
924 if (FLOAT_TYPE_P (TREE_TYPE (arg))
925 && is_gimple_reg (arg))
926 {
927 tree name = ssa_default_def (fun, arg);
928 if (name)
929 execute_cse_reciprocals_1 (NULL, name);
930 }
931
932 FOR_EACH_BB_FN (bb, fun)
933 {
934 tree def;
935
936 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
937 gsi_next (&gsi))
938 {
939 gphi *phi = gsi.phi ();
940 def = PHI_RESULT (phi);
941 if (! virtual_operand_p (def)
942 && FLOAT_TYPE_P (TREE_TYPE (def)))
943 execute_cse_reciprocals_1 (NULL, def);
944 }
945
946 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
947 gsi_next (&gsi))
948 {
949 gimple *stmt = gsi_stmt (gsi);
950
951 if (gimple_has_lhs (stmt)
952 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
953 && FLOAT_TYPE_P (TREE_TYPE (def))
954 && TREE_CODE (def) == SSA_NAME)
955 {
956 execute_cse_reciprocals_1 (&gsi, def);
957 stmt = gsi_stmt (gsi);
958 if (flag_unsafe_math_optimizations
959 && is_gimple_assign (stmt)
960 && gimple_assign_lhs (stmt) == def
961 && !stmt_can_throw_internal (cfun, stmt)
962 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
963 optimize_recip_sqrt (&gsi, def);
964 }
965 }
966
967 if (optimize_bb_for_size_p (bb))
968 continue;
969
970 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
971 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
972 gsi_next (&gsi))
973 {
974 gimple *stmt = gsi_stmt (gsi);
975
976 if (is_gimple_assign (stmt)
977 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
978 {
979 tree arg1 = gimple_assign_rhs2 (stmt);
980 gimple *stmt1;
981
982 if (TREE_CODE (arg1) != SSA_NAME)
983 continue;
984
985 stmt1 = SSA_NAME_DEF_STMT (arg1);
986
987 if (is_gimple_call (stmt1)
988 && gimple_call_lhs (stmt1))
989 {
990 bool fail;
991 imm_use_iterator ui;
992 use_operand_p use_p;
993 tree fndecl = NULL_TREE;
994
995 gcall *call = as_a <gcall *> (stmt1);
996 internal_fn ifn = internal_fn_reciprocal (call);
997 if (ifn == IFN_LAST)
998 {
999 fndecl = gimple_call_fndecl (call);
1000 if (!fndecl
1001 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1002 continue;
1003 fndecl = targetm.builtin_reciprocal (fndecl);
1004 if (!fndecl)
1005 continue;
1006 }
1007
1008 /* Check that all uses of the SSA name are divisions,
1009 otherwise replacing the defining statement will do
1010 the wrong thing. */
1011 fail = false;
1012 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1013 {
1014 gimple *stmt2 = USE_STMT (use_p);
1015 if (is_gimple_debug (stmt2))
1016 continue;
1017 if (!is_gimple_assign (stmt2)
1018 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1019 || gimple_assign_rhs1 (stmt2) == arg1
1020 || gimple_assign_rhs2 (stmt2) != arg1)
1021 {
1022 fail = true;
1023 break;
1024 }
1025 }
1026 if (fail)
1027 continue;
1028
1029 gimple_replace_ssa_lhs (call, arg1);
1030 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1031 {
1032 auto_vec<tree, 4> args;
1033 for (unsigned int i = 0;
1034 i < gimple_call_num_args (call); i++)
1035 args.safe_push (gimple_call_arg (call, i));
1036 gcall *stmt2;
1037 if (ifn == IFN_LAST)
1038 stmt2 = gimple_build_call_vec (fndecl, args);
1039 else
1040 stmt2 = gimple_build_call_internal_vec (ifn, args);
1041 gimple_call_set_lhs (stmt2, arg1);
1042 gimple_move_vops (stmt2, call);
1043 gimple_call_set_nothrow (stmt2,
1044 gimple_call_nothrow_p (call));
1045 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1046 gsi_replace (&gsi2, stmt2, true);
1047 }
1048 else
1049 {
1050 if (ifn == IFN_LAST)
1051 gimple_call_set_fndecl (call, fndecl);
1052 else
1053 gimple_call_set_internal_fn (call, ifn);
1054 update_stmt (call);
1055 }
1056 reciprocal_stats.rfuncs_inserted++;
1057
1058 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1059 {
1060 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1061 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1062 fold_stmt_inplace (&gsi);
1063 update_stmt (stmt);
1064 }
1065 }
1066 }
1067 }
1068 }
1069
1070 statistics_counter_event (fun, "reciprocal divs inserted",
1071 reciprocal_stats.rdivs_inserted);
1072 statistics_counter_event (fun, "reciprocal functions inserted",
1073 reciprocal_stats.rfuncs_inserted);
1074
1075 free_dominance_info (CDI_DOMINATORS);
1076 free_dominance_info (CDI_POST_DOMINATORS);
1077 delete occ_pool;
1078 return 0;
1079 }
1080
1081 } // anon namespace
1082
1083 gimple_opt_pass *
1084 make_pass_cse_reciprocals (gcc::context *ctxt)
1085 {
1086 return new pass_cse_reciprocals (ctxt);
1087 }
1088
1089 /* Records an occurrence at statement USE_STMT in the vector of statements
1090 STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB is not
1091 yet initialized. Returns true if the occurrence was pushed on
1092 the vector. Adjusts *TOP_BB to be the basic block dominating all
1093 statements in the vector. */
1094
1095 static bool
1096 maybe_record_sincos (vec<gimple *> *stmts,
1097 basic_block *top_bb, gimple *use_stmt)
1098 {
1099 basic_block use_bb = gimple_bb (use_stmt);
1100 if (*top_bb
1101 && (*top_bb == use_bb
1102 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1103 stmts->safe_push (use_stmt);
1104 else if (!*top_bb
1105 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1106 {
1107 stmts->safe_push (use_stmt);
1108 *top_bb = use_bb;
1109 }
1110 else
1111 return false;
1112
1113 return true;
1114 }
1115
1116 /* Look for sin, cos and cexpi calls with the same argument NAME and
1117 create a single call to cexpi CSEing the result in this case.
1118 We first walk over all immediate uses of the argument, collecting in
1119 a vector the statements that we can CSE; in a second pass we replace
1120 each statement's rhs with a REALPART or IMAGPART expression on the
1121 result of the cexpi call, which we insert before the use statement
1122 that dominates all other candidates.
1123
1124 static bool
1125 execute_cse_sincos_1 (tree name)
1126 {
1127 gimple_stmt_iterator gsi;
1128 imm_use_iterator use_iter;
1129 tree fndecl, res, type;
1130 gimple *def_stmt, *use_stmt, *stmt;
1131 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1132 auto_vec<gimple *> stmts;
1133 basic_block top_bb = NULL;
1134 int i;
1135 bool cfg_changed = false;
1136
1137 type = TREE_TYPE (name);
1138 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1139 {
1140 if (gimple_code (use_stmt) != GIMPLE_CALL
1141 || !gimple_call_lhs (use_stmt))
1142 continue;
1143
1144 switch (gimple_call_combined_fn (use_stmt))
1145 {
1146 CASE_CFN_COS:
1147 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1148 break;
1149
1150 CASE_CFN_SIN:
1151 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1152 break;
1153
1154 CASE_CFN_CEXPI:
1155 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1156 break;
1157
1158 default:;
1159 }
1160 }
1161
1162 if (seen_cos + seen_sin + seen_cexpi <= 1)
1163 return false;
1164
1165 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1166 the name def statement. */
1167 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1168 if (!fndecl)
1169 return false;
1170 stmt = gimple_build_call (fndecl, 1, name);
1171 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1172 gimple_call_set_lhs (stmt, res);
1173
1174 def_stmt = SSA_NAME_DEF_STMT (name);
1175 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1176 && gimple_code (def_stmt) != GIMPLE_PHI
1177 && gimple_bb (def_stmt) == top_bb)
1178 {
1179 gsi = gsi_for_stmt (def_stmt);
1180 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1181 }
1182 else
1183 {
1184 gsi = gsi_after_labels (top_bb);
1185 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1186 }
1187 sincos_stats.inserted++;
1188
1189 /* And adjust the recorded old call sites. */
1190 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1191 {
1192 tree rhs = NULL;
1193
1194 switch (gimple_call_combined_fn (use_stmt))
1195 {
1196 CASE_CFN_COS:
1197 rhs = fold_build1 (REALPART_EXPR, type, res);
1198 break;
1199
1200 CASE_CFN_SIN:
1201 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1202 break;
1203
1204 CASE_CFN_CEXPI:
1205 rhs = res;
1206 break;
1207
1208 default:;
1209 gcc_unreachable ();
1210 }
1211
1212 /* Replace call with a copy. */
1213 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1214
1215 gsi = gsi_for_stmt (use_stmt);
1216 gsi_replace (&gsi, stmt, true);
1217 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1218 cfg_changed = true;
1219 }
1220
1221 return cfg_changed;
1222 }
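
/* As a sketch (hypothetical SSA names), for

       c_2 = cos (a_1);
       s_3 = sin (a_1);

   a single cexpi call is emitted before the dominating use and the old
   calls become component extractions of its result:

       sincostmp_4 = cexpi (a_1);
       c_2 = REALPART_EXPR <sincostmp_4>;
       s_3 = IMAGPART_EXPR <sincostmp_4>;  */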
1223
1224 /* To evaluate powi(x,n), the floating point value x raised to the
1225 constant integer exponent n, we use a hybrid algorithm that
1226 combines the "window method" with look-up tables. For an
1227 introduction to exponentiation algorithms and "addition chains",
1228 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1229 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1230 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1231 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1232
1233 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1234 multiplications to inline before calling the system library's pow
1235 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1236 so this default never requires calling pow, powf or powl. */
1237
1238 #ifndef POWI_MAX_MULTS
1239 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1240 #endif
1241
1242 /* The size of the "optimal power tree" lookup table. All
1243 exponents less than this value are simply looked up in the
1244 powi_table below. This threshold is also used to size the
1245 cache of pseudo registers that hold intermediate results. */
1246 #define POWI_TABLE_SIZE 256
1247
1248 /* The size, in bits of the window, used in the "window method"
1249 exponentiation algorithm. This is equivalent to a radix of
1250 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1251 #define POWI_WINDOW_SIZE 3
1252
1253 /* The following table is an efficient representation of an
1254 "optimal power tree". For each value, i, the corresponding
1255 value, j, in the table states that an optimal evaluation
1256 sequence for calculating pow(x,i) can be found by evaluating
1257 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1258 100 integers is given in Knuth's "Seminumerical algorithms". */
1259
1260 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1261 {
1262 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1263 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1264 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1265 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1266 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1267 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1268 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1269 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1270 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1271 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1272 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1273 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1274 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1275 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1276 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1277 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1278 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1279 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1280 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1281 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1282 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1283 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1284 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1285 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1286 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1287 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1288 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1289 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1290 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1291 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1292 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1293 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1294 };
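
/* Worked example: powi_table[15] == 9, so x**15 is evaluated as
   x**9 * x**6.  Expanding recursively (powi_table[9] == 6,
   powi_table[6] == 3, powi_table[3] == 2, powi_table[2] == 1):

       x2  = x  * x
       x3  = x  * x2
       x6  = x3 * x3
       x9  = x3 * x6
       x15 = x6 * x9

   i.e. five multiplications, one fewer than the plain binary
   square-and-multiply method needs for n == 15.  */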
1295
1296
1297 /* Return the number of multiplications required to calculate
1298 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1299 subroutine of powi_cost. CACHE is an array indicating
1300 which exponents have already been calculated. */
1301
1302 static int
1303 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1304 {
1305 /* If we've already calculated this exponent, then this evaluation
1306 doesn't require any additional multiplications. */
1307 if (cache[n])
1308 return 0;
1309
1310 cache[n] = true;
1311 return powi_lookup_cost (n - powi_table[n], cache)
1312 + powi_lookup_cost (powi_table[n], cache) + 1;
1313 }
1314
1315 /* Return the number of multiplications required to calculate
1316 powi(x,n) for an arbitrary x, given the exponent N. This
1317 function needs to be kept in sync with powi_as_mults below. */
1318
1319 static int
1320 powi_cost (HOST_WIDE_INT n)
1321 {
1322 bool cache[POWI_TABLE_SIZE];
1323 unsigned HOST_WIDE_INT digit;
1324 unsigned HOST_WIDE_INT val;
1325 int result;
1326
1327 if (n == 0)
1328 return 0;
1329
1330 /* Ignore the reciprocal when calculating the cost. */
1331 val = (n < 0) ? -n : n;
1332
1333 /* Initialize the exponent cache. */
1334 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1335 cache[1] = true;
1336
1337 result = 0;
1338
1339 while (val >= POWI_TABLE_SIZE)
1340 {
1341 if (val & 1)
1342 {
1343 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1344 result += powi_lookup_cost (digit, cache)
1345 + POWI_WINDOW_SIZE + 1;
1346 val >>= POWI_WINDOW_SIZE;
1347 }
1348 else
1349 {
1350 val >>= 1;
1351 result++;
1352 }
1353 }
1354
1355 return result + powi_lookup_cost (val, cache);
1356 }
1357
1358 /* Recursive subroutine of powi_as_mults. This function takes the
1359 array, CACHE, of already calculated exponents and an exponent N and
1360 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1361
1362 static tree
1363 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1364 HOST_WIDE_INT n, tree *cache)
1365 {
1366 tree op0, op1, ssa_target;
1367 unsigned HOST_WIDE_INT digit;
1368 gassign *mult_stmt;
1369
1370 if (n < POWI_TABLE_SIZE && cache[n])
1371 return cache[n];
1372
1373 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1374
1375 if (n < POWI_TABLE_SIZE)
1376 {
1377 cache[n] = ssa_target;
1378 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1379 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1380 }
1381 else if (n & 1)
1382 {
1383 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1384 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1385 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1386 }
1387 else
1388 {
1389 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1390 op1 = op0;
1391 }
1392
1393 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1394 gimple_set_location (mult_stmt, loc);
1395 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1396
1397 return ssa_target;
1398 }
1399
1400 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1401 This function needs to be kept in sync with powi_cost above. */
1402
1403 static tree
1404 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1405 tree arg0, HOST_WIDE_INT n)
1406 {
1407 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1408 gassign *div_stmt;
1409 tree target;
1410
1411 if (n == 0)
1412 return build_real (type, dconst1);
1413
1414 memset (cache, 0, sizeof (cache));
1415 cache[1] = arg0;
1416
1417 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1418 if (n >= 0)
1419 return result;
1420
1421 /* If the original exponent was negative, reciprocate the result. */
1422 target = make_temp_ssa_name (type, NULL, "powmult");
1423 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1424 build_real (type, dconst1), result);
1425 gimple_set_location (div_stmt, loc);
1426 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1427
1428 return target;
1429 }
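
/* For a negative exponent this yields, e.g. for powi (x, -3)
   (hypothetical temporaries):

       powmult_1 = x * x
       powmult_2 = x * powmult_1
       powmult_3 = 1.0 / powmult_2

   i.e. the positive power followed by a single reciprocal.  */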
1430
1431 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1432 location info LOC. If the arguments are appropriate, create an
1433 equivalent sequence of statements prior to GSI using an optimal
1434 number of multiplications, and return an expression holding the
1435 result. */
1436
1437 static tree
1438 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1439 tree arg0, HOST_WIDE_INT n)
1440 {
1441 /* Avoid largest negative number. */
1442 if (n != -n
1443 && ((n >= -1 && n <= 2)
1444 || (optimize_function_for_speed_p (cfun)
1445 && powi_cost (n) <= POWI_MAX_MULTS)))
1446 return powi_as_mults (gsi, loc, arg0, n);
1447
1448 return NULL_TREE;
1449 }
1450
1451 /* Build a gimple call statement that calls FN with argument ARG.
1452 Set the lhs of the call statement to a fresh SSA name. Insert the
1453 statement prior to GSI's current position, and return the fresh
1454 SSA name. */
1455
1456 static tree
1457 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1458 tree fn, tree arg)
1459 {
1460 gcall *call_stmt;
1461 tree ssa_target;
1462
1463 call_stmt = gimple_build_call (fn, 1, arg);
1464 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1465 gimple_set_lhs (call_stmt, ssa_target);
1466 gimple_set_location (call_stmt, loc);
1467 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1468
1469 return ssa_target;
1470 }
1471
1472 /* Build a gimple binary operation with the given CODE and arguments
1473 ARG0, ARG1, assigning the result to a new SSA name based on NAME.
1474 Insert the statement prior to GSI's current position, and
1475 return the fresh SSA name. */
1476
1477 static tree
1478 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1479 const char *name, enum tree_code code,
1480 tree arg0, tree arg1)
1481 {
1482 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1483 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1484 gimple_set_location (stmt, loc);
1485 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1486 return result;
1487 }
1488
1489 /* Build a gimple reference operation with the given CODE and argument
1490 ARG0, assigning the result to a new SSA name of TYPE with NAME.
1491 Insert the statement prior to GSI's current position, and return
1492 the fresh SSA name. */
1493
1494 static inline tree
1495 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1496 const char *name, enum tree_code code, tree arg0)
1497 {
1498 tree result = make_temp_ssa_name (type, NULL, name);
1499 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1500 gimple_set_location (stmt, loc);
1501 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1502 return result;
1503 }
1504
1505 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1506 prior to GSI's current position, and return the fresh SSA name. */
1507
1508 static tree
1509 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1510 tree type, tree val)
1511 {
1512 tree result = make_ssa_name (type);
1513 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1514 gimple_set_location (stmt, loc);
1515 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1516 return result;
1517 }
1518
1519 struct pow_synth_sqrt_info
1520 {
1521 bool *factors;
1522 unsigned int deepest;
1523 unsigned int num_mults;
1524 };
1525
1526 /* Return true iff the real value C can be represented as a
1527 sum of powers of 0.5 up to N. That is:
1528 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1529 Record in INFO the various parameters of the synthesis algorithm such
1530 as the factors a[i], the maximum 0.5 power and the number of
1531 multiplications that will be required. */
1532
1533 bool
1534 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1535 struct pow_synth_sqrt_info *info)
1536 {
1537 REAL_VALUE_TYPE factor = dconsthalf;
1538 REAL_VALUE_TYPE remainder = c;
1539
1540 info->deepest = 0;
1541 info->num_mults = 0;
1542 memset (info->factors, 0, n * sizeof (bool));
1543
1544 for (unsigned i = 0; i < n; i++)
1545 {
1546 REAL_VALUE_TYPE res;
1547
1548 /* If something inexact happened bail out now. */
1549 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1550 return false;
1551
1552 /* We have hit zero. The number is representable as a sum
1553 of powers of 0.5. */
1554 if (real_equal (&res, &dconst0))
1555 {
1556 info->factors[i] = true;
1557 info->deepest = i + 1;
1558 return true;
1559 }
1560 else if (!REAL_VALUE_NEGATIVE (res))
1561 {
1562 remainder = res;
1563 info->factors[i] = true;
1564 info->num_mults++;
1565 }
1566 else
1567 info->factors[i] = false;
1568
1569 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1570 }
1571 return false;
1572 }
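
/* Worked example: C == 0.625 with N >= 3.  The loop subtracts 0.5
   (remainder 0.125, factors[0] set), cannot subtract 0.25, then
   subtracts 0.125 exactly, so factors == {1, 0, 1}, deepest == 3 and
   num_mults == 1: pow (x, 0.625) can be synthesized as
   sqrt (x) * sqrt (sqrt (sqrt (x))) with a single multiplication.  */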
1573
1574 /* Return the tree corresponding to FN being applied
1575 to ARG N times at GSI and LOC.
1576 Look up previous results from CACHE if need be.
1577 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1578
1579 static tree
1580 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1581 tree fn, location_t loc, tree *cache)
1582 {
1583 tree res = cache[n];
1584 if (!res)
1585 {
1586 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1587 res = build_and_insert_call (gsi, loc, fn, prev);
1588 cache[n] = res;
1589 }
1590
1591 return res;
1592 }
1593
1594 /* Print to STREAM the repeated application of function FNAME to ARG
1595 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1596 "foo (foo (x))". */
1597
1598 static void
1599 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1600 unsigned int n)
1601 {
1602 if (n == 0)
1603 fprintf (stream, "%s", arg);
1604 else
1605 {
1606 fprintf (stream, "%s (", fname);
1607 print_nested_fn (stream, fname, arg, n - 1);
1608 fprintf (stream, ")");
1609 }
1610 }
1611
1612 /* Print to STREAM the fractional sequence of sqrt chains
1613 applied to ARG, described by INFO. Used for the dump file. */
1614
1615 static void
1616 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1617 struct pow_synth_sqrt_info *info)
1618 {
1619 for (unsigned int i = 0; i < info->deepest; i++)
1620 {
1621 bool is_set = info->factors[i];
1622 if (is_set)
1623 {
1624 print_nested_fn (stream, "sqrt", arg, i + 1);
1625 if (i != info->deepest - 1)
1626 fprintf (stream, " * ");
1627 }
1628 }
1629 }
1630
1631 /* Print to STREAM a representation of raising ARG to an integer
1632 power N. Used for the dump file. */
1633
1634 static void
1635 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1636 {
1637 if (n > 1)
1638 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1639 else if (n == 1)
1640 fprintf (stream, "%s", arg);
1641 }
1642
1643 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1644 square roots. Place at GSI and LOC. Limit the maximum depth
1645 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1646 result of the expanded sequence or NULL_TREE if the expansion failed.
1647
1648 This routine assumes that ARG1 is a real number with a fractional part
1649 (the integer exponent case will have been handled earlier in
1650 gimple_expand_builtin_pow).
1651
1652 For ARG1 > 0.0:
1653 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1654 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1655 FRAC_PART == ARG1 - WHOLE_PART:
1656 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1657 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1658 if it can be expressed as such, that is if FRAC_PART satisfies:
1659 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1660 where integer a[i] is either 0 or 1.
1661
1662 Example:
1663 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1664 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1665
1666 For ARG1 < 0.0 there are two approaches:
1667 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1668 is calculated as above.
1669
1670 Example:
1671 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1672 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1673
1674 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1675 FRAC_PART := ARG1 - WHOLE_PART
1676 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1677 Example:
1678 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1679 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1680
1681 For ARG1 < 0.0 we choose between (A) and (B) depending on
1682 how many multiplications we'd have to do.
1683 So, for the example in (B): POW (x, -5.875), if we were to
1684 follow algorithm (A) we would produce:
1685 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1686 which contains more multiplications than approach (B).
1687
1688 Hopefully, this approach will eliminate potentially expensive POW library
1689 calls when unsafe floating point math is enabled and allow the compiler to
1690 further optimise the multiplies, square roots and divides produced by this
1691 function. */
1692
1693 static tree
1694 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1695 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1696 {
1697 tree type = TREE_TYPE (arg0);
1698 machine_mode mode = TYPE_MODE (type);
1699 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1700 bool one_over = true;
1701
1702 if (!sqrtfn)
1703 return NULL_TREE;
1704
1705 if (TREE_CODE (arg1) != REAL_CST)
1706 return NULL_TREE;
1707
1708 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1709
1710 gcc_assert (max_depth > 0);
1711 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1712
1713 struct pow_synth_sqrt_info synth_info;
1714 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1715 synth_info.deepest = 0;
1716 synth_info.num_mults = 0;
1717
1718 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1719 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1720
1721 /* The whole and fractional parts of exp. */
1722 REAL_VALUE_TYPE whole_part;
1723 REAL_VALUE_TYPE frac_part;
1724
1725 real_floor (&whole_part, mode, &exp);
1726 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1727
1728
1729 REAL_VALUE_TYPE ceil_whole = dconst0;
1730 REAL_VALUE_TYPE ceil_fract = dconst0;
1731
1732 if (neg_exp)
1733 {
1734 real_ceil (&ceil_whole, mode, &exp);
1735 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1736 }
1737
1738 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1739 return NULL_TREE;
1740
1741 /* Check whether it's more profitable to not use 1.0 / ... */
1742 if (neg_exp)
1743 {
1744 struct pow_synth_sqrt_info alt_synth_info;
1745 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1746 alt_synth_info.deepest = 0;
1747 alt_synth_info.num_mults = 0;
1748
1749 if (representable_as_half_series_p (ceil_fract, max_depth,
1750 &alt_synth_info)
1751 && alt_synth_info.deepest <= synth_info.deepest
1752 && alt_synth_info.num_mults < synth_info.num_mults)
1753 {
1754 whole_part = ceil_whole;
1755 frac_part = ceil_fract;
1756 synth_info.deepest = alt_synth_info.deepest;
1757 synth_info.num_mults = alt_synth_info.num_mults;
1758 memcpy (synth_info.factors, alt_synth_info.factors,
1759 (max_depth + 1) * sizeof (bool));
1760 one_over = false;
1761 }
1762 }
1763
1764 HOST_WIDE_INT n = real_to_integer (&whole_part);
1765 REAL_VALUE_TYPE cint;
1766 real_from_integer (&cint, VOIDmode, n, SIGNED);
1767
1768 if (!real_identical (&whole_part, &cint))
1769 return NULL_TREE;
1770
1771 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1772 return NULL_TREE;
1773
1774 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1775
1776 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1777
1778 /* Calculate the integer part of the exponent. */
1779 if (n > 1)
1780 {
1781 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1782 if (!integer_res)
1783 return NULL_TREE;
1784 }
1785
1786 if (dump_file)
1787 {
1788 char string[64];
1789
1790 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1791 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1792
1793 if (neg_exp)
1794 {
1795 if (one_over)
1796 {
1797 fprintf (dump_file, "1.0 / (");
1798 dump_integer_part (dump_file, "x", n);
1799 if (n > 0)
1800 fprintf (dump_file, " * ");
1801 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1802 fprintf (dump_file, ")");
1803 }
1804 else
1805 {
1806 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1807 fprintf (dump_file, " / (");
1808 dump_integer_part (dump_file, "x", n);
1809 fprintf (dump_file, ")");
1810 }
1811 }
1812 else
1813 {
1814 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1815 if (n > 0)
1816 fprintf (dump_file, " * ");
1817 dump_integer_part (dump_file, "x", n);
1818 }
1819
1820 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1821 }
1822
1823
1824 tree fract_res = NULL_TREE;
1825 cache[0] = arg0;
1826
1827 /* Calculate the fractional part of the exponent. */
1828 for (unsigned i = 0; i < synth_info.deepest; i++)
1829 {
1830 if (synth_info.factors[i])
1831 {
1832 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1833
1834 if (!fract_res)
1835 fract_res = sqrt_chain;
1836
1837 else
1838 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1839 fract_res, sqrt_chain);
1840 }
1841 }
1842
1843 tree res = NULL_TREE;
1844
1845 if (neg_exp)
1846 {
1847 if (one_over)
1848 {
1849 if (n > 0)
1850 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1851 fract_res, integer_res);
1852 else
1853 res = fract_res;
1854
1855 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1856 build_real (type, dconst1), res);
1857 }
1858 else
1859 {
1860 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1861 fract_res, integer_res);
1862 }
1863 }
1864 else
1865 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1866 fract_res, integer_res);
1867 return res;
1868 }
1869
1870 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1871 with location info LOC. If possible, create an equivalent and
1872 less expensive sequence of statements prior to GSI, and return an
1873 expression holding the result. */
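/* Hypothetical examples of the special cases handled below (the exact
   guards, e.g. -funsafe-math-optimizations or signed-zero handling, are
   spelled out at each transformation):

     pow (x, 2.0)    --> x * x
     pow (x, 0.5)    --> sqrt (x)
     pow (x, 1./3.)  --> cbrt (x)
     pow (x, 1./6.)  --> cbrt (sqrt (x))  */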
1874
1875 static tree
1876 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1877 tree arg0, tree arg1)
1878 {
1879 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1880 REAL_VALUE_TYPE c2, dconst3;
1881 HOST_WIDE_INT n;
1882 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1883 machine_mode mode;
1884 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1885 bool hw_sqrt_exists, c_is_int, c2_is_int;
1886
1887 dconst1_4 = dconst1;
1888 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1889
1890 /* If the exponent isn't a constant, there's nothing of interest
1891 to be done. */
1892 if (TREE_CODE (arg1) != REAL_CST)
1893 return NULL_TREE;
1894
1895 /* Don't perform the operation if flag_signaling_nans is on
1896 and the operand is a signaling NaN. */
1897 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1898 && ((TREE_CODE (arg0) == REAL_CST
1899 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1900 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1901 return NULL_TREE;
1902
1903 /* If the exponent is equivalent to an integer, expand to an optimal
1904 multiplication sequence when profitable. */
1905 c = TREE_REAL_CST (arg1);
1906 n = real_to_integer (&c);
1907 real_from_integer (&cint, VOIDmode, n, SIGNED);
1908 c_is_int = real_identical (&c, &cint);
1909
1910 if (c_is_int
1911 && ((n >= -1 && n <= 2)
1912 || (flag_unsafe_math_optimizations
1913 && speed_p
1914 && powi_cost (n) <= POWI_MAX_MULTS)))
1915 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1916
1917 /* Attempt various optimizations using sqrt and cbrt. */
1918 type = TREE_TYPE (arg0);
1919 mode = TYPE_MODE (type);
1920 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1921
1922 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1923 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1924 sqrt(-0) = -0. */
1925 if (sqrtfn
1926 && real_equal (&c, &dconsthalf)
1927 && !HONOR_SIGNED_ZEROS (mode))
1928 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1929
1930 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1931
1932 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1933 optimizations since 1./3. is not exactly representable. If x
1934 is negative and finite, the correct value of pow(x,1./3.) is
1935 a NaN with the "invalid" exception raised, because the value
1936 of 1./3. actually has an even denominator. The correct value
1937 of cbrt(x) is a negative real value. */
1938 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1939 dconst1_3 = real_value_truncate (mode, dconst_third ());
1940
1941 if (flag_unsafe_math_optimizations
1942 && cbrtfn
1943 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1944 && real_equal (&c, &dconst1_3))
1945 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1946
1947 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1948 if we don't have a hardware sqrt insn. */
1949 dconst1_6 = dconst1_3;
1950 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1951
1952 if (flag_unsafe_math_optimizations
1953 && sqrtfn
1954 && cbrtfn
1955 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1956 && speed_p
1957 && hw_sqrt_exists
1958 && real_equal (&c, &dconst1_6))
1959 {
1960 /* sqrt(x) */
1961 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1962
1963 /* cbrt(sqrt(x)) */
1964 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1965 }
1966
1967
1968 /* Attempt to expand the POW as a product of square root chains.
1969 Expand the 0.25 case even when optimising for size. */
1970 if (flag_unsafe_math_optimizations
1971 && sqrtfn
1972 && hw_sqrt_exists
1973 && (speed_p || real_equal (&c, &dconst1_4))
1974 && !HONOR_SIGNED_ZEROS (mode))
1975 {
1976 unsigned int max_depth = speed_p
1977 ? param_max_pow_sqrt_depth
1978 : 2;
1979
1980 tree expand_with_sqrts
1981 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1982
1983 if (expand_with_sqrts)
1984 return expand_with_sqrts;
1985 }
1986
1987 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1988 n = real_to_integer (&c2);
1989 real_from_integer (&cint, VOIDmode, n, SIGNED);
1990 c2_is_int = real_identical (&c2, &cint);
1991
1992 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1993
1994 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1995 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1996
1997 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1998 different from pow(x, 1./3.) due to rounding and behavior with
1999 negative x, we need to constrain this transformation to unsafe
2000 math and positive x or finite math. */
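 /* A hypothetical worked example: for pow (x, 5./3.) we get n = 5, so the
 result is powi (x, 1) * powi (cbrt (x), 2), i.e. x * cbrt (x) * cbrt (x);
 for pow (x, -5./3.) the whole expression is additionally wrapped in
 1.0 / (...). */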
2001 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2002 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2003 real_round (&c2, mode, &c2);
2004 n = real_to_integer (&c2);
2005 real_from_integer (&cint, VOIDmode, n, SIGNED);
2006 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2007 real_convert (&c2, mode, &c2);
2008
2009 if (flag_unsafe_math_optimizations
2010 && cbrtfn
2011 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2012 && real_identical (&c2, &c)
2013 && !c2_is_int
2014 && optimize_function_for_speed_p (cfun)
2015 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2016 {
2017 tree powi_x_ndiv3 = NULL_TREE;
2018
2019 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2020 possible or profitable, give up. Skip the degenerate case when
2021 abs(n) < 3, where the result is always 1. */
2022 if (absu_hwi (n) >= 3)
2023 {
2024 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2025 abs_hwi (n / 3));
2026 if (!powi_x_ndiv3)
2027 return NULL_TREE;
2028 }
2029
2030 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2031 as that creates an unnecessary variable. Instead, just produce
2032 either cbrt(x) or cbrt(x) * cbrt(x). */
2033 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2034
2035 if (absu_hwi (n) % 3 == 1)
2036 powi_cbrt_x = cbrt_x;
2037 else
2038 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2039 cbrt_x, cbrt_x);
2040
2041 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2042 if (absu_hwi (n) < 3)
2043 result = powi_cbrt_x;
2044 else
2045 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2046 powi_x_ndiv3, powi_cbrt_x);
2047
2048 /* If n is negative, reciprocate the result. */
2049 if (n < 0)
2050 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2051 build_real (type, dconst1), result);
2052
2053 return result;
2054 }
2055
2056 /* No optimizations succeeded. */
2057 return NULL_TREE;
2058 }
2059
2060 /* ARG is the argument to a cabs builtin call in GSI with location info
2061 LOC. Create a sequence of statements prior to GSI that calculates
2062 sqrt(R*R + I*I), where R and I are the real and imaginary components
2063 of ARG, respectively. Return an expression holding the result. */
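/* For instance (a hypothetical call, assuming -funsafe-math-optimizations,
   a hardware sqrt and optimization for speed), cabs (3.0 + 4.0i) is
   expanded by the sequence built below into sqrt (3.0*3.0 + 4.0*4.0),
   i.e. 5.0.  */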
2064
2065 static tree
2066 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2067 {
2068 tree real_part, imag_part, addend1, addend2, sum, result;
2069 tree type = TREE_TYPE (TREE_TYPE (arg));
2070 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2071 machine_mode mode = TYPE_MODE (type);
2072
2073 if (!flag_unsafe_math_optimizations
2074 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2075 || !sqrtfn
2076 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2077 return NULL_TREE;
2078
2079 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2080 REALPART_EXPR, arg);
2081 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2082 real_part, real_part);
2083 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2084 IMAGPART_EXPR, arg);
2085 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2086 imag_part, imag_part);
2087 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2088 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2089
2090 return result;
2091 }
2092
2093 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2094 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2095 an optimal number of multiplies, when n is a constant. */
2096
2097 namespace {
2098
2099 const pass_data pass_data_cse_sincos =
2100 {
2101 GIMPLE_PASS, /* type */
2102 "sincos", /* name */
2103 OPTGROUP_NONE, /* optinfo_flags */
2104 TV_TREE_SINCOS, /* tv_id */
2105 PROP_ssa, /* properties_required */
2106 PROP_gimple_opt_math, /* properties_provided */
2107 0, /* properties_destroyed */
2108 0, /* todo_flags_start */
2109 TODO_update_ssa, /* todo_flags_finish */
2110 };
2111
2112 class pass_cse_sincos : public gimple_opt_pass
2113 {
2114 public:
2115 pass_cse_sincos (gcc::context *ctxt)
2116 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2117 {}
2118
2119 /* opt_pass methods: */
2120 virtual bool gate (function *)
2121 {
2122 /* We no longer require either sincos or cexp, since powi expansion
2123 piggybacks on this pass. */
2124 return optimize;
2125 }
2126
2127 virtual unsigned int execute (function *);
2128
2129 }; // class pass_cse_sincos
2130
2131 unsigned int
2132 pass_cse_sincos::execute (function *fun)
2133 {
2134 basic_block bb;
2135 bool cfg_changed = false;
2136
2137 calculate_dominance_info (CDI_DOMINATORS);
2138 memset (&sincos_stats, 0, sizeof (sincos_stats));
2139
2140 FOR_EACH_BB_FN (bb, fun)
2141 {
2142 gimple_stmt_iterator gsi;
2143 bool cleanup_eh = false;
2144
2145 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2146 {
2147 gimple *stmt = gsi_stmt (gsi);
2148
2149 /* Only the last stmt in a bb could throw; no need to call
2150 gimple_purge_dead_eh_edges if we change something in the middle
2151 of a basic block. */
2152 cleanup_eh = false;
2153
2154 if (is_gimple_call (stmt)
2155 && gimple_call_lhs (stmt))
2156 {
2157 tree arg, arg0, arg1, result;
2158 HOST_WIDE_INT n;
2159 location_t loc;
2160
2161 switch (gimple_call_combined_fn (stmt))
2162 {
2163 CASE_CFN_COS:
2164 CASE_CFN_SIN:
2165 CASE_CFN_CEXPI:
2166 /* Make sure we have either sincos or cexp. */
2167 if (!targetm.libc_has_function (function_c99_math_complex)
2168 && !targetm.libc_has_function (function_sincos))
2169 break;
2170
2171 arg = gimple_call_arg (stmt, 0);
2172 if (TREE_CODE (arg) == SSA_NAME)
2173 cfg_changed |= execute_cse_sincos_1 (arg);
2174 break;
2175
2176 CASE_CFN_POW:
2177 arg0 = gimple_call_arg (stmt, 0);
2178 arg1 = gimple_call_arg (stmt, 1);
2179
2180 loc = gimple_location (stmt);
2181 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2182
2183 if (result)
2184 {
2185 tree lhs = gimple_get_lhs (stmt);
2186 gassign *new_stmt = gimple_build_assign (lhs, result);
2187 gimple_set_location (new_stmt, loc);
2188 unlink_stmt_vdef (stmt);
2189 gsi_replace (&gsi, new_stmt, true);
2190 cleanup_eh = true;
2191 if (gimple_vdef (stmt))
2192 release_ssa_name (gimple_vdef (stmt));
2193 }
2194 break;
2195
2196 CASE_CFN_POWI:
2197 arg0 = gimple_call_arg (stmt, 0);
2198 arg1 = gimple_call_arg (stmt, 1);
2199 loc = gimple_location (stmt);
2200
2201 if (real_minus_onep (arg0))
2202 {
2203 tree t0, t1, cond, one, minus_one;
2204 gassign *stmt;
2205
2206 t0 = TREE_TYPE (arg0);
2207 t1 = TREE_TYPE (arg1);
2208 one = build_real (t0, dconst1);
2209 minus_one = build_real (t0, dconstm1);
2210
2211 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2212 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2213 arg1, build_int_cst (t1, 1));
2214 gimple_set_location (stmt, loc);
2215 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2216
2217 result = make_temp_ssa_name (t0, NULL, "powi");
2218 stmt = gimple_build_assign (result, COND_EXPR, cond,
2219 minus_one, one);
2220 gimple_set_location (stmt, loc);
2221 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2222 }
2223 else
2224 {
2225 if (!tree_fits_shwi_p (arg1))
2226 break;
2227
2228 n = tree_to_shwi (arg1);
2229 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2230 }
2231
2232 if (result)
2233 {
2234 tree lhs = gimple_get_lhs (stmt);
2235 gassign *new_stmt = gimple_build_assign (lhs, result);
2236 gimple_set_location (new_stmt, loc);
2237 unlink_stmt_vdef (stmt);
2238 gsi_replace (&gsi, new_stmt, true);
2239 cleanup_eh = true;
2240 if (gimple_vdef (stmt))
2241 release_ssa_name (gimple_vdef (stmt));
2242 }
2243 break;
2244
2245 CASE_CFN_CABS:
2246 arg0 = gimple_call_arg (stmt, 0);
2247 loc = gimple_location (stmt);
2248 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2249
2250 if (result)
2251 {
2252 tree lhs = gimple_get_lhs (stmt);
2253 gassign *new_stmt = gimple_build_assign (lhs, result);
2254 gimple_set_location (new_stmt, loc);
2255 unlink_stmt_vdef (stmt);
2256 gsi_replace (&gsi, new_stmt, true);
2257 cleanup_eh = true;
2258 if (gimple_vdef (stmt))
2259 release_ssa_name (gimple_vdef (stmt));
2260 }
2261 break;
2262
2263 default:;
2264 }
2265 }
2266 }
2267 if (cleanup_eh)
2268 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2269 }
2270
2271 statistics_counter_event (fun, "sincos statements inserted",
2272 sincos_stats.inserted);
2273
2274 return cfg_changed ? TODO_cleanup_cfg : 0;
2275 }
2276
2277 } // anon namespace
2278
2279 gimple_opt_pass *
2280 make_pass_cse_sincos (gcc::context *ctxt)
2281 {
2282 return new pass_cse_sincos (ctxt);
2283 }
2284
2285 /* Return true if stmt is a type conversion operation that can be stripped
2286 when used in a widening multiply operation. */
2287 static bool
2288 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2289 {
2290 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2291
2292 if (TREE_CODE (result_type) == INTEGER_TYPE)
2293 {
2294 tree op_type;
2295 tree inner_op_type;
2296
2297 if (!CONVERT_EXPR_CODE_P (rhs_code))
2298 return false;
2299
2300 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2301
2302 /* If the type of OP has the same precision as the result, then
2303 we can strip this conversion. The multiply operation will be
2304 selected to create the correct extension as a by-product. */
2305 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2306 return true;
2307
2308 /* We can also strip a conversion if it preserves the signed-ness of
2309 the operation and doesn't narrow the range. */
2310 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2311
2312 /* If the inner-most type is unsigned, then we can strip any
2313 intermediate widening operation. If it's signed, then the
2314 intermediate widening operation must also be signed. */
2315 if ((TYPE_UNSIGNED (inner_op_type)
2316 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2317 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2318 return true;
2319
2320 return false;
2321 }
2322
2323 return rhs_code == FIXED_CONVERT_EXPR;
2324 }
2325
2326 /* Return true if RHS is a suitable operand for a widening multiplication,
2327 assuming a target type of TYPE.
2328 There are two cases:
2329
2330 - RHS makes some value at least twice as wide. Store that value
2331 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2332
2333 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2334 but leave *TYPE_OUT untouched. */
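/* A hypothetical example, assuming 32-bit int and 64-bit long: with
   TYPE == long and RHS defined by

     w_2 = (long) i_1;   where i_1 has type int

   the conversion is strippable, so *NEW_RHS_OUT = i_1 and *TYPE_OUT = int,
   which satisfies the "at least twice as wide" requirement.  For
   RHS == 7 (an INTEGER_CST), *NEW_RHS_OUT = 7 and *TYPE_OUT stays NULL.  */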
2335
2336 static bool
2337 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2338 tree *new_rhs_out)
2339 {
2340 gimple *stmt;
2341 tree type1, rhs1;
2342
2343 if (TREE_CODE (rhs) == SSA_NAME)
2344 {
2345 stmt = SSA_NAME_DEF_STMT (rhs);
2346 if (is_gimple_assign (stmt))
2347 {
2348 if (! widening_mult_conversion_strippable_p (type, stmt))
2349 rhs1 = rhs;
2350 else
2351 {
2352 rhs1 = gimple_assign_rhs1 (stmt);
2353
2354 if (TREE_CODE (rhs1) == INTEGER_CST)
2355 {
2356 *new_rhs_out = rhs1;
2357 *type_out = NULL;
2358 return true;
2359 }
2360 }
2361 }
2362 else
2363 rhs1 = rhs;
2364
2365 type1 = TREE_TYPE (rhs1);
2366
2367 if (TREE_CODE (type1) != TREE_CODE (type)
2368 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2369 return false;
2370
2371 *new_rhs_out = rhs1;
2372 *type_out = type1;
2373 return true;
2374 }
2375
2376 if (TREE_CODE (rhs) == INTEGER_CST)
2377 {
2378 *new_rhs_out = rhs;
2379 *type_out = NULL;
2380 return true;
2381 }
2382
2383 return false;
2384 }
2385
2386 /* Return true if STMT performs a widening multiplication, assuming the
2387 output type is TYPE. If so, store the unwidened types of the operands
2388 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2389 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2390 and *TYPE2_OUT would give the operands of the multiplication. */
2391
2392 static bool
2393 is_widening_mult_p (gimple *stmt,
2394 tree *type1_out, tree *rhs1_out,
2395 tree *type2_out, tree *rhs2_out)
2396 {
2397 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2398
2399 if (TREE_CODE (type) == INTEGER_TYPE)
2400 {
2401 if (TYPE_OVERFLOW_TRAPS (type))
2402 return false;
2403 }
2404 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2405 return false;
2406
2407 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2408 rhs1_out))
2409 return false;
2410
2411 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2412 rhs2_out))
2413 return false;
2414
2415 if (*type1_out == NULL)
2416 {
2417 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2418 return false;
2419 *type1_out = *type2_out;
2420 }
2421
2422 if (*type2_out == NULL)
2423 {
2424 if (!int_fits_type_p (*rhs2_out, *type1_out))
2425 return false;
2426 *type2_out = *type1_out;
2427 }
2428
2429 /* Ensure that the larger of the two operands comes first. */
2430 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2431 {
2432 std::swap (*type1_out, *type2_out);
2433 std::swap (*rhs1_out, *rhs2_out);
2434 }
2435
2436 return true;
2437 }
2438
2439 /* Check to see if the CALL statement is an invocation of copysign
2440 with 1. being the first argument. */
2441 static bool
2442 is_copysign_call_with_1 (gimple *call)
2443 {
2444 gcall *c = dyn_cast <gcall *> (call);
2445 if (! c)
2446 return false;
2447
2448 enum combined_fn code = gimple_call_combined_fn (c);
2449
2450 if (code == CFN_LAST)
2451 return false;
2452
2453 if (builtin_fn_p (code))
2454 {
2455 switch (as_builtin_fn (code))
2456 {
2457 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2458 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2459 return real_onep (gimple_call_arg (c, 0));
2460 default:
2461 return false;
2462 }
2463 }
2464
2465 if (internal_fn_p (code))
2466 {
2467 switch (as_internal_fn (code))
2468 {
2469 case IFN_COPYSIGN:
2470 return real_onep (gimple_call_arg (c, 0));
2471 default:
2472 return false;
2473 }
2474 }
2475
2476 return false;
2477 }
2478
2479 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2480 This is only done when the xorsign optab is defined; if the
2481 pattern is not an xorsign pattern or if expansion fails, FALSE is
2482 returned, otherwise TRUE is returned. */
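/* A hypothetical source-level example of the pattern (assuming the target
   defines the xorsign optab for DFmode):

     double f (double x, double y)
     {
       return x * __builtin_copysign (1.0, y);
     }

   Here the multiplication is replaced by an internal .XORSIGN (x, y) call,
   which simply XORs the sign bit of y into x.  */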
2483 static bool
2484 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2485 {
2486 tree treeop0, treeop1, lhs, type;
2487 location_t loc = gimple_location (stmt);
2488 lhs = gimple_assign_lhs (stmt);
2489 treeop0 = gimple_assign_rhs1 (stmt);
2490 treeop1 = gimple_assign_rhs2 (stmt);
2491 type = TREE_TYPE (lhs);
2492 machine_mode mode = TYPE_MODE (type);
2493
2494 if (HONOR_SNANS (type))
2495 return false;
2496
2497 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2498 {
2499 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2500 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2501 {
2502 call0 = SSA_NAME_DEF_STMT (treeop1);
2503 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2504 return false;
2505
2506 treeop1 = treeop0;
2507 }
2508 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2509 return false;
2510
2511 gcall *c = as_a<gcall*> (call0);
2512 treeop0 = gimple_call_arg (c, 1);
2513
2514 gcall *call_stmt
2515 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2516 gimple_set_lhs (call_stmt, lhs);
2517 gimple_set_location (call_stmt, loc);
2518 gsi_replace (gsi, call_stmt, true);
2519 return true;
2520 }
2521
2522 return false;
2523 }
2524
2525 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2526 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2527 value is true iff we converted the statement. */
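/* A hypothetical example, assuming 32-bit int, 64-bit long and a target
   with a signed widening multiply (e.g. a mulsidi3-style pattern):

     long f (int a, int b) { return (long) a * (long) b; }

   The 64-bit MULT_EXPR whose operands are conversions from int is
   rewritten as WIDEN_MULT_EXPR <a, b>, letting expansion pick the single
   widening multiply instruction instead of two extensions and a full
   64-bit multiply.  */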
2528
2529 static bool
2530 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2531 {
2532 tree lhs, rhs1, rhs2, type, type1, type2;
2533 enum insn_code handler;
2534 scalar_int_mode to_mode, from_mode, actual_mode;
2535 optab op;
2536 int actual_precision;
2537 location_t loc = gimple_location (stmt);
2538 bool from_unsigned1, from_unsigned2;
2539
2540 lhs = gimple_assign_lhs (stmt);
2541 type = TREE_TYPE (lhs);
2542 if (TREE_CODE (type) != INTEGER_TYPE)
2543 return false;
2544
2545 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2546 return false;
2547
2548 to_mode = SCALAR_INT_TYPE_MODE (type);
2549 from_mode = SCALAR_INT_TYPE_MODE (type1);
2550 if (to_mode == from_mode)
2551 return false;
2552
2553 from_unsigned1 = TYPE_UNSIGNED (type1);
2554 from_unsigned2 = TYPE_UNSIGNED (type2);
2555
2556 if (from_unsigned1 && from_unsigned2)
2557 op = umul_widen_optab;
2558 else if (!from_unsigned1 && !from_unsigned2)
2559 op = smul_widen_optab;
2560 else
2561 op = usmul_widen_optab;
2562
2563 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2564 &actual_mode);
2565
2566 if (handler == CODE_FOR_nothing)
2567 {
2568 if (op != smul_widen_optab)
2569 {
2570 /* We can use a signed multiply with unsigned types as long as
2571 there is a wider mode to use, or it is the smaller of the two
2572 types that is unsigned. Note that type1 >= type2, always. */
2573 if ((TYPE_UNSIGNED (type1)
2574 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2575 || (TYPE_UNSIGNED (type2)
2576 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2577 {
2578 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2579 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2580 return false;
2581 }
2582
2583 op = smul_widen_optab;
2584 handler = find_widening_optab_handler_and_mode (op, to_mode,
2585 from_mode,
2586 &actual_mode);
2587
2588 if (handler == CODE_FOR_nothing)
2589 return false;
2590
2591 from_unsigned1 = from_unsigned2 = false;
2592 }
2593 else
2594 return false;
2595 }
2596
2597 /* Ensure that the inputs to the handler are in the correct precision
2598 for the opcode. This will be the full mode size. */
2599 actual_precision = GET_MODE_PRECISION (actual_mode);
2600 if (2 * actual_precision > TYPE_PRECISION (type))
2601 return false;
2602 if (actual_precision != TYPE_PRECISION (type1)
2603 || from_unsigned1 != TYPE_UNSIGNED (type1))
2604 rhs1 = build_and_insert_cast (gsi, loc,
2605 build_nonstandard_integer_type
2606 (actual_precision, from_unsigned1), rhs1);
2607 if (actual_precision != TYPE_PRECISION (type2)
2608 || from_unsigned2 != TYPE_UNSIGNED (type2))
2609 rhs2 = build_and_insert_cast (gsi, loc,
2610 build_nonstandard_integer_type
2611 (actual_precision, from_unsigned2), rhs2);
2612
2613 /* Handle constants. */
2614 if (TREE_CODE (rhs1) == INTEGER_CST)
2615 rhs1 = fold_convert (type1, rhs1);
2616 if (TREE_CODE (rhs2) == INTEGER_CST)
2617 rhs2 = fold_convert (type2, rhs2);
2618
2619 gimple_assign_set_rhs1 (stmt, rhs1);
2620 gimple_assign_set_rhs2 (stmt, rhs2);
2621 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2622 update_stmt (stmt);
2623 widen_mul_stats.widen_mults_inserted++;
2624 return true;
2625 }
2626
2627 /* Process a single gimple statement STMT, which is found at the
2628 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2629 rhs (given by CODE), and try to convert it into a
2630 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2631 is true iff we converted the statement. */
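/* A hypothetical example, assuming the same 32/64-bit setup as above and a
   target providing a widening multiply-accumulate (e.g. a maddsidi4-style
   pattern):

     acc = acc + (long) a * (long) b;

   becomes WIDEN_MULT_PLUS_EXPR <a, b, acc>, and the MINUS_EXPR variant
   correspondingly becomes WIDEN_MULT_MINUS_EXPR.  */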
2632
2633 static bool
2634 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2635 enum tree_code code)
2636 {
2637 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2638 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2639 tree type, type1, type2, optype;
2640 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2641 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2642 optab this_optab;
2643 enum tree_code wmult_code;
2644 enum insn_code handler;
2645 scalar_mode to_mode, from_mode, actual_mode;
2646 location_t loc = gimple_location (stmt);
2647 int actual_precision;
2648 bool from_unsigned1, from_unsigned2;
2649
2650 lhs = gimple_assign_lhs (stmt);
2651 type = TREE_TYPE (lhs);
2652 if (TREE_CODE (type) != INTEGER_TYPE
2653 && TREE_CODE (type) != FIXED_POINT_TYPE)
2654 return false;
2655
2656 if (code == MINUS_EXPR)
2657 wmult_code = WIDEN_MULT_MINUS_EXPR;
2658 else
2659 wmult_code = WIDEN_MULT_PLUS_EXPR;
2660
2661 rhs1 = gimple_assign_rhs1 (stmt);
2662 rhs2 = gimple_assign_rhs2 (stmt);
2663
2664 if (TREE_CODE (rhs1) == SSA_NAME)
2665 {
2666 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2667 if (is_gimple_assign (rhs1_stmt))
2668 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2669 }
2670
2671 if (TREE_CODE (rhs2) == SSA_NAME)
2672 {
2673 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2674 if (is_gimple_assign (rhs2_stmt))
2675 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2676 }
2677
2678 /* Allow for one conversion statement between the multiply
2679 and addition/subtraction statement. If there is more than
2680 one conversion then we assume they would invalidate this
2681 transformation. If that's not the case then they should have
2682 been folded before now. */
2683 if (CONVERT_EXPR_CODE_P (rhs1_code))
2684 {
2685 conv1_stmt = rhs1_stmt;
2686 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2687 if (TREE_CODE (rhs1) == SSA_NAME)
2688 {
2689 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2690 if (is_gimple_assign (rhs1_stmt))
2691 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2692 }
2693 else
2694 return false;
2695 }
2696 if (CONVERT_EXPR_CODE_P (rhs2_code))
2697 {
2698 conv2_stmt = rhs2_stmt;
2699 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2700 if (TREE_CODE (rhs2) == SSA_NAME)
2701 {
2702 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2703 if (is_gimple_assign (rhs2_stmt))
2704 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2705 }
2706 else
2707 return false;
2708 }
2709
2710 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2711 is_widening_mult_p, but we still need the rhs values it returns.
2712
2713 It might also appear that it would be sufficient to use the existing
2714 operands of the widening multiply, but that would limit the choice of
2715 multiply-and-accumulate instructions.
2716
2717 If the widened-multiplication result has more than one use, it is
2718 probably wiser not to do the conversion. Also restrict this operation
2719 to a single basic block to avoid moving the multiply to a different block
2720 with a higher execution frequency. */
2721 if (code == PLUS_EXPR
2722 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2723 {
2724 if (!has_single_use (rhs1)
2725 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2726 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2727 &type2, &mult_rhs2))
2728 return false;
2729 add_rhs = rhs2;
2730 conv_stmt = conv1_stmt;
2731 }
2732 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2733 {
2734 if (!has_single_use (rhs2)
2735 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2736 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2737 &type2, &mult_rhs2))
2738 return false;
2739 add_rhs = rhs1;
2740 conv_stmt = conv2_stmt;
2741 }
2742 else
2743 return false;
2744
2745 to_mode = SCALAR_TYPE_MODE (type);
2746 from_mode = SCALAR_TYPE_MODE (type1);
2747 if (to_mode == from_mode)
2748 return false;
2749
2750 from_unsigned1 = TYPE_UNSIGNED (type1);
2751 from_unsigned2 = TYPE_UNSIGNED (type2);
2752 optype = type1;
2753
2754 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2755 if (from_unsigned1 != from_unsigned2)
2756 {
2757 if (!INTEGRAL_TYPE_P (type))
2758 return false;
2759 /* We can use a signed multiply with unsigned types as long as
2760 there is a wider mode to use, or it is the smaller of the two
2761 types that is unsigned. Note that type1 >= type2, always. */
2762 if ((from_unsigned1
2763 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2764 || (from_unsigned2
2765 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2766 {
2767 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2768 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2769 return false;
2770 }
2771
2772 from_unsigned1 = from_unsigned2 = false;
2773 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2774 false);
2775 }
2776
2777 /* If there was a conversion between the multiply and addition
2778 then we need to make sure it fits a multiply-and-accumulate.
2779 There should be a single mode change which does not change the
2780 value. */
2781 if (conv_stmt)
2782 {
2783 /* We use the original, unmodified data types for this. */
2784 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2785 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2786 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2787 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2788
2789 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2790 {
2791 /* Conversion is a truncate. */
2792 if (TYPE_PRECISION (to_type) < data_size)
2793 return false;
2794 }
2795 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2796 {
2797 /* Conversion is an extend. Check it's the right sort. */
2798 if (TYPE_UNSIGNED (from_type) != is_unsigned
2799 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2800 return false;
2801 }
2802 /* else convert is a no-op for our purposes. */
2803 }
2804
2805 /* Verify that the machine can perform a widening multiply
2806 accumulate in this mode/signedness combination, otherwise
2807 this transformation is likely to pessimize code. */
2808 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2809 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2810 from_mode, &actual_mode);
2811
2812 if (handler == CODE_FOR_nothing)
2813 return false;
2814
2815 /* Ensure that the inputs to the handler are in the correct precision
2816 for the opcode. This will be the full mode size. */
2817 actual_precision = GET_MODE_PRECISION (actual_mode);
2818 if (actual_precision != TYPE_PRECISION (type1)
2819 || from_unsigned1 != TYPE_UNSIGNED (type1))
2820 mult_rhs1 = build_and_insert_cast (gsi, loc,
2821 build_nonstandard_integer_type
2822 (actual_precision, from_unsigned1),
2823 mult_rhs1);
2824 if (actual_precision != TYPE_PRECISION (type2)
2825 || from_unsigned2 != TYPE_UNSIGNED (type2))
2826 mult_rhs2 = build_and_insert_cast (gsi, loc,
2827 build_nonstandard_integer_type
2828 (actual_precision, from_unsigned2),
2829 mult_rhs2);
2830
2831 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2832 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2833
2834 /* Handle constants. */
2835 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2836 mult_rhs1 = fold_convert (type1, mult_rhs1);
2837 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2838 mult_rhs2 = fold_convert (type2, mult_rhs2);
2839
2840 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2841 add_rhs);
2842 update_stmt (gsi_stmt (*gsi));
2843 widen_mul_stats.maccs_inserted++;
2844 return true;
2845 }
2846
2847 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2848 OP2 and which we know is used in statements that can be, together with the
2849 multiplication, converted to FMAs, perform the transformation. */
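/* Illustrative shapes of the result (hypothetical GIMPLE; the exact
   internal functions depend on what fold_stmt can match for the target):

     tmp = a * b;  x = tmp + c;   -->  x = .FMA (a, b, c);
     tmp = a * b;  x = tmp - c;   -->  x = .FMA (a, b, -c);   (or .FMS)
     tmp = a * b;  x = c - tmp;   -->  x = .FMA (-a, b, c);   (or .FNMA)

   For conditional uses the IFN_COND_FMA form is emitted instead.  */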
2850
2851 static void
2852 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2853 {
2854 tree type = TREE_TYPE (mul_result);
2855 gimple *use_stmt;
2856 imm_use_iterator imm_iter;
2857 gcall *fma_stmt;
2858
2859 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2860 {
2861 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2862 tree addop, mulop1 = op1, result = mul_result;
2863 bool negate_p = false;
2864 gimple_seq seq = NULL;
2865
2866 if (is_gimple_debug (use_stmt))
2867 continue;
2868
2869 if (is_gimple_assign (use_stmt)
2870 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
2871 {
2872 result = gimple_assign_lhs (use_stmt);
2873 use_operand_p use_p;
2874 gimple *neguse_stmt;
2875 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2876 gsi_remove (&gsi, true);
2877 release_defs (use_stmt);
2878
2879 use_stmt = neguse_stmt;
2880 gsi = gsi_for_stmt (use_stmt);
2881 negate_p = true;
2882 }
2883
2884 tree cond, else_value, ops[3];
2885 tree_code code;
2886 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
2887 ops, &else_value))
2888 gcc_unreachable ();
2889 addop = ops[0] == result ? ops[1] : ops[0];
2890
2891 if (code == MINUS_EXPR)
2892 {
2893 if (ops[0] == result)
2894 /* a * b - c -> a * b + (-c) */
2895 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
2896 else
2897 /* a - b * c -> (-b) * c + a */
2898 negate_p = !negate_p;
2899 }
2900
2901 if (negate_p)
2902 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
2903
2904 if (seq)
2905 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2906
2907 if (cond)
2908 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
2909 op2, addop, else_value);
2910 else
2911 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
2912 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
2913 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
2914 use_stmt));
2915 gsi_replace (&gsi, fma_stmt, true);
2916 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
2917 regardless of where the negation occurs. */
2918 gimple *orig_stmt = gsi_stmt (gsi);
2919 if (fold_stmt (&gsi, follow_all_ssa_edges))
2920 {
2921 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
2922 gcc_unreachable ();
2923 update_stmt (gsi_stmt (gsi));
2924 }
2925
2926 if (dump_file && (dump_flags & TDF_DETAILS))
2927 {
2928 fprintf (dump_file, "Generated FMA ");
2929 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
2930 fprintf (dump_file, "\n");
2931 }
2932
2933 /* If the FMA result is negated in a single use, fold the negation
2934 too. */
2935 orig_stmt = gsi_stmt (gsi);
2936 use_operand_p use_p;
2937 gimple *neg_stmt;
2938 if (is_gimple_call (orig_stmt)
2939 && gimple_call_internal_p (orig_stmt)
2940 && gimple_call_lhs (orig_stmt)
2941 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
2942 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
2943 && is_gimple_assign (neg_stmt)
2944 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
2945 && !stmt_could_throw_p (cfun, neg_stmt))
2946 {
2947 gsi = gsi_for_stmt (neg_stmt);
2948 if (fold_stmt (&gsi, follow_all_ssa_edges))
2949 {
2950 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
2951 gcc_unreachable ();
2952 update_stmt (gsi_stmt (gsi));
2953 if (dump_file && (dump_flags & TDF_DETAILS))
2954 {
2955 fprintf (dump_file, "Folded FMA negation ");
2956 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
2957 fprintf (dump_file, "\n");
2958 }
2959 }
2960 }
2961
2962 widen_mul_stats.fmas_inserted++;
2963 }
2964 }
2965
2966 /* Data necessary to perform the actual transformation from a multiplication
2967 and an addition to an FMA once the decision to do it has been taken, and
2968 to then delete the multiplication statement from the function IL. */
2969
2970 struct fma_transformation_info
2971 {
2972 gimple *mul_stmt;
2973 tree mul_result;
2974 tree op1;
2975 tree op2;
2976 };
2977
2978 /* Structure containing the current state of FMA deferring, i.e. whether we are
2979 deferring, whether to continue deferring, and all data necessary to come
2980 back and perform all deferred transformations. */
2981
2982 class fma_deferring_state
2983 {
2984 public:
2985 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
2986 do any deferring. */
2987
2988 fma_deferring_state (bool perform_deferring)
2989 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
2990 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
2991
2992 /* List of FMA candidates for which the transformation has been determined
2993 to be possible but which, at this point in the BB analysis, we do not
2994 consider beneficial. */
2995 auto_vec<fma_transformation_info, 8> m_candidates;
2996
2997 /* Set of multiplication results that are part of already deferred FMA
2998 candidates. */
2999 hash_set<tree> m_mul_result_set;
3000
3001 /* The PHI that supposedly feeds back the result of one FMA to another over a
3002 loop boundary. */
3003 gphi *m_initial_phi;
3004
3005 /* Result of the last produced FMA candidate or NULL if there has not been
3006 one. */
3007 tree m_last_result;
3008
3009 /* If true, deferring might still be profitable. If false, transform all
3010 candidates and no longer defer. */
3011 bool m_deferring_p;
3012 };
3013
3014 /* Transform all deferred FMA candidates and mark STATE as no longer
3015 deferring. */
3016
3017 static void
3018 cancel_fma_deferring (fma_deferring_state *state)
3019 {
3020 if (!state->m_deferring_p)
3021 return;
3022
3023 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3024 {
3025 if (dump_file && (dump_flags & TDF_DETAILS))
3026 fprintf (dump_file, "Generating deferred FMA\n");
3027
3028 const fma_transformation_info &fti = state->m_candidates[i];
3029 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3030
3031 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3032 gsi_remove (&gsi, true);
3033 release_defs (fti.mul_stmt);
3034 }
3035 state->m_deferring_p = false;
3036 }
3037
3038 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3039 Otherwise return NULL. */
3040
3041 static gphi *
3042 result_of_phi (tree op)
3043 {
3044 if (TREE_CODE (op) != SSA_NAME)
3045 return NULL;
3046
3047 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3048 }
3049
3050 /* After processing statements of a BB and recording STATE, return true if the
3051 initial phi is fed by the last FMA candidate result or one such result from
3052 previously processed BBs marked in LAST_RESULT_SET. */
3053
3054 static bool
3055 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3056 hash_set<tree> *last_result_set)
3057 {
3058 ssa_op_iter iter;
3059 use_operand_p use;
3060 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3061 {
3062 tree t = USE_FROM_PTR (use);
3063 if (t == state->m_last_result
3064 || last_result_set->contains (t))
3065 return true;
3066 }
3067
3068 return false;
3069 }
3070
3071 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3072 with uses in additions and subtractions to form fused multiply-add
3073 operations. Returns true if successful and MUL_STMT should be removed.
3074 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3075 on MUL_COND, otherwise it is unconditional.
3076
3077 If STATE indicates that we are deferring FMA transformation, that means
3078 that we do not produce FMAs for basic blocks which look like:
3079
3080 <bb 6>
3081 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3082 _65 = _14 * _16;
3083 accumulator_66 = _65 + accumulator_111;
3084
3085 or its unrolled version, i.e. with several FMA candidates that feed the result
3086 of one into the addend of another. Instead, we add them to a list in STATE
3087 and if we later discover an FMA candidate that is not part of such a chain,
3088 we go back and perform all deferred past candidates. */
3089
3090 static bool
3091 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3092 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3093 {
3094 tree mul_result = gimple_get_lhs (mul_stmt);
3095 tree type = TREE_TYPE (mul_result);
3096 gimple *use_stmt, *neguse_stmt;
3097 use_operand_p use_p;
3098 imm_use_iterator imm_iter;
3099
3100 if (FLOAT_TYPE_P (type)
3101 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3102 return false;
3103
3104 /* We don't want to do bitfield reduction ops. */
3105 if (INTEGRAL_TYPE_P (type)
3106 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3107 return false;
3108
3109 /* If the target doesn't support it, don't generate it. We assume that
3110 if fma isn't available then fms, fnma or fnms are not either. */
3111 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3112 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3113 return false;
3114
3115 /* If the multiplication has zero uses, it is kept around probably because
3116 of -fnon-call-exceptions. Don't optimize it away in that case,
3117 it is DCE's job. */
3118 if (has_zero_uses (mul_result))
3119 return false;
3120
3121 bool check_defer
3122 = (state->m_deferring_p
3123 && (tree_to_shwi (TYPE_SIZE (type))
3124 <= param_avoid_fma_max_bits));
3125 bool defer = check_defer;
3126 bool seen_negate_p = false;
3127 /* Make sure that the multiplication statement becomes dead after
3128 the transformation, i.e. that all uses are transformed to FMAs.
3129 This means we assume that an FMA operation has the same cost
3130 as an addition. */
3131 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3132 {
3133 tree result = mul_result;
3134 bool negate_p = false;
3135
3136 use_stmt = USE_STMT (use_p);
3137
3138 if (is_gimple_debug (use_stmt))
3139 continue;
3140
3141 /* For now restrict this operation to single basic blocks. In theory
3142 we would want to support sinking the multiplication in
3143 m = a*b;
3144 if ()
3145 ma = m + c;
3146 else
3147 d = m;
3148 to form a fma in the then block and sink the multiplication to the
3149 else block. */
3150 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3151 return false;
3152
3153 /* A negate on the multiplication leads to FNMA. */
3154 if (is_gimple_assign (use_stmt)
3155 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3156 {
3157 ssa_op_iter iter;
3158 use_operand_p usep;
3159
3160 /* If (due to earlier missed optimizations) we have two
3161 negates of the same value, treat them as equivalent
3162 to a single negate with multiple uses. */
3163 if (seen_negate_p)
3164 return false;
3165
3166 result = gimple_assign_lhs (use_stmt);
3167
3168 /* Make sure the negate statement becomes dead with this
3169 single transformation. */
3170 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3171 &use_p, &neguse_stmt))
3172 return false;
3173
3174 /* Make sure the multiplication isn't also used on that stmt. */
3175 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3176 if (USE_FROM_PTR (usep) == mul_result)
3177 return false;
3178
3179 /* Re-validate. */
3180 use_stmt = neguse_stmt;
3181 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3182 return false;
3183
3184 negate_p = seen_negate_p = true;
3185 }
3186
3187 tree cond, else_value, ops[3];
3188 tree_code code;
3189 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3190 &else_value))
3191 return false;
3192
3193 switch (code)
3194 {
3195 case MINUS_EXPR:
3196 if (ops[1] == result)
3197 negate_p = !negate_p;
3198 break;
3199 case PLUS_EXPR:
3200 break;
3201 default:
3202 /* FMA can only be formed from PLUS and MINUS. */
3203 return false;
3204 }
3205
3206 if (mul_cond && cond != mul_cond)
3207 return false;
3208
3209 if (cond)
3210 {
3211 if (cond == result || else_value == result)
3212 return false;
3213 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3214 return false;
3215 }
3216
3217 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3218 we'll visit later, we might be able to get a more profitable
3219 match with fnma.
3220 OTOH, if we don't, a negate / fma pair has likely lower latency
3221 than a mult / subtract pair. */
3222 if (code == MINUS_EXPR
3223 && !negate_p
3224 && ops[0] == result
3225 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3226 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3227 && TREE_CODE (ops[1]) == SSA_NAME
3228 && has_single_use (ops[1]))
3229 {
3230 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3231 if (is_gimple_assign (stmt2)
3232 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3233 return false;
3234 }
3235
3236 /* We can't handle a * b + a * b. */
3237 if (ops[0] == ops[1])
3238 return false;
3239 /* If deferring, make sure we are not looking at an instruction that
3240 wouldn't have existed if we were not. */
3241 if (state->m_deferring_p
3242 && (state->m_mul_result_set.contains (ops[0])
3243 || state->m_mul_result_set.contains (ops[1])))
3244 return false;
3245
3246 if (check_defer)
3247 {
3248 tree use_lhs = gimple_get_lhs (use_stmt);
3249 if (state->m_last_result)
3250 {
3251 if (ops[1] == state->m_last_result
3252 || ops[0] == state->m_last_result)
3253 defer = true;
3254 else
3255 defer = false;
3256 }
3257 else
3258 {
3259 gcc_checking_assert (!state->m_initial_phi);
3260 gphi *phi;
3261 if (ops[0] == result)
3262 phi = result_of_phi (ops[1]);
3263 else
3264 {
3265 gcc_assert (ops[1] == result);
3266 phi = result_of_phi (ops[0]);
3267 }
3268
3269 if (phi)
3270 {
3271 state->m_initial_phi = phi;
3272 defer = true;
3273 }
3274 else
3275 defer = false;
3276 }
3277
3278 state->m_last_result = use_lhs;
3279 check_defer = false;
3280 }
3281 else
3282 defer = false;
3283
3284 /* While it is possible to validate whether or not the exact form that
3285 we've recognized is available in the backend, the assumption is that
3286 if the deferring logic above did not trigger, the transformation is
3287 never a loss. For instance, suppose the target only has the plain FMA
3288 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3289 MUL+SUB for FMA+NEG, which is still two operations. Consider
3290 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3291 form the two NEGs are independent and could be run in parallel. */
3292 }
3293
3294 if (defer)
3295 {
3296 fma_transformation_info fti;
3297 fti.mul_stmt = mul_stmt;
3298 fti.mul_result = mul_result;
3299 fti.op1 = op1;
3300 fti.op2 = op2;
3301 state->m_candidates.safe_push (fti);
3302 state->m_mul_result_set.add (mul_result);
3303
3304 if (dump_file && (dump_flags & TDF_DETAILS))
3305 {
3306 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3307 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3308 fprintf (dump_file, "\n");
3309 }
3310
3311 return false;
3312 }
3313 else
3314 {
3315 if (state->m_deferring_p)
3316 cancel_fma_deferring (state);
3317 convert_mult_to_fma_1 (mul_result, op1, op2);
3318 return true;
3319 }
3320 }
3321
3322
3323 /* Helper function of match_uaddsub_overflow. Return 1
3324 if USE_STMT is an unsigned overflow check of the form ovf != 0 for
3325 STMT, -1 if USE_STMT is an unsigned overflow check of the form ovf == 0,
3326 and 0 otherwise. */
3327
3328 static int
3329 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3330 {
3331 enum tree_code ccode = ERROR_MARK;
3332 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3333 if (gimple_code (use_stmt) == GIMPLE_COND)
3334 {
3335 ccode = gimple_cond_code (use_stmt);
3336 crhs1 = gimple_cond_lhs (use_stmt);
3337 crhs2 = gimple_cond_rhs (use_stmt);
3338 }
3339 else if (is_gimple_assign (use_stmt))
3340 {
3341 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3342 {
3343 ccode = gimple_assign_rhs_code (use_stmt);
3344 crhs1 = gimple_assign_rhs1 (use_stmt);
3345 crhs2 = gimple_assign_rhs2 (use_stmt);
3346 }
3347 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3348 {
3349 tree cond = gimple_assign_rhs1 (use_stmt);
3350 if (COMPARISON_CLASS_P (cond))
3351 {
3352 ccode = TREE_CODE (cond);
3353 crhs1 = TREE_OPERAND (cond, 0);
3354 crhs2 = TREE_OPERAND (cond, 1);
3355 }
3356 else
3357 return 0;
3358 }
3359 else
3360 return 0;
3361 }
3362 else
3363 return 0;
3364
3365 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3366 return 0;
3367
3368 enum tree_code code = gimple_assign_rhs_code (stmt);
3369 tree lhs = gimple_assign_lhs (stmt);
3370 tree rhs1 = gimple_assign_rhs1 (stmt);
3371 tree rhs2 = gimple_assign_rhs2 (stmt);
3372
3373 switch (ccode)
3374 {
3375 case GT_EXPR:
3376 case LE_EXPR:
3377 /* r = a - b; r > a or r <= a
3378 r = a + b; a > r or a <= r or b > r or b <= r. */
3379 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3380 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3381 && crhs2 == lhs))
3382 return ccode == GT_EXPR ? 1 : -1;
3383 break;
3384 case LT_EXPR:
3385 case GE_EXPR:
3386 /* r = a - b; a < r or a >= r
3387 r = a + b; r < a or r >= a or r < b or r >= b. */
3388 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3389 || (code == PLUS_EXPR && crhs1 == lhs
3390 && (crhs2 == rhs1 || crhs2 == rhs2)))
3391 return ccode == LT_EXPR ? 1 : -1;
3392 break;
3393 default:
3394 break;
3395 }
3396 return 0;
3397 }
3398
3399 /* Recognize for unsigned x
3400 x = y - z;
3401 if (x > y)
3402 where there are other uses of x and replace it with
3403 _7 = SUB_OVERFLOW (y, z);
3404 x = REALPART_EXPR <_7>;
3405 _8 = IMAGPART_EXPR <_7>;
3406 if (_8)
3407 and similarly for addition. */
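/* A hypothetical C-level source of such a pattern (note that x must have
   uses besides the overflow check, otherwise the statement is left alone):

     unsigned sub (unsigned y, unsigned z, unsigned *out)
     {
       unsigned x = y - z;
       *out = x;
       return x > y;
     }

   Both the subtraction and the check are then rewritten in terms of a
   single IFN_SUB_OVERFLOW call as described above.  */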
3408
3409 static bool
3410 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3411 enum tree_code code)
3412 {
3413 tree lhs = gimple_assign_lhs (stmt);
3414 tree type = TREE_TYPE (lhs);
3415 use_operand_p use_p;
3416 imm_use_iterator iter;
3417 bool use_seen = false;
3418 bool ovf_use_seen = false;
3419 gimple *use_stmt;
3420
3421 gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3422 if (!INTEGRAL_TYPE_P (type)
3423 || !TYPE_UNSIGNED (type)
3424 || has_zero_uses (lhs)
3425 || has_single_use (lhs)
3426 || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3427 TYPE_MODE (type)) == CODE_FOR_nothing)
3428 return false;
3429
3430 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3431 {
3432 use_stmt = USE_STMT (use_p);
3433 if (is_gimple_debug (use_stmt))
3434 continue;
3435
3436 if (uaddsub_overflow_check_p (stmt, use_stmt))
3437 ovf_use_seen = true;
3438 else
3439 use_seen = true;
3440 if (ovf_use_seen && use_seen)
3441 break;
3442 }
3443
3444 if (!ovf_use_seen || !use_seen)
3445 return false;
3446
3447 tree ctype = build_complex_type (type);
3448 tree rhs1 = gimple_assign_rhs1 (stmt);
3449 tree rhs2 = gimple_assign_rhs2 (stmt);
3450 gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3451 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3452 2, rhs1, rhs2);
3453 tree ctmp = make_ssa_name (ctype);
3454 gimple_call_set_lhs (g, ctmp);
3455 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3456 gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3457 build1 (REALPART_EXPR, type, ctmp));
3458 gsi_replace (gsi, g2, true);
3459 tree ovf = make_ssa_name (type);
3460 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3461 build1 (IMAGPART_EXPR, type, ctmp));
3462 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3463
3464 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3465 {
3466 if (is_gimple_debug (use_stmt))
3467 continue;
3468
3469 int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3470 if (ovf_use == 0)
3471 continue;
3472 if (gimple_code (use_stmt) == GIMPLE_COND)
3473 {
3474 gcond *cond_stmt = as_a <gcond *> (use_stmt);
3475 gimple_cond_set_lhs (cond_stmt, ovf);
3476 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3477 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3478 }
3479 else
3480 {
3481 gcc_checking_assert (is_gimple_assign (use_stmt));
3482 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3483 {
3484 gimple_assign_set_rhs1 (use_stmt, ovf);
3485 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3486 gimple_assign_set_rhs_code (use_stmt,
3487 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3488 }
3489 else
3490 {
3491 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3492 == COND_EXPR);
3493 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3494 boolean_type_node, ovf,
3495 build_int_cst (type, 0));
3496 gimple_assign_set_rhs1 (use_stmt, cond);
3497 }
3498 }
3499 update_stmt (use_stmt);
3500 }
3501 return true;
3502 }
3503
3504 /* Return true if the target has support for divmod. */
3505
3506 static bool
3507 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3508 {
3509 /* If target supports hardware divmod insn, use it for divmod. */
3510 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3511 return true;
3512
3513 /* Check if libfunc for divmod is available. */
3514 rtx libfunc = optab_libfunc (divmod_optab, mode);
3515 if (libfunc != NULL_RTX)
3516 {
3517 /* If optab_handler exists for div_optab, perhaps in a wider mode,
3518 we don't want to use the libfunc even if it exists for given mode. */
3519 machine_mode div_mode;
3520 FOR_EACH_MODE_FROM (div_mode, mode)
3521 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3522 return false;
3523
3524 return targetm.expand_divmod_libfunc != NULL;
3525 }
3526
3527 return false;
3528 }
3529
3530 /* Check if stmt is candidate for divmod transform. */
3531
3532 static bool
3533 divmod_candidate_p (gassign *stmt)
3534 {
3535 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3536 machine_mode mode = TYPE_MODE (type);
3537 optab divmod_optab, div_optab;
3538
3539 if (TYPE_UNSIGNED (type))
3540 {
3541 divmod_optab = udivmod_optab;
3542 div_optab = udiv_optab;
3543 }
3544 else
3545 {
3546 divmod_optab = sdivmod_optab;
3547 div_optab = sdiv_optab;
3548 }
3549
3550 tree op1 = gimple_assign_rhs1 (stmt);
3551 tree op2 = gimple_assign_rhs2 (stmt);
3552
3553 /* Disable the transform if either is a constant, since division-by-constant
3554 may have specialized expansion. */
3555 if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3556 return false;
3557
3558 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3559 expand using the [su]divv optabs. */
3560 if (TYPE_OVERFLOW_TRAPS (type))
3561 return false;
3562
3563 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3564 return false;
3565
3566 return true;
3567 }
3568
3569 /* This function looks for:
3570 t1 = a TRUNC_DIV_EXPR b;
3571 t2 = a TRUNC_MOD_EXPR b;
3572 and transforms it to the following sequence:
3573 complex_tmp = DIVMOD (a, b);
3574 t1 = REALPART_EXPR (complex_tmp);
3575 t2 = IMAGPART_EXPR (complex_tmp);
3576 For conditions enabling the transform see divmod_candidate_p().
3577
3578 The pass has three parts:
3579 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
3580 other trunc_div_expr and trunc_mod_expr stmts.
3581 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
3582 to stmts vector.
3583 3) Insert DIVMOD call just before top_stmt and update entries in
3584 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
3585 IMAGPART_EXPR for mod). */
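/* A schematic before/after sketch (hypothetical SSA names, not actual dump
output):

before:                          after:
  q_1 = a_2 / b_3;                 divmod_tmp_7 = DIVMOD (a_2, b_3);
  ...                              q_1 = REALPART_EXPR <divmod_tmp_7>;
  r_4 = a_2 % b_3;                 ...
                                   r_4 = IMAGPART_EXPR <divmod_tmp_7>;

The call is inserted before the dominating ("top") statement so that its
result dominates every rewritten use. */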
3586
3587 static bool
3588 convert_to_divmod (gassign *stmt)
3589 {
3590 if (stmt_can_throw_internal (cfun, stmt)
3591 || !divmod_candidate_p (stmt))
3592 return false;
3593
3594 tree op1 = gimple_assign_rhs1 (stmt);
3595 tree op2 = gimple_assign_rhs2 (stmt);
3596
3597 imm_use_iterator use_iter;
3598 gimple *use_stmt;
3599 auto_vec<gimple *> stmts;
3600
3601 gimple *top_stmt = stmt;
3602 basic_block top_bb = gimple_bb (stmt);
3603
3604 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
3605 at least stmt and possibly other trunc_div/trunc_mod stmts
3606 having same operands as stmt. */
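/* For example (schematic): if bb1 dominates bb2, bb1 holds q = a / b and
stmt is r = a % b in bb2, then top_stmt becomes the division in bb1, so the
DIVMOD call inserted before it will dominate both uses. */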
3607
3608 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
3609 {
3610 if (is_gimple_assign (use_stmt)
3611 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3612 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3613 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
3614 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
3615 {
3616 if (stmt_can_throw_internal (cfun, use_stmt))
3617 continue;
3618
3619 basic_block bb = gimple_bb (use_stmt);
3620
3621 if (bb == top_bb)
3622 {
3623 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
3624 top_stmt = use_stmt;
3625 }
3626 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
3627 {
3628 top_bb = bb;
3629 top_stmt = use_stmt;
3630 }
3631 }
3632 }
3633
3634 tree top_op1 = gimple_assign_rhs1 (top_stmt);
3635 tree top_op2 = gimple_assign_rhs2 (top_stmt);
3636
3637 stmts.safe_push (top_stmt);
3638 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3639
3640 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
3641 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
3642 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3643 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
3644
3645 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3646 {
3647 if (is_gimple_assign (use_stmt)
3648 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3649 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3650 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3651 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3652 {
3653 if (use_stmt == top_stmt
3654 || stmt_can_throw_internal (cfun, use_stmt)
3655 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3656 continue;
3657
3658 stmts.safe_push (use_stmt);
3659 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3660 div_seen = true;
3661 }
3662 }
3663
3664 if (!div_seen)
3665 return false;
3666
3667 /* Part 3: Create a call to the internal function DIVMOD:
3668 divmod_tmp = DIVMOD (op1, op2). */
3669
3670 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3671 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3672 call_stmt, "divmod_tmp");
3673 gimple_call_set_lhs (call_stmt, res);
3674 /* We rejected throwing statements above. */
3675 gimple_call_set_nothrow (call_stmt, true);
3676
3677 /* Insert the call before top_stmt. */
3678 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3679 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3680
3681 widen_mul_stats.divmod_calls_inserted++;
3682
3683 /* Update all statements in stmts vector:
3684 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3685 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
3686
3687 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
3688 {
3689 tree new_rhs;
3690
3691 switch (gimple_assign_rhs_code (use_stmt))
3692 {
3693 case TRUNC_DIV_EXPR:
3694 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
3695 break;
3696
3697 case TRUNC_MOD_EXPR:
3698 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
3699 break;
3700
3701 default:
3702 gcc_unreachable ();
3703 }
3704
3705 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3706 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
3707 update_stmt (use_stmt);
3708 }
3709
3710 return true;
3711 }
3712
3713 /* Find integer multiplications where the operands are extended from
3714 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3715 where appropriate. */
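/* A schematic example (illustrative, assuming the target provides a
widening multiply pattern):

  int16_t a, b;
  int32_t c = (int32_t) a * (int32_t) b;

is represented roughly as

  _1 = (int32_t) a_4;
  _2 = (int32_t) b_5;
  c_6 = _1 * _2;

and can be rewritten as a single widening multiplication

  c_6 = a_4 w* b_5;        <-- WIDEN_MULT_EXPR

convert_plusminus_to_widen similarly forms WIDEN_MULT_PLUS_EXPR or
WIDEN_MULT_MINUS_EXPR when such a product feeds an addition or
subtraction. */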
3716
3717 namespace {
3718
3719 const pass_data pass_data_optimize_widening_mul =
3720 {
3721 GIMPLE_PASS, /* type */
3722 "widening_mul", /* name */
3723 OPTGROUP_NONE, /* optinfo_flags */
3724 TV_TREE_WIDEN_MUL, /* tv_id */
3725 PROP_ssa, /* properties_required */
3726 0, /* properties_provided */
3727 0, /* properties_destroyed */
3728 0, /* todo_flags_start */
3729 TODO_update_ssa, /* todo_flags_finish */
3730 };
3731
3732 class pass_optimize_widening_mul : public gimple_opt_pass
3733 {
3734 public:
3735 pass_optimize_widening_mul (gcc::context *ctxt)
3736 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3737 {}
3738
3739 /* opt_pass methods: */
3740 virtual bool gate (function *)
3741 {
3742 return flag_expensive_optimizations && optimize;
3743 }
3744
3745 virtual unsigned int execute (function *);
3746
3747 }; // class pass_optimize_widening_mul
3748
3749 /* Walker class to perform the transformation in reverse dominance order. */
3750
3751 class math_opts_dom_walker : public dom_walker
3752 {
3753 public:
3754 /* Constructor, CFG_CHANGED_P is a pointer to a boolean flag that will be set
3755 if walking modifies the CFG. */
3756
3757 math_opts_dom_walker (bool *cfg_changed_p)
3758 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
3759 m_cfg_changed_p (cfg_changed_p) {}
3760
3761 /* The actual actions performed in the walk. */
3762
3763 virtual void after_dom_children (basic_block);
3764
3765 /* Set of results of chains of multiply and add statement combinations that
3766 were not transformed into FMAs because of active deferring. */
3767 hash_set<tree> m_last_result_set;
3768
3769 /* Pointer to a flag of the user that needs to be set if CFG has been
3770 modified. */
3771 bool *m_cfg_changed_p;
3772 };
3773
3774 void
3775 math_opts_dom_walker::after_dom_children (basic_block bb)
3776 {
3777 gimple_stmt_iterator gsi;
3778
3779 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
3780
3781 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3782 {
3783 gimple *stmt = gsi_stmt (gsi);
3784 enum tree_code code;
3785
3786 if (is_gimple_assign (stmt))
3787 {
3788 code = gimple_assign_rhs_code (stmt);
3789 switch (code)
3790 {
3791 case MULT_EXPR:
3792 if (!convert_mult_to_widen (stmt, &gsi)
3793 && !convert_expand_mult_copysign (stmt, &gsi)
3794 && convert_mult_to_fma (stmt,
3795 gimple_assign_rhs1 (stmt),
3796 gimple_assign_rhs2 (stmt),
3797 &fma_state))
3798 {
3799 gsi_remove (&gsi, true);
3800 release_defs (stmt);
3801 continue;
3802 }
3803 break;
3804
3805 case PLUS_EXPR:
3806 case MINUS_EXPR:
3807 if (!convert_plusminus_to_widen (&gsi, stmt, code))
3808 match_uaddsub_overflow (&gsi, stmt, code);
3809 break;
3810
3811 case TRUNC_MOD_EXPR:
3812 convert_to_divmod (as_a<gassign *> (stmt));
3813 break;
3814
3815 default:;
3816 }
3817 }
3818 else if (is_gimple_call (stmt))
3819 {
3820 switch (gimple_call_combined_fn (stmt))
3821 {
3822 CASE_CFN_POW:
3823 if (gimple_call_lhs (stmt)
3824 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3825 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3826 &dconst2)
3827 && convert_mult_to_fma (stmt,
3828 gimple_call_arg (stmt, 0),
3829 gimple_call_arg (stmt, 0),
3830 &fma_state))
3831 {
3832 unlink_stmt_vdef (stmt);
3833 if (gsi_remove (&gsi, true)
3834 && gimple_purge_dead_eh_edges (bb))
3835 *m_cfg_changed_p = true;
3836 release_defs (stmt);
3837 continue;
3838 }
3839 break;
3840
3841 case CFN_COND_MUL:
3842 if (convert_mult_to_fma (stmt,
3843 gimple_call_arg (stmt, 1),
3844 gimple_call_arg (stmt, 2),
3845 &fma_state,
3846 gimple_call_arg (stmt, 0)))
3847
3848 {
3849 gsi_remove (&gsi, true);
3850 release_defs (stmt);
3851 continue;
3852 }
3853 break;
3854
3855 case CFN_LAST:
3856 cancel_fma_deferring (&fma_state);
3857 break;
3858
3859 default:
3860 break;
3861 }
3862 }
3863 gsi_next (&gsi);
3864 }
3865 if (fma_state.m_deferring_p
3866 && fma_state.m_initial_phi)
3867 {
3868 gcc_checking_assert (fma_state.m_last_result);
3869 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
3870 &m_last_result_set))
3871 cancel_fma_deferring (&fma_state);
3872 else
3873 m_last_result_set.add (fma_state.m_last_result);
3874 }
3875 }
3876
3877
3878 unsigned int
3879 pass_optimize_widening_mul::execute (function *fun)
3880 {
3881 bool cfg_changed = false;
3882
3883 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3884 calculate_dominance_info (CDI_DOMINATORS);
3885 renumber_gimple_stmt_uids (cfun);
3886
3887 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3888
3889 statistics_counter_event (fun, "widening multiplications inserted",
3890 widen_mul_stats.widen_mults_inserted);
3891 statistics_counter_event (fun, "widening maccs inserted",
3892 widen_mul_stats.maccs_inserted);
3893 statistics_counter_event (fun, "fused multiply-adds inserted",
3894 widen_mul_stats.fmas_inserted);
3895 statistics_counter_event (fun, "divmod calls inserted",
3896 widen_mul_stats.divmod_calls_inserted);
3897
3898 return cfg_changed ? TODO_cleanup_cfg : 0;
3899 }
3900
3901 } // anon namespace
3902
3903 gimple_opt_pass *
3904 make_pass_optimize_widening_mul (gcc::context *ctxt)
3905 {
3906 return new pass_optimize_widening_mul (ctxt);
3907 }