1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* The first mini-pass in this file tries to CSE reciprocal
21    operations.  These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43    First of all, experiments showed that the transformation is not
44    always useful if there are only two divisions by the same divisor.
45    This is probably because modern processors can pipeline the
46    divisions; on older, in-order processors it should still be
47    effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59    that has fewer than N divisions *post-dominating* it.
60
61    The algorithm constructs a subset of the dominator tree, holding the
62    blocks containing the divisions and their common dominators,
63    and walks it twice.  The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71    with a cost of O(n_basic_blocks ^ 2); however, this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
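
/* As an illustrative sketch (block layout and names are made up here, and
   the target's threshold N is assumed to be 2): when the two divisions sit
   on mutually exclusive paths,

       if (p)
         x = a / b;
       else
         y = c / b;

   neither branch post-dominates their common dominator, so the dominator's
   merit stays below N and no reciprocal is inserted in it; computing
   1.0 / b there could be wasted work on paths that never divide.  Each
   branch by itself holds only one division, so nothing is transformed.  */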
86
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "params.h"
113 #include "internal-fn.h"
114 #include "case-cfn-macros.h"
115 #include "optabs-libfuncs.h"
116 #include "tree-eh.h"
117 #include "targhooks.h"
118
119 /* This structure represents one basic block that either computes a
120    division, or is a common dominator for basic blocks that compute a
121    division. */
122 struct occurrence {
123 /* The basic block represented by this structure. */
124 basic_block bb;
125
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
127 inserted in BB. */
128 tree recip_def;
129
130 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
131 was inserted in BB. */
132 gimple *recip_def_stmt;
133
134 /* Pointer to a list of "struct occurrence"s for blocks dominated
135 by BB. */
136 struct occurrence *children;
137
138   /* Pointer to the next "struct occurrence" in the list of blocks
139 sharing a common dominator. */
140 struct occurrence *next;
141
142 /* The number of divisions that are in BB before compute_merit. The
143 number of divisions that are in BB or post-dominate it after
144 compute_merit. */
145 int num_divisions;
146
147 /* True if the basic block has a division, false if it is a common
148 dominator for basic blocks that do. If it is false and trapping
149 math is active, BB is not a candidate for inserting a reciprocal. */
150 bool bb_has_division;
151 };
152
153 static struct
154 {
155 /* Number of 1.0/X ops inserted. */
156 int rdivs_inserted;
157
158 /* Number of 1.0/FUNC ops inserted. */
159 int rfuncs_inserted;
160 } reciprocal_stats;
161
162 static struct
163 {
164 /* Number of cexpi calls inserted. */
165 int inserted;
166 } sincos_stats;
167
168 static struct
169 {
170 /* Number of hand-written 16-bit nop / bswaps found. */
171 int found_16bit;
172
173 /* Number of hand-written 32-bit nop / bswaps found. */
174 int found_32bit;
175
176 /* Number of hand-written 64-bit nop / bswaps found. */
177 int found_64bit;
178 } nop_stats, bswap_stats;
179
180 static struct
181 {
182 /* Number of widening multiplication ops inserted. */
183 int widen_mults_inserted;
184
185 /* Number of integer multiply-and-accumulate ops inserted. */
186 int maccs_inserted;
187
188 /* Number of fp fused multiply-add ops inserted. */
189 int fmas_inserted;
190
191 /* Number of divmod calls inserted. */
192 int divmod_calls_inserted;
193 } widen_mul_stats;
194
195 /* The instance of "struct occurrence" representing the highest
196 interesting block in the dominator tree. */
197 static struct occurrence *occ_head;
198
199 /* Allocation pool for getting instances of "struct occurrence". */
200 static object_allocator<occurrence> *occ_pool;
201
202
203
204 /* Allocate and return a new struct occurrence for basic block BB,
205    whose children list is headed by CHILDREN.  */
206 static struct occurrence *
207 occ_new (basic_block bb, struct occurrence *children)
208 {
209 struct occurrence *occ;
210
211 bb->aux = occ = occ_pool->allocate ();
212 memset (occ, 0, sizeof (struct occurrence));
213
214 occ->bb = bb;
215 occ->children = children;
216 return occ;
217 }
218
219
220 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
221 list of "struct occurrence"s, one per basic block, having IDOM as
222 their common dominator.
223
224 We try to insert NEW_OCC as deep as possible in the tree, and we also
225 insert any other block that is a common dominator for BB and one
226 block already in the tree. */
227
228 static void
229 insert_bb (struct occurrence *new_occ, basic_block idom,
230 struct occurrence **p_head)
231 {
232 struct occurrence *occ, **p_occ;
233
234 for (p_occ = p_head; (occ = *p_occ) != NULL; )
235 {
236 basic_block bb = new_occ->bb, occ_bb = occ->bb;
237 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
238 if (dom == bb)
239 {
240 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
241 from its list. */
242 *p_occ = occ->next;
243 occ->next = new_occ->children;
244 new_occ->children = occ;
245
246 	  /* Try the next block (it may be dominated by BB as well).  */
247 }
248
249 else if (dom == occ_bb)
250 {
251 /* OCC_BB dominates BB. Tail recurse to look deeper. */
252 insert_bb (new_occ, dom, &occ->children);
253 return;
254 }
255
256 else if (dom != idom)
257 {
258 gcc_assert (!dom->aux);
259
260 /* There is a dominator between IDOM and BB, add it and make
261 two children out of NEW_OCC and OCC. First, remove OCC from
262 its list. */
263 *p_occ = occ->next;
264 new_occ->next = occ;
265 occ->next = NULL;
266
267 /* None of the previous blocks has DOM as a dominator: if we tail
268 recursed, we would reexamine them uselessly. Just switch BB with
269 DOM, and go on looking for blocks dominated by DOM. */
270 new_occ = occ_new (dom, new_occ);
271 }
272
273 else
274 {
275 /* Nothing special, go on with the next element. */
276 p_occ = &occ->next;
277 }
278 }
279
280 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
281 new_occ->next = *p_head;
282 *p_head = new_occ;
283 }
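
/* An illustrative walk of the code above (not from the original sources):
   suppose the subset so far holds a single block B1 and we insert a new
   block B2.  If B2 dominates B1, B1 is re-linked as a child of B2; if B1
   dominates B2, we tail-recurse into B1's children; if neither dominates
   the other and their nearest common dominator D is deeper than IDOM, D is
   materialized with occ_new and adopts both as children; otherwise B2
   simply becomes another sibling under IDOM.  */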
284
285 /* Register that we found a division in BB. */
286
287 static inline void
288 register_division_in (basic_block bb)
289 {
290 struct occurrence *occ;
291
292 occ = (struct occurrence *) bb->aux;
293 if (!occ)
294 {
295 occ = occ_new (bb, NULL);
296 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
297 }
298
299 occ->bb_has_division = true;
300 occ->num_divisions++;
301 }
302
303
304 /* Compute the number of divisions that postdominate each block in OCC and
305 its children. */
306
307 static void
308 compute_merit (struct occurrence *occ)
309 {
310 struct occurrence *occ_child;
311 basic_block dom = occ->bb;
312
313 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
314 {
315 basic_block bb;
316 if (occ_child->children)
317 compute_merit (occ_child);
318
319 if (flag_exceptions)
320 bb = single_noncomplex_succ (dom);
321 else
322 bb = dom;
323
324 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
325 occ->num_divisions += occ_child->num_divisions;
326 }
327 }
328
329
330 /* Return whether USE_STMT is a floating-point division by DEF. */
331 static inline bool
332 is_division_by (gimple *use_stmt, tree def)
333 {
334 return is_gimple_assign (use_stmt)
335 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
336 && gimple_assign_rhs2 (use_stmt) == def
337 	 /* Do not recognize x / x as a valid division, as we would get
338 	    confused later by replacing all immediate uses of x in such
339 	    a stmt.  */
340 && gimple_assign_rhs1 (use_stmt) != def;
341 }
342
343 /* Walk the subset of the dominator tree rooted at OCC, setting the
344 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
345 the given basic block. The field may be left NULL, of course,
346 if it is not possible or profitable to do the optimization.
347
348    DEF_GSI is an iterator pointing at the statement defining DEF.
349 If RECIP_DEF is set, a dominator already has a computation that can
350 be used. */
351
352 static void
353 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
354 tree def, tree recip_def, int threshold)
355 {
356 tree type;
357 gassign *new_stmt;
358 gimple_stmt_iterator gsi;
359 struct occurrence *occ_child;
360
361 if (!recip_def
362 && (occ->bb_has_division || !flag_trapping_math)
363 && occ->num_divisions >= threshold)
364 {
365 /* Make a variable with the replacement and substitute it. */
366 type = TREE_TYPE (def);
367 recip_def = create_tmp_reg (type, "reciptmp");
368 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
369 build_one_cst (type), def);
370
371 if (occ->bb_has_division)
372 {
373 /* Case 1: insert before an existing division. */
374 gsi = gsi_after_labels (occ->bb);
375 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
376 gsi_next (&gsi);
377
378 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
379 }
380 else if (def_gsi && occ->bb == def_gsi->bb)
381 {
382 /* Case 2: insert right after the definition. Note that this will
383 never happen if the definition statement can throw, because in
384 that case the sole successor of the statement's basic block will
385 dominate all the uses as well. */
386 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
387 }
388 else
389 {
390 /* Case 3: insert in a basic block not containing defs/uses. */
391 gsi = gsi_after_labels (occ->bb);
392 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
393 }
394
395 reciprocal_stats.rdivs_inserted++;
396
397 occ->recip_def_stmt = new_stmt;
398 }
399
400 occ->recip_def = recip_def;
401 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
402 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
403 }
404
405
406 /* Replace the division at USE_P with a multiplication by the reciprocal, if
407 possible. */
408
409 static inline void
410 replace_reciprocal (use_operand_p use_p)
411 {
412 gimple *use_stmt = USE_STMT (use_p);
413 basic_block bb = gimple_bb (use_stmt);
414 struct occurrence *occ = (struct occurrence *) bb->aux;
415
416 if (optimize_bb_for_speed_p (bb)
417 && occ->recip_def && use_stmt != occ->recip_def_stmt)
418 {
419 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
420 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
421 SET_USE (use_p, occ->recip_def);
422 fold_stmt_inplace (&gsi);
423 update_stmt (use_stmt);
424 }
425 }
426
427
428 /* Free OCC and return one more "struct occurrence" to be freed. */
429
430 static struct occurrence *
431 free_bb (struct occurrence *occ)
432 {
433 struct occurrence *child, *next;
434
435 /* First get the two pointers hanging off OCC. */
436 next = occ->next;
437 child = occ->children;
438 occ->bb->aux = NULL;
439 occ_pool->remove (occ);
440
441 /* Now ensure that we don't recurse unless it is necessary. */
442 if (!child)
443 return next;
444 else
445 {
446 while (next)
447 next = free_bb (next);
448
449 return child;
450 }
451 }
452
453
454 /* Look for floating-point divisions among DEF's uses, and try to
455 replace them by multiplications with the reciprocal. Add
456 as many statements computing the reciprocal as needed.
457
458 DEF must be a GIMPLE register of a floating-point type. */
459
460 static void
461 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
462 {
463 use_operand_p use_p;
464 imm_use_iterator use_iter;
465 struct occurrence *occ;
466 int count = 0, threshold;
467
468 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
469
470 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
471 {
472 gimple *use_stmt = USE_STMT (use_p);
473 if (is_division_by (use_stmt, def))
474 {
475 register_division_in (gimple_bb (use_stmt));
476 count++;
477 }
478 }
479
480 /* Do the expensive part only if we can hope to optimize something. */
481 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
482 if (count >= threshold)
483 {
484 gimple *use_stmt;
485 for (occ = occ_head; occ; occ = occ->next)
486 {
487 compute_merit (occ);
488 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
489 }
490
491 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
492 {
493 if (is_division_by (use_stmt, def))
494 {
495 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
496 replace_reciprocal (use_p);
497 }
498 }
499 }
500
501 for (occ = occ_head; occ; )
502 occ = free_bb (occ);
503
504 occ_head = NULL;
505 }
506
507 /* Return an internal function that implements the reciprocal of CALL,
508 or IFN_LAST if there is no such function that the target supports. */
509
510 internal_fn
511 internal_fn_reciprocal (gcall *call)
512 {
513 internal_fn ifn;
514
515 switch (gimple_call_combined_fn (call))
516 {
517 CASE_CFN_SQRT:
518 ifn = IFN_RSQRT;
519 break;
520
521 default:
522 return IFN_LAST;
523 }
524
525 tree_pair types = direct_internal_fn_types (ifn, call);
526 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
527 return IFN_LAST;
528
529 return ifn;
530 }
531
532 /* Go through all the floating-point SSA_NAMEs, and call
533 execute_cse_reciprocals_1 on each of them. */
534 namespace {
535
536 const pass_data pass_data_cse_reciprocals =
537 {
538 GIMPLE_PASS, /* type */
539 "recip", /* name */
540 OPTGROUP_NONE, /* optinfo_flags */
541 TV_NONE, /* tv_id */
542 PROP_ssa, /* properties_required */
543 0, /* properties_provided */
544 0, /* properties_destroyed */
545 0, /* todo_flags_start */
546 TODO_update_ssa, /* todo_flags_finish */
547 };
548
549 class pass_cse_reciprocals : public gimple_opt_pass
550 {
551 public:
552 pass_cse_reciprocals (gcc::context *ctxt)
553 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
554 {}
555
556 /* opt_pass methods: */
557 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
558 virtual unsigned int execute (function *);
559
560 }; // class pass_cse_reciprocals
561
562 unsigned int
563 pass_cse_reciprocals::execute (function *fun)
564 {
565 basic_block bb;
566 tree arg;
567
568 occ_pool = new object_allocator<occurrence> ("dominators for recip");
569
570 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
571 calculate_dominance_info (CDI_DOMINATORS);
572 calculate_dominance_info (CDI_POST_DOMINATORS);
573
574 if (flag_checking)
575 FOR_EACH_BB_FN (bb, fun)
576 gcc_assert (!bb->aux);
577
578 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
579 if (FLOAT_TYPE_P (TREE_TYPE (arg))
580 && is_gimple_reg (arg))
581 {
582 tree name = ssa_default_def (fun, arg);
583 if (name)
584 execute_cse_reciprocals_1 (NULL, name);
585 }
586
587 FOR_EACH_BB_FN (bb, fun)
588 {
589 tree def;
590
591 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
592 gsi_next (&gsi))
593 {
594 gphi *phi = gsi.phi ();
595 def = PHI_RESULT (phi);
596 if (! virtual_operand_p (def)
597 && FLOAT_TYPE_P (TREE_TYPE (def)))
598 execute_cse_reciprocals_1 (NULL, def);
599 }
600
601 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
602 gsi_next (&gsi))
603 {
604 gimple *stmt = gsi_stmt (gsi);
605
606 if (gimple_has_lhs (stmt)
607 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
608 && FLOAT_TYPE_P (TREE_TYPE (def))
609 && TREE_CODE (def) == SSA_NAME)
610 execute_cse_reciprocals_1 (&gsi, def);
611 }
612
613 if (optimize_bb_for_size_p (bb))
614 continue;
615
616 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
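      /* For instance, a / sqrt (b) becomes t = rsqrt (b); a * t when the
	 target supports IFN_RSQRT or provides a builtin_reciprocal hook,
	 provided every use of the sqrt result is such a division (an
	 illustrative sketch; the exact replacement depends on the target).  */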
617 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
618 gsi_next (&gsi))
619 {
620 gimple *stmt = gsi_stmt (gsi);
621
622 if (is_gimple_assign (stmt)
623 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
624 {
625 tree arg1 = gimple_assign_rhs2 (stmt);
626 gimple *stmt1;
627
628 if (TREE_CODE (arg1) != SSA_NAME)
629 continue;
630
631 stmt1 = SSA_NAME_DEF_STMT (arg1);
632
633 if (is_gimple_call (stmt1)
634 && gimple_call_lhs (stmt1))
635 {
636 bool fail;
637 imm_use_iterator ui;
638 use_operand_p use_p;
639 tree fndecl = NULL_TREE;
640
641 gcall *call = as_a <gcall *> (stmt1);
642 internal_fn ifn = internal_fn_reciprocal (call);
643 if (ifn == IFN_LAST)
644 {
645 fndecl = gimple_call_fndecl (call);
646 if (!fndecl
647 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
648 continue;
649 fndecl = targetm.builtin_reciprocal (fndecl);
650 if (!fndecl)
651 continue;
652 }
653
654 /* Check that all uses of the SSA name are divisions,
655 otherwise replacing the defining statement will do
656 the wrong thing. */
657 fail = false;
658 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
659 {
660 gimple *stmt2 = USE_STMT (use_p);
661 if (is_gimple_debug (stmt2))
662 continue;
663 if (!is_gimple_assign (stmt2)
664 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
665 || gimple_assign_rhs1 (stmt2) == arg1
666 || gimple_assign_rhs2 (stmt2) != arg1)
667 {
668 fail = true;
669 break;
670 }
671 }
672 if (fail)
673 continue;
674
675 gimple_replace_ssa_lhs (call, arg1);
676 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
677 {
678 auto_vec<tree, 4> args;
679 for (unsigned int i = 0;
680 i < gimple_call_num_args (call); i++)
681 args.safe_push (gimple_call_arg (call, i));
682 gcall *stmt2;
683 if (ifn == IFN_LAST)
684 stmt2 = gimple_build_call_vec (fndecl, args);
685 else
686 stmt2 = gimple_build_call_internal_vec (ifn, args);
687 gimple_call_set_lhs (stmt2, arg1);
688 if (gimple_vdef (call))
689 {
690 gimple_set_vdef (stmt2, gimple_vdef (call));
691 SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
692 }
693 gimple_call_set_nothrow (stmt2,
694 gimple_call_nothrow_p (call));
695 gimple_set_vuse (stmt2, gimple_vuse (call));
696 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
697 gsi_replace (&gsi2, stmt2, true);
698 }
699 else
700 {
701 if (ifn == IFN_LAST)
702 gimple_call_set_fndecl (call, fndecl);
703 else
704 gimple_call_set_internal_fn (call, ifn);
705 update_stmt (call);
706 }
707 reciprocal_stats.rfuncs_inserted++;
708
709 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
710 {
711 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
712 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
713 fold_stmt_inplace (&gsi);
714 update_stmt (stmt);
715 }
716 }
717 }
718 }
719 }
720
721 statistics_counter_event (fun, "reciprocal divs inserted",
722 reciprocal_stats.rdivs_inserted);
723 statistics_counter_event (fun, "reciprocal functions inserted",
724 reciprocal_stats.rfuncs_inserted);
725
726 free_dominance_info (CDI_DOMINATORS);
727 free_dominance_info (CDI_POST_DOMINATORS);
728 delete occ_pool;
729 return 0;
730 }
731
732 } // anon namespace
733
734 gimple_opt_pass *
735 make_pass_cse_reciprocals (gcc::context *ctxt)
736 {
737 return new pass_cse_reciprocals (ctxt);
738 }
739
740 /* Records an occurrence at statement USE_STMT in the vector STMTS if
741    it is dominated by *TOP_BB, dominates it, or *TOP_BB is not yet
742    initialized.  Returns true if the occurrence was pushed on
743    the vector.  Adjusts *TOP_BB to be the basic block dominating all
744    statements in the vector.  */
745
746 static bool
747 maybe_record_sincos (vec<gimple *> *stmts,
748 basic_block *top_bb, gimple *use_stmt)
749 {
750 basic_block use_bb = gimple_bb (use_stmt);
751 if (*top_bb
752 && (*top_bb == use_bb
753 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
754 stmts->safe_push (use_stmt);
755 else if (!*top_bb
756 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
757 {
758 stmts->safe_push (use_stmt);
759 *top_bb = use_bb;
760 }
761 else
762 return false;
763
764 return true;
765 }
766
767 /* Look for sin, cos and cexpi calls with the same argument NAME and
768    create a single call to cexpi, CSEing the result in this case.
769    We first walk over all immediate uses of the argument, collecting
770    statements that we can CSE into a vector, and in a second pass replace
771 the statement rhs with a REALPART or IMAGPART expression on the
772 result of the cexpi call we insert before the use statement that
773 dominates all other candidates. */
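
/* For instance (an illustrative sketch; "t" is a made-up temporary):

     s = sin (x);
     c = cos (x);

   becomes

     t = cexpi (x);
     s = IMAGPART_EXPR <t>;
     c = REALPART_EXPR <t>;

   with the cexpi call inserted at the dominating use, or right after the
   definition of x when that definition is in the same block.  */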
774
775 static bool
776 execute_cse_sincos_1 (tree name)
777 {
778 gimple_stmt_iterator gsi;
779 imm_use_iterator use_iter;
780 tree fndecl, res, type;
781 gimple *def_stmt, *use_stmt, *stmt;
782 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
783 auto_vec<gimple *> stmts;
784 basic_block top_bb = NULL;
785 int i;
786 bool cfg_changed = false;
787
788 type = TREE_TYPE (name);
789 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
790 {
791 if (gimple_code (use_stmt) != GIMPLE_CALL
792 || !gimple_call_lhs (use_stmt))
793 continue;
794
795 switch (gimple_call_combined_fn (use_stmt))
796 {
797 CASE_CFN_COS:
798 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
799 break;
800
801 CASE_CFN_SIN:
802 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
803 break;
804
805 CASE_CFN_CEXPI:
806 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
807 break;
808
809 default:;
810 }
811 }
812
813 if (seen_cos + seen_sin + seen_cexpi <= 1)
814 return false;
815
816 /* Simply insert cexpi at the beginning of top_bb but not earlier than
817 the name def statement. */
818 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
819 if (!fndecl)
820 return false;
821 stmt = gimple_build_call (fndecl, 1, name);
822 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
823 gimple_call_set_lhs (stmt, res);
824
825 def_stmt = SSA_NAME_DEF_STMT (name);
826 if (!SSA_NAME_IS_DEFAULT_DEF (name)
827 && gimple_code (def_stmt) != GIMPLE_PHI
828 && gimple_bb (def_stmt) == top_bb)
829 {
830 gsi = gsi_for_stmt (def_stmt);
831 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
832 }
833 else
834 {
835 gsi = gsi_after_labels (top_bb);
836 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
837 }
838 sincos_stats.inserted++;
839
840 /* And adjust the recorded old call sites. */
841 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
842 {
843 tree rhs = NULL;
844
845 switch (gimple_call_combined_fn (use_stmt))
846 {
847 CASE_CFN_COS:
848 rhs = fold_build1 (REALPART_EXPR, type, res);
849 break;
850
851 CASE_CFN_SIN:
852 rhs = fold_build1 (IMAGPART_EXPR, type, res);
853 break;
854
855 CASE_CFN_CEXPI:
856 rhs = res;
857 break;
858
859 default:;
860 gcc_unreachable ();
861 }
862
863 /* Replace call with a copy. */
864 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
865
866 gsi = gsi_for_stmt (use_stmt);
867 gsi_replace (&gsi, stmt, true);
868 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
869 cfg_changed = true;
870 }
871
872 return cfg_changed;
873 }
874
875 /* To evaluate powi(x,n), the floating point value x raised to the
876 constant integer exponent n, we use a hybrid algorithm that
877 combines the "window method" with look-up tables. For an
878 introduction to exponentiation algorithms and "addition chains",
879 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
880 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
881 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
882 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
883
884 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
885 multiplications to inline before calling the system library's pow
886 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
887 so this default never requires calling pow, powf or powl. */
888
889 #ifndef POWI_MAX_MULTS
890 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
891 #endif
892
893 /* The size of the "optimal power tree" lookup table. All
894 exponents less than this value are simply looked up in the
895 powi_table below. This threshold is also used to size the
896 cache of pseudo registers that hold intermediate results. */
897 #define POWI_TABLE_SIZE 256
898
899 /* The size, in bits, of the window used in the "window method"
900    exponentiation algorithm.  This is equivalent to a radix of
901 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
902 #define POWI_WINDOW_SIZE 3
903
904 /* The following table is an efficient representation of an
905 "optimal power tree". For each value, i, the corresponding
906    value, j, in the table states that an optimal evaluation
907 sequence for calculating pow(x,i) can be found by evaluating
908 pow(x,j)*pow(x,i-j). An optimal power tree for the first
909    100 integers is given in Knuth's "Seminumerical Algorithms".  */
910
911 static const unsigned char powi_table[POWI_TABLE_SIZE] =
912 {
913 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
914 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
915 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
916 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
917 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
918 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
919 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
920 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
921 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
922 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
923 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
924 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
925 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
926 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
927 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
928 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
929 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
930 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
931 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
932 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
933 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
934 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
935 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
936 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
937 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
938 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
939 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
940 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
941 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
942 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
943 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
944 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
945 };
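
/* A worked example of the table (derived by hand from the entries above,
   purely illustrative): powi_table[15] == 9, powi_table[9] == 6,
   powi_table[6] == 3, powi_table[3] == 2 and powi_table[2] == 1, so
   pow (x, 15) is evaluated as

     t2  = x * x
     t3  = t2 * x
     t6  = t3 * t3
     t9  = t6 * t3
     t15 = t9 * t6

   i.e. five multiplications, one fewer than binary (square-and-multiply)
   exponentiation needs for the same exponent.  */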
946
947
948 /* Return the number of multiplications required to calculate
949 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
950 subroutine of powi_cost. CACHE is an array indicating
951 which exponents have already been calculated. */
952
953 static int
954 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
955 {
956 /* If we've already calculated this exponent, then this evaluation
957 doesn't require any additional multiplications. */
958 if (cache[n])
959 return 0;
960
961 cache[n] = true;
962 return powi_lookup_cost (n - powi_table[n], cache)
963 + powi_lookup_cost (powi_table[n], cache) + 1;
964 }
965
966 /* Return the number of multiplications required to calculate
967 powi(x,n) for an arbitrary x, given the exponent N. This
968 function needs to be kept in sync with powi_as_mults below. */
969
970 static int
971 powi_cost (HOST_WIDE_INT n)
972 {
973 bool cache[POWI_TABLE_SIZE];
974 unsigned HOST_WIDE_INT digit;
975 unsigned HOST_WIDE_INT val;
976 int result;
977
978 if (n == 0)
979 return 0;
980
981 /* Ignore the reciprocal when calculating the cost. */
982 val = (n < 0) ? -n : n;
983
984 /* Initialize the exponent cache. */
985 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
986 cache[1] = true;
987
988 result = 0;
989
990 while (val >= POWI_TABLE_SIZE)
991 {
992 if (val & 1)
993 {
994 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
995 result += powi_lookup_cost (digit, cache)
996 + POWI_WINDOW_SIZE + 1;
997 val >>= POWI_WINDOW_SIZE;
998 }
999 else
1000 {
1001 val >>= 1;
1002 result++;
1003 }
1004 }
1005
1006 return result + powi_lookup_cost (val, cache);
1007 }
1008
1009 /* Recursive subroutine of powi_as_mults. This function takes the
1010 array, CACHE, of already calculated exponents and an exponent N and
1011 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1012
1013 static tree
1014 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1015 HOST_WIDE_INT n, tree *cache)
1016 {
1017 tree op0, op1, ssa_target;
1018 unsigned HOST_WIDE_INT digit;
1019 gassign *mult_stmt;
1020
1021 if (n < POWI_TABLE_SIZE && cache[n])
1022 return cache[n];
1023
1024 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1025
1026 if (n < POWI_TABLE_SIZE)
1027 {
1028 cache[n] = ssa_target;
1029 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1030 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1031 }
1032 else if (n & 1)
1033 {
1034 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1035 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1036 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1037 }
1038 else
1039 {
1040 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1041 op1 = op0;
1042 }
1043
1044 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1045 gimple_set_location (mult_stmt, loc);
1046 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1047
1048 return ssa_target;
1049 }
1050
1051 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1052 This function needs to be kept in sync with powi_cost above. */
1053
1054 static tree
1055 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1056 tree arg0, HOST_WIDE_INT n)
1057 {
1058 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1059 gassign *div_stmt;
1060 tree target;
1061
1062 if (n == 0)
1063 return build_real (type, dconst1);
1064
1065 memset (cache, 0, sizeof (cache));
1066 cache[1] = arg0;
1067
1068 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1069 if (n >= 0)
1070 return result;
1071
1072 /* If the original exponent was negative, reciprocate the result. */
1073 target = make_temp_ssa_name (type, NULL, "powmult");
1074 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1075 build_real (type, dconst1), result);
1076 gimple_set_location (div_stmt, loc);
1077 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1078
1079 return target;
1080 }
1081
1082 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1083 location info LOC. If the arguments are appropriate, create an
1084 equivalent sequence of statements prior to GSI using an optimal
1085    number of multiplications, and return an expression holding the
1086 result. */
1087
1088 static tree
1089 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1090 tree arg0, HOST_WIDE_INT n)
1091 {
1092 /* Avoid largest negative number. */
1093 if (n != -n
1094 && ((n >= -1 && n <= 2)
1095 || (optimize_function_for_speed_p (cfun)
1096 && powi_cost (n) <= POWI_MAX_MULTS)))
1097 return powi_as_mults (gsi, loc, arg0, n);
1098
1099 return NULL_TREE;
1100 }
1101
1102 /* Build a gimple call statement that calls FN with argument ARG.
1103 Set the lhs of the call statement to a fresh SSA name. Insert the
1104 statement prior to GSI's current position, and return the fresh
1105 SSA name. */
1106
1107 static tree
1108 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1109 tree fn, tree arg)
1110 {
1111 gcall *call_stmt;
1112 tree ssa_target;
1113
1114 call_stmt = gimple_build_call (fn, 1, arg);
1115 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1116 gimple_set_lhs (call_stmt, ssa_target);
1117 gimple_set_location (call_stmt, loc);
1118 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1119
1120 return ssa_target;
1121 }
1122
1123 /* Build a gimple binary operation with the given CODE and arguments
1124    ARG0, ARG1, assigning the result to a new SSA name built from
1125    NAME.  Insert the statement prior to GSI's current position, and
1126    return the fresh SSA name.  */
1127
1128 static tree
1129 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1130 const char *name, enum tree_code code,
1131 tree arg0, tree arg1)
1132 {
1133 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1134 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1135 gimple_set_location (stmt, loc);
1136 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1137 return result;
1138 }
1139
1140 /* Build a gimple reference operation with the given CODE and argument
1141    ARG0, assigning the result to a new SSA name of TYPE with NAME.
1142 Insert the statement prior to GSI's current position, and return
1143 the fresh SSA name. */
1144
1145 static inline tree
1146 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1147 const char *name, enum tree_code code, tree arg0)
1148 {
1149 tree result = make_temp_ssa_name (type, NULL, name);
1150 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1151 gimple_set_location (stmt, loc);
1152 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1153 return result;
1154 }
1155
1156 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1157 prior to GSI's current position, and return the fresh SSA name. */
1158
1159 static tree
1160 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1161 tree type, tree val)
1162 {
1163 tree result = make_ssa_name (type);
1164 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1165 gimple_set_location (stmt, loc);
1166 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1167 return result;
1168 }
1169
1170 struct pow_synth_sqrt_info
1171 {
1172 bool *factors;
1173 unsigned int deepest;
1174 unsigned int num_mults;
1175 };
1176
1177 /* Return true iff the real value C can be represented as a
1178 sum of powers of 0.5 up to N. That is:
1179 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1180 Record in INFO the various parameters of the synthesis algorithm such
1181 as the factors a[i], the maximum 0.5 power and the number of
1182 multiplications that will be required. */
1183
1184 bool
1185 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1186 struct pow_synth_sqrt_info *info)
1187 {
1188 REAL_VALUE_TYPE factor = dconsthalf;
1189 REAL_VALUE_TYPE remainder = c;
1190
1191 info->deepest = 0;
1192 info->num_mults = 0;
1193 memset (info->factors, 0, n * sizeof (bool));
1194
1195 for (unsigned i = 0; i < n; i++)
1196 {
1197 REAL_VALUE_TYPE res;
1198
1199       /* If something inexact happened, bail out now.  */
1200 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1201 return false;
1202
1203 /* We have hit zero. The number is representable as a sum
1204 of powers of 0.5. */
1205 if (real_equal (&res, &dconst0))
1206 {
1207 info->factors[i] = true;
1208 info->deepest = i + 1;
1209 return true;
1210 }
1211 else if (!REAL_VALUE_NEGATIVE (res))
1212 {
1213 remainder = res;
1214 info->factors[i] = true;
1215 info->num_mults++;
1216 }
1217 else
1218 info->factors[i] = false;
1219
1220 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1221 }
1222 return false;
1223 }
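
/* A worked example (traced by hand, purely illustrative): for C == 0.625
   and N >= 3 the loop finds 0.625 == 0.5 + 0.125 == 0.5**1 + 0.5**3, so
   INFO records factors {1, 0, 1}, deepest == 3 and num_mults == 1, and the
   caller can synthesize

     pow (x, 0.625) == sqrt (x) * sqrt (sqrt (sqrt (x)))

   using a single multiplication on top of the three square-root calls.  */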
1224
1225 /* Return the tree corresponding to FN being applied
1226 to ARG N times at GSI and LOC.
1227 Look up previous results from CACHE if need be.
1228 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1229
1230 static tree
1231 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1232 tree fn, location_t loc, tree *cache)
1233 {
1234 tree res = cache[n];
1235 if (!res)
1236 {
1237 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1238 res = build_and_insert_call (gsi, loc, fn, prev);
1239 cache[n] = res;
1240 }
1241
1242 return res;
1243 }
1244
1245 /* Print to STREAM the repeated application of function FNAME to ARG
1246 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1247 "foo (foo (x))". */
1248
1249 static void
1250 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1251 unsigned int n)
1252 {
1253 if (n == 0)
1254 fprintf (stream, "%s", arg);
1255 else
1256 {
1257 fprintf (stream, "%s (", fname);
1258 print_nested_fn (stream, fname, arg, n - 1);
1259 fprintf (stream, ")");
1260 }
1261 }
1262
1263 /* Print to STREAM the fractional sequence of sqrt chains
1264 applied to ARG, described by INFO. Used for the dump file. */
1265
1266 static void
1267 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1268 struct pow_synth_sqrt_info *info)
1269 {
1270 for (unsigned int i = 0; i < info->deepest; i++)
1271 {
1272 bool is_set = info->factors[i];
1273 if (is_set)
1274 {
1275 print_nested_fn (stream, "sqrt", arg, i + 1);
1276 if (i != info->deepest - 1)
1277 fprintf (stream, " * ");
1278 }
1279 }
1280 }
1281
1282 /* Print to STREAM a representation of raising ARG to an integer
1283 power N. Used for the dump file. */
1284
1285 static void
1286 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1287 {
1288 if (n > 1)
1289 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1290 else if (n == 1)
1291 fprintf (stream, "%s", arg);
1292 }
1293
1294 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1295 square roots. Place at GSI and LOC. Limit the maximum depth
1296 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1297 result of the expanded sequence or NULL_TREE if the expansion failed.
1298
1299 This routine assumes that ARG1 is a real number with a fractional part
1300 (the integer exponent case will have been handled earlier in
1301 gimple_expand_builtin_pow).
1302
1303 For ARG1 > 0.0:
1304 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1305 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1306 FRAC_PART == ARG1 - WHOLE_PART:
1307 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1308 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1309 if it can be expressed as such, that is if FRAC_PART satisfies:
1310 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1311 where integer a[i] is either 0 or 1.
1312
1313 Example:
1314 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1315 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1316
1317 For ARG1 < 0.0 there are two approaches:
1318 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1319 is calculated as above.
1320
1321 Example:
1322 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1323 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1324
1325 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1326 FRAC_PART := ARG1 - WHOLE_PART
1327 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1328 Example:
1329 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1330 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1331
1332 For ARG1 < 0.0 we choose between (A) and (B) depending on
1333 how many multiplications we'd have to do.
1334 So, for the example in (B): POW (x, -5.875), if we were to
1335 follow algorithm (A) we would produce:
1336 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1337 which contains more multiplications than approach (B).
1338
1339 Hopefully, this approach will eliminate potentially expensive POW library
1340 calls when unsafe floating point math is enabled and allow the compiler to
1341 further optimise the multiplies, square roots and divides produced by this
1342 function. */
1343
1344 static tree
1345 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1346 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1347 {
1348 tree type = TREE_TYPE (arg0);
1349 machine_mode mode = TYPE_MODE (type);
1350 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1351 bool one_over = true;
1352
1353 if (!sqrtfn)
1354 return NULL_TREE;
1355
1356 if (TREE_CODE (arg1) != REAL_CST)
1357 return NULL_TREE;
1358
1359 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1360
1361 gcc_assert (max_depth > 0);
1362 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1363
1364 struct pow_synth_sqrt_info synth_info;
1365 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1366 synth_info.deepest = 0;
1367 synth_info.num_mults = 0;
1368
1369 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1370 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1371
1372 /* The whole and fractional parts of exp. */
1373 REAL_VALUE_TYPE whole_part;
1374 REAL_VALUE_TYPE frac_part;
1375
1376 real_floor (&whole_part, mode, &exp);
1377 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1378
1379
1380 REAL_VALUE_TYPE ceil_whole = dconst0;
1381 REAL_VALUE_TYPE ceil_fract = dconst0;
1382
1383 if (neg_exp)
1384 {
1385 real_ceil (&ceil_whole, mode, &exp);
1386 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1387 }
1388
1389 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1390 return NULL_TREE;
1391
1392 /* Check whether it's more profitable to not use 1.0 / ... */
1393 if (neg_exp)
1394 {
1395 struct pow_synth_sqrt_info alt_synth_info;
1396 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1397 alt_synth_info.deepest = 0;
1398 alt_synth_info.num_mults = 0;
1399
1400 if (representable_as_half_series_p (ceil_fract, max_depth,
1401 &alt_synth_info)
1402 && alt_synth_info.deepest <= synth_info.deepest
1403 && alt_synth_info.num_mults < synth_info.num_mults)
1404 {
1405 whole_part = ceil_whole;
1406 frac_part = ceil_fract;
1407 synth_info.deepest = alt_synth_info.deepest;
1408 synth_info.num_mults = alt_synth_info.num_mults;
1409 memcpy (synth_info.factors, alt_synth_info.factors,
1410 (max_depth + 1) * sizeof (bool));
1411 one_over = false;
1412 }
1413 }
1414
1415 HOST_WIDE_INT n = real_to_integer (&whole_part);
1416 REAL_VALUE_TYPE cint;
1417 real_from_integer (&cint, VOIDmode, n, SIGNED);
1418
1419 if (!real_identical (&whole_part, &cint))
1420 return NULL_TREE;
1421
1422 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1423 return NULL_TREE;
1424
1425 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1426
1427 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1428
1429 /* Calculate the integer part of the exponent. */
1430 if (n > 1)
1431 {
1432 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1433 if (!integer_res)
1434 return NULL_TREE;
1435 }
1436
1437 if (dump_file)
1438 {
1439 char string[64];
1440
1441 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1442 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1443
1444 if (neg_exp)
1445 {
1446 if (one_over)
1447 {
1448 fprintf (dump_file, "1.0 / (");
1449 dump_integer_part (dump_file, "x", n);
1450 if (n > 0)
1451 fprintf (dump_file, " * ");
1452 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1453 fprintf (dump_file, ")");
1454 }
1455 else
1456 {
1457 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1458 fprintf (dump_file, " / (");
1459 dump_integer_part (dump_file, "x", n);
1460 fprintf (dump_file, ")");
1461 }
1462 }
1463 else
1464 {
1465 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1466 if (n > 0)
1467 fprintf (dump_file, " * ");
1468 dump_integer_part (dump_file, "x", n);
1469 }
1470
1471 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1472 }
1473
1474
1475 tree fract_res = NULL_TREE;
1476 cache[0] = arg0;
1477
1478 /* Calculate the fractional part of the exponent. */
1479 for (unsigned i = 0; i < synth_info.deepest; i++)
1480 {
1481 if (synth_info.factors[i])
1482 {
1483 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1484
1485 if (!fract_res)
1486 fract_res = sqrt_chain;
1487
1488 else
1489 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1490 fract_res, sqrt_chain);
1491 }
1492 }
1493
1494 tree res = NULL_TREE;
1495
1496 if (neg_exp)
1497 {
1498 if (one_over)
1499 {
1500 if (n > 0)
1501 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1502 fract_res, integer_res);
1503 else
1504 res = fract_res;
1505
1506 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1507 build_real (type, dconst1), res);
1508 }
1509 else
1510 {
1511 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1512 fract_res, integer_res);
1513 }
1514 }
1515 else
1516 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1517 fract_res, integer_res);
1518 return res;
1519 }
1520
1521 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1522 with location info LOC. If possible, create an equivalent and
1523 less expensive sequence of statements prior to GSI, and return an
1524    expression holding the result.  */
1525
1526 static tree
1527 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1528 tree arg0, tree arg1)
1529 {
1530 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1531 REAL_VALUE_TYPE c2, dconst3;
1532 HOST_WIDE_INT n;
1533 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1534 machine_mode mode;
1535 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1536 bool hw_sqrt_exists, c_is_int, c2_is_int;
1537
1538 dconst1_4 = dconst1;
1539 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1540
1541 /* If the exponent isn't a constant, there's nothing of interest
1542 to be done. */
1543 if (TREE_CODE (arg1) != REAL_CST)
1544 return NULL_TREE;
1545
1546 /* Don't perform the operation if flag_signaling_nans is on
1547 and the operand is a signaling NaN. */
1548 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1549 && ((TREE_CODE (arg0) == REAL_CST
1550 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1551 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1552 return NULL_TREE;
1553
1554 /* If the exponent is equivalent to an integer, expand to an optimal
1555 multiplication sequence when profitable. */
1556 c = TREE_REAL_CST (arg1);
1557 n = real_to_integer (&c);
1558 real_from_integer (&cint, VOIDmode, n, SIGNED);
1559 c_is_int = real_identical (&c, &cint);
1560
1561 if (c_is_int
1562 && ((n >= -1 && n <= 2)
1563 || (flag_unsafe_math_optimizations
1564 && speed_p
1565 && powi_cost (n) <= POWI_MAX_MULTS)))
1566 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1567
1568 /* Attempt various optimizations using sqrt and cbrt. */
1569 type = TREE_TYPE (arg0);
1570 mode = TYPE_MODE (type);
1571 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1572
1573 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1574 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1575 sqrt(-0) = -0. */
1576 if (sqrtfn
1577 && real_equal (&c, &dconsthalf)
1578 && !HONOR_SIGNED_ZEROS (mode))
1579 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1580
1581 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1582
1583 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1584 optimizations since 1./3. is not exactly representable. If x
1585 is negative and finite, the correct value of pow(x,1./3.) is
1586 a NaN with the "invalid" exception raised, because the value
1587 of 1./3. actually has an even denominator. The correct value
1588 of cbrt(x) is a negative real value. */
1589 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1590 dconst1_3 = real_value_truncate (mode, dconst_third ());
1591
1592 if (flag_unsafe_math_optimizations
1593 && cbrtfn
1594 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1595 && real_equal (&c, &dconst1_3))
1596 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1597
1598 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1599 if we don't have a hardware sqrt insn. */
1600 dconst1_6 = dconst1_3;
1601 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1602
1603 if (flag_unsafe_math_optimizations
1604 && sqrtfn
1605 && cbrtfn
1606 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1607 && speed_p
1608 && hw_sqrt_exists
1609 && real_equal (&c, &dconst1_6))
1610 {
1611 /* sqrt(x) */
1612 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1613
1614 /* cbrt(sqrt(x)) */
1615 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1616 }
1617
1618
1619 /* Attempt to expand the POW as a product of square root chains.
1620      Expand the 0.25 case even when optimising for size.  */
1621 if (flag_unsafe_math_optimizations
1622 && sqrtfn
1623 && hw_sqrt_exists
1624 && (speed_p || real_equal (&c, &dconst1_4))
1625 && !HONOR_SIGNED_ZEROS (mode))
1626 {
1627 unsigned int max_depth = speed_p
1628 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1629 : 2;
1630
1631 tree expand_with_sqrts
1632 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1633
1634 if (expand_with_sqrts)
1635 return expand_with_sqrts;
1636 }
1637
1638 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1639 n = real_to_integer (&c2);
1640 real_from_integer (&cint, VOIDmode, n, SIGNED);
1641 c2_is_int = real_identical (&c2, &cint);
1642
1643 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1644
1645 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1646 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1647
1648 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1649 different from pow(x, 1./3.) due to rounding and behavior with
1650 negative x, we need to constrain this transformation to unsafe
1651 math and positive x or finite math. */
1652 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1653 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1654 real_round (&c2, mode, &c2);
1655 n = real_to_integer (&c2);
1656 real_from_integer (&cint, VOIDmode, n, SIGNED);
1657 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1658 real_convert (&c2, mode, &c2);
1659
1660 if (flag_unsafe_math_optimizations
1661 && cbrtfn
1662 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1663 && real_identical (&c2, &c)
1664 && !c2_is_int
1665 && optimize_function_for_speed_p (cfun)
1666 && powi_cost (n / 3) <= POWI_MAX_MULTS)
1667 {
1668 tree powi_x_ndiv3 = NULL_TREE;
1669
1670 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
1671 possible or profitable, give up. Skip the degenerate case when
1672 abs(n) < 3, where the result is always 1. */
1673 if (absu_hwi (n) >= 3)
1674 {
1675 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1676 abs_hwi (n / 3));
1677 if (!powi_x_ndiv3)
1678 return NULL_TREE;
1679 }
1680
1681 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
1682 as that creates an unnecessary variable. Instead, just produce
1683 either cbrt(x) or cbrt(x) * cbrt(x). */
1684 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1685
1686 if (absu_hwi (n) % 3 == 1)
1687 powi_cbrt_x = cbrt_x;
1688 else
1689 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1690 cbrt_x, cbrt_x);
1691
1692 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
1693 if (absu_hwi (n) < 3)
1694 result = powi_cbrt_x;
1695 else
1696 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1697 powi_x_ndiv3, powi_cbrt_x);
1698
1699 /* If n is negative, reciprocate the result. */
1700 if (n < 0)
1701 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1702 build_real (type, dconst1), result);
1703
1704 return result;
1705 }
1706
1707 /* No optimizations succeeded. */
1708 return NULL_TREE;
1709 }
1710
1711 /* ARG is the argument to a cabs builtin call in GSI with location info
1712 LOC. Create a sequence of statements prior to GSI that calculates
1713 sqrt(R*R + I*I), where R and I are the real and imaginary components
1714 of ARG, respectively. Return an expression holding the result. */
1715
1716 static tree
1717 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1718 {
1719 tree real_part, imag_part, addend1, addend2, sum, result;
1720 tree type = TREE_TYPE (TREE_TYPE (arg));
1721 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1722 machine_mode mode = TYPE_MODE (type);
1723
1724 if (!flag_unsafe_math_optimizations
1725 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1726 || !sqrtfn
1727 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1728 return NULL_TREE;
1729
1730 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1731 REALPART_EXPR, arg);
1732 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1733 real_part, real_part);
1734 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1735 IMAGPART_EXPR, arg);
1736 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1737 imag_part, imag_part);
1738 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1739 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1740
1741 return result;
1742 }
1743
1744 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1745 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
1746 an optimal number of multiplies, when n is a constant. */
1747
1748 namespace {
1749
1750 const pass_data pass_data_cse_sincos =
1751 {
1752 GIMPLE_PASS, /* type */
1753 "sincos", /* name */
1754 OPTGROUP_NONE, /* optinfo_flags */
1755 TV_NONE, /* tv_id */
1756 PROP_ssa, /* properties_required */
1757 PROP_gimple_opt_math, /* properties_provided */
1758 0, /* properties_destroyed */
1759 0, /* todo_flags_start */
1760 TODO_update_ssa, /* todo_flags_finish */
1761 };
1762
1763 class pass_cse_sincos : public gimple_opt_pass
1764 {
1765 public:
1766 pass_cse_sincos (gcc::context *ctxt)
1767 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1768 {}
1769
1770 /* opt_pass methods: */
1771 virtual bool gate (function *)
1772 {
1773 /* We no longer require either sincos or cexp, since powi expansion
1774 piggybacks on this pass. */
1775 return optimize;
1776 }
1777
1778 virtual unsigned int execute (function *);
1779
1780 }; // class pass_cse_sincos
1781
1782 unsigned int
1783 pass_cse_sincos::execute (function *fun)
1784 {
1785 basic_block bb;
1786 bool cfg_changed = false;
1787
1788 calculate_dominance_info (CDI_DOMINATORS);
1789 memset (&sincos_stats, 0, sizeof (sincos_stats));
1790
1791 FOR_EACH_BB_FN (bb, fun)
1792 {
1793 gimple_stmt_iterator gsi;
1794 bool cleanup_eh = false;
1795
1796 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1797 {
1798 gimple *stmt = gsi_stmt (gsi);
1799
1800 /* Only the last stmt in a bb could throw; there is no need to call
1801 gimple_purge_dead_eh_edges if we change something in the middle
1802 of a basic block. */
1803 cleanup_eh = false;
1804
1805 if (is_gimple_call (stmt)
1806 && gimple_call_lhs (stmt))
1807 {
1808 tree arg, arg0, arg1, result;
1809 HOST_WIDE_INT n;
1810 location_t loc;
1811
1812 switch (gimple_call_combined_fn (stmt))
1813 {
1814 CASE_CFN_COS:
1815 CASE_CFN_SIN:
1816 CASE_CFN_CEXPI:
1817 /* Make sure we have either sincos or cexp. */
1818 if (!targetm.libc_has_function (function_c99_math_complex)
1819 && !targetm.libc_has_function (function_sincos))
1820 break;
1821
1822 arg = gimple_call_arg (stmt, 0);
1823 if (TREE_CODE (arg) == SSA_NAME)
1824 cfg_changed |= execute_cse_sincos_1 (arg);
1825 break;
1826
1827 CASE_CFN_POW:
1828 arg0 = gimple_call_arg (stmt, 0);
1829 arg1 = gimple_call_arg (stmt, 1);
1830
1831 loc = gimple_location (stmt);
1832 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1833
1834 if (result)
1835 {
1836 tree lhs = gimple_get_lhs (stmt);
1837 gassign *new_stmt = gimple_build_assign (lhs, result);
1838 gimple_set_location (new_stmt, loc);
1839 unlink_stmt_vdef (stmt);
1840 gsi_replace (&gsi, new_stmt, true);
1841 cleanup_eh = true;
1842 if (gimple_vdef (stmt))
1843 release_ssa_name (gimple_vdef (stmt));
1844 }
1845 break;
1846
1847 CASE_CFN_POWI:
1848 arg0 = gimple_call_arg (stmt, 0);
1849 arg1 = gimple_call_arg (stmt, 1);
1850 loc = gimple_location (stmt);
1851
1852 if (real_minus_onep (arg0))
1853 {
1854 tree t0, t1, cond, one, minus_one;
1855 gassign *stmt;
1856
1857 t0 = TREE_TYPE (arg0);
1858 t1 = TREE_TYPE (arg1);
1859 one = build_real (t0, dconst1);
1860 minus_one = build_real (t0, dconstm1);
1861
1862 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1863 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1864 arg1, build_int_cst (t1, 1));
1865 gimple_set_location (stmt, loc);
1866 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1867
1868 result = make_temp_ssa_name (t0, NULL, "powi");
1869 stmt = gimple_build_assign (result, COND_EXPR, cond,
1870 minus_one, one);
1871 gimple_set_location (stmt, loc);
1872 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1873 }
1874 else
1875 {
1876 if (!tree_fits_shwi_p (arg1))
1877 break;
1878
1879 n = tree_to_shwi (arg1);
1880 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1881 }
1882
1883 if (result)
1884 {
1885 tree lhs = gimple_get_lhs (stmt);
1886 gassign *new_stmt = gimple_build_assign (lhs, result);
1887 gimple_set_location (new_stmt, loc);
1888 unlink_stmt_vdef (stmt);
1889 gsi_replace (&gsi, new_stmt, true);
1890 cleanup_eh = true;
1891 if (gimple_vdef (stmt))
1892 release_ssa_name (gimple_vdef (stmt));
1893 }
1894 break;
1895
1896 CASE_CFN_CABS:
1897 arg0 = gimple_call_arg (stmt, 0);
1898 loc = gimple_location (stmt);
1899 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1900
1901 if (result)
1902 {
1903 tree lhs = gimple_get_lhs (stmt);
1904 gassign *new_stmt = gimple_build_assign (lhs, result);
1905 gimple_set_location (new_stmt, loc);
1906 unlink_stmt_vdef (stmt);
1907 gsi_replace (&gsi, new_stmt, true);
1908 cleanup_eh = true;
1909 if (gimple_vdef (stmt))
1910 release_ssa_name (gimple_vdef (stmt));
1911 }
1912 break;
1913
1914 default:;
1915 }
1916 }
1917 }
1918 if (cleanup_eh)
1919 cfg_changed |= gimple_purge_dead_eh_edges (bb);
1920 }
1921
1922 statistics_counter_event (fun, "sincos statements inserted",
1923 sincos_stats.inserted);
1924
1925 return cfg_changed ? TODO_cleanup_cfg : 0;
1926 }
1927
1928 } // anon namespace
1929
1930 gimple_opt_pass *
1931 make_pass_cse_sincos (gcc::context *ctxt)
1932 {
1933 return new pass_cse_sincos (ctxt);
1934 }
1935
1936 /* A symbolic number structure is used to detect byte permutation and selection
1937 patterns of a source. To achieve that, its field N contains an artificial
1938 number consisting of BITS_PER_MARKER sized markers tracking where each
1939 byte comes from in the source:
1940
1941 0 - target byte has the value 0
1942 FF - target byte has an unknown value (eg. due to sign extension)
1943 1..size - marker value is the byte index in the source (0 for lsb).
1944
1945 To detect permutations on memory sources (arrays and structures), a symbolic
1946 number is also associated with:
1947 - a base address BASE_ADDR and an OFFSET giving the address of the source;
1948 - a range which gives the difference between the highest and lowest accessed
1949 memory location to make such a symbolic number;
1950 - the address SRC of the source element of lowest address as a convenience
1951 to easily get BASE_ADDR + offset + lowest bytepos;
1952 - the number N_OPS of expressions bitwise ORed together, to represent the
1953 approximate cost of the computation.
1954
1955 Note 1: the range is different from size as size reflects the size of the
1956 type of the current expression. For instance, for an array char a[],
1957 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
1958 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
1959 time a range of 1.
1960
1961 Note 2: for non-memory sources, range holds the same value as size.
1962
1963 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
1964
1965 struct symbolic_number {
1966 uint64_t n;
1967 tree type;
1968 tree base_addr;
1969 tree offset;
1970 HOST_WIDE_INT bytepos;
1971 tree src;
1972 tree alias_set;
1973 tree vuse;
1974 unsigned HOST_WIDE_INT range;
1975 int n_ops;
1976 };
1977
1978 #define BITS_PER_MARKER 8
1979 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
1980 #define MARKER_BYTE_UNKNOWN MARKER_MASK
1981 #define HEAD_MARKER(n, size) \
1982 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
1983
1984 /* The number which the find_bswap_or_nop_1 result should match in
1985 order to have a nop. The number is masked according to the size of
1986 the symbolic number before using it. */
1987 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
1988 (uint64_t)0x08070605 << 32 | 0x04030201)
1989
1990 /* The number which the find_bswap_or_nop_1 result should match in
1991 order to have a byte swap. The number is masked according to the
1992 size of the symbolic number before using it. */
1993 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
1994 (uint64_t)0x01020304 << 32 | 0x05060708)
1995
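/* Illustrative example of the marker encoding (editorial addition, not part
   of the GCC sources): with BITS_PER_MARKER == 8, the symbolic number for a
   plain 4-byte value is CMPNOP masked to 4 bytes, i.e. 0x04030201 -- source
   byte 0 ends up in the least significant target byte and source byte 3 in
   the most significant one.  If the statement chain instead reverses the
   bytes, the markers end up as 0x01020304, which is CMPXCHG shifted down to
   4 bytes, and the computation is recognized as a byte swap.  */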
1996 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1997 number N. Return false if the requested operation is not permitted
1998 on a symbolic number. */
1999
2000 static inline bool
2001 do_shift_rotate (enum tree_code code,
2002 struct symbolic_number *n,
2003 int count)
2004 {
2005 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2006 unsigned head_marker;
2007
2008 if (count % BITS_PER_UNIT != 0)
2009 return false;
2010 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
2011
2012 /* Zero out the extra bits of N in order to avoid them being shifted
2013 into the significant bits. */
2014 if (size < 64 / BITS_PER_MARKER)
2015 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2016
2017 switch (code)
2018 {
2019 case LSHIFT_EXPR:
2020 n->n <<= count;
2021 break;
2022 case RSHIFT_EXPR:
2023 head_marker = HEAD_MARKER (n->n, size);
2024 n->n >>= count;
2025 /* Arithmetic shift of signed type: result is dependent on the value. */
2026 if (!TYPE_UNSIGNED (n->type) && head_marker)
2027 for (i = 0; i < count / BITS_PER_MARKER; i++)
2028 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2029 << ((size - 1 - i) * BITS_PER_MARKER);
2030 break;
2031 case LROTATE_EXPR:
2032 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
2033 break;
2034 case RROTATE_EXPR:
2035 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
2036 break;
2037 default:
2038 return false;
2039 }
2040 /* Zero unused bits for size. */
2041 if (size < 64 / BITS_PER_MARKER)
2042 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2043 return true;
2044 }
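
/* Illustrative example (editorial addition, not part of the GCC sources):
   for a 4-byte symbolic number with markers 0x04030201, an LSHIFT_EXPR by 8
   bits yields markers 0x03020100 (the low byte is now known to be zero),
   while an RSHIFT_EXPR by 8 bits on a signed type whose head marker is set
   fills the vacated top byte with MARKER_BYTE_UNKNOWN, giving 0xff040302.  */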
2045
2046 /* Perform sanity checking for the symbolic number N and the gimple
2047 statement STMT. */
2048
2049 static inline bool
2050 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
2051 {
2052 tree lhs_type;
2053
2054 lhs_type = gimple_expr_type (stmt);
2055
2056 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2057 return false;
2058
2059 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
2060 return false;
2061
2062 return true;
2063 }
2064
2065 /* Initialize the symbolic number N for the bswap pass from the base element
2066 SRC manipulated by the bitwise OR expression. */
2067
2068 static bool
2069 init_symbolic_number (struct symbolic_number *n, tree src)
2070 {
2071 int size;
2072
2073 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
2074 return false;
2075
2076 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
2077 n->src = src;
2078
2079 /* Set up the symbolic number N by setting each byte to a value between 1 and
2080 the byte size of rhs1. The highest order byte is set to that size and the
2081 lowest order byte to 1. */
2082 n->type = TREE_TYPE (src);
2083 size = TYPE_PRECISION (n->type);
2084 if (size % BITS_PER_UNIT != 0)
2085 return false;
2086 size /= BITS_PER_UNIT;
2087 if (size > 64 / BITS_PER_MARKER)
2088 return false;
2089 n->range = size;
2090 n->n = CMPNOP;
2091 n->n_ops = 1;
2092
2093 if (size < 64 / BITS_PER_MARKER)
2094 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2095
2096 return true;
2097 }
2098
2099 /* Check whether STMT might be a byte swap or a nop from a memory source and
2100 return the answer. If so, REF is that memory source and the base of the memory
2101 area accessed and the offset of the access from that base are recorded in N. */
2102
2103 bool
2104 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
2105 {
2106 /* Leaf node is an array or component ref. Memorize its base and
2107 offset from base to compare to other such leaf nodes.
2108 HOST_WIDE_INT bitsize, bitpos;
2109 machine_mode mode;
2110 int unsignedp, reversep, volatilep;
2111 tree offset, base_addr;
2112
2113 /* Not prepared to handle PDP endian. */
2114 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
2115 return false;
2116
2117 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
2118 return false;
2119
2120 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
2121 &unsignedp, &reversep, &volatilep);
2122
2123 if (TREE_CODE (base_addr) == MEM_REF)
2124 {
2125 offset_int bit_offset = 0;
2126 tree off = TREE_OPERAND (base_addr, 1);
2127
2128 if (!integer_zerop (off))
2129 {
2130 offset_int boff, coff = mem_ref_offset (base_addr);
2131 boff = coff << LOG2_BITS_PER_UNIT;
2132 bit_offset += boff;
2133 }
2134
2135 base_addr = TREE_OPERAND (base_addr, 0);
2136
2137 /* Avoid returning a negative bitpos as this may wreak havoc later. */
2138 if (wi::neg_p (bit_offset))
2139 {
2140 offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
2141 offset_int tem = bit_offset.and_not (mask);
2142 /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
2143 Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
2144 bit_offset -= tem;
2145 tem >>= LOG2_BITS_PER_UNIT;
2146 if (offset)
2147 offset = size_binop (PLUS_EXPR, offset,
2148 wide_int_to_tree (sizetype, tem));
2149 else
2150 offset = wide_int_to_tree (sizetype, tem);
2151 }
2152
2153 bitpos += bit_offset.to_shwi ();
2154 }
2155
2156 if (bitpos % BITS_PER_UNIT)
2157 return false;
2158 if (bitsize % BITS_PER_UNIT)
2159 return false;
2160 if (reversep)
2161 return false;
2162
2163 if (!init_symbolic_number (n, ref))
2164 return false;
2165 n->base_addr = base_addr;
2166 n->offset = offset;
2167 n->bytepos = bitpos / BITS_PER_UNIT;
2168 n->alias_set = reference_alias_ptr_type (ref);
2169 n->vuse = gimple_vuse (stmt);
2170 return true;
2171 }
2172
2173 /* Compute the symbolic number N representing the result of a bitwise OR on the
2174 two symbolic numbers N1 and N2 whose source statements are respectively
2175 SOURCE_STMT1 and SOURCE_STMT2. */
2176
2177 static gimple *
2178 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
2179 gimple *source_stmt2, struct symbolic_number *n2,
2180 struct symbolic_number *n)
2181 {
2182 int i, size;
2183 uint64_t mask;
2184 gimple *source_stmt;
2185 struct symbolic_number *n_start;
2186
2187 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
2188 if (TREE_CODE (rhs1) == BIT_FIELD_REF
2189 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
2190 rhs1 = TREE_OPERAND (rhs1, 0);
2191 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
2192 if (TREE_CODE (rhs2) == BIT_FIELD_REF
2193 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
2194 rhs2 = TREE_OPERAND (rhs2, 0);
2195
2196 /* The sources are different; cancel the bswap if they are not memory
2197 locations with the same base (array, structure, ...). */
2198 if (rhs1 != rhs2)
2199 {
2200 uint64_t inc;
2201 HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
2202 struct symbolic_number *toinc_n_ptr, *n_end;
2203 basic_block bb1, bb2;
2204
2205 if (!n1->base_addr || !n2->base_addr
2206 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
2207 return NULL;
2208
2209 if (!n1->offset != !n2->offset
2210 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
2211 return NULL;
2212
2213 if (n1->bytepos < n2->bytepos)
2214 {
2215 n_start = n1;
2216 start_sub = n2->bytepos - n1->bytepos;
2217 }
2218 else
2219 {
2220 n_start = n2;
2221 start_sub = n1->bytepos - n2->bytepos;
2222 }
2223
2224 bb1 = gimple_bb (source_stmt1);
2225 bb2 = gimple_bb (source_stmt2);
2226 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
2227 source_stmt = source_stmt1;
2228 else
2229 source_stmt = source_stmt2;
2230
2231 /* Find the highest address at which a load is performed and
2232 compute related info. */
2233 end1 = n1->bytepos + (n1->range - 1);
2234 end2 = n2->bytepos + (n2->range - 1);
2235 if (end1 < end2)
2236 {
2237 end = end2;
2238 end_sub = end2 - end1;
2239 }
2240 else
2241 {
2242 end = end1;
2243 end_sub = end1 - end2;
2244 }
2245 n_end = (end2 > end1) ? n2 : n1;
2246
2247 /* Find symbolic number whose lsb is the most significant. */
2248 if (BYTES_BIG_ENDIAN)
2249 toinc_n_ptr = (n_end == n1) ? n2 : n1;
2250 else
2251 toinc_n_ptr = (n_start == n1) ? n2 : n1;
2252
2253 n->range = end - n_start->bytepos + 1;
2254
2255 /* Check that the range of memory covered can be represented by
2256 a symbolic number. */
2257 if (n->range > 64 / BITS_PER_MARKER)
2258 return NULL;
2259
2260 /* Reinterpret the byte marks in the symbolic number holding the value
2261 of greater weight, according to the target endianness. */
2262 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
2263 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
2264 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2265 {
2266 unsigned marker
2267 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
2268 if (marker && marker != MARKER_BYTE_UNKNOWN)
2269 toinc_n_ptr->n += inc;
2270 }
2271 }
2272 else
2273 {
2274 n->range = n1->range;
2275 n_start = n1;
2276 source_stmt = source_stmt1;
2277 }
2278
2279 if (!n1->alias_set
2280 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
2281 n->alias_set = n1->alias_set;
2282 else
2283 n->alias_set = ptr_type_node;
2284 n->vuse = n_start->vuse;
2285 n->base_addr = n_start->base_addr;
2286 n->offset = n_start->offset;
2287 n->src = n_start->src;
2288 n->bytepos = n_start->bytepos;
2289 n->type = n_start->type;
2290 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2291
2292 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
2293 {
2294 uint64_t masked1, masked2;
2295
2296 masked1 = n1->n & mask;
2297 masked2 = n2->n & mask;
2298 if (masked1 && masked2 && masked1 != masked2)
2299 return NULL;
2300 }
2301 n->n = n1->n | n2->n;
2302 n->n_ops = n1->n_ops + n2->n_ops;
2303
2304 return source_stmt;
2305 }
2306
2307 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
2308 the operation given by the rhs of STMT on the result. If the operation
2309 can be executed successfully, the function returns a gimple stmt whose
2310 rhs's first tree is the expression of the source operand; otherwise it
2311 returns NULL. */
2312
2313 static gimple *
2314 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
2315 {
2316 enum tree_code code;
2317 tree rhs1, rhs2 = NULL;
2318 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
2319 enum gimple_rhs_class rhs_class;
2320
2321 if (!limit || !is_gimple_assign (stmt))
2322 return NULL;
2323
2324 rhs1 = gimple_assign_rhs1 (stmt);
2325
2326 if (find_bswap_or_nop_load (stmt, rhs1, n))
2327 return stmt;
2328
2329 /* Handle BIT_FIELD_REF. */
2330 if (TREE_CODE (rhs1) == BIT_FIELD_REF
2331 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
2332 {
2333 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
2334 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
2335 if (bitpos % BITS_PER_UNIT == 0
2336 && bitsize % BITS_PER_UNIT == 0
2337 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
2338 {
2339 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
2340 if (BYTES_BIG_ENDIAN)
2341 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
2342
2343 /* Shift. */
2344 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
2345 return NULL;
2346
2347 /* Mask. */
2348 uint64_t mask = 0;
2349 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2350 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
2351 i++, tmp <<= BITS_PER_UNIT)
2352 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2353 n->n &= mask;
2354
2355 /* Convert. */
2356 n->type = TREE_TYPE (rhs1);
2357 if (!n->base_addr)
2358 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2359
2360 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
2361 }
2362
2363 return NULL;
2364 }
2365
2366 if (TREE_CODE (rhs1) != SSA_NAME)
2367 return NULL;
2368
2369 code = gimple_assign_rhs_code (stmt);
2370 rhs_class = gimple_assign_rhs_class (stmt);
2371 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2372
2373 if (rhs_class == GIMPLE_BINARY_RHS)
2374 rhs2 = gimple_assign_rhs2 (stmt);
2375
2376 /* Handle unary rhs and binary rhs with integer constants as second
2377 operand. */
2378
2379 if (rhs_class == GIMPLE_UNARY_RHS
2380 || (rhs_class == GIMPLE_BINARY_RHS
2381 && TREE_CODE (rhs2) == INTEGER_CST))
2382 {
2383 if (code != BIT_AND_EXPR
2384 && code != LSHIFT_EXPR
2385 && code != RSHIFT_EXPR
2386 && code != LROTATE_EXPR
2387 && code != RROTATE_EXPR
2388 && !CONVERT_EXPR_CODE_P (code))
2389 return NULL;
2390
2391 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
2392
2393 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
2394 we have to initialize the symbolic number. */
2395 if (!source_stmt1)
2396 {
2397 if (gimple_assign_load_p (stmt)
2398 || !init_symbolic_number (n, rhs1))
2399 return NULL;
2400 source_stmt1 = stmt;
2401 }
2402
2403 switch (code)
2404 {
2405 case BIT_AND_EXPR:
2406 {
2407 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2408 uint64_t val = int_cst_value (rhs2), mask = 0;
2409 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2410
2411 /* Only constants masking full bytes are allowed. */
2412 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
2413 if ((val & tmp) != 0 && (val & tmp) != tmp)
2414 return NULL;
2415 else if (val & tmp)
2416 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2417
2418 n->n &= mask;
2419 }
2420 break;
2421 case LSHIFT_EXPR:
2422 case RSHIFT_EXPR:
2423 case LROTATE_EXPR:
2424 case RROTATE_EXPR:
2425 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
2426 return NULL;
2427 break;
2428 CASE_CONVERT:
2429 {
2430 int i, type_size, old_type_size;
2431 tree type;
2432
2433 type = gimple_expr_type (stmt);
2434 type_size = TYPE_PRECISION (type);
2435 if (type_size % BITS_PER_UNIT != 0)
2436 return NULL;
2437 type_size /= BITS_PER_UNIT;
2438 if (type_size > 64 / BITS_PER_MARKER)
2439 return NULL;
2440
2441 /* Sign extension: result is dependent on the value. */
2442 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2443 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
2444 && HEAD_MARKER (n->n, old_type_size))
2445 for (i = 0; i < type_size - old_type_size; i++)
2446 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2447 << ((type_size - 1 - i) * BITS_PER_MARKER);
2448
2449 if (type_size < 64 / BITS_PER_MARKER)
2450 {
2451 /* If STMT casts to a smaller type, mask out the bits not
2452 belonging to the target type. */
2453 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
2454 }
2455 n->type = type;
2456 if (!n->base_addr)
2457 n->range = type_size;
2458 }
2459 break;
2460 default:
2461 return NULL;
2462 };
2463 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
2464 }
2465
2466 /* Handle binary rhs. */
2467
2468 if (rhs_class == GIMPLE_BINARY_RHS)
2469 {
2470 struct symbolic_number n1, n2;
2471 gimple *source_stmt, *source_stmt2;
2472
2473 if (code != BIT_IOR_EXPR)
2474 return NULL;
2475
2476 if (TREE_CODE (rhs2) != SSA_NAME)
2477 return NULL;
2478
2479 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2480
2481 switch (code)
2482 {
2483 case BIT_IOR_EXPR:
2484 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
2485
2486 if (!source_stmt1)
2487 return NULL;
2488
2489 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
2490
2491 if (!source_stmt2)
2492 return NULL;
2493
2494 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
2495 return NULL;
2496
2497 if (!n1.vuse != !n2.vuse
2498 || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
2499 return NULL;
2500
2501 source_stmt
2502 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
2503
2504 if (!source_stmt)
2505 return NULL;
2506
2507 if (!verify_symbolic_number_p (n, stmt))
2508 return NULL;
2509
2510 break;
2511 default:
2512 return NULL;
2513 }
2514 return source_stmt;
2515 }
2516 return NULL;
2517 }
2518
2519 /* Check whether STMT completes a bswap implementation or a read in a given
2520 endianness consisting of ORs, SHIFTs and ANDs, and set *BSWAP
2521 accordingly. It also sets N to represent the kind of operations
2522 performed: the size of the resulting expression and whether it works on
2523 a memory source, and if so its alias set and vuse. Finally, the
2524 function returns a stmt whose rhs's first tree is the source
2525 expression. */
2526
2527 static gimple *
2528 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
2529 {
2530 unsigned rsize;
2531 uint64_t tmpn, mask;
2532 /* The number which the find_bswap_or_nop_1 result should match in order
2533 to have a full byte swap. The number is shifted to the right
2534 according to the size of the symbolic number before using it. */
2535 uint64_t cmpxchg = CMPXCHG;
2536 uint64_t cmpnop = CMPNOP;
2537
2538 gimple *ins_stmt;
2539 int limit;
2540
2541 /* The last parameter determines the search depth limit. It usually
2542 correlates directly to the number n of bytes to be touched. We
2543 increase that number by log2(n) + 1 here in order to also
2544 cover signed -> unsigned conversions of the src operand as can be seen
2545 in libgcc, and for the initial shift/and operation of the src operand. */
2546 limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
2547 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
2548 ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
2549
2550 if (!ins_stmt)
2551 return NULL;
2552
2553 /* Find real size of result (highest non-zero byte). */
2554 if (n->base_addr)
2555 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2556 else
2557 rsize = n->range;
2558
2559 /* Zero out the bits corresponding to untouched bytes in the original gimple
2560 expression. */
2561 if (n->range < (int) sizeof (int64_t))
2562 {
2563 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2564 cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
2565 cmpnop &= mask;
2566 }
2567
2568 /* Zero out the bits corresponding to unused bytes in the result of the
2569 gimple expression. */
2570 if (rsize < n->range)
2571 {
2572 if (BYTES_BIG_ENDIAN)
2573 {
2574 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
2575 cmpxchg &= mask;
2576 cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
2577 }
2578 else
2579 {
2580 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
2581 cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
2582 cmpnop &= mask;
2583 }
2584 n->range = rsize;
2585 }
2586
2587 /* A complete byte swap should make the symbolic number start with
2588 the largest digit in the highest order byte. An unchanged symbolic
2589 number indicates a read with the same endianness as the target architecture. */
2590 if (n->n == cmpnop)
2591 *bswap = false;
2592 else if (n->n == cmpxchg)
2593 *bswap = true;
2594 else
2595 return NULL;
2596
2597 /* Useless bit manipulation performed by code. */
2598 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
2599 return NULL;
2600
2601 n->range *= BITS_PER_UNIT;
2602 return ins_stmt;
2603 }
2604
2605 namespace {
2606
2607 const pass_data pass_data_optimize_bswap =
2608 {
2609 GIMPLE_PASS, /* type */
2610 "bswap", /* name */
2611 OPTGROUP_NONE, /* optinfo_flags */
2612 TV_NONE, /* tv_id */
2613 PROP_ssa, /* properties_required */
2614 0, /* properties_provided */
2615 0, /* properties_destroyed */
2616 0, /* todo_flags_start */
2617 0, /* todo_flags_finish */
2618 };
2619
2620 class pass_optimize_bswap : public gimple_opt_pass
2621 {
2622 public:
2623 pass_optimize_bswap (gcc::context *ctxt)
2624 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2625 {}
2626
2627 /* opt_pass methods: */
2628 virtual bool gate (function *)
2629 {
2630 return flag_expensive_optimizations && optimize;
2631 }
2632
2633 virtual unsigned int execute (function *);
2634
2635 }; // class pass_optimize_bswap
2636
2637 /* Perform the bswap optimization: replace the expression computed in the rhs
2638 of CUR_STMT by an equivalent bswap, load or load + bswap expression.
2639 Which of these alternatives replaces the rhs is given by N->base_addr (non
2640 null if a load is needed) and BSWAP. The type, VUSE and alias set of the
2641 load to perform are also given in N, while the builtin bswap to invoke is
2642 given in FNDECL. Finally, if a load is involved, INS_STMT refers to one of
2643 the load statements involved to construct the rhs in CUR_STMT and N->range
2644 gives the size of the rhs expression for maintaining some statistics.
2645
2646 Note that if the replacement involves a load, CUR_STMT is moved just after
2647 INS_STMT to do the load with the same VUSE, which can lead to CUR_STMT
2648 changing basic block. */
2649
2650 static bool
2651 bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
2652 tree bswap_type, tree load_type, struct symbolic_number *n,
2653 bool bswap)
2654 {
2655 gimple_stmt_iterator gsi;
2656 tree src, tmp, tgt;
2657 gimple *bswap_stmt;
2658
2659 gsi = gsi_for_stmt (cur_stmt);
2660 src = n->src;
2661 tgt = gimple_assign_lhs (cur_stmt);
2662
2663 /* Need to load the value from memory first. */
2664 if (n->base_addr)
2665 {
2666 gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
2667 tree addr_expr, addr_tmp, val_expr, val_tmp;
2668 tree load_offset_ptr, aligned_load_type;
2669 gimple *addr_stmt, *load_stmt;
2670 unsigned align;
2671 HOST_WIDE_INT load_offset = 0;
2672 basic_block ins_bb, cur_bb;
2673
2674 ins_bb = gimple_bb (ins_stmt);
2675 cur_bb = gimple_bb (cur_stmt);
2676 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
2677 return false;
2678
2679 align = get_object_alignment (src);
2680
2681 /* Move cur_stmt just before one of the loads of the original
2682 to ensure it has the same VUSE. See PR61517 for what could
2683 go wrong. */
2684 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
2685 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
2686 gsi_move_before (&gsi, &gsi_ins);
2687 gsi = gsi_for_stmt (cur_stmt);
2688
2689 /* Compute address to load from and cast according to the size
2690 of the load. */
2691 addr_expr = build_fold_addr_expr (unshare_expr (src));
2692 if (is_gimple_mem_ref_addr (addr_expr))
2693 addr_tmp = addr_expr;
2694 else
2695 {
2696 addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2697 "load_src");
2698 addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2699 gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2700 }
2701
2702 /* Perform the load. */
2703 aligned_load_type = load_type;
2704 if (align < TYPE_ALIGN (load_type))
2705 aligned_load_type = build_aligned_type (load_type, align);
2706 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
2707 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2708 load_offset_ptr);
2709
2710 if (!bswap)
2711 {
2712 if (n->range == 16)
2713 nop_stats.found_16bit++;
2714 else if (n->range == 32)
2715 nop_stats.found_32bit++;
2716 else
2717 {
2718 gcc_assert (n->range == 64);
2719 nop_stats.found_64bit++;
2720 }
2721
2722 /* Convert the result of the load if necessary. */
2723 if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2724 {
2725 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2726 "load_dst");
2727 load_stmt = gimple_build_assign (val_tmp, val_expr);
2728 gimple_set_vuse (load_stmt, n->vuse);
2729 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2730 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
2731 }
2732 else
2733 {
2734 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
2735 gimple_set_vuse (cur_stmt, n->vuse);
2736 }
2737 update_stmt (cur_stmt);
2738
2739 if (dump_file)
2740 {
2741 fprintf (dump_file,
2742 "%d bit load in target endianness found at: ",
2743 (int) n->range);
2744 print_gimple_stmt (dump_file, cur_stmt, 0);
2745 }
2746 return true;
2747 }
2748 else
2749 {
2750 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2751 load_stmt = gimple_build_assign (val_tmp, val_expr);
2752 gimple_set_vuse (load_stmt, n->vuse);
2753 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2754 }
2755 src = val_tmp;
2756 }
2757 else if (!bswap)
2758 {
2759 gimple *g;
2760 if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
2761 {
2762 if (!is_gimple_val (src))
2763 return false;
2764 g = gimple_build_assign (tgt, NOP_EXPR, src);
2765 }
2766 else
2767 g = gimple_build_assign (tgt, src);
2768 if (n->range == 16)
2769 nop_stats.found_16bit++;
2770 else if (n->range == 32)
2771 nop_stats.found_32bit++;
2772 else
2773 {
2774 gcc_assert (n->range == 64);
2775 nop_stats.found_64bit++;
2776 }
2777 if (dump_file)
2778 {
2779 fprintf (dump_file,
2780 "%d bit reshuffle in target endianness found at: ",
2781 (int) n->range);
2782 print_gimple_stmt (dump_file, cur_stmt, 0);
2783 }
2784 gsi_replace (&gsi, g, true);
2785 return true;
2786 }
2787 else if (TREE_CODE (src) == BIT_FIELD_REF)
2788 src = TREE_OPERAND (src, 0);
2789
2790 if (n->range == 16)
2791 bswap_stats.found_16bit++;
2792 else if (n->range == 32)
2793 bswap_stats.found_32bit++;
2794 else
2795 {
2796 gcc_assert (n->range == 64);
2797 bswap_stats.found_64bit++;
2798 }
2799
2800 tmp = src;
2801
2802 /* Convert the src expression if necessary. */
2803 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2804 {
2805 gimple *convert_stmt;
2806
2807 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2808 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2809 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2810 }
2811
2812 /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
2813 values are handled this way, as rotating a 2N-bit value by N bits is generally
2814 not equivalent to a bswap. Consider for instance 0x01020304 r>> 16, which
2815 gives 0x03040102, while a bswap of that value is 0x04030201. */
2816 if (bswap && n->range == 16)
2817 {
2818 tree count = build_int_cst (NULL, BITS_PER_UNIT);
2819 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
2820 bswap_stmt = gimple_build_assign (NULL, src);
2821 }
2822 else
2823 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2824
2825 tmp = tgt;
2826
2827 /* Convert the result if necessary. */
2828 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2829 {
2830 gimple *convert_stmt;
2831
2832 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2833 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
2834 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2835 }
2836
2837 gimple_set_lhs (bswap_stmt, tmp);
2838
2839 if (dump_file)
2840 {
2841 fprintf (dump_file, "%d bit bswap implementation found at: ",
2842 (int) n->range);
2843 print_gimple_stmt (dump_file, cur_stmt, 0);
2844 }
2845
2846 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2847 gsi_remove (&gsi, true);
2848 return true;
2849 }
2850
2851 /* Find manual byte swap implementations as well as loads in a given
2852 endianness. Byte swaps are turned into a bswap builtin invocation,
2853 while endian loads are converted to a bswap builtin invocation or a
2854 simple load according to the target endianness. */
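
/* Illustrative sketch of the kind of source pattern this pass targets
   (editorial addition, not part of the GCC sources):

     #include <stdint.h>
     uint32_t swap32 (uint32_t x)
     {
       return (x >> 24)
              | ((x >> 8) & 0x0000ff00)
              | ((x << 8) & 0x00ff0000)
              | (x << 24);
     }

   is recognized by find_bswap_or_nop and replaced with a single
   __builtin_bswap32 call, while an OR of byte loads that merely reassembles
   a value in the target's own byte order is turned into one plain load.  */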
2855
2856 unsigned int
2857 pass_optimize_bswap::execute (function *fun)
2858 {
2859 basic_block bb;
2860 bool bswap32_p, bswap64_p;
2861 bool changed = false;
2862 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
2863
2864 if (BITS_PER_UNIT != 8)
2865 return 0;
2866
2867 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2868 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
2869 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2870 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
2871 || (bswap32_p && word_mode == SImode)));
2872
2873 /* Determine the argument type of the builtins. The code later on
2874 assumes that the return and argument type are the same. */
2875 if (bswap32_p)
2876 {
2877 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2878 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2879 }
2880
2881 if (bswap64_p)
2882 {
2883 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2884 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2885 }
2886
2887 memset (&nop_stats, 0, sizeof (nop_stats));
2888 memset (&bswap_stats, 0, sizeof (bswap_stats));
2889 calculate_dominance_info (CDI_DOMINATORS);
2890
2891 FOR_EACH_BB_FN (bb, fun)
2892 {
2893 gimple_stmt_iterator gsi;
2894
2895 /* We do a reverse scan for bswap patterns to make sure we get the
2896 widest match. As bswap pattern matching doesn't handle previously
2897 inserted smaller bswap replacements as sub-patterns, the wider
2898 variant wouldn't be detected. */
2899 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
2900 {
2901 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
2902 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
2903 enum tree_code code;
2904 struct symbolic_number n;
2905 bool bswap;
2906
2907 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2908 might be moved to a different basic block by bswap_replace and gsi
2909 must not point to it if that's the case. Moving the gsi_prev
2910 there makes sure that gsi points to the statement previous to
2911 cur_stmt while still making sure that all statements are
2912 considered in this basic block. */
2913 gsi_prev (&gsi);
2914
2915 if (!is_gimple_assign (cur_stmt))
2916 continue;
2917
2918 code = gimple_assign_rhs_code (cur_stmt);
2919 switch (code)
2920 {
2921 case LROTATE_EXPR:
2922 case RROTATE_EXPR:
2923 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2924 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2925 % BITS_PER_UNIT)
2926 continue;
2927 /* Fall through. */
2928 case BIT_IOR_EXPR:
2929 break;
2930 default:
2931 continue;
2932 }
2933
2934 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2935
2936 if (!ins_stmt)
2937 continue;
2938
2939 switch (n.range)
2940 {
2941 case 16:
2942 /* Already in canonical form, nothing to do. */
2943 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2944 continue;
2945 load_type = bswap_type = uint16_type_node;
2946 break;
2947 case 32:
2948 load_type = uint32_type_node;
2949 if (bswap32_p)
2950 {
2951 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2952 bswap_type = bswap32_type;
2953 }
2954 break;
2955 case 64:
2956 load_type = uint64_type_node;
2957 if (bswap64_p)
2958 {
2959 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2960 bswap_type = bswap64_type;
2961 }
2962 break;
2963 default:
2964 continue;
2965 }
2966
2967 if (bswap && !fndecl && n.range != 16)
2968 continue;
2969
2970 if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
2971 &n, bswap))
2972 changed = true;
2973 }
2974 }
2975
2976 statistics_counter_event (fun, "16-bit nop implementations found",
2977 nop_stats.found_16bit);
2978 statistics_counter_event (fun, "32-bit nop implementations found",
2979 nop_stats.found_32bit);
2980 statistics_counter_event (fun, "64-bit nop implementations found",
2981 nop_stats.found_64bit);
2982 statistics_counter_event (fun, "16-bit bswap implementations found",
2983 bswap_stats.found_16bit);
2984 statistics_counter_event (fun, "32-bit bswap implementations found",
2985 bswap_stats.found_32bit);
2986 statistics_counter_event (fun, "64-bit bswap implementations found",
2987 bswap_stats.found_64bit);
2988
2989 return (changed ? TODO_update_ssa : 0);
2990 }
2991
2992 } // anon namespace
2993
2994 gimple_opt_pass *
2995 make_pass_optimize_bswap (gcc::context *ctxt)
2996 {
2997 return new pass_optimize_bswap (ctxt);
2998 }
2999
3000 /* Return true if stmt is a type conversion operation that can be stripped
3001 when used in a widening multiply operation. */
3002 static bool
3003 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
3004 {
3005 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
3006
3007 if (TREE_CODE (result_type) == INTEGER_TYPE)
3008 {
3009 tree op_type;
3010 tree inner_op_type;
3011
3012 if (!CONVERT_EXPR_CODE_P (rhs_code))
3013 return false;
3014
3015 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
3016
3017 /* If the type of OP has the same precision as the result, then
3018 we can strip this conversion. The multiply operation will be
3019 selected to create the correct extension as a by-product. */
3020 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
3021 return true;
3022
3023 /* We can also strip a conversion if it preserves the signed-ness of
3024 the operation and doesn't narrow the range. */
3025 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
3026
3027 /* If the inner-most type is unsigned, then we can strip any
3028 intermediate widening operation. If it's signed, then the
3029 intermediate widening operation must also be signed. */
3030 if ((TYPE_UNSIGNED (inner_op_type)
3031 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
3032 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
3033 return true;
3034
3035 return false;
3036 }
3037
3038 return rhs_code == FIXED_CONVERT_EXPR;
3039 }
3040
3041 /* Return true if RHS is a suitable operand for a widening multiplication,
3042 assuming a target type of TYPE.
3043 There are two cases:
3044
3045 - RHS makes some value at least twice as wide. Store that value
3046 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
3047
3048 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
3049 but leave *TYPE_OUT untouched. */
3050
3051 static bool
3052 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
3053 tree *new_rhs_out)
3054 {
3055 gimple *stmt;
3056 tree type1, rhs1;
3057
3058 if (TREE_CODE (rhs) == SSA_NAME)
3059 {
3060 stmt = SSA_NAME_DEF_STMT (rhs);
3061 if (is_gimple_assign (stmt))
3062 {
3063 if (! widening_mult_conversion_strippable_p (type, stmt))
3064 rhs1 = rhs;
3065 else
3066 {
3067 rhs1 = gimple_assign_rhs1 (stmt);
3068
3069 if (TREE_CODE (rhs1) == INTEGER_CST)
3070 {
3071 *new_rhs_out = rhs1;
3072 *type_out = NULL;
3073 return true;
3074 }
3075 }
3076 }
3077 else
3078 rhs1 = rhs;
3079
3080 type1 = TREE_TYPE (rhs1);
3081
3082 if (TREE_CODE (type1) != TREE_CODE (type)
3083 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
3084 return false;
3085
3086 *new_rhs_out = rhs1;
3087 *type_out = type1;
3088 return true;
3089 }
3090
3091 if (TREE_CODE (rhs) == INTEGER_CST)
3092 {
3093 *new_rhs_out = rhs;
3094 *type_out = NULL;
3095 return true;
3096 }
3097
3098 return false;
3099 }
3100
3101 /* Return true if STMT performs a widening multiplication, assuming the
3102 output type is TYPE. If so, store the unwidened types of the operands
3103 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
3104 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
3105 and *TYPE2_OUT would give the operands of the multiplication. */
3106
3107 static bool
3108 is_widening_mult_p (gimple *stmt,
3109 tree *type1_out, tree *rhs1_out,
3110 tree *type2_out, tree *rhs2_out)
3111 {
3112 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3113
3114 if (TREE_CODE (type) != INTEGER_TYPE
3115 && TREE_CODE (type) != FIXED_POINT_TYPE)
3116 return false;
3117
3118 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
3119 rhs1_out))
3120 return false;
3121
3122 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
3123 rhs2_out))
3124 return false;
3125
3126 if (*type1_out == NULL)
3127 {
3128 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
3129 return false;
3130 *type1_out = *type2_out;
3131 }
3132
3133 if (*type2_out == NULL)
3134 {
3135 if (!int_fits_type_p (*rhs2_out, *type1_out))
3136 return false;
3137 *type2_out = *type1_out;
3138 }
3139
3140 /* Ensure that the larger of the two operands comes first. */
3141 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3142 {
3143 std::swap (*type1_out, *type2_out);
3144 std::swap (*rhs1_out, *rhs2_out);
3145 }
3146
3147 return true;
3148 }
3149
3150 /* Check to see if the CALL statement is an invocation of copysign
3151 with 1.0 as the first argument. */
3152 static bool
3153 is_copysign_call_with_1 (gimple *call)
3154 {
3155 gcall *c = dyn_cast <gcall *> (call);
3156 if (! c)
3157 return false;
3158
3159 enum combined_fn code = gimple_call_combined_fn (c);
3160
3161 if (code == CFN_LAST)
3162 return false;
3163
3164 if (builtin_fn_p (code))
3165 {
3166 switch (as_builtin_fn (code))
3167 {
3168 CASE_FLT_FN (BUILT_IN_COPYSIGN):
3169 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
3170 return real_onep (gimple_call_arg (c, 0));
3171 default:
3172 return false;
3173 }
3174 }
3175
3176 if (internal_fn_p (code))
3177 {
3178 switch (as_internal_fn (code))
3179 {
3180 case IFN_COPYSIGN:
3181 return real_onep (gimple_call_arg (c, 0));
3182 default:
3183 return false;
3184 }
3185 }
3186
3187 return false;
3188 }
3189
3190 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
3191 This only happens when the xorsign optab is defined. If the
3192 pattern is not an xorsign pattern or if expansion fails, FALSE is
3193 returned; otherwise TRUE is returned. */
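
/* Illustrative sketch (editorial addition, not part of the GCC sources): on
   a target that defines the xorsign optab, the multiplication in

     double f (double x, double y) { return x * __builtin_copysign (1.0, y); }

   is replaced by a single IFN_XORSIGN internal call, which can be expanded
   roughly as XORing the sign bit of y into x instead of doing a multiply.  */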
3194 static bool
3195 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
3196 {
3197 tree treeop0, treeop1, lhs, type;
3198 location_t loc = gimple_location (stmt);
3199 lhs = gimple_assign_lhs (stmt);
3200 treeop0 = gimple_assign_rhs1 (stmt);
3201 treeop1 = gimple_assign_rhs2 (stmt);
3202 type = TREE_TYPE (lhs);
3203 machine_mode mode = TYPE_MODE (type);
3204
3205 if (HONOR_SNANS (type))
3206 return false;
3207
3208 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
3209 {
3210 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
3211 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
3212 {
3213 call0 = SSA_NAME_DEF_STMT (treeop1);
3214 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
3215 return false;
3216
3217 treeop1 = treeop0;
3218 }
3219 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
3220 return false;
3221
3222 gcall *c = as_a<gcall*> (call0);
3223 treeop0 = gimple_call_arg (c, 1);
3224
3225 gcall *call_stmt
3226 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
3227 gimple_set_lhs (call_stmt, lhs);
3228 gimple_set_location (call_stmt, loc);
3229 gsi_replace (gsi, call_stmt, true);
3230 return true;
3231 }
3232
3233 return false;
3234 }
3235
3236 /* Process a single gimple statement STMT, which has a MULT_EXPR as
3237 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
3238 value is true iff we converted the statement. */
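
/* Illustrative sketch (editorial addition, not part of the GCC sources): on
   a target with a 32x32->64 bit multiply instruction, the MULT_EXPR coming
   from

     #include <stdint.h>
     int64_t f (int32_t a, int32_t b) { return (int64_t) a * b; }

   is rewritten into WIDEN_MULT_EXPR <a, b>, letting expansion use the single
   widening multiply instead of extending both operands to 64 bits first.  */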
3239
3240 static bool
3241 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
3242 {
3243 tree lhs, rhs1, rhs2, type, type1, type2;
3244 enum insn_code handler;
3245 machine_mode to_mode, from_mode, actual_mode;
3246 optab op;
3247 int actual_precision;
3248 location_t loc = gimple_location (stmt);
3249 bool from_unsigned1, from_unsigned2;
3250
3251 lhs = gimple_assign_lhs (stmt);
3252 type = TREE_TYPE (lhs);
3253 if (TREE_CODE (type) != INTEGER_TYPE)
3254 return false;
3255
3256 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
3257 return false;
3258
3259 to_mode = TYPE_MODE (type);
3260 from_mode = TYPE_MODE (type1);
3261 from_unsigned1 = TYPE_UNSIGNED (type1);
3262 from_unsigned2 = TYPE_UNSIGNED (type2);
3263
3264 if (from_unsigned1 && from_unsigned2)
3265 op = umul_widen_optab;
3266 else if (!from_unsigned1 && !from_unsigned2)
3267 op = smul_widen_optab;
3268 else
3269 op = usmul_widen_optab;
3270
3271 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3272 0, &actual_mode);
3273
3274 if (handler == CODE_FOR_nothing)
3275 {
3276 if (op != smul_widen_optab)
3277 {
3278 /* We can use a signed multiply with unsigned types as long as
3279 there is a wider mode to use, or it is the smaller of the two
3280 types that is unsigned. Note that type1 >= type2, always. */
3281 if ((TYPE_UNSIGNED (type1)
3282 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3283 || (TYPE_UNSIGNED (type2)
3284 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3285 {
3286 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3287 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
3288 return false;
3289 }
3290
3291 op = smul_widen_optab;
3292 handler = find_widening_optab_handler_and_mode (op, to_mode,
3293 from_mode, 0,
3294 &actual_mode);
3295
3296 if (handler == CODE_FOR_nothing)
3297 return false;
3298
3299 from_unsigned1 = from_unsigned2 = false;
3300 }
3301 else
3302 return false;
3303 }
3304
3305 /* Ensure that the inputs to the handler are in the correct precision
3306 for the opcode. This will be the full mode size. */
3307 actual_precision = GET_MODE_PRECISION (actual_mode);
3308 if (2 * actual_precision > TYPE_PRECISION (type))
3309 return false;
3310 if (actual_precision != TYPE_PRECISION (type1)
3311 || from_unsigned1 != TYPE_UNSIGNED (type1))
3312 rhs1 = build_and_insert_cast (gsi, loc,
3313 build_nonstandard_integer_type
3314 (actual_precision, from_unsigned1), rhs1);
3315 if (actual_precision != TYPE_PRECISION (type2)
3316 || from_unsigned2 != TYPE_UNSIGNED (type2))
3317 rhs2 = build_and_insert_cast (gsi, loc,
3318 build_nonstandard_integer_type
3319 (actual_precision, from_unsigned2), rhs2);
3320
3321 /* Handle constants. */
3322 if (TREE_CODE (rhs1) == INTEGER_CST)
3323 rhs1 = fold_convert (type1, rhs1);
3324 if (TREE_CODE (rhs2) == INTEGER_CST)
3325 rhs2 = fold_convert (type2, rhs2);
3326
3327 gimple_assign_set_rhs1 (stmt, rhs1);
3328 gimple_assign_set_rhs2 (stmt, rhs2);
3329 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
3330 update_stmt (stmt);
3331 widen_mul_stats.widen_mults_inserted++;
3332 return true;
3333 }
3334
3335 /* Process a single gimple statement STMT, which is found at the
3336 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
3337 rhs (given by CODE), and try to convert it into a
3338 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
3339 is true iff we converted the statement. */
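
/* Illustrative sketch (editorial addition, not part of the GCC sources): on
   a target providing a widening multiply-accumulate instruction, code such
   as

     #include <stdint.h>
     int64_t f (int64_t acc, int32_t a, int32_t b) { return acc + (int64_t) a * b; }

   has the PLUS_EXPR fed by the widening multiply rewritten into
   WIDEN_MULT_PLUS_EXPR <a, b, acc>, provided the multiply result has no
   other uses.  */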
3340
3341 static bool
3342 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
3343 enum tree_code code)
3344 {
3345 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
3346 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
3347 tree type, type1, type2, optype;
3348 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
3349 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
3350 optab this_optab;
3351 enum tree_code wmult_code;
3352 enum insn_code handler;
3353 machine_mode to_mode, from_mode, actual_mode;
3354 location_t loc = gimple_location (stmt);
3355 int actual_precision;
3356 bool from_unsigned1, from_unsigned2;
3357
3358 lhs = gimple_assign_lhs (stmt);
3359 type = TREE_TYPE (lhs);
3360 if (TREE_CODE (type) != INTEGER_TYPE
3361 && TREE_CODE (type) != FIXED_POINT_TYPE)
3362 return false;
3363
3364 if (code == MINUS_EXPR)
3365 wmult_code = WIDEN_MULT_MINUS_EXPR;
3366 else
3367 wmult_code = WIDEN_MULT_PLUS_EXPR;
3368
3369 rhs1 = gimple_assign_rhs1 (stmt);
3370 rhs2 = gimple_assign_rhs2 (stmt);
3371
3372 if (TREE_CODE (rhs1) == SSA_NAME)
3373 {
3374 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3375 if (is_gimple_assign (rhs1_stmt))
3376 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3377 }
3378
3379 if (TREE_CODE (rhs2) == SSA_NAME)
3380 {
3381 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3382 if (is_gimple_assign (rhs2_stmt))
3383 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3384 }
3385
3386 /* Allow for one conversion statement between the multiply
3387 and addition/subtraction statement. If there is more than
3388 one conversion then we assume they would invalidate this
3389 transformation. If that's not the case then they should have
3390 been folded before now. */
3391 if (CONVERT_EXPR_CODE_P (rhs1_code))
3392 {
3393 conv1_stmt = rhs1_stmt;
3394 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3395 if (TREE_CODE (rhs1) == SSA_NAME)
3396 {
3397 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3398 if (is_gimple_assign (rhs1_stmt))
3399 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3400 }
3401 else
3402 return false;
3403 }
3404 if (CONVERT_EXPR_CODE_P (rhs2_code))
3405 {
3406 conv2_stmt = rhs2_stmt;
3407 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3408 if (TREE_CODE (rhs2) == SSA_NAME)
3409 {
3410 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3411 if (is_gimple_assign (rhs2_stmt))
3412 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3413 }
3414 else
3415 return false;
3416 }
3417
3418 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
3419 is_widening_mult_p, but we still need the rhs values it returns.
3420
3421 It might also appear that it would be sufficient to use the existing
3422 operands of the widening multiply, but that would limit the choice of
3423 multiply-and-accumulate instructions.
3424
3425 If the widened-multiplication result has more than one use, it is
3426 probably wiser not to do the conversion. */
3427 if (code == PLUS_EXPR
3428 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
3429 {
3430 if (!has_single_use (rhs1)
3431 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3432 &type2, &mult_rhs2))
3433 return false;
3434 add_rhs = rhs2;
3435 conv_stmt = conv1_stmt;
3436 }
3437 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3438 {
3439 if (!has_single_use (rhs2)
3440 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3441 &type2, &mult_rhs2))
3442 return false;
3443 add_rhs = rhs1;
3444 conv_stmt = conv2_stmt;
3445 }
3446 else
3447 return false;
3448
3449 to_mode = TYPE_MODE (type);
3450 from_mode = TYPE_MODE (type1);
3451 from_unsigned1 = TYPE_UNSIGNED (type1);
3452 from_unsigned2 = TYPE_UNSIGNED (type2);
3453 optype = type1;
3454
3455 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
3456 if (from_unsigned1 != from_unsigned2)
3457 {
3458 if (!INTEGRAL_TYPE_P (type))
3459 return false;
3460 /* We can use a signed multiply with unsigned types as long as
3461 there is a wider mode to use, or it is the smaller of the two
3462 types that is unsigned. Note that type1 >= type2, always. */
3463 if ((from_unsigned1
3464 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3465 || (from_unsigned2
3466 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3467 {
3468 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3469 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3470 return false;
3471 }
3472
3473 from_unsigned1 = from_unsigned2 = false;
3474 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3475 false);
3476 }
3477
3478 /* If there was a conversion between the multiply and addition
3479 then we need to make sure it fits a multiply-and-accumulate.
3480 There should be a single mode change which does not change the
3481 value. */
3482 if (conv_stmt)
3483 {
3484 /* We use the original, unmodified data types for this. */
3485 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3486 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3487 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3488 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3489
3490 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3491 {
3492 /* Conversion is a truncate. */
3493 if (TYPE_PRECISION (to_type) < data_size)
3494 return false;
3495 }
3496 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3497 {
3498 /* Conversion is an extend. Check it's the right sort. */
3499 if (TYPE_UNSIGNED (from_type) != is_unsigned
3500 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3501 return false;
3502 }
3503 /* else convert is a no-op for our purposes. */
3504 }
3505
3506 /* Verify that the machine can perform a widening multiply
3507 accumulate in this mode/signedness combination, otherwise
3508 this transformation is likely to pessimize code. */
3509 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3510 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3511 from_mode, 0, &actual_mode);
3512
3513 if (handler == CODE_FOR_nothing)
3514 return false;
3515
3516 /* Ensure that the inputs to the handler are in the correct precision
3517 for the opcode. This will be the full mode size. */
3518 actual_precision = GET_MODE_PRECISION (actual_mode);
3519 if (actual_precision != TYPE_PRECISION (type1)
3520 || from_unsigned1 != TYPE_UNSIGNED (type1))
3521 mult_rhs1 = build_and_insert_cast (gsi, loc,
3522 build_nonstandard_integer_type
3523 (actual_precision, from_unsigned1),
3524 mult_rhs1);
3525 if (actual_precision != TYPE_PRECISION (type2)
3526 || from_unsigned2 != TYPE_UNSIGNED (type2))
3527 mult_rhs2 = build_and_insert_cast (gsi, loc,
3528 build_nonstandard_integer_type
3529 (actual_precision, from_unsigned2),
3530 mult_rhs2);
3531
3532 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3533 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3534
3535 /* Handle constants. */
3536 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3537 mult_rhs1 = fold_convert (type1, mult_rhs1);
3538 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3539 mult_rhs2 = fold_convert (type2, mult_rhs2);
3540
3541 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3542 add_rhs);
3543 update_stmt (gsi_stmt (*gsi));
3544 widen_mul_stats.maccs_inserted++;
3545 return true;
3546 }
3547
3548 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3549 with uses in additions and subtractions to form fused multiply-add
3550 operations. Returns true if successful and MUL_STMT should be removed. */
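
/* Illustrative sketch (editorial addition, not part of the GCC sources):
   when the fma optab is available and -ffp-contract=fast is in effect,
   code such as

     double f (double a, double b, double c) { return a * b + c; }

   has the multiply and the add fused into a single fused multiply-add,
   and a use of the form c - a * b similarly becomes an FNMA-style
   operation, provided every use of the multiplication result can be
   fused so the original MULT_EXPR becomes dead.  */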
3551
3552 static bool
3553 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
3554 {
3555 tree mul_result = gimple_get_lhs (mul_stmt);
3556 tree type = TREE_TYPE (mul_result);
3557 gimple *use_stmt, *neguse_stmt;
3558 gassign *fma_stmt;
3559 use_operand_p use_p;
3560 imm_use_iterator imm_iter;
3561
3562 if (FLOAT_TYPE_P (type)
3563 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3564 return false;
3565
3566 /* We don't want to do bitfield reduction ops. */
3567 if (INTEGRAL_TYPE_P (type)
3568 && !type_has_mode_precision_p (type))
3569 return false;
3570
3571 /* If the target doesn't support it, don't generate it. We assume that
3572 if fma isn't available then fms, fnma or fnms are not either. */
3573 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3574 return false;
3575
3576 /* If the multiplication has zero uses, it is kept around probably because
3577 of -fnon-call-exceptions. Don't optimize it away in that case;
3578 it is DCE's job. */
3579 if (has_zero_uses (mul_result))
3580 return false;
3581
3582 /* Make sure that the multiplication statement becomes dead after
3583 the transformation, i.e. that all uses are transformed to FMAs.
3584 This means we assume that an FMA operation has the same cost
3585 as an addition. */
3586 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3587 {
3588 enum tree_code use_code;
3589 tree result = mul_result;
3590 bool negate_p = false;
3591
3592 use_stmt = USE_STMT (use_p);
3593
3594 if (is_gimple_debug (use_stmt))
3595 continue;
3596
3597 /* For now restrict these operations to single basic blocks. In theory
3598 we would want to support sinking the multiplication in
3599 m = a*b;
3600 if ()
3601 ma = m + c;
3602 else
3603 d = m;
3604 to form an fma in the then block and sink the multiplication to the
3605 else block. */
3606 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3607 return false;
3608
3609 if (!is_gimple_assign (use_stmt))
3610 return false;
3611
3612 use_code = gimple_assign_rhs_code (use_stmt);
3613
3614 /* A negate on the multiplication leads to FNMA. */
3615 if (use_code == NEGATE_EXPR)
3616 {
3617 ssa_op_iter iter;
3618 use_operand_p usep;
3619
3620 result = gimple_assign_lhs (use_stmt);
3621
3622 /* Make sure the negate statement becomes dead with this
3623 single transformation. */
3624 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3625 &use_p, &neguse_stmt))
3626 return false;
3627
3628 /* Make sure the multiplication isn't also used on that stmt. */
3629 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3630 if (USE_FROM_PTR (usep) == mul_result)
3631 return false;
3632
3633 /* Re-validate. */
3634 use_stmt = neguse_stmt;
3635 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3636 return false;
3637 if (!is_gimple_assign (use_stmt))
3638 return false;
3639
3640 use_code = gimple_assign_rhs_code (use_stmt);
3641 negate_p = true;
3642 }
3643
3644 switch (use_code)
3645 {
3646 case MINUS_EXPR:
3647 if (gimple_assign_rhs2 (use_stmt) == result)
3648 negate_p = !negate_p;
3649 break;
3650 case PLUS_EXPR:
3651 break;
3652 default:
3653 /* FMA can only be formed from PLUS and MINUS. */
3654 return false;
3655 }
3656
3657 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3658 by a MULT_EXPR that we'll visit later, we might be able to
3659 get a more profitable match with fnma.
3660 OTOH, if we don't, a negate / fma pair likely has lower latency
3661 than a mult / subtract pair. */
3662 if (use_code == MINUS_EXPR && !negate_p
3663 && gimple_assign_rhs1 (use_stmt) == result
3664 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3665 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3666 {
3667 tree rhs2 = gimple_assign_rhs2 (use_stmt);
3668
3669 if (TREE_CODE (rhs2) == SSA_NAME)
3670 {
3671 gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
3672 if (has_single_use (rhs2)
3673 && is_gimple_assign (stmt2)
3674 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3675 return false;
3676 }
3677 }
3678
3679 /* We can't handle a * b + a * b. */
3680 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3681 return false;
3682
3683 /* While it is possible to validate whether or not the exact form
3684 that we've recognized is available in the backend, the assumption
3685 is that the transformation is never a loss. For instance, suppose
3686 the target only has the plain FMA pattern available. Consider
3687 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3688 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
3689 still have 3 operations, but in the FMA form the two NEGs are
3690 independent and could be run in parallel. */
3691 }
3692
3693 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3694 {
3695 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3696 enum tree_code use_code;
3697 tree addop, mulop1 = op1, result = mul_result;
3698 bool negate_p = false;
3699
3700 if (is_gimple_debug (use_stmt))
3701 continue;
3702
3703 use_code = gimple_assign_rhs_code (use_stmt);
3704 if (use_code == NEGATE_EXPR)
3705 {
3706 result = gimple_assign_lhs (use_stmt);
3707 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3708 gsi_remove (&gsi, true);
3709 release_defs (use_stmt);
3710
3711 use_stmt = neguse_stmt;
3712 gsi = gsi_for_stmt (use_stmt);
3713 use_code = gimple_assign_rhs_code (use_stmt);
3714 negate_p = true;
3715 }
3716
3717 if (gimple_assign_rhs1 (use_stmt) == result)
3718 {
3719 addop = gimple_assign_rhs2 (use_stmt);
3720 /* a * b - c -> a * b + (-c) */
3721 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3722 addop = force_gimple_operand_gsi (&gsi,
3723 build1 (NEGATE_EXPR,
3724 type, addop),
3725 true, NULL_TREE, true,
3726 GSI_SAME_STMT);
3727 }
3728 else
3729 {
3730 addop = gimple_assign_rhs1 (use_stmt);
3731 /* a - b * c -> (-b) * c + a */
3732 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3733 negate_p = !negate_p;
3734 }
3735
3736 if (negate_p)
3737 mulop1 = force_gimple_operand_gsi (&gsi,
3738 build1 (NEGATE_EXPR,
3739 type, mulop1),
3740 true, NULL_TREE, true,
3741 GSI_SAME_STMT);
3742
3743 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
3744 FMA_EXPR, mulop1, op2, addop);
3745 gsi_replace (&gsi, fma_stmt, true);
3746 widen_mul_stats.fmas_inserted++;
3747 }
3748
3749 return true;
3750 }
3751
3752
3753 /* Helper function of match_uaddsub_overflow. Return 1
3754 if USE_STMT is an unsigned overflow check (ovf != 0) for
3755 STMT, -1 if USE_STMT is an unsigned overflow check (ovf == 0),
3756 and 0 otherwise. */
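/* As an illustration of the return values (the exact operand
   positions accepted are spelled out in the switch further down):
   given the unsigned subtraction

     x = y - z;

   a USE_STMT such as "if (x > y)" is the canonical overflow check and
   yields 1, the inverted form "if (x <= y)" yields -1, and any other
   comparison or use yields 0.  Additions are checked the same way via
   comparisons such as "if (y > x)".  */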
3757
3758 static int
3759 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3760 {
3761 enum tree_code ccode = ERROR_MARK;
3762 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3763 if (gimple_code (use_stmt) == GIMPLE_COND)
3764 {
3765 ccode = gimple_cond_code (use_stmt);
3766 crhs1 = gimple_cond_lhs (use_stmt);
3767 crhs2 = gimple_cond_rhs (use_stmt);
3768 }
3769 else if (is_gimple_assign (use_stmt))
3770 {
3771 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3772 {
3773 ccode = gimple_assign_rhs_code (use_stmt);
3774 crhs1 = gimple_assign_rhs1 (use_stmt);
3775 crhs2 = gimple_assign_rhs2 (use_stmt);
3776 }
3777 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3778 {
3779 tree cond = gimple_assign_rhs1 (use_stmt);
3780 if (COMPARISON_CLASS_P (cond))
3781 {
3782 ccode = TREE_CODE (cond);
3783 crhs1 = TREE_OPERAND (cond, 0);
3784 crhs2 = TREE_OPERAND (cond, 1);
3785 }
3786 else
3787 return 0;
3788 }
3789 else
3790 return 0;
3791 }
3792 else
3793 return 0;
3794
3795 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3796 return 0;
3797
3798 enum tree_code code = gimple_assign_rhs_code (stmt);
3799 tree lhs = gimple_assign_lhs (stmt);
3800 tree rhs1 = gimple_assign_rhs1 (stmt);
3801 tree rhs2 = gimple_assign_rhs2 (stmt);
3802
3803 switch (ccode)
3804 {
3805 case GT_EXPR:
3806 case LE_EXPR:
3807 /* r = a - b; r > a or r <= a
3808 r = a + b; a > r or a <= r or b > r or b <= r. */
3809 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3810 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3811 && crhs2 == lhs))
3812 return ccode == GT_EXPR ? 1 : -1;
3813 break;
3814 case LT_EXPR:
3815 case GE_EXPR:
3816 /* r = a - b; a < r or a >= r
3817 r = a + b; r < a or r >= a or r < b or r >= b. */
3818 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3819 || (code == PLUS_EXPR && crhs1 == lhs
3820 && (crhs2 == rhs1 || crhs2 == rhs2)))
3821 return ccode == LT_EXPR ? 1 : -1;
3822 break;
3823 default:
3824 break;
3825 }
3826 return 0;
3827 }
3828
3829 /* Recognize for unsigned x
3830 x = y - z;
3831 if (x > y)
3832 where there are other uses of x and replace it with
3833 _7 = SUB_OVERFLOW (y, z);
3834 x = REALPART_EXPR <_7>;
3835 _8 = IMAGPART_EXPR <_7>;
3836 if (_8)
3837 and similarly for addition. */
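/* A hedged before/after sketch with hypothetical names.  Before:

     unsigned int f (unsigned int y, unsigned int z, int *ovfp)
     {
       unsigned int x = y - z;
       *ovfp = x > y;    (overflow check; x also escapes, so it has
                          another, non-check use)
       return x;
     }

   After the transform, the subtraction and the check share one
   IFN_SUB_OVERFLOW call: the REALPART_EXPR of its complex result is
   the difference and the IMAGPART_EXPR is the overflow flag, much as
   __builtin_sub_overflow (y, z, &x) would produce.  */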
3838
3839 static bool
3840 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3841 enum tree_code code)
3842 {
3843 tree lhs = gimple_assign_lhs (stmt);
3844 tree type = TREE_TYPE (lhs);
3845 use_operand_p use_p;
3846 imm_use_iterator iter;
3847 bool use_seen = false;
3848 bool ovf_use_seen = false;
3849 gimple *use_stmt;
3850
3851 gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3852 if (!INTEGRAL_TYPE_P (type)
3853 || !TYPE_UNSIGNED (type)
3854 || has_zero_uses (lhs)
3855 || has_single_use (lhs)
3856 || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3857 TYPE_MODE (type)) == CODE_FOR_nothing)
3858 return false;
3859
3860 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3861 {
3862 use_stmt = USE_STMT (use_p);
3863 if (is_gimple_debug (use_stmt))
3864 continue;
3865
3866 if (uaddsub_overflow_check_p (stmt, use_stmt))
3867 ovf_use_seen = true;
3868 else
3869 use_seen = true;
3870 if (ovf_use_seen && use_seen)
3871 break;
3872 }
3873
3874 if (!ovf_use_seen || !use_seen)
3875 return false;
3876
3877 tree ctype = build_complex_type (type);
3878 tree rhs1 = gimple_assign_rhs1 (stmt);
3879 tree rhs2 = gimple_assign_rhs2 (stmt);
3880 gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3881 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3882 2, rhs1, rhs2);
3883 tree ctmp = make_ssa_name (ctype);
3884 gimple_call_set_lhs (g, ctmp);
3885 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3886 gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3887 build1 (REALPART_EXPR, type, ctmp));
3888 gsi_replace (gsi, g2, true);
3889 tree ovf = make_ssa_name (type);
3890 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3891 build1 (IMAGPART_EXPR, type, ctmp));
3892 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3893
3894 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3895 {
3896 if (is_gimple_debug (use_stmt))
3897 continue;
3898
3899 int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3900 if (ovf_use == 0)
3901 continue;
3902 if (gimple_code (use_stmt) == GIMPLE_COND)
3903 {
3904 gcond *cond_stmt = as_a <gcond *> (use_stmt);
3905 gimple_cond_set_lhs (cond_stmt, ovf);
3906 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3907 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3908 }
3909 else
3910 {
3911 gcc_checking_assert (is_gimple_assign (use_stmt));
3912 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3913 {
3914 gimple_assign_set_rhs1 (use_stmt, ovf);
3915 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3916 gimple_assign_set_rhs_code (use_stmt,
3917 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3918 }
3919 else
3920 {
3921 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3922 == COND_EXPR);
3923 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3924 boolean_type_node, ovf,
3925 build_int_cst (type, 0));
3926 gimple_assign_set_rhs1 (use_stmt, cond);
3927 }
3928 }
3929 update_stmt (use_stmt);
3930 }
3931 return true;
3932 }
3933
3934 /* Return true if target has support for divmod. */
3935
3936 static bool
3937 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3938 {
3939 /* If target supports hardware divmod insn, use it for divmod. */
3940 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3941 return true;
3942
3943 /* Check if libfunc for divmod is available. */
3944 rtx libfunc = optab_libfunc (divmod_optab, mode);
3945 if (libfunc != NULL_RTX)
3946 {
3947 /* If optab_handler exists for div_optab, perhaps in a wider mode,
3948 we don't want to use the libfunc even if it exists for the given mode. */
3949 machine_mode div_mode;
3950 FOR_EACH_MODE_FROM (div_mode, mode)
3951 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3952 return false;
3953
3954 return targetm.expand_divmod_libfunc != NULL;
3955 }
3956
3957 return false;
3958 }
3959
3960 /* Check if stmt is a candidate for the divmod transform. */
3961
3962 static bool
3963 divmod_candidate_p (gassign *stmt)
3964 {
3965 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3966 machine_mode mode = TYPE_MODE (type);
3967 optab divmod_optab, div_optab;
3968
3969 if (TYPE_UNSIGNED (type))
3970 {
3971 divmod_optab = udivmod_optab;
3972 div_optab = udiv_optab;
3973 }
3974 else
3975 {
3976 divmod_optab = sdivmod_optab;
3977 div_optab = sdiv_optab;
3978 }
3979
3980 tree op1 = gimple_assign_rhs1 (stmt);
3981 tree op2 = gimple_assign_rhs2 (stmt);
3982
3983 /* Disable the transform if either operand is a constant, since
3984 division-by-constant may have specialized expansion. */
3985 if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3986 return false;
3987
3988 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3989 expand using the [su]divv optabs. */
3990 if (TYPE_OVERFLOW_TRAPS (type))
3991 return false;
3992
3993 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3994 return false;
3995
3996 return true;
3997 }
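/* For example (illustrative operands), "q = a / 7; r = a % 7;" is
   deliberately rejected above: division by a constant is normally
   expanded to a multiply-by-magic sequence that is cheaper than a
   divmod call.  Only the variable-divisor case, "q = a / b;
   r = a % b;", is handed to convert_to_divmod below.  */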
3998
3999 /* This function looks for:
4000 t1 = a TRUNC_DIV_EXPR b;
4001 t2 = a TRUNC_MOD_EXPR b;
4002 and transforms it to the following sequence:
4003 complex_tmp = DIVMOD (a, b);
4004 t1 = REALPART_EXPR(complex_tmp);
4005 t2 = IMAGPART_EXPR(complex_tmp);
4006 For conditions enabling the transform see divmod_candidate_p().
4007
4008 The pass has three parts:
4009 1) Find top_stmt, which is a trunc_div or trunc_mod stmt and dominates all
4010 other trunc_div_expr and trunc_mod_expr stmts.
4011 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4012 to stmts vector.
4013 3) Insert DIVMOD call just before top_stmt and update entries in
4014 stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
4015 IMAGPART_EXPR for mod). */
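/* A sketch of the rewrite on hypothetical source:

     int q, r;
     q = a / b;        (TRUNC_DIV_EXPR)
     ...
     r = a % b;        (TRUNC_MOD_EXPR with the same operands)

   becomes

     divmod_tmp = DIVMOD (a, b);
     q = REALPART_EXPR <divmod_tmp>;
     r = IMAGPART_EXPR <divmod_tmp>;

   so that quotient and remainder are computed by a single division
   (a hardware divmod insn or a divmod libcall), subject to the
   conditions in divmod_candidate_p.  */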
4016
4017 static bool
4018 convert_to_divmod (gassign *stmt)
4019 {
4020 if (stmt_can_throw_internal (stmt)
4021 || !divmod_candidate_p (stmt))
4022 return false;
4023
4024 tree op1 = gimple_assign_rhs1 (stmt);
4025 tree op2 = gimple_assign_rhs2 (stmt);
4026
4027 imm_use_iterator use_iter;
4028 gimple *use_stmt;
4029 auto_vec<gimple *> stmts;
4030
4031 gimple *top_stmt = stmt;
4032 basic_block top_bb = gimple_bb (stmt);
4033
4034 /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4035 at least stmt and possibly other trunc_div/trunc_mod stmts
4036 having the same operands as stmt. */
4037
4038 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4039 {
4040 if (is_gimple_assign (use_stmt)
4041 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4042 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4043 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4044 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4045 {
4046 if (stmt_can_throw_internal (use_stmt))
4047 continue;
4048
4049 basic_block bb = gimple_bb (use_stmt);
4050
4051 if (bb == top_bb)
4052 {
4053 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4054 top_stmt = use_stmt;
4055 }
4056 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4057 {
4058 top_bb = bb;
4059 top_stmt = use_stmt;
4060 }
4061 }
4062 }
4063
4064 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4065 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4066
4067 stmts.safe_push (top_stmt);
4068 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4069
4070 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4071 to the stmts vector. The 2nd loop will always add stmt to the stmts
4072 vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4073 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
4074
4075 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4076 {
4077 if (is_gimple_assign (use_stmt)
4078 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4079 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4080 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4081 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4082 {
4083 if (use_stmt == top_stmt
4084 || stmt_can_throw_internal (use_stmt)
4085 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4086 continue;
4087
4088 stmts.safe_push (use_stmt);
4089 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4090 div_seen = true;
4091 }
4092 }
4093
4094 if (!div_seen)
4095 return false;
4096
4097 /* Part 3: Create libcall to internal fn DIVMOD:
4098 divmod_tmp = DIVMOD (op1, op2). */
4099
4100 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4101 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4102 call_stmt, "divmod_tmp");
4103 gimple_call_set_lhs (call_stmt, res);
4104 /* We rejected throwing statements above. */
4105 gimple_call_set_nothrow (call_stmt, true);
4106
4107 /* Insert the call before top_stmt. */
4108 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4109 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4110
4111 widen_mul_stats.divmod_calls_inserted++;
4112
4113 /* Update all statements in stmts vector:
4114 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4115 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4116
4117 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4118 {
4119 tree new_rhs;
4120
4121 switch (gimple_assign_rhs_code (use_stmt))
4122 {
4123 case TRUNC_DIV_EXPR:
4124 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4125 break;
4126
4127 case TRUNC_MOD_EXPR:
4128 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4129 break;
4130
4131 default:
4132 gcc_unreachable ();
4133 }
4134
4135 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4136 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4137 update_stmt (use_stmt);
4138 }
4139
4140 return true;
4141 }
4142
4143 /* Find integer multiplications where the operands are extended from
4144 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4145 where appropriate. */
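/* For illustration, the kind of source the widening-multiply part of
   this pass targets (hypothetical names; the rewrite only happens when
   the target provides a widening multiply pattern for these modes):

     int64_t widen (int32_t a, int32_t b)
     {
       return (int64_t) a * b;
     }

   Here the MULT_EXPR on the sign-extended operands is replaced by a
   WIDEN_MULT_EXPR on the narrow operands, shown as something like
   "a_1 w* b_2" in the GIMPLE dumps, so no full 64-bit multiply is
   needed.  */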
4146
4147 namespace {
4148
4149 const pass_data pass_data_optimize_widening_mul =
4150 {
4151 GIMPLE_PASS, /* type */
4152 "widening_mul", /* name */
4153 OPTGROUP_NONE, /* optinfo_flags */
4154 TV_NONE, /* tv_id */
4155 PROP_ssa, /* properties_required */
4156 0, /* properties_provided */
4157 0, /* properties_destroyed */
4158 0, /* todo_flags_start */
4159 TODO_update_ssa, /* todo_flags_finish */
4160 };
4161
4162 class pass_optimize_widening_mul : public gimple_opt_pass
4163 {
4164 public:
4165 pass_optimize_widening_mul (gcc::context *ctxt)
4166 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4167 {}
4168
4169 /* opt_pass methods: */
4170 virtual bool gate (function *)
4171 {
4172 return flag_expensive_optimizations && optimize;
4173 }
4174
4175 virtual unsigned int execute (function *);
4176
4177 }; // class pass_optimize_widening_mul
4178
4179 unsigned int
4180 pass_optimize_widening_mul::execute (function *fun)
4181 {
4182 basic_block bb;
4183 bool cfg_changed = false;
4184
4185 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4186 calculate_dominance_info (CDI_DOMINATORS);
4187 renumber_gimple_stmt_uids ();
4188
4189 FOR_EACH_BB_FN (bb, fun)
4190 {
4191 gimple_stmt_iterator gsi;
4192
4193 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4194 {
4195 gimple *stmt = gsi_stmt (gsi);
4196 enum tree_code code;
4197
4198 if (is_gimple_assign (stmt))
4199 {
4200 code = gimple_assign_rhs_code (stmt);
4201 switch (code)
4202 {
4203 case MULT_EXPR:
4204 if (!convert_mult_to_widen (stmt, &gsi)
4205 && !convert_expand_mult_copysign (stmt, &gsi)
4206 && convert_mult_to_fma (stmt,
4207 gimple_assign_rhs1 (stmt),
4208 gimple_assign_rhs2 (stmt)))
4209 {
4210 gsi_remove (&gsi, true);
4211 release_defs (stmt);
4212 continue;
4213 }
4214 break;
4215
4216 case PLUS_EXPR:
4217 case MINUS_EXPR:
4218 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4219 match_uaddsub_overflow (&gsi, stmt, code);
4220 break;
4221
4222 case TRUNC_MOD_EXPR:
4223 convert_to_divmod (as_a<gassign *> (stmt));
4224 break;
4225
4226 default:;
4227 }
4228 }
4229 else if (is_gimple_call (stmt)
4230 && gimple_call_lhs (stmt))
4231 {
4232 tree fndecl = gimple_call_fndecl (stmt);
4233 if (fndecl
4234 && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
4235 {
4236 switch (DECL_FUNCTION_CODE (fndecl))
4237 {
4238 case BUILT_IN_POWF:
4239 case BUILT_IN_POW:
4240 case BUILT_IN_POWL:
4241 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4242 && real_equal
4243 (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4244 &dconst2)
4245 && convert_mult_to_fma (stmt,
4246 gimple_call_arg (stmt, 0),
4247 gimple_call_arg (stmt, 0)))
4248 {
4249 unlink_stmt_vdef (stmt);
4250 if (gsi_remove (&gsi, true)
4251 && gimple_purge_dead_eh_edges (bb))
4252 cfg_changed = true;
4253 release_defs (stmt);
4254 continue;
4255 }
4256 break;
4257
4258 default:;
4259 }
4260 }
4261 }
4262 gsi_next (&gsi);
4263 }
4264 }
4265
4266 statistics_counter_event (fun, "widening multiplications inserted",
4267 widen_mul_stats.widen_mults_inserted);
4268 statistics_counter_event (fun, "widening maccs inserted",
4269 widen_mul_stats.maccs_inserted);
4270 statistics_counter_event (fun, "fused multiply-adds inserted",
4271 widen_mul_stats.fmas_inserted);
4272 statistics_counter_event (fun, "divmod calls inserted",
4273 widen_mul_stats.divmod_calls_inserted);
4274
4275 return cfg_changed ? TODO_cleanup_cfg : 0;
4276 }
4277
4278 } // anon namespace
4279
4280 gimple_opt_pass *
4281 make_pass_optimize_widening_mul (gcc::context *ctxt)
4282 {
4283 return new pass_optimize_widening_mul (ctxt);
4284 }