1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, with some experiments it was found out that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walk it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
89 #include "coretypes.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
116 #include "targhooks.h"
/* NOTE(review): this extraction of "struct occurrence" is incomplete --
   the struct header, the destructor body and the closing brace are
   missing (the embedded original line numbers skip).  The fields below
   describe one node of the pass's subset of the dominator tree: a block
   that computes a division, or a common dominator of such blocks.
   Kept byte-identical; restore missing lines from version control.  */
119 /* This structure represents one basic block that either computes a
120 division, or is a common dominator for basic block that compute a
123 /* The basic block represented by this structure. */
124 basic_block bb
= basic_block();
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 tree recip_def
= tree();
130 /* If non-NULL, the SSA_NAME holding the definition for a squared
131 reciprocal inserted in BB. */
132 tree square_recip_def
= tree();
134 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
135 was inserted in BB. */
136 gimple
*recip_def_stmt
= nullptr;
138 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 struct occurrence
*children
= nullptr;
142 /* Pointer to the next "struct occurrence"s in the list of blocks
143 sharing a common dominator. */
144 struct occurrence
*next
= nullptr;
146 /* The number of divisions that are in BB before compute_merit. The
147 number of divisions that are in BB or post-dominate it after
149 int num_divisions
= 0;
151 /* True if the basic block has a division, false if it is a common
152 dominator for basic blocks that do. If it is false and trapping
153 math is active, BB is not a candidate for inserting a reciprocal. */
154 bool bb_has_division
= false;
156 /* Construct a struct occurrence for basic block BB, and whose
157 children list is headed by CHILDREN. */
158 occurrence (basic_block bb
, struct occurrence
*children
)
159 : bb (bb
), children (children
)
164 /* Destroy a struct occurrence and remove it from its basic block. */
170 /* Allocate memory for a struct occurrence from OCC_POOL. */
171 static void* operator new (size_t);
173 /* Return memory for a struct occurrence to OCC_POOL. */
174 static void operator delete (void*, size_t);
/* NOTE(review): incomplete extraction of the pass statistics structs
   (the struct headers and several "int ...;" fields named by the
   comments below are missing) followed by the two file-scope globals
   the reciprocal pass uses: OCC_HEAD, the root list of "struct
   occurrence" nodes, and OCC_POOL, the allocation pool they come from.
   Kept byte-identical.  */
179 /* Number of 1.0/X ops inserted. */
182 /* Number of 1.0/FUNC ops inserted. */
188 /* Number of cexpi calls inserted. */
194 /* Number of widening multiplication ops inserted. */
195 int widen_mults_inserted
;
197 /* Number of integer multiply-and-accumulate ops inserted. */
200 /* Number of fp fused multiply-add ops inserted. */
203 /* Number of divmod calls inserted. */
204 int divmod_calls_inserted
;
207 /* The instance of "struct occurrence" representing the highest
208 interesting block in the dominator tree. */
209 static struct occurrence
*occ_head
;
211 /* Allocation pool for getting instances of "struct occurrence". */
212 static object_allocator
<occurrence
> *occ_pool
;
214 void* occurrence::operator new (size_t n
)
216 gcc_assert (n
== sizeof(occurrence
));
217 return occ_pool
->allocate_raw ();
220 void occurrence::operator delete (void *occ
, size_t n
)
222 gcc_assert (n
== sizeof(occurrence
));
223 occ_pool
->remove_raw (occ
);
/* NOTE(review): incomplete extraction -- the return type line, braces,
   the loop-advance statements and the code that unlinks OCC and links
   the newly created intermediate-dominator node (original lines
   ~265-285) are missing.  The visible logic walks the sibling list at
   *P_HEAD comparing each node's block with NEW_OCC->bb via
   nearest_common_dominator and either adopts, recurses, or creates an
   intermediate node.  Kept byte-identical; restore from version
   control rather than hand-editing.  */
226 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
227 list of "struct occurrence"s, one per basic block, having IDOM as
228 their common dominator.
230 We try to insert NEW_OCC as deep as possible in the tree, and we also
231 insert any other block that is a common dominator for BB and one
232 block already in the tree. */
235 insert_bb (struct occurrence
*new_occ
, basic_block idom
,
236 struct occurrence
**p_head
)
238 struct occurrence
*occ
, **p_occ
;
240 for (p_occ
= p_head
; (occ
= *p_occ
) != NULL
; )
242 basic_block bb
= new_occ
->bb
, occ_bb
= occ
->bb
;
243 basic_block dom
= nearest_common_dominator (CDI_DOMINATORS
, occ_bb
, bb
);
246 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
249 occ
->next
= new_occ
->children
;
250 new_occ
->children
= occ
;
252 /* Try the next block (it may as well be dominated by BB). */
255 else if (dom
== occ_bb
)
257 /* OCC_BB dominates BB. Tail recurse to look deeper. */
258 insert_bb (new_occ
, dom
, &occ
->children
);
262 else if (dom
!= idom
)
264 gcc_assert (!dom
->aux
);
266 /* There is a dominator between IDOM and BB, add it and make
267 two children out of NEW_OCC and OCC. First, remove OCC from
273 /* None of the previous blocks has DOM as a dominator: if we tail
274 recursed, we would reexamine them uselessly. Just switch BB with
275 DOM, and go on looking for blocks dominated by DOM. */
276 new_occ
= new occurrence (dom
, new_occ
);
281 /* Nothing special, go on with the next element. */
286 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
287 new_occ
->next
= *p_head
;
291 /* Register that we found a division in BB.
292 IMPORTANCE is a measure of how much weighting to give
293 that division. Use IMPORTANCE = 2 to register a single
294 division. If the division is going to be found multiple
295 times use 1 (as it is with squares). */
298 register_division_in (basic_block bb
, int importance
)
300 struct occurrence
*occ
;
302 occ
= (struct occurrence
*) bb
->aux
;
305 occ
= new occurrence (bb
, NULL
);
306 insert_bb (occ
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), &occ_head
);
309 occ
->bb_has_division
= true;
310 occ
->num_divisions
+= importance
;
/* NOTE(review): incomplete extraction -- the signature's return-type
   line, braces, the declaration of the local BB, and the lines between
   the recursive call and the single_noncomplex_succ assignment
   (original lines ~328-333) are missing.  The visible logic walks
   OCC's children post-order and accumulates each child's division
   count into OCC when the child's block post-dominates DOM's
   successor.  Kept byte-identical.  */
314 /* Compute the number of divisions that postdominate each block in OCC and
318 compute_merit (struct occurrence
*occ
)
320 struct occurrence
*occ_child
;
321 basic_block dom
= occ
->bb
;
323 for (occ_child
= occ
->children
; occ_child
; occ_child
= occ_child
->next
)
326 if (occ_child
->children
)
327 compute_merit (occ_child
);
330 bb
= single_noncomplex_succ (dom
);
334 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, occ_child
->bb
))
335 occ
->num_divisions
+= occ_child
->num_divisions
;
340 /* Return whether USE_STMT is a floating-point division by DEF. */
342 is_division_by (gimple
*use_stmt
, tree def
)
344 return is_gimple_assign (use_stmt
)
345 && gimple_assign_rhs_code (use_stmt
) == RDIV_EXPR
346 && gimple_assign_rhs2 (use_stmt
) == def
347 /* Do not recognize x / x as valid division, as we are getting
348 confused later by replacing all immediate uses x in such
350 && gimple_assign_rhs1 (use_stmt
) != def
351 && !stmt_can_throw_internal (cfun
, use_stmt
);
354 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
356 is_mult_by (gimple
*use_stmt
, tree def
, tree a
)
358 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
359 && gimple_assign_rhs_code (use_stmt
) == MULT_EXPR
)
361 tree op0
= gimple_assign_rhs1 (use_stmt
);
362 tree op1
= gimple_assign_rhs2 (use_stmt
);
364 return (op0
== def
&& op1
== a
)
365 || (op0
== a
&& op1
== def
);
370 /* Return whether USE_STMT is DEF * DEF. */
372 is_square_of (gimple
*use_stmt
, tree def
)
374 return is_mult_by (use_stmt
, def
, def
);
377 /* Return whether USE_STMT is a floating-point division by
380 is_division_by_square (gimple
*use_stmt
, tree def
)
382 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
383 && gimple_assign_rhs_code (use_stmt
) == RDIV_EXPR
384 && gimple_assign_rhs1 (use_stmt
) != gimple_assign_rhs2 (use_stmt
)
385 && !stmt_can_throw_internal (cfun
, use_stmt
))
387 tree denominator
= gimple_assign_rhs2 (use_stmt
);
388 if (TREE_CODE (denominator
) == SSA_NAME
)
389 return is_square_of (SSA_NAME_DEF_STMT (denominator
), def
);
/* NOTE(review): incomplete extraction -- the return type line, braces,
   the head of the profitability condition (original ~416-417), the
   while-loop advance, and the recursion's final argument/closing lines
   are missing.  The visible logic: when OCC has enough post-dominating
   divisions (num_divisions / 2 >= threshold) and insertion is legal
   (bb_has_division or !flag_trapping_math), build recip_def =
   1.0 / def (and optionally its square) and insert it by one of three
   placement cases, then recurse into OCC's children passing the
   definitions down.  Kept byte-identical.  */
394 /* Walk the subset of the dominator tree rooted at OCC, setting the
395 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
396 the given basic block. The field may be left NULL, of course,
397 if it is not possible or profitable to do the optimization.
399 DEF_BSI is an iterator pointing at the statement defining DEF.
400 If RECIP_DEF is set, a dominator already has a computation that can
403 If should_insert_square_recip is set, then this also inserts
404 the square of the reciprocal immediately after the definition
405 of the reciprocal. */
408 insert_reciprocals (gimple_stmt_iterator
*def_gsi
, struct occurrence
*occ
,
409 tree def
, tree recip_def
, tree square_recip_def
,
410 int should_insert_square_recip
, int threshold
)
413 gassign
*new_stmt
, *new_square_stmt
;
414 gimple_stmt_iterator gsi
;
415 struct occurrence
*occ_child
;
418 && (occ
->bb_has_division
|| !flag_trapping_math
)
419 /* Divide by two as all divisions are counted twice in
421 && occ
->num_divisions
/ 2 >= threshold
)
423 /* Make a variable with the replacement and substitute it. */
424 type
= TREE_TYPE (def
);
425 recip_def
= create_tmp_reg (type
, "reciptmp");
426 new_stmt
= gimple_build_assign (recip_def
, RDIV_EXPR
,
427 build_one_cst (type
), def
);
429 if (should_insert_square_recip
)
431 square_recip_def
= create_tmp_reg (type
, "powmult_reciptmp");
432 new_square_stmt
= gimple_build_assign (square_recip_def
, MULT_EXPR
,
433 recip_def
, recip_def
);
436 if (occ
->bb_has_division
)
438 /* Case 1: insert before an existing division. */
439 gsi
= gsi_after_labels (occ
->bb
);
440 while (!gsi_end_p (gsi
)
441 && (!is_division_by (gsi_stmt (gsi
), def
))
442 && (!is_division_by_square (gsi_stmt (gsi
), def
)))
445 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
446 if (should_insert_square_recip
)
447 gsi_insert_before (&gsi
, new_square_stmt
, GSI_SAME_STMT
);
449 else if (def_gsi
&& occ
->bb
== gsi_bb (*def_gsi
))
451 /* Case 2: insert right after the definition. Note that this will
452 never happen if the definition statement can throw, because in
453 that case the sole successor of the statement's basic block will
454 dominate all the uses as well. */
455 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
456 if (should_insert_square_recip
)
457 gsi_insert_after (def_gsi
, new_square_stmt
, GSI_NEW_STMT
);
461 /* Case 3: insert in a basic block not containing defs/uses. */
462 gsi
= gsi_after_labels (occ
->bb
);
463 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
464 if (should_insert_square_recip
)
465 gsi_insert_before (&gsi
, new_square_stmt
, GSI_SAME_STMT
);
468 reciprocal_stats
.rdivs_inserted
++;
470 occ
->recip_def_stmt
= new_stmt
;
473 occ
->recip_def
= recip_def
;
474 occ
->square_recip_def
= square_recip_def
;
475 for (occ_child
= occ
->children
; occ_child
; occ_child
= occ_child
->next
)
476 insert_reciprocals (def_gsi
, occ_child
, def
, recip_def
,
477 square_recip_def
, should_insert_square_recip
,
481 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
482 Take as argument the use for (x * x). */
484 replace_reciprocal_squares (use_operand_p use_p
)
486 gimple
*use_stmt
= USE_STMT (use_p
);
487 basic_block bb
= gimple_bb (use_stmt
);
488 struct occurrence
*occ
= (struct occurrence
*) bb
->aux
;
490 if (optimize_bb_for_speed_p (bb
) && occ
->square_recip_def
493 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
494 gimple_assign_set_rhs_code (use_stmt
, MULT_EXPR
);
495 gimple_assign_set_rhs2 (use_stmt
, occ
->square_recip_def
);
496 SET_USE (use_p
, occ
->square_recip_def
);
497 fold_stmt_inplace (&gsi
);
498 update_stmt (use_stmt
);
503 /* Replace the division at USE_P with a multiplication by the reciprocal, if
507 replace_reciprocal (use_operand_p use_p
)
509 gimple
*use_stmt
= USE_STMT (use_p
);
510 basic_block bb
= gimple_bb (use_stmt
);
511 struct occurrence
*occ
= (struct occurrence
*) bb
->aux
;
513 if (optimize_bb_for_speed_p (bb
)
514 && occ
->recip_def
&& use_stmt
!= occ
->recip_def_stmt
)
516 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
517 gimple_assign_set_rhs_code (use_stmt
, MULT_EXPR
);
518 SET_USE (use_p
, occ
->recip_def
);
519 fold_stmt_inplace (&gsi
);
520 update_stmt (use_stmt
);
/* NOTE(review): incomplete extraction -- the braces, the statements
   that read occ->next and delete OCC, and the recursion-avoidance
   control flow (original ~533-548) are missing.  The visible logic
   frees OCC after saving its child and sibling pointers, then frees
   sibling nodes iteratively (next = free_bb (next)) so only the child
   chain recurses.  Kept byte-identical.  */
525 /* Free OCC and return one more "struct occurrence" to be freed. */
527 static struct occurrence
*
528 free_bb (struct occurrence
*occ
)
530 struct occurrence
*child
, *next
;
532 /* First get the two pointers hanging off OCC. */
534 child
= occ
->children
;
537 /* Now ensure that we don't recurse unless it is necessary. */
543 next
= free_bb (next
);
/* NOTE(review): incomplete extraction -- the example in the header
   comment, the signature's return-type line, braces, several
   declarations (use_stmt, x, sqrt_stmt, new_stmt), dump_file guards,
   the switch cases, and the closing control flow are all missing.
   The visible logic: for x = 1.0 / sqrt (a), classify uses of x into
   x*x, a*x and other; rewrite x*x uses as 1.0/a, a*x uses as sqrt (a),
   and recompose x itself only when it has other uses.  Kept
   byte-identical; restore from version control.  */
549 /* Transform sequences like
559 depending on the uses of x, r1, r2. This removes one multiplication and
560 allows the sqrt and division operations to execute in parallel.
561 DEF_GSI is the gsi of the initial division by sqrt that defines
562 DEF (x in the example above). */
565 optimize_recip_sqrt (gimple_stmt_iterator
*def_gsi
, tree def
)
568 imm_use_iterator use_iter
;
569 gimple
*stmt
= gsi_stmt (*def_gsi
);
571 tree orig_sqrt_ssa_name
= gimple_assign_rhs2 (stmt
);
572 tree div_rhs1
= gimple_assign_rhs1 (stmt
);
574 if (TREE_CODE (orig_sqrt_ssa_name
) != SSA_NAME
575 || TREE_CODE (div_rhs1
) != REAL_CST
576 || !real_equal (&TREE_REAL_CST (div_rhs1
), &dconst1
))
580 = dyn_cast
<gcall
*> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name
));
582 if (!sqrt_stmt
|| !gimple_call_lhs (sqrt_stmt
))
585 switch (gimple_call_combined_fn (sqrt_stmt
))
594 tree a
= gimple_call_arg (sqrt_stmt
, 0);
596 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
598 /* Statements that use x in x * x. */
599 auto_vec
<gimple
*> sqr_stmts
;
600 /* Statements that use x in a * x. */
601 auto_vec
<gimple
*> mult_stmts
;
602 bool has_other_use
= false;
603 bool mult_on_main_path
= false;
605 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, x
)
607 if (is_gimple_debug (use_stmt
))
609 if (is_square_of (use_stmt
, x
))
611 sqr_stmts
.safe_push (use_stmt
);
612 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
613 mult_on_main_path
= true;
615 else if (is_mult_by (use_stmt
, x
, a
))
617 mult_stmts
.safe_push (use_stmt
);
618 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
619 mult_on_main_path
= true;
622 has_other_use
= true;
625 /* In the x * x and a * x cases we just rewire stmt operands or
626 remove multiplications. In the has_other_use case we introduce
627 a multiplication so make sure we don't introduce a multiplication
628 on a path where there was none. */
629 if (has_other_use
&& !mult_on_main_path
)
632 if (sqr_stmts
.is_empty () && mult_stmts
.is_empty ())
635 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
636 to be able to compose it from the sqr and mult cases. */
637 if (has_other_use
&& (sqr_stmts
.is_empty () || mult_stmts
.is_empty ()))
642 fprintf (dump_file
, "Optimizing reciprocal sqrt multiplications of\n");
643 print_gimple_stmt (dump_file
, sqrt_stmt
, 0, TDF_NONE
);
644 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
645 fprintf (dump_file
, "\n");
648 bool delete_div
= !has_other_use
;
649 tree sqr_ssa_name
= NULL_TREE
;
650 if (!sqr_stmts
.is_empty ())
652 /* r1 = x * x. Transform the original
659 = make_temp_ssa_name (TREE_TYPE (a
), NULL
, "recip_sqrt_sqr");
663 fprintf (dump_file
, "Replacing original division\n");
664 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
665 fprintf (dump_file
, "with new division\n");
668 = gimple_build_assign (sqr_ssa_name
, gimple_assign_rhs_code (stmt
),
669 gimple_assign_rhs1 (stmt
), a
);
670 gsi_insert_before (def_gsi
, stmt
, GSI_SAME_STMT
);
671 gsi_remove (def_gsi
, true);
672 *def_gsi
= gsi_for_stmt (stmt
);
673 fold_stmt_inplace (def_gsi
);
677 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
682 FOR_EACH_VEC_ELT (sqr_stmts
, i
, sqr_stmt
)
684 gimple_stmt_iterator gsi2
= gsi_for_stmt (sqr_stmt
);
685 gimple_assign_set_rhs_from_tree (&gsi2
, sqr_ssa_name
);
686 update_stmt (sqr_stmt
);
689 if (!mult_stmts
.is_empty ())
691 /* r2 = a * x. Transform this into:
692 r2 = t (The original sqrt (a)). */
694 gimple
*mult_stmt
= NULL
;
695 FOR_EACH_VEC_ELT (mult_stmts
, i
, mult_stmt
)
697 gimple_stmt_iterator gsi2
= gsi_for_stmt (mult_stmt
);
701 fprintf (dump_file
, "Replacing squaring multiplication\n");
702 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
703 fprintf (dump_file
, "with assignment\n");
705 gimple_assign_set_rhs_from_tree (&gsi2
, orig_sqrt_ssa_name
);
706 fold_stmt_inplace (&gsi2
);
707 update_stmt (mult_stmt
);
709 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
715 /* Using the two temporaries tmp1, tmp2 from above
716 the original x is now:
718 gcc_assert (orig_sqrt_ssa_name
);
719 gcc_assert (sqr_ssa_name
);
722 = gimple_build_assign (x
, MULT_EXPR
,
723 orig_sqrt_ssa_name
, sqr_ssa_name
);
724 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
729 /* Remove the original division. */
730 gimple_stmt_iterator gsi2
= gsi_for_stmt (stmt
);
731 gsi_remove (&gsi2
, true);
735 release_ssa_name (x
);
/* NOTE(review): incomplete extraction -- the return type line, braces,
   the declarations of use_stmt/count/threshold/square_def, the
   sqrt_recip_count increments, the compute_merit call before
   insert_reciprocals (presumably in the missing lines ~818-822 -- TODO
   confirm), and the final cleanup loop body are missing.  The visible
   logic: count divisions by DEF (weight 2) and by DEF*DEF (weight 1,
   seen twice), bail out if optimizing 1/x directly is better, then
   insert reciprocals across the occurrence tree and rewrite each
   division use into a multiplication.  Kept byte-identical.  */
738 /* Look for floating-point divisions among DEF's uses, and try to
739 replace them by multiplications with the reciprocal. Add
740 as many statements computing the reciprocal as needed.
742 DEF must be a GIMPLE register of a floating-point type. */
745 execute_cse_reciprocals_1 (gimple_stmt_iterator
*def_gsi
, tree def
)
747 use_operand_p use_p
, square_use_p
;
748 imm_use_iterator use_iter
, square_use_iter
;
750 struct occurrence
*occ
;
753 int square_recip_count
= 0;
754 int sqrt_recip_count
= 0;
756 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def
)) && TREE_CODE (def
) == SSA_NAME
);
757 threshold
= targetm
.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def
)));
759 /* If DEF is a square (x * x), count the number of divisions by x.
760 If there are more divisions by x than by (DEF * DEF), prefer to optimize
761 the reciprocal of x instead of DEF. This improves cases like:
766 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
767 gimple
*def_stmt
= SSA_NAME_DEF_STMT (def
);
769 if (is_gimple_assign (def_stmt
)
770 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
771 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == SSA_NAME
772 && gimple_assign_rhs1 (def_stmt
) == gimple_assign_rhs2 (def_stmt
))
774 tree op0
= gimple_assign_rhs1 (def_stmt
);
776 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, op0
)
778 gimple
*use_stmt
= USE_STMT (use_p
);
779 if (is_division_by (use_stmt
, op0
))
784 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, def
)
786 gimple
*use_stmt
= USE_STMT (use_p
);
787 if (is_division_by (use_stmt
, def
))
789 register_division_in (gimple_bb (use_stmt
), 2);
793 if (is_square_of (use_stmt
, def
))
795 square_def
= gimple_assign_lhs (use_stmt
);
796 FOR_EACH_IMM_USE_FAST (square_use_p
, square_use_iter
, square_def
)
798 gimple
*square_use_stmt
= USE_STMT (square_use_p
);
799 if (is_division_by (square_use_stmt
, square_def
))
801 /* This is executed twice for each division by a square. */
802 register_division_in (gimple_bb (square_use_stmt
), 1);
803 square_recip_count
++;
809 /* Square reciprocals were counted twice above. */
810 square_recip_count
/= 2;
812 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
813 if (sqrt_recip_count
> square_recip_count
)
816 /* Do the expensive part only if we can hope to optimize something. */
817 if (count
+ square_recip_count
>= threshold
&& count
>= 1)
820 for (occ
= occ_head
; occ
; occ
= occ
->next
)
823 insert_reciprocals (def_gsi
, occ
, def
, NULL
, NULL
,
824 square_recip_count
, threshold
);
827 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, def
)
829 if (is_division_by (use_stmt
, def
))
831 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
832 replace_reciprocal (use_p
);
834 else if (square_recip_count
> 0 && is_square_of (use_stmt
, def
))
836 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
838 /* Find all uses of the square that are divisions and
839 * replace them by multiplications with the inverse. */
840 imm_use_iterator square_iterator
;
841 gimple
*powmult_use_stmt
= USE_STMT (use_p
);
842 tree powmult_def_name
= gimple_assign_lhs (powmult_use_stmt
);
844 FOR_EACH_IMM_USE_STMT (powmult_use_stmt
,
845 square_iterator
, powmult_def_name
)
846 FOR_EACH_IMM_USE_ON_STMT (square_use_p
, square_iterator
)
848 gimple
*powmult_use_stmt
= USE_STMT (square_use_p
);
849 if (is_division_by (powmult_use_stmt
, powmult_def_name
))
850 replace_reciprocal_squares (square_use_p
);
858 for (occ
= occ_head
; occ
; )
/* NOTE(review): incomplete extraction -- the return type line, braces,
   the declaration of IFN, the whole switch body (case labels mapping
   sqrt-like combined functions to their reciprocal internal functions)
   and the return statements are missing.  What remains shows the
   final target-support check via direct_internal_fn_supported_p.
   Kept byte-identical.  */
864 /* Return an internal function that implements the reciprocal of CALL,
865 or IFN_LAST if there is no such function that the target supports. */
868 internal_fn_reciprocal (gcall
*call
)
872 switch (gimple_call_combined_fn (call
))
883 tree_pair types
= direct_internal_fn_types (ifn
, call
);
884 if (!direct_internal_fn_supported_p (ifn
, types
, OPTIMIZE_FOR_SPEED
))
/* NOTE(review): incomplete extraction -- the opening brace of the
   initializer and the pass-name string field (the entry the /* name */
   comment refers to) are missing; the leading comment describes the
   pass's execute function, not this descriptor.  Kept byte-identical.  */
890 /* Go through all the floating-point SSA_NAMEs, and call
891 execute_cse_reciprocals_1 on each of them. */
894 const pass_data pass_data_cse_reciprocals
=
896 GIMPLE_PASS
, /* type */
898 OPTGROUP_NONE
, /* optinfo_flags */
899 TV_TREE_RECIP
, /* tv_id */
900 PROP_ssa
, /* properties_required */
901 0, /* properties_provided */
902 0, /* properties_destroyed */
903 0, /* todo_flags_start */
904 TODO_update_ssa
, /* todo_flags_finish */
907 class pass_cse_reciprocals
: public gimple_opt_pass
910 pass_cse_reciprocals (gcc::context
*ctxt
)
911 : gimple_opt_pass (pass_data_cse_reciprocals
, ctxt
)
914 /* opt_pass methods: */
915 virtual bool gate (function
*) { return optimize
&& flag_reciprocal_math
; }
916 virtual unsigned int execute (function
*);
918 }; // class pass_cse_reciprocals
/* NOTE(review): incomplete extraction -- the return type line, braces,
   several declarations (bb, arg, def, stmt1, stmt2, cfg_changed?),
   loop-advance expressions, a number of guard lines (e.g. the !fndecl
   half of the MD-builtin check around original line 1014), the
   occurrence-tree teardown, and the final return are missing.  The
   visible logic: set up dominance info and the occurrence pool, run
   execute_cse_reciprocals_1 over default defs of FP arguments, PHI
   results and single FP defs in every block, optionally run
   optimize_recip_sqrt, then rewrite a/func(b) into a*rfunc(b) using
   either a target builtin or an internal function, and finally record
   statistics and free dominance info.  Kept byte-identical.  */
921 pass_cse_reciprocals::execute (function
*fun
)
926 occ_pool
= new object_allocator
<occurrence
> ("dominators for recip");
928 memset (&reciprocal_stats
, 0, sizeof (reciprocal_stats
));
929 calculate_dominance_info (CDI_DOMINATORS
);
930 calculate_dominance_info (CDI_POST_DOMINATORS
);
933 FOR_EACH_BB_FN (bb
, fun
)
934 gcc_assert (!bb
->aux
);
936 for (arg
= DECL_ARGUMENTS (fun
->decl
); arg
; arg
= DECL_CHAIN (arg
))
937 if (FLOAT_TYPE_P (TREE_TYPE (arg
))
938 && is_gimple_reg (arg
))
940 tree name
= ssa_default_def (fun
, arg
);
942 execute_cse_reciprocals_1 (NULL
, name
);
945 FOR_EACH_BB_FN (bb
, fun
)
949 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
952 gphi
*phi
= gsi
.phi ();
953 def
= PHI_RESULT (phi
);
954 if (! virtual_operand_p (def
)
955 && FLOAT_TYPE_P (TREE_TYPE (def
)))
956 execute_cse_reciprocals_1 (NULL
, def
);
959 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
962 gimple
*stmt
= gsi_stmt (gsi
);
964 if (gimple_has_lhs (stmt
)
965 && (def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
)) != NULL
966 && FLOAT_TYPE_P (TREE_TYPE (def
))
967 && TREE_CODE (def
) == SSA_NAME
)
969 execute_cse_reciprocals_1 (&gsi
, def
);
970 stmt
= gsi_stmt (gsi
);
971 if (flag_unsafe_math_optimizations
972 && is_gimple_assign (stmt
)
973 && gimple_assign_lhs (stmt
) == def
974 && !stmt_can_throw_internal (cfun
, stmt
)
975 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
976 optimize_recip_sqrt (&gsi
, def
);
980 if (optimize_bb_for_size_p (bb
))
983 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
984 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
987 gimple
*stmt
= gsi_stmt (gsi
);
989 if (is_gimple_assign (stmt
)
990 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
992 tree arg1
= gimple_assign_rhs2 (stmt
);
995 if (TREE_CODE (arg1
) != SSA_NAME
)
998 stmt1
= SSA_NAME_DEF_STMT (arg1
);
1000 if (is_gimple_call (stmt1
)
1001 && gimple_call_lhs (stmt1
))
1004 imm_use_iterator ui
;
1005 use_operand_p use_p
;
1006 tree fndecl
= NULL_TREE
;
1008 gcall
*call
= as_a
<gcall
*> (stmt1
);
1009 internal_fn ifn
= internal_fn_reciprocal (call
);
1010 if (ifn
== IFN_LAST
)
1012 fndecl
= gimple_call_fndecl (call
);
1014 || !fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
1016 fndecl
= targetm
.builtin_reciprocal (fndecl
);
1021 /* Check that all uses of the SSA name are divisions,
1022 otherwise replacing the defining statement will do
1025 FOR_EACH_IMM_USE_FAST (use_p
, ui
, arg1
)
1027 gimple
*stmt2
= USE_STMT (use_p
);
1028 if (is_gimple_debug (stmt2
))
1030 if (!is_gimple_assign (stmt2
)
1031 || gimple_assign_rhs_code (stmt2
) != RDIV_EXPR
1032 || gimple_assign_rhs1 (stmt2
) == arg1
1033 || gimple_assign_rhs2 (stmt2
) != arg1
)
1042 gimple_replace_ssa_lhs (call
, arg1
);
1043 if (gimple_call_internal_p (call
) != (ifn
!= IFN_LAST
))
1045 auto_vec
<tree
, 4> args
;
1046 for (unsigned int i
= 0;
1047 i
< gimple_call_num_args (call
); i
++)
1048 args
.safe_push (gimple_call_arg (call
, i
));
1050 if (ifn
== IFN_LAST
)
1051 stmt2
= gimple_build_call_vec (fndecl
, args
);
1053 stmt2
= gimple_build_call_internal_vec (ifn
, args
);
1054 gimple_call_set_lhs (stmt2
, arg1
);
1055 gimple_move_vops (stmt2
, call
);
1056 gimple_call_set_nothrow (stmt2
,
1057 gimple_call_nothrow_p (call
));
1058 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
1059 gsi_replace (&gsi2
, stmt2
, true);
1063 if (ifn
== IFN_LAST
)
1064 gimple_call_set_fndecl (call
, fndecl
);
1066 gimple_call_set_internal_fn (call
, ifn
);
1069 reciprocal_stats
.rfuncs_inserted
++;
1071 FOR_EACH_IMM_USE_STMT (stmt
, ui
, arg1
)
1073 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1074 gimple_assign_set_rhs_code (stmt
, MULT_EXPR
);
1075 fold_stmt_inplace (&gsi
);
1083 statistics_counter_event (fun
, "reciprocal divs inserted",
1084 reciprocal_stats
.rdivs_inserted
);
1085 statistics_counter_event (fun
, "reciprocal functions inserted",
1086 reciprocal_stats
.rfuncs_inserted
);
1088 free_dominance_info (CDI_DOMINATORS
);
1089 free_dominance_info (CDI_POST_DOMINATORS
);
1097 make_pass_cse_reciprocals (gcc::context
*ctxt
)
1099 return new pass_cse_reciprocals (ctxt
);
1102 /* Records an occurrence at statement USE_STMT in the vector of trees
1103 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1104 is not yet initialized. Returns true if the occurrence was pushed on
1105 the vector. Adjusts *TOP_BB to be the basic block dominating all
1106 statements in the vector. */
1109 maybe_record_sincos (vec
<gimple
*> *stmts
,
1110 basic_block
*top_bb
, gimple
*use_stmt
)
1112 basic_block use_bb
= gimple_bb (use_stmt
);
1114 && (*top_bb
== use_bb
1115 || dominated_by_p (CDI_DOMINATORS
, use_bb
, *top_bb
)))
1116 stmts
->safe_push (use_stmt
);
1118 || dominated_by_p (CDI_DOMINATORS
, *top_bb
, use_bb
))
1120 stmts
->safe_push (use_stmt
);
/* NOTE(review): incomplete extraction -- the return type line, braces,
   the loop-advance lines, the switch case labels (CFN_COS / CFN_SIN /
   CFN_CEXPI, presumably -- TODO confirm against version control), the
   early returns, and the function tail (return of cfg_changed) are
   missing.  The visible logic: collect sin/cos/cexpi calls on NAME
   dominated by a common top block, insert one cexpi call, and rewrite
   each old call into a REALPART_EXPR/IMAGPART_EXPR copy of its
   result.  Kept byte-identical.  */
1129 /* Look for sin, cos and cexpi calls with the same argument NAME and
1130 create a single call to cexpi CSEing the result in this case.
1131 We first walk over all immediate uses of the argument collecting
1132 statements that we can CSE in a vector and in a second pass replace
1133 the statement rhs with a REALPART or IMAGPART expression on the
1134 result of the cexpi call we insert before the use statement that
1135 dominates all other candidates. */
1138 execute_cse_sincos_1 (tree name
)
1140 gimple_stmt_iterator gsi
;
1141 imm_use_iterator use_iter
;
1142 tree fndecl
, res
, type
= NULL_TREE
;
1143 gimple
*def_stmt
, *use_stmt
, *stmt
;
1144 int seen_cos
= 0, seen_sin
= 0, seen_cexpi
= 0;
1145 auto_vec
<gimple
*> stmts
;
1146 basic_block top_bb
= NULL
;
1148 bool cfg_changed
= false;
1150 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, name
)
1152 if (gimple_code (use_stmt
) != GIMPLE_CALL
1153 || !gimple_call_lhs (use_stmt
))
1156 switch (gimple_call_combined_fn (use_stmt
))
1159 seen_cos
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1163 seen_sin
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1167 seen_cexpi
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1174 tree t
= mathfn_built_in_type (gimple_call_combined_fn (use_stmt
));
1178 t
= TREE_TYPE (name
);
1180 /* This checks that NAME has the right type in the first round,
1181 and, in subsequent rounds, that the built_in type is the same
1182 type, or a compatible type. */
1183 if (type
!= t
&& !types_compatible_p (type
, t
))
1186 if (seen_cos
+ seen_sin
+ seen_cexpi
<= 1)
1189 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1190 the name def statement. */
1191 fndecl
= mathfn_built_in (type
, BUILT_IN_CEXPI
);
1194 stmt
= gimple_build_call (fndecl
, 1, name
);
1195 res
= make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl
)), stmt
, "sincostmp");
1196 gimple_call_set_lhs (stmt
, res
);
1198 def_stmt
= SSA_NAME_DEF_STMT (name
);
1199 if (!SSA_NAME_IS_DEFAULT_DEF (name
)
1200 && gimple_code (def_stmt
) != GIMPLE_PHI
1201 && gimple_bb (def_stmt
) == top_bb
)
1203 gsi
= gsi_for_stmt (def_stmt
);
1204 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1208 gsi
= gsi_after_labels (top_bb
);
1209 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1211 sincos_stats
.inserted
++;
1213 /* And adjust the recorded old call sites. */
1214 for (i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
1218 switch (gimple_call_combined_fn (use_stmt
))
1221 rhs
= fold_build1 (REALPART_EXPR
, type
, res
);
1225 rhs
= fold_build1 (IMAGPART_EXPR
, type
, res
);
1236 /* Replace call with a copy. */
1237 stmt
= gimple_build_assign (gimple_call_lhs (use_stmt
), rhs
);
1239 gsi
= gsi_for_stmt (use_stmt
);
1240 gsi_replace (&gsi
, stmt
, true);
1241 if (gimple_purge_dead_eh_edges (gimple_bb (stmt
)))
1248 /* To evaluate powi(x,n), the floating point value x raised to the
1249 constant integer exponent n, we use a hybrid algorithm that
1250 combines the "window method" with look-up tables. For an
1251 introduction to exponentiation algorithms and "addition chains",
1252 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1253 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1254 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1255 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1257 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1258 multiplications to inline before calling the system library's pow
1259 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1260 so this default never requires calling pow, powf or powl. */
1262 #ifndef POWI_MAX_MULTS
1263 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1266 /* The size of the "optimal power tree" lookup table. All
1267 exponents less than this value are simply looked up in the
1268 powi_table below. This threshold is also used to size the
1269 cache of pseudo registers that hold intermediate results. */
1270 #define POWI_TABLE_SIZE 256
1272 /* The size, in bits of the window, used in the "window method"
1273 exponentiation algorithm. This is equivalent to a radix of
1274 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1275 #define POWI_WINDOW_SIZE 3
1277 /* The following table is an efficient representation of an
1278 "optimal power tree". For each value, i, the corresponding
1279 value, j, in the table states than an optimal evaluation
1280 sequence for calculating pow(x,i) can be found by evaluating
1281 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1282 100 integers is given in Knuth's "Seminumerical algorithms". */
1284 static const unsigned char powi_table
[POWI_TABLE_SIZE
] =
1286 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1287 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1288 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1289 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1290 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1291 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1292 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1293 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1294 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1295 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1296 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1297 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1298 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1299 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1300 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1301 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1302 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1303 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1304 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1305 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1306 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1307 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1308 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1309 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1310 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1311 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1312 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1313 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1314 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1315 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1316 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1317 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1321 /* Return the number of multiplications required to calculate
1322 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1323 subroutine of powi_cost. CACHE is an array indicating
1324 which exponents have already been calculated. */
1327 powi_lookup_cost (unsigned HOST_WIDE_INT n
, bool *cache
)
1329 /* If we've already calculated this exponent, then this evaluation
1330 doesn't require any additional multiplications. */
1335 return powi_lookup_cost (n
- powi_table
[n
], cache
)
1336 + powi_lookup_cost (powi_table
[n
], cache
) + 1;
1339 /* Return the number of multiplications required to calculate
1340 powi(x,n) for an arbitrary x, given the exponent N. This
1341 function needs to be kept in sync with powi_as_mults below. */
1344 powi_cost (HOST_WIDE_INT n
)
1346 bool cache
[POWI_TABLE_SIZE
];
1347 unsigned HOST_WIDE_INT digit
;
1348 unsigned HOST_WIDE_INT val
;
1354 /* Ignore the reciprocal when calculating the cost. */
1355 val
= (n
< 0) ? -n
: n
;
1357 /* Initialize the exponent cache. */
1358 memset (cache
, 0, POWI_TABLE_SIZE
* sizeof (bool));
1363 while (val
>= POWI_TABLE_SIZE
)
1367 digit
= val
& ((1 << POWI_WINDOW_SIZE
) - 1);
1368 result
+= powi_lookup_cost (digit
, cache
)
1369 + POWI_WINDOW_SIZE
+ 1;
1370 val
>>= POWI_WINDOW_SIZE
;
1379 return result
+ powi_lookup_cost (val
, cache
);
1382 /* Recursive subroutine of powi_as_mults. This function takes the
1383 array, CACHE, of already calculated exponents and an exponent N and
1384 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1387 powi_as_mults_1 (gimple_stmt_iterator
*gsi
, location_t loc
, tree type
,
1388 HOST_WIDE_INT n
, tree
*cache
)
1390 tree op0
, op1
, ssa_target
;
1391 unsigned HOST_WIDE_INT digit
;
1394 if (n
< POWI_TABLE_SIZE
&& cache
[n
])
1397 ssa_target
= make_temp_ssa_name (type
, NULL
, "powmult");
1399 if (n
< POWI_TABLE_SIZE
)
1401 cache
[n
] = ssa_target
;
1402 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- powi_table
[n
], cache
);
1403 op1
= powi_as_mults_1 (gsi
, loc
, type
, powi_table
[n
], cache
);
1407 digit
= n
& ((1 << POWI_WINDOW_SIZE
) - 1);
1408 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- digit
, cache
);
1409 op1
= powi_as_mults_1 (gsi
, loc
, type
, digit
, cache
);
1413 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
>> 1, cache
);
1417 mult_stmt
= gimple_build_assign (ssa_target
, MULT_EXPR
, op0
, op1
);
1418 gimple_set_location (mult_stmt
, loc
);
1419 gsi_insert_before (gsi
, mult_stmt
, GSI_SAME_STMT
);
1424 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1425 This function needs to be kept in sync with powi_cost above. */
1428 powi_as_mults (gimple_stmt_iterator
*gsi
, location_t loc
,
1429 tree arg0
, HOST_WIDE_INT n
)
1431 tree cache
[POWI_TABLE_SIZE
], result
, type
= TREE_TYPE (arg0
);
1436 return build_real (type
, dconst1
);
1438 memset (cache
, 0, sizeof (cache
));
1441 result
= powi_as_mults_1 (gsi
, loc
, type
, (n
< 0) ? -n
: n
, cache
);
1445 /* If the original exponent was negative, reciprocate the result. */
1446 target
= make_temp_ssa_name (type
, NULL
, "powmult");
1447 div_stmt
= gimple_build_assign (target
, RDIV_EXPR
,
1448 build_real (type
, dconst1
), result
);
1449 gimple_set_location (div_stmt
, loc
);
1450 gsi_insert_before (gsi
, div_stmt
, GSI_SAME_STMT
);
1455 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1456 location info LOC. If the arguments are appropriate, create an
1457 equivalent sequence of statements prior to GSI using an optimal
1458 number of multiplications, and return an expession holding the
1462 gimple_expand_builtin_powi (gimple_stmt_iterator
*gsi
, location_t loc
,
1463 tree arg0
, HOST_WIDE_INT n
)
1465 /* Avoid largest negative number. */
1467 && ((n
>= -1 && n
<= 2)
1468 || (optimize_function_for_speed_p (cfun
)
1469 && powi_cost (n
) <= POWI_MAX_MULTS
)))
1470 return powi_as_mults (gsi
, loc
, arg0
, n
);
1475 /* Build a gimple call statement that calls FN with argument ARG.
1476 Set the lhs of the call statement to a fresh SSA name. Insert the
1477 statement prior to GSI's current position, and return the fresh
1481 build_and_insert_call (gimple_stmt_iterator
*gsi
, location_t loc
,
1487 call_stmt
= gimple_build_call (fn
, 1, arg
);
1488 ssa_target
= make_temp_ssa_name (TREE_TYPE (arg
), NULL
, "powroot");
1489 gimple_set_lhs (call_stmt
, ssa_target
);
1490 gimple_set_location (call_stmt
, loc
);
1491 gsi_insert_before (gsi
, call_stmt
, GSI_SAME_STMT
);
1496 /* Build a gimple binary operation with the given CODE and arguments
1497 ARG0, ARG1, assigning the result to a new SSA name for variable
1498 TARGET. Insert the statement prior to GSI's current position, and
1499 return the fresh SSA name.*/
1502 build_and_insert_binop (gimple_stmt_iterator
*gsi
, location_t loc
,
1503 const char *name
, enum tree_code code
,
1504 tree arg0
, tree arg1
)
1506 tree result
= make_temp_ssa_name (TREE_TYPE (arg0
), NULL
, name
);
1507 gassign
*stmt
= gimple_build_assign (result
, code
, arg0
, arg1
);
1508 gimple_set_location (stmt
, loc
);
1509 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1513 /* Build a gimple reference operation with the given CODE and argument
1514 ARG, assigning the result to a new SSA name of TYPE with NAME.
1515 Insert the statement prior to GSI's current position, and return
1516 the fresh SSA name. */
1519 build_and_insert_ref (gimple_stmt_iterator
*gsi
, location_t loc
, tree type
,
1520 const char *name
, enum tree_code code
, tree arg0
)
1522 tree result
= make_temp_ssa_name (type
, NULL
, name
);
1523 gimple
*stmt
= gimple_build_assign (result
, build1 (code
, type
, arg0
));
1524 gimple_set_location (stmt
, loc
);
1525 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1529 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1530 prior to GSI's current position, and return the fresh SSA name. */
1533 build_and_insert_cast (gimple_stmt_iterator
*gsi
, location_t loc
,
1534 tree type
, tree val
)
1536 tree result
= make_ssa_name (type
);
1537 gassign
*stmt
= gimple_build_assign (result
, NOP_EXPR
, val
);
1538 gimple_set_location (stmt
, loc
);
1539 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
/* Bookkeeping for synthesizing POW (x, frac) as chains of square roots:
   which 0.5**i factors participate, the depth of the deepest sqrt
   chain, and the number of multiplications the expansion needs.  */

struct pow_synth_sqrt_info
{
  /* factors[i] is true iff the term 0.5**(i+1) participates in the sum
     (caller-allocated array).  */
  bool *factors;
  unsigned int deepest;
  unsigned int num_mults;
};
1550 /* Return true iff the real value C can be represented as a
1551 sum of powers of 0.5 up to N. That is:
1552 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1553 Record in INFO the various parameters of the synthesis algorithm such
1554 as the factors a[i], the maximum 0.5 power and the number of
1555 multiplications that will be required. */
1558 representable_as_half_series_p (REAL_VALUE_TYPE c
, unsigned n
,
1559 struct pow_synth_sqrt_info
*info
)
1561 REAL_VALUE_TYPE factor
= dconsthalf
;
1562 REAL_VALUE_TYPE remainder
= c
;
1565 info
->num_mults
= 0;
1566 memset (info
->factors
, 0, n
* sizeof (bool));
1568 for (unsigned i
= 0; i
< n
; i
++)
1570 REAL_VALUE_TYPE res
;
1572 /* If something inexact happened bail out now. */
1573 if (real_arithmetic (&res
, MINUS_EXPR
, &remainder
, &factor
))
1576 /* We have hit zero. The number is representable as a sum
1577 of powers of 0.5. */
1578 if (real_equal (&res
, &dconst0
))
1580 info
->factors
[i
] = true;
1581 info
->deepest
= i
+ 1;
1584 else if (!REAL_VALUE_NEGATIVE (res
))
1587 info
->factors
[i
] = true;
1591 info
->factors
[i
] = false;
1593 real_arithmetic (&factor
, MULT_EXPR
, &factor
, &dconsthalf
);
1598 /* Return the tree corresponding to FN being applied
1599 to ARG N times at GSI and LOC.
1600 Look up previous results from CACHE if need be.
1601 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1604 get_fn_chain (tree arg
, unsigned int n
, gimple_stmt_iterator
*gsi
,
1605 tree fn
, location_t loc
, tree
*cache
)
1607 tree res
= cache
[n
];
1610 tree prev
= get_fn_chain (arg
, n
- 1, gsi
, fn
, loc
, cache
);
1611 res
= build_and_insert_call (gsi
, loc
, fn
, prev
);
/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   foo (foo (x)).  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  if (n == 0)
    fprintf (stream, "%s", arg);
  else
    {
      fprintf (stream, "%s (", fname);
      print_nested_fn (stream, fname, arg, n - 1);
      fprintf (stream, ")");
    }
}
1636 /* Print to STREAM the fractional sequence of sqrt chains
1637 applied to ARG, described by INFO. Used for the dump file. */
1640 dump_fractional_sqrt_sequence (FILE *stream
, const char *arg
,
1641 struct pow_synth_sqrt_info
*info
)
1643 for (unsigned int i
= 0; i
< info
->deepest
; i
++)
1645 bool is_set
= info
->factors
[i
];
1648 print_nested_fn (stream
, "sqrt", arg
, i
+ 1);
1649 if (i
!= info
->deepest
- 1)
1650 fprintf (stream
, " * ");
1655 /* Print to STREAM a representation of raising ARG to an integer
1656 power N. Used for the dump file. */
1659 dump_integer_part (FILE *stream
, const char* arg
, HOST_WIDE_INT n
)
1662 fprintf (stream
, "powi (%s, " HOST_WIDE_INT_PRINT_DEC
")", arg
, n
);
1664 fprintf (stream
, "%s", arg
);
1667 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1668 square roots. Place at GSI and LOC. Limit the maximum depth
1669 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1670 result of the expanded sequence or NULL_TREE if the expansion failed.
1672 This routine assumes that ARG1 is a real number with a fractional part
1673 (the integer exponent case will have been handled earlier in
1674 gimple_expand_builtin_pow).
1677 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1678 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1679 FRAC_PART == ARG1 - WHOLE_PART:
1680 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1681 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1682 if it can be expressed as such, that is if FRAC_PART satisfies:
1683 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1684 where integer a[i] is either 0 or 1.
1687 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1688 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1690 For ARG1 < 0.0 there are two approaches:
1691 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1692 is calculated as above.
1695 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1696 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1698 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1699 FRAC_PART := ARG1 - WHOLE_PART
1700 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1702 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1703 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1705 For ARG1 < 0.0 we choose between (A) and (B) depending on
1706 how many multiplications we'd have to do.
1707 So, for the example in (B): POW (x, -5.875), if we were to
1708 follow algorithm (A) we would produce:
1709 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1710 which contains more multiplications than approach (B).
1712 Hopefully, this approach will eliminate potentially expensive POW library
1713 calls when unsafe floating point math is enabled and allow the compiler to
1714 further optimise the multiplies, square roots and divides produced by this
1718 expand_pow_as_sqrts (gimple_stmt_iterator
*gsi
, location_t loc
,
1719 tree arg0
, tree arg1
, HOST_WIDE_INT max_depth
)
1721 tree type
= TREE_TYPE (arg0
);
1722 machine_mode mode
= TYPE_MODE (type
);
1723 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
1724 bool one_over
= true;
1729 if (TREE_CODE (arg1
) != REAL_CST
)
1732 REAL_VALUE_TYPE exp_init
= TREE_REAL_CST (arg1
);
1734 gcc_assert (max_depth
> 0);
1735 tree
*cache
= XALLOCAVEC (tree
, max_depth
+ 1);
1737 struct pow_synth_sqrt_info synth_info
;
1738 synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1739 synth_info
.deepest
= 0;
1740 synth_info
.num_mults
= 0;
1742 bool neg_exp
= REAL_VALUE_NEGATIVE (exp_init
);
1743 REAL_VALUE_TYPE exp
= real_value_abs (&exp_init
);
1745 /* The whole and fractional parts of exp. */
1746 REAL_VALUE_TYPE whole_part
;
1747 REAL_VALUE_TYPE frac_part
;
1749 real_floor (&whole_part
, mode
, &exp
);
1750 real_arithmetic (&frac_part
, MINUS_EXPR
, &exp
, &whole_part
);
1753 REAL_VALUE_TYPE ceil_whole
= dconst0
;
1754 REAL_VALUE_TYPE ceil_fract
= dconst0
;
1758 real_ceil (&ceil_whole
, mode
, &exp
);
1759 real_arithmetic (&ceil_fract
, MINUS_EXPR
, &ceil_whole
, &exp
);
1762 if (!representable_as_half_series_p (frac_part
, max_depth
, &synth_info
))
1765 /* Check whether it's more profitable to not use 1.0 / ... */
1768 struct pow_synth_sqrt_info alt_synth_info
;
1769 alt_synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1770 alt_synth_info
.deepest
= 0;
1771 alt_synth_info
.num_mults
= 0;
1773 if (representable_as_half_series_p (ceil_fract
, max_depth
,
1775 && alt_synth_info
.deepest
<= synth_info
.deepest
1776 && alt_synth_info
.num_mults
< synth_info
.num_mults
)
1778 whole_part
= ceil_whole
;
1779 frac_part
= ceil_fract
;
1780 synth_info
.deepest
= alt_synth_info
.deepest
;
1781 synth_info
.num_mults
= alt_synth_info
.num_mults
;
1782 memcpy (synth_info
.factors
, alt_synth_info
.factors
,
1783 (max_depth
+ 1) * sizeof (bool));
1788 HOST_WIDE_INT n
= real_to_integer (&whole_part
);
1789 REAL_VALUE_TYPE cint
;
1790 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
1792 if (!real_identical (&whole_part
, &cint
))
1795 if (powi_cost (n
) + synth_info
.num_mults
> POWI_MAX_MULTS
)
1798 memset (cache
, 0, (max_depth
+ 1) * sizeof (tree
));
1800 tree integer_res
= n
== 0 ? build_real (type
, dconst1
) : arg0
;
1802 /* Calculate the integer part of the exponent. */
1805 integer_res
= gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
1814 real_to_decimal (string
, &exp_init
, sizeof (string
), 0, 1);
1815 fprintf (dump_file
, "synthesizing pow (x, %s) as:\n", string
);
1821 fprintf (dump_file
, "1.0 / (");
1822 dump_integer_part (dump_file
, "x", n
);
1824 fprintf (dump_file
, " * ");
1825 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1826 fprintf (dump_file
, ")");
1830 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1831 fprintf (dump_file
, " / (");
1832 dump_integer_part (dump_file
, "x", n
);
1833 fprintf (dump_file
, ")");
1838 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1840 fprintf (dump_file
, " * ");
1841 dump_integer_part (dump_file
, "x", n
);
1844 fprintf (dump_file
, "\ndeepest sqrt chain: %d\n", synth_info
.deepest
);
1848 tree fract_res
= NULL_TREE
;
1851 /* Calculate the fractional part of the exponent. */
1852 for (unsigned i
= 0; i
< synth_info
.deepest
; i
++)
1854 if (synth_info
.factors
[i
])
1856 tree sqrt_chain
= get_fn_chain (arg0
, i
+ 1, gsi
, sqrtfn
, loc
, cache
);
1859 fract_res
= sqrt_chain
;
1862 fract_res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1863 fract_res
, sqrt_chain
);
1867 tree res
= NULL_TREE
;
1874 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1875 fract_res
, integer_res
);
1879 res
= build_and_insert_binop (gsi
, loc
, "powrootrecip", RDIV_EXPR
,
1880 build_real (type
, dconst1
), res
);
1884 res
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
1885 fract_res
, integer_res
);
1889 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1890 fract_res
, integer_res
);
1894 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1895 with location info LOC. If possible, create an equivalent and
1896 less expensive sequence of statements prior to GSI, and return an
1897 expession holding the result. */
1900 gimple_expand_builtin_pow (gimple_stmt_iterator
*gsi
, location_t loc
,
1901 tree arg0
, tree arg1
)
1903 REAL_VALUE_TYPE c
, cint
, dconst1_3
, dconst1_4
, dconst1_6
;
1904 REAL_VALUE_TYPE c2
, dconst3
;
1906 tree type
, sqrtfn
, cbrtfn
, sqrt_arg0
, result
, cbrt_x
, powi_cbrt_x
;
1908 bool speed_p
= optimize_bb_for_speed_p (gsi_bb (*gsi
));
1909 bool hw_sqrt_exists
, c_is_int
, c2_is_int
;
1911 dconst1_4
= dconst1
;
1912 SET_REAL_EXP (&dconst1_4
, REAL_EXP (&dconst1_4
) - 2);
1914 /* If the exponent isn't a constant, there's nothing of interest
1916 if (TREE_CODE (arg1
) != REAL_CST
)
1919 /* Don't perform the operation if flag_signaling_nans is on
1920 and the operand is a signaling NaN. */
1921 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1
)))
1922 && ((TREE_CODE (arg0
) == REAL_CST
1923 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0
)))
1924 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1
))))
1927 /* If the exponent is equivalent to an integer, expand to an optimal
1928 multiplication sequence when profitable. */
1929 c
= TREE_REAL_CST (arg1
);
1930 n
= real_to_integer (&c
);
1931 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
1932 c_is_int
= real_identical (&c
, &cint
);
1935 && ((n
>= -1 && n
<= 2)
1936 || (flag_unsafe_math_optimizations
1938 && powi_cost (n
) <= POWI_MAX_MULTS
)))
1939 return gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
1941 /* Attempt various optimizations using sqrt and cbrt. */
1942 type
= TREE_TYPE (arg0
);
1943 mode
= TYPE_MODE (type
);
1944 sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
1946 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1947 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1950 && real_equal (&c
, &dconsthalf
)
1951 && !HONOR_SIGNED_ZEROS (mode
))
1952 return build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
1954 hw_sqrt_exists
= optab_handler (sqrt_optab
, mode
) != CODE_FOR_nothing
;
1956 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1957 optimizations since 1./3. is not exactly representable. If x
1958 is negative and finite, the correct value of pow(x,1./3.) is
1959 a NaN with the "invalid" exception raised, because the value
1960 of 1./3. actually has an even denominator. The correct value
1961 of cbrt(x) is a negative real value. */
1962 cbrtfn
= mathfn_built_in (type
, BUILT_IN_CBRT
);
1963 dconst1_3
= real_value_truncate (mode
, dconst_third ());
1965 if (flag_unsafe_math_optimizations
1967 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
1968 && real_equal (&c
, &dconst1_3
))
1969 return build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
1971 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1972 if we don't have a hardware sqrt insn. */
1973 dconst1_6
= dconst1_3
;
1974 SET_REAL_EXP (&dconst1_6
, REAL_EXP (&dconst1_6
) - 1);
1976 if (flag_unsafe_math_optimizations
1979 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
1982 && real_equal (&c
, &dconst1_6
))
1985 sqrt_arg0
= build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
1988 return build_and_insert_call (gsi
, loc
, cbrtfn
, sqrt_arg0
);
1992 /* Attempt to expand the POW as a product of square root chains.
1993 Expand the 0.25 case even when otpimising for size. */
1994 if (flag_unsafe_math_optimizations
1997 && (speed_p
|| real_equal (&c
, &dconst1_4
))
1998 && !HONOR_SIGNED_ZEROS (mode
))
2000 unsigned int max_depth
= speed_p
2001 ? param_max_pow_sqrt_depth
2004 tree expand_with_sqrts
2005 = expand_pow_as_sqrts (gsi
, loc
, arg0
, arg1
, max_depth
);
2007 if (expand_with_sqrts
)
2008 return expand_with_sqrts
;
2011 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst2
);
2012 n
= real_to_integer (&c2
);
2013 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2014 c2_is_int
= real_identical (&c2
, &cint
);
2016 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2018 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2019 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2021 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2022 different from pow(x, 1./3.) due to rounding and behavior with
2023 negative x, we need to constrain this transformation to unsafe
2024 math and positive x or finite math. */
2025 real_from_integer (&dconst3
, VOIDmode
, 3, SIGNED
);
2026 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst3
);
2027 real_round (&c2
, mode
, &c2
);
2028 n
= real_to_integer (&c2
);
2029 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2030 real_arithmetic (&c2
, RDIV_EXPR
, &cint
, &dconst3
);
2031 real_convert (&c2
, mode
, &c2
);
2033 if (flag_unsafe_math_optimizations
2035 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2036 && real_identical (&c2
, &c
)
2038 && optimize_function_for_speed_p (cfun
)
2039 && powi_cost (n
/ 3) <= POWI_MAX_MULTS
)
2041 tree powi_x_ndiv3
= NULL_TREE
;
2043 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2044 possible or profitable, give up. Skip the degenerate case when
2045 abs(n) < 3, where the result is always 1. */
2046 if (absu_hwi (n
) >= 3)
2048 powi_x_ndiv3
= gimple_expand_builtin_powi (gsi
, loc
, arg0
,
2054 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2055 as that creates an unnecessary variable. Instead, just produce
2056 either cbrt(x) or cbrt(x) * cbrt(x). */
2057 cbrt_x
= build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2059 if (absu_hwi (n
) % 3 == 1)
2060 powi_cbrt_x
= cbrt_x
;
2062 powi_cbrt_x
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2065 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2066 if (absu_hwi (n
) < 3)
2067 result
= powi_cbrt_x
;
2069 result
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2070 powi_x_ndiv3
, powi_cbrt_x
);
2072 /* If n is negative, reciprocate the result. */
2074 result
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
2075 build_real (type
, dconst1
), result
);
2080 /* No optimizations succeeded. */
2084 /* ARG is the argument to a cabs builtin call in GSI with location info
2085 LOC. Create a sequence of statements prior to GSI that calculates
2086 sqrt(R*R + I*I), where R and I are the real and imaginary components
2087 of ARG, respectively. Return an expression holding the result. */
2090 gimple_expand_builtin_cabs (gimple_stmt_iterator
*gsi
, location_t loc
, tree arg
)
2092 tree real_part
, imag_part
, addend1
, addend2
, sum
, result
;
2093 tree type
= TREE_TYPE (TREE_TYPE (arg
));
2094 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
2095 machine_mode mode
= TYPE_MODE (type
);
2097 if (!flag_unsafe_math_optimizations
2098 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi
)))
2100 || optab_handler (sqrt_optab
, mode
) == CODE_FOR_nothing
)
2103 real_part
= build_and_insert_ref (gsi
, loc
, type
, "cabs",
2104 REALPART_EXPR
, arg
);
2105 addend1
= build_and_insert_binop (gsi
, loc
, "cabs", MULT_EXPR
,
2106 real_part
, real_part
);
2107 imag_part
= build_and_insert_ref (gsi
, loc
, type
, "cabs",
2108 IMAGPART_EXPR
, arg
);
2109 addend2
= build_and_insert_binop (gsi
, loc
, "cabs", MULT_EXPR
,
2110 imag_part
, imag_part
);
2111 sum
= build_and_insert_binop (gsi
, loc
, "cabs", PLUS_EXPR
, addend1
, addend2
);
2112 result
= build_and_insert_call (gsi
, loc
, sqrtfn
, sum
);
2117 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2118 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2119 an optimal number of multiplies, when n is a constant. */
2123 const pass_data pass_data_cse_sincos
=
2125 GIMPLE_PASS
, /* type */
2126 "sincos", /* name */
2127 OPTGROUP_NONE
, /* optinfo_flags */
2128 TV_TREE_SINCOS
, /* tv_id */
2129 PROP_ssa
, /* properties_required */
2130 PROP_gimple_opt_math
, /* properties_provided */
2131 0, /* properties_destroyed */
2132 0, /* todo_flags_start */
2133 TODO_update_ssa
, /* todo_flags_finish */
2136 class pass_cse_sincos
: public gimple_opt_pass
2139 pass_cse_sincos (gcc::context
*ctxt
)
2140 : gimple_opt_pass (pass_data_cse_sincos
, ctxt
)
2143 /* opt_pass methods: */
2144 virtual bool gate (function
*)
2146 /* We no longer require either sincos or cexp, since powi expansion
2147 piggybacks on this pass. */
2151 virtual unsigned int execute (function
*);
2153 }; // class pass_cse_sincos
2156 pass_cse_sincos::execute (function
*fun
)
2159 bool cfg_changed
= false;
2161 calculate_dominance_info (CDI_DOMINATORS
);
2162 memset (&sincos_stats
, 0, sizeof (sincos_stats
));
2164 FOR_EACH_BB_FN (bb
, fun
)
2166 gimple_stmt_iterator gsi
;
2167 bool cleanup_eh
= false;
2169 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2171 gimple
*stmt
= gsi_stmt (gsi
);
2173 /* Only the last stmt in a bb could throw, no need to call
2174 gimple_purge_dead_eh_edges if we change something in the middle
2175 of a basic block. */
2178 if (is_gimple_call (stmt
)
2179 && gimple_call_lhs (stmt
))
2181 tree arg
, arg0
, arg1
, result
;
2185 switch (gimple_call_combined_fn (stmt
))
2190 arg
= gimple_call_arg (stmt
, 0);
2191 /* Make sure we have either sincos or cexp. */
2192 if (!targetm
.libc_has_function (function_c99_math_complex
,
2194 && !targetm
.libc_has_function (function_sincos
,
2198 if (TREE_CODE (arg
) == SSA_NAME
)
2199 cfg_changed
|= execute_cse_sincos_1 (arg
);
2203 arg0
= gimple_call_arg (stmt
, 0);
2204 arg1
= gimple_call_arg (stmt
, 1);
2206 loc
= gimple_location (stmt
);
2207 result
= gimple_expand_builtin_pow (&gsi
, loc
, arg0
, arg1
);
2211 tree lhs
= gimple_get_lhs (stmt
);
2212 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2213 gimple_set_location (new_stmt
, loc
);
2214 unlink_stmt_vdef (stmt
);
2215 gsi_replace (&gsi
, new_stmt
, true);
2217 if (gimple_vdef (stmt
))
2218 release_ssa_name (gimple_vdef (stmt
));
2223 arg0
= gimple_call_arg (stmt
, 0);
2224 arg1
= gimple_call_arg (stmt
, 1);
2225 loc
= gimple_location (stmt
);
2227 if (real_minus_onep (arg0
))
2229 tree t0
, t1
, cond
, one
, minus_one
;
2232 t0
= TREE_TYPE (arg0
);
2233 t1
= TREE_TYPE (arg1
);
2234 one
= build_real (t0
, dconst1
);
2235 minus_one
= build_real (t0
, dconstm1
);
2237 cond
= make_temp_ssa_name (t1
, NULL
, "powi_cond");
2238 stmt
= gimple_build_assign (cond
, BIT_AND_EXPR
,
2239 arg1
, build_int_cst (t1
, 1));
2240 gimple_set_location (stmt
, loc
);
2241 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2243 result
= make_temp_ssa_name (t0
, NULL
, "powi");
2244 stmt
= gimple_build_assign (result
, COND_EXPR
, cond
,
2246 gimple_set_location (stmt
, loc
);
2247 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2251 if (!tree_fits_shwi_p (arg1
))
2254 n
= tree_to_shwi (arg1
);
2255 result
= gimple_expand_builtin_powi (&gsi
, loc
, arg0
, n
);
2260 tree lhs
= gimple_get_lhs (stmt
);
2261 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2262 gimple_set_location (new_stmt
, loc
);
2263 unlink_stmt_vdef (stmt
);
2264 gsi_replace (&gsi
, new_stmt
, true);
2266 if (gimple_vdef (stmt
))
2267 release_ssa_name (gimple_vdef (stmt
));
2272 arg0
= gimple_call_arg (stmt
, 0);
2273 loc
= gimple_location (stmt
);
2274 result
= gimple_expand_builtin_cabs (&gsi
, loc
, arg0
);
2278 tree lhs
= gimple_get_lhs (stmt
);
2279 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2280 gimple_set_location (new_stmt
, loc
);
2281 unlink_stmt_vdef (stmt
);
2282 gsi_replace (&gsi
, new_stmt
, true);
2284 if (gimple_vdef (stmt
))
2285 release_ssa_name (gimple_vdef (stmt
));
2294 cfg_changed
|= gimple_purge_dead_eh_edges (bb
);
2297 statistics_counter_event (fun
, "sincos statements inserted",
2298 sincos_stats
.inserted
);
2300 return cfg_changed
? TODO_cleanup_cfg
: 0;
2306 make_pass_cse_sincos (gcc::context
*ctxt
)
2308 return new pass_cse_sincos (ctxt
);
2311 /* Return true if stmt is a type conversion operation that can be stripped
2312 when used in a widening multiply operation. */
2314 widening_mult_conversion_strippable_p (tree result_type
, gimple
*stmt
)
2316 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
2318 if (TREE_CODE (result_type
) == INTEGER_TYPE
)
2323 if (!CONVERT_EXPR_CODE_P (rhs_code
))
2326 op_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2328 /* If the type of OP has the same precision as the result, then
2329 we can strip this conversion. The multiply operation will be
2330 selected to create the correct extension as a by-product. */
2331 if (TYPE_PRECISION (result_type
) == TYPE_PRECISION (op_type
))
2334 /* We can also strip a conversion if it preserves the signed-ness of
2335 the operation and doesn't narrow the range. */
2336 inner_op_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
2338 /* If the inner-most type is unsigned, then we can strip any
2339 intermediate widening operation. If it's signed, then the
2340 intermediate widening operation must also be signed. */
2341 if ((TYPE_UNSIGNED (inner_op_type
)
2342 || TYPE_UNSIGNED (op_type
) == TYPE_UNSIGNED (inner_op_type
))
2343 && TYPE_PRECISION (op_type
) > TYPE_PRECISION (inner_op_type
))
2349 return rhs_code
== FIXED_CONVERT_EXPR
;
2352 /* Return true if RHS is a suitable operand for a widening multiplication,
2353 assuming a target type of TYPE.
2354 There are two cases:
2356 - RHS makes some value at least twice as wide. Store that value
2357 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2359 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2360 but leave *TYPE_OUT untouched. */
2363 is_widening_mult_rhs_p (tree type
, tree rhs
, tree
*type_out
,
2369 if (TREE_CODE (rhs
) == SSA_NAME
)
2371 stmt
= SSA_NAME_DEF_STMT (rhs
);
2372 if (is_gimple_assign (stmt
))
2374 if (! widening_mult_conversion_strippable_p (type
, stmt
))
2378 rhs1
= gimple_assign_rhs1 (stmt
);
2380 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2382 *new_rhs_out
= rhs1
;
2391 type1
= TREE_TYPE (rhs1
);
2393 if (TREE_CODE (type1
) != TREE_CODE (type
)
2394 || TYPE_PRECISION (type1
) * 2 > TYPE_PRECISION (type
))
2397 *new_rhs_out
= rhs1
;
2402 if (TREE_CODE (rhs
) == INTEGER_CST
)
2412 /* Return true if STMT performs a widening multiplication, assuming the
2413 output type is TYPE. If so, store the unwidened types of the operands
2414 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2415 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2416 and *TYPE2_OUT would give the operands of the multiplication. */
2419 is_widening_mult_p (gimple
*stmt
,
2420 tree
*type1_out
, tree
*rhs1_out
,
2421 tree
*type2_out
, tree
*rhs2_out
)
2423 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2425 if (TREE_CODE (type
) == INTEGER_TYPE
)
2427 if (TYPE_OVERFLOW_TRAPS (type
))
2430 else if (TREE_CODE (type
) != FIXED_POINT_TYPE
)
2433 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs1 (stmt
), type1_out
,
2437 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs2 (stmt
), type2_out
,
2441 if (*type1_out
== NULL
)
2443 if (*type2_out
== NULL
|| !int_fits_type_p (*rhs1_out
, *type2_out
))
2445 *type1_out
= *type2_out
;
2448 if (*type2_out
== NULL
)
2450 if (!int_fits_type_p (*rhs2_out
, *type1_out
))
2452 *type2_out
= *type1_out
;
2455 /* Ensure that the larger of the two operands comes first. */
2456 if (TYPE_PRECISION (*type1_out
) < TYPE_PRECISION (*type2_out
))
2458 std::swap (*type1_out
, *type2_out
);
2459 std::swap (*rhs1_out
, *rhs2_out
);
2465 /* Check to see if the CALL statement is an invocation of copysign
2466 with 1. being the first argument. */
2468 is_copysign_call_with_1 (gimple
*call
)
2470 gcall
*c
= dyn_cast
<gcall
*> (call
);
2474 enum combined_fn code
= gimple_call_combined_fn (c
);
2476 if (code
== CFN_LAST
)
2479 if (builtin_fn_p (code
))
2481 switch (as_builtin_fn (code
))
2483 CASE_FLT_FN (BUILT_IN_COPYSIGN
):
2484 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN
):
2485 return real_onep (gimple_call_arg (c
, 0));
2491 if (internal_fn_p (code
))
2493 switch (as_internal_fn (code
))
2496 return real_onep (gimple_call_arg (c
, 0));
2505 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2506 This only happens when the xorsign optab is defined, if the
2507 pattern is not a xorsign pattern or if expansion fails FALSE is
2508 returned, otherwise TRUE is returned. */
2510 convert_expand_mult_copysign (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2512 tree treeop0
, treeop1
, lhs
, type
;
2513 location_t loc
= gimple_location (stmt
);
2514 lhs
= gimple_assign_lhs (stmt
);
2515 treeop0
= gimple_assign_rhs1 (stmt
);
2516 treeop1
= gimple_assign_rhs2 (stmt
);
2517 type
= TREE_TYPE (lhs
);
2518 machine_mode mode
= TYPE_MODE (type
);
2520 if (HONOR_SNANS (type
))
2523 if (TREE_CODE (treeop0
) == SSA_NAME
&& TREE_CODE (treeop1
) == SSA_NAME
)
2525 gimple
*call0
= SSA_NAME_DEF_STMT (treeop0
);
2526 if (!has_single_use (treeop0
) || !is_copysign_call_with_1 (call0
))
2528 call0
= SSA_NAME_DEF_STMT (treeop1
);
2529 if (!has_single_use (treeop1
) || !is_copysign_call_with_1 (call0
))
2534 if (optab_handler (xorsign_optab
, mode
) == CODE_FOR_nothing
)
2537 gcall
*c
= as_a
<gcall
*> (call0
);
2538 treeop0
= gimple_call_arg (c
, 1);
2541 = gimple_build_call_internal (IFN_XORSIGN
, 2, treeop1
, treeop0
);
2542 gimple_set_lhs (call_stmt
, lhs
);
2543 gimple_set_location (call_stmt
, loc
);
2544 gsi_replace (gsi
, call_stmt
, true);
2551 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2552 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2553 value is true iff we converted the statement. */
2556 convert_mult_to_widen (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2558 tree lhs
, rhs1
, rhs2
, type
, type1
, type2
;
2559 enum insn_code handler
;
2560 scalar_int_mode to_mode
, from_mode
, actual_mode
;
2562 int actual_precision
;
2563 location_t loc
= gimple_location (stmt
);
2564 bool from_unsigned1
, from_unsigned2
;
2566 lhs
= gimple_assign_lhs (stmt
);
2567 type
= TREE_TYPE (lhs
);
2568 if (TREE_CODE (type
) != INTEGER_TYPE
)
2571 if (!is_widening_mult_p (stmt
, &type1
, &rhs1
, &type2
, &rhs2
))
2574 to_mode
= SCALAR_INT_TYPE_MODE (type
);
2575 from_mode
= SCALAR_INT_TYPE_MODE (type1
);
2576 if (to_mode
== from_mode
)
2579 from_unsigned1
= TYPE_UNSIGNED (type1
);
2580 from_unsigned2
= TYPE_UNSIGNED (type2
);
2582 if (from_unsigned1
&& from_unsigned2
)
2583 op
= umul_widen_optab
;
2584 else if (!from_unsigned1
&& !from_unsigned2
)
2585 op
= smul_widen_optab
;
2587 op
= usmul_widen_optab
;
2589 handler
= find_widening_optab_handler_and_mode (op
, to_mode
, from_mode
,
2592 if (handler
== CODE_FOR_nothing
)
2594 if (op
!= smul_widen_optab
)
2596 /* We can use a signed multiply with unsigned types as long as
2597 there is a wider mode to use, or it is the smaller of the two
2598 types that is unsigned. Note that type1 >= type2, always. */
2599 if ((TYPE_UNSIGNED (type1
)
2600 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2601 || (TYPE_UNSIGNED (type2
)
2602 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2604 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2605 || GET_MODE_SIZE (to_mode
) <= GET_MODE_SIZE (from_mode
))
2609 op
= smul_widen_optab
;
2610 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2614 if (handler
== CODE_FOR_nothing
)
2617 from_unsigned1
= from_unsigned2
= false;
2623 /* Ensure that the inputs to the handler are in the correct precison
2624 for the opcode. This will be the full mode size. */
2625 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2626 if (2 * actual_precision
> TYPE_PRECISION (type
))
2628 if (actual_precision
!= TYPE_PRECISION (type1
)
2629 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2630 rhs1
= build_and_insert_cast (gsi
, loc
,
2631 build_nonstandard_integer_type
2632 (actual_precision
, from_unsigned1
), rhs1
);
2633 if (actual_precision
!= TYPE_PRECISION (type2
)
2634 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2635 rhs2
= build_and_insert_cast (gsi
, loc
,
2636 build_nonstandard_integer_type
2637 (actual_precision
, from_unsigned2
), rhs2
);
2639 /* Handle constants. */
2640 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2641 rhs1
= fold_convert (type1
, rhs1
);
2642 if (TREE_CODE (rhs2
) == INTEGER_CST
)
2643 rhs2
= fold_convert (type2
, rhs2
);
2645 gimple_assign_set_rhs1 (stmt
, rhs1
);
2646 gimple_assign_set_rhs2 (stmt
, rhs2
);
2647 gimple_assign_set_rhs_code (stmt
, WIDEN_MULT_EXPR
);
2649 widen_mul_stats
.widen_mults_inserted
++;
2653 /* Process a single gimple statement STMT, which is found at the
2654 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2655 rhs (given by CODE), and try to convert it into a
2656 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2657 is true iff we converted the statement. */
2660 convert_plusminus_to_widen (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
2661 enum tree_code code
)
2663 gimple
*rhs1_stmt
= NULL
, *rhs2_stmt
= NULL
;
2664 gimple
*conv1_stmt
= NULL
, *conv2_stmt
= NULL
, *conv_stmt
;
2665 tree type
, type1
, type2
, optype
;
2666 tree lhs
, rhs1
, rhs2
, mult_rhs1
, mult_rhs2
, add_rhs
;
2667 enum tree_code rhs1_code
= ERROR_MARK
, rhs2_code
= ERROR_MARK
;
2669 enum tree_code wmult_code
;
2670 enum insn_code handler
;
2671 scalar_mode to_mode
, from_mode
, actual_mode
;
2672 location_t loc
= gimple_location (stmt
);
2673 int actual_precision
;
2674 bool from_unsigned1
, from_unsigned2
;
2676 lhs
= gimple_assign_lhs (stmt
);
2677 type
= TREE_TYPE (lhs
);
2678 if (TREE_CODE (type
) != INTEGER_TYPE
2679 && TREE_CODE (type
) != FIXED_POINT_TYPE
)
2682 if (code
== MINUS_EXPR
)
2683 wmult_code
= WIDEN_MULT_MINUS_EXPR
;
2685 wmult_code
= WIDEN_MULT_PLUS_EXPR
;
2687 rhs1
= gimple_assign_rhs1 (stmt
);
2688 rhs2
= gimple_assign_rhs2 (stmt
);
2690 if (TREE_CODE (rhs1
) == SSA_NAME
)
2692 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2693 if (is_gimple_assign (rhs1_stmt
))
2694 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2697 if (TREE_CODE (rhs2
) == SSA_NAME
)
2699 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2700 if (is_gimple_assign (rhs2_stmt
))
2701 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
2704 /* Allow for one conversion statement between the multiply
2705 and addition/subtraction statement. If there are more than
2706 one conversions then we assume they would invalidate this
2707 transformation. If that's not the case then they should have
2708 been folded before now. */
2709 if (CONVERT_EXPR_CODE_P (rhs1_code
))
2711 conv1_stmt
= rhs1_stmt
;
2712 rhs1
= gimple_assign_rhs1 (rhs1_stmt
);
2713 if (TREE_CODE (rhs1
) == SSA_NAME
)
2715 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2716 if (is_gimple_assign (rhs1_stmt
))
2717 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2722 if (CONVERT_EXPR_CODE_P (rhs2_code
))
2724 conv2_stmt
= rhs2_stmt
;
2725 rhs2
= gimple_assign_rhs1 (rhs2_stmt
);
2726 if (TREE_CODE (rhs2
) == SSA_NAME
)
2728 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2729 if (is_gimple_assign (rhs2_stmt
))
2730 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
2736 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2737 is_widening_mult_p, but we still need the rhs returns.
2739 It might also appear that it would be sufficient to use the existing
2740 operands of the widening multiply, but that would limit the choice of
2741 multiply-and-accumulate instructions.
2743 If the widened-multiplication result has more than one uses, it is
2744 probably wiser not to do the conversion. Also restrict this operation
2745 to single basic block to avoid moving the multiply to a different block
2746 with a higher execution frequency. */
2747 if (code
== PLUS_EXPR
2748 && (rhs1_code
== MULT_EXPR
|| rhs1_code
== WIDEN_MULT_EXPR
))
2750 if (!has_single_use (rhs1
)
2751 || gimple_bb (rhs1_stmt
) != gimple_bb (stmt
)
2752 || !is_widening_mult_p (rhs1_stmt
, &type1
, &mult_rhs1
,
2753 &type2
, &mult_rhs2
))
2756 conv_stmt
= conv1_stmt
;
2758 else if (rhs2_code
== MULT_EXPR
|| rhs2_code
== WIDEN_MULT_EXPR
)
2760 if (!has_single_use (rhs2
)
2761 || gimple_bb (rhs2_stmt
) != gimple_bb (stmt
)
2762 || !is_widening_mult_p (rhs2_stmt
, &type1
, &mult_rhs1
,
2763 &type2
, &mult_rhs2
))
2766 conv_stmt
= conv2_stmt
;
2771 to_mode
= SCALAR_TYPE_MODE (type
);
2772 from_mode
= SCALAR_TYPE_MODE (type1
);
2773 if (to_mode
== from_mode
)
2776 from_unsigned1
= TYPE_UNSIGNED (type1
);
2777 from_unsigned2
= TYPE_UNSIGNED (type2
);
2780 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2781 if (from_unsigned1
!= from_unsigned2
)
2783 if (!INTEGRAL_TYPE_P (type
))
2785 /* We can use a signed multiply with unsigned types as long as
2786 there is a wider mode to use, or it is the smaller of the two
2787 types that is unsigned. Note that type1 >= type2, always. */
2789 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2791 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2793 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2794 || GET_MODE_SIZE (from_mode
) >= GET_MODE_SIZE (to_mode
))
2798 from_unsigned1
= from_unsigned2
= false;
2799 optype
= build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode
),
2803 /* If there was a conversion between the multiply and addition
2804 then we need to make sure it fits a multiply-and-accumulate.
2805 The should be a single mode change which does not change the
2809 /* We use the original, unmodified data types for this. */
2810 tree from_type
= TREE_TYPE (gimple_assign_rhs1 (conv_stmt
));
2811 tree to_type
= TREE_TYPE (gimple_assign_lhs (conv_stmt
));
2812 int data_size
= TYPE_PRECISION (type1
) + TYPE_PRECISION (type2
);
2813 bool is_unsigned
= TYPE_UNSIGNED (type1
) && TYPE_UNSIGNED (type2
);
2815 if (TYPE_PRECISION (from_type
) > TYPE_PRECISION (to_type
))
2817 /* Conversion is a truncate. */
2818 if (TYPE_PRECISION (to_type
) < data_size
)
2821 else if (TYPE_PRECISION (from_type
) < TYPE_PRECISION (to_type
))
2823 /* Conversion is an extend. Check it's the right sort. */
2824 if (TYPE_UNSIGNED (from_type
) != is_unsigned
2825 && !(is_unsigned
&& TYPE_PRECISION (from_type
) > data_size
))
2828 /* else convert is a no-op for our purposes. */
2831 /* Verify that the machine can perform a widening multiply
2832 accumulate in this mode/signedness combination, otherwise
2833 this transformation is likely to pessimize code. */
2834 this_optab
= optab_for_tree_code (wmult_code
, optype
, optab_default
);
2835 handler
= find_widening_optab_handler_and_mode (this_optab
, to_mode
,
2836 from_mode
, &actual_mode
);
2838 if (handler
== CODE_FOR_nothing
)
2841 /* Ensure that the inputs to the handler are in the correct precison
2842 for the opcode. This will be the full mode size. */
2843 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2844 if (actual_precision
!= TYPE_PRECISION (type1
)
2845 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2846 mult_rhs1
= build_and_insert_cast (gsi
, loc
,
2847 build_nonstandard_integer_type
2848 (actual_precision
, from_unsigned1
),
2850 if (actual_precision
!= TYPE_PRECISION (type2
)
2851 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2852 mult_rhs2
= build_and_insert_cast (gsi
, loc
,
2853 build_nonstandard_integer_type
2854 (actual_precision
, from_unsigned2
),
2857 if (!useless_type_conversion_p (type
, TREE_TYPE (add_rhs
)))
2858 add_rhs
= build_and_insert_cast (gsi
, loc
, type
, add_rhs
);
2860 /* Handle constants. */
2861 if (TREE_CODE (mult_rhs1
) == INTEGER_CST
)
2862 mult_rhs1
= fold_convert (type1
, mult_rhs1
);
2863 if (TREE_CODE (mult_rhs2
) == INTEGER_CST
)
2864 mult_rhs2
= fold_convert (type2
, mult_rhs2
);
2866 gimple_assign_set_rhs_with_ops (gsi
, wmult_code
, mult_rhs1
, mult_rhs2
,
2868 update_stmt (gsi_stmt (*gsi
));
2869 widen_mul_stats
.maccs_inserted
++;
2873 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2874 OP2 and which we know is used in statements that can be, together with the
2875 multiplication, converted to FMAs, perform the transformation. */
2878 convert_mult_to_fma_1 (tree mul_result
, tree op1
, tree op2
)
2880 tree type
= TREE_TYPE (mul_result
);
2882 imm_use_iterator imm_iter
;
2885 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, mul_result
)
2887 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
2888 tree addop
, mulop1
= op1
, result
= mul_result
;
2889 bool negate_p
= false;
2890 gimple_seq seq
= NULL
;
2892 if (is_gimple_debug (use_stmt
))
2895 if (is_gimple_assign (use_stmt
)
2896 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
2898 result
= gimple_assign_lhs (use_stmt
);
2899 use_operand_p use_p
;
2900 gimple
*neguse_stmt
;
2901 single_imm_use (gimple_assign_lhs (use_stmt
), &use_p
, &neguse_stmt
);
2902 gsi_remove (&gsi
, true);
2903 release_defs (use_stmt
);
2905 use_stmt
= neguse_stmt
;
2906 gsi
= gsi_for_stmt (use_stmt
);
2910 tree cond
, else_value
, ops
[3];
2912 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
,
2915 addop
= ops
[0] == result
? ops
[1] : ops
[0];
2917 if (code
== MINUS_EXPR
)
2919 if (ops
[0] == result
)
2920 /* a * b - c -> a * b + (-c) */
2921 addop
= gimple_build (&seq
, NEGATE_EXPR
, type
, addop
);
2923 /* a - b * c -> (-b) * c + a */
2924 negate_p
= !negate_p
;
2928 mulop1
= gimple_build (&seq
, NEGATE_EXPR
, type
, mulop1
);
2931 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2934 fma_stmt
= gimple_build_call_internal (IFN_COND_FMA
, 5, cond
, mulop1
,
2935 op2
, addop
, else_value
);
2937 fma_stmt
= gimple_build_call_internal (IFN_FMA
, 3, mulop1
, op2
, addop
);
2938 gimple_set_lhs (fma_stmt
, gimple_get_lhs (use_stmt
));
2939 gimple_call_set_nothrow (fma_stmt
, !stmt_can_throw_internal (cfun
,
2941 gsi_replace (&gsi
, fma_stmt
, true);
2942 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
2943 regardless of where the negation occurs. */
2944 gimple
*orig_stmt
= gsi_stmt (gsi
);
2945 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
2947 if (maybe_clean_or_replace_eh_stmt (orig_stmt
, gsi_stmt (gsi
)))
2949 update_stmt (gsi_stmt (gsi
));
2952 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2954 fprintf (dump_file
, "Generated FMA ");
2955 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
2956 fprintf (dump_file
, "\n");
2959 /* If the FMA result is negated in a single use, fold the negation
2961 orig_stmt
= gsi_stmt (gsi
);
2962 use_operand_p use_p
;
2964 if (is_gimple_call (orig_stmt
)
2965 && gimple_call_internal_p (orig_stmt
)
2966 && gimple_call_lhs (orig_stmt
)
2967 && TREE_CODE (gimple_call_lhs (orig_stmt
)) == SSA_NAME
2968 && single_imm_use (gimple_call_lhs (orig_stmt
), &use_p
, &neg_stmt
)
2969 && is_gimple_assign (neg_stmt
)
2970 && gimple_assign_rhs_code (neg_stmt
) == NEGATE_EXPR
2971 && !stmt_could_throw_p (cfun
, neg_stmt
))
2973 gsi
= gsi_for_stmt (neg_stmt
);
2974 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
2976 if (maybe_clean_or_replace_eh_stmt (neg_stmt
, gsi_stmt (gsi
)))
2978 update_stmt (gsi_stmt (gsi
));
2979 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2981 fprintf (dump_file
, "Folded FMA negation ");
2982 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
2983 fprintf (dump_file
, "\n");
2988 widen_mul_stats
.fmas_inserted
++;
2992 /* Data necessary to perform the actual transformation from a multiplication
2993 and an addition to an FMA after decision is taken it should be done and to
2994 then delete the multiplication statement from the function IL. */
2996 struct fma_transformation_info
3004 /* Structure containing the current state of FMA deferring, i.e. whether we are
3005 deferring, whether to continue deferring, and all data necessary to come
3006 back and perform all deferred transformations. */
3008 class fma_deferring_state
3011 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3012 do any deferring. */
3014 fma_deferring_state (bool perform_deferring
)
3015 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL
),
3016 m_last_result (NULL_TREE
), m_deferring_p (perform_deferring
) {}
3018 /* List of FMA candidates for which we the transformation has been determined
3019 possible but we at this point in BB analysis we do not consider them
3021 auto_vec
<fma_transformation_info
, 8> m_candidates
;
3023 /* Set of results of multiplication that are part of an already deferred FMA
3025 hash_set
<tree
> m_mul_result_set
;
3027 /* The PHI that supposedly feeds back result of a FMA to another over loop
3029 gphi
*m_initial_phi
;
3031 /* Result of the last produced FMA candidate or NULL if there has not been
3035 /* If true, deferring might still be profitable. If false, transform all
3036 candidates and no longer defer. */
3040 /* Transform all deferred FMA candidates and mark STATE as no longer
3044 cancel_fma_deferring (fma_deferring_state
*state
)
3046 if (!state
->m_deferring_p
)
3049 for (unsigned i
= 0; i
< state
->m_candidates
.length (); i
++)
3051 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3052 fprintf (dump_file
, "Generating deferred FMA\n");
3054 const fma_transformation_info
&fti
= state
->m_candidates
[i
];
3055 convert_mult_to_fma_1 (fti
.mul_result
, fti
.op1
, fti
.op2
);
3057 gimple_stmt_iterator gsi
= gsi_for_stmt (fti
.mul_stmt
);
3058 gsi_remove (&gsi
, true);
3059 release_defs (fti
.mul_stmt
);
3061 state
->m_deferring_p
= false;
3064 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3065 Otherwise return NULL. */
3068 result_of_phi (tree op
)
3070 if (TREE_CODE (op
) != SSA_NAME
)
3073 return dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (op
));
3076 /* After processing statements of a BB and recording STATE, return true if the
3077 initial phi is fed by the last FMA candidate result ore one such result from
3078 previously processed BBs marked in LAST_RESULT_SET. */
3081 last_fma_candidate_feeds_initial_phi (fma_deferring_state
*state
,
3082 hash_set
<tree
> *last_result_set
)
3086 FOR_EACH_PHI_ARG (use
, state
->m_initial_phi
, iter
, SSA_OP_USE
)
3088 tree t
= USE_FROM_PTR (use
);
3089 if (t
== state
->m_last_result
3090 || last_result_set
->contains (t
))
3097 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3098 with uses in additions and subtractions to form fused multiply-add
3099 operations. Returns true if successful and MUL_STMT should be removed.
3100 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3101 on MUL_COND, otherwise it is unconditional.
3103 If STATE indicates that we are deferring FMA transformation, that means
3104 that we do not produce FMAs for basic blocks which look like:
3107 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3109 accumulator_66 = _65 + accumulator_111;
3111 or its unrolled version, i.e. with several FMA candidates that feed result
3112 of one into the addend of another. Instead, we add them to a list in STATE
3113 and if we later discover an FMA candidate that is not part of such a chain,
3114 we go back and perform all deferred past candidates. */
3117 convert_mult_to_fma (gimple
*mul_stmt
, tree op1
, tree op2
,
3118 fma_deferring_state
*state
, tree mul_cond
= NULL_TREE
)
3120 tree mul_result
= gimple_get_lhs (mul_stmt
);
3121 tree type
= TREE_TYPE (mul_result
);
3122 gimple
*use_stmt
, *neguse_stmt
;
3123 use_operand_p use_p
;
3124 imm_use_iterator imm_iter
;
3126 if (FLOAT_TYPE_P (type
)
3127 && flag_fp_contract_mode
== FP_CONTRACT_OFF
)
3130 /* We don't want to do bitfield reduction ops. */
3131 if (INTEGRAL_TYPE_P (type
)
3132 && (!type_has_mode_precision_p (type
) || TYPE_OVERFLOW_TRAPS (type
)))
3135 /* If the target doesn't support it, don't generate it. We assume that
3136 if fma isn't available then fms, fnma or fnms are not either. */
3137 optimization_type opt_type
= bb_optimization_type (gimple_bb (mul_stmt
));
3138 if (!direct_internal_fn_supported_p (IFN_FMA
, type
, opt_type
))
3141 /* If the multiplication has zero uses, it is kept around probably because
3142 of -fnon-call-exceptions. Don't optimize it away in that case,
3144 if (has_zero_uses (mul_result
))
3148 = (state
->m_deferring_p
3149 && (tree_to_shwi (TYPE_SIZE (type
))
3150 <= param_avoid_fma_max_bits
));
3151 bool defer
= check_defer
;
3152 bool seen_negate_p
= false;
3153 /* Make sure that the multiplication statement becomes dead after
3154 the transformation, thus that all uses are transformed to FMAs.
3155 This means we assume that an FMA operation has the same cost
3157 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, mul_result
)
3159 tree result
= mul_result
;
3160 bool negate_p
= false;
3162 use_stmt
= USE_STMT (use_p
);
3164 if (is_gimple_debug (use_stmt
))
3167 /* For now restrict this operations to single basic blocks. In theory
3168 we would want to support sinking the multiplication in
3174 to form a fma in the then block and sink the multiplication to the
3176 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3179 /* A negate on the multiplication leads to FNMA. */
3180 if (is_gimple_assign (use_stmt
)
3181 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3186 /* If (due to earlier missed optimizations) we have two
3187 negates of the same value, treat them as equivalent
3188 to a single negate with multiple uses. */
3192 result
= gimple_assign_lhs (use_stmt
);
3194 /* Make sure the negate statement becomes dead with this
3195 single transformation. */
3196 if (!single_imm_use (gimple_assign_lhs (use_stmt
),
3197 &use_p
, &neguse_stmt
))
3200 /* Make sure the multiplication isn't also used on that stmt. */
3201 FOR_EACH_PHI_OR_STMT_USE (usep
, neguse_stmt
, iter
, SSA_OP_USE
)
3202 if (USE_FROM_PTR (usep
) == mul_result
)
3206 use_stmt
= neguse_stmt
;
3207 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3210 negate_p
= seen_negate_p
= true;
3213 tree cond
, else_value
, ops
[3];
3215 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
, ops
,
3222 if (ops
[1] == result
)
3223 negate_p
= !negate_p
;
3228 /* FMA can only be formed from PLUS and MINUS. */
3232 if (mul_cond
&& cond
!= mul_cond
)
3237 if (cond
== result
|| else_value
== result
)
3239 if (!direct_internal_fn_supported_p (IFN_COND_FMA
, type
, opt_type
))
3243 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3244 we'll visit later, we might be able to get a more profitable
3246 OTOH, if we don't, a negate / fma pair has likely lower latency
3247 that a mult / subtract pair. */
3248 if (code
== MINUS_EXPR
3251 && !direct_internal_fn_supported_p (IFN_FMS
, type
, opt_type
)
3252 && direct_internal_fn_supported_p (IFN_FNMA
, type
, opt_type
)
3253 && TREE_CODE (ops
[1]) == SSA_NAME
3254 && has_single_use (ops
[1]))
3256 gimple
*stmt2
= SSA_NAME_DEF_STMT (ops
[1]);
3257 if (is_gimple_assign (stmt2
)
3258 && gimple_assign_rhs_code (stmt2
) == MULT_EXPR
)
3262 /* We can't handle a * b + a * b. */
3263 if (ops
[0] == ops
[1])
3265 /* If deferring, make sure we are not looking at an instruction that
3266 wouldn't have existed if we were not. */
3267 if (state
->m_deferring_p
3268 && (state
->m_mul_result_set
.contains (ops
[0])
3269 || state
->m_mul_result_set
.contains (ops
[1])))
3274 tree use_lhs
= gimple_get_lhs (use_stmt
);
3275 if (state
->m_last_result
)
3277 if (ops
[1] == state
->m_last_result
3278 || ops
[0] == state
->m_last_result
)
3285 gcc_checking_assert (!state
->m_initial_phi
);
3287 if (ops
[0] == result
)
3288 phi
= result_of_phi (ops
[1]);
3291 gcc_assert (ops
[1] == result
);
3292 phi
= result_of_phi (ops
[0]);
3297 state
->m_initial_phi
= phi
;
3304 state
->m_last_result
= use_lhs
;
3305 check_defer
= false;
3310 /* While it is possible to validate whether or not the exact form that
3311 we've recognized is available in the backend, the assumption is that
3312 if the deferring logic above did not trigger, the transformation is
3313 never a loss. For instance, suppose the target only has the plain FMA
3314 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3315 MUL+SUB for FMA+NEG, which is still two operations. Consider
3316 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3317 form the two NEGs are independent and could be run in parallel. */
3322 fma_transformation_info fti
;
3323 fti
.mul_stmt
= mul_stmt
;
3324 fti
.mul_result
= mul_result
;
3327 state
->m_candidates
.safe_push (fti
);
3328 state
->m_mul_result_set
.add (mul_result
);
3330 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3332 fprintf (dump_file
, "Deferred generating FMA for multiplication ");
3333 print_gimple_stmt (dump_file
, mul_stmt
, 0, TDF_NONE
);
3334 fprintf (dump_file
, "\n");
3341 if (state
->m_deferring_p
)
3342 cancel_fma_deferring (state
);
3343 convert_mult_to_fma_1 (mul_result
, op1
, op2
);
3349 /* Helper function of match_uaddsub_overflow. Return 1
3350 if USE_STMT is unsigned overflow check ovf != 0 for
3351 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3355 uaddsub_overflow_check_p (gimple
*stmt
, gimple
*use_stmt
)
3357 enum tree_code ccode
= ERROR_MARK
;
3358 tree crhs1
= NULL_TREE
, crhs2
= NULL_TREE
;
3359 if (gimple_code (use_stmt
) == GIMPLE_COND
)
3361 ccode
= gimple_cond_code (use_stmt
);
3362 crhs1
= gimple_cond_lhs (use_stmt
);
3363 crhs2
= gimple_cond_rhs (use_stmt
);
3365 else if (is_gimple_assign (use_stmt
))
3367 if (gimple_assign_rhs_class (use_stmt
) == GIMPLE_BINARY_RHS
)
3369 ccode
= gimple_assign_rhs_code (use_stmt
);
3370 crhs1
= gimple_assign_rhs1 (use_stmt
);
3371 crhs2
= gimple_assign_rhs2 (use_stmt
);
3373 else if (gimple_assign_rhs_code (use_stmt
) == COND_EXPR
)
3375 tree cond
= gimple_assign_rhs1 (use_stmt
);
3376 if (COMPARISON_CLASS_P (cond
))
3378 ccode
= TREE_CODE (cond
);
3379 crhs1
= TREE_OPERAND (cond
, 0);
3380 crhs2
= TREE_OPERAND (cond
, 1);
3391 if (TREE_CODE_CLASS (ccode
) != tcc_comparison
)
3394 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3395 tree lhs
= gimple_assign_lhs (stmt
);
3396 tree rhs1
= gimple_assign_rhs1 (stmt
);
3397 tree rhs2
= gimple_assign_rhs2 (stmt
);
3403 /* r = a - b; r > a or r <= a
3404 r = a + b; a > r or a <= r or b > r or b <= r. */
3405 if ((code
== MINUS_EXPR
&& crhs1
== lhs
&& crhs2
== rhs1
)
3406 || (code
== PLUS_EXPR
&& (crhs1
== rhs1
|| crhs1
== rhs2
)
3408 return ccode
== GT_EXPR
? 1 : -1;
3412 /* r = a - b; a < r or a >= r
3413 r = a + b; r < a or r >= a or r < b or r >= b. */
3414 if ((code
== MINUS_EXPR
&& crhs1
== rhs1
&& crhs2
== lhs
)
3415 || (code
== PLUS_EXPR
&& crhs1
== lhs
3416 && (crhs2
== rhs1
|| crhs2
== rhs2
)))
3417 return ccode
== LT_EXPR
? 1 : -1;
3425 /* Recognize for unsigned x
3428 where there are other uses of x and replace it with
3429 _7 = SUB_OVERFLOW (y, z);
3430 x = REALPART_EXPR <_7>;
3431 _8 = IMAGPART_EXPR <_7>;
3433 and similarly for addition. */
3436 match_uaddsub_overflow (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
3437 enum tree_code code
)
3439 tree lhs
= gimple_assign_lhs (stmt
);
3440 tree type
= TREE_TYPE (lhs
);
3441 use_operand_p use_p
;
3442 imm_use_iterator iter
;
3443 bool use_seen
= false;
3444 bool ovf_use_seen
= false;
3447 gcc_checking_assert (code
== PLUS_EXPR
|| code
== MINUS_EXPR
);
3448 if (!INTEGRAL_TYPE_P (type
)
3449 || !TYPE_UNSIGNED (type
)
3450 || has_zero_uses (lhs
)
3451 || has_single_use (lhs
)
3452 || optab_handler (code
== PLUS_EXPR
? uaddv4_optab
: usubv4_optab
,
3453 TYPE_MODE (type
)) == CODE_FOR_nothing
)
3456 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
3458 use_stmt
= USE_STMT (use_p
);
3459 if (is_gimple_debug (use_stmt
))
3462 if (uaddsub_overflow_check_p (stmt
, use_stmt
))
3463 ovf_use_seen
= true;
3466 if (ovf_use_seen
&& use_seen
)
3470 if (!ovf_use_seen
|| !use_seen
)
3473 tree ctype
= build_complex_type (type
);
3474 tree rhs1
= gimple_assign_rhs1 (stmt
);
3475 tree rhs2
= gimple_assign_rhs2 (stmt
);
3476 gcall
*g
= gimple_build_call_internal (code
== PLUS_EXPR
3477 ? IFN_ADD_OVERFLOW
: IFN_SUB_OVERFLOW
,
3479 tree ctmp
= make_ssa_name (ctype
);
3480 gimple_call_set_lhs (g
, ctmp
);
3481 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
3482 gassign
*g2
= gimple_build_assign (lhs
, REALPART_EXPR
,
3483 build1 (REALPART_EXPR
, type
, ctmp
));
3484 gsi_replace (gsi
, g2
, true);
3485 tree ovf
= make_ssa_name (type
);
3486 g2
= gimple_build_assign (ovf
, IMAGPART_EXPR
,
3487 build1 (IMAGPART_EXPR
, type
, ctmp
));
3488 gsi_insert_after (gsi
, g2
, GSI_NEW_STMT
);
3490 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3492 if (is_gimple_debug (use_stmt
))
3495 int ovf_use
= uaddsub_overflow_check_p (stmt
, use_stmt
);
3498 if (gimple_code (use_stmt
) == GIMPLE_COND
)
3500 gcond
*cond_stmt
= as_a
<gcond
*> (use_stmt
);
3501 gimple_cond_set_lhs (cond_stmt
, ovf
);
3502 gimple_cond_set_rhs (cond_stmt
, build_int_cst (type
, 0));
3503 gimple_cond_set_code (cond_stmt
, ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
);
3507 gcc_checking_assert (is_gimple_assign (use_stmt
));
3508 if (gimple_assign_rhs_class (use_stmt
) == GIMPLE_BINARY_RHS
)
3510 gimple_assign_set_rhs1 (use_stmt
, ovf
);
3511 gimple_assign_set_rhs2 (use_stmt
, build_int_cst (type
, 0));
3512 gimple_assign_set_rhs_code (use_stmt
,
3513 ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
);
3517 gcc_checking_assert (gimple_assign_rhs_code (use_stmt
)
3519 tree cond
= build2 (ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
,
3520 boolean_type_node
, ovf
,
3521 build_int_cst (type
, 0));
3522 gimple_assign_set_rhs1 (use_stmt
, cond
);
3525 update_stmt (use_stmt
);
3530 /* Return true if target has support for divmod. */
3533 target_supports_divmod_p (optab divmod_optab
, optab div_optab
, machine_mode mode
)
3535 /* If target supports hardware divmod insn, use it for divmod. */
3536 if (optab_handler (divmod_optab
, mode
) != CODE_FOR_nothing
)
3539 /* Check if libfunc for divmod is available. */
3540 rtx libfunc
= optab_libfunc (divmod_optab
, mode
);
3541 if (libfunc
!= NULL_RTX
)
3543 /* If optab_handler exists for div_optab, perhaps in a wider mode,
3544 we don't want to use the libfunc even if it exists for given mode. */
3545 machine_mode div_mode
;
3546 FOR_EACH_MODE_FROM (div_mode
, mode
)
3547 if (optab_handler (div_optab
, div_mode
) != CODE_FOR_nothing
)
3550 return targetm
.expand_divmod_libfunc
!= NULL
;
3556 /* Check if stmt is candidate for divmod transform. */
3559 divmod_candidate_p (gassign
*stmt
)
3561 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
3562 machine_mode mode
= TYPE_MODE (type
);
3563 optab divmod_optab
, div_optab
;
3565 if (TYPE_UNSIGNED (type
))
3567 divmod_optab
= udivmod_optab
;
3568 div_optab
= udiv_optab
;
3572 divmod_optab
= sdivmod_optab
;
3573 div_optab
= sdiv_optab
;
3576 tree op1
= gimple_assign_rhs1 (stmt
);
3577 tree op2
= gimple_assign_rhs2 (stmt
);
3579 /* Disable the transform if either is a constant, since division-by-constant
3580 may have specialized expansion. */
3581 if (CONSTANT_CLASS_P (op1
))
3584 if (CONSTANT_CLASS_P (op2
))
3586 if (integer_pow2p (op2
))
3589 if (TYPE_PRECISION (type
) <= HOST_BITS_PER_WIDE_INT
3590 && TYPE_PRECISION (type
) <= BITS_PER_WORD
)
3593 /* If the divisor is not power of 2 and the precision wider than
3594 HWI, expand_divmod punts on that, so in that case it is better
3595 to use divmod optab or libfunc. Similarly if choose_multiplier
3596 might need pre/post shifts of BITS_PER_WORD or more. */
3599 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3600 expand using the [su]divv optabs. */
3601 if (TYPE_OVERFLOW_TRAPS (type
))
3604 if (!target_supports_divmod_p (divmod_optab
, div_optab
, mode
))
/* This function looks for:
     t1 = a TRUNC_DIV_EXPR b;
     t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
     complex_tmp = DIVMOD (a, b);
     t1 = REALPART_EXPR (complex_tmp);
     t2 = IMAGPART_EXPR (complex_tmp);
   For conditions enabling the transform see divmod_candidate_p().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */
3629 convert_to_divmod (gassign
*stmt
)
3631 if (stmt_can_throw_internal (cfun
, stmt
)
3632 || !divmod_candidate_p (stmt
))
3635 tree op1
= gimple_assign_rhs1 (stmt
);
3636 tree op2
= gimple_assign_rhs2 (stmt
);
3638 imm_use_iterator use_iter
;
3640 auto_vec
<gimple
*> stmts
;
3642 gimple
*top_stmt
= stmt
;
3643 basic_block top_bb
= gimple_bb (stmt
);
3645 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
3646 at-least stmt and possibly other trunc_div/trunc_mod stmts
3647 having same operands as stmt. */
3649 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, op1
)
3651 if (is_gimple_assign (use_stmt
)
3652 && (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
3653 || gimple_assign_rhs_code (use_stmt
) == TRUNC_MOD_EXPR
)
3654 && operand_equal_p (op1
, gimple_assign_rhs1 (use_stmt
), 0)
3655 && operand_equal_p (op2
, gimple_assign_rhs2 (use_stmt
), 0))
3657 if (stmt_can_throw_internal (cfun
, use_stmt
))
3660 basic_block bb
= gimple_bb (use_stmt
);
3664 if (gimple_uid (use_stmt
) < gimple_uid (top_stmt
))
3665 top_stmt
= use_stmt
;
3667 else if (dominated_by_p (CDI_DOMINATORS
, top_bb
, bb
))
3670 top_stmt
= use_stmt
;
3675 tree top_op1
= gimple_assign_rhs1 (top_stmt
);
3676 tree top_op2
= gimple_assign_rhs2 (top_stmt
);
3678 stmts
.safe_push (top_stmt
);
3679 bool div_seen
= (gimple_assign_rhs_code (top_stmt
) == TRUNC_DIV_EXPR
);
3681 /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
3682 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
3683 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3684 2nd loop ends up adding at-least single trunc_mod_expr stmt. */
3686 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, top_op1
)
3688 if (is_gimple_assign (use_stmt
)
3689 && (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
3690 || gimple_assign_rhs_code (use_stmt
) == TRUNC_MOD_EXPR
)
3691 && operand_equal_p (top_op1
, gimple_assign_rhs1 (use_stmt
), 0)
3692 && operand_equal_p (top_op2
, gimple_assign_rhs2 (use_stmt
), 0))
3694 if (use_stmt
== top_stmt
3695 || stmt_can_throw_internal (cfun
, use_stmt
)
3696 || !dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
), top_bb
))
3699 stmts
.safe_push (use_stmt
);
3700 if (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
)
3708 /* Part 3: Create libcall to internal fn DIVMOD:
3709 divmod_tmp = DIVMOD (op1, op2). */
3711 gcall
*call_stmt
= gimple_build_call_internal (IFN_DIVMOD
, 2, op1
, op2
);
3712 tree res
= make_temp_ssa_name (build_complex_type (TREE_TYPE (op1
)),
3713 call_stmt
, "divmod_tmp");
3714 gimple_call_set_lhs (call_stmt
, res
);
3715 /* We rejected throwing statements above. */
3716 gimple_call_set_nothrow (call_stmt
, true);
3718 /* Insert the call before top_stmt. */
3719 gimple_stmt_iterator top_stmt_gsi
= gsi_for_stmt (top_stmt
);
3720 gsi_insert_before (&top_stmt_gsi
, call_stmt
, GSI_SAME_STMT
);
3722 widen_mul_stats
.divmod_calls_inserted
++;
3724 /* Update all statements in stmts vector:
3725 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3726 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
3728 for (unsigned i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
3732 switch (gimple_assign_rhs_code (use_stmt
))
3734 case TRUNC_DIV_EXPR
:
3735 new_rhs
= fold_build1 (REALPART_EXPR
, TREE_TYPE (op1
), res
);
3738 case TRUNC_MOD_EXPR
:
3739 new_rhs
= fold_build1 (IMAGPART_EXPR
, TREE_TYPE (op1
), res
);
3746 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
3747 gimple_assign_set_rhs_from_tree (&gsi
, new_rhs
);
3748 update_stmt (use_stmt
);
3754 /* Find integer multiplications where the operands are extended from
3755 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3756 where appropriate. */
3760 const pass_data pass_data_optimize_widening_mul
=
3762 GIMPLE_PASS
, /* type */
3763 "widening_mul", /* name */
3764 OPTGROUP_NONE
, /* optinfo_flags */
3765 TV_TREE_WIDEN_MUL
, /* tv_id */
3766 PROP_ssa
, /* properties_required */
3767 0, /* properties_provided */
3768 0, /* properties_destroyed */
3769 0, /* todo_flags_start */
3770 TODO_update_ssa
, /* todo_flags_finish */
3773 class pass_optimize_widening_mul
: public gimple_opt_pass
3776 pass_optimize_widening_mul (gcc::context
*ctxt
)
3777 : gimple_opt_pass (pass_data_optimize_widening_mul
, ctxt
)
3780 /* opt_pass methods: */
3781 virtual bool gate (function
*)
3783 return flag_expensive_optimizations
&& optimize
;
3786 virtual unsigned int execute (function
*);
3788 }; // class pass_optimize_widening_mul
3790 /* Walker class to perform the transformation in reverse dominance order. */
3792 class math_opts_dom_walker
: public dom_walker
3795 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
3796 if walking modidifes the CFG. */
3798 math_opts_dom_walker (bool *cfg_changed_p
)
3799 : dom_walker (CDI_DOMINATORS
), m_last_result_set (),
3800 m_cfg_changed_p (cfg_changed_p
) {}
3802 /* The actual actions performed in the walk. */
3804 virtual void after_dom_children (basic_block
);
3806 /* Set of results of chains of multiply and add statement combinations that
3807 were not transformed into FMAs because of active deferring. */
3808 hash_set
<tree
> m_last_result_set
;
3810 /* Pointer to a flag of the user that needs to be set if CFG has been
3812 bool *m_cfg_changed_p
;
3816 math_opts_dom_walker::after_dom_children (basic_block bb
)
3818 gimple_stmt_iterator gsi
;
3820 fma_deferring_state
fma_state (param_avoid_fma_max_bits
> 0);
3822 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);)
3824 gimple
*stmt
= gsi_stmt (gsi
);
3825 enum tree_code code
;
3827 if (is_gimple_assign (stmt
))
3829 code
= gimple_assign_rhs_code (stmt
);
3833 if (!convert_mult_to_widen (stmt
, &gsi
)
3834 && !convert_expand_mult_copysign (stmt
, &gsi
)
3835 && convert_mult_to_fma (stmt
,
3836 gimple_assign_rhs1 (stmt
),
3837 gimple_assign_rhs2 (stmt
),
3840 gsi_remove (&gsi
, true);
3841 release_defs (stmt
);
3848 if (!convert_plusminus_to_widen (&gsi
, stmt
, code
))
3849 match_uaddsub_overflow (&gsi
, stmt
, code
);
3852 case TRUNC_MOD_EXPR
:
3853 convert_to_divmod (as_a
<gassign
*> (stmt
));
3859 else if (is_gimple_call (stmt
))
3861 switch (gimple_call_combined_fn (stmt
))
3864 if (gimple_call_lhs (stmt
)
3865 && TREE_CODE (gimple_call_arg (stmt
, 1)) == REAL_CST
3866 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt
, 1)),
3868 && convert_mult_to_fma (stmt
,
3869 gimple_call_arg (stmt
, 0),
3870 gimple_call_arg (stmt
, 0),
3873 unlink_stmt_vdef (stmt
);
3874 if (gsi_remove (&gsi
, true)
3875 && gimple_purge_dead_eh_edges (bb
))
3876 *m_cfg_changed_p
= true;
3877 release_defs (stmt
);
3883 if (convert_mult_to_fma (stmt
,
3884 gimple_call_arg (stmt
, 1),
3885 gimple_call_arg (stmt
, 2),
3887 gimple_call_arg (stmt
, 0)))
3890 gsi_remove (&gsi
, true);
3891 release_defs (stmt
);
3897 cancel_fma_deferring (&fma_state
);
3906 if (fma_state
.m_deferring_p
3907 && fma_state
.m_initial_phi
)
3909 gcc_checking_assert (fma_state
.m_last_result
);
3910 if (!last_fma_candidate_feeds_initial_phi (&fma_state
,
3911 &m_last_result_set
))
3912 cancel_fma_deferring (&fma_state
);
3914 m_last_result_set
.add (fma_state
.m_last_result
);
3920 pass_optimize_widening_mul::execute (function
*fun
)
3922 bool cfg_changed
= false;
3924 memset (&widen_mul_stats
, 0, sizeof (widen_mul_stats
));
3925 calculate_dominance_info (CDI_DOMINATORS
);
3926 renumber_gimple_stmt_uids (cfun
);
3928 math_opts_dom_walker (&cfg_changed
).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
3930 statistics_counter_event (fun
, "widening multiplications inserted",
3931 widen_mul_stats
.widen_mults_inserted
);
3932 statistics_counter_event (fun
, "widening maccs inserted",
3933 widen_mul_stats
.maccs_inserted
);
3934 statistics_counter_event (fun
, "fused multiply-adds inserted",
3935 widen_mul_stats
.fmas_inserted
);
3936 statistics_counter_event (fun
, "divmod calls inserted",
3937 widen_mul_stats
.divmod_calls_inserted
);
3939 return cfg_changed
? TODO_cleanup_cfg
: 0;
3945 make_pass_optimize_widening_mul (gcc::context
*ctxt
)
3947 return new pass_optimize_widening_mul (ctxt
);