abacb398 1/* Global, SSA-based optimizations using mathematical identities.
fbd26352 2 Copyright (C) 2005-2019 Free Software Foundation, Inc.
48e1416a 3
abacb398 4This file is part of GCC.
48e1416a 5
abacb398 6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8c4c00c1 8Free Software Foundation; either version 3, or (at your option) any
abacb398 9later version.
48e1416a 10
abacb398 11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
48e1416a 15
abacb398 16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
abacb398 19
 20/* The first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
ac70caad 37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
 43 First of all, experiments have shown that the
44 transformation is not always useful if there are only two divisions
24794e79 45 by the same divisor. This is probably because modern processors
ac70caad 46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
 53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
 59 that has fewer than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
 62 blocks containing the divisions and their common dominators,
ac70caad 63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
 70 worst case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
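
/* For example, assuming the target's min_divisions_for_recip_mul
   threshold N is 2, straight-line code such as

	bb1: t1 = a / d;
	bb2: ...
	bb3: t2 = b / d;

   becomes

	bb1: reciptmp = 1.0 / d;
	     t1 = a * reciptmp;
	bb2: ...
	bb3: t2 = b * reciptmp;

   since bb3 post-dominates bb1 and the two divisions by d meet the
   threshold.  */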
abacb398 86
87#include "config.h"
88#include "system.h"
89#include "coretypes.h"
9ef16211 90#include "backend.h"
7c29e30e 91#include "target.h"
92#include "rtl.h"
9ef16211 93#include "tree.h"
94#include "gimple.h"
7c29e30e 95#include "predict.h"
96#include "alloc-pool.h"
97#include "tree-pass.h"
9ef16211 98#include "ssa.h"
7c29e30e 99#include "optabs-tree.h"
100#include "gimple-pretty-print.h"
b20a8bb4 101#include "alias.h"
b20a8bb4 102#include "fold-const.h"
bc61cadb 103#include "gimple-fold.h"
dcf1a1ec 104#include "gimple-iterator.h"
470d5bb5 105#include "gimplify.h"
e795d6e1 106#include "gimplify-me.h"
9ed99284 107#include "stor-layout.h"
073c1fd5 108#include "tree-cfg.h"
073c1fd5 109#include "tree-dfa.h"
69ee5dbb 110#include "tree-ssa.h"
f7715905 111#include "builtins.h"
c3206272 112#include "params.h"
4cfd27a5 113#include "internal-fn.h"
fa0793ad 114#include "case-cfn-macros.h"
67f7b566 115#include "optabs-libfuncs.h"
116#include "tree-eh.h"
117#include "targhooks.h"
ed306e55 118#include "domwalk.h"
ac70caad 119
120/* This structure represents one basic block that either computes a
 121 division, or is a common dominator for basic blocks that compute a
122 division. */
123struct occurrence {
124 /* The basic block represented by this structure. */
125 basic_block bb;
126
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 inserted in BB. */
129 tree recip_def;
130
472f3f23 131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def;
134
75a70cf9 135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
ac70caad 136 was inserted in BB. */
42acab1c 137 gimple *recip_def_stmt;
ac70caad 138
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 by BB. */
141 struct occurrence *children;
142
 143 /* Pointer to the next "struct occurrence" in the list of blocks
144 sharing a common dominator. */
145 struct occurrence *next;
146
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
149 compute_merit. */
150 int num_divisions;
151
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division;
156};
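
/* For example, for straight-line code bb1 -> bb2 -> bb3 with divisions
   by the same SSA name in bb1 and bb3 only, the subset consists of an
   occurrence for bb1 whose children list holds the occurrence for bb3;
   bb2 contains no division and needs no occurrence.  */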
157
30c4e60d 158static struct
159{
160 /* Number of 1.0/X ops inserted. */
161 int rdivs_inserted;
162
163 /* Number of 1.0/FUNC ops inserted. */
164 int rfuncs_inserted;
165} reciprocal_stats;
166
167static struct
168{
169 /* Number of cexpi calls inserted. */
170 int inserted;
171} sincos_stats;
172
30c4e60d 173static struct
174{
175 /* Number of widening multiplication ops inserted. */
176 int widen_mults_inserted;
177
178 /* Number of integer multiply-and-accumulate ops inserted. */
179 int maccs_inserted;
180
181 /* Number of fp fused multiply-add ops inserted. */
182 int fmas_inserted;
67f7b566 183
184 /* Number of divmod calls inserted. */
185 int divmod_calls_inserted;
30c4e60d 186} widen_mul_stats;
ac70caad 187
188/* The instance of "struct occurrence" representing the highest
189 interesting block in the dominator tree. */
190static struct occurrence *occ_head;
191
192/* Allocation pool for getting instances of "struct occurrence". */
e16712b1 193static object_allocator<occurrence> *occ_pool;
ac70caad 194
195
196
197/* Allocate and return a new struct occurrence for basic block BB, and
198 whose children list is headed by CHILDREN. */
199static struct occurrence *
200occ_new (basic_block bb, struct occurrence *children)
abacb398 201{
ac70caad 202 struct occurrence *occ;
203
d8e7268c 204 bb->aux = occ = occ_pool->allocate ();
ac70caad 205 memset (occ, 0, sizeof (struct occurrence));
206
207 occ->bb = bb;
208 occ->children = children;
209 return occ;
abacb398 210}
211
ac70caad 212
213/* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
214 list of "struct occurrence"s, one per basic block, having IDOM as
215 their common dominator.
216
217 We try to insert NEW_OCC as deep as possible in the tree, and we also
218 insert any other block that is a common dominator for BB and one
219 block already in the tree. */
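
/* For example, if the tree currently holds only an occurrence for bb4
   and we insert bb6, whose nearest common dominator with bb4 is a
   block bb2 not yet in the tree, a fresh occurrence for bb2 is created
   with the occurrences for bb4 and bb6 as its children (the
   "dom != idom" case below).  */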
220
221static void
222insert_bb (struct occurrence *new_occ, basic_block idom,
223 struct occurrence **p_head)
9e583fac 224{
ac70caad 225 struct occurrence *occ, **p_occ;
9e583fac 226
ac70caad 227 for (p_occ = p_head; (occ = *p_occ) != NULL; )
228 {
229 basic_block bb = new_occ->bb, occ_bb = occ->bb;
230 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
231 if (dom == bb)
232 {
233 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
234 from its list. */
235 *p_occ = occ->next;
236 occ->next = new_occ->children;
237 new_occ->children = occ;
238
 239 /* Try the next block (it may also be dominated by BB). */
240 }
241
242 else if (dom == occ_bb)
243 {
244 /* OCC_BB dominates BB. Tail recurse to look deeper. */
245 insert_bb (new_occ, dom, &occ->children);
246 return;
247 }
248
249 else if (dom != idom)
250 {
251 gcc_assert (!dom->aux);
252
253 /* There is a dominator between IDOM and BB, add it and make
254 two children out of NEW_OCC and OCC. First, remove OCC from
255 its list. */
256 *p_occ = occ->next;
257 new_occ->next = occ;
258 occ->next = NULL;
259
260 /* None of the previous blocks has DOM as a dominator: if we tail
261 recursed, we would reexamine them uselessly. Just switch BB with
262 DOM, and go on looking for blocks dominated by DOM. */
263 new_occ = occ_new (dom, new_occ);
264 }
265
266 else
267 {
268 /* Nothing special, go on with the next element. */
269 p_occ = &occ->next;
270 }
271 }
272
273 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
274 new_occ->next = *p_head;
275 *p_head = new_occ;
276}
277
472f3f23 278/* Register that we found a division in BB.
279 IMPORTANCE is a measure of how much weighting to give
280 that division. Use IMPORTANCE = 2 to register a single
281 division. If the division is going to be found multiple
282 times use 1 (as it is with squares). */
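
/* For example, t = a / x registers a single occurrence with
   IMPORTANCE 2, while t = a / (x * x) is encountered twice during
   scanning (once via the uses of x, once via the uses of x * x) and
   registers IMPORTANCE 1 each time; insert_reciprocals later divides
   the accumulated count by two, so both forms weigh the same.  */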
ac70caad 283
284static inline void
472f3f23 285register_division_in (basic_block bb, int importance)
ac70caad 286{
287 struct occurrence *occ;
288
289 occ = (struct occurrence *) bb->aux;
290 if (!occ)
291 {
292 occ = occ_new (bb, NULL);
34154e27 293 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
ac70caad 294 }
295
296 occ->bb_has_division = true;
472f3f23 297 occ->num_divisions += importance;
ac70caad 298}
299
300
301/* Compute the number of divisions that postdominate each block in OCC and
302 its children. */
abacb398 303
abacb398 304static void
ac70caad 305compute_merit (struct occurrence *occ)
abacb398 306{
ac70caad 307 struct occurrence *occ_child;
308 basic_block dom = occ->bb;
abacb398 309
ac70caad 310 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
abacb398 311 {
ac70caad 312 basic_block bb;
313 if (occ_child->children)
314 compute_merit (occ_child);
315
316 if (flag_exceptions)
317 bb = single_noncomplex_succ (dom);
318 else
319 bb = dom;
320
321 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
322 occ->num_divisions += occ_child->num_divisions;
323 }
324}
325
326
327/* Return whether USE_STMT is a floating-point division by DEF. */
328static inline bool
42acab1c 329is_division_by (gimple *use_stmt, tree def)
ac70caad 330{
75a70cf9 331 return is_gimple_assign (use_stmt)
332 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
333 && gimple_assign_rhs2 (use_stmt) == def
119368d7 334 /* Do not recognize x / x as valid division, as we are getting
335 confused later by replacing all immediate uses x in such
336 a stmt. */
61c8e77a 337 && gimple_assign_rhs1 (use_stmt) != def
338 && !stmt_can_throw_internal (cfun, use_stmt);
ac70caad 339}
340
3cb2785e 341/* Return TRUE if USE_STMT is a multiplication of DEF by A. */
472f3f23 342static inline bool
3cb2785e 343is_mult_by (gimple *use_stmt, tree def, tree a)
472f3f23 344{
345 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
346 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
347 {
348 tree op0 = gimple_assign_rhs1 (use_stmt);
349 tree op1 = gimple_assign_rhs2 (use_stmt);
350
3cb2785e 351 return (op0 == def && op1 == a)
352 || (op0 == a && op1 == def);
472f3f23 353 }
354 return 0;
355}
356
3cb2785e 357/* Return whether USE_STMT is DEF * DEF. */
358static inline bool
359is_square_of (gimple *use_stmt, tree def)
360{
361 return is_mult_by (use_stmt, def, def);
362}
363
472f3f23 364/* Return whether USE_STMT is a floating-point division by
365 DEF * DEF. */
366static inline bool
367is_division_by_square (gimple *use_stmt, tree def)
368{
369 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
370 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
61c8e77a 371 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
372 && !stmt_can_throw_internal (cfun, use_stmt))
472f3f23 373 {
374 tree denominator = gimple_assign_rhs2 (use_stmt);
375 if (TREE_CODE (denominator) == SSA_NAME)
61c8e77a 376 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
472f3f23 377 }
 378 return false;
379}
380
ac70caad 381/* Walk the subset of the dominator tree rooted at OCC, setting the
382 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
383 the given basic block. The field may be left NULL, of course,
384 if it is not possible or profitable to do the optimization.
385
386 DEF_BSI is an iterator pointing at the statement defining DEF.
387 If RECIP_DEF is set, a dominator already has a computation that can
472f3f23 388 be used.
389
390 If should_insert_square_recip is set, then this also inserts
391 the square of the reciprocal immediately after the definition
392 of the reciprocal. */
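
/* For example, if SHOULD_INSERT_SQUARE_RECIP is nonzero and DEF is x,
   the code below emits

	reciptmp = 1.0 / x;
	powmult_reciptmp = reciptmp * reciptmp;

   so that subsequent divisions by x and by x * x can both be turned
   into multiplications.  */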
ac70caad 393
394static void
75a70cf9 395insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
472f3f23 396 tree def, tree recip_def, tree square_recip_def,
397 int should_insert_square_recip, int threshold)
ac70caad 398{
75a70cf9 399 tree type;
472f3f23 400 gassign *new_stmt, *new_square_stmt;
75a70cf9 401 gimple_stmt_iterator gsi;
ac70caad 402 struct occurrence *occ_child;
403
404 if (!recip_def
405 && (occ->bb_has_division || !flag_trapping_math)
472f3f23 406 /* Divide by two as all divisions are counted twice in
407 the costing loop. */
408 && occ->num_divisions / 2 >= threshold)
ac70caad 409 {
410 /* Make a variable with the replacement and substitute it. */
411 type = TREE_TYPE (def);
072f7ab1 412 recip_def = create_tmp_reg (type, "reciptmp");
e9cf809e 413 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
414 build_one_cst (type), def);
48e1416a 415
472f3f23 416 if (should_insert_square_recip)
417 {
418 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
419 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
420 recip_def, recip_def);
421 }
422
ac70caad 423 if (occ->bb_has_division)
472f3f23 424 {
425 /* Case 1: insert before an existing division. */
426 gsi = gsi_after_labels (occ->bb);
427 while (!gsi_end_p (gsi)
428 && (!is_division_by (gsi_stmt (gsi), def))
429 && (!is_division_by_square (gsi_stmt (gsi), def)))
75a70cf9 430 gsi_next (&gsi);
ac70caad 431
472f3f23 432 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
922f606b 433 if (should_insert_square_recip)
434 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
472f3f23 435 }
75a70cf9 436 else if (def_gsi && occ->bb == def_gsi->bb)
472f3f23 437 {
438 /* Case 2: insert right after the definition. Note that this will
ac70caad 439 never happen if the definition statement can throw, because in
440 that case the sole successor of the statement's basic block will
441 dominate all the uses as well. */
472f3f23 442 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
922f606b 443 if (should_insert_square_recip)
444 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
472f3f23 445 }
ac70caad 446 else
472f3f23 447 {
448 /* Case 3: insert in a basic block not containing defs/uses. */
449 gsi = gsi_after_labels (occ->bb);
450 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
922f606b 451 if (should_insert_square_recip)
452 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
472f3f23 453 }
454
30c4e60d 455 reciprocal_stats.rdivs_inserted++;
456
ac70caad 457 occ->recip_def_stmt = new_stmt;
abacb398 458 }
459
ac70caad 460 occ->recip_def = recip_def;
472f3f23 461 occ->square_recip_def = square_recip_def;
ac70caad 462 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
472f3f23 463 insert_reciprocals (def_gsi, occ_child, def, recip_def,
464 square_recip_def, should_insert_square_recip,
465 threshold);
466}
467
468/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
469 Take as argument the use for (x * x). */
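
/* For example, given the reciprocal definitions inserted above for x,
   this rewrites t = a / (x * x) into t = a * powmult_reciptmp.  */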
470static inline void
471replace_reciprocal_squares (use_operand_p use_p)
472{
473 gimple *use_stmt = USE_STMT (use_p);
474 basic_block bb = gimple_bb (use_stmt);
475 struct occurrence *occ = (struct occurrence *) bb->aux;
476
477 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
478 && occ->recip_def)
479 {
480 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
481 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
482 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
483 SET_USE (use_p, occ->square_recip_def);
484 fold_stmt_inplace (&gsi);
485 update_stmt (use_stmt);
486 }
ac70caad 487}
488
489
490/* Replace the division at USE_P with a multiplication by the reciprocal, if
491 possible. */
492
493static inline void
494replace_reciprocal (use_operand_p use_p)
495{
42acab1c 496 gimple *use_stmt = USE_STMT (use_p);
75a70cf9 497 basic_block bb = gimple_bb (use_stmt);
ac70caad 498 struct occurrence *occ = (struct occurrence *) bb->aux;
499
0bfd8d5c 500 if (optimize_bb_for_speed_p (bb)
501 && occ->recip_def && use_stmt != occ->recip_def_stmt)
ac70caad 502 {
50aacf4c 503 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
75a70cf9 504 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
ac70caad 505 SET_USE (use_p, occ->recip_def);
50aacf4c 506 fold_stmt_inplace (&gsi);
ac70caad 507 update_stmt (use_stmt);
508 }
509}
510
511
512/* Free OCC and return one more "struct occurrence" to be freed. */
513
514static struct occurrence *
515free_bb (struct occurrence *occ)
516{
517 struct occurrence *child, *next;
518
519 /* First get the two pointers hanging off OCC. */
520 next = occ->next;
521 child = occ->children;
522 occ->bb->aux = NULL;
d8e7268c 523 occ_pool->remove (occ);
ac70caad 524
525 /* Now ensure that we don't recurse unless it is necessary. */
526 if (!child)
527 return next;
9e583fac 528 else
ac70caad 529 {
530 while (next)
531 next = free_bb (next);
532
533 return child;
534 }
535}
536
3cb2785e 537/* Transform sequences like
538 t = sqrt (a)
539 x = 1.0 / t;
540 r1 = x * x;
541 r2 = a * x;
542 into:
543 t = sqrt (a)
544 r1 = 1.0 / a;
545 r2 = t;
546 x = r1 * r2;
547 depending on the uses of x, r1, r2. This removes one multiplication and
548 allows the sqrt and division operations to execute in parallel.
549 DEF_GSI is the gsi of the initial division by sqrt that defines
4552b6fc 550 DEF (x in the example above). */
3cb2785e 551
552static void
553optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
554{
555 gimple *use_stmt;
556 imm_use_iterator use_iter;
557 gimple *stmt = gsi_stmt (*def_gsi);
558 tree x = def;
559 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
560 tree div_rhs1 = gimple_assign_rhs1 (stmt);
561
562 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
563 || TREE_CODE (div_rhs1) != REAL_CST
564 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
565 return;
566
567 gcall *sqrt_stmt
568 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
569
570 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
571 return;
572
573 switch (gimple_call_combined_fn (sqrt_stmt))
574 {
575 CASE_CFN_SQRT:
576 CASE_CFN_SQRT_FN:
577 break;
578
579 default:
580 return;
581 }
582 tree a = gimple_call_arg (sqrt_stmt, 0);
583
584 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
585
586 /* Statements that use x in x * x. */
587 auto_vec<gimple *> sqr_stmts;
588 /* Statements that use x in a * x. */
589 auto_vec<gimple *> mult_stmts;
590 bool has_other_use = false;
591 bool mult_on_main_path = false;
592
593 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
594 {
595 if (is_gimple_debug (use_stmt))
596 continue;
597 if (is_square_of (use_stmt, x))
598 {
599 sqr_stmts.safe_push (use_stmt);
600 if (gimple_bb (use_stmt) == gimple_bb (stmt))
601 mult_on_main_path = true;
602 }
603 else if (is_mult_by (use_stmt, x, a))
604 {
605 mult_stmts.safe_push (use_stmt);
606 if (gimple_bb (use_stmt) == gimple_bb (stmt))
607 mult_on_main_path = true;
608 }
609 else
610 has_other_use = true;
611 }
612
613 /* In the x * x and a * x cases we just rewire stmt operands or
614 remove multiplications. In the has_other_use case we introduce
615 a multiplication so make sure we don't introduce a multiplication
616 on a path where there was none. */
617 if (has_other_use && !mult_on_main_path)
618 return;
619
620 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
621 return;
622
623 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
624 to be able to compose it from the sqr and mult cases. */
625 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
626 return;
627
628 if (dump_file)
629 {
630 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
631 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
632 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
633 fprintf (dump_file, "\n");
634 }
635
636 bool delete_div = !has_other_use;
637 tree sqr_ssa_name = NULL_TREE;
638 if (!sqr_stmts.is_empty ())
639 {
640 /* r1 = x * x. Transform the original
641 x = 1.0 / t
642 into
643 tmp1 = 1.0 / a
644 r1 = tmp1. */
645
646 sqr_ssa_name
647 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
648
649 if (dump_file)
650 {
651 fprintf (dump_file, "Replacing original division\n");
652 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653 fprintf (dump_file, "with new division\n");
654 }
b9feec79 655 stmt
656 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
657 gimple_assign_rhs1 (stmt), a);
658 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
659 gsi_remove (def_gsi, true);
660 *def_gsi = gsi_for_stmt (stmt);
3cb2785e 661 fold_stmt_inplace (def_gsi);
662 update_stmt (stmt);
663
664 if (dump_file)
665 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
666
667 delete_div = false;
668 gimple *sqr_stmt;
669 unsigned int i;
670 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
671 {
672 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
673 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
674 update_stmt (sqr_stmt);
675 }
676 }
677 if (!mult_stmts.is_empty ())
678 {
679 /* r2 = a * x. Transform this into:
680 r2 = t (The original sqrt (a)). */
681 unsigned int i;
682 gimple *mult_stmt = NULL;
683 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
684 {
685 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
686
687 if (dump_file)
688 {
689 fprintf (dump_file, "Replacing squaring multiplication\n");
690 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
691 fprintf (dump_file, "with assignment\n");
692 }
693 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
694 fold_stmt_inplace (&gsi2);
695 update_stmt (mult_stmt);
696 if (dump_file)
697 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
698 }
699 }
700
701 if (has_other_use)
702 {
703 /* Using the two temporaries tmp1, tmp2 from above
704 the original x is now:
705 x = tmp1 * tmp2. */
706 gcc_assert (orig_sqrt_ssa_name);
707 gcc_assert (sqr_ssa_name);
708
709 gimple *new_stmt
710 = gimple_build_assign (x, MULT_EXPR,
b9feec79 711 orig_sqrt_ssa_name, sqr_ssa_name);
3cb2785e 712 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
713 update_stmt (stmt);
714 }
715 else if (delete_div)
716 {
717 /* Remove the original division. */
718 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
719 gsi_remove (&gsi2, true);
720 release_defs (stmt);
721 }
b9feec79 722 else
723 release_ssa_name (x);
3cb2785e 724}
ac70caad 725
726/* Look for floating-point divisions among DEF's uses, and try to
727 replace them by multiplications with the reciprocal. Add
728 as many statements computing the reciprocal as needed.
729
730 DEF must be a GIMPLE register of a floating-point type. */
731
732static void
75a70cf9 733execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
ac70caad 734{
472f3f23 735 use_operand_p use_p, square_use_p;
736 imm_use_iterator use_iter, square_use_iter;
737 tree square_def;
ac70caad 738 struct occurrence *occ;
472f3f23 739 int count = 0;
740 int threshold;
741 int square_recip_count = 0;
742 int sqrt_recip_count = 0;
abacb398 743
56c4f422 744 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
472f3f23 745 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
746
41e37ac9 747 /* If DEF is a square (x * x), count the number of divisions by x.
748 If there are more divisions by x than by (DEF * DEF), prefer to optimize
749 the reciprocal of x instead of DEF. This improves cases like:
750 def = x * x
751 t0 = a / def
752 t1 = b / def
753 t2 = c / x
754 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
755 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
756
757 if (is_gimple_assign (def_stmt)
758 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
759 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
760 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
472f3f23 761 {
41e37ac9 762 tree op0 = gimple_assign_rhs1 (def_stmt);
472f3f23 763
41e37ac9 764 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
472f3f23 765 {
41e37ac9 766 gimple *use_stmt = USE_STMT (use_p);
767 if (is_division_by (use_stmt, op0))
768 sqrt_recip_count++;
472f3f23 769 }
770 }
ac70caad 771
772 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
abacb398 773 {
42acab1c 774 gimple *use_stmt = USE_STMT (use_p);
ac70caad 775 if (is_division_by (use_stmt, def))
abacb398 776 {
472f3f23 777 register_division_in (gimple_bb (use_stmt), 2);
ac70caad 778 count++;
abacb398 779 }
472f3f23 780
781 if (is_square_of (use_stmt, def))
782 {
783 square_def = gimple_assign_lhs (use_stmt);
784 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
785 {
786 gimple *square_use_stmt = USE_STMT (square_use_p);
787 if (is_division_by (square_use_stmt, square_def))
788 {
41e37ac9 789 /* This is executed twice for each division by a square. */
472f3f23 790 register_division_in (gimple_bb (square_use_stmt), 1);
41e37ac9 791 square_recip_count++;
472f3f23 792 }
793 }
794 }
abacb398 795 }
48e1416a 796
41e37ac9 797 /* Square reciprocals were counted twice above. */
472f3f23 798 square_recip_count /= 2;
799
41e37ac9 800 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
472f3f23 801 if (sqrt_recip_count > square_recip_count)
d5e9136f 802 goto out;
472f3f23 803
ac70caad 804 /* Do the expensive part only if we can hope to optimize something. */
41e37ac9 805 if (count + square_recip_count >= threshold && count >= 1)
ac70caad 806 {
42acab1c 807 gimple *use_stmt;
ac70caad 808 for (occ = occ_head; occ; occ = occ->next)
809 {
810 compute_merit (occ);
472f3f23 811 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
812 square_recip_count, threshold);
ac70caad 813 }
814
09aca5bc 815 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
ac70caad 816 {
ac70caad 817 if (is_division_by (use_stmt, def))
09aca5bc 818 {
819 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
820 replace_reciprocal (use_p);
821 }
41e37ac9 822 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
472f3f23 823 {
824 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
825 {
826 /* Find all uses of the square that are divisions and
 827 replace them by multiplications with the inverse. */
828 imm_use_iterator square_iterator;
829 gimple *powmult_use_stmt = USE_STMT (use_p);
830 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
831
832 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
833 square_iterator, powmult_def_name)
834 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
835 {
836 gimple *powmult_use_stmt = USE_STMT (square_use_p);
837 if (is_division_by (powmult_use_stmt, powmult_def_name))
838 replace_reciprocal_squares (square_use_p);
839 }
840 }
841 }
ac70caad 842 }
843 }
844
d5e9136f 845out:
ac70caad 846 for (occ = occ_head; occ; )
847 occ = free_bb (occ);
848
849 occ_head = NULL;
abacb398 850}
851
4cfd27a5 852/* Return an internal function that implements the reciprocal of CALL,
853 or IFN_LAST if there is no such function that the target supports. */
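
/* For example, on a target that supports IFN_RSQRT for the given type,
   a call to sqrt maps to IFN_RSQRT, which lets the caller rewrite
   t = sqrt (b); x = a / t into t = rsqrt (b); x = a * t.  */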
854
855internal_fn
856internal_fn_reciprocal (gcall *call)
857{
858 internal_fn ifn;
859
860 switch (gimple_call_combined_fn (call))
861 {
862 CASE_CFN_SQRT:
8c32188e 863 CASE_CFN_SQRT_FN:
4cfd27a5 864 ifn = IFN_RSQRT;
865 break;
866
867 default:
868 return IFN_LAST;
869 }
870
871 tree_pair types = direct_internal_fn_types (ifn, call);
872 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
873 return IFN_LAST;
874
875 return ifn;
876}
877
ac70caad 878/* Go through all the floating-point SSA_NAMEs, and call
879 execute_cse_reciprocals_1 on each of them. */
65b0537f 880namespace {
881
882const pass_data pass_data_cse_reciprocals =
883{
884 GIMPLE_PASS, /* type */
885 "recip", /* name */
886 OPTGROUP_NONE, /* optinfo_flags */
8ed378fe 887 TV_TREE_RECIP, /* tv_id */
65b0537f 888 PROP_ssa, /* properties_required */
889 0, /* properties_provided */
890 0, /* properties_destroyed */
891 0, /* todo_flags_start */
8b88439e 892 TODO_update_ssa, /* todo_flags_finish */
65b0537f 893};
894
895class pass_cse_reciprocals : public gimple_opt_pass
896{
897public:
898 pass_cse_reciprocals (gcc::context *ctxt)
899 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
900 {}
901
902 /* opt_pass methods: */
903 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
904 virtual unsigned int execute (function *);
905
906}; // class pass_cse_reciprocals
907
908unsigned int
909pass_cse_reciprocals::execute (function *fun)
abacb398 910{
911 basic_block bb;
51b60a11 912 tree arg;
685b24f5 913
1dc6c44d 914 occ_pool = new object_allocator<occurrence> ("dominators for recip");
685b24f5 915
30c4e60d 916 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
c136ae61 917 calculate_dominance_info (CDI_DOMINATORS);
918 calculate_dominance_info (CDI_POST_DOMINATORS);
ac70caad 919
382ecba7 920 if (flag_checking)
921 FOR_EACH_BB_FN (bb, fun)
922 gcc_assert (!bb->aux);
ac70caad 923
65b0537f 924 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
c6dfe037 925 if (FLOAT_TYPE_P (TREE_TYPE (arg))
ac70caad 926 && is_gimple_reg (arg))
c6dfe037 927 {
65b0537f 928 tree name = ssa_default_def (fun, arg);
c6dfe037 929 if (name)
930 execute_cse_reciprocals_1 (NULL, name);
931 }
51b60a11 932
65b0537f 933 FOR_EACH_BB_FN (bb, fun)
abacb398 934 {
75a70cf9 935 tree def;
abacb398 936
1a91d914 937 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
938 gsi_next (&gsi))
abacb398 939 {
1a91d914 940 gphi *phi = gsi.phi ();
abacb398 941 def = PHI_RESULT (phi);
7c782c9b 942 if (! virtual_operand_p (def)
943 && FLOAT_TYPE_P (TREE_TYPE (def)))
ac70caad 944 execute_cse_reciprocals_1 (NULL, def);
abacb398 945 }
946
1a91d914 947 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
948 gsi_next (&gsi))
abacb398 949 {
42acab1c 950 gimple *stmt = gsi_stmt (gsi);
a0315874 951
75a70cf9 952 if (gimple_has_lhs (stmt)
abacb398 953 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
954 && FLOAT_TYPE_P (TREE_TYPE (def))
51b60a11 955 && TREE_CODE (def) == SSA_NAME)
3cb2785e 956 {
4552b6fc 957 execute_cse_reciprocals_1 (&gsi, def);
958 stmt = gsi_stmt (gsi);
3cb2785e 959 if (flag_unsafe_math_optimizations
960 && is_gimple_assign (stmt)
b9feec79 961 && gimple_assign_lhs (stmt) == def
aac19106 962 && !stmt_can_throw_internal (cfun, stmt)
3cb2785e 963 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
964 optimize_recip_sqrt (&gsi, def);
3cb2785e 965 }
abacb398 966 }
e174638f 967
0bfd8d5c 968 if (optimize_bb_for_size_p (bb))
969 continue;
970
e174638f 971 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
1a91d914 972 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
973 gsi_next (&gsi))
e174638f 974 {
42acab1c 975 gimple *stmt = gsi_stmt (gsi);
e174638f 976
75a70cf9 977 if (is_gimple_assign (stmt)
978 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
e174638f 979 {
75a70cf9 980 tree arg1 = gimple_assign_rhs2 (stmt);
42acab1c 981 gimple *stmt1;
2cd360b6 982
983 if (TREE_CODE (arg1) != SSA_NAME)
984 continue;
985
986 stmt1 = SSA_NAME_DEF_STMT (arg1);
e174638f 987
75a70cf9 988 if (is_gimple_call (stmt1)
4cfd27a5 989 && gimple_call_lhs (stmt1))
e174638f 990 {
851c1b0c 991 bool fail;
774b1cdd 992 imm_use_iterator ui;
993 use_operand_p use_p;
4cfd27a5 994 tree fndecl = NULL_TREE;
e174638f 995
4cfd27a5 996 gcall *call = as_a <gcall *> (stmt1);
997 internal_fn ifn = internal_fn_reciprocal (call);
998 if (ifn == IFN_LAST)
999 {
1000 fndecl = gimple_call_fndecl (call);
1001 if (!fndecl
a0e9bfbb 1002 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
4cfd27a5 1003 continue;
1004 fndecl = targetm.builtin_reciprocal (fndecl);
1005 if (!fndecl)
1006 continue;
1007 }
e174638f 1008
774b1cdd 1009 /* Check that all uses of the SSA name are divisions,
1010 otherwise replacing the defining statement will do
1011 the wrong thing. */
1012 fail = false;
1013 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1014 {
42acab1c 1015 gimple *stmt2 = USE_STMT (use_p);
774b1cdd 1016 if (is_gimple_debug (stmt2))
1017 continue;
1018 if (!is_gimple_assign (stmt2)
1019 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1020 || gimple_assign_rhs1 (stmt2) == arg1
1021 || gimple_assign_rhs2 (stmt2) != arg1)
1022 {
1023 fail = true;
1024 break;
1025 }
1026 }
1027 if (fail)
1028 continue;
1029
4cfd27a5 1030 gimple_replace_ssa_lhs (call, arg1);
1031 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
851c1b0c 1032 {
1033 auto_vec<tree, 4> args;
1034 for (unsigned int i = 0;
4cfd27a5 1035 i < gimple_call_num_args (call); i++)
1036 args.safe_push (gimple_call_arg (call, i));
1037 gcall *stmt2;
1038 if (ifn == IFN_LAST)
1039 stmt2 = gimple_build_call_vec (fndecl, args);
1040 else
1041 stmt2 = gimple_build_call_internal_vec (ifn, args);
851c1b0c 1042 gimple_call_set_lhs (stmt2, arg1);
4cfd27a5 1043 if (gimple_vdef (call))
851c1b0c 1044 {
4cfd27a5 1045 gimple_set_vdef (stmt2, gimple_vdef (call));
851c1b0c 1046 SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
1047 }
989f02dc 1048 gimple_call_set_nothrow (stmt2,
1049 gimple_call_nothrow_p (call));
4cfd27a5 1050 gimple_set_vuse (stmt2, gimple_vuse (call));
1051 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
851c1b0c 1052 gsi_replace (&gsi2, stmt2, true);
1053 }
1054 else
1055 {
4cfd27a5 1056 if (ifn == IFN_LAST)
1057 gimple_call_set_fndecl (call, fndecl);
1058 else
1059 gimple_call_set_internal_fn (call, ifn);
1060 update_stmt (call);
851c1b0c 1061 }
30c4e60d 1062 reciprocal_stats.rfuncs_inserted++;
e174638f 1063
774b1cdd 1064 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1065 {
50aacf4c 1066 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
774b1cdd 1067 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
50aacf4c 1068 fold_stmt_inplace (&gsi);
774b1cdd 1069 update_stmt (stmt);
1070 }
e174638f 1071 }
1072 }
1073 }
abacb398 1074 }
685b24f5 1075
65b0537f 1076 statistics_counter_event (fun, "reciprocal divs inserted",
30c4e60d 1077 reciprocal_stats.rdivs_inserted);
65b0537f 1078 statistics_counter_event (fun, "reciprocal functions inserted",
30c4e60d 1079 reciprocal_stats.rfuncs_inserted);
1080
c136ae61 1081 free_dominance_info (CDI_DOMINATORS);
1082 free_dominance_info (CDI_POST_DOMINATORS);
d8e7268c 1083 delete occ_pool;
2a1990e9 1084 return 0;
abacb398 1085}
1086
cbe8bda8 1087} // anon namespace
1088
1089gimple_opt_pass *
1090make_pass_cse_reciprocals (gcc::context *ctxt)
1091{
1092 return new pass_cse_reciprocals (ctxt);
1093}
1094
0d424440 1095/* Records an occurrence at statement USE_STMT in the vector of
a0315874 1096 statements STMTS if it is dominated by *TOP_BB, dominates it, or
0d424440 1097 *TOP_BB is not yet initialized. Returns true if the occurrence was
a0315874 1098 pushed onto the vector. Adjusts *TOP_BB to be the basic block
 1099 dominating all statements in the vector. */
1100
1101static bool
42acab1c 1102maybe_record_sincos (vec<gimple *> *stmts,
1103 basic_block *top_bb, gimple *use_stmt)
a0315874 1104{
75a70cf9 1105 basic_block use_bb = gimple_bb (use_stmt);
a0315874 1106 if (*top_bb
1107 && (*top_bb == use_bb
1108 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
f1f41a6c 1109 stmts->safe_push (use_stmt);
a0315874 1110 else if (!*top_bb
1111 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1112 {
f1f41a6c 1113 stmts->safe_push (use_stmt);
a0315874 1114 *top_bb = use_bb;
1115 }
1116 else
1117 return false;
1118
1119 return true;
1120}
1121
1122/* Look for sin, cos and cexpi calls with the same argument NAME and
1123 create a single call to cexpi CSEing the result in this case.
1124 We first walk over all immediate uses of the argument collecting
1125 statements that we can CSE in a vector and in a second pass replace
1126 the statement rhs with a REALPART or IMAGPART expression on the
1127 result of the cexpi call we insert before the use statement that
1128 dominates all other candidates. */
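
/* For example,
     s = sin (x);
     c = cos (x);
   becomes
     sincostmp = cexpi (x);
     s = IMAGPART_EXPR <sincostmp>;
     c = REALPART_EXPR <sincostmp>;
   with the cexpi call placed so that it dominates both old calls.  */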
1129
4c80086d 1130static bool
a0315874 1131execute_cse_sincos_1 (tree name)
1132{
75a70cf9 1133 gimple_stmt_iterator gsi;
a0315874 1134 imm_use_iterator use_iter;
75a70cf9 1135 tree fndecl, res, type;
42acab1c 1136 gimple *def_stmt, *use_stmt, *stmt;
a0315874 1137 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
42acab1c 1138 auto_vec<gimple *> stmts;
a0315874 1139 basic_block top_bb = NULL;
1140 int i;
4c80086d 1141 bool cfg_changed = false;
a0315874 1142
1143 type = TREE_TYPE (name);
1144 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1145 {
75a70cf9 1146 if (gimple_code (use_stmt) != GIMPLE_CALL
fa0793ad 1147 || !gimple_call_lhs (use_stmt))
a0315874 1148 continue;
1149
fa0793ad 1150 switch (gimple_call_combined_fn (use_stmt))
a0315874 1151 {
fa0793ad 1152 CASE_CFN_COS:
a0315874 1153 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1154 break;
1155
fa0793ad 1156 CASE_CFN_SIN:
a0315874 1157 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1158 break;
1159
fa0793ad 1160 CASE_CFN_CEXPI:
a0315874 1161 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1162 break;
1163
1164 default:;
1165 }
1166 }
1167
1168 if (seen_cos + seen_sin + seen_cexpi <= 1)
6702d09a 1169 return false;
a0315874 1170
1171 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1172 the name def statement. */
1173 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1174 if (!fndecl)
4c80086d 1175 return false;
75a70cf9 1176 stmt = gimple_build_call (fndecl, 1, name);
03d37e4e 1177 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
75a70cf9 1178 gimple_call_set_lhs (stmt, res);
1179
a0315874 1180 def_stmt = SSA_NAME_DEF_STMT (name);
8090c12d 1181 if (!SSA_NAME_IS_DEFAULT_DEF (name)
75a70cf9 1182 && gimple_code (def_stmt) != GIMPLE_PHI
1183 && gimple_bb (def_stmt) == top_bb)
a0315874 1184 {
75a70cf9 1185 gsi = gsi_for_stmt (def_stmt);
1186 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
a0315874 1187 }
1188 else
1189 {
75a70cf9 1190 gsi = gsi_after_labels (top_bb);
1191 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
a0315874 1192 }
30c4e60d 1193 sincos_stats.inserted++;
a0315874 1194
1195 /* And adjust the recorded old call sites. */
f1f41a6c 1196 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
a0315874 1197 {
75a70cf9 1198 tree rhs = NULL;
75a70cf9 1199
fa0793ad 1200 switch (gimple_call_combined_fn (use_stmt))
a0315874 1201 {
fa0793ad 1202 CASE_CFN_COS:
75a70cf9 1203 rhs = fold_build1 (REALPART_EXPR, type, res);
a0315874 1204 break;
1205
fa0793ad 1206 CASE_CFN_SIN:
75a70cf9 1207 rhs = fold_build1 (IMAGPART_EXPR, type, res);
a0315874 1208 break;
1209
fa0793ad 1210 CASE_CFN_CEXPI:
75a70cf9 1211 rhs = res;
a0315874 1212 break;
1213
1214 default:;
1215 gcc_unreachable ();
1216 }
1217
75a70cf9 1218 /* Replace call with a copy. */
1219 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1220
1221 gsi = gsi_for_stmt (use_stmt);
4c80086d 1222 gsi_replace (&gsi, stmt, true);
1223 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1224 cfg_changed = true;
a0315874 1225 }
1226
4c80086d 1227 return cfg_changed;
a0315874 1228}
1229
e9a6c4bc 1230/* To evaluate powi(x,n), the floating point value x raised to the
1231 constant integer exponent n, we use a hybrid algorithm that
1232 combines the "window method" with look-up tables. For an
1233 introduction to exponentiation algorithms and "addition chains",
1234 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1235 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1236 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1237 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1238
1239/* Provide a default value for POWI_MAX_MULTS, the maximum number of
1240 multiplications to inline before calling the system library's pow
1241 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1242 so this default never requires calling pow, powf or powl. */
1243
1244#ifndef POWI_MAX_MULTS
1245#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1246#endif
1247
1248/* The size of the "optimal power tree" lookup table. All
1249 exponents less than this value are simply looked up in the
1250 powi_table below. This threshold is also used to size the
1251 cache of pseudo registers that hold intermediate results. */
1252#define POWI_TABLE_SIZE 256
1253
1254/* The size, in bits of the window, used in the "window method"
1255 exponentiation algorithm. This is equivalent to a radix of
1256 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1257#define POWI_WINDOW_SIZE 3
1258
1259/* The following table is an efficient representation of an
1260 "optimal power tree". For each value, i, the corresponding
 1261 value, j, in the table states that an optimal evaluation
1262 sequence for calculating pow(x,i) can be found by evaluating
1263 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1264 100 integers is given in Knuth's "Seminumerical algorithms". */
1265
1266static const unsigned char powi_table[POWI_TABLE_SIZE] =
1267 {
1268 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1269 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1270 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1271 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1272 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1273 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1274 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1275 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1276 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1277 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1278 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1279 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1280 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1281 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1282 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1283 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1284 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1285 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1286 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1287 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1288 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1289 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1290 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1291 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1292 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1293 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1294 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1295 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1296 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1297 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1298 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1299 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1300 };
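
/* As a worked example, powi_table[5] == 3, so pow (x, 5) is evaluated
   as pow (x, 3) * pow (x, 2); powi_table[3] == 2 in turn gives
   pow (x, 3) == pow (x, 2) * pow (x, 1), and pow (x, 2) == x * x,
   for a total of three multiplications.  */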
1301
1302
1303/* Return the number of multiplications required to calculate
1304 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1305 subroutine of powi_cost. CACHE is an array indicating
1306 which exponents have already been calculated. */
1307
1308static int
1309powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1310{
1311 /* If we've already calculated this exponent, then this evaluation
1312 doesn't require any additional multiplications. */
1313 if (cache[n])
1314 return 0;
1315
1316 cache[n] = true;
1317 return powi_lookup_cost (n - powi_table[n], cache)
1318 + powi_lookup_cost (powi_table[n], cache) + 1;
1319}
1320
1321/* Return the number of multiplications required to calculate
1322 powi(x,n) for an arbitrary x, given the exponent N. This
1323 function needs to be kept in sync with powi_as_mults below. */
1324
1325static int
1326powi_cost (HOST_WIDE_INT n)
1327{
1328 bool cache[POWI_TABLE_SIZE];
1329 unsigned HOST_WIDE_INT digit;
1330 unsigned HOST_WIDE_INT val;
1331 int result;
1332
1333 if (n == 0)
1334 return 0;
1335
1336 /* Ignore the reciprocal when calculating the cost. */
1337 val = (n < 0) ? -n : n;
1338
1339 /* Initialize the exponent cache. */
1340 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1341 cache[1] = true;
1342
1343 result = 0;
1344
1345 while (val >= POWI_TABLE_SIZE)
1346 {
1347 if (val & 1)
1348 {
1349 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1350 result += powi_lookup_cost (digit, cache)
1351 + POWI_WINDOW_SIZE + 1;
1352 val >>= POWI_WINDOW_SIZE;
1353 }
1354 else
1355 {
1356 val >>= 1;
1357 result++;
1358 }
1359 }
1360
1361 return result + powi_lookup_cost (val, cache);
1362}
1363
1364/* Recursive subroutine of powi_as_mults. This function takes the
1365 array, CACHE, of already calculated exponents and an exponent N and
1366 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1367
1368static tree
1369powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
03d37e4e 1370 HOST_WIDE_INT n, tree *cache)
e9a6c4bc 1371{
1372 tree op0, op1, ssa_target;
1373 unsigned HOST_WIDE_INT digit;
1a91d914 1374 gassign *mult_stmt;
e9a6c4bc 1375
1376 if (n < POWI_TABLE_SIZE && cache[n])
1377 return cache[n];
1378
03d37e4e 1379 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
e9a6c4bc 1380
1381 if (n < POWI_TABLE_SIZE)
1382 {
1383 cache[n] = ssa_target;
03d37e4e 1384 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1385 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
e9a6c4bc 1386 }
1387 else if (n & 1)
1388 {
1389 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
03d37e4e 1390 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1391 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
e9a6c4bc 1392 }
1393 else
1394 {
03d37e4e 1395 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
e9a6c4bc 1396 op1 = op0;
1397 }
1398
e9cf809e 1399 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
ae43b05e 1400 gimple_set_location (mult_stmt, loc);
e9a6c4bc 1401 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1402
1403 return ssa_target;
1404}
1405
1406/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1407 This function needs to be kept in sync with powi_cost above. */
1408
1409static tree
1410powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1411 tree arg0, HOST_WIDE_INT n)
1412{
03d37e4e 1413 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1a91d914 1414 gassign *div_stmt;
03d37e4e 1415 tree target;
e9a6c4bc 1416
1417 if (n == 0)
1418 return build_real (type, dconst1);
1419
1420 memset (cache, 0, sizeof (cache));
1421 cache[1] = arg0;
1422
03d37e4e 1423 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
e9a6c4bc 1424 if (n >= 0)
1425 return result;
1426
1427 /* If the original exponent was negative, reciprocate the result. */
03d37e4e 1428 target = make_temp_ssa_name (type, NULL, "powmult");
e9cf809e 1429 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1430 build_real (type, dconst1), result);
ae43b05e 1431 gimple_set_location (div_stmt, loc);
e9a6c4bc 1432 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1433
1434 return target;
1435}
1436
1437/* ARG0 and N are the two arguments to a powi builtin in GSI with
1438 location info LOC. If the arguments are appropriate, create an
1439 equivalent sequence of statements prior to GSI using an optimal
 1440 number of multiplications, and return an expression holding the
1441 result. */
1442
1443static tree
1444gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1445 tree arg0, HOST_WIDE_INT n)
1446{
1447 /* Avoid largest negative number. */
1448 if (n != -n
1449 && ((n >= -1 && n <= 2)
1450 || (optimize_function_for_speed_p (cfun)
1451 && powi_cost (n) <= POWI_MAX_MULTS)))
1452 return powi_as_mults (gsi, loc, arg0, n);
1453
1454 return NULL_TREE;
1455}
1456
ae43b05e 1457/* Build a gimple call statement that calls FN with argument ARG.
03d37e4e 1458 Set the lhs of the call statement to a fresh SSA name. Insert the
ae43b05e 1459 statement prior to GSI's current position, and return the fresh
1460 SSA name. */
1461
1462static tree
ca12eb68 1463build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
03d37e4e 1464 tree fn, tree arg)
ae43b05e 1465{
1a91d914 1466 gcall *call_stmt;
ae43b05e 1467 tree ssa_target;
1468
ae43b05e 1469 call_stmt = gimple_build_call (fn, 1, arg);
03d37e4e 1470 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
ae43b05e 1471 gimple_set_lhs (call_stmt, ssa_target);
1472 gimple_set_location (call_stmt, loc);
1473 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1474
1475 return ssa_target;
1476}
1477
ca12eb68 1478/* Build a gimple binary operation with the given CODE and arguments
1479 ARG0, ARG1, assigning the result to a new SSA name for variable
1480 TARGET. Insert the statement prior to GSI's current position, and
 1481 return the fresh SSA name. */
1482
1483static tree
1484build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
03d37e4e 1485 const char *name, enum tree_code code,
1486 tree arg0, tree arg1)
ca12eb68 1487{
03d37e4e 1488 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
e9cf809e 1489 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
ca12eb68 1490 gimple_set_location (stmt, loc);
1491 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1492 return result;
1493}
1494
a5c384c1 1495/* Build a gimple reference operation with the given CODE and argument
03d37e4e 1496 ARG, assigning the result to a new SSA name of TYPE with NAME.
a5c384c1 1497 Insert the statement prior to GSI's current position, and return
1498 the fresh SSA name. */
1499
1500static inline tree
1501build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
03d37e4e 1502 const char *name, enum tree_code code, tree arg0)
a5c384c1 1503{
03d37e4e 1504 tree result = make_temp_ssa_name (type, NULL, name);
42acab1c 1505 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
a5c384c1 1506 gimple_set_location (stmt, loc);
1507 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1508 return result;
1509}
1510
03d37e4e 1511/* Build a gimple assignment to cast VAL to TYPE. Insert the statement
aff5fb4d 1512 prior to GSI's current position, and return the fresh SSA name. */
1513
1514static tree
1515build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
03d37e4e 1516 tree type, tree val)
aff5fb4d 1517{
f9e245b2 1518 tree result = make_ssa_name (type);
e9cf809e 1519 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
03d37e4e 1520 gimple_set_location (stmt, loc);
1521 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1522 return result;
aff5fb4d 1523}
1524
c3206272 1525struct pow_synth_sqrt_info
1526{
1527 bool *factors;
1528 unsigned int deepest;
1529 unsigned int num_mults;
1530};
1531
1532/* Return true iff the real value C can be represented as a
1533 sum of powers of 0.5 up to N. That is:
1534 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1535 Record in INFO the various parameters of the synthesis algorithm such
1536 as the factors a[i], the maximum 0.5 power and the number of
1537 multiplications that will be required. */
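
/* For example, C == 0.625 == 0.5 + 0.125 is representable for N >= 3:
   INFO->factors is { true, false, true }, INFO->deepest == 3 and
   INFO->num_mults == 1, as a single multiplication combines the two
   square-root chains sqrt (x) and sqrt (sqrt (sqrt (x))).  */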
1538
1539bool
1540representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1541 struct pow_synth_sqrt_info *info)
1542{
1543 REAL_VALUE_TYPE factor = dconsthalf;
1544 REAL_VALUE_TYPE remainder = c;
1545
1546 info->deepest = 0;
1547 info->num_mults = 0;
1548 memset (info->factors, 0, n * sizeof (bool));
1549
1550 for (unsigned i = 0; i < n; i++)
1551 {
1552 REAL_VALUE_TYPE res;
1553
 1554 /* If something inexact happened, bail out now. */
f2ad9e38 1555 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
c3206272 1556 return false;
1557
1558 /* We have hit zero. The number is representable as a sum
1559 of powers of 0.5. */
20cb53c9 1560 if (real_equal (&res, &dconst0))
c3206272 1561 {
1562 info->factors[i] = true;
1563 info->deepest = i + 1;
1564 return true;
1565 }
1566 else if (!REAL_VALUE_NEGATIVE (res))
1567 {
1568 remainder = res;
1569 info->factors[i] = true;
1570 info->num_mults++;
1571 }
1572 else
1573 info->factors[i] = false;
1574
f2ad9e38 1575 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
c3206272 1576 }
1577 return false;
1578}
1579
1580/* Return the tree corresponding to FN being applied
1581 to ARG N times at GSI and LOC.
1582 Look up previous results from CACHE if need be.
1583 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1584
1585static tree
1586get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1587 tree fn, location_t loc, tree *cache)
1588{
1589 tree res = cache[n];
1590 if (!res)
1591 {
1592 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1593 res = build_and_insert_call (gsi, loc, fn, prev);
1594 cache[n] = res;
1595 }
1596
1597 return res;
1598}
1599
1600/* Print to STREAM the repeated application of function FNAME to ARG
1601 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1602 "foo (foo (x))". */
1603
1604static void
1605print_nested_fn (FILE* stream, const char *fname, const char* arg,
1606 unsigned int n)
1607{
1608 if (n == 0)
1609 fprintf (stream, "%s", arg);
1610 else
1611 {
1612 fprintf (stream, "%s (", fname);
1613 print_nested_fn (stream, fname, arg, n - 1);
1614 fprintf (stream, ")");
1615 }
1616}
1617
1618/* Print to STREAM the fractional sequence of sqrt chains
1619 applied to ARG, described by INFO. Used for the dump file. */
1620
1621static void
1622dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1623 struct pow_synth_sqrt_info *info)
1624{
1625 for (unsigned int i = 0; i < info->deepest; i++)
1626 {
1627 bool is_set = info->factors[i];
1628 if (is_set)
1629 {
1630 print_nested_fn (stream, "sqrt", arg, i + 1);
1631 if (i != info->deepest - 1)
1632 fprintf (stream, " * ");
1633 }
1634 }
1635}
1636
1637/* Print to STREAM a representation of raising ARG to an integer
1638 power N. Used for the dump file. */
1639
1640static void
1641dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1642{
1643 if (n > 1)
1644 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1645 else if (n == 1)
1646 fprintf (stream, "%s", arg);
1647}
1648
1649/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1650 square roots. Place at GSI and LOC. Limit the maximum depth
1651 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1652 result of the expanded sequence or NULL_TREE if the expansion failed.
1653
1654 This routine assumes that ARG1 is a real number with a fractional part
1655 (the integer exponent case will have been handled earlier in
1656 gimple_expand_builtin_pow).
1657
1658 For ARG1 > 0.0:
1659 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1660 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1661 FRAC_PART == ARG1 - WHOLE_PART:
1662 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1663 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1664 if it can be expressed as such, that is if FRAC_PART satisfies:
1665 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1666 where integer a[i] is either 0 or 1.
1667
1668 Example:
1669 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1670 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1671
1672 For ARG1 < 0.0 there are two approaches:
1673 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1674 is calculated as above.
1675
1676 Example:
1677 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1678 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1679
1680 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1681 FRAC_PART := ARG1 - WHOLE_PART
1682 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1683 Example:
1684 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1685 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1686
1687 For ARG1 < 0.0 we choose between (A) and (B) depending on
1688 how many multiplications we'd have to do.
1689 So, for the example in (B): POW (x, -5.875), if we were to
1690 follow algorithm (A) we would produce:
1691 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1692 which contains more multiplications than approach (B).
1693
1694 Hopefully, this approach will eliminate potentially expensive POW library
1695 calls when unsafe floating point math is enabled and allow the compiler to
1696 further optimise the multiplies, square roots and divides produced by this
1697 function. */
1698
1699static tree
1700expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1701 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1702{
1703 tree type = TREE_TYPE (arg0);
1704 machine_mode mode = TYPE_MODE (type);
1705 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1706 bool one_over = true;
1707
1708 if (!sqrtfn)
1709 return NULL_TREE;
1710
1711 if (TREE_CODE (arg1) != REAL_CST)
1712 return NULL_TREE;
1713
1714 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1715
1716 gcc_assert (max_depth > 0);
1717 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1718
1719 struct pow_synth_sqrt_info synth_info;
1720 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1721 synth_info.deepest = 0;
1722 synth_info.num_mults = 0;
1723
1724 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1725 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1726
1727 /* The whole and fractional parts of exp. */
1728 REAL_VALUE_TYPE whole_part;
1729 REAL_VALUE_TYPE frac_part;
1730
1731 real_floor (&whole_part, mode, &exp);
f2ad9e38 1732 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
c3206272 1733
1734
1735 REAL_VALUE_TYPE ceil_whole = dconst0;
1736 REAL_VALUE_TYPE ceil_fract = dconst0;
1737
1738 if (neg_exp)
1739 {
1740 real_ceil (&ceil_whole, mode, &exp);
f2ad9e38 1741 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
c3206272 1742 }
1743
1744 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1745 return NULL_TREE;
1746
1747 /* Check whether it's more profitable to not use 1.0 / ... */
1748 if (neg_exp)
1749 {
1750 struct pow_synth_sqrt_info alt_synth_info;
1751 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1752 alt_synth_info.deepest = 0;
1753 alt_synth_info.num_mults = 0;
1754
1755 if (representable_as_half_series_p (ceil_fract, max_depth,
1756 &alt_synth_info)
1757 && alt_synth_info.deepest <= synth_info.deepest
1758 && alt_synth_info.num_mults < synth_info.num_mults)
1759 {
1760 whole_part = ceil_whole;
1761 frac_part = ceil_fract;
1762 synth_info.deepest = alt_synth_info.deepest;
1763 synth_info.num_mults = alt_synth_info.num_mults;
1764 memcpy (synth_info.factors, alt_synth_info.factors,
1765 (max_depth + 1) * sizeof (bool));
1766 one_over = false;
1767 }
1768 }
1769
1770 HOST_WIDE_INT n = real_to_integer (&whole_part);
1771 REAL_VALUE_TYPE cint;
1772 real_from_integer (&cint, VOIDmode, n, SIGNED);
1773
1774 if (!real_identical (&whole_part, &cint))
1775 return NULL_TREE;
1776
1777 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1778 return NULL_TREE;
1779
1780 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1781
1782 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1783
1784 /* Calculate the integer part of the exponent. */
1785 if (n > 1)
1786 {
1787 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1788 if (!integer_res)
1789 return NULL_TREE;
1790 }
1791
1792 if (dump_file)
1793 {
1794 char string[64];
1795
1796 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1797 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1798
1799 if (neg_exp)
1800 {
1801 if (one_over)
1802 {
1803 fprintf (dump_file, "1.0 / (");
1804 dump_integer_part (dump_file, "x", n);
1805 if (n > 0)
1806 fprintf (dump_file, " * ");
1807 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1808 fprintf (dump_file, ")");
1809 }
1810 else
1811 {
1812 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1813 fprintf (dump_file, " / (");
1814 dump_integer_part (dump_file, "x", n);
1815 fprintf (dump_file, ")");
1816 }
1817 }
1818 else
1819 {
1820 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1821 if (n > 0)
1822 fprintf (dump_file, " * ");
1823 dump_integer_part (dump_file, "x", n);
1824 }
1825
1826 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1827 }
1828
1829
1830 tree fract_res = NULL_TREE;
1831 cache[0] = arg0;
1832
1833 /* Calculate the fractional part of the exponent. */
1834 for (unsigned i = 0; i < synth_info.deepest; i++)
1835 {
1836 if (synth_info.factors[i])
1837 {
1838 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1839
1840 if (!fract_res)
1841 fract_res = sqrt_chain;
1842
1843 else
1844 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1845 fract_res, sqrt_chain);
1846 }
1847 }
1848
1849 tree res = NULL_TREE;
1850
1851 if (neg_exp)
1852 {
1853 if (one_over)
1854 {
1855 if (n > 0)
1856 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1857 fract_res, integer_res);
1858 else
1859 res = fract_res;
1860
1861 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1862 build_real (type, dconst1), res);
1863 }
1864 else
1865 {
1866 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1867 fract_res, integer_res);
1868 }
1869 }
1870 else
1871 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1872 fract_res, integer_res);
1873 return res;
1874}
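
/* As a source-level sketch (not literal GCC output), assuming
   -funsafe-math-optimizations and a hardware sqrt instruction, a call
   such as

     double f (double x) { return __builtin_pow (x, 2.75); }

   is expanded by the routine above into straight-line code roughly
   equivalent to

     double f (double x)
     {
       double i = x * x;                // POWI (x, 2)
       double s1 = __builtin_sqrt (x);  // x**0.5
       double s2 = __builtin_sqrt (s1); // x**0.25
       return s1 * s2 * i;              // x**2.75
     }  */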
1875
e78306af 1876/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1877 with location info LOC. If possible, create an equivalent and
1878 less expensive sequence of statements prior to GSI, and return an
 1879 expression holding the result. */
1880
1881static tree
1882gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1883 tree arg0, tree arg1)
1884{
c3206272 1885 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
ca12eb68 1886 REAL_VALUE_TYPE c2, dconst3;
e78306af 1887 HOST_WIDE_INT n;
c3206272 1888 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
3754d046 1889 machine_mode mode;
c3206272 1890 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
0190fe95 1891 bool hw_sqrt_exists, c_is_int, c2_is_int;
e78306af 1892
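  /* Form the constant 0.25 by taking 1.0 and lowering its binary
     exponent by two, i.e. dividing it by four.  */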
c3206272 1893 dconst1_4 = dconst1;
1894 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1895
e78306af 1896 /* If the exponent isn't a constant, there's nothing of interest
1897 to be done. */
1898 if (TREE_CODE (arg1) != REAL_CST)
1899 return NULL_TREE;
1900
9f27d92a 1901 /* Don't perform the operation if flag_signaling_nans is on
1902 and the operand is a signaling NaN. */
1903 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2a659064 1904 && ((TREE_CODE (arg0) == REAL_CST
1905 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
9f27d92a 1906 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1907 return NULL_TREE;
1908
ae43b05e 1909 /* If the exponent is equivalent to an integer, expand to an optimal
1910 multiplication sequence when profitable. */
e78306af 1911 c = TREE_REAL_CST (arg1);
1912 n = real_to_integer (&c);
e913b5cd 1913 real_from_integer (&cint, VOIDmode, n, SIGNED);
0190fe95 1914 c_is_int = real_identical (&c, &cint);
e78306af 1915
0190fe95 1916 if (c_is_int
e78306af 1917 && ((n >= -1 && n <= 2)
1918 || (flag_unsafe_math_optimizations
c3206272 1919 && speed_p
e78306af 1920 && powi_cost (n) <= POWI_MAX_MULTS)))
1921 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1922
ae43b05e 1923 /* Attempt various optimizations using sqrt and cbrt. */
1924 type = TREE_TYPE (arg0);
1925 mode = TYPE_MODE (type);
1926 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1927
1928 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1929 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1930 sqrt(-0) = -0. */
1931 if (sqrtfn
20cb53c9 1932 && real_equal (&c, &dconsthalf)
ae43b05e 1933 && !HONOR_SIGNED_ZEROS (mode))
03d37e4e 1934 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
ae43b05e 1935
a5c384c1 1936 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
ae43b05e 1937
ae43b05e 1938 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1939 optimizations since 1./3. is not exactly representable. If x
1940 is negative and finite, the correct value of pow(x,1./3.) is
1941 a NaN with the "invalid" exception raised, because the value
1942 of 1./3. actually has an even denominator. The correct value
1943 of cbrt(x) is a negative real value. */
1944 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1945 dconst1_3 = real_value_truncate (mode, dconst_third ());
1946
1947 if (flag_unsafe_math_optimizations
1948 && cbrtfn
ee230333 1949 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
20cb53c9 1950 && real_equal (&c, &dconst1_3))
03d37e4e 1951 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
ae43b05e 1952
1953 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1954 if we don't have a hardware sqrt insn. */
1955 dconst1_6 = dconst1_3;
1956 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1957
1958 if (flag_unsafe_math_optimizations
1959 && sqrtfn
1960 && cbrtfn
ee230333 1961 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
c3206272 1962 && speed_p
ae43b05e 1963 && hw_sqrt_exists
20cb53c9 1964 && real_equal (&c, &dconst1_6))
ae43b05e 1965 {
1966 /* sqrt(x) */
03d37e4e 1967 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
ae43b05e 1968
1969 /* cbrt(sqrt(x)) */
03d37e4e 1970 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
ca12eb68 1971 }
1972
ca12eb68 1973
c3206272 1974 /* Attempt to expand the POW as a product of square root chains.
 1975 Expand the 0.25 case even when optimising for size. */
ca12eb68 1976 if (flag_unsafe_math_optimizations
1977 && sqrtfn
c3206272 1978 && hw_sqrt_exists
20cb53c9 1979 && (speed_p || real_equal (&c, &dconst1_4))
c3206272 1980 && !HONOR_SIGNED_ZEROS (mode))
ca12eb68 1981 {
c3206272 1982 unsigned int max_depth = speed_p
1983 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1984 : 2;
ca12eb68 1985
c3206272 1986 tree expand_with_sqrts
1987 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
ca12eb68 1988
c3206272 1989 if (expand_with_sqrts)
1990 return expand_with_sqrts;
ca12eb68 1991 }
1992
c3206272 1993 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1994 n = real_to_integer (&c2);
1995 real_from_integer (&cint, VOIDmode, n, SIGNED);
1996 c2_is_int = real_identical (&c2, &cint);
1997
ca12eb68 1998 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1999
2000 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2001 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2002
2003 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2004 different from pow(x, 1./3.) due to rounding and behavior with
2005 negative x, we need to constrain this transformation to unsafe
2006 math and positive x or finite math. */
e913b5cd 2007 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
ca12eb68 2008 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2009 real_round (&c2, mode, &c2);
2010 n = real_to_integer (&c2);
e913b5cd 2011 real_from_integer (&cint, VOIDmode, n, SIGNED);
ca12eb68 2012 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2013 real_convert (&c2, mode, &c2);
2014
2015 if (flag_unsafe_math_optimizations
2016 && cbrtfn
ee230333 2017 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
ca12eb68 2018 && real_identical (&c2, &c)
0190fe95 2019 && !c2_is_int
ca12eb68 2020 && optimize_function_for_speed_p (cfun)
2021 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2022 {
2023 tree powi_x_ndiv3 = NULL_TREE;
2024
2025 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2026 possible or profitable, give up. Skip the degenerate case when
2027 abs(n) < 3, where the result is always 1. */
b1757d46 2028 if (absu_hwi (n) >= 3)
ca12eb68 2029 {
2030 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
5ebd604f 2031 abs_hwi (n / 3));
ca12eb68 2032 if (!powi_x_ndiv3)
2033 return NULL_TREE;
2034 }
2035
2036 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2037 as that creates an unnecessary variable. Instead, just produce
2038 either cbrt(x) or cbrt(x) * cbrt(x). */
03d37e4e 2039 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
ca12eb68 2040
b1757d46 2041 if (absu_hwi (n) % 3 == 1)
ca12eb68 2042 powi_cbrt_x = cbrt_x;
2043 else
03d37e4e 2044 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
ca12eb68 2045 cbrt_x, cbrt_x);
2046
2047 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
b1757d46 2048 if (absu_hwi (n) < 3)
ca12eb68 2049 result = powi_cbrt_x;
2050 else
03d37e4e 2051 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
ca12eb68 2052 powi_x_ndiv3, powi_cbrt_x);
2053
2054 /* If n is negative, reciprocate the result. */
2055 if (n < 0)
03d37e4e 2056 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
ca12eb68 2057 build_real (type, dconst1), result);
2058
2059 return result;
ae43b05e 2060 }
2061
ca12eb68 2062 /* No optimizations succeeded. */
e78306af 2063 return NULL_TREE;
2064}
2065
a5c384c1 2066/* ARG is the argument to a cabs builtin call in GSI with location info
2067 LOC. Create a sequence of statements prior to GSI that calculates
2068 sqrt(R*R + I*I), where R and I are the real and imaginary components
2069 of ARG, respectively. Return an expression holding the result. */
2070
2071static tree
2072gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2073{
03d37e4e 2074 tree real_part, imag_part, addend1, addend2, sum, result;
a5c384c1 2075 tree type = TREE_TYPE (TREE_TYPE (arg));
2076 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
3754d046 2077 machine_mode mode = TYPE_MODE (type);
a5c384c1 2078
2079 if (!flag_unsafe_math_optimizations
2080 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2081 || !sqrtfn
2082 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2083 return NULL_TREE;
2084
03d37e4e 2085 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
a5c384c1 2086 REALPART_EXPR, arg);
03d37e4e 2087 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
a5c384c1 2088 real_part, real_part);
03d37e4e 2089 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
a5c384c1 2090 IMAGPART_EXPR, arg);
03d37e4e 2091 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
a5c384c1 2092 imag_part, imag_part);
03d37e4e 2093 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2094 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
a5c384c1 2095
2096 return result;
2097}
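
/* Illustrative sketch: with -funsafe-math-optimizations, a hardware
   sqrt and a block optimized for speed, the expansion above turns

     double f (double _Complex z) { return __builtin_cabs (z); }

   into the equivalent of

     double f (double _Complex z)
     {
       double r = __real__ z, i = __imag__ z;
       return __builtin_sqrt (r * r + i * i);
     }

   trading the library call for a handful of inline operations.  */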
2098
a0315874 2099/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
e9a6c4bc 2100 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2101 an optimal number of multiplies, when n is a constant. */
a0315874 2102
65b0537f 2103namespace {
2104
2105const pass_data pass_data_cse_sincos =
2106{
2107 GIMPLE_PASS, /* type */
2108 "sincos", /* name */
2109 OPTGROUP_NONE, /* optinfo_flags */
8ed378fe 2110 TV_TREE_SINCOS, /* tv_id */
65b0537f 2111 PROP_ssa, /* properties_required */
a153e7b3 2112 PROP_gimple_opt_math, /* properties_provided */
65b0537f 2113 0, /* properties_destroyed */
2114 0, /* todo_flags_start */
8b88439e 2115 TODO_update_ssa, /* todo_flags_finish */
65b0537f 2116};
2117
2118class pass_cse_sincos : public gimple_opt_pass
2119{
2120public:
2121 pass_cse_sincos (gcc::context *ctxt)
2122 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2123 {}
2124
2125 /* opt_pass methods: */
2126 virtual bool gate (function *)
2127 {
2128 /* We no longer require either sincos or cexp, since powi expansion
2129 piggybacks on this pass. */
2130 return optimize;
2131 }
2132
2133 virtual unsigned int execute (function *);
2134
2135}; // class pass_cse_sincos
2136
2137unsigned int
2138pass_cse_sincos::execute (function *fun)
a0315874 2139{
2140 basic_block bb;
4c80086d 2141 bool cfg_changed = false;
a0315874 2142
2143 calculate_dominance_info (CDI_DOMINATORS);
30c4e60d 2144 memset (&sincos_stats, 0, sizeof (sincos_stats));
a0315874 2145
65b0537f 2146 FOR_EACH_BB_FN (bb, fun)
a0315874 2147 {
75a70cf9 2148 gimple_stmt_iterator gsi;
2a155cf0 2149 bool cleanup_eh = false;
a0315874 2150
75a70cf9 2151 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
a0315874 2152 {
42acab1c 2153 gimple *stmt = gsi_stmt (gsi);
a0315874 2154
2a155cf0 2155 /* Only the last stmt in a bb could throw, no need to call
2156 gimple_purge_dead_eh_edges if we change something in the middle
2157 of a basic block. */
2158 cleanup_eh = false;
2159
fa0793ad 2160 if (is_gimple_call (stmt)
5e8b972c 2161 && gimple_call_lhs (stmt))
a0315874 2162 {
e9a6c4bc 2163 tree arg, arg0, arg1, result;
2164 HOST_WIDE_INT n;
2165 location_t loc;
a0315874 2166
fa0793ad 2167 switch (gimple_call_combined_fn (stmt))
a0315874 2168 {
fa0793ad 2169 CASE_CFN_COS:
2170 CASE_CFN_SIN:
2171 CASE_CFN_CEXPI:
d312d7df 2172 /* Make sure we have either sincos or cexp. */
30f690e0 2173 if (!targetm.libc_has_function (function_c99_math_complex)
2174 && !targetm.libc_has_function (function_sincos))
d312d7df 2175 break;
2176
75a70cf9 2177 arg = gimple_call_arg (stmt, 0);
a0315874 2178 if (TREE_CODE (arg) == SSA_NAME)
4c80086d 2179 cfg_changed |= execute_cse_sincos_1 (arg);
a0315874 2180 break;
2181
fa0793ad 2182 CASE_CFN_POW:
e78306af 2183 arg0 = gimple_call_arg (stmt, 0);
2184 arg1 = gimple_call_arg (stmt, 1);
2185
2186 loc = gimple_location (stmt);
2187 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2188
2189 if (result)
2190 {
2191 tree lhs = gimple_get_lhs (stmt);
1a91d914 2192 gassign *new_stmt = gimple_build_assign (lhs, result);
e78306af 2193 gimple_set_location (new_stmt, loc);
2194 unlink_stmt_vdef (stmt);
2195 gsi_replace (&gsi, new_stmt, true);
2a155cf0 2196 cleanup_eh = true;
bc8a8451 2197 if (gimple_vdef (stmt))
2198 release_ssa_name (gimple_vdef (stmt));
e78306af 2199 }
2200 break;
2201
fa0793ad 2202 CASE_CFN_POWI:
e9a6c4bc 2203 arg0 = gimple_call_arg (stmt, 0);
2204 arg1 = gimple_call_arg (stmt, 1);
e9a6c4bc 2205 loc = gimple_location (stmt);
377db285 2206
6dfe7d53 2207 if (real_minus_onep (arg0))
377db285 2208 {
2209 tree t0, t1, cond, one, minus_one;
1a91d914 2210 gassign *stmt;
377db285 2211
2212 t0 = TREE_TYPE (arg0);
2213 t1 = TREE_TYPE (arg1);
2214 one = build_real (t0, dconst1);
2215 minus_one = build_real (t0, dconstm1);
2216
2217 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
e9cf809e 2218 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2219 arg1, build_int_cst (t1, 1));
377db285 2220 gimple_set_location (stmt, loc);
2221 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2222
2223 result = make_temp_ssa_name (t0, NULL, "powi");
e9cf809e 2224 stmt = gimple_build_assign (result, COND_EXPR, cond,
2225 minus_one, one);
377db285 2226 gimple_set_location (stmt, loc);
2227 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2228 }
2229 else
2230 {
e913b5cd 2231 if (!tree_fits_shwi_p (arg1))
d48be958 2232 break;
2233
e913b5cd 2234 n = tree_to_shwi (arg1);
377db285 2235 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2236 }
e9a6c4bc 2237
2238 if (result)
2239 {
2240 tree lhs = gimple_get_lhs (stmt);
1a91d914 2241 gassign *new_stmt = gimple_build_assign (lhs, result);
e9a6c4bc 2242 gimple_set_location (new_stmt, loc);
a5c384c1 2243 unlink_stmt_vdef (stmt);
2244 gsi_replace (&gsi, new_stmt, true);
2a155cf0 2245 cleanup_eh = true;
bc8a8451 2246 if (gimple_vdef (stmt))
2247 release_ssa_name (gimple_vdef (stmt));
a5c384c1 2248 }
2249 break;
2250
fa0793ad 2251 CASE_CFN_CABS:
a5c384c1 2252 arg0 = gimple_call_arg (stmt, 0);
2253 loc = gimple_location (stmt);
2254 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2255
2256 if (result)
2257 {
2258 tree lhs = gimple_get_lhs (stmt);
1a91d914 2259 gassign *new_stmt = gimple_build_assign (lhs, result);
a5c384c1 2260 gimple_set_location (new_stmt, loc);
e9a6c4bc 2261 unlink_stmt_vdef (stmt);
2262 gsi_replace (&gsi, new_stmt, true);
2a155cf0 2263 cleanup_eh = true;
bc8a8451 2264 if (gimple_vdef (stmt))
2265 release_ssa_name (gimple_vdef (stmt));
e9a6c4bc 2266 }
2267 break;
2268
a0315874 2269 default:;
2270 }
2271 }
2272 }
2a155cf0 2273 if (cleanup_eh)
2274 cfg_changed |= gimple_purge_dead_eh_edges (bb);
a0315874 2275 }
2276
65b0537f 2277 statistics_counter_event (fun, "sincos statements inserted",
30c4e60d 2278 sincos_stats.inserted);
2279
4c80086d 2280 return cfg_changed ? TODO_cleanup_cfg : 0;
a0315874 2281}
2282
cbe8bda8 2283} // anon namespace
2284
2285gimple_opt_pass *
2286make_pass_cse_sincos (gcc::context *ctxt)
2287{
2288 return new pass_cse_sincos (ctxt);
2289}
2290
71dbd910 2291/* Return true if stmt is a type conversion operation that can be stripped
2292 when used in a widening multiply operation. */
2293static bool
42acab1c 2294widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
71dbd910 2295{
2296 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2297
2298 if (TREE_CODE (result_type) == INTEGER_TYPE)
2299 {
2300 tree op_type;
2301 tree inner_op_type;
2302
2303 if (!CONVERT_EXPR_CODE_P (rhs_code))
2304 return false;
2305
2306 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2307
2308 /* If the type of OP has the same precision as the result, then
2309 we can strip this conversion. The multiply operation will be
2310 selected to create the correct extension as a by-product. */
2311 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2312 return true;
2313
 2314 /* We can also strip a conversion if it preserves the signedness of
2315 the operation and doesn't narrow the range. */
2316 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2317
8f9d1531 2318 /* If the inner-most type is unsigned, then we can strip any
2319 intermediate widening operation. If it's signed, then the
2320 intermediate widening operation must also be signed. */
2321 if ((TYPE_UNSIGNED (inner_op_type)
2322 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
71dbd910 2323 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2324 return true;
2325
2326 return false;
2327 }
2328
2329 return rhs_code == FIXED_CONVERT_EXPR;
2330}
2331
0989f516 2332/* Return true if RHS is a suitable operand for a widening multiplication,
2333 assuming a target type of TYPE.
7e4c867e 2334 There are two cases:
2335
aff5fb4d 2336 - RHS makes some value at least twice as wide. Store that value
2337 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
7e4c867e 2338
2339 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2340 but leave *TYPE_OUT untouched. */
00f4f705 2341
2342static bool
0989f516 2343is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2344 tree *new_rhs_out)
7e4c867e 2345{
42acab1c 2346 gimple *stmt;
0989f516 2347 tree type1, rhs1;
7e4c867e 2348
2349 if (TREE_CODE (rhs) == SSA_NAME)
2350 {
7e4c867e 2351 stmt = SSA_NAME_DEF_STMT (rhs);
0989f516 2352 if (is_gimple_assign (stmt))
2353 {
71dbd910 2354 if (! widening_mult_conversion_strippable_p (type, stmt))
0989f516 2355 rhs1 = rhs;
2356 else
ffebd9c5 2357 {
2358 rhs1 = gimple_assign_rhs1 (stmt);
2359
2360 if (TREE_CODE (rhs1) == INTEGER_CST)
2361 {
2362 *new_rhs_out = rhs1;
2363 *type_out = NULL;
2364 return true;
2365 }
2366 }
0989f516 2367 }
2368 else
2369 rhs1 = rhs;
7e4c867e 2370
7e4c867e 2371 type1 = TREE_TYPE (rhs1);
0989f516 2372
7e4c867e 2373 if (TREE_CODE (type1) != TREE_CODE (type)
aff5fb4d 2374 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
7e4c867e 2375 return false;
2376
2377 *new_rhs_out = rhs1;
2378 *type_out = type1;
2379 return true;
2380 }
2381
2382 if (TREE_CODE (rhs) == INTEGER_CST)
2383 {
2384 *new_rhs_out = rhs;
2385 *type_out = NULL;
2386 return true;
2387 }
2388
2389 return false;
2390}
2391
0989f516 2392/* Return true if STMT performs a widening multiplication, assuming the
2393 output type is TYPE. If so, store the unwidened types of the operands
2394 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2395 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2396 and *TYPE2_OUT would give the operands of the multiplication. */
7e4c867e 2397
2398static bool
42acab1c 2399is_widening_mult_p (gimple *stmt,
7e4c867e 2400 tree *type1_out, tree *rhs1_out,
2401 tree *type2_out, tree *rhs2_out)
00f4f705 2402{
4333b41f 2403 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2404
f2ef7276 2405 if (TREE_CODE (type) == INTEGER_TYPE)
2406 {
2407 if (TYPE_OVERFLOW_TRAPS (type))
2408 return false;
2409 }
2410 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
7e4c867e 2411 return false;
00f4f705 2412
0989f516 2413 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2414 rhs1_out))
00f4f705 2415 return false;
2416
0989f516 2417 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2418 rhs2_out))
7e4c867e 2419 return false;
00f4f705 2420
7e4c867e 2421 if (*type1_out == NULL)
00f4f705 2422 {
7e4c867e 2423 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
00f4f705 2424 return false;
7e4c867e 2425 *type1_out = *type2_out;
00f4f705 2426 }
00f4f705 2427
7e4c867e 2428 if (*type2_out == NULL)
00f4f705 2429 {
7e4c867e 2430 if (!int_fits_type_p (*rhs2_out, *type1_out))
00f4f705 2431 return false;
7e4c867e 2432 *type2_out = *type1_out;
00f4f705 2433 }
00f4f705 2434
287c271c 2435 /* Ensure that the larger of the two operands comes first. */
2436 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2437 {
dfcf26a5 2438 std::swap (*type1_out, *type2_out);
2439 std::swap (*rhs1_out, *rhs2_out);
287c271c 2440 }
aff5fb4d 2441
7e4c867e 2442 return true;
2443}
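
/* For example (illustrative), with 32-bit int and 64-bit long long,

     long long f (int a, int b)
     {
       return (long long) a * (long long) b;
     }

   is recognized here: both casts are strippable, *TYPE1_OUT and
   *TYPE2_OUT become int, and *RHS1_OUT and *RHS2_OUT are the uncast
   operands, so the multiply can become a WIDEN_MULT_EXPR on targets
   with a widening multiply pattern.  */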
00f4f705 2444
eb728046 2445/* Check to see if the CALL statement is an invocation of copysign
 2446 with 1.0 as its first argument. */
2447static bool
2448is_copysign_call_with_1 (gimple *call)
2449{
2450 gcall *c = dyn_cast <gcall *> (call);
2451 if (! c)
2452 return false;
2453
2454 enum combined_fn code = gimple_call_combined_fn (c);
2455
2456 if (code == CFN_LAST)
2457 return false;
2458
2459 if (builtin_fn_p (code))
2460 {
2461 switch (as_builtin_fn (code))
2462 {
2463 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2464 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2465 return real_onep (gimple_call_arg (c, 0));
2466 default:
2467 return false;
2468 }
2469 }
2470
2471 if (internal_fn_p (code))
2472 {
2473 switch (as_internal_fn (code))
2474 {
2475 case IFN_COPYSIGN:
2476 return real_onep (gimple_call_arg (c, 0));
2477 default:
2478 return false;
2479 }
2480 }
2481
2482 return false;
2483}
2484
2485/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
 2486 This only happens when the xorsign optab is defined. If the
 2487 pattern is not a xorsign pattern or if expansion fails, FALSE is
 2488 returned; otherwise TRUE is returned. */
2489static bool
2490convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2491{
2492 tree treeop0, treeop1, lhs, type;
2493 location_t loc = gimple_location (stmt);
2494 lhs = gimple_assign_lhs (stmt);
2495 treeop0 = gimple_assign_rhs1 (stmt);
2496 treeop1 = gimple_assign_rhs2 (stmt);
2497 type = TREE_TYPE (lhs);
2498 machine_mode mode = TYPE_MODE (type);
2499
3aa2a10c 2500 if (HONOR_SNANS (type))
eb728046 2501 return false;
2502
2503 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2504 {
2505 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
3aa2a10c 2506 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
eb728046 2507 {
2508 call0 = SSA_NAME_DEF_STMT (treeop1);
3aa2a10c 2509 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
eb728046 2510 return false;
2511
2512 treeop1 = treeop0;
2513 }
eb728046 2514 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2515 return false;
2516
2517 gcall *c = as_a<gcall*> (call0);
2518 treeop0 = gimple_call_arg (c, 1);
2519
2520 gcall *call_stmt
2521 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2522 gimple_set_lhs (call_stmt, lhs);
2523 gimple_set_location (call_stmt, loc);
2524 gsi_replace (gsi, call_stmt, true);
2525 return true;
2526 }
2527
2528 return false;
2529}
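
/* Illustrative sketch: when the xorsign optab is available,

     double f (double x, double y)
     {
       return x * __builtin_copysign (1.0, y);
     }

   is rewritten into a single IFN_XORSIGN (x, y) call, which targets
   typically implement by transferring Y's sign bit onto X with bitwise
   operations instead of a full multiply.  */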
2530
7e4c867e 2531/* Process a single gimple statement STMT, which has a MULT_EXPR as
2532 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2533 value is true iff we converted the statement. */
2534
2535static bool
42acab1c 2536convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
7e4c867e 2537{
03d37e4e 2538 tree lhs, rhs1, rhs2, type, type1, type2;
7e4c867e 2539 enum insn_code handler;
d2a1b453 2540 scalar_int_mode to_mode, from_mode, actual_mode;
5a574e8b 2541 optab op;
aff5fb4d 2542 int actual_precision;
2543 location_t loc = gimple_location (stmt);
3f2ab719 2544 bool from_unsigned1, from_unsigned2;
7e4c867e 2545
2546 lhs = gimple_assign_lhs (stmt);
2547 type = TREE_TYPE (lhs);
2548 if (TREE_CODE (type) != INTEGER_TYPE)
00f4f705 2549 return false;
2550
4333b41f 2551 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
00f4f705 2552 return false;
2553
03b7a719 2554 to_mode = SCALAR_INT_TYPE_MODE (type);
2555 from_mode = SCALAR_INT_TYPE_MODE (type1);
f90f6ff1 2556 if (to_mode == from_mode)
2557 return false;
2558
3f2ab719 2559 from_unsigned1 = TYPE_UNSIGNED (type1);
2560 from_unsigned2 = TYPE_UNSIGNED (type2);
5a574e8b 2561
3f2ab719 2562 if (from_unsigned1 && from_unsigned2)
5a574e8b 2563 op = umul_widen_optab;
3f2ab719 2564 else if (!from_unsigned1 && !from_unsigned2)
5a574e8b 2565 op = smul_widen_optab;
00f4f705 2566 else
5a574e8b 2567 op = usmul_widen_optab;
2568
aff5fb4d 2569 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
d2a1b453 2570 &actual_mode);
7e4c867e 2571
2572 if (handler == CODE_FOR_nothing)
3f2ab719 2573 {
2574 if (op != smul_widen_optab)
2575 {
22ffd684 2576 /* We can use a signed multiply with unsigned types as long as
2577 there is a wider mode to use, or it is the smaller of the two
2578 types that is unsigned. Note that type1 >= type2, always. */
2579 if ((TYPE_UNSIGNED (type1)
2580 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2581 || (TYPE_UNSIGNED (type2)
2582 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2583 {
28ebc73c 2584 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2585 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
22ffd684 2586 return false;
2587 }
3f2ab719 2588
2589 op = smul_widen_optab;
2590 handler = find_widening_optab_handler_and_mode (op, to_mode,
d2a1b453 2591 from_mode,
3f2ab719 2592 &actual_mode);
2593
2594 if (handler == CODE_FOR_nothing)
2595 return false;
2596
2597 from_unsigned1 = from_unsigned2 = false;
2598 }
2599 else
2600 return false;
2601 }
7e4c867e 2602
aff5fb4d 2603 /* Ensure that the inputs to the handler are in the correct precision
2604 for the opcode. This will be the full mode size. */
2605 actual_precision = GET_MODE_PRECISION (actual_mode);
b36be69d 2606 if (2 * actual_precision > TYPE_PRECISION (type))
2607 return false;
3f2ab719 2608 if (actual_precision != TYPE_PRECISION (type1)
2609 || from_unsigned1 != TYPE_UNSIGNED (type1))
03d37e4e 2610 rhs1 = build_and_insert_cast (gsi, loc,
2611 build_nonstandard_integer_type
2612 (actual_precision, from_unsigned1), rhs1);
3f2ab719 2613 if (actual_precision != TYPE_PRECISION (type2)
2614 || from_unsigned2 != TYPE_UNSIGNED (type2))
03d37e4e 2615 rhs2 = build_and_insert_cast (gsi, loc,
2616 build_nonstandard_integer_type
2617 (actual_precision, from_unsigned2), rhs2);
aff5fb4d 2618
ffebd9c5 2619 /* Handle constants. */
2620 if (TREE_CODE (rhs1) == INTEGER_CST)
2621 rhs1 = fold_convert (type1, rhs1);
2622 if (TREE_CODE (rhs2) == INTEGER_CST)
2623 rhs2 = fold_convert (type2, rhs2);
2624
aff5fb4d 2625 gimple_assign_set_rhs1 (stmt, rhs1);
2626 gimple_assign_set_rhs2 (stmt, rhs2);
00f4f705 2627 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2628 update_stmt (stmt);
30c4e60d 2629 widen_mul_stats.widen_mults_inserted++;
00f4f705 2630 return true;
2631}
2632
2633/* Process a single gimple statement STMT, which is found at the
2634 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2635 rhs (given by CODE), and try to convert it into a
2636 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2637 is true iff we converted the statement. */
2638
2639static bool
42acab1c 2640convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
00f4f705 2641 enum tree_code code)
2642{
42acab1c 2643 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2644 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
03d37e4e 2645 tree type, type1, type2, optype;
00f4f705 2646 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2647 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2648 optab this_optab;
2649 enum tree_code wmult_code;
aff5fb4d 2650 enum insn_code handler;
d2a1b453 2651 scalar_mode to_mode, from_mode, actual_mode;
aff5fb4d 2652 location_t loc = gimple_location (stmt);
2653 int actual_precision;
3f2ab719 2654 bool from_unsigned1, from_unsigned2;
00f4f705 2655
2656 lhs = gimple_assign_lhs (stmt);
2657 type = TREE_TYPE (lhs);
7e4c867e 2658 if (TREE_CODE (type) != INTEGER_TYPE
2659 && TREE_CODE (type) != FIXED_POINT_TYPE)
00f4f705 2660 return false;
2661
2662 if (code == MINUS_EXPR)
2663 wmult_code = WIDEN_MULT_MINUS_EXPR;
2664 else
2665 wmult_code = WIDEN_MULT_PLUS_EXPR;
2666
00f4f705 2667 rhs1 = gimple_assign_rhs1 (stmt);
2668 rhs2 = gimple_assign_rhs2 (stmt);
2669
2670 if (TREE_CODE (rhs1) == SSA_NAME)
2671 {
2672 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2673 if (is_gimple_assign (rhs1_stmt))
2674 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2675 }
00f4f705 2676
2677 if (TREE_CODE (rhs2) == SSA_NAME)
2678 {
2679 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2680 if (is_gimple_assign (rhs2_stmt))
2681 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2682 }
00f4f705 2683
07ea3e5c 2684 /* Allow for one conversion statement between the multiply
 2685 and addition/subtraction statement. If there is more than
 2686 one conversion then we assume they would invalidate this
2687 transformation. If that's not the case then they should have
2688 been folded before now. */
2689 if (CONVERT_EXPR_CODE_P (rhs1_code))
2690 {
2691 conv1_stmt = rhs1_stmt;
2692 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2693 if (TREE_CODE (rhs1) == SSA_NAME)
2694 {
2695 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2696 if (is_gimple_assign (rhs1_stmt))
2697 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2698 }
2699 else
2700 return false;
2701 }
2702 if (CONVERT_EXPR_CODE_P (rhs2_code))
2703 {
2704 conv2_stmt = rhs2_stmt;
2705 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2706 if (TREE_CODE (rhs2) == SSA_NAME)
2707 {
2708 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2709 if (is_gimple_assign (rhs2_stmt))
2710 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2711 }
2712 else
2713 return false;
2714 }
2715
aff5fb4d 2716 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
 2717 is_widening_mult_p, but we still need the operand values it returns.
2718
2719 It might also appear that it would be sufficient to use the existing
2720 operands of the widening multiply, but that would limit the choice of
e0df5be0 2721 multiply-and-accumulate instructions.
2722
 2723 If the widened-multiplication result has more than one use, it is
2724 probably wiser not to do the conversion. */
aff5fb4d 2725 if (code == PLUS_EXPR
2726 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
00f4f705 2727 {
e0df5be0 2728 if (!has_single_use (rhs1)
2729 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2730 &type2, &mult_rhs2))
00f4f705 2731 return false;
7e4c867e 2732 add_rhs = rhs2;
07ea3e5c 2733 conv_stmt = conv1_stmt;
00f4f705 2734 }
aff5fb4d 2735 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
00f4f705 2736 {
e0df5be0 2737 if (!has_single_use (rhs2)
2738 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2739 &type2, &mult_rhs2))
00f4f705 2740 return false;
7e4c867e 2741 add_rhs = rhs1;
07ea3e5c 2742 conv_stmt = conv2_stmt;
00f4f705 2743 }
00f4f705 2744 else
2745 return false;
2746
3d2b0034 2747 to_mode = SCALAR_TYPE_MODE (type);
2748 from_mode = SCALAR_TYPE_MODE (type1);
f90f6ff1 2749 if (to_mode == from_mode)
2750 return false;
2751
3f2ab719 2752 from_unsigned1 = TYPE_UNSIGNED (type1);
2753 from_unsigned2 = TYPE_UNSIGNED (type2);
4ccf368d 2754 optype = type1;
aff5fb4d 2755
3f2ab719 2756 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2757 if (from_unsigned1 != from_unsigned2)
2758 {
4ccf368d 2759 if (!INTEGRAL_TYPE_P (type))
2760 return false;
22ffd684 2761 /* We can use a signed multiply with unsigned types as long as
2762 there is a wider mode to use, or it is the smaller of the two
2763 types that is unsigned. Note that type1 >= type2, always. */
2764 if ((from_unsigned1
2765 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2766 || (from_unsigned2
2767 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3f2ab719 2768 {
28ebc73c 2769 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2770 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
22ffd684 2771 return false;
3f2ab719 2772 }
22ffd684 2773
2774 from_unsigned1 = from_unsigned2 = false;
4ccf368d 2775 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2776 false);
3f2ab719 2777 }
815a0224 2778
07ea3e5c 2779 /* If there was a conversion between the multiply and addition
2780 then we need to make sure it fits a multiply-and-accumulate.
 2781 There should be a single mode change which does not change the
2782 value. */
2783 if (conv_stmt)
2784 {
3f2ab719 2785 /* We use the original, unmodified data types for this. */
07ea3e5c 2786 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2787 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2788 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2789 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2790
2791 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2792 {
2793 /* Conversion is a truncate. */
2794 if (TYPE_PRECISION (to_type) < data_size)
2795 return false;
2796 }
2797 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2798 {
2799 /* Conversion is an extend. Check it's the right sort. */
2800 if (TYPE_UNSIGNED (from_type) != is_unsigned
2801 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2802 return false;
2803 }
2804 /* else convert is a no-op for our purposes. */
2805 }
2806
815a0224 2807 /* Verify that the machine can perform a widening multiply
2808 accumulate in this mode/signedness combination, otherwise
2809 this transformation is likely to pessimize code. */
3f2ab719 2810 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
aff5fb4d 2811 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
d2a1b453 2812 from_mode, &actual_mode);
aff5fb4d 2813
2814 if (handler == CODE_FOR_nothing)
815a0224 2815 return false;
2816
aff5fb4d 2817 /* Ensure that the inputs to the handler are in the correct precision
2818 for the opcode. This will be the full mode size. */
2819 actual_precision = GET_MODE_PRECISION (actual_mode);
3f2ab719 2820 if (actual_precision != TYPE_PRECISION (type1)
2821 || from_unsigned1 != TYPE_UNSIGNED (type1))
03d37e4e 2822 mult_rhs1 = build_and_insert_cast (gsi, loc,
2823 build_nonstandard_integer_type
2824 (actual_precision, from_unsigned1),
2825 mult_rhs1);
3f2ab719 2826 if (actual_precision != TYPE_PRECISION (type2)
2827 || from_unsigned2 != TYPE_UNSIGNED (type2))
03d37e4e 2828 mult_rhs2 = build_and_insert_cast (gsi, loc,
2829 build_nonstandard_integer_type
2830 (actual_precision, from_unsigned2),
2831 mult_rhs2);
00f4f705 2832
12421545 2833 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
03d37e4e 2834 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
12421545 2835
ffebd9c5 2836 /* Handle constants. */
2837 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
d5a3bb10 2838 mult_rhs1 = fold_convert (type1, mult_rhs1);
ffebd9c5 2839 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
d5a3bb10 2840 mult_rhs2 = fold_convert (type2, mult_rhs2);
ffebd9c5 2841
806413d2 2842 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2843 add_rhs);
00f4f705 2844 update_stmt (gsi_stmt (*gsi));
30c4e60d 2845 widen_mul_stats.maccs_inserted++;
00f4f705 2846 return true;
2847}
2848
ed306e55 2849/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2850 OP2 and which we know is used in statements that can be, together with the
2851 multiplication, converted to FMAs, perform the transformation. */
2852
2853static void
2854convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2855{
2856 tree type = TREE_TYPE (mul_result);
2857 gimple *use_stmt;
2858 imm_use_iterator imm_iter;
143c3c9a 2859 gcall *fma_stmt;
ed306e55 2860
2861 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2862 {
2863 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
ed306e55 2864 tree addop, mulop1 = op1, result = mul_result;
2865 bool negate_p = false;
143c3c9a 2866 gimple_seq seq = NULL;
ed306e55 2867
2868 if (is_gimple_debug (use_stmt))
2869 continue;
2870
e3798ed9 2871 if (is_gimple_assign (use_stmt)
2872 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
ed306e55 2873 {
2874 result = gimple_assign_lhs (use_stmt);
2875 use_operand_p use_p;
2876 gimple *neguse_stmt;
2877 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2878 gsi_remove (&gsi, true);
2879 release_defs (use_stmt);
2880
2881 use_stmt = neguse_stmt;
2882 gsi = gsi_for_stmt (use_stmt);
ed306e55 2883 negate_p = true;
2884 }
2885
e3798ed9 2886 tree cond, else_value, ops[3];
2887 tree_code code;
2888 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
2889 ops, &else_value))
2890 gcc_unreachable ();
2891 addop = ops[0] == result ? ops[1] : ops[0];
2892
2893 if (code == MINUS_EXPR)
ed306e55 2894 {
e3798ed9 2895 if (ops[0] == result)
2896 /* a * b - c -> a * b + (-c) */
143c3c9a 2897 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
e3798ed9 2898 else
2899 /* a - b * c -> (-b) * c + a */
ed306e55 2900 negate_p = !negate_p;
2901 }
2902
2903 if (negate_p)
143c3c9a 2904 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
ed306e55 2905
143c3c9a 2906 if (seq)
2907 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
e3798ed9 2908
2909 if (cond)
2910 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
2911 op2, addop, else_value);
2912 else
2913 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
2914 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
aac19106 2915 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
2916 use_stmt));
143c3c9a 2917 gsi_replace (&gsi, fma_stmt, true);
2918 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
2919 regardless of where the negation occurs. */
43a607ba 2920 gimple *orig_stmt = gsi_stmt (gsi);
143c3c9a 2921 if (fold_stmt (&gsi, follow_all_ssa_edges))
43a607ba 2922 {
2923 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
2924 gcc_unreachable ();
2925 update_stmt (gsi_stmt (gsi));
2926 }
ed306e55 2927
2928 if (dump_file && (dump_flags & TDF_DETAILS))
2929 {
2930 fprintf (dump_file, "Generated FMA ");
54e7de93 2931 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
ed306e55 2932 fprintf (dump_file, "\n");
2933 }
2934
ed306e55 2935 widen_mul_stats.fmas_inserted++;
2936 }
2937}
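
/* Illustrative sketch: with -ffp-contract=fast on an FMA-capable
   target,

     double f (double a, double b, double c) { return a * b + c; }

   is rewritten into IFN_FMA (a, b, c), while negated variants fold
   further (a * b - c towards FMS, -(a * b) + c towards FNMA, and so
   on) via the follow_all_ssa_edges folding above.  */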
2938
2939/* Data necessary to perform the actual transformation from a multiplication
2940 and an addition to an FMA after decision is taken it should be done and to
2941 then delete the multiplication statement from the function IL. */
2942
2943struct fma_transformation_info
2944{
2945 gimple *mul_stmt;
2946 tree mul_result;
2947 tree op1;
2948 tree op2;
2949};
2950
2951/* Structure containing the current state of FMA deferring, i.e. whether we are
2952 deferring, whether to continue deferring, and all data necessary to come
2953 back and perform all deferred transformations. */
2954
2955class fma_deferring_state
2956{
2957public:
2958 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
2959 do any deferring. */
2960
2961 fma_deferring_state (bool perform_deferring)
2962 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
2963 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
2964
 2965 /* List of FMA candidates for which the transformation has been determined
 2966 to be possible but which, at this point in the BB analysis, we do not
 2967 consider beneficial. */
2968 auto_vec<fma_transformation_info, 8> m_candidates;
2969
2970 /* Set of results of multiplication that are part of an already deferred FMA
2971 candidates. */
2972 hash_set<tree> m_mul_result_set;
2973
 2974 /* The PHI that supposedly feeds back the result of an FMA to another over
 2975 a loop boundary. */
2976 gphi *m_initial_phi;
2977
2978 /* Result of the last produced FMA candidate or NULL if there has not been
2979 one. */
2980 tree m_last_result;
2981
2982 /* If true, deferring might still be profitable. If false, transform all
2983 candidates and no longer defer. */
2984 bool m_deferring_p;
2985};
2986
2987/* Transform all deferred FMA candidates and mark STATE as no longer
2988 deferring. */
2989
2990static void
2991cancel_fma_deferring (fma_deferring_state *state)
2992{
2993 if (!state->m_deferring_p)
2994 return;
2995
2996 for (unsigned i = 0; i < state->m_candidates.length (); i++)
2997 {
2998 if (dump_file && (dump_flags & TDF_DETAILS))
2999 fprintf (dump_file, "Generating deferred FMA\n");
3000
3001 const fma_transformation_info &fti = state->m_candidates[i];
3002 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3003
3004 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3005 gsi_remove (&gsi, true);
3006 release_defs (fti.mul_stmt);
3007 }
3008 state->m_deferring_p = false;
3009}
3010
3011/* If OP is an SSA name defined by a PHI node, return the PHI statement.
3012 Otherwise return NULL. */
3013
3014static gphi *
3015result_of_phi (tree op)
3016{
3017 if (TREE_CODE (op) != SSA_NAME)
3018 return NULL;
3019
3020 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3021}
3022
3023/* After processing statements of a BB and recording STATE, return true if the
 3024 initial phi is fed by the last FMA candidate result or one such result from
3025 previously processed BBs marked in LAST_RESULT_SET. */
3026
3027static bool
3028last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3029 hash_set<tree> *last_result_set)
3030{
3031 ssa_op_iter iter;
3032 use_operand_p use;
3033 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3034 {
3035 tree t = USE_FROM_PTR (use);
3036 if (t == state->m_last_result
3037 || last_result_set->contains (t))
3038 return true;
3039 }
3040
3041 return false;
3042}
3043
15dbdc8f 3044/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3045 with uses in additions and subtractions to form fused multiply-add
ed306e55 3046 operations. Returns true if successful and MUL_STMT should be removed.
3047
3048 If STATE indicates that we are deferring FMA transformation, that means
3049 that we do not produce FMAs for basic blocks which look like:
3050
3051 <bb 6>
3052 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3053 _65 = _14 * _16;
3054 accumulator_66 = _65 + accumulator_111;
3055
 3056 or its unrolled version, i.e. with several FMA candidates that feed the result
3057 of one into the addend of another. Instead, we add them to a list in STATE
3058 and if we later discover an FMA candidate that is not part of such a chain,
3059 we go back and perform all deferred past candidates. */
b9be572e 3060
3061static bool
ed306e55 3062convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3063 fma_deferring_state *state)
b9be572e 3064{
15dbdc8f 3065 tree mul_result = gimple_get_lhs (mul_stmt);
b9be572e 3066 tree type = TREE_TYPE (mul_result);
42acab1c 3067 gimple *use_stmt, *neguse_stmt;
b9be572e 3068 use_operand_p use_p;
3069 imm_use_iterator imm_iter;
3070
3071 if (FLOAT_TYPE_P (type)
3072 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3073 return false;
3074
3075 /* We don't want to do bitfield reduction ops. */
3076 if (INTEGRAL_TYPE_P (type)
f2ef7276 3077 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
b9be572e 3078 return false;
3079
3080 /* If the target doesn't support it, don't generate it. We assume that
3081 if fma isn't available then fms, fnma or fnms are not either. */
143c3c9a 3082 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3083 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
b9be572e 3084 return false;
3085
5ed3d3b8 3086 /* If the multiplication has zero uses, it is kept around probably because
3087 of -fnon-call-exceptions. Don't optimize it away in that case,
 3088 it is DCE's job. */
3089 if (has_zero_uses (mul_result))
3090 return false;
3091
ed306e55 3092 bool check_defer
3093 = (state->m_deferring_p
3094 && (tree_to_shwi (TYPE_SIZE (type))
3095 <= PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS)));
3096 bool defer = check_defer;
9cbde7ad 3097 bool seen_negate_p = false;
b9be572e 3098 /* Make sure that the multiplication statement becomes dead after
 3099 the transformation, i.e. that all uses are transformed to FMAs.
3100 This means we assume that an FMA operation has the same cost
3101 as an addition. */
3102 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3103 {
44579526 3104 tree result = mul_result;
3105 bool negate_p = false;
b9be572e 3106
3107 use_stmt = USE_STMT (use_p);
3108
17a2c727 3109 if (is_gimple_debug (use_stmt))
3110 continue;
3111
b9be572e 3112 /* For now restrict this operation to single basic blocks. In theory
3113 we would want to support sinking the multiplication in
3114 m = a*b;
3115 if ()
3116 ma = m + c;
3117 else
3118 d = m;
3119 to form a fma in the then block and sink the multiplication to the
3120 else block. */
3121 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3122 return false;
3123
44579526 3124 /* A negate on the multiplication leads to FNMA. */
e3798ed9 3125 if (is_gimple_assign (use_stmt)
3126 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
44579526 3127 {
805ad414 3128 ssa_op_iter iter;
5715c09b 3129 use_operand_p usep;
805ad414 3130
9cbde7ad 3131 /* If (due to earlier missed optimizations) we have two
3132 negates of the same value, treat them as equivalent
3133 to a single negate with multiple uses. */
3134 if (seen_negate_p)
3135 return false;
3136
44579526 3137 result = gimple_assign_lhs (use_stmt);
3138
3139 /* Make sure the negate statement becomes dead with this
3140 single transformation. */
3141 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3142 &use_p, &neguse_stmt))
3143 return false;
3144
805ad414 3145 /* Make sure the multiplication isn't also used on that stmt. */
5715c09b 3146 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3147 if (USE_FROM_PTR (usep) == mul_result)
805ad414 3148 return false;
3149
44579526 3150 /* Re-validate. */
3151 use_stmt = neguse_stmt;
3152 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3153 return false;
44579526 3154
9cbde7ad 3155 negate_p = seen_negate_p = true;
44579526 3156 }
b9be572e 3157
e3798ed9 3158 tree cond, else_value, ops[3];
3159 tree_code code;
3160 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3161 &else_value))
3162 return false;
3163
3164 switch (code)
44579526 3165 {
3166 case MINUS_EXPR:
e3798ed9 3167 if (ops[1] == result)
8a9d0572 3168 negate_p = !negate_p;
3169 break;
44579526 3170 case PLUS_EXPR:
44579526 3171 break;
44579526 3172 default:
3173 /* FMA can only be formed from PLUS and MINUS. */
3174 return false;
3175 }
b9be572e 3176
e3798ed9 3177 if (cond)
3178 {
3179 if (cond == result || else_value == result)
3180 return false;
3181 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3182 return false;
3183 }
3184
3185 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3186 we'll visit later, we might be able to get a more profitable
3187 match with fnma.
b095bd6a 3188 OTOH, if we don't, a negate / fma pair likely has lower latency
 3189 than a mult / subtract pair. */
e3798ed9 3190 if (code == MINUS_EXPR
3191 && !negate_p
3192 && ops[0] == result
143c3c9a 3193 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
e3798ed9 3194 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3195 && TREE_CODE (ops[1]) == SSA_NAME
3196 && has_single_use (ops[1]))
b095bd6a 3197 {
e3798ed9 3198 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3199 if (is_gimple_assign (stmt2)
3200 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3201 return false;
b095bd6a 3202 }
3203
44579526 3204 /* We can't handle a * b + a * b. */
e3798ed9 3205 if (ops[0] == ops[1])
ed306e55 3206 return false;
3207 /* If deferring, make sure we are not looking at an instruction that
3208 wouldn't have existed if we were not. */
3209 if (state->m_deferring_p
e3798ed9 3210 && (state->m_mul_result_set.contains (ops[0])
3211 || state->m_mul_result_set.contains (ops[1])))
44579526 3212 return false;
8a9d0572 3213
ed306e55 3214 if (check_defer)
44579526 3215 {
e3798ed9 3216 tree use_lhs = gimple_get_lhs (use_stmt);
ed306e55 3217 if (state->m_last_result)
3218 {
e3798ed9 3219 if (ops[1] == state->m_last_result
3220 || ops[0] == state->m_last_result)
ed306e55 3221 defer = true;
3222 else
3223 defer = false;
3224 }
3225 else
3226 {
3227 gcc_checking_assert (!state->m_initial_phi);
3228 gphi *phi;
e3798ed9 3229 if (ops[0] == result)
3230 phi = result_of_phi (ops[1]);
ed306e55 3231 else
3232 {
e3798ed9 3233 gcc_assert (ops[1] == result);
3234 phi = result_of_phi (ops[0]);
ed306e55 3235 }
44579526 3236
ed306e55 3237 if (phi)
3238 {
3239 state->m_initial_phi = phi;
3240 defer = true;
3241 }
3242 else
3243 defer = false;
3244 }
44579526 3245
ed306e55 3246 state->m_last_result = use_lhs;
3247 check_defer = false;
b9be572e 3248 }
3249 else
ed306e55 3250 defer = false;
3251
3252 /* While it is possible to validate whether or not the exact form that
3253 we've recognized is available in the backend, the assumption is that
3254 if the deferring logic above did not trigger, the transformation is
3255 never a loss. For instance, suppose the target only has the plain FMA
3256 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3257 MUL+SUB for FMA+NEG, which is still two operations. Consider
3258 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3259 form the two NEGs are independent and could be run in parallel. */
3260 }
3261
3262 if (defer)
3263 {
3264 fma_transformation_info fti;
3265 fti.mul_stmt = mul_stmt;
3266 fti.mul_result = mul_result;
3267 fti.op1 = op1;
3268 fti.op2 = op2;
3269 state->m_candidates.safe_push (fti);
3270 state->m_mul_result_set.add (mul_result);
3271
3272 if (dump_file && (dump_flags & TDF_DETAILS))
b9be572e 3273 {
ed306e55 3274 fprintf (dump_file, "Deferred generating FMA for multiplication ");
54e7de93 3275 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
ed306e55 3276 fprintf (dump_file, "\n");
b9be572e 3277 }
3278
ed306e55 3279 return false;
3280 }
3281 else
3282 {
3283 if (state->m_deferring_p)
3284 cancel_fma_deferring (state);
3285 convert_mult_to_fma_1 (mul_result, op1, op2);
3286 return true;
b9be572e 3287 }
b9be572e 3288}
3289
e11a63e8 3290
3291/* Helper function of match_uaddsub_overflow. Return 1
 3292 if USE_STMT is an unsigned overflow check ovf != 0 for
 3293 STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0,
 3294 and 0 otherwise. */
3295
3296static int
3297uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3298{
3299 enum tree_code ccode = ERROR_MARK;
3300 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3301 if (gimple_code (use_stmt) == GIMPLE_COND)
3302 {
3303 ccode = gimple_cond_code (use_stmt);
3304 crhs1 = gimple_cond_lhs (use_stmt);
3305 crhs2 = gimple_cond_rhs (use_stmt);
3306 }
3307 else if (is_gimple_assign (use_stmt))
3308 {
3309 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3310 {
3311 ccode = gimple_assign_rhs_code (use_stmt);
3312 crhs1 = gimple_assign_rhs1 (use_stmt);
3313 crhs2 = gimple_assign_rhs2 (use_stmt);
3314 }
3315 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3316 {
3317 tree cond = gimple_assign_rhs1 (use_stmt);
3318 if (COMPARISON_CLASS_P (cond))
3319 {
3320 ccode = TREE_CODE (cond);
3321 crhs1 = TREE_OPERAND (cond, 0);
3322 crhs2 = TREE_OPERAND (cond, 1);
3323 }
3324 else
3325 return 0;
3326 }
3327 else
3328 return 0;
3329 }
3330 else
3331 return 0;
3332
3333 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3334 return 0;
3335
3336 enum tree_code code = gimple_assign_rhs_code (stmt);
3337 tree lhs = gimple_assign_lhs (stmt);
3338 tree rhs1 = gimple_assign_rhs1 (stmt);
3339 tree rhs2 = gimple_assign_rhs2 (stmt);
3340
3341 switch (ccode)
3342 {
3343 case GT_EXPR:
3344 case LE_EXPR:
3345 /* r = a - b; r > a or r <= a
3346 r = a + b; a > r or a <= r or b > r or b <= r. */
3347 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3348 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3349 && crhs2 == lhs))
3350 return ccode == GT_EXPR ? 1 : -1;
3351 break;
3352 case LT_EXPR:
3353 case GE_EXPR:
3354 /* r = a - b; a < r or a >= r
3355 r = a + b; r < a or r >= a or r < b or r >= b. */
3356 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3357 || (code == PLUS_EXPR && crhs1 == lhs
3358 && (crhs2 == rhs1 || crhs2 == rhs2)))
3359 return ccode == LT_EXPR ? 1 : -1;
3360 break;
3361 default:
3362 break;
3363 }
3364 return 0;
3365}
3366
3367/* Recognize for unsigned x
3368 x = y - z;
3369 if (x > y)
3370 where there are other uses of x and replace it with
3371 _7 = SUB_OVERFLOW (y, z);
3372 x = REALPART_EXPR <_7>;
3373 _8 = IMAGPART_EXPR <_7>;
3374 if (_8)
3375 and similarly for addition. */
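/* A sketch of source code with this shape (illustrative, not from the
   testsuite):

     unsigned x = y - z;
     if (x > y)
       handle_overflow ();
     use (x);

   Both the check and another use of x are present, so rewriting to
   IFN_SUB_OVERFLOW lets the overflow result of a single subtraction be
   reused instead of keeping a separate comparison.  */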
3376
3377static bool
3378match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3379 enum tree_code code)
3380{
3381 tree lhs = gimple_assign_lhs (stmt);
3382 tree type = TREE_TYPE (lhs);
3383 use_operand_p use_p;
3384 imm_use_iterator iter;
3385 bool use_seen = false;
3386 bool ovf_use_seen = false;
3387 gimple *use_stmt;
3388
3389 gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3390 if (!INTEGRAL_TYPE_P (type)
3391 || !TYPE_UNSIGNED (type)
3392 || has_zero_uses (lhs)
3393 || has_single_use (lhs)
3394 || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3395 TYPE_MODE (type)) == CODE_FOR_nothing)
3396 return false;
3397
3398 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3399 {
3400 use_stmt = USE_STMT (use_p);
3401 if (is_gimple_debug (use_stmt))
3402 continue;
3403
3404 if (uaddsub_overflow_check_p (stmt, use_stmt))
3405 ovf_use_seen = true;
3406 else
3407 use_seen = true;
3408 if (ovf_use_seen && use_seen)
3409 break;
3410 }
3411
3412 if (!ovf_use_seen || !use_seen)
3413 return false;
3414
3415 tree ctype = build_complex_type (type);
3416 tree rhs1 = gimple_assign_rhs1 (stmt);
3417 tree rhs2 = gimple_assign_rhs2 (stmt);
3418 gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3419 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3420 2, rhs1, rhs2);
3421 tree ctmp = make_ssa_name (ctype);
3422 gimple_call_set_lhs (g, ctmp);
3423 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3424 gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3425 build1 (REALPART_EXPR, type, ctmp));
3426 gsi_replace (gsi, g2, true);
3427 tree ovf = make_ssa_name (type);
3428 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3429 build1 (IMAGPART_EXPR, type, ctmp));
3430 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3431
3432 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3433 {
3434 if (is_gimple_debug (use_stmt))
3435 continue;
3436
3437 int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3438 if (ovf_use == 0)
3439 continue;
3440 if (gimple_code (use_stmt) == GIMPLE_COND)
3441 {
3442 gcond *cond_stmt = as_a <gcond *> (use_stmt);
3443 gimple_cond_set_lhs (cond_stmt, ovf);
3444 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3445 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3446 }
3447 else
3448 {
3449 gcc_checking_assert (is_gimple_assign (use_stmt));
3450 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3451 {
3452 gimple_assign_set_rhs1 (use_stmt, ovf);
3453 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3454 gimple_assign_set_rhs_code (use_stmt,
3455 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3456 }
3457 else
3458 {
3459 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3460 == COND_EXPR);
3461 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3462 boolean_type_node, ovf,
3463 build_int_cst (type, 0));
3464 gimple_assign_set_rhs1 (use_stmt, cond);
3465 }
3466 }
3467 update_stmt (use_stmt);
3468 }
3469 return true;
3470}
3471
67f7b566 3472/* Return true if the target has support for divmod. */
3473
3474static bool
3475target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3476{
3477 /* If target supports hardware divmod insn, use it for divmod. */
3478 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3479 return true;
3480
3481 /* Check if libfunc for divmod is available. */
3482 rtx libfunc = optab_libfunc (divmod_optab, mode);
3483 if (libfunc != NULL_RTX)
3484 {
3485 /* If optab_handler exists for div_optab, perhaps in a wider mode,
3486 we don't want to use the libfunc even if it exists for the given mode. */
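      /* E.g. (an illustrative scenario) on a target with a division
	 insn, possibly only in a wider mode, expanding to two division
	 insns is very likely cheaper than one divmod libcall.  */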
19a4dce4 3487 machine_mode div_mode;
3488 FOR_EACH_MODE_FROM (div_mode, mode)
67f7b566 3489 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3490 return false;
3491
3492 return targetm.expand_divmod_libfunc != NULL;
3493 }
3494
3495 return false;
3496}
3497
3498/* Check if STMT is a candidate for the divmod transform. */
3499
3500static bool
3501divmod_candidate_p (gassign *stmt)
3502{
3503 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
582adad1 3504 machine_mode mode = TYPE_MODE (type);
67f7b566 3505 optab divmod_optab, div_optab;
3506
3507 if (TYPE_UNSIGNED (type))
3508 {
3509 divmod_optab = udivmod_optab;
3510 div_optab = udiv_optab;
3511 }
3512 else
3513 {
3514 divmod_optab = sdivmod_optab;
3515 div_optab = sdiv_optab;
3516 }
3517
3518 tree op1 = gimple_assign_rhs1 (stmt);
3519 tree op2 = gimple_assign_rhs2 (stmt);
3520
3521 /* Disable the transform if either operand is a constant, since
3522 division by a constant may have a specialized expansion. */
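  /* E.g. (illustrative) x / 7 is typically expanded as a highpart
     multiply by a magic constant plus shifts, which would be lost if
     we forced a DIVMOD call here.  */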
3523 if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3524 return false;
3525
3526 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3527 expand using the [su]divv optabs. */
3528 if (TYPE_OVERFLOW_TRAPS (type))
3529 return false;
3530
3531 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3532 return false;
3533
3534 return true;
3535}
3536
3537/* This function looks for:
3538 t1 = a TRUNC_DIV_EXPR b;
3539 t2 = a TRUNC_MOD_EXPR b;
3540 and transforms it to the following sequence:
3541 complex_tmp = DIVMOD (a, b);
3542 t1 = REALPART_EXPR(complex_tmp);
3543 t2 = IMAGPART_EXPR(complex_tmp);
3544 For conditions enabling the transform see divmod_candidate_p().
3545
3546 The pass has three parts:
3547 1) Find top_stmt, a trunc_div or trunc_mod stmt that dominates all
3548 other trunc_div_expr and trunc_mod_expr stmts.
3549 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
3550 to stmts vector.
3551 3) Insert DIVMOD call just before top_stmt and update entries in
3552 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
3553 IMAGPART_EXPR for mod). */
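/* E.g. (hypothetical GIMPLE, with bb 2 dominating bb 3):

     <bb 2>:
       t1 = a / b;	<-- becomes top_stmt
     <bb 3>:
       t2 = a % b;	<-- dominated, rewritten as well

   Both statements end up as projections of one DIVMOD call inserted
   just before t1's statement.  */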
3554
3555static bool
3556convert_to_divmod (gassign *stmt)
3557{
aac19106 3558 if (stmt_can_throw_internal (cfun, stmt)
67f7b566 3559 || !divmod_candidate_p (stmt))
3560 return false;
3561
3562 tree op1 = gimple_assign_rhs1 (stmt);
3563 tree op2 = gimple_assign_rhs2 (stmt);
3564
3565 imm_use_iterator use_iter;
3566 gimple *use_stmt;
3567 auto_vec<gimple *> stmts;
3568
3569 gimple *top_stmt = stmt;
3570 basic_block top_bb = gimple_bb (stmt);
3571
3572 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
3573 at least stmt and possibly other trunc_div/trunc_mod stmts
3574 having the same operands as stmt. */
3575
3576 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
3577 {
3578 if (is_gimple_assign (use_stmt)
3579 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3580 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3581 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
3582 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
3583 {
aac19106 3584 if (stmt_can_throw_internal (cfun, use_stmt))
67f7b566 3585 continue;
3586
3587 basic_block bb = gimple_bb (use_stmt);
3588
3589 if (bb == top_bb)
3590 {
3591 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
3592 top_stmt = use_stmt;
3593 }
3594 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
3595 {
3596 top_bb = bb;
3597 top_stmt = use_stmt;
3598 }
3599 }
3600 }
3601
3602 tree top_op1 = gimple_assign_rhs1 (top_stmt);
3603 tree top_op2 = gimple_assign_rhs2 (top_stmt);
3604
3605 stmts.safe_push (top_stmt);
3606 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3607
3608 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
3609 to the stmts vector. The 2nd loop will always add stmt to the stmts
3610 vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3611 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
3612
3613 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3614 {
3615 if (is_gimple_assign (use_stmt)
3616 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3617 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3618 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3619 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3620 {
3621 if (use_stmt == top_stmt
aac19106 3622 || stmt_can_throw_internal (cfun, use_stmt)
67f7b566 3623 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3624 continue;
3625
3626 stmts.safe_push (use_stmt);
3627 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3628 div_seen = true;
3629 }
3630 }
3631
3632 if (!div_seen)
3633 return false;
3634
3635 /* Part 3: Create libcall to internal fn DIVMOD:
3636 divmod_tmp = DIVMOD (op1, op2). */
3637
3638 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3639 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3640 call_stmt, "divmod_tmp");
3641 gimple_call_set_lhs (call_stmt, res);
989f02dc 3642 /* We rejected throwing statements above. */
3643 gimple_call_set_nothrow (call_stmt, true);
67f7b566 3644
3645 /* Insert the call before top_stmt. */
3646 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3647 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3648
3649 widen_mul_stats.divmod_calls_inserted++;
3650
3651 /* Update all statements in stmts vector:
3652 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3653 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
3654
3655 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
3656 {
3657 tree new_rhs;
3658
3659 switch (gimple_assign_rhs_code (use_stmt))
3660 {
3661 case TRUNC_DIV_EXPR:
3662 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
3663 break;
3664
3665 case TRUNC_MOD_EXPR:
3666 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
3667 break;
3668
3669 default:
3670 gcc_unreachable ();
3671 }
3672
3673 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3674 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
3675 update_stmt (use_stmt);
3676 }
3677
3678 return true;
3679}
e11a63e8 3680
62be004c 3681/* Find integer multiplications where the operands are extended from
3682 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3683 where appropriate. */
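/* For example (illustrative):

     short a, b;
     int c = (int) a * (int) b;

   is rewritten so that c is computed as

     c = a w* b;

   i.e. a single WIDEN_MULT_EXPR instead of two extensions and a full
   multiply.  */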
3684
65b0537f 3685namespace {
3686
3687const pass_data pass_data_optimize_widening_mul =
3688{
3689 GIMPLE_PASS, /* type */
3690 "widening_mul", /* name */
3691 OPTGROUP_NONE, /* optinfo_flags */
8ed378fe 3692 TV_TREE_WIDEN_MUL, /* tv_id */
65b0537f 3693 PROP_ssa, /* properties_required */
3694 0, /* properties_provided */
3695 0, /* properties_destroyed */
3696 0, /* todo_flags_start */
8b88439e 3697 TODO_update_ssa, /* todo_flags_finish */
65b0537f 3698};
3699
3700class pass_optimize_widening_mul : public gimple_opt_pass
3701{
3702public:
3703 pass_optimize_widening_mul (gcc::context *ctxt)
3704 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3705 {}
3706
3707 /* opt_pass methods: */
3708 virtual bool gate (function *)
3709 {
3710 return flag_expensive_optimizations && optimize;
3711 }
3712
3713 virtual unsigned int execute (function *);
3714
3715}; // class pass_optimize_widening_mul
3716
ed306e55 3717/* Walker class to perform the transformation in reverse dominance order. */
3718
3719class math_opts_dom_walker : public dom_walker
62be004c 3720{
ed306e55 3721public:
3722 /* Constructor. CFG_CHANGED_P is a pointer to a boolean flag that will be
3723 set if walking modifies the CFG. */
62be004c 3724
ed306e55 3725 math_opts_dom_walker (bool *cfg_changed_p)
3726 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
3727 m_cfg_changed_p (cfg_changed_p) {}
30c4e60d 3728
ed306e55 3729 /* The actual actions performed in the walk. */
3730
3731 virtual void after_dom_children (basic_block);
3732
3733 /* Set of results of chains of multiply and add statement combinations that
3734 were not transformed into FMAs because of active deferring. */
3735 hash_set<tree> m_last_result_set;
3736
3737 /* Pointer to a flag of the user that needs to be set if CFG has been
3738 modified. */
3739 bool *m_cfg_changed_p;
3740};
3741
3742void
3743math_opts_dom_walker::after_dom_children (basic_block bb)
3744{
3745 gimple_stmt_iterator gsi;
3746
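  /* Deferring of FMA candidates is only active when the target limits
     chains of dependent FMAs via PARAM_AVOID_FMA_MAX_BITS (see
     convert_mult_to_fma).  */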
3747 fma_deferring_state fma_state (PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS) > 0);
3748
3749 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
62be004c 3750 {
ed306e55 3751 gimple *stmt = gsi_stmt (gsi);
3752 enum tree_code code;
62be004c 3753
ed306e55 3754 if (is_gimple_assign (stmt))
3755 {
3756 code = gimple_assign_rhs_code (stmt);
3757 switch (code)
3758 {
3759 case MULT_EXPR:
3760 if (!convert_mult_to_widen (stmt, &gsi)
3761 && !convert_expand_mult_copysign (stmt, &gsi)
3762 && convert_mult_to_fma (stmt,
3763 gimple_assign_rhs1 (stmt),
3764 gimple_assign_rhs2 (stmt),
3765 &fma_state))
3766 {
3767 gsi_remove (&gsi, true);
3768 release_defs (stmt);
3769 continue;
3770 }
3771 break;
3772
3773 case PLUS_EXPR:
3774 case MINUS_EXPR:
3775 if (!convert_plusminus_to_widen (&gsi, stmt, code))
3776 match_uaddsub_overflow (&gsi, stmt, code);
3777 break;
62be004c 3778
ed306e55 3779 case TRUNC_MOD_EXPR:
3780 convert_to_divmod (as_a<gassign *> (stmt));
3781 break;
3782
3783 default:;
3784 }
3785 }
3786 else if (is_gimple_call (stmt))
3787 {
3788 tree fndecl = gimple_call_fndecl (stmt);
3789 if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
b9be572e 3790 {
ed306e55 3791 switch (DECL_FUNCTION_CODE (fndecl))
b9be572e 3792 {
ed306e55 3793 case BUILT_IN_POWF:
3794 case BUILT_IN_POW:
3795 case BUILT_IN_POWL:
3796 if (gimple_call_lhs (stmt)
3797 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3798 && real_equal
3799 (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3800 &dconst2)
15dbdc8f 3801 && convert_mult_to_fma (stmt,
ed306e55 3802 gimple_call_arg (stmt, 0),
3803 gimple_call_arg (stmt, 0),
3804 &fma_state))
b9be572e 3805 {
ed306e55 3806 unlink_stmt_vdef (stmt);
3807 if (gsi_remove (&gsi, true)
3808 && gimple_purge_dead_eh_edges (bb))
3809 *m_cfg_changed_p = true;
b9be572e 3810 release_defs (stmt);
3811 continue;
3812 }
3813 break;
3814
b9be572e 3815 default:;
3816 }
3817 }
ed306e55 3818 else
3819 cancel_fma_deferring (&fma_state);
62be004c 3820 }
ed306e55 3821 gsi_next (&gsi);
62be004c 3822 }
ed306e55 3823 if (fma_state.m_deferring_p
3824 && fma_state.m_initial_phi)
3825 {
3826 gcc_checking_assert (fma_state.m_last_result);
3827 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
3828 &m_last_result_set))
3829 cancel_fma_deferring (&fma_state);
3830 else
3831 m_last_result_set.add (fma_state.m_last_result);
3832 }
3833}
3834
3835
3836unsigned int
3837pass_optimize_widening_mul::execute (function *fun)
3838{
3839 bool cfg_changed = false;
3840
3841 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3842 calculate_dominance_info (CDI_DOMINATORS);
3843 renumber_gimple_stmt_uids ();
3844
3845 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
00f4f705 3846
65b0537f 3847 statistics_counter_event (fun, "widening multiplications inserted",
30c4e60d 3848 widen_mul_stats.widen_mults_inserted);
65b0537f 3849 statistics_counter_event (fun, "widening maccs inserted",
30c4e60d 3850 widen_mul_stats.maccs_inserted);
65b0537f 3851 statistics_counter_event (fun, "fused multiply-adds inserted",
30c4e60d 3852 widen_mul_stats.fmas_inserted);
67f7b566 3853 statistics_counter_event (fun, "divmod calls inserted",
3854 widen_mul_stats.divmod_calls_inserted);
30c4e60d 3855
15dbdc8f 3856 return cfg_changed ? TODO_cleanup_cfg : 0;
62be004c 3857}
3858
cbe8bda8 3859} // anon namespace
3860
3861gimple_opt_pass *
3862make_pass_optimize_widening_mul (gcc::context *ctxt)
3863{
3864 return new pass_optimize_widening_mul (ctxt);
3865}