1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* Currently, the first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43 First of all, with some experiments it was found that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
86
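/* Editorial illustration (not part of the original file): a minimal,
   self-contained C example of the source pattern the reciprocal CSE
   described above targets.  The function name and the use of <math.h>
   are illustrative assumptions.  Compiled with -O2 -freciprocal-math,
   the three divisions by MODULUS are expected to be rewritten as
   multiplications by a single 1.0 / modulus.  */
#if 0
#include <math.h>

static void
normalize3 (double *x, double *y, double *z)
{
  double modulus = sqrt (*x * *x + *y * *y + *z * *z);
  /* The same divisor is used three times: a reciprocal CSE candidate.  */
  *x = *x / modulus;
  *y = *y / modulus;
  *z = *z / modulus;
}
#endif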
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118
119 /* This structure represents one basic block that either computes a
120 division, or is a common dominator for basic blocks that compute a
121 division. */
122 struct occurrence {
123 /* The basic block represented by this structure. */
124 basic_block bb = basic_block();
125
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
127 inserted in BB. */
128 tree recip_def = tree();
129
130 /* If non-NULL, the SSA_NAME holding the definition for a squared
131 reciprocal inserted in BB. */
132 tree square_recip_def = tree();
133
134 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
135 was inserted in BB. */
136 gimple *recip_def_stmt = nullptr;
137
138 /* Pointer to a list of "struct occurrence"s for blocks dominated
139 by BB. */
140 struct occurrence *children = nullptr;
141
142 /* Pointer to the next "struct occurrence" in the list of blocks
143 sharing a common dominator. */
144 struct occurrence *next = nullptr;
145
146 /* The number of divisions that are in BB before compute_merit. The
147 number of divisions that are in BB or post-dominate it after
148 compute_merit. */
149 int num_divisions = 0;
150
151 /* True if the basic block has a division, false if it is a common
152 dominator for basic blocks that do. If it is false and trapping
153 math is active, BB is not a candidate for inserting a reciprocal. */
154 bool bb_has_division = false;
155
156 /* Construct a struct occurrence for basic block BB whose
157 children list is headed by CHILDREN. */
158 occurrence (basic_block bb, struct occurrence *children)
159 : bb (bb), children (children)
160 {
161 bb->aux = this;
162 }
163
164 /* Destroy a struct occurrence and remove it from its basic block. */
165 ~occurrence ()
166 {
167 bb->aux = nullptr;
168 }
169
170 /* Allocate memory for a struct occurrence from OCC_POOL. */
171 static void* operator new (size_t);
172
173 /* Return memory for a struct occurrence to OCC_POOL. */
174 static void operator delete (void*, size_t);
175 };
176
177 static struct
178 {
179 /* Number of 1.0/X ops inserted. */
180 int rdivs_inserted;
181
182 /* Number of 1.0/FUNC ops inserted. */
183 int rfuncs_inserted;
184 } reciprocal_stats;
185
186 static struct
187 {
188 /* Number of cexpi calls inserted. */
189 int inserted;
190 } sincos_stats;
191
192 static struct
193 {
194 /* Number of widening multiplication ops inserted. */
195 int widen_mults_inserted;
196
197 /* Number of integer multiply-and-accumulate ops inserted. */
198 int maccs_inserted;
199
200 /* Number of fp fused multiply-add ops inserted. */
201 int fmas_inserted;
202
203 /* Number of divmod calls inserted. */
204 int divmod_calls_inserted;
205 } widen_mul_stats;
206
207 /* The instance of "struct occurrence" representing the highest
208 interesting block in the dominator tree. */
209 static struct occurrence *occ_head;
210
211 /* Allocation pool for getting instances of "struct occurrence". */
212 static object_allocator<occurrence> *occ_pool;
213
214 void* occurrence::operator new (size_t n)
215 {
216 gcc_assert (n == sizeof(occurrence));
217 return occ_pool->allocate_raw ();
218 }
219
220 void occurrence::operator delete (void *occ, size_t n)
221 {
222 gcc_assert (n == sizeof(occurrence));
223 occ_pool->remove_raw (occ);
224 }
225
226 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
227 list of "struct occurrence"s, one per basic block, having IDOM as
228 their common dominator.
229
230 We try to insert NEW_OCC as deep as possible in the tree, and we also
231 insert any other block that is a common dominator for BB and one
232 block already in the tree. */
233
234 static void
235 insert_bb (struct occurrence *new_occ, basic_block idom,
236 struct occurrence **p_head)
237 {
238 struct occurrence *occ, **p_occ;
239
240 for (p_occ = p_head; (occ = *p_occ) != NULL; )
241 {
242 basic_block bb = new_occ->bb, occ_bb = occ->bb;
243 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
244 if (dom == bb)
245 {
246 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
247 from its list. */
248 *p_occ = occ->next;
249 occ->next = new_occ->children;
250 new_occ->children = occ;
251
252 /* Try the next block (it may as well be dominated by BB). */
253 }
254
255 else if (dom == occ_bb)
256 {
257 /* OCC_BB dominates BB. Tail recurse to look deeper. */
258 insert_bb (new_occ, dom, &occ->children);
259 return;
260 }
261
262 else if (dom != idom)
263 {
264 gcc_assert (!dom->aux);
265
266 /* There is a dominator between IDOM and BB, add it and make
267 two children out of NEW_OCC and OCC. First, remove OCC from
268 its list. */
269 *p_occ = occ->next;
270 new_occ->next = occ;
271 occ->next = NULL;
272
273 /* None of the previous blocks has DOM as a dominator: if we tail
274 recursed, we would reexamine them uselessly. Just switch BB with
275 DOM, and go on looking for blocks dominated by DOM. */
276 new_occ = new occurrence (dom, new_occ);
277 }
278
279 else
280 {
281 /* Nothing special, go on with the next element. */
282 p_occ = &occ->next;
283 }
284 }
285
286 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
287 new_occ->next = *p_head;
288 *p_head = new_occ;
289 }
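/* Editorial worked example (a hypothetical CFG, not part of the original
   file): suppose divisions by the same SSA name appear in bb5 and bb7 and
   their nearest common dominator is bb3.  Registering the first division
   creates an occurrence for bb5 at OCC_HEAD; registering the second walks
   that list, computes the nearest common dominator bb3, and creates a new
   occurrence for bb3 (with bb_has_division == false) whose children are
   the occurrences for bb5 and bb7.  bb3 is then the candidate block for
   hoisting the reciprocal, subject to compute_merit and the target
   threshold handled below.  */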
290
291 /* Register that we found a division in BB.
292 IMPORTANCE is a measure of how much weighting to give
293 that division. Use IMPORTANCE = 2 to register a single
294 division. If the division is going to be found multiple
295 times use 1 (as it is with squares). */
296
297 static inline void
298 register_division_in (basic_block bb, int importance)
299 {
300 struct occurrence *occ;
301
302 occ = (struct occurrence *) bb->aux;
303 if (!occ)
304 {
305 occ = new occurrence (bb, NULL);
306 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
307 }
308
309 occ->bb_has_division = true;
310 occ->num_divisions += importance;
311 }
312
313
314 /* Compute the number of divisions that postdominate each block in OCC and
315 its children. */
316
317 static void
318 compute_merit (struct occurrence *occ)
319 {
320 struct occurrence *occ_child;
321 basic_block dom = occ->bb;
322
323 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
324 {
325 basic_block bb;
326 if (occ_child->children)
327 compute_merit (occ_child);
328
329 if (flag_exceptions)
330 bb = single_noncomplex_succ (dom);
331 else
332 bb = dom;
333
334 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
335 occ->num_divisions += occ_child->num_divisions;
336 }
337 }
338
339
340 /* Return whether USE_STMT is a floating-point division by DEF. */
341 static inline bool
342 is_division_by (gimple *use_stmt, tree def)
343 {
344 return is_gimple_assign (use_stmt)
345 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
346 && gimple_assign_rhs2 (use_stmt) == def
347 /* Do not recognize x / x as valid division, as we are getting
348 confused later by replacing all immediate uses x in such
349 a stmt. */
350 && gimple_assign_rhs1 (use_stmt) != def
351 && !stmt_can_throw_internal (cfun, use_stmt);
352 }
353
354 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
355 static inline bool
356 is_mult_by (gimple *use_stmt, tree def, tree a)
357 {
358 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
359 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
360 {
361 tree op0 = gimple_assign_rhs1 (use_stmt);
362 tree op1 = gimple_assign_rhs2 (use_stmt);
363
364 return (op0 == def && op1 == a)
365 || (op0 == a && op1 == def);
366 }
367 return false;
368 }
369
370 /* Return whether USE_STMT is DEF * DEF. */
371 static inline bool
372 is_square_of (gimple *use_stmt, tree def)
373 {
374 return is_mult_by (use_stmt, def, def);
375 }
376
377 /* Return whether USE_STMT is a floating-point division by
378 DEF * DEF. */
379 static inline bool
380 is_division_by_square (gimple *use_stmt, tree def)
381 {
382 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
383 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
384 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
385 && !stmt_can_throw_internal (cfun, use_stmt))
386 {
387 tree denominator = gimple_assign_rhs2 (use_stmt);
388 if (TREE_CODE (denominator) == SSA_NAME)
389 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
390 }
391 return false;
392 }
393
394 /* Walk the subset of the dominator tree rooted at OCC, setting the
395 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
396 the given basic block. The field may be left NULL, of course,
397 if it is not possible or profitable to do the optimization.
398
399 DEF_BSI is an iterator pointing at the statement defining DEF.
400 If RECIP_DEF is set, a dominator already has a computation that can
401 be used.
402
403 If SHOULD_INSERT_SQUARE_RECIP is set, then this also inserts
404 the square of the reciprocal immediately after the definition
405 of the reciprocal. */
406
407 static void
408 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
409 tree def, tree recip_def, tree square_recip_def,
410 int should_insert_square_recip, int threshold)
411 {
412 tree type;
413 gassign *new_stmt, *new_square_stmt;
414 gimple_stmt_iterator gsi;
415 struct occurrence *occ_child;
416
417 if (!recip_def
418 && (occ->bb_has_division || !flag_trapping_math)
419 /* Divide by two as all divisions are counted twice in
420 the costing loop. */
421 && occ->num_divisions / 2 >= threshold)
422 {
423 /* Make a variable with the replacement and substitute it. */
424 type = TREE_TYPE (def);
425 recip_def = create_tmp_reg (type, "reciptmp");
426 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
427 build_one_cst (type), def);
428
429 if (should_insert_square_recip)
430 {
431 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
432 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
433 recip_def, recip_def);
434 }
435
436 if (occ->bb_has_division)
437 {
438 /* Case 1: insert before an existing division. */
439 gsi = gsi_after_labels (occ->bb);
440 while (!gsi_end_p (gsi)
441 && (!is_division_by (gsi_stmt (gsi), def))
442 && (!is_division_by_square (gsi_stmt (gsi), def)))
443 gsi_next (&gsi);
444
445 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
446 if (should_insert_square_recip)
447 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
448 }
449 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
450 {
451 /* Case 2: insert right after the definition. Note that this will
452 never happen if the definition statement can throw, because in
453 that case the sole successor of the statement's basic block will
454 dominate all the uses as well. */
455 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
456 if (should_insert_square_recip)
457 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
458 }
459 else
460 {
461 /* Case 3: insert in a basic block not containing defs/uses. */
462 gsi = gsi_after_labels (occ->bb);
463 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
464 if (should_insert_square_recip)
465 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
466 }
467
468 reciprocal_stats.rdivs_inserted++;
469
470 occ->recip_def_stmt = new_stmt;
471 }
472
473 occ->recip_def = recip_def;
474 occ->square_recip_def = square_recip_def;
475 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
476 insert_reciprocals (def_gsi, occ_child, def, recip_def,
477 square_recip_def, should_insert_square_recip,
478 threshold);
479 }
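/* Editorial worked example (assumed counts, not part of the original
   file): a block containing two ordinary divisions by the same SSA name
   is registered twice with IMPORTANCE == 2, so its occurrence ends up
   with num_divisions == 4.  Since every division is counted twice, the
   test above compares 4 / 2 == 2 against
   targetm.min_divisions_for_recip_mul, and the reciprocal is materialized
   whenever the target reports a threshold of 2 or less.  */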
480
481 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
482 Take as argument the use for (x * x). */
483 static inline void
484 replace_reciprocal_squares (use_operand_p use_p)
485 {
486 gimple *use_stmt = USE_STMT (use_p);
487 basic_block bb = gimple_bb (use_stmt);
488 struct occurrence *occ = (struct occurrence *) bb->aux;
489
490 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
491 && occ->recip_def)
492 {
493 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
494 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
495 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
496 SET_USE (use_p, occ->square_recip_def);
497 fold_stmt_inplace (&gsi);
498 update_stmt (use_stmt);
499 }
500 }
501
502
503 /* Replace the division at USE_P with a multiplication by the reciprocal, if
504 possible. */
505
506 static inline void
507 replace_reciprocal (use_operand_p use_p)
508 {
509 gimple *use_stmt = USE_STMT (use_p);
510 basic_block bb = gimple_bb (use_stmt);
511 struct occurrence *occ = (struct occurrence *) bb->aux;
512
513 if (optimize_bb_for_speed_p (bb)
514 && occ->recip_def && use_stmt != occ->recip_def_stmt)
515 {
516 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
517 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
518 SET_USE (use_p, occ->recip_def);
519 fold_stmt_inplace (&gsi);
520 update_stmt (use_stmt);
521 }
522 }
523
524
525 /* Free OCC and return one more "struct occurrence" to be freed. */
526
527 static struct occurrence *
528 free_bb (struct occurrence *occ)
529 {
530 struct occurrence *child, *next;
531
532 /* First get the two pointers hanging off OCC. */
533 next = occ->next;
534 child = occ->children;
535 delete occ;
536
537 /* Now ensure that we don't recurse unless it is necessary. */
538 if (!child)
539 return next;
540 else
541 {
542 while (next)
543 next = free_bb (next);
544
545 return child;
546 }
547 }
548
549 /* Transform sequences like
550 t = sqrt (a)
551 x = 1.0 / t;
552 r1 = x * x;
553 r2 = a * x;
554 into:
555 t = sqrt (a)
556 r1 = 1.0 / a;
557 r2 = t;
558 x = r1 * r2;
559 depending on the uses of x, r1, r2. This removes one multiplication and
560 allows the sqrt and division operations to execute in parallel.
561 DEF_GSI is the gsi of the initial division by sqrt that defines
562 DEF (x in the example above). */
563
564 static void
565 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
566 {
567 gimple *use_stmt;
568 imm_use_iterator use_iter;
569 gimple *stmt = gsi_stmt (*def_gsi);
570 tree x = def;
571 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
572 tree div_rhs1 = gimple_assign_rhs1 (stmt);
573
574 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
575 || TREE_CODE (div_rhs1) != REAL_CST
576 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
577 return;
578
579 gcall *sqrt_stmt
580 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
581
582 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
583 return;
584
585 switch (gimple_call_combined_fn (sqrt_stmt))
586 {
587 CASE_CFN_SQRT:
588 CASE_CFN_SQRT_FN:
589 break;
590
591 default:
592 return;
593 }
594 tree a = gimple_call_arg (sqrt_stmt, 0);
595
596 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
597
598 /* Statements that use x in x * x. */
599 auto_vec<gimple *> sqr_stmts;
600 /* Statements that use x in a * x. */
601 auto_vec<gimple *> mult_stmts;
602 bool has_other_use = false;
603 bool mult_on_main_path = false;
604
605 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
606 {
607 if (is_gimple_debug (use_stmt))
608 continue;
609 if (is_square_of (use_stmt, x))
610 {
611 sqr_stmts.safe_push (use_stmt);
612 if (gimple_bb (use_stmt) == gimple_bb (stmt))
613 mult_on_main_path = true;
614 }
615 else if (is_mult_by (use_stmt, x, a))
616 {
617 mult_stmts.safe_push (use_stmt);
618 if (gimple_bb (use_stmt) == gimple_bb (stmt))
619 mult_on_main_path = true;
620 }
621 else
622 has_other_use = true;
623 }
624
625 /* In the x * x and a * x cases we just rewire stmt operands or
626 remove multiplications. In the has_other_use case we introduce
627 a multiplication so make sure we don't introduce a multiplication
628 on a path where there was none. */
629 if (has_other_use && !mult_on_main_path)
630 return;
631
632 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
633 return;
634
635 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
636 to be able to compose it from the sqr and mult cases. */
637 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
638 return;
639
640 if (dump_file)
641 {
642 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
643 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
644 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
645 fprintf (dump_file, "\n");
646 }
647
648 bool delete_div = !has_other_use;
649 tree sqr_ssa_name = NULL_TREE;
650 if (!sqr_stmts.is_empty ())
651 {
652 /* r1 = x * x. Transform the original
653 x = 1.0 / t
654 into
655 tmp1 = 1.0 / a
656 r1 = tmp1. */
657
658 sqr_ssa_name
659 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
660
661 if (dump_file)
662 {
663 fprintf (dump_file, "Replacing original division\n");
664 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
665 fprintf (dump_file, "with new division\n");
666 }
667 stmt
668 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
669 gimple_assign_rhs1 (stmt), a);
670 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
671 gsi_remove (def_gsi, true);
672 *def_gsi = gsi_for_stmt (stmt);
673 fold_stmt_inplace (def_gsi);
674 update_stmt (stmt);
675
676 if (dump_file)
677 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
678
679 delete_div = false;
680 gimple *sqr_stmt;
681 unsigned int i;
682 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
683 {
684 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
685 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
686 update_stmt (sqr_stmt);
687 }
688 }
689 if (!mult_stmts.is_empty ())
690 {
691 /* r2 = a * x. Transform this into:
692 r2 = t (The original sqrt (a)). */
693 unsigned int i;
694 gimple *mult_stmt = NULL;
695 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
696 {
697 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
698
699 if (dump_file)
700 {
701 fprintf (dump_file, "Replacing squaring multiplication\n");
702 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
703 fprintf (dump_file, "with assignment\n");
704 }
705 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
706 fold_stmt_inplace (&gsi2);
707 update_stmt (mult_stmt);
708 if (dump_file)
709 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
710 }
711 }
712
713 if (has_other_use)
714 {
715 /* Using the two temporaries tmp1, tmp2 from above
716 the original x is now:
717 x = tmp1 * tmp2. */
718 gcc_assert (orig_sqrt_ssa_name);
719 gcc_assert (sqr_ssa_name);
720
721 gimple *new_stmt
722 = gimple_build_assign (x, MULT_EXPR,
723 orig_sqrt_ssa_name, sqr_ssa_name);
724 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
725 update_stmt (stmt);
726 }
727 else if (delete_div)
728 {
729 /* Remove the original division. */
730 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
731 gsi_remove (&gsi2, true);
732 release_defs (stmt);
733 }
734 else
735 release_ssa_name (x);
736 }
737
738 /* Look for floating-point divisions among DEF's uses, and try to
739 replace them by multiplications with the reciprocal. Add
740 as many statements computing the reciprocal as needed.
741
742 DEF must be a GIMPLE register of a floating-point type. */
743
744 static void
745 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
746 {
747 use_operand_p use_p, square_use_p;
748 imm_use_iterator use_iter, square_use_iter;
749 tree square_def;
750 struct occurrence *occ;
751 int count = 0;
752 int threshold;
753 int square_recip_count = 0;
754 int sqrt_recip_count = 0;
755
756 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
757 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
758
759 /* If DEF is a square (x * x), count the number of divisions by x.
760 If there are more divisions by x than by (DEF * DEF), prefer to optimize
761 the reciprocal of x instead of DEF. This improves cases like:
762 def = x * x
763 t0 = a / def
764 t1 = b / def
765 t2 = c / x
766 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
767 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
768
769 if (is_gimple_assign (def_stmt)
770 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
771 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
772 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
773 {
774 tree op0 = gimple_assign_rhs1 (def_stmt);
775
776 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
777 {
778 gimple *use_stmt = USE_STMT (use_p);
779 if (is_division_by (use_stmt, op0))
780 sqrt_recip_count++;
781 }
782 }
783
784 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
785 {
786 gimple *use_stmt = USE_STMT (use_p);
787 if (is_division_by (use_stmt, def))
788 {
789 register_division_in (gimple_bb (use_stmt), 2);
790 count++;
791 }
792
793 if (is_square_of (use_stmt, def))
794 {
795 square_def = gimple_assign_lhs (use_stmt);
796 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
797 {
798 gimple *square_use_stmt = USE_STMT (square_use_p);
799 if (is_division_by (square_use_stmt, square_def))
800 {
801 /* This is executed twice for each division by a square. */
802 register_division_in (gimple_bb (square_use_stmt), 1);
803 square_recip_count++;
804 }
805 }
806 }
807 }
808
809 /* Square reciprocals were counted twice above. */
810 square_recip_count /= 2;
811
812 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
813 if (sqrt_recip_count > square_recip_count)
814 goto out;
815
816 /* Do the expensive part only if we can hope to optimize something. */
817 if (count + square_recip_count >= threshold && count >= 1)
818 {
819 gimple *use_stmt;
820 for (occ = occ_head; occ; occ = occ->next)
821 {
822 compute_merit (occ);
823 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
824 square_recip_count, threshold);
825 }
826
827 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
828 {
829 if (is_division_by (use_stmt, def))
830 {
831 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
832 replace_reciprocal (use_p);
833 }
834 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
835 {
836 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
837 {
838 /* Find all uses of the square that are divisions and
839 replace them by multiplications with the inverse. */
840 imm_use_iterator square_iterator;
841 gimple *powmult_use_stmt = USE_STMT (use_p);
842 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
843
844 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
845 square_iterator, powmult_def_name)
846 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
847 {
848 gimple *powmult_use_stmt = USE_STMT (square_use_p);
849 if (is_division_by (powmult_use_stmt, powmult_def_name))
850 replace_reciprocal_squares (square_use_p);
851 }
852 }
853 }
854 }
855 }
856
857 out:
858 for (occ = occ_head; occ; )
859 occ = free_bb (occ);
860
861 occ_head = NULL;
862 }
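/* Editorial example (made-up SSA names, not part of the original file):
   for a block with enough divisions by d_3 to meet the threshold, the
   code above produces

	x_5 = a_1 / d_3;		reciptmp_7 = 1.0 / d_3;
	y_6 = b_2 / d_3;	 ==>	x_5 = a_1 * reciptmp_7;
					y_6 = b_2 * reciptmp_7;

   with the reciprocal inserted just before the first division in the
   block (case 1 of insert_reciprocals above).  */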
863
864 /* Return an internal function that implements the reciprocal of CALL,
865 or IFN_LAST if there is no such function that the target supports. */
866
867 internal_fn
868 internal_fn_reciprocal (gcall *call)
869 {
870 internal_fn ifn;
871
872 switch (gimple_call_combined_fn (call))
873 {
874 CASE_CFN_SQRT:
875 CASE_CFN_SQRT_FN:
876 ifn = IFN_RSQRT;
877 break;
878
879 default:
880 return IFN_LAST;
881 }
882
883 tree_pair types = direct_internal_fn_types (ifn, call);
884 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
885 return IFN_LAST;
886
887 return ifn;
888 }
889
890 /* Go through all the floating-point SSA_NAMEs, and call
891 execute_cse_reciprocals_1 on each of them. */
892 namespace {
893
894 const pass_data pass_data_cse_reciprocals =
895 {
896 GIMPLE_PASS, /* type */
897 "recip", /* name */
898 OPTGROUP_NONE, /* optinfo_flags */
899 TV_TREE_RECIP, /* tv_id */
900 PROP_ssa, /* properties_required */
901 0, /* properties_provided */
902 0, /* properties_destroyed */
903 0, /* todo_flags_start */
904 TODO_update_ssa, /* todo_flags_finish */
905 };
906
907 class pass_cse_reciprocals : public gimple_opt_pass
908 {
909 public:
910 pass_cse_reciprocals (gcc::context *ctxt)
911 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
912 {}
913
914 /* opt_pass methods: */
915 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
916 virtual unsigned int execute (function *);
917
918 }; // class pass_cse_reciprocals
919
920 unsigned int
921 pass_cse_reciprocals::execute (function *fun)
922 {
923 basic_block bb;
924 tree arg;
925
926 occ_pool = new object_allocator<occurrence> ("dominators for recip");
927
928 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
929 calculate_dominance_info (CDI_DOMINATORS);
930 calculate_dominance_info (CDI_POST_DOMINATORS);
931
932 if (flag_checking)
933 FOR_EACH_BB_FN (bb, fun)
934 gcc_assert (!bb->aux);
935
936 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
937 if (FLOAT_TYPE_P (TREE_TYPE (arg))
938 && is_gimple_reg (arg))
939 {
940 tree name = ssa_default_def (fun, arg);
941 if (name)
942 execute_cse_reciprocals_1 (NULL, name);
943 }
944
945 FOR_EACH_BB_FN (bb, fun)
946 {
947 tree def;
948
949 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
950 gsi_next (&gsi))
951 {
952 gphi *phi = gsi.phi ();
953 def = PHI_RESULT (phi);
954 if (! virtual_operand_p (def)
955 && FLOAT_TYPE_P (TREE_TYPE (def)))
956 execute_cse_reciprocals_1 (NULL, def);
957 }
958
959 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
960 gsi_next (&gsi))
961 {
962 gimple *stmt = gsi_stmt (gsi);
963
964 if (gimple_has_lhs (stmt)
965 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
966 && FLOAT_TYPE_P (TREE_TYPE (def))
967 && TREE_CODE (def) == SSA_NAME)
968 {
969 execute_cse_reciprocals_1 (&gsi, def);
970 stmt = gsi_stmt (gsi);
971 if (flag_unsafe_math_optimizations
972 && is_gimple_assign (stmt)
973 && gimple_assign_lhs (stmt) == def
974 && !stmt_can_throw_internal (cfun, stmt)
975 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
976 optimize_recip_sqrt (&gsi, def);
977 }
978 }
979
980 if (optimize_bb_for_size_p (bb))
981 continue;
982
983 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
984 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
985 gsi_next (&gsi))
986 {
987 gimple *stmt = gsi_stmt (gsi);
988
989 if (is_gimple_assign (stmt)
990 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
991 {
992 tree arg1 = gimple_assign_rhs2 (stmt);
993 gimple *stmt1;
994
995 if (TREE_CODE (arg1) != SSA_NAME)
996 continue;
997
998 stmt1 = SSA_NAME_DEF_STMT (arg1);
999
1000 if (is_gimple_call (stmt1)
1001 && gimple_call_lhs (stmt1))
1002 {
1003 bool fail;
1004 imm_use_iterator ui;
1005 use_operand_p use_p;
1006 tree fndecl = NULL_TREE;
1007
1008 gcall *call = as_a <gcall *> (stmt1);
1009 internal_fn ifn = internal_fn_reciprocal (call);
1010 if (ifn == IFN_LAST)
1011 {
1012 fndecl = gimple_call_fndecl (call);
1013 if (!fndecl
1014 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1015 continue;
1016 fndecl = targetm.builtin_reciprocal (fndecl);
1017 if (!fndecl)
1018 continue;
1019 }
1020
1021 /* Check that all uses of the SSA name are divisions,
1022 otherwise replacing the defining statement will do
1023 the wrong thing. */
1024 fail = false;
1025 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1026 {
1027 gimple *stmt2 = USE_STMT (use_p);
1028 if (is_gimple_debug (stmt2))
1029 continue;
1030 if (!is_gimple_assign (stmt2)
1031 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1032 || gimple_assign_rhs1 (stmt2) == arg1
1033 || gimple_assign_rhs2 (stmt2) != arg1)
1034 {
1035 fail = true;
1036 break;
1037 }
1038 }
1039 if (fail)
1040 continue;
1041
1042 gimple_replace_ssa_lhs (call, arg1);
1043 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1044 {
1045 auto_vec<tree, 4> args;
1046 for (unsigned int i = 0;
1047 i < gimple_call_num_args (call); i++)
1048 args.safe_push (gimple_call_arg (call, i));
1049 gcall *stmt2;
1050 if (ifn == IFN_LAST)
1051 stmt2 = gimple_build_call_vec (fndecl, args);
1052 else
1053 stmt2 = gimple_build_call_internal_vec (ifn, args);
1054 gimple_call_set_lhs (stmt2, arg1);
1055 gimple_move_vops (stmt2, call);
1056 gimple_call_set_nothrow (stmt2,
1057 gimple_call_nothrow_p (call));
1058 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1059 gsi_replace (&gsi2, stmt2, true);
1060 }
1061 else
1062 {
1063 if (ifn == IFN_LAST)
1064 gimple_call_set_fndecl (call, fndecl);
1065 else
1066 gimple_call_set_internal_fn (call, ifn);
1067 update_stmt (call);
1068 }
1069 reciprocal_stats.rfuncs_inserted++;
1070
1071 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1072 {
1073 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1074 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1075 fold_stmt_inplace (&gsi);
1076 update_stmt (stmt);
1077 }
1078 }
1079 }
1080 }
1081 }
1082
1083 statistics_counter_event (fun, "reciprocal divs inserted",
1084 reciprocal_stats.rdivs_inserted);
1085 statistics_counter_event (fun, "reciprocal functions inserted",
1086 reciprocal_stats.rfuncs_inserted);
1087
1088 free_dominance_info (CDI_DOMINATORS);
1089 free_dominance_info (CDI_POST_DOMINATORS);
1090 delete occ_pool;
1091 return 0;
1092 }
1093
1094 } // anon namespace
1095
1096 gimple_opt_pass *
1097 make_pass_cse_reciprocals (gcc::context *ctxt)
1098 {
1099 return new pass_cse_reciprocals (ctxt);
1100 }
1101
1102 /* Records an occurrence at statement USE_STMT in the vector of statements
1103 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1104 is not yet initialized. Returns true if the occurrence was pushed on
1105 the vector. Adjusts *TOP_BB to be the basic block dominating all
1106 statements in the vector. */
1107
1108 static bool
1109 maybe_record_sincos (vec<gimple *> *stmts,
1110 basic_block *top_bb, gimple *use_stmt)
1111 {
1112 basic_block use_bb = gimple_bb (use_stmt);
1113 if (*top_bb
1114 && (*top_bb == use_bb
1115 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1116 stmts->safe_push (use_stmt);
1117 else if (!*top_bb
1118 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1119 {
1120 stmts->safe_push (use_stmt);
1121 *top_bb = use_bb;
1122 }
1123 else
1124 return false;
1125
1126 return true;
1127 }
1128
1129 /* Look for sin, cos and cexpi calls with the same argument NAME and
1130 create a single call to cexpi CSEing the result in this case.
1131 We first walk over all immediate uses of the argument collecting
1132 statements that we can CSE in a vector and in a second pass replace
1133 the statement rhs with a REALPART or IMAGPART expression on the
1134 result of the cexpi call we insert before the use statement that
1135 dominates all other candidates. */
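/* Editorial sketch of the transformation (GIMPLE fragments with made-up
   SSA names, not part of the original file):

	s_2 = sin (x_1);		sincostmp_4 = __builtin_cexpi (x_1);
	c_3 = cos (x_1);	 ==>	s_2 = IMAGPART_EXPR <sincostmp_4>;
					c_3 = REALPART_EXPR <sincostmp_4>;

   The cexpi call is placed right after the definition of x_1 (or at the
   start of the dominating block), and each old call becomes a copy from
   the real or imaginary part of its result.  */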
1136
1137 static bool
1138 execute_cse_sincos_1 (tree name)
1139 {
1140 gimple_stmt_iterator gsi;
1141 imm_use_iterator use_iter;
1142 tree fndecl, res, type = NULL_TREE;
1143 gimple *def_stmt, *use_stmt, *stmt;
1144 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1145 auto_vec<gimple *> stmts;
1146 basic_block top_bb = NULL;
1147 int i;
1148 bool cfg_changed = false;
1149
1150 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1151 {
1152 if (gimple_code (use_stmt) != GIMPLE_CALL
1153 || !gimple_call_lhs (use_stmt))
1154 continue;
1155
1156 switch (gimple_call_combined_fn (use_stmt))
1157 {
1158 CASE_CFN_COS:
1159 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1160 break;
1161
1162 CASE_CFN_SIN:
1163 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1164 break;
1165
1166 CASE_CFN_CEXPI:
1167 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1168 break;
1169
1170 default:;
1171 continue;
1172 }
1173
1174 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1175 if (!type)
1176 {
1177 type = t;
1178 t = TREE_TYPE (name);
1179 }
1180 /* This checks that NAME has the right type in the first round,
1181 and, in subsequent rounds, that the built_in type is the same
1182 type, or a compatible type. */
1183 if (type != t && !types_compatible_p (type, t))
1184 return false;
1185 }
1186 if (seen_cos + seen_sin + seen_cexpi <= 1)
1187 return false;
1188
1189 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1190 the name def statement. */
1191 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1192 if (!fndecl)
1193 return false;
1194 stmt = gimple_build_call (fndecl, 1, name);
1195 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1196 gimple_call_set_lhs (stmt, res);
1197
1198 def_stmt = SSA_NAME_DEF_STMT (name);
1199 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1200 && gimple_code (def_stmt) != GIMPLE_PHI
1201 && gimple_bb (def_stmt) == top_bb)
1202 {
1203 gsi = gsi_for_stmt (def_stmt);
1204 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1205 }
1206 else
1207 {
1208 gsi = gsi_after_labels (top_bb);
1209 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1210 }
1211 sincos_stats.inserted++;
1212
1213 /* And adjust the recorded old call sites. */
1214 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1215 {
1216 tree rhs = NULL;
1217
1218 switch (gimple_call_combined_fn (use_stmt))
1219 {
1220 CASE_CFN_COS:
1221 rhs = fold_build1 (REALPART_EXPR, type, res);
1222 break;
1223
1224 CASE_CFN_SIN:
1225 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1226 break;
1227
1228 CASE_CFN_CEXPI:
1229 rhs = res;
1230 break;
1231
1232 default:;
1233 gcc_unreachable ();
1234 }
1235
1236 /* Replace call with a copy. */
1237 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1238
1239 gsi = gsi_for_stmt (use_stmt);
1240 gsi_replace (&gsi, stmt, true);
1241 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1242 cfg_changed = true;
1243 }
1244
1245 return cfg_changed;
1246 }
1247
1248 /* To evaluate powi(x,n), the floating point value x raised to the
1249 constant integer exponent n, we use a hybrid algorithm that
1250 combines the "window method" with look-up tables. For an
1251 introduction to exponentiation algorithms and "addition chains",
1252 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1253 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1254 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1255 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1256
1257 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1258 multiplications to inline before calling the system library's pow
1259 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1260 so this default never requires calling pow, powf or powl. */
1261
1262 #ifndef POWI_MAX_MULTS
1263 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1264 #endif
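/* Editorial note (an arithmetic aside, not part of the original file):
   with a 64-bit HOST_WIDE_INT the default above evaluates to
   2 * 64 - 2 == 126 multiplications.  */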
1265
1266 /* The size of the "optimal power tree" lookup table. All
1267 exponents less than this value are simply looked up in the
1268 powi_table below. This threshold is also used to size the
1269 cache of pseudo registers that hold intermediate results. */
1270 #define POWI_TABLE_SIZE 256
1271
1272 /* The size, in bits, of the window used in the "window method"
1273 exponentiation algorithm. This is equivalent to a radix of
1274 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1275 #define POWI_WINDOW_SIZE 3
1276
1277 /* The following table is an efficient representation of an
1278 "optimal power tree". For each value, i, the corresponding
1279 value, j, in the table states that an optimal evaluation
1280 sequence for calculating pow(x,i) can be found by evaluating
1281 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1282 100 integers is given in Knuth's "Seminumerical algorithms". */
1283
1284 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1285 {
1286 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1287 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1288 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1289 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1290 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1291 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1292 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1293 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1294 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1295 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1296 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1297 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1298 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1299 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1300 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1301 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1302 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1303 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1304 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1305 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1306 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1307 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1308 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1309 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1310 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1311 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1312 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1313 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1314 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1315 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1316 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1317 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1318 };
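/* Editorial sketch (not part of the original file): how the table above
   is read.  powi_table[n] == j states that x**n is best evaluated as
   x**j * x**(n - j).  The recursion below mirrors powi_as_mults_1 for
   exponents 0 < n < POWI_TABLE_SIZE, except that it recomputes shared
   powers instead of caching them, so it performs more multiplications
   than the real expansion does.  */
#if 0
static double
powi_by_table (double x, unsigned int n)
{
  if (n == 0)
    return 1.0;
  if (n == 1)
    return x;
  /* Split the exponent the way the table suggests.  */
  return (powi_by_table (x, powi_table[n])
	  * powi_by_table (x, n - powi_table[n]));
}
#endif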
1319
1320
1321 /* Return the number of multiplications required to calculate
1322 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1323 subroutine of powi_cost. CACHE is an array indicating
1324 which exponents have already been calculated. */
1325
1326 static int
1327 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1328 {
1329 /* If we've already calculated this exponent, then this evaluation
1330 doesn't require any additional multiplications. */
1331 if (cache[n])
1332 return 0;
1333
1334 cache[n] = true;
1335 return powi_lookup_cost (n - powi_table[n], cache)
1336 + powi_lookup_cost (powi_table[n], cache) + 1;
1337 }
1338
1339 /* Return the number of multiplications required to calculate
1340 powi(x,n) for an arbitrary x, given the exponent N. This
1341 function needs to be kept in sync with powi_as_mults below. */
1342
1343 static int
1344 powi_cost (HOST_WIDE_INT n)
1345 {
1346 bool cache[POWI_TABLE_SIZE];
1347 unsigned HOST_WIDE_INT digit;
1348 unsigned HOST_WIDE_INT val;
1349 int result;
1350
1351 if (n == 0)
1352 return 0;
1353
1354 /* Ignore the reciprocal when calculating the cost. */
1355 val = (n < 0) ? -n : n;
1356
1357 /* Initialize the exponent cache. */
1358 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1359 cache[1] = true;
1360
1361 result = 0;
1362
1363 while (val >= POWI_TABLE_SIZE)
1364 {
1365 if (val & 1)
1366 {
1367 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1368 result += powi_lookup_cost (digit, cache)
1369 + POWI_WINDOW_SIZE + 1;
1370 val >>= POWI_WINDOW_SIZE;
1371 }
1372 else
1373 {
1374 val >>= 1;
1375 result++;
1376 }
1377 }
1378
1379 return result + powi_lookup_cost (val, cache);
1380 }
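/* Editorial worked example (assumed exponent, not part of the original
   file): for n == 15 the table gives powi_table[15] == 9, so the chain
   splits into x**9 * x**6; recursing and reusing cached entries yields

	x**2  = x * x
	x**3  = x * x**2
	x**6  = x**3 * x**3
	x**9  = x**3 * x**6
	x**15 = x**6 * x**9

   i.e. powi_cost (15) == 5 multiplications, well under POWI_MAX_MULTS.  */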
1381
1382 /* Recursive subroutine of powi_as_mults. This function takes the
1383 array, CACHE, of already calculated exponents and an exponent N and
1384 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1385
1386 static tree
1387 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1388 HOST_WIDE_INT n, tree *cache)
1389 {
1390 tree op0, op1, ssa_target;
1391 unsigned HOST_WIDE_INT digit;
1392 gassign *mult_stmt;
1393
1394 if (n < POWI_TABLE_SIZE && cache[n])
1395 return cache[n];
1396
1397 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1398
1399 if (n < POWI_TABLE_SIZE)
1400 {
1401 cache[n] = ssa_target;
1402 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1403 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1404 }
1405 else if (n & 1)
1406 {
1407 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1408 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1409 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1410 }
1411 else
1412 {
1413 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1414 op1 = op0;
1415 }
1416
1417 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1418 gimple_set_location (mult_stmt, loc);
1419 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1420
1421 return ssa_target;
1422 }
1423
1424 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1425 This function needs to be kept in sync with powi_cost above. */
1426
1427 static tree
1428 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1429 tree arg0, HOST_WIDE_INT n)
1430 {
1431 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1432 gassign *div_stmt;
1433 tree target;
1434
1435 if (n == 0)
1436 return build_real (type, dconst1);
1437
1438 memset (cache, 0, sizeof (cache));
1439 cache[1] = arg0;
1440
1441 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1442 if (n >= 0)
1443 return result;
1444
1445 /* If the original exponent was negative, reciprocate the result. */
1446 target = make_temp_ssa_name (type, NULL, "powmult");
1447 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1448 build_real (type, dconst1), result);
1449 gimple_set_location (div_stmt, loc);
1450 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1451
1452 return target;
1453 }
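/* Editorial example (made-up SSA names, not part of the original file):
   for a negative exponent such as powi (x, -3) the expansion above first
   builds x**3 and then reciprocates it:

	powmult_1 = x * x;
	powmult_2 = x * powmult_1;
	powmult_3 = 1.0 / powmult_2;

   and powmult_3 is returned as the result.  */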
1454
1455 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1456 location info LOC. If the arguments are appropriate, create an
1457 equivalent sequence of statements prior to GSI using an optimal
1458 number of multiplications, and return an expression holding the
1459 result. */
1460
1461 static tree
1462 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1463 tree arg0, HOST_WIDE_INT n)
1464 {
1465 /* Avoid largest negative number. */
1466 if (n != -n
1467 && ((n >= -1 && n <= 2)
1468 || (optimize_function_for_speed_p (cfun)
1469 && powi_cost (n) <= POWI_MAX_MULTS)))
1470 return powi_as_mults (gsi, loc, arg0, n);
1471
1472 return NULL_TREE;
1473 }
1474
1475 /* Build a gimple call statement that calls FN with argument ARG.
1476 Set the lhs of the call statement to a fresh SSA name. Insert the
1477 statement prior to GSI's current position, and return the fresh
1478 SSA name. */
1479
1480 static tree
1481 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1482 tree fn, tree arg)
1483 {
1484 gcall *call_stmt;
1485 tree ssa_target;
1486
1487 call_stmt = gimple_build_call (fn, 1, arg);
1488 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1489 gimple_set_lhs (call_stmt, ssa_target);
1490 gimple_set_location (call_stmt, loc);
1491 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1492
1493 return ssa_target;
1494 }
1495
1496 /* Build a gimple binary operation with the given CODE and arguments
1497 ARG0, ARG1, assigning the result to a new SSA name built from NAME.
1498 Insert the statement prior to GSI's current position, and
1499 return the fresh SSA name. */
1500
1501 static tree
1502 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1503 const char *name, enum tree_code code,
1504 tree arg0, tree arg1)
1505 {
1506 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1507 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1508 gimple_set_location (stmt, loc);
1509 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1510 return result;
1511 }
1512
1513 /* Build a gimple reference operation with the given CODE and argument
1514 ARG, assigning the result to a new SSA name of TYPE with NAME.
1515 Insert the statement prior to GSI's current position, and return
1516 the fresh SSA name. */
1517
1518 static inline tree
1519 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1520 const char *name, enum tree_code code, tree arg0)
1521 {
1522 tree result = make_temp_ssa_name (type, NULL, name);
1523 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1524 gimple_set_location (stmt, loc);
1525 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1526 return result;
1527 }
1528
1529 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1530 prior to GSI's current position, and return the fresh SSA name. */
1531
1532 static tree
1533 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1534 tree type, tree val)
1535 {
1536 tree result = make_ssa_name (type);
1537 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1538 gimple_set_location (stmt, loc);
1539 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1540 return result;
1541 }
1542
1543 struct pow_synth_sqrt_info
1544 {
1545 bool *factors;
1546 unsigned int deepest;
1547 unsigned int num_mults;
1548 };
1549
1550 /* Return true iff the real value C can be represented as a
1551 sum of powers of 0.5 up to N. That is:
1552 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1553 Record in INFO the various parameters of the synthesis algorithm such
1554 as the factors a[i], the maximum 0.5 power and the number of
1555 multiplications that will be required. */
1556
1557 bool
1558 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1559 struct pow_synth_sqrt_info *info)
1560 {
1561 REAL_VALUE_TYPE factor = dconsthalf;
1562 REAL_VALUE_TYPE remainder = c;
1563
1564 info->deepest = 0;
1565 info->num_mults = 0;
1566 memset (info->factors, 0, n * sizeof (bool));
1567
1568 for (unsigned i = 0; i < n; i++)
1569 {
1570 REAL_VALUE_TYPE res;
1571
1572 /* If something inexact happened bail out now. */
1573 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1574 return false;
1575
1576 /* We have hit zero. The number is representable as a sum
1577 of powers of 0.5. */
1578 if (real_equal (&res, &dconst0))
1579 {
1580 info->factors[i] = true;
1581 info->deepest = i + 1;
1582 return true;
1583 }
1584 else if (!REAL_VALUE_NEGATIVE (res))
1585 {
1586 remainder = res;
1587 info->factors[i] = true;
1588 info->num_mults++;
1589 }
1590 else
1591 info->factors[i] = false;
1592
1593 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1594 }
1595 return false;
1596 }
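/* Editorial worked example (assumed constant, not part of the original
   file): for C == 0.625 and N >= 3 the loop above finds

	0.625 = 0.5 + 0.125 = 0.5**1 + 0.5**3

   so factors[] = { 1, 0, 1 }, deepest == 3 and num_mults == 1, and the
   fractional power is later synthesized as
   sqrt (x) * sqrt (sqrt (sqrt (x))), matching the pow (x, 3.625) example
   further below.  */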
1597
1598 /* Return the tree corresponding to FN being applied
1599 to ARG N times at GSI and LOC.
1600 Look up previous results from CACHE if need be.
1601 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1602
1603 static tree
1604 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1605 tree fn, location_t loc, tree *cache)
1606 {
1607 tree res = cache[n];
1608 if (!res)
1609 {
1610 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1611 res = build_and_insert_call (gsi, loc, fn, prev);
1612 cache[n] = res;
1613 }
1614
1615 return res;
1616 }
1617
1618 /* Print to STREAM the repeated application of function FNAME to ARG
1619 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1620 "foo (foo (x))". */
1621
1622 static void
1623 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1624 unsigned int n)
1625 {
1626 if (n == 0)
1627 fprintf (stream, "%s", arg);
1628 else
1629 {
1630 fprintf (stream, "%s (", fname);
1631 print_nested_fn (stream, fname, arg, n - 1);
1632 fprintf (stream, ")");
1633 }
1634 }
1635
1636 /* Print to STREAM the fractional sequence of sqrt chains
1637 applied to ARG, described by INFO. Used for the dump file. */
1638
1639 static void
1640 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1641 struct pow_synth_sqrt_info *info)
1642 {
1643 for (unsigned int i = 0; i < info->deepest; i++)
1644 {
1645 bool is_set = info->factors[i];
1646 if (is_set)
1647 {
1648 print_nested_fn (stream, "sqrt", arg, i + 1);
1649 if (i != info->deepest - 1)
1650 fprintf (stream, " * ");
1651 }
1652 }
1653 }
1654
1655 /* Print to STREAM a representation of raising ARG to an integer
1656 power N. Used for the dump file. */
1657
1658 static void
1659 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1660 {
1661 if (n > 1)
1662 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1663 else if (n == 1)
1664 fprintf (stream, "%s", arg);
1665 }
1666
1667 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1668 square roots. Place at GSI and LOC. Limit the maximum depth
1669 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1670 result of the expanded sequence or NULL_TREE if the expansion failed.
1671
1672 This routine assumes that ARG1 is a real number with a fractional part
1673 (the integer exponent case will have been handled earlier in
1674 gimple_expand_builtin_pow).
1675
1676 For ARG1 > 0.0:
1677 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1678 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1679 FRAC_PART == ARG1 - WHOLE_PART:
1680 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1681 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1682 if it can be expressed as such, that is if FRAC_PART satisfies:
1683 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1684 where integer a[i] is either 0 or 1.
1685
1686 Example:
1687 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1688 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1689
1690 For ARG1 < 0.0 there are two approaches:
1691 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1692 is calculated as above.
1693
1694 Example:
1695 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1696 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1697
1698 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1699 FRAC_PART := ARG1 - WHOLE_PART
1700 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1701 Example:
1702 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1703 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1704
1705 For ARG1 < 0.0 we choose between (A) and (B) depending on
1706 how many multiplications we'd have to do.
1707 So, for the example in (B): POW (x, -5.875), if we were to
1708 follow algorithm (A) we would produce:
1709 1.0 / (POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))))
1710 which contains more multiplications than approach (B).
1711
1712 Hopefully, this approach will eliminate potentially expensive POW library
1713 calls when unsafe floating point math is enabled and allow the compiler to
1714 further optimise the multiplies, square roots and divides produced by this
1715 function. */
1716
1717 static tree
1718 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1719 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1720 {
1721 tree type = TREE_TYPE (arg0);
1722 machine_mode mode = TYPE_MODE (type);
1723 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1724 bool one_over = true;
1725
1726 if (!sqrtfn)
1727 return NULL_TREE;
1728
1729 if (TREE_CODE (arg1) != REAL_CST)
1730 return NULL_TREE;
1731
1732 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1733
1734 gcc_assert (max_depth > 0);
1735 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1736
1737 struct pow_synth_sqrt_info synth_info;
1738 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1739 synth_info.deepest = 0;
1740 synth_info.num_mults = 0;
1741
1742 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1743 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1744
1745 /* The whole and fractional parts of exp. */
1746 REAL_VALUE_TYPE whole_part;
1747 REAL_VALUE_TYPE frac_part;
1748
1749 real_floor (&whole_part, mode, &exp);
1750 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1751
1752
1753 REAL_VALUE_TYPE ceil_whole = dconst0;
1754 REAL_VALUE_TYPE ceil_fract = dconst0;
1755
1756 if (neg_exp)
1757 {
1758 real_ceil (&ceil_whole, mode, &exp);
1759 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1760 }
1761
1762 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1763 return NULL_TREE;
1764
1765 /* Check whether it's more profitable to not use 1.0 / ... */
1766 if (neg_exp)
1767 {
1768 struct pow_synth_sqrt_info alt_synth_info;
1769 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1770 alt_synth_info.deepest = 0;
1771 alt_synth_info.num_mults = 0;
1772
1773 if (representable_as_half_series_p (ceil_fract, max_depth,
1774 &alt_synth_info)
1775 && alt_synth_info.deepest <= synth_info.deepest
1776 && alt_synth_info.num_mults < synth_info.num_mults)
1777 {
1778 whole_part = ceil_whole;
1779 frac_part = ceil_fract;
1780 synth_info.deepest = alt_synth_info.deepest;
1781 synth_info.num_mults = alt_synth_info.num_mults;
1782 memcpy (synth_info.factors, alt_synth_info.factors,
1783 (max_depth + 1) * sizeof (bool));
1784 one_over = false;
1785 }
1786 }
1787
1788 HOST_WIDE_INT n = real_to_integer (&whole_part);
1789 REAL_VALUE_TYPE cint;
1790 real_from_integer (&cint, VOIDmode, n, SIGNED);
1791
1792 if (!real_identical (&whole_part, &cint))
1793 return NULL_TREE;
1794
1795 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1796 return NULL_TREE;
1797
1798 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1799
1800 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1801
1802 /* Calculate the integer part of the exponent. */
1803 if (n > 1)
1804 {
1805 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1806 if (!integer_res)
1807 return NULL_TREE;
1808 }
1809
1810 if (dump_file)
1811 {
1812 char string[64];
1813
1814 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1815 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1816
1817 if (neg_exp)
1818 {
1819 if (one_over)
1820 {
1821 fprintf (dump_file, "1.0 / (");
1822 dump_integer_part (dump_file, "x", n);
1823 if (n > 0)
1824 fprintf (dump_file, " * ");
1825 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1826 fprintf (dump_file, ")");
1827 }
1828 else
1829 {
1830 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1831 fprintf (dump_file, " / (");
1832 dump_integer_part (dump_file, "x", n);
1833 fprintf (dump_file, ")");
1834 }
1835 }
1836 else
1837 {
1838 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1839 if (n > 0)
1840 fprintf (dump_file, " * ");
1841 dump_integer_part (dump_file, "x", n);
1842 }
1843
1844 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1845 }
1846
1847
1848 tree fract_res = NULL_TREE;
1849 cache[0] = arg0;
1850
1851 /* Calculate the fractional part of the exponent. */
1852 for (unsigned i = 0; i < synth_info.deepest; i++)
1853 {
1854 if (synth_info.factors[i])
1855 {
1856 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1857
1858 if (!fract_res)
1859 fract_res = sqrt_chain;
1860
1861 else
1862 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1863 fract_res, sqrt_chain);
1864 }
1865 }
1866
1867 tree res = NULL_TREE;
1868
1869 if (neg_exp)
1870 {
1871 if (one_over)
1872 {
1873 if (n > 0)
1874 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1875 fract_res, integer_res);
1876 else
1877 res = fract_res;
1878
1879 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1880 build_real (type, dconst1), res);
1881 }
1882 else
1883 {
1884 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1885 fract_res, integer_res);
1886 }
1887 }
1888 else
1889 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1890 fract_res, integer_res);
1891 return res;
1892 }
1893
1894 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1895 with location info LOC. If possible, create an equivalent and
1896 less expensive sequence of statements prior to GSI, and return an
1897 expression holding the result. */
1898
1899 static tree
1900 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1901 tree arg0, tree arg1)
1902 {
1903 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1904 REAL_VALUE_TYPE c2, dconst3;
1905 HOST_WIDE_INT n;
1906 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1907 machine_mode mode;
1908 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1909 bool hw_sqrt_exists, c_is_int, c2_is_int;
1910
1911 dconst1_4 = dconst1;
1912 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1913
1914 /* If the exponent isn't a constant, there's nothing of interest
1915 to be done. */
1916 if (TREE_CODE (arg1) != REAL_CST)
1917 return NULL_TREE;
1918
1919 /* Don't perform the operation if flag_signaling_nans is on
1920 and the operand is a signaling NaN. */
1921 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1922 && ((TREE_CODE (arg0) == REAL_CST
1923 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1924 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1925 return NULL_TREE;
1926
1927 /* If the exponent is equivalent to an integer, expand to an optimal
1928 multiplication sequence when profitable. */
1929 c = TREE_REAL_CST (arg1);
1930 n = real_to_integer (&c);
1931 real_from_integer (&cint, VOIDmode, n, SIGNED);
1932 c_is_int = real_identical (&c, &cint);
1933
1934 if (c_is_int
1935 && ((n >= -1 && n <= 2)
1936 || (flag_unsafe_math_optimizations
1937 && speed_p
1938 && powi_cost (n) <= POWI_MAX_MULTS)))
1939 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1940
1941 /* Attempt various optimizations using sqrt and cbrt. */
1942 type = TREE_TYPE (arg0);
1943 mode = TYPE_MODE (type);
1944 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1945
1946 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1947 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1948 sqrt(-0) = -0. */
1949 if (sqrtfn
1950 && real_equal (&c, &dconsthalf)
1951 && !HONOR_SIGNED_ZEROS (mode))
1952 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1953
1954 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1955
1956 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1957 optimizations since 1./3. is not exactly representable. If x
1958 is negative and finite, the correct value of pow(x,1./3.) is
1959 a NaN with the "invalid" exception raised, because the value
1960 of 1./3. actually has an even denominator. The correct value
1961 of cbrt(x) is a negative real value. */
1962 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1963 dconst1_3 = real_value_truncate (mode, dconst_third ());
1964
1965 if (flag_unsafe_math_optimizations
1966 && cbrtfn
1967 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1968 && real_equal (&c, &dconst1_3))
1969 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1970
1971 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1972 if we don't have a hardware sqrt insn. */
1973 dconst1_6 = dconst1_3;
1974 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1975
1976 if (flag_unsafe_math_optimizations
1977 && sqrtfn
1978 && cbrtfn
1979 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1980 && speed_p
1981 && hw_sqrt_exists
1982 && real_equal (&c, &dconst1_6))
1983 {
1984 /* sqrt(x) */
1985 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1986
1987 /* cbrt(sqrt(x)) */
1988 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1989 }
1990
1991
1992 /* Attempt to expand the POW as a product of square root chains.
1993 Expand the 0.25 case even when optimising for size. */
1994 if (flag_unsafe_math_optimizations
1995 && sqrtfn
1996 && hw_sqrt_exists
1997 && (speed_p || real_equal (&c, &dconst1_4))
1998 && !HONOR_SIGNED_ZEROS (mode))
1999 {
2000 unsigned int max_depth = speed_p
2001 ? param_max_pow_sqrt_depth
2002 : 2;
2003
2004 tree expand_with_sqrts
2005 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2006
2007 if (expand_with_sqrts)
2008 return expand_with_sqrts;
2009 }
2010
2011 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2012 n = real_to_integer (&c2);
2013 real_from_integer (&cint, VOIDmode, n, SIGNED);
2014 c2_is_int = real_identical (&c2, &cint);
2015
2016 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2017
2018 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2019 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2020
2021 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2022 different from pow(x, 1./3.) due to rounding and behavior with
2023 negative x, we need to constrain this transformation to unsafe
2024 math and positive x or finite math. */
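
/* Worked example (editorial): for c = 5./3. we get n = 5, n/3 = 1 and
n%3 = 2, so the call becomes powi (x, 1) * cbrt (x) * cbrt (x),
i.e. x * x**(2/3) = x**(5/3); for c = -5./3. the same product is
built and then reciprocated. */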
2025 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2026 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2027 real_round (&c2, mode, &c2);
2028 n = real_to_integer (&c2);
2029 real_from_integer (&cint, VOIDmode, n, SIGNED);
2030 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2031 real_convert (&c2, mode, &c2);
2032
2033 if (flag_unsafe_math_optimizations
2034 && cbrtfn
2035 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2036 && real_identical (&c2, &c)
2037 && !c2_is_int
2038 && optimize_function_for_speed_p (cfun)
2039 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2040 {
2041 tree powi_x_ndiv3 = NULL_TREE;
2042
2043 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2044 possible or profitable, give up. Skip the degenerate case when
2045 abs(n) < 3, where the result is always 1. */
2046 if (absu_hwi (n) >= 3)
2047 {
2048 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2049 abs_hwi (n / 3));
2050 if (!powi_x_ndiv3)
2051 return NULL_TREE;
2052 }
2053
2054 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2055 as that creates an unnecessary variable. Instead, just produce
2056 either cbrt(x) or cbrt(x) * cbrt(x). */
2057 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2058
2059 if (absu_hwi (n) % 3 == 1)
2060 powi_cbrt_x = cbrt_x;
2061 else
2062 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2063 cbrt_x, cbrt_x);
2064
2065 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2066 if (absu_hwi (n) < 3)
2067 result = powi_cbrt_x;
2068 else
2069 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2070 powi_x_ndiv3, powi_cbrt_x);
2071
2072 /* If n is negative, reciprocate the result. */
2073 if (n < 0)
2074 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2075 build_real (type, dconst1), result);
2076
2077 return result;
2078 }
2079
2080 /* No optimizations succeeded. */
2081 return NULL_TREE;
2082 }
2083
2084 /* ARG is the argument to a cabs builtin call in GSI with location info
2085 LOC. Create a sequence of statements prior to GSI that calculates
2086 sqrt(R*R + I*I), where R and I are the real and imaginary components
2087 of ARG, respectively. Return an expression holding the result. */
2088
2089 static tree
2090 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2091 {
2092 tree real_part, imag_part, addend1, addend2, sum, result;
2093 tree type = TREE_TYPE (TREE_TYPE (arg));
2094 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2095 machine_mode mode = TYPE_MODE (type);
2096
2097 if (!flag_unsafe_math_optimizations
2098 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2099 || !sqrtfn
2100 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2101 return NULL_TREE;
2102
2103 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2104 REALPART_EXPR, arg);
2105 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2106 real_part, real_part);
2107 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2108 IMAGPART_EXPR, arg);
2109 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2110 imag_part, imag_part);
2111 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2112 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2113
2114 return result;
2115 }
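
/* Editorial sketch (not part of this file): the scalar computation the
   expansion above emits for cabs. It ignores the overflow/underflow
   protection a real cabs implementation provides, which is presumably why
   the transform is guarded by flag_unsafe_math_optimizations. */

#include <complex.h>
#include <math.h>

static double
cabs_sketch (double complex z)
{
  double r = creal (z);
  double i = cimag (z);
  return sqrt (r * r + i * i);
}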
2116
2117 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2118 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2119 an optimal number of multiplies, when n is a constant. */
2120
2121 namespace {
2122
2123 const pass_data pass_data_cse_sincos =
2124 {
2125 GIMPLE_PASS, /* type */
2126 "sincos", /* name */
2127 OPTGROUP_NONE, /* optinfo_flags */
2128 TV_TREE_SINCOS, /* tv_id */
2129 PROP_ssa, /* properties_required */
2130 PROP_gimple_opt_math, /* properties_provided */
2131 0, /* properties_destroyed */
2132 0, /* todo_flags_start */
2133 TODO_update_ssa, /* todo_flags_finish */
2134 };
2135
2136 class pass_cse_sincos : public gimple_opt_pass
2137 {
2138 public:
2139 pass_cse_sincos (gcc::context *ctxt)
2140 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2141 {}
2142
2143 /* opt_pass methods: */
2144 virtual bool gate (function *)
2145 {
2146 /* We no longer require either sincos or cexp, since powi expansion
2147 piggybacks on this pass. */
2148 return optimize;
2149 }
2150
2151 virtual unsigned int execute (function *);
2152
2153 }; // class pass_cse_sincos
2154
2155 unsigned int
2156 pass_cse_sincos::execute (function *fun)
2157 {
2158 basic_block bb;
2159 bool cfg_changed = false;
2160
2161 calculate_dominance_info (CDI_DOMINATORS);
2162 memset (&sincos_stats, 0, sizeof (sincos_stats));
2163
2164 FOR_EACH_BB_FN (bb, fun)
2165 {
2166 gimple_stmt_iterator gsi;
2167 bool cleanup_eh = false;
2168
2169 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2170 {
2171 gimple *stmt = gsi_stmt (gsi);
2172
2173 /* Only the last stmt in a bb could throw, no need to call
2174 gimple_purge_dead_eh_edges if we change something in the middle
2175 of a basic block. */
2176 cleanup_eh = false;
2177
2178 if (is_gimple_call (stmt)
2179 && gimple_call_lhs (stmt))
2180 {
2181 tree arg, arg0, arg1, result;
2182 HOST_WIDE_INT n;
2183 location_t loc;
2184
2185 switch (gimple_call_combined_fn (stmt))
2186 {
2187 CASE_CFN_COS:
2188 CASE_CFN_SIN:
2189 CASE_CFN_CEXPI:
2190 arg = gimple_call_arg (stmt, 0);
2191 /* Make sure we have either sincos or cexp. */
2192 if (!targetm.libc_has_function (function_c99_math_complex,
2193 TREE_TYPE (arg))
2194 && !targetm.libc_has_function (function_sincos,
2195 TREE_TYPE (arg)))
2196 break;
2197
2198 if (TREE_CODE (arg) == SSA_NAME)
2199 cfg_changed |= execute_cse_sincos_1 (arg);
2200 break;
2201
2202 CASE_CFN_POW:
2203 arg0 = gimple_call_arg (stmt, 0);
2204 arg1 = gimple_call_arg (stmt, 1);
2205
2206 loc = gimple_location (stmt);
2207 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2208
2209 if (result)
2210 {
2211 tree lhs = gimple_get_lhs (stmt);
2212 gassign *new_stmt = gimple_build_assign (lhs, result);
2213 gimple_set_location (new_stmt, loc);
2214 unlink_stmt_vdef (stmt);
2215 gsi_replace (&gsi, new_stmt, true);
2216 cleanup_eh = true;
2217 if (gimple_vdef (stmt))
2218 release_ssa_name (gimple_vdef (stmt));
2219 }
2220 break;
2221
2222 CASE_CFN_POWI:
2223 arg0 = gimple_call_arg (stmt, 0);
2224 arg1 = gimple_call_arg (stmt, 1);
2225 loc = gimple_location (stmt);
2226
2227 if (real_minus_onep (arg0))
2228 {
2229 tree t0, t1, cond, one, minus_one;
2230 gassign *stmt;
2231
2232 t0 = TREE_TYPE (arg0);
2233 t1 = TREE_TYPE (arg1);
2234 one = build_real (t0, dconst1);
2235 minus_one = build_real (t0, dconstm1);
2236
2237 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2238 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2239 arg1, build_int_cst (t1, 1));
2240 gimple_set_location (stmt, loc);
2241 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2242
2243 result = make_temp_ssa_name (t0, NULL, "powi");
2244 stmt = gimple_build_assign (result, COND_EXPR, cond,
2245 minus_one, one);
2246 gimple_set_location (stmt, loc);
2247 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2248 }
2249 else
2250 {
2251 if (!tree_fits_shwi_p (arg1))
2252 break;
2253
2254 n = tree_to_shwi (arg1);
2255 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2256 }
2257
2258 if (result)
2259 {
2260 tree lhs = gimple_get_lhs (stmt);
2261 gassign *new_stmt = gimple_build_assign (lhs, result);
2262 gimple_set_location (new_stmt, loc);
2263 unlink_stmt_vdef (stmt);
2264 gsi_replace (&gsi, new_stmt, true);
2265 cleanup_eh = true;
2266 if (gimple_vdef (stmt))
2267 release_ssa_name (gimple_vdef (stmt));
2268 }
2269 break;
2270
2271 CASE_CFN_CABS:
2272 arg0 = gimple_call_arg (stmt, 0);
2273 loc = gimple_location (stmt);
2274 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2275
2276 if (result)
2277 {
2278 tree lhs = gimple_get_lhs (stmt);
2279 gassign *new_stmt = gimple_build_assign (lhs, result);
2280 gimple_set_location (new_stmt, loc);
2281 unlink_stmt_vdef (stmt);
2282 gsi_replace (&gsi, new_stmt, true);
2283 cleanup_eh = true;
2284 if (gimple_vdef (stmt))
2285 release_ssa_name (gimple_vdef (stmt));
2286 }
2287 break;
2288
2289 default:;
2290 }
2291 }
2292 }
2293 if (cleanup_eh)
2294 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2295 }
2296
2297 statistics_counter_event (fun, "sincos statements inserted",
2298 sincos_stats.inserted);
2299
2300 return cfg_changed ? TODO_cleanup_cfg : 0;
2301 }
2302
2303 } // anon namespace
2304
2305 gimple_opt_pass *
2306 make_pass_cse_sincos (gcc::context *ctxt)
2307 {
2308 return new pass_cse_sincos (ctxt);
2309 }
2310
2311 /* Return true if stmt is a type conversion operation that can be stripped
2312 when used in a widening multiply operation. */
2313 static bool
2314 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2315 {
2316 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2317
2318 if (TREE_CODE (result_type) == INTEGER_TYPE)
2319 {
2320 tree op_type;
2321 tree inner_op_type;
2322
2323 if (!CONVERT_EXPR_CODE_P (rhs_code))
2324 return false;
2325
2326 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2327
2328 /* If the type of OP has the same precision as the result, then
2329 we can strip this conversion. The multiply operation will be
2330 selected to create the correct extension as a by-product. */
2331 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2332 return true;
2333
2334 /* We can also strip a conversion if it preserves the signed-ness of
2335 the operation and doesn't narrow the range. */
2336 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2337
2338 /* If the inner-most type is unsigned, then we can strip any
2339 intermediate widening operation. If it's signed, then the
2340 intermediate widening operation must also be signed. */
2341 if ((TYPE_UNSIGNED (inner_op_type)
2342 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2343 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2344 return true;
2345
2346 return false;
2347 }
2348
2349 return rhs_code == FIXED_CONVERT_EXPR;
2350 }
2351
2352 /* Return true if RHS is a suitable operand for a widening multiplication,
2353 assuming a target type of TYPE.
2354 There are two cases:
2355
2356 - RHS makes some value at least twice as wide. Store that value
2357 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2358
2359 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2360 but leave *TYPE_OUT untouched. */
2361
2362 static bool
2363 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2364 tree *new_rhs_out)
2365 {
2366 gimple *stmt;
2367 tree type1, rhs1;
2368
2369 if (TREE_CODE (rhs) == SSA_NAME)
2370 {
2371 stmt = SSA_NAME_DEF_STMT (rhs);
2372 if (is_gimple_assign (stmt))
2373 {
2374 if (! widening_mult_conversion_strippable_p (type, stmt))
2375 rhs1 = rhs;
2376 else
2377 {
2378 rhs1 = gimple_assign_rhs1 (stmt);
2379
2380 if (TREE_CODE (rhs1) == INTEGER_CST)
2381 {
2382 *new_rhs_out = rhs1;
2383 *type_out = NULL;
2384 return true;
2385 }
2386 }
2387 }
2388 else
2389 rhs1 = rhs;
2390
2391 type1 = TREE_TYPE (rhs1);
2392
2393 if (TREE_CODE (type1) != TREE_CODE (type)
2394 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2395 return false;
2396
2397 *new_rhs_out = rhs1;
2398 *type_out = type1;
2399 return true;
2400 }
2401
2402 if (TREE_CODE (rhs) == INTEGER_CST)
2403 {
2404 *new_rhs_out = rhs;
2405 *type_out = NULL;
2406 return true;
2407 }
2408
2409 return false;
2410 }
2411
2412 /* Return true if STMT performs a widening multiplication, assuming the
2413 output type is TYPE. If so, store the unwidened types of the operands
2414 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2415 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2416 and *TYPE2_OUT would give the operands of the multiplication. */
2417
2418 static bool
2419 is_widening_mult_p (gimple *stmt,
2420 tree *type1_out, tree *rhs1_out,
2421 tree *type2_out, tree *rhs2_out)
2422 {
2423 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2424
2425 if (TREE_CODE (type) == INTEGER_TYPE)
2426 {
2427 if (TYPE_OVERFLOW_TRAPS (type))
2428 return false;
2429 }
2430 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2431 return false;
2432
2433 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2434 rhs1_out))
2435 return false;
2436
2437 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2438 rhs2_out))
2439 return false;
2440
2441 if (*type1_out == NULL)
2442 {
2443 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2444 return false;
2445 *type1_out = *type2_out;
2446 }
2447
2448 if (*type2_out == NULL)
2449 {
2450 if (!int_fits_type_p (*rhs2_out, *type1_out))
2451 return false;
2452 *type2_out = *type1_out;
2453 }
2454
2455 /* Ensure that the larger of the two operands comes first. */
2456 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2457 {
2458 std::swap (*type1_out, *type2_out);
2459 std::swap (*rhs1_out, *rhs2_out);
2460 }
2461
2462 return true;
2463 }
2464
2465 /* Check to see if the CALL statement is an invocation of copysign
2466 with 1. being the first argument. */
2467 static bool
2468 is_copysign_call_with_1 (gimple *call)
2469 {
2470 gcall *c = dyn_cast <gcall *> (call);
2471 if (! c)
2472 return false;
2473
2474 enum combined_fn code = gimple_call_combined_fn (c);
2475
2476 if (code == CFN_LAST)
2477 return false;
2478
2479 if (builtin_fn_p (code))
2480 {
2481 switch (as_builtin_fn (code))
2482 {
2483 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2484 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2485 return real_onep (gimple_call_arg (c, 0));
2486 default:
2487 return false;
2488 }
2489 }
2490
2491 if (internal_fn_p (code))
2492 {
2493 switch (as_internal_fn (code))
2494 {
2495 case IFN_COPYSIGN:
2496 return real_onep (gimple_call_arg (c, 0));
2497 default:
2498 return false;
2499 }
2500 }
2501
2502 return false;
2503 }
2504
2505 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2506 This only happens when the xorsign optab is defined. If the
2507 pattern is not a xorsign pattern or if expansion fails, FALSE is
2508 returned, otherwise TRUE is returned. */
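
/* Editorial sketch (not part of this file): the source-level identity the
   rewrite relies on. x * copysign (1.0, y) merely transfers the sign bit
   of y onto x, which the xorsign expansion performs with a single XOR of
   the sign bits; types honoring signaling NaNs are rejected above,
   presumably because the multiplication would quiet an sNaN while a
   sign-bit XOR would not. Names here are illustrative only. */

#include <math.h>

static double
mult_copysign_sketch (double x, double y)
{
  /* The pattern the pass looks for; it becomes IFN_XORSIGN (x, y).  */
  return x * copysign (1.0, y);
}
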
2509 static bool
2510 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2511 {
2512 tree treeop0, treeop1, lhs, type;
2513 location_t loc = gimple_location (stmt);
2514 lhs = gimple_assign_lhs (stmt);
2515 treeop0 = gimple_assign_rhs1 (stmt);
2516 treeop1 = gimple_assign_rhs2 (stmt);
2517 type = TREE_TYPE (lhs);
2518 machine_mode mode = TYPE_MODE (type);
2519
2520 if (HONOR_SNANS (type))
2521 return false;
2522
2523 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2524 {
2525 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2526 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2527 {
2528 call0 = SSA_NAME_DEF_STMT (treeop1);
2529 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2530 return false;
2531
2532 treeop1 = treeop0;
2533 }
2534 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2535 return false;
2536
2537 gcall *c = as_a<gcall*> (call0);
2538 treeop0 = gimple_call_arg (c, 1);
2539
2540 gcall *call_stmt
2541 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2542 gimple_set_lhs (call_stmt, lhs);
2543 gimple_set_location (call_stmt, loc);
2544 gsi_replace (gsi, call_stmt, true);
2545 return true;
2546 }
2547
2548 return false;
2549 }
2550
2551 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2552 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2553 value is true iff we converted the statement. */
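
/* Editorial sketch (not part of this file): the kind of source pattern
   that becomes a WIDEN_MULT_EXPR. The operands are only 32 bits wide, so
   a 32x32->64 widening multiply instruction can produce the full result
   directly instead of widening both inputs and doing a 64x64 multiply.
   The types below are illustrative. */

#include <stdint.h>

static int64_t
widening_mult_sketch (int32_t a, int32_t b)
{
  return (int64_t) a * b;
}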
2554
2555 static bool
2556 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2557 {
2558 tree lhs, rhs1, rhs2, type, type1, type2;
2559 enum insn_code handler;
2560 scalar_int_mode to_mode, from_mode, actual_mode;
2561 optab op;
2562 int actual_precision;
2563 location_t loc = gimple_location (stmt);
2564 bool from_unsigned1, from_unsigned2;
2565
2566 lhs = gimple_assign_lhs (stmt);
2567 type = TREE_TYPE (lhs);
2568 if (TREE_CODE (type) != INTEGER_TYPE)
2569 return false;
2570
2571 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2572 return false;
2573
2574 to_mode = SCALAR_INT_TYPE_MODE (type);
2575 from_mode = SCALAR_INT_TYPE_MODE (type1);
2576 if (to_mode == from_mode)
2577 return false;
2578
2579 from_unsigned1 = TYPE_UNSIGNED (type1);
2580 from_unsigned2 = TYPE_UNSIGNED (type2);
2581
2582 if (from_unsigned1 && from_unsigned2)
2583 op = umul_widen_optab;
2584 else if (!from_unsigned1 && !from_unsigned2)
2585 op = smul_widen_optab;
2586 else
2587 op = usmul_widen_optab;
2588
2589 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2590 &actual_mode);
2591
2592 if (handler == CODE_FOR_nothing)
2593 {
2594 if (op != smul_widen_optab)
2595 {
2596 /* We can use a signed multiply with unsigned types as long as
2597 there is a wider mode to use, or it is the smaller of the two
2598 types that is unsigned. Note that type1 >= type2, always. */
2599 if ((TYPE_UNSIGNED (type1)
2600 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2601 || (TYPE_UNSIGNED (type2)
2602 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2603 {
2604 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2605 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2606 return false;
2607 }
2608
2609 op = smul_widen_optab;
2610 handler = find_widening_optab_handler_and_mode (op, to_mode,
2611 from_mode,
2612 &actual_mode);
2613
2614 if (handler == CODE_FOR_nothing)
2615 return false;
2616
2617 from_unsigned1 = from_unsigned2 = false;
2618 }
2619 else
2620 return false;
2621 }
2622
2623 /* Ensure that the inputs to the handler are in the correct precision
2624 for the opcode. This will be the full mode size. */
2625 actual_precision = GET_MODE_PRECISION (actual_mode);
2626 if (2 * actual_precision > TYPE_PRECISION (type))
2627 return false;
2628 if (actual_precision != TYPE_PRECISION (type1)
2629 || from_unsigned1 != TYPE_UNSIGNED (type1))
2630 rhs1 = build_and_insert_cast (gsi, loc,
2631 build_nonstandard_integer_type
2632 (actual_precision, from_unsigned1), rhs1);
2633 if (actual_precision != TYPE_PRECISION (type2)
2634 || from_unsigned2 != TYPE_UNSIGNED (type2))
2635 rhs2 = build_and_insert_cast (gsi, loc,
2636 build_nonstandard_integer_type
2637 (actual_precision, from_unsigned2), rhs2);
2638
2639 /* Handle constants. */
2640 if (TREE_CODE (rhs1) == INTEGER_CST)
2641 rhs1 = fold_convert (type1, rhs1);
2642 if (TREE_CODE (rhs2) == INTEGER_CST)
2643 rhs2 = fold_convert (type2, rhs2);
2644
2645 gimple_assign_set_rhs1 (stmt, rhs1);
2646 gimple_assign_set_rhs2 (stmt, rhs2);
2647 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2648 update_stmt (stmt);
2649 widen_mul_stats.widen_mults_inserted++;
2650 return true;
2651 }
2652
2653 /* Process a single gimple statement STMT, which is found at the
2654 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2655 rhs (given by CODE), and try to convert it into a
2656 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2657 is true iff we converted the statement. */
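
/* Editorial sketch (not part of this file): the source shape that can
   become a WIDEN_MULT_PLUS_EXPR when the target provides a widening
   multiply-and-accumulate instruction; a MINUS_EXPR variant maps to
   WIDEN_MULT_MINUS_EXPR in the same way. The types are illustrative. */

#include <stdint.h>

static int64_t
widening_madd_sketch (int32_t a, int32_t b, int64_t acc)
{
  return acc + (int64_t) a * b;
}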
2658
2659 static bool
2660 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2661 enum tree_code code)
2662 {
2663 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2664 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2665 tree type, type1, type2, optype;
2666 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2667 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2668 optab this_optab;
2669 enum tree_code wmult_code;
2670 enum insn_code handler;
2671 scalar_mode to_mode, from_mode, actual_mode;
2672 location_t loc = gimple_location (stmt);
2673 int actual_precision;
2674 bool from_unsigned1, from_unsigned2;
2675
2676 lhs = gimple_assign_lhs (stmt);
2677 type = TREE_TYPE (lhs);
2678 if (TREE_CODE (type) != INTEGER_TYPE
2679 && TREE_CODE (type) != FIXED_POINT_TYPE)
2680 return false;
2681
2682 if (code == MINUS_EXPR)
2683 wmult_code = WIDEN_MULT_MINUS_EXPR;
2684 else
2685 wmult_code = WIDEN_MULT_PLUS_EXPR;
2686
2687 rhs1 = gimple_assign_rhs1 (stmt);
2688 rhs2 = gimple_assign_rhs2 (stmt);
2689
2690 if (TREE_CODE (rhs1) == SSA_NAME)
2691 {
2692 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2693 if (is_gimple_assign (rhs1_stmt))
2694 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2695 }
2696
2697 if (TREE_CODE (rhs2) == SSA_NAME)
2698 {
2699 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2700 if (is_gimple_assign (rhs2_stmt))
2701 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2702 }
2703
2704 /* Allow for one conversion statement between the multiply
2705 and addition/subtraction statement. If there is more than
2706 one conversion then we assume they would invalidate this
2707 transformation. If that's not the case then they should have
2708 been folded before now. */
2709 if (CONVERT_EXPR_CODE_P (rhs1_code))
2710 {
2711 conv1_stmt = rhs1_stmt;
2712 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2713 if (TREE_CODE (rhs1) == SSA_NAME)
2714 {
2715 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2716 if (is_gimple_assign (rhs1_stmt))
2717 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2718 }
2719 else
2720 return false;
2721 }
2722 if (CONVERT_EXPR_CODE_P (rhs2_code))
2723 {
2724 conv2_stmt = rhs2_stmt;
2725 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2726 if (TREE_CODE (rhs2) == SSA_NAME)
2727 {
2728 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2729 if (is_gimple_assign (rhs2_stmt))
2730 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2731 }
2732 else
2733 return false;
2734 }
2735
2736 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2737 is_widening_mult_p, but we still need the rhs values it returns.
2738
2739 It might also appear that it would be sufficient to use the existing
2740 operands of the widening multiply, but that would limit the choice of
2741 multiply-and-accumulate instructions.
2742
2743 If the widened-multiplication result has more than one use, it is
2744 probably wiser not to do the conversion. Also restrict this operation
2745 to a single basic block to avoid moving the multiply to a different block
2746 with a higher execution frequency. */
2747 if (code == PLUS_EXPR
2748 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2749 {
2750 if (!has_single_use (rhs1)
2751 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2752 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2753 &type2, &mult_rhs2))
2754 return false;
2755 add_rhs = rhs2;
2756 conv_stmt = conv1_stmt;
2757 }
2758 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2759 {
2760 if (!has_single_use (rhs2)
2761 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2762 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2763 &type2, &mult_rhs2))
2764 return false;
2765 add_rhs = rhs1;
2766 conv_stmt = conv2_stmt;
2767 }
2768 else
2769 return false;
2770
2771 to_mode = SCALAR_TYPE_MODE (type);
2772 from_mode = SCALAR_TYPE_MODE (type1);
2773 if (to_mode == from_mode)
2774 return false;
2775
2776 from_unsigned1 = TYPE_UNSIGNED (type1);
2777 from_unsigned2 = TYPE_UNSIGNED (type2);
2778 optype = type1;
2779
2780 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2781 if (from_unsigned1 != from_unsigned2)
2782 {
2783 if (!INTEGRAL_TYPE_P (type))
2784 return false;
2785 /* We can use a signed multiply with unsigned types as long as
2786 there is a wider mode to use, or it is the smaller of the two
2787 types that is unsigned. Note that type1 >= type2, always. */
2788 if ((from_unsigned1
2789 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2790 || (from_unsigned2
2791 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2792 {
2793 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2794 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2795 return false;
2796 }
2797
2798 from_unsigned1 = from_unsigned2 = false;
2799 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2800 false);
2801 }
2802
2803 /* If there was a conversion between the multiply and addition
2804 then we need to make sure it fits a multiply-and-accumulate.
2805 There should be a single mode change which does not change the
2806 value. */
2807 if (conv_stmt)
2808 {
2809 /* We use the original, unmodified data types for this. */
2810 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2811 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2812 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2813 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2814
2815 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2816 {
2817 /* Conversion is a truncate. */
2818 if (TYPE_PRECISION (to_type) < data_size)
2819 return false;
2820 }
2821 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2822 {
2823 /* Conversion is an extend. Check it's the right sort. */
2824 if (TYPE_UNSIGNED (from_type) != is_unsigned
2825 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2826 return false;
2827 }
2828 /* else convert is a no-op for our purposes. */
2829 }
2830
2831 /* Verify that the machine can perform a widening multiply
2832 accumulate in this mode/signedness combination, otherwise
2833 this transformation is likely to pessimize code. */
2834 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2835 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2836 from_mode, &actual_mode);
2837
2838 if (handler == CODE_FOR_nothing)
2839 return false;
2840
2841 /* Ensure that the inputs to the handler are in the correct precision
2842 for the opcode. This will be the full mode size. */
2843 actual_precision = GET_MODE_PRECISION (actual_mode);
2844 if (actual_precision != TYPE_PRECISION (type1)
2845 || from_unsigned1 != TYPE_UNSIGNED (type1))
2846 mult_rhs1 = build_and_insert_cast (gsi, loc,
2847 build_nonstandard_integer_type
2848 (actual_precision, from_unsigned1),
2849 mult_rhs1);
2850 if (actual_precision != TYPE_PRECISION (type2)
2851 || from_unsigned2 != TYPE_UNSIGNED (type2))
2852 mult_rhs2 = build_and_insert_cast (gsi, loc,
2853 build_nonstandard_integer_type
2854 (actual_precision, from_unsigned2),
2855 mult_rhs2);
2856
2857 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2858 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2859
2860 /* Handle constants. */
2861 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2862 mult_rhs1 = fold_convert (type1, mult_rhs1);
2863 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2864 mult_rhs2 = fold_convert (type2, mult_rhs2);
2865
2866 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2867 add_rhs);
2868 update_stmt (gsi_stmt (*gsi));
2869 widen_mul_stats.maccs_inserted++;
2870 return true;
2871 }
2872
2873 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2874 OP2 and which we know is used in statements that can be, together with the
2875 multiplication, converted to FMAs, perform the transformation. */
2876
2877 static void
2878 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2879 {
2880 tree type = TREE_TYPE (mul_result);
2881 gimple *use_stmt;
2882 imm_use_iterator imm_iter;
2883 gcall *fma_stmt;
2884
2885 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2886 {
2887 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2888 tree addop, mulop1 = op1, result = mul_result;
2889 bool negate_p = false;
2890 gimple_seq seq = NULL;
2891
2892 if (is_gimple_debug (use_stmt))
2893 continue;
2894
2895 if (is_gimple_assign (use_stmt)
2896 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
2897 {
2898 result = gimple_assign_lhs (use_stmt);
2899 use_operand_p use_p;
2900 gimple *neguse_stmt;
2901 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2902 gsi_remove (&gsi, true);
2903 release_defs (use_stmt);
2904
2905 use_stmt = neguse_stmt;
2906 gsi = gsi_for_stmt (use_stmt);
2907 negate_p = true;
2908 }
2909
2910 tree cond, else_value, ops[3];
2911 tree_code code;
2912 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
2913 ops, &else_value))
2914 gcc_unreachable ();
2915 addop = ops[0] == result ? ops[1] : ops[0];
2916
2917 if (code == MINUS_EXPR)
2918 {
2919 if (ops[0] == result)
2920 /* a * b - c -> a * b + (-c) */
2921 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
2922 else
2923 /* a - b * c -> (-b) * c + a */
2924 negate_p = !negate_p;
2925 }
2926
2927 if (negate_p)
2928 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
2929
2930 if (seq)
2931 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2932
2933 if (cond)
2934 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
2935 op2, addop, else_value);
2936 else
2937 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
2938 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
2939 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
2940 use_stmt));
2941 gsi_replace (&gsi, fma_stmt, true);
2942 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
2943 regardless of where the negation occurs. */
2944 gimple *orig_stmt = gsi_stmt (gsi);
2945 if (fold_stmt (&gsi, follow_all_ssa_edges))
2946 {
2947 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
2948 gcc_unreachable ();
2949 update_stmt (gsi_stmt (gsi));
2950 }
2951
2952 if (dump_file && (dump_flags & TDF_DETAILS))
2953 {
2954 fprintf (dump_file, "Generated FMA ");
2955 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
2956 fprintf (dump_file, "\n");
2957 }
2958
2959 /* If the FMA result is negated in a single use, fold the negation
2960 too. */
2961 orig_stmt = gsi_stmt (gsi);
2962 use_operand_p use_p;
2963 gimple *neg_stmt;
2964 if (is_gimple_call (orig_stmt)
2965 && gimple_call_internal_p (orig_stmt)
2966 && gimple_call_lhs (orig_stmt)
2967 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
2968 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
2969 && is_gimple_assign (neg_stmt)
2970 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
2971 && !stmt_could_throw_p (cfun, neg_stmt))
2972 {
2973 gsi = gsi_for_stmt (neg_stmt);
2974 if (fold_stmt (&gsi, follow_all_ssa_edges))
2975 {
2976 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
2977 gcc_unreachable ();
2978 update_stmt (gsi_stmt (gsi));
2979 if (dump_file && (dump_flags & TDF_DETAILS))
2980 {
2981 fprintf (dump_file, "Folded FMA negation ");
2982 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
2983 fprintf (dump_file, "\n");
2984 }
2985 }
2986 }
2987
2988 widen_mul_stats.fmas_inserted++;
2989 }
2990 }
2991
2992 /* Data necessary to perform the actual transformation from a multiplication
2993 and an addition to an FMA once the decision has been made to do it, and to
2994 then delete the multiplication statement from the function IL. */
2995
2996 struct fma_transformation_info
2997 {
2998 gimple *mul_stmt;
2999 tree mul_result;
3000 tree op1;
3001 tree op2;
3002 };
3003
3004 /* Structure containing the current state of FMA deferring, i.e. whether we are
3005 deferring, whether to continue deferring, and all data necessary to come
3006 back and perform all deferred transformations. */
3007
3008 class fma_deferring_state
3009 {
3010 public:
3011 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3012 do any deferring. */
3013
3014 fma_deferring_state (bool perform_deferring)
3015 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3016 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3017
3018 /* List of FMA candidates for which the transformation has been determined
3019 to be possible but which, at this point in the BB analysis, we do not
3020 consider beneficial. */
3021 auto_vec<fma_transformation_info, 8> m_candidates;
3022
3023 /* Set of multiplication results that are part of already deferred FMA
3024 candidates. */
3025 hash_set<tree> m_mul_result_set;
3026
3027 /* The PHI that supposedly feeds back the result of one FMA to another over
3028 a loop boundary. */
3029 gphi *m_initial_phi;
3030
3031 /* Result of the last produced FMA candidate or NULL if there has not been
3032 one. */
3033 tree m_last_result;
3034
3035 /* If true, deferring might still be profitable. If false, transform all
3036 candidates and no longer defer. */
3037 bool m_deferring_p;
3038 };
3039
3040 /* Transform all deferred FMA candidates and mark STATE as no longer
3041 deferring. */
3042
3043 static void
3044 cancel_fma_deferring (fma_deferring_state *state)
3045 {
3046 if (!state->m_deferring_p)
3047 return;
3048
3049 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3050 {
3051 if (dump_file && (dump_flags & TDF_DETAILS))
3052 fprintf (dump_file, "Generating deferred FMA\n");
3053
3054 const fma_transformation_info &fti = state->m_candidates[i];
3055 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3056
3057 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3058 gsi_remove (&gsi, true);
3059 release_defs (fti.mul_stmt);
3060 }
3061 state->m_deferring_p = false;
3062 }
3063
3064 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3065 Otherwise return NULL. */
3066
3067 static gphi *
3068 result_of_phi (tree op)
3069 {
3070 if (TREE_CODE (op) != SSA_NAME)
3071 return NULL;
3072
3073 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3074 }
3075
3076 /* After processing statements of a BB and recording STATE, return true if the
3077 initial phi is fed by the last FMA candidate result or one such result from
3078 previously processed BBs marked in LAST_RESULT_SET. */
3079
3080 static bool
3081 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3082 hash_set<tree> *last_result_set)
3083 {
3084 ssa_op_iter iter;
3085 use_operand_p use;
3086 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3087 {
3088 tree t = USE_FROM_PTR (use);
3089 if (t == state->m_last_result
3090 || last_result_set->contains (t))
3091 return true;
3092 }
3093
3094 return false;
3095 }
3096
3097 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3098 with uses in additions and subtractions to form fused multiply-add
3099 operations. Returns true if successful and MUL_STMT should be removed.
3100 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3101 on MUL_COND, otherwise it is unconditional.
3102
3103 If STATE indicates that we are deferring FMA transformation, that means
3104 that we do not produce FMAs for basic blocks which look like:
3105
3106 <bb 6>
3107 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3108 _65 = _14 * _16;
3109 accumulator_66 = _65 + accumulator_111;
3110
3111 or its unrolled version, i.e. with several FMA candidates that feed result
3112 of one into the addend of another. Instead, we add them to a list in STATE
3113 and if we later discover an FMA candidate that is not part of such a chain,
3114 we go back and perform all deferred past candidates. */
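
/* Editorial sketch (not part of this file): the simplest source shape this
   function turns into a fused multiply-add. When floating-point
   contraction is not disabled (flag_fp_contract_mode != FP_CONTRACT_OFF)
   and the target supports IFN_FMA, the multiply and the add below are
   combined into one FMA call; negated variants become FMS/FNMA/FNMS as
   noted in convert_mult_to_fma_1. */

static double
fma_candidate_sketch (double a, double b, double c)
{
  double m = a * b;	/* single use ...  */
  return m + c;		/* ... consumed by an addition -> fma (a, b, c).  */
}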
3115
3116 static bool
3117 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3118 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3119 {
3120 tree mul_result = gimple_get_lhs (mul_stmt);
3121 tree type = TREE_TYPE (mul_result);
3122 gimple *use_stmt, *neguse_stmt;
3123 use_operand_p use_p;
3124 imm_use_iterator imm_iter;
3125
3126 if (FLOAT_TYPE_P (type)
3127 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3128 return false;
3129
3130 /* We don't want to do bitfield reduction ops. */
3131 if (INTEGRAL_TYPE_P (type)
3132 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3133 return false;
3134
3135 /* If the target doesn't support it, don't generate it. We assume that
3136 if fma isn't available then fms, fnma or fnms are not either. */
3137 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3138 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3139 return false;
3140
3141 /* If the multiplication has zero uses, it is kept around probably because
3142 of -fnon-call-exceptions. Don't optimize it away in that case,
3143 that is DCE's job. */
3144 if (has_zero_uses (mul_result))
3145 return false;
3146
3147 bool check_defer
3148 = (state->m_deferring_p
3149 && (tree_to_shwi (TYPE_SIZE (type))
3150 <= param_avoid_fma_max_bits));
3151 bool defer = check_defer;
3152 bool seen_negate_p = false;
3153 /* Make sure that the multiplication statement becomes dead after
3154 the transformation, thus that all uses are transformed to FMAs.
3155 This means we assume that an FMA operation has the same cost
3156 as an addition. */
3157 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3158 {
3159 tree result = mul_result;
3160 bool negate_p = false;
3161
3162 use_stmt = USE_STMT (use_p);
3163
3164 if (is_gimple_debug (use_stmt))
3165 continue;
3166
3167 /* For now restrict this operation to single basic blocks. In theory
3168 we would want to support sinking the multiplication in
3169 m = a*b;
3170 if ()
3171 ma = m + c;
3172 else
3173 d = m;
3174 to form a fma in the then block and sink the multiplication to the
3175 else block. */
3176 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3177 return false;
3178
3179 /* A negate on the multiplication leads to FNMA. */
3180 if (is_gimple_assign (use_stmt)
3181 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3182 {
3183 ssa_op_iter iter;
3184 use_operand_p usep;
3185
3186 /* If (due to earlier missed optimizations) we have two
3187 negates of the same value, treat them as equivalent
3188 to a single negate with multiple uses. */
3189 if (seen_negate_p)
3190 return false;
3191
3192 result = gimple_assign_lhs (use_stmt);
3193
3194 /* Make sure the negate statement becomes dead with this
3195 single transformation. */
3196 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3197 &use_p, &neguse_stmt))
3198 return false;
3199
3200 /* Make sure the multiplication isn't also used on that stmt. */
3201 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3202 if (USE_FROM_PTR (usep) == mul_result)
3203 return false;
3204
3205 /* Re-validate. */
3206 use_stmt = neguse_stmt;
3207 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3208 return false;
3209
3210 negate_p = seen_negate_p = true;
3211 }
3212
3213 tree cond, else_value, ops[3];
3214 tree_code code;
3215 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3216 &else_value))
3217 return false;
3218
3219 switch (code)
3220 {
3221 case MINUS_EXPR:
3222 if (ops[1] == result)
3223 negate_p = !negate_p;
3224 break;
3225 case PLUS_EXPR:
3226 break;
3227 default:
3228 /* FMA can only be formed from PLUS and MINUS. */
3229 return false;
3230 }
3231
3232 if (mul_cond && cond != mul_cond)
3233 return false;
3234
3235 if (cond)
3236 {
3237 if (cond == result || else_value == result)
3238 return false;
3239 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3240 return false;
3241 }
3242
3243 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3244 we'll visit later, we might be able to get a more profitable
3245 match with fnma.
3246 OTOH, if we don't, a negate / fma pair has likely lower latency
3247 than a mult / subtract pair. */
3248 if (code == MINUS_EXPR
3249 && !negate_p
3250 && ops[0] == result
3251 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3252 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3253 && TREE_CODE (ops[1]) == SSA_NAME
3254 && has_single_use (ops[1]))
3255 {
3256 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3257 if (is_gimple_assign (stmt2)
3258 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3259 return false;
3260 }
3261
3262 /* We can't handle a * b + a * b. */
3263 if (ops[0] == ops[1])
3264 return false;
3265 /* If deferring, make sure we are not looking at an instruction that
3266 wouldn't have existed if we were not. */
3267 if (state->m_deferring_p
3268 && (state->m_mul_result_set.contains (ops[0])
3269 || state->m_mul_result_set.contains (ops[1])))
3270 return false;
3271
3272 if (check_defer)
3273 {
3274 tree use_lhs = gimple_get_lhs (use_stmt);
3275 if (state->m_last_result)
3276 {
3277 if (ops[1] == state->m_last_result
3278 || ops[0] == state->m_last_result)
3279 defer = true;
3280 else
3281 defer = false;
3282 }
3283 else
3284 {
3285 gcc_checking_assert (!state->m_initial_phi);
3286 gphi *phi;
3287 if (ops[0] == result)
3288 phi = result_of_phi (ops[1]);
3289 else
3290 {
3291 gcc_assert (ops[1] == result);
3292 phi = result_of_phi (ops[0]);
3293 }
3294
3295 if (phi)
3296 {
3297 state->m_initial_phi = phi;
3298 defer = true;
3299 }
3300 else
3301 defer = false;
3302 }
3303
3304 state->m_last_result = use_lhs;
3305 check_defer = false;
3306 }
3307 else
3308 defer = false;
3309
3310 /* While it is possible to validate whether or not the exact form that
3311 we've recognized is available in the backend, the assumption is that
3312 if the deferring logic above did not trigger, the transformation is
3313 never a loss. For instance, suppose the target only has the plain FMA
3314 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3315 MUL+SUB for FMA+NEG, which is still two operations. Consider
3316 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3317 form the two NEGs are independent and could be run in parallel. */
3318 }
3319
3320 if (defer)
3321 {
3322 fma_transformation_info fti;
3323 fti.mul_stmt = mul_stmt;
3324 fti.mul_result = mul_result;
3325 fti.op1 = op1;
3326 fti.op2 = op2;
3327 state->m_candidates.safe_push (fti);
3328 state->m_mul_result_set.add (mul_result);
3329
3330 if (dump_file && (dump_flags & TDF_DETAILS))
3331 {
3332 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3333 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3334 fprintf (dump_file, "\n");
3335 }
3336
3337 return false;
3338 }
3339 else
3340 {
3341 if (state->m_deferring_p)
3342 cancel_fma_deferring (state);
3343 convert_mult_to_fma_1 (mul_result, op1, op2);
3344 return true;
3345 }
3346 }
3347
3348
3349 /* Helper function of match_uaddsub_overflow. Return 1
3350 if USE_STMT is unsigned overflow check ovf != 0 for
3351 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3352 and 0 otherwise. */
3353
3354 static int
3355 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3356 {
3357 enum tree_code ccode = ERROR_MARK;
3358 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3359 if (gimple_code (use_stmt) == GIMPLE_COND)
3360 {
3361 ccode = gimple_cond_code (use_stmt);
3362 crhs1 = gimple_cond_lhs (use_stmt);
3363 crhs2 = gimple_cond_rhs (use_stmt);
3364 }
3365 else if (is_gimple_assign (use_stmt))
3366 {
3367 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3368 {
3369 ccode = gimple_assign_rhs_code (use_stmt);
3370 crhs1 = gimple_assign_rhs1 (use_stmt);
3371 crhs2 = gimple_assign_rhs2 (use_stmt);
3372 }
3373 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3374 {
3375 tree cond = gimple_assign_rhs1 (use_stmt);
3376 if (COMPARISON_CLASS_P (cond))
3377 {
3378 ccode = TREE_CODE (cond);
3379 crhs1 = TREE_OPERAND (cond, 0);
3380 crhs2 = TREE_OPERAND (cond, 1);
3381 }
3382 else
3383 return 0;
3384 }
3385 else
3386 return 0;
3387 }
3388 else
3389 return 0;
3390
3391 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3392 return 0;
3393
3394 enum tree_code code = gimple_assign_rhs_code (stmt);
3395 tree lhs = gimple_assign_lhs (stmt);
3396 tree rhs1 = gimple_assign_rhs1 (stmt);
3397 tree rhs2 = gimple_assign_rhs2 (stmt);
3398
3399 switch (ccode)
3400 {
3401 case GT_EXPR:
3402 case LE_EXPR:
3403 /* r = a - b; r > a or r <= a
3404 r = a + b; a > r or a <= r or b > r or b <= r. */
3405 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3406 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3407 && crhs2 == lhs))
3408 return ccode == GT_EXPR ? 1 : -1;
3409 break;
3410 case LT_EXPR:
3411 case GE_EXPR:
3412 /* r = a - b; a < r or a >= r
3413 r = a + b; r < a or r >= a or r < b or r >= b. */
3414 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3415 || (code == PLUS_EXPR && crhs1 == lhs
3416 && (crhs2 == rhs1 || crhs2 == rhs2)))
3417 return ccode == LT_EXPR ? 1 : -1;
3418 break;
3419 default:
3420 break;
3421 }
3422 return 0;
3423 }
3424
3425 /* Recognize for unsigned x
3426 x = y - z;
3427 if (x > y)
3428 where there are other uses of x and replace it with
3429 _7 = SUB_OVERFLOW (y, z);
3430 x = REALPART_EXPR <_7>;
3431 _8 = IMAGPART_EXPR <_7>;
3432 if (_8)
3433 and similarly for addition. */
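
/* Editorial sketch (not part of this file): the effect of the transform at
   the source level, written with the GCC builtin that is implemented with
   the same SUB_OVERFLOW internal function used above. Names are
   illustrative. */

#include <stdbool.h>

static unsigned
sub_with_overflow_sketch (unsigned y, unsigned z, bool *overflowed)
{
  unsigned x;
  /* Replaces the "x = y - z; if (x > y)" idiom recognized above.  */
  *overflowed = __builtin_sub_overflow (y, z, &x);
  return x;
}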
3434
3435 static bool
3436 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3437 enum tree_code code)
3438 {
3439 tree lhs = gimple_assign_lhs (stmt);
3440 tree type = TREE_TYPE (lhs);
3441 use_operand_p use_p;
3442 imm_use_iterator iter;
3443 bool use_seen = false;
3444 bool ovf_use_seen = false;
3445 gimple *use_stmt;
3446
3447 gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3448 if (!INTEGRAL_TYPE_P (type)
3449 || !TYPE_UNSIGNED (type)
3450 || has_zero_uses (lhs)
3451 || has_single_use (lhs)
3452 || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3453 TYPE_MODE (type)) == CODE_FOR_nothing)
3454 return false;
3455
3456 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3457 {
3458 use_stmt = USE_STMT (use_p);
3459 if (is_gimple_debug (use_stmt))
3460 continue;
3461
3462 if (uaddsub_overflow_check_p (stmt, use_stmt))
3463 ovf_use_seen = true;
3464 else
3465 use_seen = true;
3466 if (ovf_use_seen && use_seen)
3467 break;
3468 }
3469
3470 if (!ovf_use_seen || !use_seen)
3471 return false;
3472
3473 tree ctype = build_complex_type (type);
3474 tree rhs1 = gimple_assign_rhs1 (stmt);
3475 tree rhs2 = gimple_assign_rhs2 (stmt);
3476 gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3477 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3478 2, rhs1, rhs2);
3479 tree ctmp = make_ssa_name (ctype);
3480 gimple_call_set_lhs (g, ctmp);
3481 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3482 gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3483 build1 (REALPART_EXPR, type, ctmp));
3484 gsi_replace (gsi, g2, true);
3485 tree ovf = make_ssa_name (type);
3486 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3487 build1 (IMAGPART_EXPR, type, ctmp));
3488 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3489
3490 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3491 {
3492 if (is_gimple_debug (use_stmt))
3493 continue;
3494
3495 int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3496 if (ovf_use == 0)
3497 continue;
3498 if (gimple_code (use_stmt) == GIMPLE_COND)
3499 {
3500 gcond *cond_stmt = as_a <gcond *> (use_stmt);
3501 gimple_cond_set_lhs (cond_stmt, ovf);
3502 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3503 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3504 }
3505 else
3506 {
3507 gcc_checking_assert (is_gimple_assign (use_stmt));
3508 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3509 {
3510 gimple_assign_set_rhs1 (use_stmt, ovf);
3511 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3512 gimple_assign_set_rhs_code (use_stmt,
3513 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3514 }
3515 else
3516 {
3517 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3518 == COND_EXPR);
3519 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3520 boolean_type_node, ovf,
3521 build_int_cst (type, 0));
3522 gimple_assign_set_rhs1 (use_stmt, cond);
3523 }
3524 }
3525 update_stmt (use_stmt);
3526 }
3527 return true;
3528 }
3529
3530 /* Return true if target has support for divmod. */
3531
3532 static bool
3533 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3534 {
3535 /* If target supports hardware divmod insn, use it for divmod. */
3536 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3537 return true;
3538
3539 /* Check if libfunc for divmod is available. */
3540 rtx libfunc = optab_libfunc (divmod_optab, mode);
3541 if (libfunc != NULL_RTX)
3542 {
3543 /* If optab_handler exists for div_optab, perhaps in a wider mode,
3544 we don't want to use the libfunc even if it exists for the given mode. */
3545 machine_mode div_mode;
3546 FOR_EACH_MODE_FROM (div_mode, mode)
3547 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3548 return false;
3549
3550 return targetm.expand_divmod_libfunc != NULL;
3551 }
3552
3553 return false;
3554 }
3555
3556 /* Check if stmt is candidate for divmod transform. */
3557
3558 static bool
3559 divmod_candidate_p (gassign *stmt)
3560 {
3561 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3562 machine_mode mode = TYPE_MODE (type);
3563 optab divmod_optab, div_optab;
3564
3565 if (TYPE_UNSIGNED (type))
3566 {
3567 divmod_optab = udivmod_optab;
3568 div_optab = udiv_optab;
3569 }
3570 else
3571 {
3572 divmod_optab = sdivmod_optab;
3573 div_optab = sdiv_optab;
3574 }
3575
3576 tree op1 = gimple_assign_rhs1 (stmt);
3577 tree op2 = gimple_assign_rhs2 (stmt);
3578
3579 /* Disable the transform if either operand is a constant, since
3580 division-by-constant may have a specialized expansion. */
3581 if (CONSTANT_CLASS_P (op1))
3582 return false;
3583
3584 if (CONSTANT_CLASS_P (op2))
3585 {
3586 if (integer_pow2p (op2))
3587 return false;
3588
3589 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
3590 && TYPE_PRECISION (type) <= BITS_PER_WORD)
3591 return false;
3592
3593 /* If the divisor is not a power of 2 and the precision is wider than
3594 a HWI, expand_divmod punts on it, so in that case it is better
3595 to use the divmod optab or libfunc. Similarly if choose_multiplier
3596 might need pre/post shifts of BITS_PER_WORD or more. */
3597 }
3598
3599 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3600 expand using the [su]divv optabs. */
3601 if (TYPE_OVERFLOW_TRAPS (type))
3602 return false;
3603
3604 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3605 return false;
3606
3607 return true;
3608 }
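/* Editorial example: with variable operands,

     q = a / b;
     r = a % b;

   the division is a candidate, whereas division or modulo by a power of
   two, or a narrow division by some other constant, is rejected above and
   left to the specialized division-by-constant expansion instead.  */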
3609
3610 /* This function looks for:
3611 t1 = a TRUNC_DIV_EXPR b;
3612 t2 = a TRUNC_MOD_EXPR b;
3613 and transforms it to the following sequence:
3614 complex_tmp = DIVMOD (a, b);
3615 t1 = REALPART_EXPR (complex_tmp);
3616 t2 = IMAGPART_EXPR (complex_tmp);
3617 For conditions enabling the transform see divmod_candidate_p().
3618
3619 The pass has three parts:
3620 1) Find top_stmt, a trunc_div or trunc_mod stmt that dominates all
3621 other trunc_div_expr and trunc_mod_expr stmts.
3622 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
3623 to stmts vector.
3624 3) Insert DIVMOD call just before top_stmt and update entries in
3625 stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
3626 IMAGPART_EXPR for mod). */
3627
3628 static bool
3629 convert_to_divmod (gassign *stmt)
3630 {
3631 if (stmt_can_throw_internal (cfun, stmt)
3632 || !divmod_candidate_p (stmt))
3633 return false;
3634
3635 tree op1 = gimple_assign_rhs1 (stmt);
3636 tree op2 = gimple_assign_rhs2 (stmt);
3637
3638 imm_use_iterator use_iter;
3639 gimple *use_stmt;
3640 auto_vec<gimple *> stmts;
3641
3642 gimple *top_stmt = stmt;
3643 basic_block top_bb = gimple_bb (stmt);
3644
3645 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
3646 at least stmt and possibly other trunc_div/trunc_mod stmts
3647 having the same operands as stmt. */
3648
3649 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
3650 {
3651 if (is_gimple_assign (use_stmt)
3652 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3653 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3654 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
3655 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
3656 {
3657 if (stmt_can_throw_internal (cfun, use_stmt))
3658 continue;
3659
3660 basic_block bb = gimple_bb (use_stmt);
3661
3662 if (bb == top_bb)
3663 {
3664 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
3665 top_stmt = use_stmt;
3666 }
3667 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
3668 {
3669 top_bb = bb;
3670 top_stmt = use_stmt;
3671 }
3672 }
3673 }
3674
3675 tree top_op1 = gimple_assign_rhs1 (top_stmt);
3676 tree top_op2 = gimple_assign_rhs2 (top_stmt);
3677
3678 stmts.safe_push (top_stmt);
3679 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3680
3681 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
3682 to the stmts vector. The 2nd loop will always add stmt to the stmts
3683 vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so it
3684 ends up adding at least a single trunc_mod_expr stmt. */
3685
3686 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3687 {
3688 if (is_gimple_assign (use_stmt)
3689 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3690 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3691 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3692 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3693 {
3694 if (use_stmt == top_stmt
3695 || stmt_can_throw_internal (cfun, use_stmt)
3696 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3697 continue;
3698
3699 stmts.safe_push (use_stmt);
3700 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3701 div_seen = true;
3702 }
3703 }
3704
3705 if (!div_seen)
3706 return false;
3707
3708 /* Part 3: Create a call to the internal function DIVMOD:
3709 divmod_tmp = DIVMOD (op1, op2). */
3710
3711 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3712 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3713 call_stmt, "divmod_tmp");
3714 gimple_call_set_lhs (call_stmt, res);
3715 /* We rejected throwing statements above. */
3716 gimple_call_set_nothrow (call_stmt, true);
3717
3718 /* Insert the call before top_stmt. */
3719 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3720 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3721
3722 widen_mul_stats.divmod_calls_inserted++;
3723
3724 /* Update all statements in stmts vector:
3725 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3726 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
3727
3728 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
3729 {
3730 tree new_rhs;
3731
3732 switch (gimple_assign_rhs_code (use_stmt))
3733 {
3734 case TRUNC_DIV_EXPR:
3735 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
3736 break;
3737
3738 case TRUNC_MOD_EXPR:
3739 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
3740 break;
3741
3742 default:
3743 gcc_unreachable ();
3744 }
3745
3746 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3747 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
3748 update_stmt (use_stmt);
3749 }
3750
3751 return true;
3752 }
3753
3754 /* Find integer multiplications where the operands are extended from
3755 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3756 where appropriate. */
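/* Editorial example: with 16-bit inputs that are widened before the
   multiply, as in

     int32_t prod = (int32_t) a16 * (int32_t) b16;

   the MULT_EXPR can be replaced by WIDEN_MULT_EXPR so a target
   widening-multiply instruction produces the 32-bit result directly,
   instead of extending both operands and doing a full 32x32 multiply.  */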
3757
3758 namespace {
3759
3760 const pass_data pass_data_optimize_widening_mul =
3761 {
3762 GIMPLE_PASS, /* type */
3763 "widening_mul", /* name */
3764 OPTGROUP_NONE, /* optinfo_flags */
3765 TV_TREE_WIDEN_MUL, /* tv_id */
3766 PROP_ssa, /* properties_required */
3767 0, /* properties_provided */
3768 0, /* properties_destroyed */
3769 0, /* todo_flags_start */
3770 TODO_update_ssa, /* todo_flags_finish */
3771 };
3772
3773 class pass_optimize_widening_mul : public gimple_opt_pass
3774 {
3775 public:
3776 pass_optimize_widening_mul (gcc::context *ctxt)
3777 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3778 {}
3779
3780 /* opt_pass methods: */
3781 virtual bool gate (function *)
3782 {
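/* -fexpensive-optimizations is enabled by default at -O2 and above.  */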
3783 return flag_expensive_optimizations && optimize;
3784 }
3785
3786 virtual unsigned int execute (function *);
3787
3788 }; // class pass_optimize_widening_mul
3789
3790 /* Walker class to perform the transformation in reverse dominance order. */
3791
3792 class math_opts_dom_walker : public dom_walker
3793 {
3794 public:
3795 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
3796 if walking modifies the CFG. */
3797
3798 math_opts_dom_walker (bool *cfg_changed_p)
3799 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
3800 m_cfg_changed_p (cfg_changed_p) {}
3801
3802 /* The actual actions performed in the walk. */
3803
3804 virtual void after_dom_children (basic_block);
3805
3806 /* Set of results of multiply-and-add statement chains that were not
3807 transformed into FMAs because of active deferring. */
3808 hash_set<tree> m_last_result_set;
3809
3810 /* Pointer to the caller's flag that needs to be set if the CFG has
3811 been modified. */
3812 bool *m_cfg_changed_p;
3813 };
3814
3815 void
3816 math_opts_dom_walker::after_dom_children (basic_block bb)
3817 {
3818 gimple_stmt_iterator gsi;
3819
3820 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
3821
3822 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3823 {
3824 gimple *stmt = gsi_stmt (gsi);
3825 enum tree_code code;
3826
3827 if (is_gimple_assign (stmt))
3828 {
3829 code = gimple_assign_rhs_code (stmt);
3830 switch (code)
3831 {
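/* A multiplication may become a widening multiply, a copysign
   expansion, or (together with an addition) a fused multiply-add;
   the conversions below are tried in that order.  */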
3832 case MULT_EXPR:
3833 if (!convert_mult_to_widen (stmt, &gsi)
3834 && !convert_expand_mult_copysign (stmt, &gsi)
3835 && convert_mult_to_fma (stmt,
3836 gimple_assign_rhs1 (stmt),
3837 gimple_assign_rhs2 (stmt),
3838 &fma_state))
3839 {
3840 gsi_remove (&gsi, true);
3841 release_defs (stmt);
3842 continue;
3843 }
3844 break;
3845
3846 case PLUS_EXPR:
3847 case MINUS_EXPR:
3848 if (!convert_plusminus_to_widen (&gsi, stmt, code))
3849 match_uaddsub_overflow (&gsi, stmt, code);
3850 break;
3851
3852 case TRUNC_MOD_EXPR:
3853 convert_to_divmod (as_a<gassign *> (stmt));
3854 break;
3855
3856 default:;
3857 }
3858 }
3859 else if (is_gimple_call (stmt))
3860 {
3861 switch (gimple_call_combined_fn (stmt))
3862 {
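/* pow (x, 2.0) is equivalent to x * x, so it can feed FMA formation
   just like an ordinary multiplication.  */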
3863 CASE_CFN_POW:
3864 if (gimple_call_lhs (stmt)
3865 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3866 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3867 &dconst2)
3868 && convert_mult_to_fma (stmt,
3869 gimple_call_arg (stmt, 0),
3870 gimple_call_arg (stmt, 0),
3871 &fma_state))
3872 {
3873 unlink_stmt_vdef (stmt);
3874 if (gsi_remove (&gsi, true)
3875 && gimple_purge_dead_eh_edges (bb))
3876 *m_cfg_changed_p = true;
3877 release_defs (stmt);
3878 continue;
3879 }
3880 break;
3881
3882 case CFN_COND_MUL:
3883 if (convert_mult_to_fma (stmt,
3884 gimple_call_arg (stmt, 1),
3885 gimple_call_arg (stmt, 2),
3886 &fma_state,
3887 gimple_call_arg (stmt, 0)))
3888
3889 {
3890 gsi_remove (&gsi, true);
3891 release_defs (stmt);
3892 continue;
3893 }
3894 break;
3895
3896 case CFN_LAST:
3897 cancel_fma_deferring (&fma_state);
3898 break;
3899
3900 default:
3901 break;
3902 }
3903 }
3904 gsi_next (&gsi);
3905 }
3906 if (fma_state.m_deferring_p
3907 && fma_state.m_initial_phi)
3908 {
3909 gcc_checking_assert (fma_state.m_last_result);
3910 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
3911 &m_last_result_set))
3912 cancel_fma_deferring (&fma_state);
3913 else
3914 m_last_result_set.add (fma_state.m_last_result);
3915 }
3916 }
3917
3918
3919 unsigned int
3920 pass_optimize_widening_mul::execute (function *fun)
3921 {
3922 bool cfg_changed = false;
3923
3924 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3925 calculate_dominance_info (CDI_DOMINATORS);
3926 renumber_gimple_stmt_uids (cfun);
3927
3928 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3929
3930 statistics_counter_event (fun, "widening multiplications inserted",
3931 widen_mul_stats.widen_mults_inserted);
3932 statistics_counter_event (fun, "widening maccs inserted",
3933 widen_mul_stats.maccs_inserted);
3934 statistics_counter_event (fun, "fused multiply-adds inserted",
3935 widen_mul_stats.fmas_inserted);
3936 statistics_counter_event (fun, "divmod calls inserted",
3937 widen_mul_stats.divmod_calls_inserted);
3938
3939 return cfg_changed ? TODO_cleanup_cfg : 0;
3940 }
3941
3942 } // anon namespace
3943
3944 gimple_opt_pass *
3945 make_pass_optimize_widening_mul (gcc::context *ctxt)
3946 {
3947 return new pass_optimize_widening_mul (ctxt);
3948 }