1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* The first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43     First of all, experiments have shown that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
62     blocks containing the divisions and their common dominators,
63     and walks it twice.  The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70     worst case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
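/* As an illustrative sketch (derived from register_division_in and
   insert_reciprocals below, not taken from any particular testcase): if a
   single block contains

     t0 = a / d;  t1 = b / d;  t2 = c / d;

   each division registers an importance of 2, so the block's occurrence
   ends up with num_divisions == 6.  Because every division is counted
   twice, insert_reciprocals compares 6 / 2 == 3 against the target's
   min_divisions_for_recip_mul threshold N and, if 3 >= N, emits

     reciptmp = 1.0 / d;

   before the first division, after which replace_reciprocal turns each
   division into a multiplication by reciptmp.  */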
86
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118
119 /* This structure represents one basic block that either computes a
120    division, or is a common dominator for basic blocks that compute a
121 division. */
122 struct occurrence {
123 /* The basic block represented by this structure. */
124 basic_block bb = basic_block();
125
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
127 inserted in BB. */
128 tree recip_def = tree();
129
130 /* If non-NULL, the SSA_NAME holding the definition for a squared
131 reciprocal inserted in BB. */
132 tree square_recip_def = tree();
133
134 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
135 was inserted in BB. */
136 gimple *recip_def_stmt = nullptr;
137
138 /* Pointer to a list of "struct occurrence"s for blocks dominated
139 by BB. */
140 struct occurrence *children = nullptr;
141
142   /* Pointer to the next "struct occurrence" in the list of blocks
143 sharing a common dominator. */
144 struct occurrence *next = nullptr;
145
146 /* The number of divisions that are in BB before compute_merit. The
147 number of divisions that are in BB or post-dominate it after
148 compute_merit. */
149 int num_divisions = 0;
150
151 /* True if the basic block has a division, false if it is a common
152 dominator for basic blocks that do. If it is false and trapping
153 math is active, BB is not a candidate for inserting a reciprocal. */
154 bool bb_has_division = false;
155
156   /* Construct a struct occurrence for basic block BB, whose
157 children list is headed by CHILDREN. */
158 occurrence (basic_block bb, struct occurrence *children)
159 : bb (bb), children (children)
160 {
161 bb->aux = this;
162 }
163
164 /* Destroy a struct occurrence and remove it from its basic block. */
165 ~occurrence ()
166 {
167 bb->aux = nullptr;
168 }
169
170 /* Allocate memory for a struct occurrence from OCC_POOL. */
171 static void* operator new (size_t);
172
173 /* Return memory for a struct occurrence to OCC_POOL. */
174 static void operator delete (void*, size_t);
175 };
176
177 static struct
178 {
179 /* Number of 1.0/X ops inserted. */
180 int rdivs_inserted;
181
182 /* Number of 1.0/FUNC ops inserted. */
183 int rfuncs_inserted;
184 } reciprocal_stats;
185
186 static struct
187 {
188 /* Number of cexpi calls inserted. */
189 int inserted;
190
191 /* Number of conversions removed. */
192 int conv_removed;
193
194 } sincos_stats;
195
196 static struct
197 {
198 /* Number of widening multiplication ops inserted. */
199 int widen_mults_inserted;
200
201 /* Number of integer multiply-and-accumulate ops inserted. */
202 int maccs_inserted;
203
204 /* Number of fp fused multiply-add ops inserted. */
205 int fmas_inserted;
206
207 /* Number of divmod calls inserted. */
208 int divmod_calls_inserted;
209 } widen_mul_stats;
210
211 /* The instance of "struct occurrence" representing the highest
212 interesting block in the dominator tree. */
213 static struct occurrence *occ_head;
214
215 /* Allocation pool for getting instances of "struct occurrence". */
216 static object_allocator<occurrence> *occ_pool;
217
218 void* occurrence::operator new (size_t n)
219 {
220 gcc_assert (n == sizeof(occurrence));
221 return occ_pool->allocate_raw ();
222 }
223
224 void occurrence::operator delete (void *occ, size_t n)
225 {
226 gcc_assert (n == sizeof(occurrence));
227 occ_pool->remove_raw (occ);
228 }
229
230 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
231 list of "struct occurrence"s, one per basic block, having IDOM as
232 their common dominator.
233
234 We try to insert NEW_OCC as deep as possible in the tree, and we also
235 insert any other block that is a common dominator for BB and one
236 block already in the tree. */
237
238 static void
239 insert_bb (struct occurrence *new_occ, basic_block idom,
240 struct occurrence **p_head)
241 {
242 struct occurrence *occ, **p_occ;
243
244 for (p_occ = p_head; (occ = *p_occ) != NULL; )
245 {
246 basic_block bb = new_occ->bb, occ_bb = occ->bb;
247 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
248 if (dom == bb)
249 {
250 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
251 from its list. */
252 *p_occ = occ->next;
253 occ->next = new_occ->children;
254 new_occ->children = occ;
255
256 /* Try the next block (it may as well be dominated by BB). */
257 }
258
259 else if (dom == occ_bb)
260 {
261 /* OCC_BB dominates BB. Tail recurse to look deeper. */
262 insert_bb (new_occ, dom, &occ->children);
263 return;
264 }
265
266 else if (dom != idom)
267 {
268 gcc_assert (!dom->aux);
269
270 /* There is a dominator between IDOM and BB, add it and make
271 two children out of NEW_OCC and OCC. First, remove OCC from
272 its list. */
273 *p_occ = occ->next;
274 new_occ->next = occ;
275 occ->next = NULL;
276
277 /* None of the previous blocks has DOM as a dominator: if we tail
278 recursed, we would reexamine them uselessly. Just switch BB with
279 DOM, and go on looking for blocks dominated by DOM. */
280 new_occ = new occurrence (dom, new_occ);
281 }
282
283 else
284 {
285 /* Nothing special, go on with the next element. */
286 p_occ = &occ->next;
287 }
288 }
289
290   /* No place was found deeper under IDOM.  Make BB a direct child of IDOM. */
291 new_occ->next = *p_head;
292 *p_head = new_occ;
293 }
294
295 /* Register that we found a division in BB.
296 IMPORTANCE is a measure of how much weighting to give
297 that division. Use IMPORTANCE = 2 to register a single
298 division. If the division is going to be found multiple
299 times use 1 (as it is with squares). */
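/* For example (a sketch based on execute_cse_reciprocals_1 below): a plain
   division a / x is registered once with IMPORTANCE == 2, while a division
   a / (x * x) is registered twice with IMPORTANCE == 1, so both contribute
   the same total weight of 2 to their basic block.  */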
300
301 static inline void
302 register_division_in (basic_block bb, int importance)
303 {
304 struct occurrence *occ;
305
306 occ = (struct occurrence *) bb->aux;
307 if (!occ)
308 {
309 occ = new occurrence (bb, NULL);
310 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
311 }
312
313 occ->bb_has_division = true;
314 occ->num_divisions += importance;
315 }
316
317
318 /* Compute the number of divisions that postdominate each block in OCC and
319 its children. */
320
321 static void
322 compute_merit (struct occurrence *occ)
323 {
324 struct occurrence *occ_child;
325 basic_block dom = occ->bb;
326
327 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
328 {
329 basic_block bb;
330 if (occ_child->children)
331 compute_merit (occ_child);
332
333 if (flag_exceptions)
334 bb = single_noncomplex_succ (dom);
335 else
336 bb = dom;
337
338 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
339 occ->num_divisions += occ_child->num_divisions;
340 }
341 }
342
343
344 /* Return whether USE_STMT is a floating-point division by DEF. */
345 static inline bool
346 is_division_by (gimple *use_stmt, tree def)
347 {
348 return is_gimple_assign (use_stmt)
349 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
350 && gimple_assign_rhs2 (use_stmt) == def
351 /* Do not recognize x / x as valid division, as we are getting
352            confused later by replacing all immediate uses of x in such
353 a stmt. */
354 && gimple_assign_rhs1 (use_stmt) != def
355 && !stmt_can_throw_internal (cfun, use_stmt);
356 }
357
358 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
359 static inline bool
360 is_mult_by (gimple *use_stmt, tree def, tree a)
361 {
362 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
363 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
364 {
365 tree op0 = gimple_assign_rhs1 (use_stmt);
366 tree op1 = gimple_assign_rhs2 (use_stmt);
367
368 return (op0 == def && op1 == a)
369 || (op0 == a && op1 == def);
370 }
371   return false;
372 }
373
374 /* Return whether USE_STMT is DEF * DEF. */
375 static inline bool
376 is_square_of (gimple *use_stmt, tree def)
377 {
378 return is_mult_by (use_stmt, def, def);
379 }
380
381 /* Return whether USE_STMT is a floating-point division by
382 DEF * DEF. */
383 static inline bool
384 is_division_by_square (gimple *use_stmt, tree def)
385 {
386 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
387 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
388 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
389 && !stmt_can_throw_internal (cfun, use_stmt))
390 {
391 tree denominator = gimple_assign_rhs2 (use_stmt);
392 if (TREE_CODE (denominator) == SSA_NAME)
393 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
394 }
395   return false;
396 }
397
398 /* Walk the subset of the dominator tree rooted at OCC, setting the
399 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
400 the given basic block. The field may be left NULL, of course,
401 if it is not possible or profitable to do the optimization.
402
403 DEF_BSI is an iterator pointing at the statement defining DEF.
404 If RECIP_DEF is set, a dominator already has a computation that can
405 be used.
406
407 If should_insert_square_recip is set, then this also inserts
408 the square of the reciprocal immediately after the definition
409 of the reciprocal. */
410
411 static void
412 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
413 tree def, tree recip_def, tree square_recip_def,
414 int should_insert_square_recip, int threshold)
415 {
416 tree type;
417 gassign *new_stmt, *new_square_stmt;
418 gimple_stmt_iterator gsi;
419 struct occurrence *occ_child;
420
421 if (!recip_def
422 && (occ->bb_has_division || !flag_trapping_math)
423 /* Divide by two as all divisions are counted twice in
424 the costing loop. */
425 && occ->num_divisions / 2 >= threshold)
426 {
427 /* Make a variable with the replacement and substitute it. */
428 type = TREE_TYPE (def);
429 recip_def = create_tmp_reg (type, "reciptmp");
430 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
431 build_one_cst (type), def);
432
433 if (should_insert_square_recip)
434 {
435 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
436 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
437 recip_def, recip_def);
438 }
439
440 if (occ->bb_has_division)
441 {
442 /* Case 1: insert before an existing division. */
443 gsi = gsi_after_labels (occ->bb);
444 while (!gsi_end_p (gsi)
445 && (!is_division_by (gsi_stmt (gsi), def))
446 && (!is_division_by_square (gsi_stmt (gsi), def)))
447 gsi_next (&gsi);
448
449 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
450 if (should_insert_square_recip)
451 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
452 }
453 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
454 {
455 /* Case 2: insert right after the definition. Note that this will
456 never happen if the definition statement can throw, because in
457 that case the sole successor of the statement's basic block will
458 dominate all the uses as well. */
459 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
460 if (should_insert_square_recip)
461 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
462 }
463 else
464 {
465 /* Case 3: insert in a basic block not containing defs/uses. */
466 gsi = gsi_after_labels (occ->bb);
467 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
468 if (should_insert_square_recip)
469 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
470 }
471
472 reciprocal_stats.rdivs_inserted++;
473
474 occ->recip_def_stmt = new_stmt;
475 }
476
477 occ->recip_def = recip_def;
478 occ->square_recip_def = square_recip_def;
479 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
480 insert_reciprocals (def_gsi, occ_child, def, recip_def,
481 square_recip_def, should_insert_square_recip,
482 threshold);
483 }
484
485 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
486 Take as argument the use for (x * x). */
487 static inline void
488 replace_reciprocal_squares (use_operand_p use_p)
489 {
490 gimple *use_stmt = USE_STMT (use_p);
491 basic_block bb = gimple_bb (use_stmt);
492 struct occurrence *occ = (struct occurrence *) bb->aux;
493
494 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
495 && occ->recip_def)
496 {
497 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
498 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
499 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
500 SET_USE (use_p, occ->square_recip_def);
501 fold_stmt_inplace (&gsi);
502 update_stmt (use_stmt);
503 }
504 }
505
506
507 /* Replace the division at USE_P with a multiplication by the reciprocal, if
508 possible. */
509
510 static inline void
511 replace_reciprocal (use_operand_p use_p)
512 {
513 gimple *use_stmt = USE_STMT (use_p);
514 basic_block bb = gimple_bb (use_stmt);
515 struct occurrence *occ = (struct occurrence *) bb->aux;
516
517 if (optimize_bb_for_speed_p (bb)
518 && occ->recip_def && use_stmt != occ->recip_def_stmt)
519 {
520 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
521 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
522 SET_USE (use_p, occ->recip_def);
523 fold_stmt_inplace (&gsi);
524 update_stmt (use_stmt);
525 }
526 }
527
528
529 /* Free OCC and return one more "struct occurrence" to be freed. */
530
531 static struct occurrence *
532 free_bb (struct occurrence *occ)
533 {
534 struct occurrence *child, *next;
535
536 /* First get the two pointers hanging off OCC. */
537 next = occ->next;
538 child = occ->children;
539 delete occ;
540
541 /* Now ensure that we don't recurse unless it is necessary. */
542 if (!child)
543 return next;
544 else
545 {
546 while (next)
547 next = free_bb (next);
548
549 return child;
550 }
551 }
552
553 /* Transform sequences like
554 t = sqrt (a)
555 x = 1.0 / t;
556 r1 = x * x;
557 r2 = a * x;
558 into:
559 t = sqrt (a)
560 r1 = 1.0 / a;
561 r2 = t;
562 x = r1 * r2;
563 depending on the uses of x, r1, r2. This removes one multiplication and
564 allows the sqrt and division operations to execute in parallel.
565 DEF_GSI is the gsi of the initial division by sqrt that defines
566 DEF (x in the example above). */
567
568 static void
569 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
570 {
571 gimple *use_stmt;
572 imm_use_iterator use_iter;
573 gimple *stmt = gsi_stmt (*def_gsi);
574 tree x = def;
575 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
576 tree div_rhs1 = gimple_assign_rhs1 (stmt);
577
578 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
579 || TREE_CODE (div_rhs1) != REAL_CST
580 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
581 return;
582
583 gcall *sqrt_stmt
584 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
585
586 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
587 return;
588
589 switch (gimple_call_combined_fn (sqrt_stmt))
590 {
591 CASE_CFN_SQRT:
592 CASE_CFN_SQRT_FN:
593 break;
594
595 default:
596 return;
597 }
598 tree a = gimple_call_arg (sqrt_stmt, 0);
599
600 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
601
602 /* Statements that use x in x * x. */
603 auto_vec<gimple *> sqr_stmts;
604 /* Statements that use x in a * x. */
605 auto_vec<gimple *> mult_stmts;
606 bool has_other_use = false;
607 bool mult_on_main_path = false;
608
609 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
610 {
611 if (is_gimple_debug (use_stmt))
612 continue;
613 if (is_square_of (use_stmt, x))
614 {
615 sqr_stmts.safe_push (use_stmt);
616 if (gimple_bb (use_stmt) == gimple_bb (stmt))
617 mult_on_main_path = true;
618 }
619 else if (is_mult_by (use_stmt, x, a))
620 {
621 mult_stmts.safe_push (use_stmt);
622 if (gimple_bb (use_stmt) == gimple_bb (stmt))
623 mult_on_main_path = true;
624 }
625 else
626 has_other_use = true;
627 }
628
629 /* In the x * x and a * x cases we just rewire stmt operands or
630 remove multiplications. In the has_other_use case we introduce
631 a multiplication so make sure we don't introduce a multiplication
632 on a path where there was none. */
633 if (has_other_use && !mult_on_main_path)
634 return;
635
636 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
637 return;
638
639 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
640 to be able to compose it from the sqr and mult cases. */
641 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
642 return;
643
644 if (dump_file)
645 {
646 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
647 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
648 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
649 fprintf (dump_file, "\n");
650 }
651
652 bool delete_div = !has_other_use;
653 tree sqr_ssa_name = NULL_TREE;
654 if (!sqr_stmts.is_empty ())
655 {
656 /* r1 = x * x. Transform the original
657 x = 1.0 / t
658 into
659 tmp1 = 1.0 / a
660 r1 = tmp1. */
661
662 sqr_ssa_name
663 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
664
665 if (dump_file)
666 {
667 fprintf (dump_file, "Replacing original division\n");
668 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
669 fprintf (dump_file, "with new division\n");
670 }
671 stmt
672 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
673 gimple_assign_rhs1 (stmt), a);
674 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
675 gsi_remove (def_gsi, true);
676 *def_gsi = gsi_for_stmt (stmt);
677 fold_stmt_inplace (def_gsi);
678 update_stmt (stmt);
679
680 if (dump_file)
681 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
682
683 delete_div = false;
684 gimple *sqr_stmt;
685 unsigned int i;
686 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
687 {
688 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
689 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
690 update_stmt (sqr_stmt);
691 }
692 }
693 if (!mult_stmts.is_empty ())
694 {
695 /* r2 = a * x. Transform this into:
696 r2 = t (The original sqrt (a)). */
697 unsigned int i;
698 gimple *mult_stmt = NULL;
699 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
700 {
701 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
702
703 if (dump_file)
704 {
705 fprintf (dump_file, "Replacing squaring multiplication\n");
706 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
707 fprintf (dump_file, "with assignment\n");
708 }
709 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
710 fold_stmt_inplace (&gsi2);
711 update_stmt (mult_stmt);
712 if (dump_file)
713 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
714 }
715 }
716
717 if (has_other_use)
718 {
719 /* Using the two temporaries tmp1, tmp2 from above
720 the original x is now:
721 x = tmp1 * tmp2. */
722 gcc_assert (orig_sqrt_ssa_name);
723 gcc_assert (sqr_ssa_name);
724
725 gimple *new_stmt
726 = gimple_build_assign (x, MULT_EXPR,
727 orig_sqrt_ssa_name, sqr_ssa_name);
728 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
729 update_stmt (stmt);
730 }
731 else if (delete_div)
732 {
733 /* Remove the original division. */
734 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
735 gsi_remove (&gsi2, true);
736 release_defs (stmt);
737 }
738 else
739 release_ssa_name (x);
740 }
741
742 /* Look for floating-point divisions among DEF's uses, and try to
743 replace them by multiplications with the reciprocal. Add
744 as many statements computing the reciprocal as needed.
745
746 DEF must be a GIMPLE register of a floating-point type. */
747
748 static void
749 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
750 {
751 use_operand_p use_p, square_use_p;
752 imm_use_iterator use_iter, square_use_iter;
753 tree square_def;
754 struct occurrence *occ;
755 int count = 0;
756 int threshold;
757 int square_recip_count = 0;
758 int sqrt_recip_count = 0;
759
760 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
761 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
762
763 /* If DEF is a square (x * x), count the number of divisions by x.
764 If there are more divisions by x than by (DEF * DEF), prefer to optimize
765 the reciprocal of x instead of DEF. This improves cases like:
766 def = x * x
767 t0 = a / def
768 t1 = b / def
769 t2 = c / x
770 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
771 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
772
773 if (is_gimple_assign (def_stmt)
774 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
775 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
776 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
777 {
778 tree op0 = gimple_assign_rhs1 (def_stmt);
779
780 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
781 {
782 gimple *use_stmt = USE_STMT (use_p);
783 if (is_division_by (use_stmt, op0))
784 sqrt_recip_count++;
785 }
786 }
787
788 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
789 {
790 gimple *use_stmt = USE_STMT (use_p);
791 if (is_division_by (use_stmt, def))
792 {
793 register_division_in (gimple_bb (use_stmt), 2);
794 count++;
795 }
796
797 if (is_square_of (use_stmt, def))
798 {
799 square_def = gimple_assign_lhs (use_stmt);
800 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
801 {
802 gimple *square_use_stmt = USE_STMT (square_use_p);
803 if (is_division_by (square_use_stmt, square_def))
804 {
805 /* This is executed twice for each division by a square. */
806 register_division_in (gimple_bb (square_use_stmt), 1);
807 square_recip_count++;
808 }
809 }
810 }
811 }
812
813 /* Square reciprocals were counted twice above. */
814 square_recip_count /= 2;
815
816 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
817 if (sqrt_recip_count > square_recip_count)
818 goto out;
819
820 /* Do the expensive part only if we can hope to optimize something. */
821 if (count + square_recip_count >= threshold && count >= 1)
822 {
823 gimple *use_stmt;
824 for (occ = occ_head; occ; occ = occ->next)
825 {
826 compute_merit (occ);
827 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
828 square_recip_count, threshold);
829 }
830
831 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
832 {
833 if (is_division_by (use_stmt, def))
834 {
835 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
836 replace_reciprocal (use_p);
837 }
838 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
839 {
840 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
841 {
842 /* Find all uses of the square that are divisions and
843                  replace them by multiplications with the inverse. */
844 imm_use_iterator square_iterator;
845 gimple *powmult_use_stmt = USE_STMT (use_p);
846 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
847
848 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
849 square_iterator, powmult_def_name)
850 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
851 {
852 gimple *powmult_use_stmt = USE_STMT (square_use_p);
853 if (is_division_by (powmult_use_stmt, powmult_def_name))
854 replace_reciprocal_squares (square_use_p);
855 }
856 }
857 }
858 }
859 }
860
861 out:
862 for (occ = occ_head; occ; )
863 occ = free_bb (occ);
864
865 occ_head = NULL;
866 }
867
868 /* Return an internal function that implements the reciprocal of CALL,
869 or IFN_LAST if there is no such function that the target supports. */
870
871 internal_fn
872 internal_fn_reciprocal (gcall *call)
873 {
874 internal_fn ifn;
875
876 switch (gimple_call_combined_fn (call))
877 {
878 CASE_CFN_SQRT:
879 CASE_CFN_SQRT_FN:
880 ifn = IFN_RSQRT;
881 break;
882
883 default:
884 return IFN_LAST;
885 }
886
887 tree_pair types = direct_internal_fn_types (ifn, call);
888 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
889 return IFN_LAST;
890
891 return ifn;
892 }
893
894 /* Go through all the floating-point SSA_NAMEs, and call
895 execute_cse_reciprocals_1 on each of them. */
896 namespace {
897
898 const pass_data pass_data_cse_reciprocals =
899 {
900 GIMPLE_PASS, /* type */
901 "recip", /* name */
902 OPTGROUP_NONE, /* optinfo_flags */
903 TV_TREE_RECIP, /* tv_id */
904 PROP_ssa, /* properties_required */
905 0, /* properties_provided */
906 0, /* properties_destroyed */
907 0, /* todo_flags_start */
908 TODO_update_ssa, /* todo_flags_finish */
909 };
910
911 class pass_cse_reciprocals : public gimple_opt_pass
912 {
913 public:
914 pass_cse_reciprocals (gcc::context *ctxt)
915 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
916 {}
917
918 /* opt_pass methods: */
919 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
920 virtual unsigned int execute (function *);
921
922 }; // class pass_cse_reciprocals
923
924 unsigned int
925 pass_cse_reciprocals::execute (function *fun)
926 {
927 basic_block bb;
928 tree arg;
929
930 occ_pool = new object_allocator<occurrence> ("dominators for recip");
931
932 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
933 calculate_dominance_info (CDI_DOMINATORS);
934 calculate_dominance_info (CDI_POST_DOMINATORS);
935
936 if (flag_checking)
937 FOR_EACH_BB_FN (bb, fun)
938 gcc_assert (!bb->aux);
939
940 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
941 if (FLOAT_TYPE_P (TREE_TYPE (arg))
942 && is_gimple_reg (arg))
943 {
944 tree name = ssa_default_def (fun, arg);
945 if (name)
946 execute_cse_reciprocals_1 (NULL, name);
947 }
948
949 FOR_EACH_BB_FN (bb, fun)
950 {
951 tree def;
952
953 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
954 gsi_next (&gsi))
955 {
956 gphi *phi = gsi.phi ();
957 def = PHI_RESULT (phi);
958 if (! virtual_operand_p (def)
959 && FLOAT_TYPE_P (TREE_TYPE (def)))
960 execute_cse_reciprocals_1 (NULL, def);
961 }
962
963 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
964 gsi_next (&gsi))
965 {
966 gimple *stmt = gsi_stmt (gsi);
967
968 if (gimple_has_lhs (stmt)
969 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
970 && FLOAT_TYPE_P (TREE_TYPE (def))
971 && TREE_CODE (def) == SSA_NAME)
972 {
973 execute_cse_reciprocals_1 (&gsi, def);
974 stmt = gsi_stmt (gsi);
975 if (flag_unsafe_math_optimizations
976 && is_gimple_assign (stmt)
977 && gimple_assign_lhs (stmt) == def
978 && !stmt_can_throw_internal (cfun, stmt)
979 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
980 optimize_recip_sqrt (&gsi, def);
981 }
982 }
983
984 if (optimize_bb_for_size_p (bb))
985 continue;
986
987 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
988 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
989 gsi_next (&gsi))
990 {
991 gimple *stmt = gsi_stmt (gsi);
992
993 if (is_gimple_assign (stmt)
994 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
995 {
996 tree arg1 = gimple_assign_rhs2 (stmt);
997 gimple *stmt1;
998
999 if (TREE_CODE (arg1) != SSA_NAME)
1000 continue;
1001
1002 stmt1 = SSA_NAME_DEF_STMT (arg1);
1003
1004 if (is_gimple_call (stmt1)
1005 && gimple_call_lhs (stmt1))
1006 {
1007 bool fail;
1008 imm_use_iterator ui;
1009 use_operand_p use_p;
1010 tree fndecl = NULL_TREE;
1011
1012 gcall *call = as_a <gcall *> (stmt1);
1013 internal_fn ifn = internal_fn_reciprocal (call);
1014 if (ifn == IFN_LAST)
1015 {
1016 fndecl = gimple_call_fndecl (call);
1017 if (!fndecl
1018 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1019 continue;
1020 fndecl = targetm.builtin_reciprocal (fndecl);
1021 if (!fndecl)
1022 continue;
1023 }
1024
1025 /* Check that all uses of the SSA name are divisions,
1026 otherwise replacing the defining statement will do
1027 the wrong thing. */
1028 fail = false;
1029 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1030 {
1031 gimple *stmt2 = USE_STMT (use_p);
1032 if (is_gimple_debug (stmt2))
1033 continue;
1034 if (!is_gimple_assign (stmt2)
1035 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1036 || gimple_assign_rhs1 (stmt2) == arg1
1037 || gimple_assign_rhs2 (stmt2) != arg1)
1038 {
1039 fail = true;
1040 break;
1041 }
1042 }
1043 if (fail)
1044 continue;
1045
1046 gimple_replace_ssa_lhs (call, arg1);
1047 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1048 {
1049 auto_vec<tree, 4> args;
1050 for (unsigned int i = 0;
1051 i < gimple_call_num_args (call); i++)
1052 args.safe_push (gimple_call_arg (call, i));
1053 gcall *stmt2;
1054 if (ifn == IFN_LAST)
1055 stmt2 = gimple_build_call_vec (fndecl, args);
1056 else
1057 stmt2 = gimple_build_call_internal_vec (ifn, args);
1058 gimple_call_set_lhs (stmt2, arg1);
1059 gimple_move_vops (stmt2, call);
1060 gimple_call_set_nothrow (stmt2,
1061 gimple_call_nothrow_p (call));
1062 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1063 gsi_replace (&gsi2, stmt2, true);
1064 }
1065 else
1066 {
1067 if (ifn == IFN_LAST)
1068 gimple_call_set_fndecl (call, fndecl);
1069 else
1070 gimple_call_set_internal_fn (call, ifn);
1071 update_stmt (call);
1072 }
1073 reciprocal_stats.rfuncs_inserted++;
1074
1075 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1076 {
1077 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1078 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1079 fold_stmt_inplace (&gsi);
1080 update_stmt (stmt);
1081 }
1082 }
1083 }
1084 }
1085 }
1086
1087 statistics_counter_event (fun, "reciprocal divs inserted",
1088 reciprocal_stats.rdivs_inserted);
1089 statistics_counter_event (fun, "reciprocal functions inserted",
1090 reciprocal_stats.rfuncs_inserted);
1091
1092 free_dominance_info (CDI_DOMINATORS);
1093 free_dominance_info (CDI_POST_DOMINATORS);
1094 delete occ_pool;
1095 return 0;
1096 }
1097
1098 } // anon namespace
1099
1100 gimple_opt_pass *
1101 make_pass_cse_reciprocals (gcc::context *ctxt)
1102 {
1103 return new pass_cse_reciprocals (ctxt);
1104 }
1105
1106 /* If NAME is the result of a type conversion, look for other
1107 equivalent dominating or dominated conversions, and replace all
1108 uses with the earliest dominating name, removing the redundant
1109 conversions. Return the prevailing name. */
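/* A rough example of the effect of execute_cse_conv_1 (illustrative only):
   given two equivalent casts of the same SSA name

     y1 = (float) x;   (in a block that dominates the next one)
     ...
     y2 = (float) x;

   all uses of y2 are replaced by y1 and the redundant conversion is
   removed, so the sin/cos/cexpi CSE below sees a single argument name.  */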
1110
1111 static tree
1112 execute_cse_conv_1 (tree name)
1113 {
1114 if (SSA_NAME_IS_DEFAULT_DEF (name)
1115 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1116 return name;
1117
1118 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1119
1120 if (!gimple_assign_cast_p (def_stmt))
1121 return name;
1122
1123 tree src = gimple_assign_rhs1 (def_stmt);
1124
1125 if (TREE_CODE (src) != SSA_NAME)
1126 return name;
1127
1128 imm_use_iterator use_iter;
1129 gimple *use_stmt;
1130
1131 /* Find the earliest dominating def. */
1132 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1133 {
1134 if (use_stmt == def_stmt
1135 || !gimple_assign_cast_p (use_stmt))
1136 continue;
1137
1138 tree lhs = gimple_assign_lhs (use_stmt);
1139
1140 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1141 || (gimple_assign_rhs1 (use_stmt)
1142 != gimple_assign_rhs1 (def_stmt))
1143 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1144 continue;
1145
1146 bool use_dominates;
1147 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1148 {
1149 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1150 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1151 gsi_next (&gsi);
1152 use_dominates = !gsi_end_p (gsi);
1153 }
1154 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1155 gimple_bb (def_stmt)))
1156 use_dominates = false;
1157 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1158 gimple_bb (use_stmt)))
1159 use_dominates = true;
1160 else
1161 continue;
1162
1163 if (use_dominates)
1164 {
1165 std::swap (name, lhs);
1166 std::swap (def_stmt, use_stmt);
1167 }
1168 }
1169
1170 /* Now go through all uses of SRC again, replacing the equivalent
1171 dominated conversions. We may replace defs that were not
1172 dominated by the then-prevailing defs when we first visited
1173 them. */
1174 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1175 {
1176 if (use_stmt == def_stmt
1177 || !gimple_assign_cast_p (use_stmt))
1178 continue;
1179
1180 tree lhs = gimple_assign_lhs (use_stmt);
1181
1182 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1183 || (gimple_assign_rhs1 (use_stmt)
1184 != gimple_assign_rhs1 (def_stmt))
1185 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1186 continue;
1187
1188 if (gimple_bb (def_stmt) == gimple_bb (use_stmt)
1189 || dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1190 gimple_bb (def_stmt)))
1191 {
1192 sincos_stats.conv_removed++;
1193
1194 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1195 replace_uses_by (lhs, name);
1196 gsi_remove (&gsi, true);
1197 }
1198 }
1199
1200 return name;
1201 }
1202
1203 /* Records an occurrence at statement USE_STMT in the vector of statements
1204    STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB is not
1205    yet initialized.  Returns true if the occurrence was pushed on
1206 the vector. Adjusts *TOP_BB to be the basic block dominating all
1207 statements in the vector. */
1208
1209 static bool
1210 maybe_record_sincos (vec<gimple *> *stmts,
1211 basic_block *top_bb, gimple *use_stmt)
1212 {
1213 basic_block use_bb = gimple_bb (use_stmt);
1214 if (*top_bb
1215 && (*top_bb == use_bb
1216 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1217 stmts->safe_push (use_stmt);
1218 else if (!*top_bb
1219 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1220 {
1221 stmts->safe_push (use_stmt);
1222 *top_bb = use_bb;
1223 }
1224 else
1225 return false;
1226
1227 return true;
1228 }
1229
1230 /* Look for sin, cos and cexpi calls with the same argument NAME and
1231 create a single call to cexpi CSEing the result in this case.
1232 We first walk over all immediate uses of the argument collecting
1233 statements that we can CSE in a vector and in a second pass replace
1234 the statement rhs with a REALPART or IMAGPART expression on the
1235 result of the cexpi call we insert before the use statement that
1236 dominates all other candidates. */
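/* For instance (a sketch of the rewrite this function performs):

     s = sin (x);
     c = cos (x);

   becomes

     sincostmp = cexpi (x);
     s = IMAGPART_EXPR <sincostmp>;
     c = REALPART_EXPR <sincostmp>;

   with the cexpi call inserted in the block that dominates all the
   recorded uses.  */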
1237
1238 static bool
1239 execute_cse_sincos_1 (tree name)
1240 {
1241 gimple_stmt_iterator gsi;
1242 imm_use_iterator use_iter;
1243 tree fndecl, res, type = NULL_TREE;
1244 gimple *def_stmt, *use_stmt, *stmt;
1245 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1246 auto_vec<gimple *> stmts;
1247 basic_block top_bb = NULL;
1248 int i;
1249 bool cfg_changed = false;
1250
1251 name = execute_cse_conv_1 (name);
1252
1253 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1254 {
1255 if (gimple_code (use_stmt) != GIMPLE_CALL
1256 || !gimple_call_lhs (use_stmt))
1257 continue;
1258
1259 switch (gimple_call_combined_fn (use_stmt))
1260 {
1261 CASE_CFN_COS:
1262 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1263 break;
1264
1265 CASE_CFN_SIN:
1266 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1267 break;
1268
1269 CASE_CFN_CEXPI:
1270 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1271 break;
1272
1273 default:;
1274 continue;
1275 }
1276
1277 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1278 if (!type)
1279 {
1280 type = t;
1281 t = TREE_TYPE (name);
1282 }
1283 /* This checks that NAME has the right type in the first round,
1284 and, in subsequent rounds, that the built_in type is the same
1285 type, or a compatible type. */
1286 if (type != t && !types_compatible_p (type, t))
1287 RETURN_FROM_IMM_USE_STMT (use_iter, false);
1288 }
1289 if (seen_cos + seen_sin + seen_cexpi <= 1)
1290 return false;
1291
1292 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1293 the name def statement. */
1294 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1295 if (!fndecl)
1296 return false;
1297 stmt = gimple_build_call (fndecl, 1, name);
1298 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1299 gimple_call_set_lhs (stmt, res);
1300
1301 def_stmt = SSA_NAME_DEF_STMT (name);
1302 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1303 && gimple_code (def_stmt) != GIMPLE_PHI
1304 && gimple_bb (def_stmt) == top_bb)
1305 {
1306 gsi = gsi_for_stmt (def_stmt);
1307 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1308 }
1309 else
1310 {
1311 gsi = gsi_after_labels (top_bb);
1312 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1313 }
1314 sincos_stats.inserted++;
1315
1316 /* And adjust the recorded old call sites. */
1317 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1318 {
1319 tree rhs = NULL;
1320
1321 switch (gimple_call_combined_fn (use_stmt))
1322 {
1323 CASE_CFN_COS:
1324 rhs = fold_build1 (REALPART_EXPR, type, res);
1325 break;
1326
1327 CASE_CFN_SIN:
1328 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1329 break;
1330
1331 CASE_CFN_CEXPI:
1332 rhs = res;
1333 break;
1334
1335 default:;
1336 gcc_unreachable ();
1337 }
1338
1339 /* Replace call with a copy. */
1340 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1341
1342 gsi = gsi_for_stmt (use_stmt);
1343 gsi_replace (&gsi, stmt, true);
1344 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1345 cfg_changed = true;
1346 }
1347
1348 return cfg_changed;
1349 }
1350
1351 /* To evaluate powi(x,n), the floating point value x raised to the
1352 constant integer exponent n, we use a hybrid algorithm that
1353 combines the "window method" with look-up tables. For an
1354 introduction to exponentiation algorithms and "addition chains",
1355 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1356 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1357 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1358 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1359
1360 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1361 multiplications to inline before calling the system library's pow
1362 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1363 so this default never requires calling pow, powf or powl. */
1364
1365 #ifndef POWI_MAX_MULTS
1366 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1367 #endif
1368
1369 /* The size of the "optimal power tree" lookup table. All
1370 exponents less than this value are simply looked up in the
1371 powi_table below. This threshold is also used to size the
1372 cache of pseudo registers that hold intermediate results. */
1373 #define POWI_TABLE_SIZE 256
1374
1375 /* The size, in bits of the window, used in the "window method"
1376 exponentiation algorithm. This is equivalent to a radix of
1377 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1378 #define POWI_WINDOW_SIZE 3
1379
1380 /* The following table is an efficient representation of an
1381 "optimal power tree". For each value, i, the corresponding
1382    value, j, in the table states that an optimal evaluation
1383 sequence for calculating pow(x,i) can be found by evaluating
1384 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1385 100 integers is given in Knuth's "Seminumerical algorithms". */
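/* Worked example: powi_table[13] == 10, so x**13 is evaluated as
   x**10 * x**3; recursing through the table, powi_table[10] == 5,
   powi_table[5] == 3, powi_table[3] == 2 and powi_table[2] == 1, giving
   the chain x**2, x**3, x**5, x**10, x**13 in five multiplications.  */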
1386
1387 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1388 {
1389 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1390 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1391 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1392 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1393 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1394 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1395 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1396 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1397 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1398 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1399 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1400 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1401 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1402 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1403 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1404 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1405 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1406 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1407 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1408 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1409 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1410 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1411 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1412 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1413 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1414 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1415 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1416 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1417 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1418 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1419 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1420 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1421 };
1422
1423
1424 /* Return the number of multiplications required to calculate
1425 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1426 subroutine of powi_cost. CACHE is an array indicating
1427 which exponents have already been calculated. */
1428
1429 static int
1430 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1431 {
1432 /* If we've already calculated this exponent, then this evaluation
1433 doesn't require any additional multiplications. */
1434 if (cache[n])
1435 return 0;
1436
1437 cache[n] = true;
1438 return powi_lookup_cost (n - powi_table[n], cache)
1439 + powi_lookup_cost (powi_table[n], cache) + 1;
1440 }
1441
1442 /* Return the number of multiplications required to calculate
1443 powi(x,n) for an arbitrary x, given the exponent N. This
1444 function needs to be kept in sync with powi_as_mults below. */
1445
1446 static int
1447 powi_cost (HOST_WIDE_INT n)
1448 {
1449 bool cache[POWI_TABLE_SIZE];
1450 unsigned HOST_WIDE_INT digit;
1451 unsigned HOST_WIDE_INT val;
1452 int result;
1453
1454 if (n == 0)
1455 return 0;
1456
1457 /* Ignore the reciprocal when calculating the cost. */
1458 val = (n < 0) ? -n : n;
1459
1460 /* Initialize the exponent cache. */
1461 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1462 cache[1] = true;
1463
1464 result = 0;
1465
1466 while (val >= POWI_TABLE_SIZE)
1467 {
1468 if (val & 1)
1469 {
1470 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1471 result += powi_lookup_cost (digit, cache)
1472 + POWI_WINDOW_SIZE + 1;
1473 val >>= POWI_WINDOW_SIZE;
1474 }
1475 else
1476 {
1477 val >>= 1;
1478 result++;
1479 }
1480 }
1481
1482 return result + powi_lookup_cost (val, cache);
1483 }
1484
1485 /* Recursive subroutine of powi_as_mults. This function takes the
1486 array, CACHE, of already calculated exponents and an exponent N and
1487 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
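/* Illustrative trace (not from the sources): for N == 300, which is
   >= POWI_TABLE_SIZE and even, the result is built as x**150 squared,
   and 150 is then small enough to be handled through powi_table.  For an
   odd N >= POWI_TABLE_SIZE, a POWI_WINDOW_SIZE-bit digit is peeled off
   and handled through the table instead.  */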
1488
1489 static tree
1490 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1491 HOST_WIDE_INT n, tree *cache)
1492 {
1493 tree op0, op1, ssa_target;
1494 unsigned HOST_WIDE_INT digit;
1495 gassign *mult_stmt;
1496
1497 if (n < POWI_TABLE_SIZE && cache[n])
1498 return cache[n];
1499
1500 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1501
1502 if (n < POWI_TABLE_SIZE)
1503 {
1504 cache[n] = ssa_target;
1505 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1506 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1507 }
1508 else if (n & 1)
1509 {
1510 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1511 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1512 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1513 }
1514 else
1515 {
1516 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1517 op1 = op0;
1518 }
1519
1520 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1521 gimple_set_location (mult_stmt, loc);
1522 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1523
1524 return ssa_target;
1525 }
1526
1527 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1528 This function needs to be kept in sync with powi_cost above. */
1529
1530 static tree
1531 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1532 tree arg0, HOST_WIDE_INT n)
1533 {
1534 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1535 gassign *div_stmt;
1536 tree target;
1537
1538 if (n == 0)
1539 return build_real (type, dconst1);
1540
1541 memset (cache, 0, sizeof (cache));
1542 cache[1] = arg0;
1543
1544 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1545 if (n >= 0)
1546 return result;
1547
1548 /* If the original exponent was negative, reciprocate the result. */
1549 target = make_temp_ssa_name (type, NULL, "powmult");
1550 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1551 build_real (type, dconst1), result);
1552 gimple_set_location (div_stmt, loc);
1553 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1554
1555 return target;
1556 }
1557
1558 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1559 location info LOC. If the arguments are appropriate, create an
1560 equivalent sequence of statements prior to GSI using an optimal
1561    number of multiplications, and return an expression holding the
1562 result. */
1563
1564 static tree
1565 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1566 tree arg0, HOST_WIDE_INT n)
1567 {
1568 /* Avoid largest negative number. */
1569 if (n != -n
1570 && ((n >= -1 && n <= 2)
1571 || (optimize_function_for_speed_p (cfun)
1572 && powi_cost (n) <= POWI_MAX_MULTS)))
1573 return powi_as_mults (gsi, loc, arg0, n);
1574
1575 return NULL_TREE;
1576 }
1577
1578 /* Build a gimple call statement that calls FN with argument ARG.
1579 Set the lhs of the call statement to a fresh SSA name. Insert the
1580 statement prior to GSI's current position, and return the fresh
1581 SSA name. */
1582
1583 static tree
1584 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1585 tree fn, tree arg)
1586 {
1587 gcall *call_stmt;
1588 tree ssa_target;
1589
1590 call_stmt = gimple_build_call (fn, 1, arg);
1591 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1592 gimple_set_lhs (call_stmt, ssa_target);
1593 gimple_set_location (call_stmt, loc);
1594 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1595
1596 return ssa_target;
1597 }
1598
1599 /* Build a gimple binary operation with the given CODE and arguments
1600 ARG0, ARG1, assigning the result to a new SSA name for variable
1601 TARGET. Insert the statement prior to GSI's current position, and
1602    return the fresh SSA name. */
1603
1604 static tree
1605 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1606 const char *name, enum tree_code code,
1607 tree arg0, tree arg1)
1608 {
1609 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1610 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1611 gimple_set_location (stmt, loc);
1612 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1613 return result;
1614 }
1615
1616 /* Build a gimple reference operation with the given CODE and argument
1617 ARG, assigning the result to a new SSA name of TYPE with NAME.
1618 Insert the statement prior to GSI's current position, and return
1619 the fresh SSA name. */
1620
1621 static inline tree
1622 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1623 const char *name, enum tree_code code, tree arg0)
1624 {
1625 tree result = make_temp_ssa_name (type, NULL, name);
1626 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1627 gimple_set_location (stmt, loc);
1628 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1629 return result;
1630 }
1631
1632 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1633 prior to GSI's current position, and return the fresh SSA name. */
1634
1635 static tree
1636 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1637 tree type, tree val)
1638 {
1639 tree result = make_ssa_name (type);
1640 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1641 gimple_set_location (stmt, loc);
1642 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1643 return result;
1644 }
1645
1646 struct pow_synth_sqrt_info
1647 {
1648 bool *factors;
1649 unsigned int deepest;
1650 unsigned int num_mults;
1651 };
1652
1653 /* Return true iff the real value C can be represented as a
1654 sum of powers of 0.5 up to N. That is:
1655 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1656 Record in INFO the various parameters of the synthesis algorithm such
1657 as the factors a[i], the maximum 0.5 power and the number of
1658 multiplications that will be required. */
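/* Worked example (following the loop below): for C == 0.625 and N >= 3,
   0.625 == 0.5 + 0.125, so factors[] == {1, 0, 1}, deepest == 3 and
   num_mults == 1, corresponding to sqrt (x) * sqrt (sqrt (sqrt (x))).  */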
1659
1660 bool
1661 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1662 struct pow_synth_sqrt_info *info)
1663 {
1664 REAL_VALUE_TYPE factor = dconsthalf;
1665 REAL_VALUE_TYPE remainder = c;
1666
1667 info->deepest = 0;
1668 info->num_mults = 0;
1669 memset (info->factors, 0, n * sizeof (bool));
1670
1671 for (unsigned i = 0; i < n; i++)
1672 {
1673 REAL_VALUE_TYPE res;
1674
1675 /* If something inexact happened bail out now. */
1676 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1677 return false;
1678
1679 /* We have hit zero. The number is representable as a sum
1680 of powers of 0.5. */
1681 if (real_equal (&res, &dconst0))
1682 {
1683 info->factors[i] = true;
1684 info->deepest = i + 1;
1685 return true;
1686 }
1687 else if (!REAL_VALUE_NEGATIVE (res))
1688 {
1689 remainder = res;
1690 info->factors[i] = true;
1691 info->num_mults++;
1692 }
1693 else
1694 info->factors[i] = false;
1695
1696 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1697 }
1698 return false;
1699 }
1700
1701 /* Return the tree corresponding to FN being applied
1702 to ARG N times at GSI and LOC.
1703 Look up previous results from CACHE if need be.
1704 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1705
1706 static tree
1707 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1708 tree fn, location_t loc, tree *cache)
1709 {
1710 tree res = cache[n];
1711 if (!res)
1712 {
1713 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1714 res = build_and_insert_call (gsi, loc, fn, prev);
1715 cache[n] = res;
1716 }
1717
1718 return res;
1719 }
1720
1721 /* Print to STREAM the repeated application of function FNAME to ARG
1722 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1723 "foo (foo (x))". */
1724
1725 static void
1726 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1727 unsigned int n)
1728 {
1729 if (n == 0)
1730 fprintf (stream, "%s", arg);
1731 else
1732 {
1733 fprintf (stream, "%s (", fname);
1734 print_nested_fn (stream, fname, arg, n - 1);
1735 fprintf (stream, ")");
1736 }
1737 }
1738
1739 /* Print to STREAM the fractional sequence of sqrt chains
1740 applied to ARG, described by INFO. Used for the dump file. */
1741
1742 static void
1743 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1744 struct pow_synth_sqrt_info *info)
1745 {
1746 for (unsigned int i = 0; i < info->deepest; i++)
1747 {
1748 bool is_set = info->factors[i];
1749 if (is_set)
1750 {
1751 print_nested_fn (stream, "sqrt", arg, i + 1);
1752 if (i != info->deepest - 1)
1753 fprintf (stream, " * ");
1754 }
1755 }
1756 }
1757
1758 /* Print to STREAM a representation of raising ARG to an integer
1759 power N. Used for the dump file. */
1760
1761 static void
1762 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1763 {
1764 if (n > 1)
1765 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1766 else if (n == 1)
1767 fprintf (stream, "%s", arg);
1768 }
1769
1770 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1771 square roots. Place at GSI and LOC. Limit the maximum depth
1772 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1773 result of the expanded sequence or NULL_TREE if the expansion failed.
1774
1775 This routine assumes that ARG1 is a real number with a fractional part
1776 (the integer exponent case will have been handled earlier in
1777 gimple_expand_builtin_pow).
1778
1779 For ARG1 > 0.0:
1780 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1781 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1782 FRAC_PART == ARG1 - WHOLE_PART:
1783 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1784 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1785 if it can be expressed as such, that is if FRAC_PART satisfies:
1786 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1787 where integer a[i] is either 0 or 1.
1788
1789 Example:
1790 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1791 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1792
1793 For ARG1 < 0.0 there are two approaches:
1794 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1795 is calculated as above.
1796
1797 Example:
1798 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1799 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1800
1801 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1802 FRAC_PART := ARG1 - WHOLE_PART
1803 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1804 Example:
1805 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1806 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1807
1808 For ARG1 < 0.0 we choose between (A) and (B) depending on
1809 how many multiplications we'd have to do.
1810 So, for the example in (B): POW (x, -5.875), if we were to
1811 follow algorithm (A) we would produce:
1812 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1813 which contains more multiplications than approach (B).
1814
1815 Hopefully, this approach will eliminate potentially expensive POW library
1816 calls when unsafe floating point math is enabled and allow the compiler to
1817 further optimise the multiplies, square roots and divides produced by this
1818 function. */
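/* As an illustration of the half-series matching performed by
   representable_as_half_series_p: FRAC_PART == 0.625 is 0.101 in binary, so
   a[1] = 1, a[2] = 0, a[3] = 1 and the fractional factor expands to
   SQRT (x) * SQRT (SQRT (SQRT (x))).  */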
1819
1820 static tree
1821 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1822 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1823 {
1824 tree type = TREE_TYPE (arg0);
1825 machine_mode mode = TYPE_MODE (type);
1826 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1827 bool one_over = true;
1828
1829 if (!sqrtfn)
1830 return NULL_TREE;
1831
1832 if (TREE_CODE (arg1) != REAL_CST)
1833 return NULL_TREE;
1834
1835 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1836
1837 gcc_assert (max_depth > 0);
1838 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1839
1840 struct pow_synth_sqrt_info synth_info;
1841 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1842 synth_info.deepest = 0;
1843 synth_info.num_mults = 0;
1844
1845 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1846 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1847
1848 /* The whole and fractional parts of exp. */
1849 REAL_VALUE_TYPE whole_part;
1850 REAL_VALUE_TYPE frac_part;
1851
1852 real_floor (&whole_part, mode, &exp);
1853 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1854
1855
1856 REAL_VALUE_TYPE ceil_whole = dconst0;
1857 REAL_VALUE_TYPE ceil_fract = dconst0;
1858
1859 if (neg_exp)
1860 {
1861 real_ceil (&ceil_whole, mode, &exp);
1862 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1863 }
1864
1865 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1866 return NULL_TREE;
1867
1868 /* Check whether it's more profitable to not use 1.0 / ... */
1869 if (neg_exp)
1870 {
1871 struct pow_synth_sqrt_info alt_synth_info;
1872 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1873 alt_synth_info.deepest = 0;
1874 alt_synth_info.num_mults = 0;
1875
1876 if (representable_as_half_series_p (ceil_fract, max_depth,
1877 &alt_synth_info)
1878 && alt_synth_info.deepest <= synth_info.deepest
1879 && alt_synth_info.num_mults < synth_info.num_mults)
1880 {
1881 whole_part = ceil_whole;
1882 frac_part = ceil_fract;
1883 synth_info.deepest = alt_synth_info.deepest;
1884 synth_info.num_mults = alt_synth_info.num_mults;
1885 memcpy (synth_info.factors, alt_synth_info.factors,
1886 (max_depth + 1) * sizeof (bool));
1887 one_over = false;
1888 }
1889 }
1890
1891 HOST_WIDE_INT n = real_to_integer (&whole_part);
1892 REAL_VALUE_TYPE cint;
1893 real_from_integer (&cint, VOIDmode, n, SIGNED);
1894
1895 if (!real_identical (&whole_part, &cint))
1896 return NULL_TREE;
1897
1898 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1899 return NULL_TREE;
1900
1901 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1902
1903 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1904
1905 /* Calculate the integer part of the exponent. */
1906 if (n > 1)
1907 {
1908 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1909 if (!integer_res)
1910 return NULL_TREE;
1911 }
1912
1913 if (dump_file)
1914 {
1915 char string[64];
1916
1917 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1918 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1919
1920 if (neg_exp)
1921 {
1922 if (one_over)
1923 {
1924 fprintf (dump_file, "1.0 / (");
1925 dump_integer_part (dump_file, "x", n);
1926 if (n > 0)
1927 fprintf (dump_file, " * ");
1928 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1929 fprintf (dump_file, ")");
1930 }
1931 else
1932 {
1933 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1934 fprintf (dump_file, " / (");
1935 dump_integer_part (dump_file, "x", n);
1936 fprintf (dump_file, ")");
1937 }
1938 }
1939 else
1940 {
1941 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1942 if (n > 0)
1943 fprintf (dump_file, " * ");
1944 dump_integer_part (dump_file, "x", n);
1945 }
1946
1947 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1948 }
1949
1950
1951 tree fract_res = NULL_TREE;
1952 cache[0] = arg0;
1953
1954 /* Calculate the fractional part of the exponent. */
1955 for (unsigned i = 0; i < synth_info.deepest; i++)
1956 {
1957 if (synth_info.factors[i])
1958 {
1959 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1960
1961 if (!fract_res)
1962 fract_res = sqrt_chain;
1963
1964 else
1965 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1966 fract_res, sqrt_chain);
1967 }
1968 }
1969
1970 tree res = NULL_TREE;
1971
1972 if (neg_exp)
1973 {
1974 if (one_over)
1975 {
1976 if (n > 0)
1977 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1978 fract_res, integer_res);
1979 else
1980 res = fract_res;
1981
1982 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1983 build_real (type, dconst1), res);
1984 }
1985 else
1986 {
1987 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1988 fract_res, integer_res);
1989 }
1990 }
1991 else
1992 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1993 fract_res, integer_res);
1994 return res;
1995 }
1996
1997 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1998 with location info LOC. If possible, create an equivalent and
1999 less expensive sequence of statements prior to GSI, and return an
2000 expression holding the result. */
2001
2002 static tree
2003 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2004 tree arg0, tree arg1)
2005 {
2006 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2007 REAL_VALUE_TYPE c2, dconst3;
2008 HOST_WIDE_INT n;
2009 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2010 machine_mode mode;
2011 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2012 bool hw_sqrt_exists, c_is_int, c2_is_int;
2013
2014 dconst1_4 = dconst1;
2015 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2016
2017 /* If the exponent isn't a constant, there's nothing of interest
2018 to be done. */
2019 if (TREE_CODE (arg1) != REAL_CST)
2020 return NULL_TREE;
2021
2022 /* Don't perform the operation if flag_signaling_nans is on
2023 and the operand is a signaling NaN. */
2024 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2025 && ((TREE_CODE (arg0) == REAL_CST
2026 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2027 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2028 return NULL_TREE;
2029
2030 /* If the exponent is equivalent to an integer, expand to an optimal
2031 multiplication sequence when profitable. */
2032 c = TREE_REAL_CST (arg1);
2033 n = real_to_integer (&c);
2034 real_from_integer (&cint, VOIDmode, n, SIGNED);
2035 c_is_int = real_identical (&c, &cint);
2036
2037 if (c_is_int
2038 && ((n >= -1 && n <= 2)
2039 || (flag_unsafe_math_optimizations
2040 && speed_p
2041 && powi_cost (n) <= POWI_MAX_MULTS)))
2042 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2043
2044 /* Attempt various optimizations using sqrt and cbrt. */
2045 type = TREE_TYPE (arg0);
2046 mode = TYPE_MODE (type);
2047 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2048
2049 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2050 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2051 sqrt(-0) = -0. */
2052 if (sqrtfn
2053 && real_equal (&c, &dconsthalf)
2054 && !HONOR_SIGNED_ZEROS (mode))
2055 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2056
2057 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2058
2059 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2060 optimizations since 1./3. is not exactly representable. If x
2061 is negative and finite, the correct value of pow(x,1./3.) is
2062 a NaN with the "invalid" exception raised, because the value
2063 of 1./3. actually has an even denominator. The correct value
2064 of cbrt(x) is a negative real value. */
2065 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2066 dconst1_3 = real_value_truncate (mode, dconst_third ());
2067
2068 if (flag_unsafe_math_optimizations
2069 && cbrtfn
2070 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2071 && real_equal (&c, &dconst1_3))
2072 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2073
2074 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2075 if we don't have a hardware sqrt insn. */
2076 dconst1_6 = dconst1_3;
2077 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2078
2079 if (flag_unsafe_math_optimizations
2080 && sqrtfn
2081 && cbrtfn
2082 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2083 && speed_p
2084 && hw_sqrt_exists
2085 && real_equal (&c, &dconst1_6))
2086 {
2087 /* sqrt(x) */
2088 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2089
2090 /* cbrt(sqrt(x)) */
2091 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2092 }
2093
2094
2095 /* Attempt to expand the POW as a product of square root chains.
2096 Expand the 0.25 case even when optimising for size. */
2097 if (flag_unsafe_math_optimizations
2098 && sqrtfn
2099 && hw_sqrt_exists
2100 && (speed_p || real_equal (&c, &dconst1_4))
2101 && !HONOR_SIGNED_ZEROS (mode))
2102 {
2103 unsigned int max_depth = speed_p
2104 ? param_max_pow_sqrt_depth
2105 : 2;
2106
2107 tree expand_with_sqrts
2108 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2109
2110 if (expand_with_sqrts)
2111 return expand_with_sqrts;
2112 }
2113
2114 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2115 n = real_to_integer (&c2);
2116 real_from_integer (&cint, VOIDmode, n, SIGNED);
2117 c2_is_int = real_identical (&c2, &cint);
2118
2119 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2120
2121 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2122 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2123
2124 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2125 different from pow(x, 1./3.) due to rounding and behavior with
2126 negative x, we need to constrain this transformation to unsafe
2127 math and positive x or finite math. */
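/* Worked example (a sketch, assuming unsafe math and nonnegative x):
   for pow (x, 5./3.) we have 3c = 5, so n = 5 and the result is
   powi (x, 5/3) * powi (cbrt (x), 5%3), i.e. x * cbrt (x) * cbrt (x).  */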
2128 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2129 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2130 real_round (&c2, mode, &c2);
2131 n = real_to_integer (&c2);
2132 real_from_integer (&cint, VOIDmode, n, SIGNED);
2133 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2134 real_convert (&c2, mode, &c2);
2135
2136 if (flag_unsafe_math_optimizations
2137 && cbrtfn
2138 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2139 && real_identical (&c2, &c)
2140 && !c2_is_int
2141 && optimize_function_for_speed_p (cfun)
2142 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2143 {
2144 tree powi_x_ndiv3 = NULL_TREE;
2145
2146 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2147 possible or profitable, give up. Skip the degenerate case when
2148 abs(n) < 3, where the result is always 1. */
2149 if (absu_hwi (n) >= 3)
2150 {
2151 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2152 abs_hwi (n / 3));
2153 if (!powi_x_ndiv3)
2154 return NULL_TREE;
2155 }
2156
2157 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2158 as that creates an unnecessary variable. Instead, just produce
2159 either cbrt(x) or cbrt(x) * cbrt(x). */
2160 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2161
2162 if (absu_hwi (n) % 3 == 1)
2163 powi_cbrt_x = cbrt_x;
2164 else
2165 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2166 cbrt_x, cbrt_x);
2167
2168 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2169 if (absu_hwi (n) < 3)
2170 result = powi_cbrt_x;
2171 else
2172 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2173 powi_x_ndiv3, powi_cbrt_x);
2174
2175 /* If n is negative, reciprocate the result. */
2176 if (n < 0)
2177 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2178 build_real (type, dconst1), result);
2179
2180 return result;
2181 }
2182
2183 /* No optimizations succeeded. */
2184 return NULL_TREE;
2185 }
2186
2187 /* ARG is the argument to a cabs builtin call in GSI with location info
2188 LOC. Create a sequence of statements prior to GSI that calculates
2189 sqrt(R*R + I*I), where R and I are the real and imaginary components
2190 of ARG, respectively. Return an expression holding the result. */
2191
2192 static tree
2193 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2194 {
2195 tree real_part, imag_part, addend1, addend2, sum, result;
2196 tree type = TREE_TYPE (TREE_TYPE (arg));
2197 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2198 machine_mode mode = TYPE_MODE (type);
2199
2200 if (!flag_unsafe_math_optimizations
2201 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2202 || !sqrtfn
2203 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2204 return NULL_TREE;
2205
2206 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2207 REALPART_EXPR, arg);
2208 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2209 real_part, real_part);
2210 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2211 IMAGPART_EXPR, arg);
2212 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2213 imag_part, imag_part);
2214 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2215 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2216
2217 return result;
2218 }
2219
2220 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2221 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2222 an optimal number of multiplies, when n is a constant. */
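/* Illustration of the powi expansion (one optimal chain; the actual sequence
   is chosen by gimple_expand_builtin_powi): powi (x, 5) can be computed with
   three multiplies as t1 = x * x; t2 = t1 * t1; result = t2 * x;  */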
2223
2224 namespace {
2225
2226 const pass_data pass_data_cse_sincos =
2227 {
2228 GIMPLE_PASS, /* type */
2229 "sincos", /* name */
2230 OPTGROUP_NONE, /* optinfo_flags */
2231 TV_TREE_SINCOS, /* tv_id */
2232 PROP_ssa, /* properties_required */
2233 PROP_gimple_opt_math, /* properties_provided */
2234 0, /* properties_destroyed */
2235 0, /* todo_flags_start */
2236 TODO_update_ssa, /* todo_flags_finish */
2237 };
2238
2239 class pass_cse_sincos : public gimple_opt_pass
2240 {
2241 public:
2242 pass_cse_sincos (gcc::context *ctxt)
2243 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2244 {}
2245
2246 /* opt_pass methods: */
2247 virtual bool gate (function *)
2248 {
2249 /* We no longer require either sincos or cexp, since powi expansion
2250 piggybacks on this pass. */
2251 return optimize;
2252 }
2253
2254 virtual unsigned int execute (function *);
2255
2256 }; // class pass_cse_sincos
2257
2258 unsigned int
2259 pass_cse_sincos::execute (function *fun)
2260 {
2261 basic_block bb;
2262 bool cfg_changed = false;
2263
2264 calculate_dominance_info (CDI_DOMINATORS);
2265 memset (&sincos_stats, 0, sizeof (sincos_stats));
2266
2267 FOR_EACH_BB_FN (bb, fun)
2268 {
2269 gimple_stmt_iterator gsi;
2270 bool cleanup_eh = false;
2271
2272 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2273 {
2274 gimple *stmt = gsi_stmt (gsi);
2275
2276 /* Only the last stmt in a bb could throw, no need to call
2277 gimple_purge_dead_eh_edges if we change something in the middle
2278 of a basic block. */
2279 cleanup_eh = false;
2280
2281 if (is_gimple_call (stmt)
2282 && gimple_call_lhs (stmt))
2283 {
2284 tree arg, arg0, arg1, result;
2285 HOST_WIDE_INT n;
2286 location_t loc;
2287
2288 switch (gimple_call_combined_fn (stmt))
2289 {
2290 CASE_CFN_COS:
2291 CASE_CFN_SIN:
2292 CASE_CFN_CEXPI:
2293 arg = gimple_call_arg (stmt, 0);
2294 /* Make sure we have either sincos or cexp. */
2295 if (!targetm.libc_has_function (function_c99_math_complex,
2296 TREE_TYPE (arg))
2297 && !targetm.libc_has_function (function_sincos,
2298 TREE_TYPE (arg)))
2299 break;
2300
2301 if (TREE_CODE (arg) == SSA_NAME)
2302 cfg_changed |= execute_cse_sincos_1 (arg);
2303 break;
2304
2305 CASE_CFN_POW:
2306 arg0 = gimple_call_arg (stmt, 0);
2307 arg1 = gimple_call_arg (stmt, 1);
2308
2309 loc = gimple_location (stmt);
2310 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2311
2312 if (result)
2313 {
2314 tree lhs = gimple_get_lhs (stmt);
2315 gassign *new_stmt = gimple_build_assign (lhs, result);
2316 gimple_set_location (new_stmt, loc);
2317 unlink_stmt_vdef (stmt);
2318 gsi_replace (&gsi, new_stmt, true);
2319 cleanup_eh = true;
2320 if (gimple_vdef (stmt))
2321 release_ssa_name (gimple_vdef (stmt));
2322 }
2323 break;
2324
2325 CASE_CFN_POWI:
2326 arg0 = gimple_call_arg (stmt, 0);
2327 arg1 = gimple_call_arg (stmt, 1);
2328 loc = gimple_location (stmt);
2329
2330 if (real_minus_onep (arg0))
2331 {
2332 tree t0, t1, cond, one, minus_one;
2333 gassign *stmt;
2334
2335 t0 = TREE_TYPE (arg0);
2336 t1 = TREE_TYPE (arg1);
2337 one = build_real (t0, dconst1);
2338 minus_one = build_real (t0, dconstm1);
2339
2340 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2341 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2342 arg1, build_int_cst (t1, 1));
2343 gimple_set_location (stmt, loc);
2344 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2345
2346 result = make_temp_ssa_name (t0, NULL, "powi");
2347 stmt = gimple_build_assign (result, COND_EXPR, cond,
2348 minus_one, one);
2349 gimple_set_location (stmt, loc);
2350 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2351 }
2352 else
2353 {
2354 if (!tree_fits_shwi_p (arg1))
2355 break;
2356
2357 n = tree_to_shwi (arg1);
2358 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2359 }
2360
2361 if (result)
2362 {
2363 tree lhs = gimple_get_lhs (stmt);
2364 gassign *new_stmt = gimple_build_assign (lhs, result);
2365 gimple_set_location (new_stmt, loc);
2366 unlink_stmt_vdef (stmt);
2367 gsi_replace (&gsi, new_stmt, true);
2368 cleanup_eh = true;
2369 if (gimple_vdef (stmt))
2370 release_ssa_name (gimple_vdef (stmt));
2371 }
2372 break;
2373
2374 CASE_CFN_CABS:
2375 arg0 = gimple_call_arg (stmt, 0);
2376 loc = gimple_location (stmt);
2377 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2378
2379 if (result)
2380 {
2381 tree lhs = gimple_get_lhs (stmt);
2382 gassign *new_stmt = gimple_build_assign (lhs, result);
2383 gimple_set_location (new_stmt, loc);
2384 unlink_stmt_vdef (stmt);
2385 gsi_replace (&gsi, new_stmt, true);
2386 cleanup_eh = true;
2387 if (gimple_vdef (stmt))
2388 release_ssa_name (gimple_vdef (stmt));
2389 }
2390 break;
2391
2392 default:;
2393 }
2394 }
2395 }
2396 if (cleanup_eh)
2397 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2398 }
2399
2400 statistics_counter_event (fun, "sincos statements inserted",
2401 sincos_stats.inserted);
2402 statistics_counter_event (fun, "conv statements removed",
2403 sincos_stats.conv_removed);
2404
2405 return cfg_changed ? TODO_cleanup_cfg : 0;
2406 }
2407
2408 } // anon namespace
2409
2410 gimple_opt_pass *
2411 make_pass_cse_sincos (gcc::context *ctxt)
2412 {
2413 return new pass_cse_sincos (ctxt);
2414 }
2415
2416 /* Return true if stmt is a type conversion operation that can be stripped
2417 when used in a widening multiply operation. */
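/* For example (hypothetical types): the conversion in
   'unsigned long long t = (unsigned long long) u' with a 32-bit unsigned u
   can be stripped when RESULT_TYPE is the 64-bit type, because the widening
   multiply itself supplies the required zero-extension.  */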
2418 static bool
2419 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2420 {
2421 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2422
2423 if (TREE_CODE (result_type) == INTEGER_TYPE)
2424 {
2425 tree op_type;
2426 tree inner_op_type;
2427
2428 if (!CONVERT_EXPR_CODE_P (rhs_code))
2429 return false;
2430
2431 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2432
2433 /* If the type of OP has the same precision as the result, then
2434 we can strip this conversion. The multiply operation will be
2435 selected to create the correct extension as a by-product. */
2436 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2437 return true;
2438
2439 /* We can also strip a conversion if it preserves the signed-ness of
2440 the operation and doesn't narrow the range. */
2441 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2442
2443 /* If the inner-most type is unsigned, then we can strip any
2444 intermediate widening operation. If it's signed, then the
2445 intermediate widening operation must also be signed. */
2446 if ((TYPE_UNSIGNED (inner_op_type)
2447 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2448 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2449 return true;
2450
2451 return false;
2452 }
2453
2454 return rhs_code == FIXED_CONVERT_EXPR;
2455 }
2456
2457 /* Return true if RHS is a suitable operand for a widening multiplication,
2458 assuming a target type of TYPE.
2459 There are two cases:
2460
2461 - RHS makes some value at least twice as wide. Store that value
2462 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2463
2464 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2465 and store NULL in *TYPE_OUT. */
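/* Illustrative example (hypothetical types): if TYPE is a 64-bit integer
   type and RHS is an SSA name defined by a cast from a 32-bit value, the
   32-bit value is stored in *NEW_RHS_OUT and its type in *TYPE_OUT.  A plain
   INTEGER_CST RHS is stored in *NEW_RHS_OUT with *TYPE_OUT set to NULL so
   that the caller can default it from the other operand.  */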
2466
2467 static bool
2468 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2469 tree *new_rhs_out)
2470 {
2471 gimple *stmt;
2472 tree type1, rhs1;
2473
2474 if (TREE_CODE (rhs) == SSA_NAME)
2475 {
2476 stmt = SSA_NAME_DEF_STMT (rhs);
2477 if (is_gimple_assign (stmt))
2478 {
2479 if (! widening_mult_conversion_strippable_p (type, stmt))
2480 rhs1 = rhs;
2481 else
2482 {
2483 rhs1 = gimple_assign_rhs1 (stmt);
2484
2485 if (TREE_CODE (rhs1) == INTEGER_CST)
2486 {
2487 *new_rhs_out = rhs1;
2488 *type_out = NULL;
2489 return true;
2490 }
2491 }
2492 }
2493 else
2494 rhs1 = rhs;
2495
2496 type1 = TREE_TYPE (rhs1);
2497
2498 if (TREE_CODE (type1) != TREE_CODE (type)
2499 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2500 return false;
2501
2502 *new_rhs_out = rhs1;
2503 *type_out = type1;
2504 return true;
2505 }
2506
2507 if (TREE_CODE (rhs) == INTEGER_CST)
2508 {
2509 *new_rhs_out = rhs;
2510 *type_out = NULL;
2511 return true;
2512 }
2513
2514 return false;
2515 }
2516
2517 /* Return true if STMT performs a widening multiplication, assuming the
2518 output type is TYPE. If so, store the unwidened types of the operands
2519 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2520 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2521 and *TYPE2_OUT would give the operands of the multiplication. */
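/* A sketch (hypothetical 16-bit/32-bit types): for
   'unsigned int lhs = (unsigned int) a * (unsigned int) b' with 16-bit a and
   b, *TYPE1_OUT and *TYPE2_OUT become the 16-bit type and *RHS1_OUT and
   *RHS2_OUT the uncast a and b.  */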
2522
2523 static bool
2524 is_widening_mult_p (gimple *stmt,
2525 tree *type1_out, tree *rhs1_out,
2526 tree *type2_out, tree *rhs2_out)
2527 {
2528 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2529
2530 if (TREE_CODE (type) == INTEGER_TYPE)
2531 {
2532 if (TYPE_OVERFLOW_TRAPS (type))
2533 return false;
2534 }
2535 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2536 return false;
2537
2538 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2539 rhs1_out))
2540 return false;
2541
2542 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2543 rhs2_out))
2544 return false;
2545
2546 if (*type1_out == NULL)
2547 {
2548 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2549 return false;
2550 *type1_out = *type2_out;
2551 }
2552
2553 if (*type2_out == NULL)
2554 {
2555 if (!int_fits_type_p (*rhs2_out, *type1_out))
2556 return false;
2557 *type2_out = *type1_out;
2558 }
2559
2560 /* Ensure that the larger of the two operands comes first. */
2561 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2562 {
2563 std::swap (*type1_out, *type2_out);
2564 std::swap (*rhs1_out, *rhs2_out);
2565 }
2566
2567 return true;
2568 }
2569
2570 /* Check to see if the CALL statement is an invocation of copysign
2571 with 1. being the first argument. */
2572 static bool
2573 is_copysign_call_with_1 (gimple *call)
2574 {
2575 gcall *c = dyn_cast <gcall *> (call);
2576 if (! c)
2577 return false;
2578
2579 enum combined_fn code = gimple_call_combined_fn (c);
2580
2581 if (code == CFN_LAST)
2582 return false;
2583
2584 if (builtin_fn_p (code))
2585 {
2586 switch (as_builtin_fn (code))
2587 {
2588 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2589 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2590 return real_onep (gimple_call_arg (c, 0));
2591 default:
2592 return false;
2593 }
2594 }
2595
2596 if (internal_fn_p (code))
2597 {
2598 switch (as_internal_fn (code))
2599 {
2600 case IFN_COPYSIGN:
2601 return real_onep (gimple_call_arg (c, 0));
2602 default:
2603 return false;
2604 }
2605 }
2606
2607 return false;
2608 }
2609
2610 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2611 This only happens when the xorsign optab is defined.  If the
2612 pattern is not an xorsign pattern or if expansion fails, FALSE is
2613 returned; otherwise TRUE is returned. */
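/* A sketch of the rewrite this performs when the xorsign optab exists:
   t_1 = __builtin_copysign (1.0, y_2);
   z_3 = x_4 * t_1;
   becomes
   z_3 = .XORSIGN (x_4, y_2);  */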
2614 static bool
2615 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2616 {
2617 tree treeop0, treeop1, lhs, type;
2618 location_t loc = gimple_location (stmt);
2619 lhs = gimple_assign_lhs (stmt);
2620 treeop0 = gimple_assign_rhs1 (stmt);
2621 treeop1 = gimple_assign_rhs2 (stmt);
2622 type = TREE_TYPE (lhs);
2623 machine_mode mode = TYPE_MODE (type);
2624
2625 if (HONOR_SNANS (type))
2626 return false;
2627
2628 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2629 {
2630 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2631 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2632 {
2633 call0 = SSA_NAME_DEF_STMT (treeop1);
2634 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2635 return false;
2636
2637 treeop1 = treeop0;
2638 }
2639 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2640 return false;
2641
2642 gcall *c = as_a<gcall*> (call0);
2643 treeop0 = gimple_call_arg (c, 1);
2644
2645 gcall *call_stmt
2646 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2647 gimple_set_lhs (call_stmt, lhs);
2648 gimple_set_location (call_stmt, loc);
2649 gsi_replace (gsi, call_stmt, true);
2650 return true;
2651 }
2652
2653 return false;
2654 }
2655
2656 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2657 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2658 value is true iff we converted the statement. */
2659
2660 static bool
2661 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2662 {
2663 tree lhs, rhs1, rhs2, type, type1, type2;
2664 enum insn_code handler;
2665 scalar_int_mode to_mode, from_mode, actual_mode;
2666 optab op;
2667 int actual_precision;
2668 location_t loc = gimple_location (stmt);
2669 bool from_unsigned1, from_unsigned2;
2670
2671 lhs = gimple_assign_lhs (stmt);
2672 type = TREE_TYPE (lhs);
2673 if (TREE_CODE (type) != INTEGER_TYPE)
2674 return false;
2675
2676 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2677 return false;
2678
2679 to_mode = SCALAR_INT_TYPE_MODE (type);
2680 from_mode = SCALAR_INT_TYPE_MODE (type1);
2681 if (to_mode == from_mode)
2682 return false;
2683
2684 from_unsigned1 = TYPE_UNSIGNED (type1);
2685 from_unsigned2 = TYPE_UNSIGNED (type2);
2686
2687 if (from_unsigned1 && from_unsigned2)
2688 op = umul_widen_optab;
2689 else if (!from_unsigned1 && !from_unsigned2)
2690 op = smul_widen_optab;
2691 else
2692 op = usmul_widen_optab;
2693
2694 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2695 &actual_mode);
2696
2697 if (handler == CODE_FOR_nothing)
2698 {
2699 if (op != smul_widen_optab)
2700 {
2701 /* We can use a signed multiply with unsigned types as long as
2702 there is a wider mode to use, or it is the smaller of the two
2703 types that is unsigned. Note that type1 >= type2, always. */
2704 if ((TYPE_UNSIGNED (type1)
2705 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2706 || (TYPE_UNSIGNED (type2)
2707 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2708 {
2709 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2710 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2711 return false;
2712 }
2713
2714 op = smul_widen_optab;
2715 handler = find_widening_optab_handler_and_mode (op, to_mode,
2716 from_mode,
2717 &actual_mode);
2718
2719 if (handler == CODE_FOR_nothing)
2720 return false;
2721
2722 from_unsigned1 = from_unsigned2 = false;
2723 }
2724 else
2725 return false;
2726 }
2727
2728 /* Ensure that the inputs to the handler are in the correct precision
2729 for the opcode. This will be the full mode size. */
2730 actual_precision = GET_MODE_PRECISION (actual_mode);
2731 if (2 * actual_precision > TYPE_PRECISION (type))
2732 return false;
2733 if (actual_precision != TYPE_PRECISION (type1)
2734 || from_unsigned1 != TYPE_UNSIGNED (type1))
2735 rhs1 = build_and_insert_cast (gsi, loc,
2736 build_nonstandard_integer_type
2737 (actual_precision, from_unsigned1), rhs1);
2738 if (actual_precision != TYPE_PRECISION (type2)
2739 || from_unsigned2 != TYPE_UNSIGNED (type2))
2740 rhs2 = build_and_insert_cast (gsi, loc,
2741 build_nonstandard_integer_type
2742 (actual_precision, from_unsigned2), rhs2);
2743
2744 /* Handle constants. */
2745 if (TREE_CODE (rhs1) == INTEGER_CST)
2746 rhs1 = fold_convert (type1, rhs1);
2747 if (TREE_CODE (rhs2) == INTEGER_CST)
2748 rhs2 = fold_convert (type2, rhs2);
2749
2750 gimple_assign_set_rhs1 (stmt, rhs1);
2751 gimple_assign_set_rhs2 (stmt, rhs2);
2752 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2753 update_stmt (stmt);
2754 widen_mul_stats.widen_mults_inserted++;
2755 return true;
2756 }
2757
2758 /* Process a single gimple statement STMT, which is found at the
2759 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2760 rhs (given by CODE), and try to convert it into a
2761 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2762 is true iff we converted the statement. */
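/* As an illustration (assuming the target provides a suitable
   multiply-and-accumulate pattern): with 32-bit a and b and a 64-bit
   accumulator acc,
   _1 = (long long) a * (long long) b;
   acc_2 = _1 + acc_3;
   can be rewritten as acc_2 = WIDEN_MULT_PLUS_EXPR <a, b, acc_3>.  */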
2763
2764 static bool
2765 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2766 enum tree_code code)
2767 {
2768 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2769 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2770 tree type, type1, type2, optype;
2771 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2772 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2773 optab this_optab;
2774 enum tree_code wmult_code;
2775 enum insn_code handler;
2776 scalar_mode to_mode, from_mode, actual_mode;
2777 location_t loc = gimple_location (stmt);
2778 int actual_precision;
2779 bool from_unsigned1, from_unsigned2;
2780
2781 lhs = gimple_assign_lhs (stmt);
2782 type = TREE_TYPE (lhs);
2783 if (TREE_CODE (type) != INTEGER_TYPE
2784 && TREE_CODE (type) != FIXED_POINT_TYPE)
2785 return false;
2786
2787 if (code == MINUS_EXPR)
2788 wmult_code = WIDEN_MULT_MINUS_EXPR;
2789 else
2790 wmult_code = WIDEN_MULT_PLUS_EXPR;
2791
2792 rhs1 = gimple_assign_rhs1 (stmt);
2793 rhs2 = gimple_assign_rhs2 (stmt);
2794
2795 if (TREE_CODE (rhs1) == SSA_NAME)
2796 {
2797 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2798 if (is_gimple_assign (rhs1_stmt))
2799 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2800 }
2801
2802 if (TREE_CODE (rhs2) == SSA_NAME)
2803 {
2804 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2805 if (is_gimple_assign (rhs2_stmt))
2806 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2807 }
2808
2809 /* Allow for one conversion statement between the multiply
2810 and addition/subtraction statement.  If there are multiple
2811 conversions then we assume they would invalidate this
2812 transformation. If that's not the case then they should have
2813 been folded before now. */
2814 if (CONVERT_EXPR_CODE_P (rhs1_code))
2815 {
2816 conv1_stmt = rhs1_stmt;
2817 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2818 if (TREE_CODE (rhs1) == SSA_NAME)
2819 {
2820 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2821 if (is_gimple_assign (rhs1_stmt))
2822 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2823 }
2824 else
2825 return false;
2826 }
2827 if (CONVERT_EXPR_CODE_P (rhs2_code))
2828 {
2829 conv2_stmt = rhs2_stmt;
2830 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2831 if (TREE_CODE (rhs2) == SSA_NAME)
2832 {
2833 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2834 if (is_gimple_assign (rhs2_stmt))
2835 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2836 }
2837 else
2838 return false;
2839 }
2840
2841 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2842 is_widening_mult_p, but we still need the operands it returns.
2843
2844 It might also appear that it would be sufficient to use the existing
2845 operands of the widening multiply, but that would limit the choice of
2846 multiply-and-accumulate instructions.
2847
2848 If the widened-multiplication result has more than one use, it is
2849 probably wiser not to do the conversion.  Also restrict this operation
2850 to a single basic block to avoid moving the multiply to a different block
2851 with a higher execution frequency. */
2852 if (code == PLUS_EXPR
2853 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2854 {
2855 if (!has_single_use (rhs1)
2856 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2857 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2858 &type2, &mult_rhs2))
2859 return false;
2860 add_rhs = rhs2;
2861 conv_stmt = conv1_stmt;
2862 }
2863 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2864 {
2865 if (!has_single_use (rhs2)
2866 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2867 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2868 &type2, &mult_rhs2))
2869 return false;
2870 add_rhs = rhs1;
2871 conv_stmt = conv2_stmt;
2872 }
2873 else
2874 return false;
2875
2876 to_mode = SCALAR_TYPE_MODE (type);
2877 from_mode = SCALAR_TYPE_MODE (type1);
2878 if (to_mode == from_mode)
2879 return false;
2880
2881 from_unsigned1 = TYPE_UNSIGNED (type1);
2882 from_unsigned2 = TYPE_UNSIGNED (type2);
2883 optype = type1;
2884
2885 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2886 if (from_unsigned1 != from_unsigned2)
2887 {
2888 if (!INTEGRAL_TYPE_P (type))
2889 return false;
2890 /* We can use a signed multiply with unsigned types as long as
2891 there is a wider mode to use, or it is the smaller of the two
2892 types that is unsigned. Note that type1 >= type2, always. */
2893 if ((from_unsigned1
2894 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2895 || (from_unsigned2
2896 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2897 {
2898 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2899 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2900 return false;
2901 }
2902
2903 from_unsigned1 = from_unsigned2 = false;
2904 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2905 false);
2906 }
2907
2908 /* If there was a conversion between the multiply and addition
2909 then we need to make sure it fits a multiply-and-accumulate.
2910 There should be a single mode change which does not change the
2911 value. */
2912 if (conv_stmt)
2913 {
2914 /* We use the original, unmodified data types for this. */
2915 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2916 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2917 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2918 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2919
2920 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2921 {
2922 /* Conversion is a truncate. */
2923 if (TYPE_PRECISION (to_type) < data_size)
2924 return false;
2925 }
2926 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2927 {
2928 /* Conversion is an extend. Check it's the right sort. */
2929 if (TYPE_UNSIGNED (from_type) != is_unsigned
2930 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2931 return false;
2932 }
2933 /* else convert is a no-op for our purposes. */
2934 }
2935
2936 /* Verify that the machine can perform a widening multiply
2937 accumulate in this mode/signedness combination, otherwise
2938 this transformation is likely to pessimize code. */
2939 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2940 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2941 from_mode, &actual_mode);
2942
2943 if (handler == CODE_FOR_nothing)
2944 return false;
2945
2946 /* Ensure that the inputs to the handler are in the correct precision
2947 for the opcode. This will be the full mode size. */
2948 actual_precision = GET_MODE_PRECISION (actual_mode);
2949 if (actual_precision != TYPE_PRECISION (type1)
2950 || from_unsigned1 != TYPE_UNSIGNED (type1))
2951 mult_rhs1 = build_and_insert_cast (gsi, loc,
2952 build_nonstandard_integer_type
2953 (actual_precision, from_unsigned1),
2954 mult_rhs1);
2955 if (actual_precision != TYPE_PRECISION (type2)
2956 || from_unsigned2 != TYPE_UNSIGNED (type2))
2957 mult_rhs2 = build_and_insert_cast (gsi, loc,
2958 build_nonstandard_integer_type
2959 (actual_precision, from_unsigned2),
2960 mult_rhs2);
2961
2962 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2963 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2964
2965 /* Handle constants. */
2966 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2967 mult_rhs1 = fold_convert (type1, mult_rhs1);
2968 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2969 mult_rhs2 = fold_convert (type2, mult_rhs2);
2970
2971 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2972 add_rhs);
2973 update_stmt (gsi_stmt (*gsi));
2974 widen_mul_stats.maccs_inserted++;
2975 return true;
2976 }
2977
2978 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2979 OP2 and which we know is used in statements that can be, together with the
2980 multiplication, converted to FMAs, perform the transformation. */
2981
2982 static void
2983 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2984 {
2985 tree type = TREE_TYPE (mul_result);
2986 gimple *use_stmt;
2987 imm_use_iterator imm_iter;
2988 gcall *fma_stmt;
2989
2990 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2991 {
2992 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2993 tree addop, mulop1 = op1, result = mul_result;
2994 bool negate_p = false;
2995 gimple_seq seq = NULL;
2996
2997 if (is_gimple_debug (use_stmt))
2998 continue;
2999
3000 if (is_gimple_assign (use_stmt)
3001 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3002 {
3003 result = gimple_assign_lhs (use_stmt);
3004 use_operand_p use_p;
3005 gimple *neguse_stmt;
3006 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3007 gsi_remove (&gsi, true);
3008 release_defs (use_stmt);
3009
3010 use_stmt = neguse_stmt;
3011 gsi = gsi_for_stmt (use_stmt);
3012 negate_p = true;
3013 }
3014
3015 tree cond, else_value, ops[3];
3016 tree_code code;
3017 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3018 ops, &else_value))
3019 gcc_unreachable ();
3020 addop = ops[0] == result ? ops[1] : ops[0];
3021
3022 if (code == MINUS_EXPR)
3023 {
3024 if (ops[0] == result)
3025 /* a * b - c -> a * b + (-c) */
3026 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3027 else
3028 /* a - b * c -> (-b) * c + a */
3029 negate_p = !negate_p;
3030 }
3031
3032 if (negate_p)
3033 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3034
3035 if (seq)
3036 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3037
3038 if (cond)
3039 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3040 op2, addop, else_value);
3041 else
3042 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3043 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3044 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3045 use_stmt));
3046 gsi_replace (&gsi, fma_stmt, true);
3047 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3048 regardless of where the negation occurs. */
3049 gimple *orig_stmt = gsi_stmt (gsi);
3050 if (fold_stmt (&gsi, follow_all_ssa_edges))
3051 {
3052 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3053 gcc_unreachable ();
3054 update_stmt (gsi_stmt (gsi));
3055 }
3056
3057 if (dump_file && (dump_flags & TDF_DETAILS))
3058 {
3059 fprintf (dump_file, "Generated FMA ");
3060 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3061 fprintf (dump_file, "\n");
3062 }
3063
3064 /* If the FMA result is negated in a single use, fold the negation
3065 too. */
3066 orig_stmt = gsi_stmt (gsi);
3067 use_operand_p use_p;
3068 gimple *neg_stmt;
3069 if (is_gimple_call (orig_stmt)
3070 && gimple_call_internal_p (orig_stmt)
3071 && gimple_call_lhs (orig_stmt)
3072 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3073 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3074 && is_gimple_assign (neg_stmt)
3075 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3076 && !stmt_could_throw_p (cfun, neg_stmt))
3077 {
3078 gsi = gsi_for_stmt (neg_stmt);
3079 if (fold_stmt (&gsi, follow_all_ssa_edges))
3080 {
3081 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3082 gcc_unreachable ();
3083 update_stmt (gsi_stmt (gsi));
3084 if (dump_file && (dump_flags & TDF_DETAILS))
3085 {
3086 fprintf (dump_file, "Folded FMA negation ");
3087 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3088 fprintf (dump_file, "\n");
3089 }
3090 }
3091 }
3092
3093 widen_mul_stats.fmas_inserted++;
3094 }
3095 }
3096
3097 /* Data necessary to perform the actual transformation from a multiplication
3098 and an addition to an FMA once the decision has been made to do so, and
3099 to then delete the multiplication statement from the function IL. */
3100
3101 struct fma_transformation_info
3102 {
3103 gimple *mul_stmt;
3104 tree mul_result;
3105 tree op1;
3106 tree op2;
3107 };
3108
3109 /* Structure containing the current state of FMA deferring, i.e. whether we are
3110 deferring, whether to continue deferring, and all data necessary to come
3111 back and perform all deferred transformations. */
3112
3113 class fma_deferring_state
3114 {
3115 public:
3116 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3117 do any deferring. */
3118
3119 fma_deferring_state (bool perform_deferring)
3120 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3121 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3122
3123 /* List of FMA candidates for which the transformation has been determined
3124 to be possible but which, at this point in the BB analysis, we do not
3125 consider beneficial. */
3126 auto_vec<fma_transformation_info, 8> m_candidates;
3127
3128 /* Set of results of multiplications that are part of already deferred FMA
3129 candidates. */
3130 hash_set<tree> m_mul_result_set;
3131
3132 /* The PHI that supposedly feeds back the result of one FMA to another over
3133 a loop boundary. */
3134 gphi *m_initial_phi;
3135
3136 /* Result of the last produced FMA candidate or NULL if there has not been
3137 one. */
3138 tree m_last_result;
3139
3140 /* If true, deferring might still be profitable. If false, transform all
3141 candidates and no longer defer. */
3142 bool m_deferring_p;
3143 };
3144
3145 /* Transform all deferred FMA candidates and mark STATE as no longer
3146 deferring. */
3147
3148 static void
3149 cancel_fma_deferring (fma_deferring_state *state)
3150 {
3151 if (!state->m_deferring_p)
3152 return;
3153
3154 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3155 {
3156 if (dump_file && (dump_flags & TDF_DETAILS))
3157 fprintf (dump_file, "Generating deferred FMA\n");
3158
3159 const fma_transformation_info &fti = state->m_candidates[i];
3160 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3161
3162 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3163 gsi_remove (&gsi, true);
3164 release_defs (fti.mul_stmt);
3165 }
3166 state->m_deferring_p = false;
3167 }
3168
3169 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3170 Otherwise return NULL. */
3171
3172 static gphi *
3173 result_of_phi (tree op)
3174 {
3175 if (TREE_CODE (op) != SSA_NAME)
3176 return NULL;
3177
3178 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3179 }
3180
3181 /* After processing statements of a BB and recording STATE, return true if the
3182 initial phi is fed by the last FMA candidate result or one such result from
3183 previously processed BBs marked in LAST_RESULT_SET. */
3184
3185 static bool
3186 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3187 hash_set<tree> *last_result_set)
3188 {
3189 ssa_op_iter iter;
3190 use_operand_p use;
3191 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3192 {
3193 tree t = USE_FROM_PTR (use);
3194 if (t == state->m_last_result
3195 || last_result_set->contains (t))
3196 return true;
3197 }
3198
3199 return false;
3200 }
3201
3202 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3203 with uses in additions and subtractions to form fused multiply-add
3204 operations. Returns true if successful and MUL_STMT should be removed.
3205 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3206 on MUL_COND, otherwise it is unconditional.
3207
3208 If STATE indicates that we are deferring FMA transformation, that means
3209 that we do not produce FMAs for basic blocks which look like:
3210
3211 <bb 6>
3212 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3213 _65 = _14 * _16;
3214 accumulator_66 = _65 + accumulator_111;
3215
3216 or its unrolled version, i.e. with several FMA candidates that feed the result
3217 of one into the addend of another. Instead, we add them to a list in STATE
3218 and if we later discover an FMA candidate that is not part of such a chain,
3219 we go back and perform all deferred past candidates. */
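/* For the example block above, the multiply/add pair would be pushed onto
   STATE->m_candidates rather than converted immediately; once deferring
   turns out not to be profitable, cancel_fma_deferring (above) walks that
   list and performs all the deferred conversions.  */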
3220
3221 static bool
3222 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3223 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3224 {
3225 tree mul_result = gimple_get_lhs (mul_stmt);
3226 tree type = TREE_TYPE (mul_result);
3227 gimple *use_stmt, *neguse_stmt;
3228 use_operand_p use_p;
3229 imm_use_iterator imm_iter;
3230
3231 if (FLOAT_TYPE_P (type)
3232 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3233 return false;
3234
3235 /* We don't want to do bitfield reduction ops. */
3236 if (INTEGRAL_TYPE_P (type)
3237 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3238 return false;
3239
3240 /* If the target doesn't support it, don't generate it. We assume that
3241 if fma isn't available then fms, fnma or fnms are not either. */
3242 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3243 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3244 return false;
3245
3246 /* If the multiplication has zero uses, it is probably kept around because
3247 of -fnon-call-exceptions.  Don't optimize it away in that case;
3248 that is DCE's job. */
3249 if (has_zero_uses (mul_result))
3250 return false;
3251
3252 bool check_defer
3253 = (state->m_deferring_p
3254 && (tree_to_shwi (TYPE_SIZE (type))
3255 <= param_avoid_fma_max_bits));
3256 bool defer = check_defer;
3257 bool seen_negate_p = false;
3258 /* Make sure that the multiplication statement becomes dead after
3259 the transformation, thus that all uses are transformed to FMAs.
3260 This means we assume that an FMA operation has the same cost
3261 as an addition. */
3262 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3263 {
3264 tree result = mul_result;
3265 bool negate_p = false;
3266
3267 use_stmt = USE_STMT (use_p);
3268
3269 if (is_gimple_debug (use_stmt))
3270 continue;
3271
3272 /* For now restrict these operations to single basic blocks.  In theory
3273 we would want to support sinking the multiplication in
3274 m = a*b;
3275 if ()
3276 ma = m + c;
3277 else
3278 d = m;
3279 to form a fma in the then block and sink the multiplication to the
3280 else block. */
3281 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3282 return false;
3283
3284 /* A negate on the multiplication leads to FNMA. */
3285 if (is_gimple_assign (use_stmt)
3286 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3287 {
3288 ssa_op_iter iter;
3289 use_operand_p usep;
3290
3291 /* If (due to earlier missed optimizations) we have two
3292 negates of the same value, treat them as equivalent
3293 to a single negate with multiple uses. */
3294 if (seen_negate_p)
3295 return false;
3296
3297 result = gimple_assign_lhs (use_stmt);
3298
3299 /* Make sure the negate statement becomes dead with this
3300 single transformation. */
3301 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3302 &use_p, &neguse_stmt))
3303 return false;
3304
3305 /* Make sure the multiplication isn't also used on that stmt. */
3306 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3307 if (USE_FROM_PTR (usep) == mul_result)
3308 return false;
3309
3310 /* Re-validate. */
3311 use_stmt = neguse_stmt;
3312 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3313 return false;
3314
3315 negate_p = seen_negate_p = true;
3316 }
3317
3318 tree cond, else_value, ops[3];
3319 tree_code code;
3320 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3321 &else_value))
3322 return false;
3323
3324 switch (code)
3325 {
3326 case MINUS_EXPR:
3327 if (ops[1] == result)
3328 negate_p = !negate_p;
3329 break;
3330 case PLUS_EXPR:
3331 break;
3332 default:
3333 /* FMA can only be formed from PLUS and MINUS. */
3334 return false;
3335 }
3336
3337 if (mul_cond && cond != mul_cond)
3338 return false;
3339
3340 if (cond)
3341 {
3342 if (cond == result || else_value == result)
3343 return false;
3344 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3345 return false;
3346 }
3347
3348 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3349 we'll visit later, we might be able to get a more profitable
3350 match with fnma.
3351 OTOH, if we don't, a negate / fma pair has likely lower latency
3352 than a mult / subtract pair. */
3353 if (code == MINUS_EXPR
3354 && !negate_p
3355 && ops[0] == result
3356 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3357 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3358 && TREE_CODE (ops[1]) == SSA_NAME
3359 && has_single_use (ops[1]))
3360 {
3361 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3362 if (is_gimple_assign (stmt2)
3363 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3364 return false;
3365 }
3366
3367 /* We can't handle a * b + a * b. */
3368 if (ops[0] == ops[1])
3369 return false;
3370 /* If deferring, make sure we are not looking at an instruction that
3371 wouldn't have existed if we were not. */
3372 if (state->m_deferring_p
3373 && (state->m_mul_result_set.contains (ops[0])
3374 || state->m_mul_result_set.contains (ops[1])))
3375 return false;
3376
3377 if (check_defer)
3378 {
3379 tree use_lhs = gimple_get_lhs (use_stmt);
3380 if (state->m_last_result)
3381 {
3382 if (ops[1] == state->m_last_result
3383 || ops[0] == state->m_last_result)
3384 defer = true;
3385 else
3386 defer = false;
3387 }
3388 else
3389 {
3390 gcc_checking_assert (!state->m_initial_phi);
3391 gphi *phi;
3392 if (ops[0] == result)
3393 phi = result_of_phi (ops[1]);
3394 else
3395 {
3396 gcc_assert (ops[1] == result);
3397 phi = result_of_phi (ops[0]);
3398 }
3399
3400 if (phi)
3401 {
3402 state->m_initial_phi = phi;
3403 defer = true;
3404 }
3405 else
3406 defer = false;
3407 }
3408
3409 state->m_last_result = use_lhs;
3410 check_defer = false;
3411 }
3412 else
3413 defer = false;
3414
3415 /* While it is possible to validate whether or not the exact form that
3416 we've recognized is available in the backend, the assumption is that
3417 if the deferring logic above did not trigger, the transformation is
3418 never a loss. For instance, suppose the target only has the plain FMA
3419 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3420 MUL+SUB for FMA+NEG, which is still two operations. Consider
3421 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3422 form the two NEGs are independent and could be run in parallel. */
3423 }
3424
3425 if (defer)
3426 {
3427 fma_transformation_info fti;
3428 fti.mul_stmt = mul_stmt;
3429 fti.mul_result = mul_result;
3430 fti.op1 = op1;
3431 fti.op2 = op2;
3432 state->m_candidates.safe_push (fti);
3433 state->m_mul_result_set.add (mul_result);
3434
3435 if (dump_file && (dump_flags & TDF_DETAILS))
3436 {
3437 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3438 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3439 fprintf (dump_file, "\n");
3440 }
3441
3442 return false;
3443 }
3444 else
3445 {
3446 if (state->m_deferring_p)
3447 cancel_fma_deferring (state);
3448 convert_mult_to_fma_1 (mul_result, op1, op2);
3449 return true;
3450 }
3451 }
3452
3453
3454 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3455 a check for non-zero like:
3456 _1 = x_4(D) * y_5(D);
3457 *res_7(D) = _1;
3458 if (x_4(D) != 0)
3459 goto <bb 3>; [50.00%]
3460 else
3461 goto <bb 4>; [50.00%]
3462
3463 <bb 3> [local count: 536870913]:
3464 _2 = _1 / x_4(D);
3465 _9 = _2 != y_5(D);
3466 _10 = (int) _9;
3467
3468 <bb 4> [local count: 1073741824]:
3469 # iftmp.0_3 = PHI <_10(3), 0(2)>
3470 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3471 optimize the x_4(D) != 0 condition to 1. */
3472
3473 static void
3474 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3475 gimple *div_stmt, bool *cfg_changed)
3476 {
3477 basic_block bb = gimple_bb (cond_stmt);
3478 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3479 return;
3480 edge pred_edge = single_pred_edge (bb);
3481 basic_block pred_bb = pred_edge->src;
3482 if (EDGE_COUNT (pred_bb->succs) != 2)
3483 return;
3484 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3485 edge other_succ_edge = NULL;
3486 if (gimple_code (cond_stmt) == GIMPLE_COND)
3487 {
3488 if (EDGE_COUNT (bb->succs) != 2)
3489 return;
3490 other_succ_edge = EDGE_SUCC (bb, 0);
3491 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3492 {
3493 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3494 other_succ_edge = EDGE_SUCC (bb, 1);
3495 }
3496 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3497 other_succ_edge = EDGE_SUCC (bb, 0);
3498 if (other_edge->dest != other_succ_edge->dest)
3499 return;
3500 }
3501 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3502 return;
3503 gimple *zero_cond = last_stmt (pred_bb);
3504 if (zero_cond == NULL
3505 || gimple_code (zero_cond) != GIMPLE_COND
3506 || (gimple_cond_code (zero_cond)
3507 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3508 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3509 return;
3510 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3511 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3512 return;
3513 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3514 {
3515 /* Allow the divisor to be the result of a same-precision cast
3516 from zero_cond_lhs. */
3517 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3518 if (TREE_CODE (rhs2) != SSA_NAME)
3519 return;
3520 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3521 if (!gimple_assign_cast_p (g)
3522 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3523 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3524 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3525 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3526 return;
3527 }
3528 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3529 mul_stmts.quick_push (div_stmt);
3530 if (is_gimple_debug (gsi_stmt (gsi)))
3531 gsi_next_nondebug (&gsi);
3532 unsigned cast_count = 0;
3533 while (gsi_stmt (gsi) != cond_stmt)
3534 {
3535 /* If the original mul_stmt has a single use, allow it in the same bb;
3536 we are then looking just at __builtin_mul_overflow_p.
3537 In that case the original mul_stmt will be replaced
3538 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3539 gimple *mul_stmt;
3540 unsigned int i;
3541 bool ok = false;
3542 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3543 {
3544 if (gsi_stmt (gsi) == mul_stmt)
3545 {
3546 ok = true;
3547 break;
3548 }
3549 }
3550 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3551 ok = true;
3552 if (!ok)
3553 return;
3554 gsi_next_nondebug (&gsi);
3555 }
3556 if (gimple_code (cond_stmt) == GIMPLE_COND)
3557 {
3558 basic_block succ_bb = other_edge->dest;
3559 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3560 gsi_next (&gpi))
3561 {
3562 gphi *phi = gpi.phi ();
3563 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3564 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3565 if (!operand_equal_p (v1, v2, 0))
3566 return;
3567 }
3568 }
3569 else
3570 {
3571 tree lhs = gimple_assign_lhs (cond_stmt);
3572 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3573 return;
3574 gsi_next_nondebug (&gsi);
3575 if (!gsi_end_p (gsi))
3576 {
3577 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3578 return;
3579 gimple *cast_stmt = gsi_stmt (gsi);
3580 if (!gimple_assign_cast_p (cast_stmt))
3581 return;
3582 tree new_lhs = gimple_assign_lhs (cast_stmt);
3583 gsi_next_nondebug (&gsi);
3584 if (!gsi_end_p (gsi)
3585 || !new_lhs
3586 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3587 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3588 return;
3589 lhs = new_lhs;
3590 }
3591 edge succ_edge = single_succ_edge (bb);
3592 basic_block succ_bb = succ_edge->dest;
3593 gsi = gsi_start_phis (succ_bb);
3594 if (gsi_end_p (gsi))
3595 return;
3596 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3597 gsi_next (&gsi);
3598 if (!gsi_end_p (gsi))
3599 return;
3600 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3601 return;
3602 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3603 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3604 {
3605 tree cond = gimple_assign_rhs1 (cond_stmt);
3606 if (TREE_CODE (cond) == NE_EXPR)
3607 {
3608 if (!operand_equal_p (other_val,
3609 gimple_assign_rhs3 (cond_stmt), 0))
3610 return;
3611 }
3612 else if (!operand_equal_p (other_val,
3613 gimple_assign_rhs2 (cond_stmt), 0))
3614 return;
3615 }
3616 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3617 {
3618 if (!integer_zerop (other_val))
3619 return;
3620 }
3621 else if (!integer_onep (other_val))
3622 return;
3623 }
3624 gcond *zero_gcond = as_a <gcond *> (zero_cond);
3625 if (pred_edge->flags & EDGE_TRUE_VALUE)
3626 gimple_cond_make_true (zero_gcond);
3627 else
3628 gimple_cond_make_false (zero_gcond);
3629 update_stmt (zero_cond);
3630 *cfg_changed = true;
3631 }
3632
3633 /* Helper function for arith_overflow_check_p. Return true
3634 if VAL1 is equal to VAL2 cast to the corresponding integral type
3635 with the other signedness, or vice versa. */
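/* Illustrative example (not from this file): given

     unsigned int u_1 = ...;
     int s_2 = (int) u_1;

   both arith_cast_equal_p (s_2, u_1) and arith_cast_equal_p (u_1, s_2)
   return true, because one SSA name is defined as a cast of the other;
   the callers ensure the cast only changes signedness.  */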
3636
3637 static bool
3638 arith_cast_equal_p (tree val1, tree val2)
3639 {
3640 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3641 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3642 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3643 return false;
3644 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3645 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3646 return true;
3647 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3648 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3649 return true;
3650 return false;
3651 }
3652
3653 /* Helper function of match_arith_overflow. Return 1
3654 if USE_STMT is an unsigned overflow check ovf != 0 for
3655 STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0,
3656 and 0 otherwise. */
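/* Illustrative examples (not from this file), for STMT "r = a + b" in an
   unsigned type:

     if (r < a) ...    returns 1: the condition is true on overflow
     if (a <= r) ...   returns -1: the condition is true when no overflow

   and for STMT "r = x * y" followed by "d = r / x", a use "d != y"
   returns 1 while "d == y" returns -1.  */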
3657
3658 static int
3659 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3660 tree maxval, tree *other)
3661 {
3662 enum tree_code ccode = ERROR_MARK;
3663 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3664 enum tree_code code = gimple_assign_rhs_code (stmt);
3665 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3666 tree rhs1 = gimple_assign_rhs1 (stmt);
3667 tree rhs2 = gimple_assign_rhs2 (stmt);
3668 tree multop = NULL_TREE, divlhs = NULL_TREE;
3669
3670 if (code == MULT_EXPR)
3671 {
3672 if (!is_gimple_assign (use_stmt))
3673 return 0;
3674 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3675 return 0;
3676 if (gimple_assign_rhs1 (use_stmt) != lhs)
3677 return 0;
3678 if (cast_stmt)
3679 {
3680 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3681 multop = rhs2;
3682 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3683 multop = rhs1;
3684 else
3685 return 0;
3686 }
3687 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3688 multop = rhs2;
3689 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3690 multop = rhs1;
3691 else
3692 return 0;
3693 if (stmt_ends_bb_p (use_stmt))
3694 return 0;
3695 divlhs = gimple_assign_lhs (use_stmt);
3696 if (!divlhs)
3697 return 0;
3698 use_operand_p use;
3699 if (!single_imm_use (divlhs, &use, &use_stmt))
3700 return 0;
3701 }
3702 if (gimple_code (use_stmt) == GIMPLE_COND)
3703 {
3704 ccode = gimple_cond_code (use_stmt);
3705 crhs1 = gimple_cond_lhs (use_stmt);
3706 crhs2 = gimple_cond_rhs (use_stmt);
3707 }
3708 else if (is_gimple_assign (use_stmt))
3709 {
3710 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3711 {
3712 ccode = gimple_assign_rhs_code (use_stmt);
3713 crhs1 = gimple_assign_rhs1 (use_stmt);
3714 crhs2 = gimple_assign_rhs2 (use_stmt);
3715 }
3716 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3717 {
3718 tree cond = gimple_assign_rhs1 (use_stmt);
3719 if (COMPARISON_CLASS_P (cond))
3720 {
3721 ccode = TREE_CODE (cond);
3722 crhs1 = TREE_OPERAND (cond, 0);
3723 crhs2 = TREE_OPERAND (cond, 1);
3724 }
3725 else
3726 return 0;
3727 }
3728 else
3729 return 0;
3730 }
3731 else
3732 return 0;
3733
3734 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3735 return 0;
3736
3737 switch (ccode)
3738 {
3739 case GT_EXPR:
3740 case LE_EXPR:
3741 if (maxval)
3742 {
3743 /* r = a + b; r > maxval or r <= maxval */
3744 if (crhs1 == lhs
3745 && TREE_CODE (crhs2) == INTEGER_CST
3746 && tree_int_cst_equal (crhs2, maxval))
3747 return ccode == GT_EXPR ? 1 : -1;
3748 break;
3749 }
3750 /* r = a - b; r > a or r <= a
3751 r = a + b; a > r or a <= r or b > r or b <= r. */
3752 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3753 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3754 && crhs2 == lhs))
3755 return ccode == GT_EXPR ? 1 : -1;
3756 /* r = ~a; b > r or b <= r. */
3757 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3758 {
3759 if (other)
3760 *other = crhs1;
3761 return ccode == GT_EXPR ? 1 : -1;
3762 }
3763 break;
3764 case LT_EXPR:
3765 case GE_EXPR:
3766 if (maxval)
3767 break;
3768 /* r = a - b; a < r or a >= r
3769 r = a + b; r < a or r >= a or r < b or r >= b. */
3770 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3771 || (code == PLUS_EXPR && crhs1 == lhs
3772 && (crhs2 == rhs1 || crhs2 == rhs2)))
3773 return ccode == LT_EXPR ? 1 : -1;
3774 /* r = ~a; r < b or r >= b. */
3775 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3776 {
3777 if (other)
3778 *other = crhs2;
3779 return ccode == LT_EXPR ? 1 : -1;
3780 }
3781 break;
3782 case EQ_EXPR:
3783 case NE_EXPR:
3784 /* r = a * b; _1 = r / a; _1 == b
3785 r = a * b; _1 = r / b; _1 == a
3786 r = a * b; _1 = r / a; _1 != b
3787 r = a * b; _1 = r / b; _1 != a. */
3788 if (code == MULT_EXPR)
3789 {
3790 if (cast_stmt)
3791 {
3792 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3793 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3794 return ccode == NE_EXPR ? 1 : -1;
3795 }
3796 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3797 || (crhs2 == divlhs && crhs1 == multop))
3798 return ccode == NE_EXPR ? 1 : -1;
3799 }
3800 break;
3801 default:
3802 break;
3803 }
3804 return 0;
3805 }
3806
3807 /* Recognize for unsigned x
3808 x = y - z;
3809 if (x > y)
3810 where there are other uses of x and replace it with
3811 _7 = .SUB_OVERFLOW (y, z);
3812 x = REALPART_EXPR <_7>;
3813 _8 = IMAGPART_EXPR <_7>;
3814 if (_8)
3815 and similarly for addition.
3816
3817 Also recognize:
3818 yc = (type) y;
3819 zc = (type) z;
3820 x = yc + zc;
3821 if (x > max)
3822 where y and z have unsigned types with maximum max
3823 and there are other uses of x and all of those cast x
3824 back to that unsigned type and again replace it with
3825 _7 = .ADD_OVERFLOW (y, z);
3826 _9 = REALPART_EXPR <_7>;
3827 _8 = IMAGPART_EXPR <_7>;
3828 if (_8)
3829 and replace (utype) x with _9.
3830
3831 Also recognize:
3832 x = ~z;
3833 if (y > x)
3834 and replace it with
3835 _7 = .ADD_OVERFLOW (y, z);
3836 _8 = IMAGPART_EXPR <_7>;
3837 if (_8)
3838
3839 And also recognize:
3840 z = x * y;
3841 if (x != 0)
3842 goto <bb 3>; [50.00%]
3843 else
3844 goto <bb 4>; [50.00%]
3845
3846 <bb 3> [local count: 536870913]:
3847 _2 = z / x;
3848 _9 = _2 != y;
3849 _10 = (int) _9;
3850
3851 <bb 4> [local count: 1073741824]:
3852 # iftmp.0_3 = PHI <_10(3), 0(2)>
3853 and replace it with
3854 _7 = .MUL_OVERFLOW (x, y);
3855 z = REALPART_EXPR <_7>;
3856 _8 = IMAGPART_EXPR <_7>;
3857 _9 = _8 != 0;
3858 iftmp.0_3 = (int) _9; */
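/* Illustrative sketch (not part of this file): typical source code whose
   GIMPLE matches the patterns above, assuming unsigned int and a target
   with a usable uaddv4 pattern:

     unsigned int
     sat_add (unsigned int y, unsigned int z)
     {
       unsigned int x = y + z;
       if (x < y)    // overflow check, becomes a test of IMAGPART_EXPR
	 x = ~0U;
       return x;
     }

   The addition is rewritten to a single .ADD_OVERFLOW call; x is taken
   from its REALPART_EXPR and the branch tests its IMAGPART_EXPR.  */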
3859
3860 static bool
3861 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3862 enum tree_code code, bool *cfg_changed)
3863 {
3864 tree lhs = gimple_assign_lhs (stmt);
3865 tree type = TREE_TYPE (lhs);
3866 use_operand_p use_p;
3867 imm_use_iterator iter;
3868 bool use_seen = false;
3869 bool ovf_use_seen = false;
3870 gimple *use_stmt;
3871 gimple *add_stmt = NULL;
3872 bool add_first = false;
3873 gimple *cond_stmt = NULL;
3874 gimple *cast_stmt = NULL;
3875 tree cast_lhs = NULL_TREE;
3876
3877 gcc_checking_assert (code == PLUS_EXPR
3878 || code == MINUS_EXPR
3879 || code == MULT_EXPR
3880 || code == BIT_NOT_EXPR);
3881 if (!INTEGRAL_TYPE_P (type)
3882 || !TYPE_UNSIGNED (type)
3883 || has_zero_uses (lhs)
3884 || (code != PLUS_EXPR
3885 && code != MULT_EXPR
3886 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3887 TYPE_MODE (type)) == CODE_FOR_nothing))
3888 return false;
3889
3890 tree rhs1 = gimple_assign_rhs1 (stmt);
3891 tree rhs2 = gimple_assign_rhs2 (stmt);
3892 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3893 {
3894 use_stmt = USE_STMT (use_p);
3895 if (is_gimple_debug (use_stmt))
3896 continue;
3897
3898 tree other = NULL_TREE;
3899 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3900 {
3901 if (code == BIT_NOT_EXPR)
3902 {
3903 gcc_assert (other);
3904 if (TREE_CODE (other) != SSA_NAME)
3905 return false;
3906 if (rhs2 == NULL)
3907 rhs2 = other;
3908 else
3909 return false;
3910 cond_stmt = use_stmt;
3911 }
3912 ovf_use_seen = true;
3913 }
3914 else
3915 {
3916 use_seen = true;
3917 if (code == MULT_EXPR
3918 && cast_stmt == NULL
3919 && gimple_assign_cast_p (use_stmt))
3920 {
3921 cast_lhs = gimple_assign_lhs (use_stmt);
3922 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3923 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3924 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3925 == TYPE_PRECISION (TREE_TYPE (lhs))))
3926 cast_stmt = use_stmt;
3927 else
3928 cast_lhs = NULL_TREE;
3929 }
3930 }
3931 if (ovf_use_seen && use_seen)
3932 break;
3933 }
3934
3935 if (!ovf_use_seen
3936 && code == MULT_EXPR
3937 && cast_stmt)
3938 {
3939 if (TREE_CODE (rhs1) != SSA_NAME
3940 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3941 return false;
3942 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3943 {
3944 use_stmt = USE_STMT (use_p);
3945 if (is_gimple_debug (use_stmt))
3946 continue;
3947
3948 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3949 NULL_TREE, NULL))
3950 ovf_use_seen = true;
3951 }
3952 }
3953 else
3954 {
3955 cast_stmt = NULL;
3956 cast_lhs = NULL_TREE;
3957 }
3958
3959 tree maxval = NULL_TREE;
3960 if (!ovf_use_seen
3961 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3962 || (code == PLUS_EXPR
3963 && optab_handler (uaddv4_optab,
3964 TYPE_MODE (type)) == CODE_FOR_nothing)
3965 || (code == MULT_EXPR
3966 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3967 TYPE_MODE (type)) == CODE_FOR_nothing))
3968 {
3969 if (code != PLUS_EXPR)
3970 return false;
3971 if (TREE_CODE (rhs1) != SSA_NAME
3972 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3973 return false;
3974 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
3975 tree type1 = TREE_TYPE (rhs1);
3976 if (!INTEGRAL_TYPE_P (type1)
3977 || !TYPE_UNSIGNED (type1)
3978 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
3979 || (TYPE_PRECISION (type1)
3980 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
3981 return false;
3982 if (TREE_CODE (rhs2) == INTEGER_CST)
3983 {
3984 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
3985 TYPE_PRECISION (type1),
3986 UNSIGNED), 0))
3987 return false;
3988 rhs2 = fold_convert (type1, rhs2);
3989 }
3990 else
3991 {
3992 if (TREE_CODE (rhs2) != SSA_NAME
3993 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
3994 return false;
3995 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
3996 tree type2 = TREE_TYPE (rhs2);
3997 if (!INTEGRAL_TYPE_P (type2)
3998 || !TYPE_UNSIGNED (type2)
3999 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4000 || (TYPE_PRECISION (type2)
4001 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4002 return false;
4003 }
4004 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4005 type = type1;
4006 else
4007 type = TREE_TYPE (rhs2);
4008
4009 if (TREE_CODE (type) != INTEGER_TYPE
4010 || optab_handler (uaddv4_optab,
4011 TYPE_MODE (type)) == CODE_FOR_nothing)
4012 return false;
4013
4014 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4015 UNSIGNED));
4016 ovf_use_seen = false;
4017 use_seen = false;
4018 basic_block use_bb = NULL;
4019 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4020 {
4021 use_stmt = USE_STMT (use_p);
4022 if (is_gimple_debug (use_stmt))
4023 continue;
4024
4025 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4026 {
4027 ovf_use_seen = true;
4028 use_bb = gimple_bb (use_stmt);
4029 }
4030 else
4031 {
4032 if (!gimple_assign_cast_p (use_stmt)
4033 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4034 return false;
4035 tree use_lhs = gimple_assign_lhs (use_stmt);
4036 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4037 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4038 > TYPE_PRECISION (type)))
4039 return false;
4040 use_seen = true;
4041 }
4042 }
4043 if (!ovf_use_seen)
4044 return false;
4045 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4046 {
4047 if (!use_seen)
4048 return false;
4049 tree new_rhs1 = make_ssa_name (type);
4050 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4051 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4052 rhs1 = new_rhs1;
4053 }
4054 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4055 {
4056 if (!use_seen)
4057 return false;
4058 tree new_rhs2 = make_ssa_name (type);
4059 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4060 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4061 rhs2 = new_rhs2;
4062 }
4063 else if (!use_seen)
4064 {
4065 /* If there are no uses of the wider addition, check whether
4066 forwprop has created a narrower addition.
4067 Require it to be in the same bb as the overflow check. */
4068 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4069 {
4070 use_stmt = USE_STMT (use_p);
4071 if (is_gimple_debug (use_stmt))
4072 continue;
4073
4074 if (use_stmt == stmt)
4075 continue;
4076
4077 if (!is_gimple_assign (use_stmt)
4078 || gimple_bb (use_stmt) != use_bb
4079 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4080 continue;
4081
4082 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4083 {
4084 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4085 rhs2, 0))
4086 continue;
4087 }
4088 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4089 {
4090 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4091 continue;
4092 }
4093 else
4094 continue;
4095
4096 add_stmt = use_stmt;
4097 break;
4098 }
4099 if (add_stmt == NULL)
4100 return false;
4101
4102 /* If stmt and add_stmt are in the same bb, we need to find out
4103 which one is earlier. If they are in different bbs, we've
4104 checked add_stmt is in the same bb as one of the uses of the
4105 stmt lhs, so stmt needs to dominate add_stmt too. */
4106 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4107 {
4108 gimple_stmt_iterator gsif = *gsi;
4109 gimple_stmt_iterator gsib = *gsi;
4110 int i;
4111 /* Search both forward and backward from stmt and have a small
4112 upper bound. */
4113 for (i = 0; i < 128; i++)
4114 {
4115 if (!gsi_end_p (gsib))
4116 {
4117 gsi_prev_nondebug (&gsib);
4118 if (gsi_stmt (gsib) == add_stmt)
4119 {
4120 add_first = true;
4121 break;
4122 }
4123 }
4124 else if (gsi_end_p (gsif))
4125 break;
4126 if (!gsi_end_p (gsif))
4127 {
4128 gsi_next_nondebug (&gsif);
4129 if (gsi_stmt (gsif) == add_stmt)
4130 break;
4131 }
4132 }
4133 if (i == 128)
4134 return false;
4135 if (add_first)
4136 *gsi = gsi_for_stmt (add_stmt);
4137 }
4138 }
4139 }
4140
4141 if (code == BIT_NOT_EXPR)
4142 *gsi = gsi_for_stmt (cond_stmt);
4143
4144 auto_vec<gimple *, 8> mul_stmts;
4145 if (code == MULT_EXPR && cast_stmt)
4146 {
4147 type = TREE_TYPE (cast_lhs);
4148 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4149 if (gimple_assign_cast_p (g)
4150 && useless_type_conversion_p (type,
4151 TREE_TYPE (gimple_assign_rhs1 (g)))
4152 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4153 rhs1 = gimple_assign_rhs1 (g);
4154 else
4155 {
4156 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4157 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4158 rhs1 = gimple_assign_lhs (g);
4159 mul_stmts.quick_push (g);
4160 }
4161 if (TREE_CODE (rhs2) == INTEGER_CST)
4162 rhs2 = fold_convert (type, rhs2);
4163 else
4164 {
/* Look at the defining statement of rhs2 here, not the stmt left over
   from handling rhs1 above. */
g = SSA_NAME_DEF_STMT (rhs2);
4165 if (gimple_assign_cast_p (g)
4166 && useless_type_conversion_p (type,
4167 TREE_TYPE (gimple_assign_rhs1 (g)))
4168 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4169 rhs2 = gimple_assign_rhs1 (g);
4170 else
4171 {
4172 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4173 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4174 rhs2 = gimple_assign_lhs (g);
4175 mul_stmts.quick_push (g);
4176 }
4177 }
4178 }
4179 tree ctype = build_complex_type (type);
4180 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4181 ? IFN_MUL_OVERFLOW
4182 : code != MINUS_EXPR
4183 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4184 2, rhs1, rhs2);
4185 tree ctmp = make_ssa_name (ctype);
4186 gimple_call_set_lhs (g, ctmp);
4187 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4188 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4189 gassign *g2;
4190 if (code != BIT_NOT_EXPR)
4191 {
4192 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4193 build1 (REALPART_EXPR, type, ctmp));
4194 if (maxval || cast_stmt)
4195 {
4196 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4197 if (add_first)
4198 *gsi = gsi_for_stmt (stmt);
4199 }
4200 else
4201 gsi_replace (gsi, g2, true);
4202 if (code == MULT_EXPR)
4203 {
4204 mul_stmts.quick_push (g);
4205 mul_stmts.quick_push (g2);
4206 if (cast_stmt)
4207 {
4208 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4209 gsi_replace (gsi, g2, true);
4210 mul_stmts.quick_push (g2);
4211 }
4212 }
4213 }
4214 tree ovf = make_ssa_name (type);
4215 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4216 build1 (IMAGPART_EXPR, type, ctmp));
4217 if (code != BIT_NOT_EXPR)
4218 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4219 else
4220 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4221 if (code == MULT_EXPR)
4222 mul_stmts.quick_push (g2);
4223
4224 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4225 {
4226 if (is_gimple_debug (use_stmt))
4227 continue;
4228
4229 gimple *orig_use_stmt = use_stmt;
4230 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4231 maxval, NULL);
4232 if (ovf_use == 0)
4233 {
4234 gcc_assert (code != BIT_NOT_EXPR);
4235 if (maxval)
4236 {
4237 tree use_lhs = gimple_assign_lhs (use_stmt);
4238 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4239 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4240 TREE_TYPE (new_lhs)))
4241 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4242 update_stmt (use_stmt);
4243 }
4244 continue;
4245 }
4246 if (gimple_code (use_stmt) == GIMPLE_COND)
4247 {
4248 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4249 gimple_cond_set_lhs (cond_stmt, ovf);
4250 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4251 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4252 }
4253 else
4254 {
4255 gcc_checking_assert (is_gimple_assign (use_stmt));
4256 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4257 {
4258 gimple_assign_set_rhs1 (use_stmt, ovf);
4259 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4260 gimple_assign_set_rhs_code (use_stmt,
4261 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4262 }
4263 else
4264 {
4265 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4266 == COND_EXPR);
4267 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4268 boolean_type_node, ovf,
4269 build_int_cst (type, 0));
4270 gimple_assign_set_rhs1 (use_stmt, cond);
4271 }
4272 }
4273 update_stmt (use_stmt);
4274 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4275 {
4276 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4277 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4278 cfg_changed);
4279 gsi_remove (&gsi2, true);
4280 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4281 }
4282 }
4283 if (maxval)
4284 {
4285 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4286 gsi_remove (&gsi2, true);
4287 if (add_stmt)
4288 {
4289 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4290 new_lhs);
4291 gsi2 = gsi_for_stmt (add_stmt);
4292 gsi_replace (&gsi2, g, true);
4293 }
4294 }
4295 else if (code == BIT_NOT_EXPR)
4296 {
4297 *gsi = gsi_for_stmt (stmt);
4298 gsi_remove (gsi, true);
4299 release_ssa_name (lhs);
4300 return true;
4301 }
4302 return false;
4303 }
4304
4305 /* Return true if the target has support for divmod. */
4306
4307 static bool
4308 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4309 {
4310 /* If target supports hardware divmod insn, use it for divmod. */
4311 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4312 return true;
4313
4314 /* Check if libfunc for divmod is available. */
4315 rtx libfunc = optab_libfunc (divmod_optab, mode);
4316 if (libfunc != NULL_RTX)
4317 {
4318 /* If optab_handler exists for div_optab, perhaps in a wider mode,
4319 we don't want to use the libfunc even if it exists for the given mode. */
4320 machine_mode div_mode;
4321 FOR_EACH_MODE_FROM (div_mode, mode)
4322 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4323 return false;
4324
4325 return targetm.expand_divmod_libfunc != NULL;
4326 }
4327
4328 return false;
4329 }
4330
4331 /* Check if stmt is a candidate for the divmod transform. */
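/* Illustrative examples (not from this file), assuming 32-bit int and
   32-bit words:

     r = a % b;    candidate, if the target supports divmod for SImode
     r = a % 16;   rejected: power-of-two divisor
     r = a % 7;    rejected: a constant divisor in a word-sized type has
		   its own specialized expansion.  */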
4332
4333 static bool
4334 divmod_candidate_p (gassign *stmt)
4335 {
4336 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4337 machine_mode mode = TYPE_MODE (type);
4338 optab divmod_optab, div_optab;
4339
4340 if (TYPE_UNSIGNED (type))
4341 {
4342 divmod_optab = udivmod_optab;
4343 div_optab = udiv_optab;
4344 }
4345 else
4346 {
4347 divmod_optab = sdivmod_optab;
4348 div_optab = sdiv_optab;
4349 }
4350
4351 tree op1 = gimple_assign_rhs1 (stmt);
4352 tree op2 = gimple_assign_rhs2 (stmt);
4353
4354 /* Disable the transform if either is a constant, since division-by-constant
4355 may have specialized expansion. */
4356 if (CONSTANT_CLASS_P (op1))
4357 return false;
4358
4359 if (CONSTANT_CLASS_P (op2))
4360 {
4361 if (integer_pow2p (op2))
4362 return false;
4363
4364 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4365 && TYPE_PRECISION (type) <= BITS_PER_WORD)
4366 return false;
4367
4368 /* If the divisor is not a power of 2 and the precision is wider than
4369 HWI, expand_divmod punts on that, so in that case it is better
4370 to use the divmod optab or libfunc. Similarly if choose_multiplier
4371 might need pre/post shifts of BITS_PER_WORD or more. */
4372 }
4373
4374 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4375 expand using the [su]divv optabs. */
4376 if (TYPE_OVERFLOW_TRAPS (type))
4377 return false;
4378
4379 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4380 return false;
4381
4382 return true;
4383 }
4384
4385 /* This function looks for:
4386 t1 = a TRUNC_DIV_EXPR b;
4387 t2 = a TRUNC_MOD_EXPR b;
4388 and transforms it to the following sequence:
4389 complex_tmp = DIVMOD (a, b);
4390 t1 = REALPART_EXPR (complex_tmp);
4391 t2 = IMAGPART_EXPR (complex_tmp);
4392 For conditions enabling the transform see divmod_candidate_p().
4393
4394 The pass has three parts:
4395 1) Find top_stmt, a trunc_div or trunc_mod stmt that dominates all
4396 other trunc_div_expr and trunc_mod_expr stmts.
4397 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4398 to stmts vector.
4399 3) Insert DIVMOD call just before top_stmt and update entries in
4400 stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
4401 IMAGPART_EXPR for mod). */
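/* Illustrative sketch (not part of this file): source code such as

     void
     div_and_mod (int a, int b, int *q, int *r)
     {
       *q = a / b;
       *r = a % b;
     }

   is rewritten, when divmod_candidate_p accepts the statements, so that
   both results are read from a single DIVMOD internal-fn call via
   REALPART_EXPR and IMAGPART_EXPR.  */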
4402
4403 static bool
4404 convert_to_divmod (gassign *stmt)
4405 {
4406 if (stmt_can_throw_internal (cfun, stmt)
4407 || !divmod_candidate_p (stmt))
4408 return false;
4409
4410 tree op1 = gimple_assign_rhs1 (stmt);
4411 tree op2 = gimple_assign_rhs2 (stmt);
4412
4413 imm_use_iterator use_iter;
4414 gimple *use_stmt;
4415 auto_vec<gimple *> stmts;
4416
4417 gimple *top_stmt = stmt;
4418 basic_block top_bb = gimple_bb (stmt);
4419
4420 /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4421 at least stmt and possibly other trunc_div/trunc_mod stmts
4422 having the same operands as stmt. */
4423
4424 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4425 {
4426 if (is_gimple_assign (use_stmt)
4427 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4428 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4429 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4430 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4431 {
4432 if (stmt_can_throw_internal (cfun, use_stmt))
4433 continue;
4434
4435 basic_block bb = gimple_bb (use_stmt);
4436
4437 if (bb == top_bb)
4438 {
4439 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4440 top_stmt = use_stmt;
4441 }
4442 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4443 {
4444 top_bb = bb;
4445 top_stmt = use_stmt;
4446 }
4447 }
4448 }
4449
4450 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4451 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4452
4453 stmts.safe_push (top_stmt);
4454 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4455
4456 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4457 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
4458 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4459 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
4460
4461 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4462 {
4463 if (is_gimple_assign (use_stmt)
4464 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4465 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4466 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4467 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4468 {
4469 if (use_stmt == top_stmt
4470 || stmt_can_throw_internal (cfun, use_stmt)
4471 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4472 continue;
4473
4474 stmts.safe_push (use_stmt);
4475 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4476 div_seen = true;
4477 }
4478 }
4479
4480 if (!div_seen)
4481 return false;
4482
4483 /* Part 3: Create libcall to internal fn DIVMOD:
4484 divmod_tmp = DIVMOD (op1, op2). */
4485
4486 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4487 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4488 call_stmt, "divmod_tmp");
4489 gimple_call_set_lhs (call_stmt, res);
4490 /* We rejected throwing statements above. */
4491 gimple_call_set_nothrow (call_stmt, true);
4492
4493 /* Insert the call before top_stmt. */
4494 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4495 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4496
4497 widen_mul_stats.divmod_calls_inserted++;
4498
4499 /* Update all statements in stmts vector:
4500 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4501 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4502
4503 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4504 {
4505 tree new_rhs;
4506
4507 switch (gimple_assign_rhs_code (use_stmt))
4508 {
4509 case TRUNC_DIV_EXPR:
4510 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4511 break;
4512
4513 case TRUNC_MOD_EXPR:
4514 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4515 break;
4516
4517 default:
4518 gcc_unreachable ();
4519 }
4520
4521 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4522 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4523 update_stmt (use_stmt);
4524 }
4525
4526 return true;
4527 }
4528
4529 /* Find integer multiplications where the operands are extended from
4530 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4531 where appropriate. */
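/* Illustrative sketch (not part of this file): the canonical source form is

     long long
     widen_mul (int a, int b)
     {
       return (long long) a * b;
     }

   where both operands of the 64-bit MULT_EXPR are sign-extended from
   32 bits, so convert_mult_to_widen can use WIDEN_MULT_EXPR when the
   target has a widening multiply instruction for these modes.  */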
4532
4533 namespace {
4534
4535 const pass_data pass_data_optimize_widening_mul =
4536 {
4537 GIMPLE_PASS, /* type */
4538 "widening_mul", /* name */
4539 OPTGROUP_NONE, /* optinfo_flags */
4540 TV_TREE_WIDEN_MUL, /* tv_id */
4541 PROP_ssa, /* properties_required */
4542 0, /* properties_provided */
4543 0, /* properties_destroyed */
4544 0, /* todo_flags_start */
4545 TODO_update_ssa, /* todo_flags_finish */
4546 };
4547
4548 class pass_optimize_widening_mul : public gimple_opt_pass
4549 {
4550 public:
4551 pass_optimize_widening_mul (gcc::context *ctxt)
4552 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4553 {}
4554
4555 /* opt_pass methods: */
4556 virtual bool gate (function *)
4557 {
4558 return flag_expensive_optimizations && optimize;
4559 }
4560
4561 virtual unsigned int execute (function *);
4562
4563 }; // class pass_optimize_widening_mul
4564
4565 /* Walker class to perform the transformation in reverse dominance order. */
4566
4567 class math_opts_dom_walker : public dom_walker
4568 {
4569 public:
4570 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4571 if walking modifies the CFG. */
4572
4573 math_opts_dom_walker (bool *cfg_changed_p)
4574 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4575 m_cfg_changed_p (cfg_changed_p) {}
4576
4577 /* The actual actions performed in the walk. */
4578
4579 virtual void after_dom_children (basic_block);
4580
4581 /* Set of results of chains of multiply and add statement combinations that
4582 were not transformed into FMAs because of active deferring. */
4583 hash_set<tree> m_last_result_set;
4584
4585 /* Pointer to a flag of the user that needs to be set if CFG has been
4586 modified. */
4587 bool *m_cfg_changed_p;
4588 };
4589
4590 void
4591 math_opts_dom_walker::after_dom_children (basic_block bb)
4592 {
4593 gimple_stmt_iterator gsi;
4594
4595 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4596
4597 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4598 {
4599 gimple *stmt = gsi_stmt (gsi);
4600 enum tree_code code;
4601
4602 if (is_gimple_assign (stmt))
4603 {
4604 code = gimple_assign_rhs_code (stmt);
4605 switch (code)
4606 {
4607 case MULT_EXPR:
4608 if (!convert_mult_to_widen (stmt, &gsi)
4609 && !convert_expand_mult_copysign (stmt, &gsi)
4610 && convert_mult_to_fma (stmt,
4611 gimple_assign_rhs1 (stmt),
4612 gimple_assign_rhs2 (stmt),
4613 &fma_state))
4614 {
4615 gsi_remove (&gsi, true);
4616 release_defs (stmt);
4617 continue;
4618 }
4619 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4620 break;
4621
4622 case PLUS_EXPR:
4623 case MINUS_EXPR:
4624 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4625 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4626 break;
4627
4628 case BIT_NOT_EXPR:
4629 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4630 continue;
4631 break;
4632
4633 case TRUNC_MOD_EXPR:
4634 convert_to_divmod (as_a<gassign *> (stmt));
4635 break;
4636
4637 default:;
4638 }
4639 }
4640 else if (is_gimple_call (stmt))
4641 {
4642 switch (gimple_call_combined_fn (stmt))
4643 {
4644 CASE_CFN_POW:
4645 if (gimple_call_lhs (stmt)
4646 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4647 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4648 &dconst2)
4649 && convert_mult_to_fma (stmt,
4650 gimple_call_arg (stmt, 0),
4651 gimple_call_arg (stmt, 0),
4652 &fma_state))
4653 {
4654 unlink_stmt_vdef (stmt);
4655 if (gsi_remove (&gsi, true)
4656 && gimple_purge_dead_eh_edges (bb))
4657 *m_cfg_changed_p = true;
4658 release_defs (stmt);
4659 continue;
4660 }
4661 break;
4662
4663 case CFN_COND_MUL:
4664 if (convert_mult_to_fma (stmt,
4665 gimple_call_arg (stmt, 1),
4666 gimple_call_arg (stmt, 2),
4667 &fma_state,
4668 gimple_call_arg (stmt, 0)))
4669
4670 {
4671 gsi_remove (&gsi, true);
4672 release_defs (stmt);
4673 continue;
4674 }
4675 break;
4676
4677 case CFN_LAST:
4678 cancel_fma_deferring (&fma_state);
4679 break;
4680
4681 default:
4682 break;
4683 }
4684 }
4685 gsi_next (&gsi);
4686 }
4687 if (fma_state.m_deferring_p
4688 && fma_state.m_initial_phi)
4689 {
4690 gcc_checking_assert (fma_state.m_last_result);
4691 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
4692 &m_last_result_set))
4693 cancel_fma_deferring (&fma_state);
4694 else
4695 m_last_result_set.add (fma_state.m_last_result);
4696 }
4697 }
4698
4699
4700 unsigned int
4701 pass_optimize_widening_mul::execute (function *fun)
4702 {
4703 bool cfg_changed = false;
4704
4705 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4706 calculate_dominance_info (CDI_DOMINATORS);
4707 renumber_gimple_stmt_uids (cfun);
4708
4709 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4710
4711 statistics_counter_event (fun, "widening multiplications inserted",
4712 widen_mul_stats.widen_mults_inserted);
4713 statistics_counter_event (fun, "widening maccs inserted",
4714 widen_mul_stats.maccs_inserted);
4715 statistics_counter_event (fun, "fused multiply-adds inserted",
4716 widen_mul_stats.fmas_inserted);
4717 statistics_counter_event (fun, "divmod calls inserted",
4718 widen_mul_stats.divmod_calls_inserted);
4719
4720 return cfg_changed ? TODO_cleanup_cfg : 0;
4721 }
4722
4723 } // anon namespace
4724
4725 gimple_opt_pass *
4726 make_pass_optimize_widening_mul (gcc::context *ctxt)
4727 {
4728 return new pass_optimize_widening_mul (ctxt);
4729 }