]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-math-opts.c
* testsuite/ext/profile/mh.cc: Add xfail for uclibc.
[thirdparty/gcc.git] / gcc / tree-ssa-math-opts.c
CommitLineData
abacb398 1/* Global, SSA-based optimizations using mathematical identities.
7cf0dbf3 2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
48e1416a 4
abacb398 5This file is part of GCC.
48e1416a 6
abacb398 7GCC is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by the
8c4c00c1 9Free Software Foundation; either version 3, or (at your option) any
abacb398 10later version.
48e1416a 11
abacb398 12GCC is distributed in the hope that it will be useful, but WITHOUT
13ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
48e1416a 16
abacb398 17You should have received a copy of the GNU General Public License
8c4c00c1 18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
abacb398 20
21/* Currently, the only mini-pass in this file tries to CSE reciprocal
22 operations. These are common in sequences such as this one:
23
24 modulus = sqrt(x*x + y*y + z*z);
25 x = x / modulus;
26 y = y / modulus;
27 z = z / modulus;
28
29 that can be optimized to
30
31 modulus = sqrt(x*x + y*y + z*z);
32 rmodulus = 1.0 / modulus;
33 x = x * rmodulus;
34 y = y * rmodulus;
35 z = z * rmodulus;
36
37 We do this for loop invariant divisors, and with this pass whenever
ac70caad 38 we notice that a division has the same divisor multiple times.
39
40 Of course, like in PRE, we don't insert a division if a dominator
41 already has one. However, this cannot be done as an extension of
42 PRE for several reasons.
43
44 First of all, with some experiments it was found out that the
45 transformation is not always useful if there are only two divisions
46 hy the same divisor. This is probably because modern processors
47 can pipeline the divisions; on older, in-order processors it should
48 still be effective to optimize two divisions by the same number.
49 We make this a param, and it shall be called N in the remainder of
50 this comment.
51
52 Second, if trapping math is active, we have less freedom on where
53 to insert divisions: we can only do so in basic blocks that already
54 contain one. (If divisions don't trap, instead, we can insert
55 divisions elsewhere, which will be in blocks that are common dominators
56 of those that have the division).
57
58 We really don't want to compute the reciprocal unless a division will
59 be found. To do this, we won't insert the division in a basic block
60 that has less than N divisions *post-dominating* it.
61
62 The algorithm constructs a subset of the dominator tree, holding the
63 blocks containing the divisions and the common dominators to them,
64 and walk it twice. The first walk is in post-order, and it annotates
65 each block with the number of divisions that post-dominate it: this
66 gives information on where divisions can be inserted profitably.
67 The second walk is in pre-order, and it inserts divisions as explained
68 above, and replaces divisions by multiplications.
69
70 In the best case, the cost of the pass is O(n_statements). In the
71 worst-case, the cost is due to creating the dominator tree subset,
72 with a cost of O(n_basic_blocks ^ 2); however this can only happen
73 for n_statements / n_basic_blocks statements. So, the amortized cost
74 of creating the dominator tree subset is O(n_basic_blocks) and the
75 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76
77 More practically, the cost will be small because there are few
78 divisions, and they tend to be in the same basic block, so insert_bb
79 is called very few times.
80
81 If we did this using domwalk.c, an efficient implementation would have
82 to work on all the variables in a single pass, because we could not
83 work on just a subset of the dominator tree, as we do now, and the
84 cost would also be something like O(n_statements * n_basic_blocks).
85 The data structures would be more complex in order to work on all the
86 variables in a single pass. */
abacb398 87
88#include "config.h"
89#include "system.h"
90#include "coretypes.h"
91#include "tm.h"
92#include "flags.h"
93#include "tree.h"
94#include "tree-flow.h"
abacb398 95#include "timevar.h"
96#include "tree-pass.h"
ac70caad 97#include "alloc-pool.h"
98#include "basic-block.h"
99#include "target.h"
ce084dfc 100#include "gimple-pretty-print.h"
a7a46268 101
102/* FIXME: RTL headers have to be included here for optabs. */
103#include "rtl.h" /* Because optabs.h wants enum rtx_code. */
104#include "expr.h" /* Because optabs.h wants sepops. */
84cc784c 105#include "optabs.h"
ac70caad 106
107/* This structure represents one basic block that either computes a
108 division, or is a common dominator for basic block that compute a
109 division. */
110struct occurrence {
111 /* The basic block represented by this structure. */
112 basic_block bb;
113
114 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
115 inserted in BB. */
116 tree recip_def;
117
75a70cf9 118 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
ac70caad 119 was inserted in BB. */
75a70cf9 120 gimple recip_def_stmt;
ac70caad 121
122 /* Pointer to a list of "struct occurrence"s for blocks dominated
123 by BB. */
124 struct occurrence *children;
125
126 /* Pointer to the next "struct occurrence"s in the list of blocks
127 sharing a common dominator. */
128 struct occurrence *next;
129
130 /* The number of divisions that are in BB before compute_merit. The
131 number of divisions that are in BB or post-dominate it after
132 compute_merit. */
133 int num_divisions;
134
135 /* True if the basic block has a division, false if it is a common
136 dominator for basic blocks that do. If it is false and trapping
137 math is active, BB is not a candidate for inserting a reciprocal. */
138 bool bb_has_division;
139};
140
141
142/* The instance of "struct occurrence" representing the highest
143 interesting block in the dominator tree. */
144static struct occurrence *occ_head;
145
146/* Allocation pool for getting instances of "struct occurrence". */
147static alloc_pool occ_pool;
148
149
150
151/* Allocate and return a new struct occurrence for basic block BB, and
152 whose children list is headed by CHILDREN. */
153static struct occurrence *
154occ_new (basic_block bb, struct occurrence *children)
abacb398 155{
ac70caad 156 struct occurrence *occ;
157
f0d6e81c 158 bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
ac70caad 159 memset (occ, 0, sizeof (struct occurrence));
160
161 occ->bb = bb;
162 occ->children = children;
163 return occ;
abacb398 164}
165
ac70caad 166
167/* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
168 list of "struct occurrence"s, one per basic block, having IDOM as
169 their common dominator.
170
171 We try to insert NEW_OCC as deep as possible in the tree, and we also
172 insert any other block that is a common dominator for BB and one
173 block already in the tree. */
174
175static void
176insert_bb (struct occurrence *new_occ, basic_block idom,
177 struct occurrence **p_head)
9e583fac 178{
ac70caad 179 struct occurrence *occ, **p_occ;
9e583fac 180
ac70caad 181 for (p_occ = p_head; (occ = *p_occ) != NULL; )
182 {
183 basic_block bb = new_occ->bb, occ_bb = occ->bb;
184 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
185 if (dom == bb)
186 {
187 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
188 from its list. */
189 *p_occ = occ->next;
190 occ->next = new_occ->children;
191 new_occ->children = occ;
192
193 /* Try the next block (it may as well be dominated by BB). */
194 }
195
196 else if (dom == occ_bb)
197 {
198 /* OCC_BB dominates BB. Tail recurse to look deeper. */
199 insert_bb (new_occ, dom, &occ->children);
200 return;
201 }
202
203 else if (dom != idom)
204 {
205 gcc_assert (!dom->aux);
206
207 /* There is a dominator between IDOM and BB, add it and make
208 two children out of NEW_OCC and OCC. First, remove OCC from
209 its list. */
210 *p_occ = occ->next;
211 new_occ->next = occ;
212 occ->next = NULL;
213
214 /* None of the previous blocks has DOM as a dominator: if we tail
215 recursed, we would reexamine them uselessly. Just switch BB with
216 DOM, and go on looking for blocks dominated by DOM. */
217 new_occ = occ_new (dom, new_occ);
218 }
219
220 else
221 {
222 /* Nothing special, go on with the next element. */
223 p_occ = &occ->next;
224 }
225 }
226
227 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
228 new_occ->next = *p_head;
229 *p_head = new_occ;
230}
231
232/* Register that we found a division in BB. */
233
234static inline void
235register_division_in (basic_block bb)
236{
237 struct occurrence *occ;
238
239 occ = (struct occurrence *) bb->aux;
240 if (!occ)
241 {
242 occ = occ_new (bb, NULL);
243 insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
244 }
245
246 occ->bb_has_division = true;
247 occ->num_divisions++;
248}
249
250
251/* Compute the number of divisions that postdominate each block in OCC and
252 its children. */
abacb398 253
abacb398 254static void
ac70caad 255compute_merit (struct occurrence *occ)
abacb398 256{
ac70caad 257 struct occurrence *occ_child;
258 basic_block dom = occ->bb;
abacb398 259
ac70caad 260 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
abacb398 261 {
ac70caad 262 basic_block bb;
263 if (occ_child->children)
264 compute_merit (occ_child);
265
266 if (flag_exceptions)
267 bb = single_noncomplex_succ (dom);
268 else
269 bb = dom;
270
271 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
272 occ->num_divisions += occ_child->num_divisions;
273 }
274}
275
276
277/* Return whether USE_STMT is a floating-point division by DEF. */
278static inline bool
75a70cf9 279is_division_by (gimple use_stmt, tree def)
ac70caad 280{
75a70cf9 281 return is_gimple_assign (use_stmt)
282 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
283 && gimple_assign_rhs2 (use_stmt) == def
119368d7 284 /* Do not recognize x / x as valid division, as we are getting
285 confused later by replacing all immediate uses x in such
286 a stmt. */
75a70cf9 287 && gimple_assign_rhs1 (use_stmt) != def;
ac70caad 288}
289
290/* Walk the subset of the dominator tree rooted at OCC, setting the
291 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
292 the given basic block. The field may be left NULL, of course,
293 if it is not possible or profitable to do the optimization.
294
295 DEF_BSI is an iterator pointing at the statement defining DEF.
296 If RECIP_DEF is set, a dominator already has a computation that can
297 be used. */
298
299static void
75a70cf9 300insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
ac70caad 301 tree def, tree recip_def, int threshold)
302{
75a70cf9 303 tree type;
304 gimple new_stmt;
305 gimple_stmt_iterator gsi;
ac70caad 306 struct occurrence *occ_child;
307
308 if (!recip_def
309 && (occ->bb_has_division || !flag_trapping_math)
310 && occ->num_divisions >= threshold)
311 {
312 /* Make a variable with the replacement and substitute it. */
313 type = TREE_TYPE (def);
314 recip_def = make_rename_temp (type, "reciptmp");
75a70cf9 315 new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
316 build_one_cst (type), def);
48e1416a 317
ac70caad 318 if (occ->bb_has_division)
319 {
320 /* Case 1: insert before an existing division. */
75a70cf9 321 gsi = gsi_after_labels (occ->bb);
322 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
323 gsi_next (&gsi);
ac70caad 324
75a70cf9 325 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
ac70caad 326 }
75a70cf9 327 else if (def_gsi && occ->bb == def_gsi->bb)
685b24f5 328 {
ac70caad 329 /* Case 2: insert right after the definition. Note that this will
330 never happen if the definition statement can throw, because in
331 that case the sole successor of the statement's basic block will
332 dominate all the uses as well. */
75a70cf9 333 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
685b24f5 334 }
ac70caad 335 else
336 {
337 /* Case 3: insert in a basic block not containing defs/uses. */
75a70cf9 338 gsi = gsi_after_labels (occ->bb);
339 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
ac70caad 340 }
341
342 occ->recip_def_stmt = new_stmt;
abacb398 343 }
344
ac70caad 345 occ->recip_def = recip_def;
346 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
75a70cf9 347 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
ac70caad 348}
349
350
351/* Replace the division at USE_P with a multiplication by the reciprocal, if
352 possible. */
353
354static inline void
355replace_reciprocal (use_operand_p use_p)
356{
75a70cf9 357 gimple use_stmt = USE_STMT (use_p);
358 basic_block bb = gimple_bb (use_stmt);
ac70caad 359 struct occurrence *occ = (struct occurrence *) bb->aux;
360
0bfd8d5c 361 if (optimize_bb_for_speed_p (bb)
362 && occ->recip_def && use_stmt != occ->recip_def_stmt)
ac70caad 363 {
75a70cf9 364 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
ac70caad 365 SET_USE (use_p, occ->recip_def);
366 fold_stmt_inplace (use_stmt);
367 update_stmt (use_stmt);
368 }
369}
370
371
372/* Free OCC and return one more "struct occurrence" to be freed. */
373
374static struct occurrence *
375free_bb (struct occurrence *occ)
376{
377 struct occurrence *child, *next;
378
379 /* First get the two pointers hanging off OCC. */
380 next = occ->next;
381 child = occ->children;
382 occ->bb->aux = NULL;
383 pool_free (occ_pool, occ);
384
385 /* Now ensure that we don't recurse unless it is necessary. */
386 if (!child)
387 return next;
9e583fac 388 else
ac70caad 389 {
390 while (next)
391 next = free_bb (next);
392
393 return child;
394 }
395}
396
397
398/* Look for floating-point divisions among DEF's uses, and try to
399 replace them by multiplications with the reciprocal. Add
400 as many statements computing the reciprocal as needed.
401
402 DEF must be a GIMPLE register of a floating-point type. */
403
404static void
75a70cf9 405execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
ac70caad 406{
407 use_operand_p use_p;
408 imm_use_iterator use_iter;
409 struct occurrence *occ;
410 int count = 0, threshold;
abacb398 411
ac70caad 412 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
413
414 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
abacb398 415 {
75a70cf9 416 gimple use_stmt = USE_STMT (use_p);
ac70caad 417 if (is_division_by (use_stmt, def))
abacb398 418 {
75a70cf9 419 register_division_in (gimple_bb (use_stmt));
ac70caad 420 count++;
abacb398 421 }
422 }
48e1416a 423
ac70caad 424 /* Do the expensive part only if we can hope to optimize something. */
425 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
426 if (count >= threshold)
427 {
75a70cf9 428 gimple use_stmt;
ac70caad 429 for (occ = occ_head; occ; occ = occ->next)
430 {
431 compute_merit (occ);
75a70cf9 432 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
ac70caad 433 }
434
09aca5bc 435 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
ac70caad 436 {
ac70caad 437 if (is_division_by (use_stmt, def))
09aca5bc 438 {
439 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
440 replace_reciprocal (use_p);
441 }
ac70caad 442 }
443 }
444
445 for (occ = occ_head; occ; )
446 occ = free_bb (occ);
447
448 occ_head = NULL;
abacb398 449}
450
ac70caad 451static bool
452gate_cse_reciprocals (void)
453{
0bfd8d5c 454 return optimize && flag_reciprocal_math;
ac70caad 455}
456
ac70caad 457/* Go through all the floating-point SSA_NAMEs, and call
458 execute_cse_reciprocals_1 on each of them. */
2a1990e9 459static unsigned int
abacb398 460execute_cse_reciprocals (void)
461{
462 basic_block bb;
51b60a11 463 tree arg;
685b24f5 464
ac70caad 465 occ_pool = create_alloc_pool ("dominators for recip",
466 sizeof (struct occurrence),
467 n_basic_blocks / 3 + 1);
685b24f5 468
c136ae61 469 calculate_dominance_info (CDI_DOMINATORS);
470 calculate_dominance_info (CDI_POST_DOMINATORS);
ac70caad 471
472#ifdef ENABLE_CHECKING
473 FOR_EACH_BB (bb)
474 gcc_assert (!bb->aux);
475#endif
476
1767a056 477 for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
2d04fd8d 478 if (gimple_default_def (cfun, arg)
ac70caad 479 && FLOAT_TYPE_P (TREE_TYPE (arg))
480 && is_gimple_reg (arg))
2d04fd8d 481 execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg));
51b60a11 482
abacb398 483 FOR_EACH_BB (bb)
484 {
75a70cf9 485 gimple_stmt_iterator gsi;
486 gimple phi;
487 tree def;
abacb398 488
75a70cf9 489 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
abacb398 490 {
75a70cf9 491 phi = gsi_stmt (gsi);
abacb398 492 def = PHI_RESULT (phi);
493 if (FLOAT_TYPE_P (TREE_TYPE (def))
494 && is_gimple_reg (def))
ac70caad 495 execute_cse_reciprocals_1 (NULL, def);
abacb398 496 }
497
75a70cf9 498 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
abacb398 499 {
75a70cf9 500 gimple stmt = gsi_stmt (gsi);
a0315874 501
75a70cf9 502 if (gimple_has_lhs (stmt)
abacb398 503 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
504 && FLOAT_TYPE_P (TREE_TYPE (def))
51b60a11 505 && TREE_CODE (def) == SSA_NAME)
75a70cf9 506 execute_cse_reciprocals_1 (&gsi, def);
abacb398 507 }
e174638f 508
0bfd8d5c 509 if (optimize_bb_for_size_p (bb))
510 continue;
511
e174638f 512 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
75a70cf9 513 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
e174638f 514 {
75a70cf9 515 gimple stmt = gsi_stmt (gsi);
e174638f 516 tree fndecl;
517
75a70cf9 518 if (is_gimple_assign (stmt)
519 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
e174638f 520 {
75a70cf9 521 tree arg1 = gimple_assign_rhs2 (stmt);
522 gimple stmt1;
2cd360b6 523
524 if (TREE_CODE (arg1) != SSA_NAME)
525 continue;
526
527 stmt1 = SSA_NAME_DEF_STMT (arg1);
e174638f 528
75a70cf9 529 if (is_gimple_call (stmt1)
530 && gimple_call_lhs (stmt1)
531 && (fndecl = gimple_call_fndecl (stmt1))
e174638f 532 && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
533 || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
534 {
535 enum built_in_function code;
774b1cdd 536 bool md_code, fail;
537 imm_use_iterator ui;
538 use_operand_p use_p;
e174638f 539
540 code = DECL_FUNCTION_CODE (fndecl);
2cd360b6 541 md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
542
543 fndecl = targetm.builtin_reciprocal (code, md_code, false);
e174638f 544 if (!fndecl)
545 continue;
546
774b1cdd 547 /* Check that all uses of the SSA name are divisions,
548 otherwise replacing the defining statement will do
549 the wrong thing. */
550 fail = false;
551 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
552 {
553 gimple stmt2 = USE_STMT (use_p);
554 if (is_gimple_debug (stmt2))
555 continue;
556 if (!is_gimple_assign (stmt2)
557 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
558 || gimple_assign_rhs1 (stmt2) == arg1
559 || gimple_assign_rhs2 (stmt2) != arg1)
560 {
561 fail = true;
562 break;
563 }
564 }
565 if (fail)
566 continue;
567
5fb3d93f 568 gimple_replace_lhs (stmt1, arg1);
0acacf9e 569 gimple_call_set_fndecl (stmt1, fndecl);
e174638f 570 update_stmt (stmt1);
571
774b1cdd 572 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
573 {
574 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
575 fold_stmt_inplace (stmt);
576 update_stmt (stmt);
577 }
e174638f 578 }
579 }
580 }
abacb398 581 }
685b24f5 582
c136ae61 583 free_dominance_info (CDI_DOMINATORS);
584 free_dominance_info (CDI_POST_DOMINATORS);
ac70caad 585 free_alloc_pool (occ_pool);
2a1990e9 586 return 0;
abacb398 587}
588
20099e35 589struct gimple_opt_pass pass_cse_reciprocals =
abacb398 590{
20099e35 591 {
592 GIMPLE_PASS,
abacb398 593 "recip", /* name */
594 gate_cse_reciprocals, /* gate */
595 execute_cse_reciprocals, /* execute */
596 NULL, /* sub */
597 NULL, /* next */
598 0, /* static_pass_number */
0b1615c1 599 TV_NONE, /* tv_id */
abacb398 600 PROP_ssa, /* properties_required */
601 0, /* properties_provided */
602 0, /* properties_destroyed */
603 0, /* todo_flags_start */
604 TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
20099e35 605 | TODO_verify_stmts /* todo_flags_finish */
606 }
abacb398 607};
a0315874 608
0d424440 609/* Records an occurrence at statement USE_STMT in the vector of trees
a0315874 610 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
0d424440 611 is not yet initialized. Returns true if the occurrence was pushed on
a0315874 612 the vector. Adjusts *TOP_BB to be the basic block dominating all
613 statements in the vector. */
614
615static bool
75a70cf9 616maybe_record_sincos (VEC(gimple, heap) **stmts,
617 basic_block *top_bb, gimple use_stmt)
a0315874 618{
75a70cf9 619 basic_block use_bb = gimple_bb (use_stmt);
a0315874 620 if (*top_bb
621 && (*top_bb == use_bb
622 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
75a70cf9 623 VEC_safe_push (gimple, heap, *stmts, use_stmt);
a0315874 624 else if (!*top_bb
625 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
626 {
75a70cf9 627 VEC_safe_push (gimple, heap, *stmts, use_stmt);
a0315874 628 *top_bb = use_bb;
629 }
630 else
631 return false;
632
633 return true;
634}
635
636/* Look for sin, cos and cexpi calls with the same argument NAME and
637 create a single call to cexpi CSEing the result in this case.
638 We first walk over all immediate uses of the argument collecting
639 statements that we can CSE in a vector and in a second pass replace
640 the statement rhs with a REALPART or IMAGPART expression on the
641 result of the cexpi call we insert before the use statement that
642 dominates all other candidates. */
643
4c80086d 644static bool
a0315874 645execute_cse_sincos_1 (tree name)
646{
75a70cf9 647 gimple_stmt_iterator gsi;
a0315874 648 imm_use_iterator use_iter;
75a70cf9 649 tree fndecl, res, type;
650 gimple def_stmt, use_stmt, stmt;
a0315874 651 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
75a70cf9 652 VEC(gimple, heap) *stmts = NULL;
a0315874 653 basic_block top_bb = NULL;
654 int i;
4c80086d 655 bool cfg_changed = false;
a0315874 656
657 type = TREE_TYPE (name);
658 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
659 {
75a70cf9 660 if (gimple_code (use_stmt) != GIMPLE_CALL
661 || !gimple_call_lhs (use_stmt)
662 || !(fndecl = gimple_call_fndecl (use_stmt))
a0315874 663 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
664 continue;
665
666 switch (DECL_FUNCTION_CODE (fndecl))
667 {
668 CASE_FLT_FN (BUILT_IN_COS):
669 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
670 break;
671
672 CASE_FLT_FN (BUILT_IN_SIN):
673 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
674 break;
675
676 CASE_FLT_FN (BUILT_IN_CEXPI):
677 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
678 break;
679
680 default:;
681 }
682 }
683
684 if (seen_cos + seen_sin + seen_cexpi <= 1)
685 {
75a70cf9 686 VEC_free(gimple, heap, stmts);
4c80086d 687 return false;
a0315874 688 }
689
690 /* Simply insert cexpi at the beginning of top_bb but not earlier than
691 the name def statement. */
692 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
693 if (!fndecl)
4c80086d 694 return false;
695 res = create_tmp_reg (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
75a70cf9 696 stmt = gimple_build_call (fndecl, 1, name);
4c80086d 697 res = make_ssa_name (res, stmt);
75a70cf9 698 gimple_call_set_lhs (stmt, res);
699
a0315874 700 def_stmt = SSA_NAME_DEF_STMT (name);
8090c12d 701 if (!SSA_NAME_IS_DEFAULT_DEF (name)
75a70cf9 702 && gimple_code (def_stmt) != GIMPLE_PHI
703 && gimple_bb (def_stmt) == top_bb)
a0315874 704 {
75a70cf9 705 gsi = gsi_for_stmt (def_stmt);
706 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
a0315874 707 }
708 else
709 {
75a70cf9 710 gsi = gsi_after_labels (top_bb);
711 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
a0315874 712 }
713 update_stmt (stmt);
714
715 /* And adjust the recorded old call sites. */
75a70cf9 716 for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
a0315874 717 {
75a70cf9 718 tree rhs = NULL;
719 fndecl = gimple_call_fndecl (use_stmt);
720
a0315874 721 switch (DECL_FUNCTION_CODE (fndecl))
722 {
723 CASE_FLT_FN (BUILT_IN_COS):
75a70cf9 724 rhs = fold_build1 (REALPART_EXPR, type, res);
a0315874 725 break;
726
727 CASE_FLT_FN (BUILT_IN_SIN):
75a70cf9 728 rhs = fold_build1 (IMAGPART_EXPR, type, res);
a0315874 729 break;
730
731 CASE_FLT_FN (BUILT_IN_CEXPI):
75a70cf9 732 rhs = res;
a0315874 733 break;
734
735 default:;
736 gcc_unreachable ();
737 }
738
75a70cf9 739 /* Replace call with a copy. */
740 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
741
742 gsi = gsi_for_stmt (use_stmt);
4c80086d 743 gsi_replace (&gsi, stmt, true);
744 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
745 cfg_changed = true;
a0315874 746 }
747
75a70cf9 748 VEC_free(gimple, heap, stmts);
4c80086d 749
750 return cfg_changed;
a0315874 751}
752
753/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
754 on the SSA_NAME argument of each of them. */
755
756static unsigned int
757execute_cse_sincos (void)
758{
759 basic_block bb;
4c80086d 760 bool cfg_changed = false;
a0315874 761
762 calculate_dominance_info (CDI_DOMINATORS);
763
764 FOR_EACH_BB (bb)
765 {
75a70cf9 766 gimple_stmt_iterator gsi;
a0315874 767
75a70cf9 768 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
a0315874 769 {
75a70cf9 770 gimple stmt = gsi_stmt (gsi);
a0315874 771 tree fndecl;
772
75a70cf9 773 if (is_gimple_call (stmt)
774 && gimple_call_lhs (stmt)
775 && (fndecl = gimple_call_fndecl (stmt))
a0315874 776 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
777 {
778 tree arg;
779
780 switch (DECL_FUNCTION_CODE (fndecl))
781 {
782 CASE_FLT_FN (BUILT_IN_COS):
783 CASE_FLT_FN (BUILT_IN_SIN):
784 CASE_FLT_FN (BUILT_IN_CEXPI):
75a70cf9 785 arg = gimple_call_arg (stmt, 0);
a0315874 786 if (TREE_CODE (arg) == SSA_NAME)
4c80086d 787 cfg_changed |= execute_cse_sincos_1 (arg);
a0315874 788 break;
789
790 default:;
791 }
792 }
793 }
794 }
795
796 free_dominance_info (CDI_DOMINATORS);
4c80086d 797 return cfg_changed ? TODO_cleanup_cfg : 0;
a0315874 798}
799
800static bool
801gate_cse_sincos (void)
802{
803 /* Make sure we have either sincos or cexp. */
804 return (TARGET_HAS_SINCOS
805 || TARGET_C99_FUNCTIONS)
806 && optimize;
807}
808
20099e35 809struct gimple_opt_pass pass_cse_sincos =
a0315874 810{
20099e35 811 {
812 GIMPLE_PASS,
a0315874 813 "sincos", /* name */
814 gate_cse_sincos, /* gate */
815 execute_cse_sincos, /* execute */
816 NULL, /* sub */
817 NULL, /* next */
818 0, /* static_pass_number */
0b1615c1 819 TV_NONE, /* tv_id */
a0315874 820 PROP_ssa, /* properties_required */
821 0, /* properties_provided */
822 0, /* properties_destroyed */
823 0, /* todo_flags_start */
824 TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
20099e35 825 | TODO_verify_stmts /* todo_flags_finish */
826 }
a0315874 827};
e174638f 828
84cc784c 829/* A symbolic number is used to detect byte permutation and selection
830 patterns. Therefore the field N contains an artificial number
831 consisting of byte size markers:
832
833 0 - byte has the value 0
834 1..size - byte contains the content of the byte
835 number indexed with that value minus one */
836
837struct symbolic_number {
838 unsigned HOST_WIDEST_INT n;
839 int size;
840};
841
842/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
843 number N. Return false if the requested operation is not permitted
844 on a symbolic number. */
845
846static inline bool
847do_shift_rotate (enum tree_code code,
848 struct symbolic_number *n,
849 int count)
850{
851 if (count % 8 != 0)
852 return false;
853
854 /* Zero out the extra bits of N in order to avoid them being shifted
855 into the significant bits. */
856 if (n->size < (int)sizeof (HOST_WIDEST_INT))
857 n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
858
859 switch (code)
860 {
861 case LSHIFT_EXPR:
862 n->n <<= count;
863 break;
864 case RSHIFT_EXPR:
865 n->n >>= count;
866 break;
867 case LROTATE_EXPR:
868 n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
869 break;
870 case RROTATE_EXPR:
871 n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
872 break;
873 default:
874 return false;
875 }
876 return true;
877}
878
879/* Perform sanity checking for the symbolic number N and the gimple
880 statement STMT. */
881
882static inline bool
883verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
884{
885 tree lhs_type;
886
887 lhs_type = gimple_expr_type (stmt);
888
889 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
890 return false;
891
892 if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
893 return false;
894
895 return true;
896}
897
898/* find_bswap_1 invokes itself recursively with N and tries to perform
899 the operation given by the rhs of STMT on the result. If the
900 operation could successfully be executed the function returns the
901 tree expression of the source operand and NULL otherwise. */
902
903static tree
904find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
905{
906 enum tree_code code;
907 tree rhs1, rhs2 = NULL;
908 gimple rhs1_stmt, rhs2_stmt;
909 tree source_expr1;
910 enum gimple_rhs_class rhs_class;
911
912 if (!limit || !is_gimple_assign (stmt))
913 return NULL_TREE;
914
915 rhs1 = gimple_assign_rhs1 (stmt);
916
917 if (TREE_CODE (rhs1) != SSA_NAME)
918 return NULL_TREE;
919
920 code = gimple_assign_rhs_code (stmt);
921 rhs_class = gimple_assign_rhs_class (stmt);
922 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
923
924 if (rhs_class == GIMPLE_BINARY_RHS)
925 rhs2 = gimple_assign_rhs2 (stmt);
926
927 /* Handle unary rhs and binary rhs with integer constants as second
928 operand. */
929
930 if (rhs_class == GIMPLE_UNARY_RHS
931 || (rhs_class == GIMPLE_BINARY_RHS
932 && TREE_CODE (rhs2) == INTEGER_CST))
933 {
934 if (code != BIT_AND_EXPR
935 && code != LSHIFT_EXPR
936 && code != RSHIFT_EXPR
937 && code != LROTATE_EXPR
938 && code != RROTATE_EXPR
939 && code != NOP_EXPR
940 && code != CONVERT_EXPR)
941 return NULL_TREE;
942
943 source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
944
945 /* If find_bswap_1 returned NULL STMT is a leaf node and we have
946 to initialize the symbolic number. */
947 if (!source_expr1)
948 {
949 /* Set up the symbolic number N by setting each byte to a
950 value between 1 and the byte size of rhs1. The highest
f9a210c9 951 order byte is set to n->size and the lowest order
952 byte to 1. */
84cc784c 953 n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
954 if (n->size % BITS_PER_UNIT != 0)
955 return NULL_TREE;
956 n->size /= BITS_PER_UNIT;
957 n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
f9a210c9 958 (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
959
960 if (n->size < (int)sizeof (HOST_WIDEST_INT))
961 n->n &= ((unsigned HOST_WIDEST_INT)1 <<
962 (n->size * BITS_PER_UNIT)) - 1;
84cc784c 963
964 source_expr1 = rhs1;
965 }
966
967 switch (code)
968 {
969 case BIT_AND_EXPR:
970 {
971 int i;
972 unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
973 unsigned HOST_WIDEST_INT tmp = val;
974
975 /* Only constants masking full bytes are allowed. */
976 for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
977 if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
978 return NULL_TREE;
979
980 n->n &= val;
981 }
982 break;
983 case LSHIFT_EXPR:
984 case RSHIFT_EXPR:
985 case LROTATE_EXPR:
986 case RROTATE_EXPR:
987 if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
988 return NULL_TREE;
989 break;
990 CASE_CONVERT:
991 {
992 int type_size;
993
994 type_size = TYPE_PRECISION (gimple_expr_type (stmt));
995 if (type_size % BITS_PER_UNIT != 0)
996 return NULL_TREE;
997
84cc784c 998 if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
999 {
1000 /* If STMT casts to a smaller type mask out the bits not
1001 belonging to the target type. */
84cc784c 1002 n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
1003 }
f9a210c9 1004 n->size = type_size / BITS_PER_UNIT;
84cc784c 1005 }
1006 break;
1007 default:
1008 return NULL_TREE;
1009 };
1010 return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
1011 }
1012
1013 /* Handle binary rhs. */
1014
1015 if (rhs_class == GIMPLE_BINARY_RHS)
1016 {
1017 struct symbolic_number n1, n2;
1018 tree source_expr2;
1019
1020 if (code != BIT_IOR_EXPR)
1021 return NULL_TREE;
1022
1023 if (TREE_CODE (rhs2) != SSA_NAME)
1024 return NULL_TREE;
1025
1026 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1027
1028 switch (code)
1029 {
1030 case BIT_IOR_EXPR:
1031 source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
1032
1033 if (!source_expr1)
1034 return NULL_TREE;
1035
1036 source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
1037
1038 if (source_expr1 != source_expr2
1039 || n1.size != n2.size)
1040 return NULL_TREE;
1041
1042 n->size = n1.size;
1043 n->n = n1.n | n2.n;
1044
1045 if (!verify_symbolic_number_p (n, stmt))
1046 return NULL_TREE;
1047
1048 break;
1049 default:
1050 return NULL_TREE;
1051 }
1052 return source_expr1;
1053 }
1054 return NULL_TREE;
1055}
1056
1057/* Check if STMT completes a bswap implementation consisting of ORs,
1058 SHIFTs and ANDs. Return the source tree expression on which the
1059 byte swap is performed and NULL if no bswap was found. */
1060
1061static tree
1062find_bswap (gimple stmt)
1063{
1064/* The number which the find_bswap result should match in order to
f9a210c9 1065 have a full byte swap. The number is shifted to the left according
1066 to the size of the symbolic number before using it. */
84cc784c 1067 unsigned HOST_WIDEST_INT cmp =
1068 sizeof (HOST_WIDEST_INT) < 8 ? 0 :
f9a210c9 1069 (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
84cc784c 1070
1071 struct symbolic_number n;
1072 tree source_expr;
1073
9bc1852a 1074 /* The last parameter determines the depth search limit. It usually
1075 correlates directly to the number of bytes to be touched. We
1076 increase that number by one here in order to also cover signed ->
1077 unsigned conversions of the src operand as can be seen in
1078 libgcc. */
84cc784c 1079 source_expr = find_bswap_1 (stmt, &n,
1080 TREE_INT_CST_LOW (
9bc1852a 1081 TYPE_SIZE_UNIT (gimple_expr_type (stmt))) + 1);
84cc784c 1082
1083 if (!source_expr)
1084 return NULL_TREE;
1085
1086 /* Zero out the extra bits of N and CMP. */
1087 if (n.size < (int)sizeof (HOST_WIDEST_INT))
1088 {
1089 unsigned HOST_WIDEST_INT mask =
1090 ((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
1091
1092 n.n &= mask;
f9a210c9 1093 cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
84cc784c 1094 }
1095
1096 /* A complete byte swap should make the symbolic number to start
1097 with the largest digit in the highest order byte. */
1098 if (cmp != n.n)
1099 return NULL_TREE;
1100
1101 return source_expr;
1102}
1103
1104/* Find manual byte swap implementations and turn them into a bswap
1105 builtin invokation. */
1106
1107static unsigned int
1108execute_optimize_bswap (void)
1109{
1110 basic_block bb;
1111 bool bswap32_p, bswap64_p;
1112 bool changed = false;
0af25806 1113 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
84cc784c 1114
1115 if (BITS_PER_UNIT != 8)
1116 return 0;
1117
1118 if (sizeof (HOST_WIDEST_INT) < 8)
1119 return 0;
1120
1121 bswap32_p = (built_in_decls[BUILT_IN_BSWAP32]
d6bf3b14 1122 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
84cc784c 1123 bswap64_p = (built_in_decls[BUILT_IN_BSWAP64]
d6bf3b14 1124 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
3328b1fb 1125 || (bswap32_p && word_mode == SImode)));
84cc784c 1126
1127 if (!bswap32_p && !bswap64_p)
1128 return 0;
1129
0af25806 1130 /* Determine the argument type of the builtins. The code later on
1131 assumes that the return and argument type are the same. */
1132 if (bswap32_p)
1133 {
1134 tree fndecl = built_in_decls[BUILT_IN_BSWAP32];
1135 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1136 }
1137
1138 if (bswap64_p)
1139 {
1140 tree fndecl = built_in_decls[BUILT_IN_BSWAP64];
1141 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1142 }
1143
84cc784c 1144 FOR_EACH_BB (bb)
1145 {
1146 gimple_stmt_iterator gsi;
1147
1148 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1149 {
1150 gimple stmt = gsi_stmt (gsi);
0af25806 1151 tree bswap_src, bswap_type;
1152 tree bswap_tmp;
84cc784c 1153 tree fndecl = NULL_TREE;
1154 int type_size;
1155 gimple call;
1156
1157 if (!is_gimple_assign (stmt)
1158 || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
1159 continue;
1160
1161 type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1162
1163 switch (type_size)
1164 {
1165 case 32:
1166 if (bswap32_p)
0af25806 1167 {
1168 fndecl = built_in_decls[BUILT_IN_BSWAP32];
1169 bswap_type = bswap32_type;
1170 }
84cc784c 1171 break;
1172 case 64:
1173 if (bswap64_p)
0af25806 1174 {
1175 fndecl = built_in_decls[BUILT_IN_BSWAP64];
1176 bswap_type = bswap64_type;
1177 }
84cc784c 1178 break;
1179 default:
1180 continue;
1181 }
1182
1183 if (!fndecl)
1184 continue;
1185
1186 bswap_src = find_bswap (stmt);
1187
1188 if (!bswap_src)
1189 continue;
1190
1191 changed = true;
0af25806 1192
1193 bswap_tmp = bswap_src;
1194
1195 /* Convert the src expression if necessary. */
1196 if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1197 {
1198 gimple convert_stmt;
1199
1200 bswap_tmp = create_tmp_var (bswap_type, "bswapsrc");
1201 add_referenced_var (bswap_tmp);
1202 bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1203
1204 convert_stmt = gimple_build_assign_with_ops (
1205 CONVERT_EXPR, bswap_tmp, bswap_src, NULL);
1206 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1207 }
1208
1209 call = gimple_build_call (fndecl, 1, bswap_tmp);
1210
1211 bswap_tmp = gimple_assign_lhs (stmt);
1212
1213 /* Convert the result if necessary. */
1214 if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1215 {
1216 gimple convert_stmt;
1217
1218 bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
1219 add_referenced_var (bswap_tmp);
1220 bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1221 convert_stmt = gimple_build_assign_with_ops (
1222 CONVERT_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
1223 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1224 }
1225
1226 gimple_call_set_lhs (call, bswap_tmp);
84cc784c 1227
1228 if (dump_file)
1229 {
1230 fprintf (dump_file, "%d bit bswap implementation found at: ",
1231 (int)type_size);
1232 print_gimple_stmt (dump_file, stmt, 0, 0);
1233 }
1234
1235 gsi_insert_after (&gsi, call, GSI_SAME_STMT);
1236 gsi_remove (&gsi, true);
1237 }
1238 }
1239
1240 return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
1241 | TODO_verify_stmts : 0);
1242}
1243
1244static bool
1245gate_optimize_bswap (void)
1246{
1247 return flag_expensive_optimizations && optimize;
1248}
1249
1250struct gimple_opt_pass pass_optimize_bswap =
1251{
1252 {
1253 GIMPLE_PASS,
1254 "bswap", /* name */
1255 gate_optimize_bswap, /* gate */
1256 execute_optimize_bswap, /* execute */
1257 NULL, /* sub */
1258 NULL, /* next */
1259 0, /* static_pass_number */
1260 TV_NONE, /* tv_id */
1261 PROP_ssa, /* properties_required */
1262 0, /* properties_provided */
1263 0, /* properties_destroyed */
1264 0, /* todo_flags_start */
1265 0 /* todo_flags_finish */
1266 }
1267};
62be004c 1268
7e4c867e 1269/* Return true if RHS is a suitable operand for a widening multiplication.
1270 There are two cases:
1271
1272 - RHS makes some value twice as wide. Store that value in *NEW_RHS_OUT
1273 if so, and store its type in *TYPE_OUT.
1274
1275 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
1276 but leave *TYPE_OUT untouched. */
00f4f705 1277
1278static bool
7e4c867e 1279is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out)
1280{
1281 gimple stmt;
1282 tree type, type1, rhs1;
1283 enum tree_code rhs_code;
1284
1285 if (TREE_CODE (rhs) == SSA_NAME)
1286 {
1287 type = TREE_TYPE (rhs);
1288 stmt = SSA_NAME_DEF_STMT (rhs);
1289 if (!is_gimple_assign (stmt))
1290 return false;
1291
1292 rhs_code = gimple_assign_rhs_code (stmt);
1293 if (TREE_CODE (type) == INTEGER_TYPE
1294 ? !CONVERT_EXPR_CODE_P (rhs_code)
1295 : rhs_code != FIXED_CONVERT_EXPR)
1296 return false;
1297
1298 rhs1 = gimple_assign_rhs1 (stmt);
1299 type1 = TREE_TYPE (rhs1);
1300 if (TREE_CODE (type1) != TREE_CODE (type)
1301 || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
1302 return false;
1303
1304 *new_rhs_out = rhs1;
1305 *type_out = type1;
1306 return true;
1307 }
1308
1309 if (TREE_CODE (rhs) == INTEGER_CST)
1310 {
1311 *new_rhs_out = rhs;
1312 *type_out = NULL;
1313 return true;
1314 }
1315
1316 return false;
1317}
1318
1319/* Return true if STMT performs a widening multiplication. If so,
1320 store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT
1321 respectively. Also fill *RHS1_OUT and *RHS2_OUT such that converting
1322 those operands to types *TYPE1_OUT and *TYPE2_OUT would give the
1323 operands of the multiplication. */
1324
1325static bool
1326is_widening_mult_p (gimple stmt,
1327 tree *type1_out, tree *rhs1_out,
1328 tree *type2_out, tree *rhs2_out)
00f4f705 1329{
00f4f705 1330 tree type;
1331
1332 type = TREE_TYPE (gimple_assign_lhs (stmt));
7e4c867e 1333 if (TREE_CODE (type) != INTEGER_TYPE
1334 && TREE_CODE (type) != FIXED_POINT_TYPE)
1335 return false;
00f4f705 1336
7e4c867e 1337 if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out))
00f4f705 1338 return false;
1339
7e4c867e 1340 if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out))
1341 return false;
00f4f705 1342
7e4c867e 1343 if (*type1_out == NULL)
00f4f705 1344 {
7e4c867e 1345 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
00f4f705 1346 return false;
7e4c867e 1347 *type1_out = *type2_out;
00f4f705 1348 }
00f4f705 1349
7e4c867e 1350 if (*type2_out == NULL)
00f4f705 1351 {
7e4c867e 1352 if (!int_fits_type_p (*rhs2_out, *type1_out))
00f4f705 1353 return false;
7e4c867e 1354 *type2_out = *type1_out;
00f4f705 1355 }
00f4f705 1356
7e4c867e 1357 return true;
1358}
00f4f705 1359
7e4c867e 1360/* Process a single gimple statement STMT, which has a MULT_EXPR as
1361 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
1362 value is true iff we converted the statement. */
1363
1364static bool
1365convert_mult_to_widen (gimple stmt)
1366{
1367 tree lhs, rhs1, rhs2, type, type1, type2;
1368 enum insn_code handler;
1369
1370 lhs = gimple_assign_lhs (stmt);
1371 type = TREE_TYPE (lhs);
1372 if (TREE_CODE (type) != INTEGER_TYPE)
00f4f705 1373 return false;
1374
7e4c867e 1375 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
00f4f705 1376 return false;
1377
7e4c867e 1378 if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2))
1379 handler = optab_handler (umul_widen_optab, TYPE_MODE (type));
1380 else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2))
1381 handler = optab_handler (smul_widen_optab, TYPE_MODE (type));
00f4f705 1382 else
7e4c867e 1383 handler = optab_handler (usmul_widen_optab, TYPE_MODE (type));
1384
1385 if (handler == CODE_FOR_nothing)
1386 return false;
1387
1388 gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1));
1389 gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
00f4f705 1390 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
1391 update_stmt (stmt);
1392 return true;
1393}
1394
1395/* Process a single gimple statement STMT, which is found at the
1396 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
1397 rhs (given by CODE), and try to convert it into a
1398 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
1399 is true iff we converted the statement. */
1400
1401static bool
1402convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
1403 enum tree_code code)
1404{
1405 gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
7e4c867e 1406 tree type, type1, type2;
00f4f705 1407 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
1408 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
1409 optab this_optab;
1410 enum tree_code wmult_code;
1411
1412 lhs = gimple_assign_lhs (stmt);
1413 type = TREE_TYPE (lhs);
7e4c867e 1414 if (TREE_CODE (type) != INTEGER_TYPE
1415 && TREE_CODE (type) != FIXED_POINT_TYPE)
00f4f705 1416 return false;
1417
1418 if (code == MINUS_EXPR)
1419 wmult_code = WIDEN_MULT_MINUS_EXPR;
1420 else
1421 wmult_code = WIDEN_MULT_PLUS_EXPR;
1422
00f4f705 1423 rhs1 = gimple_assign_rhs1 (stmt);
1424 rhs2 = gimple_assign_rhs2 (stmt);
1425
1426 if (TREE_CODE (rhs1) == SSA_NAME)
1427 {
1428 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
1429 if (is_gimple_assign (rhs1_stmt))
1430 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
1431 }
1432 else
1433 return false;
1434
1435 if (TREE_CODE (rhs2) == SSA_NAME)
1436 {
1437 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1438 if (is_gimple_assign (rhs2_stmt))
1439 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
1440 }
1441 else
1442 return false;
1443
7e4c867e 1444 if (code == PLUS_EXPR && rhs1_code == MULT_EXPR)
00f4f705 1445 {
7e4c867e 1446 if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
1447 &type2, &mult_rhs2))
00f4f705 1448 return false;
7e4c867e 1449 add_rhs = rhs2;
00f4f705 1450 }
7e4c867e 1451 else if (rhs2_code == MULT_EXPR)
00f4f705 1452 {
815a0224 1453 if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
7e4c867e 1454 &type2, &mult_rhs2))
00f4f705 1455 return false;
7e4c867e 1456 add_rhs = rhs1;
00f4f705 1457 }
7e4c867e 1458 else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR)
00f4f705 1459 {
1460 mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt);
1461 mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt);
815a0224 1462 type1 = TREE_TYPE (mult_rhs1);
1463 type2 = TREE_TYPE (mult_rhs2);
00f4f705 1464 add_rhs = rhs2;
1465 }
1466 else if (rhs2_code == WIDEN_MULT_EXPR)
1467 {
1468 mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt);
1469 mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt);
815a0224 1470 type1 = TREE_TYPE (mult_rhs1);
1471 type2 = TREE_TYPE (mult_rhs2);
00f4f705 1472 add_rhs = rhs1;
1473 }
1474 else
1475 return false;
1476
815a0224 1477 if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
1478 return false;
1479
1480 /* Verify that the machine can perform a widening multiply
1481 accumulate in this mode/signedness combination, otherwise
1482 this transformation is likely to pessimize code. */
1483 this_optab = optab_for_tree_code (wmult_code, type1, optab_default);
1484 if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
1485 return false;
1486
00f4f705 1487 /* ??? May need some type verification here? */
1488
815a0224 1489 gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code,
1490 fold_convert (type1, mult_rhs1),
1491 fold_convert (type2, mult_rhs2),
00f4f705 1492 add_rhs);
1493 update_stmt (gsi_stmt (*gsi));
1494 return true;
1495}
1496
b9be572e 1497/* Combine the multiplication at MUL_STMT with uses in additions and
1498 subtractions to form fused multiply-add operations. Returns true
1499 if successful and MUL_STMT should be removed. */
1500
1501static bool
1502convert_mult_to_fma (gimple mul_stmt)
1503{
1504 tree mul_result = gimple_assign_lhs (mul_stmt);
1505 tree type = TREE_TYPE (mul_result);
44579526 1506 gimple use_stmt, neguse_stmt, fma_stmt;
b9be572e 1507 use_operand_p use_p;
1508 imm_use_iterator imm_iter;
1509
1510 if (FLOAT_TYPE_P (type)
1511 && flag_fp_contract_mode == FP_CONTRACT_OFF)
1512 return false;
1513
1514 /* We don't want to do bitfield reduction ops. */
1515 if (INTEGRAL_TYPE_P (type)
1516 && (TYPE_PRECISION (type)
1517 != GET_MODE_PRECISION (TYPE_MODE (type))))
1518 return false;
1519
1520 /* If the target doesn't support it, don't generate it. We assume that
1521 if fma isn't available then fms, fnma or fnms are not either. */
1522 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
1523 return false;
1524
1525 /* Make sure that the multiplication statement becomes dead after
1526 the transformation, thus that all uses are transformed to FMAs.
1527 This means we assume that an FMA operation has the same cost
1528 as an addition. */
1529 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
1530 {
1531 enum tree_code use_code;
44579526 1532 tree result = mul_result;
1533 bool negate_p = false;
b9be572e 1534
1535 use_stmt = USE_STMT (use_p);
1536
b9be572e 1537 /* For now restrict this operations to single basic blocks. In theory
1538 we would want to support sinking the multiplication in
1539 m = a*b;
1540 if ()
1541 ma = m + c;
1542 else
1543 d = m;
1544 to form a fma in the then block and sink the multiplication to the
1545 else block. */
1546 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
1547 return false;
1548
44579526 1549 if (!is_gimple_assign (use_stmt))
b9be572e 1550 return false;
1551
44579526 1552 use_code = gimple_assign_rhs_code (use_stmt);
1553
1554 /* A negate on the multiplication leads to FNMA. */
1555 if (use_code == NEGATE_EXPR)
1556 {
1557 result = gimple_assign_lhs (use_stmt);
1558
1559 /* Make sure the negate statement becomes dead with this
1560 single transformation. */
1561 if (!single_imm_use (gimple_assign_lhs (use_stmt),
1562 &use_p, &neguse_stmt))
1563 return false;
1564
1565 /* Re-validate. */
1566 use_stmt = neguse_stmt;
1567 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
1568 return false;
1569 if (!is_gimple_assign (use_stmt))
1570 return false;
1571
1572 use_code = gimple_assign_rhs_code (use_stmt);
1573 negate_p = true;
1574 }
b9be572e 1575
44579526 1576 switch (use_code)
1577 {
1578 case MINUS_EXPR:
8a9d0572 1579 if (gimple_assign_rhs2 (use_stmt) == result)
1580 negate_p = !negate_p;
1581 break;
44579526 1582 case PLUS_EXPR:
44579526 1583 break;
44579526 1584 default:
1585 /* FMA can only be formed from PLUS and MINUS. */
1586 return false;
1587 }
b9be572e 1588
44579526 1589 /* We can't handle a * b + a * b. */
1590 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
1591 return false;
8a9d0572 1592
1593 /* While it is possible to validate whether or not the exact form
1594 that we've recognized is available in the backend, the assumption
1595 is that the transformation is never a loss. For instance, suppose
1596 the target only has the plain FMA pattern available. Consider
1597 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
1598 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
1599 still have 3 operations, but in the FMA form the two NEGs are
1600 independant and could be run in parallel. */
b9be572e 1601 }
1602
1603 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
1604 {
b9be572e 1605 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
44579526 1606 enum tree_code use_code = gimple_assign_rhs_code (use_stmt);
1607 tree addop, mulop1, result = mul_result;
1608 bool negate_p = false;
b9be572e 1609
44579526 1610 if (use_code == NEGATE_EXPR)
1611 {
1612 result = gimple_assign_lhs (use_stmt);
1613 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
1614 gsi_remove (&gsi, true);
1615 release_defs (use_stmt);
1616
1617 use_stmt = neguse_stmt;
1618 gsi = gsi_for_stmt (use_stmt);
1619 use_code = gimple_assign_rhs_code (use_stmt);
1620 negate_p = true;
1621 }
1622
1623 if (gimple_assign_rhs1 (use_stmt) == result)
b9be572e 1624 {
1625 addop = gimple_assign_rhs2 (use_stmt);
1626 /* a * b - c -> a * b + (-c) */
1627 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
1628 addop = force_gimple_operand_gsi (&gsi,
1629 build1 (NEGATE_EXPR,
1630 type, addop),
1631 true, NULL_TREE, true,
1632 GSI_SAME_STMT);
1633 }
1634 else
1635 {
1636 addop = gimple_assign_rhs1 (use_stmt);
1637 /* a - b * c -> (-b) * c + a */
1638 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
44579526 1639 negate_p = !negate_p;
b9be572e 1640 }
1641
44579526 1642 mulop1 = gimple_assign_rhs1 (mul_stmt);
1643 if (negate_p)
1644 mulop1 = force_gimple_operand_gsi (&gsi,
1645 build1 (NEGATE_EXPR,
1646 type, mulop1),
1647 true, NULL_TREE, true,
1648 GSI_SAME_STMT);
1649
b9be572e 1650 fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
1651 gimple_assign_lhs (use_stmt),
1652 mulop1,
1653 gimple_assign_rhs2 (mul_stmt),
1654 addop);
1655 gsi_replace (&gsi, fma_stmt, true);
1656 }
1657
1658 return true;
1659}
1660
62be004c 1661/* Find integer multiplications where the operands are extended from
1662 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
1663 where appropriate. */
1664
1665static unsigned int
1666execute_optimize_widening_mul (void)
1667{
62be004c 1668 basic_block bb;
1669
1670 FOR_EACH_BB (bb)
1671 {
1672 gimple_stmt_iterator gsi;
1673
b9be572e 1674 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
62be004c 1675 {
1676 gimple stmt = gsi_stmt (gsi);
00f4f705 1677 enum tree_code code;
62be004c 1678
b9be572e 1679 if (is_gimple_assign (stmt))
1680 {
1681 code = gimple_assign_rhs_code (stmt);
1682 switch (code)
1683 {
1684 case MULT_EXPR:
1685 if (!convert_mult_to_widen (stmt)
1686 && convert_mult_to_fma (stmt))
1687 {
1688 gsi_remove (&gsi, true);
1689 release_defs (stmt);
1690 continue;
1691 }
1692 break;
1693
1694 case PLUS_EXPR:
1695 case MINUS_EXPR:
1696 convert_plusminus_to_widen (&gsi, stmt, code);
1697 break;
62be004c 1698
b9be572e 1699 default:;
1700 }
1701 }
1702 gsi_next (&gsi);
62be004c 1703 }
1704 }
00f4f705 1705
b9be572e 1706 return 0;
62be004c 1707}
1708
1709static bool
1710gate_optimize_widening_mul (void)
1711{
1712 return flag_expensive_optimizations && optimize;
1713}
1714
1715struct gimple_opt_pass pass_optimize_widening_mul =
1716{
1717 {
1718 GIMPLE_PASS,
1719 "widening_mul", /* name */
1720 gate_optimize_widening_mul, /* gate */
1721 execute_optimize_widening_mul, /* execute */
1722 NULL, /* sub */
1723 NULL, /* next */
1724 0, /* static_pass_number */
1725 TV_NONE, /* tv_id */
1726 PROP_ssa, /* properties_required */
1727 0, /* properties_provided */
1728 0, /* properties_destroyed */
1729 0, /* todo_flags_start */
b9be572e 1730 TODO_verify_ssa
1731 | TODO_verify_stmts
1732 | TODO_dump_func
1733 | TODO_update_ssa /* todo_flags_finish */
62be004c 1734 }
1735};