/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, with some experiments it was found that the
   transformation is not always useful if there are only two divisions
   by the same divisor.  This is probably because modern processors
   can pipeline the divisions; on older, in-order processors it should
   still be effective to optimize two divisions by the same number.
   We make this a param, and it shall be called N in the remainder of
   this comment.

   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, instead, we can insert
   divisions elsewhere, which will be in blocks that are common dominators
   of those that have the division).

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has less than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst-case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.c, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
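
/* As a rough illustration of the pass (a sketch, not an exhaustive
   description): suppose block B1 computes T1 = A / D and block B2,
   dominated by B1 and post-dominating it, computes T2 = C / D.
   register_division_in records both blocks, insert_bb links them as
   B1 -> { B2 } in the subset of the dominator tree, and compute_merit
   annotates B1 with num_divisions == 2.  With a threshold N of 2,
   insert_reciprocals then emits RECIP = 1.0 / D just before the
   division in B1, and replace_reciprocal rewrites both statements
   into T1 = A * RECIP and T2 = C * RECIP.  */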

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "builtins.h"
#include "params.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "optabs-libfuncs.h"
#include "tree-eh.h"
#include "targhooks.h"

/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence" in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};

static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
} sincos_stats;

static struct
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;
} widen_mul_stats;

/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;


/* Allocate and return a new struct occurrence for basic block BB,
   whose children list is headed by CHILDREN.  */
static struct occurrence *
occ_new (basic_block bb, struct occurrence *children)
{
  struct occurrence *occ;

  bb->aux = occ = occ_pool->allocate ();
  memset (occ, 0, sizeof (struct occurrence));

  occ->bb = bb;
  occ->children = children;
  return occ;
}

/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}

/* Register that we found a division in BB.  */

static inline void
register_division_in (basic_block bb)
{
  struct occurrence *occ;

  occ = (struct occurrence *) bb->aux;
  if (!occ)
    {
      occ = occ_new (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions++;
}


/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      if (flag_exceptions)
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}


/* Return whether USE_STMT is a floating-point division by DEF.  */
static inline bool
is_division_by (gimple *use_stmt, tree def)
{
  return is_gimple_assign (use_stmt)
	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as valid division, as we are getting
	    confused later by replacing all immediate uses x in such
	    a stmt.  */
	 && gimple_assign_rhs1 (use_stmt) != def;
}

/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_GSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, int threshold)
{
  tree type;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      && occ->num_divisions >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  occ->recip_def = recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
}


/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}

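/* For instance, given a previously inserted RECIP = 1.0 / D recorded in
   OCC->recip_def, a dominated use X = A / D is rewritten in place into
   X = A * RECIP by flipping the rhs code and divisor operand as above.  */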

/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  occ->bb->aux = NULL;
  occ_pool->remove (occ);

  /* Now ensure that we don't recurse unless it is necessary.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}


/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p;
  imm_use_iterator use_iter;
  struct occurrence *occ;
  int count = 0, threshold;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt));
	  count++;
	}
    }

  /* Do the expensive part only if we can hope to optimize something.  */
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
  if (count >= threshold)
    {
      gimple *use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
	}

      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	}
    }

  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}

/* Return an internal function that implements the reciprocal of CALL,
   or IFN_LAST if there is no such function that the target supports.  */

internal_fn
internal_fn_reciprocal (gcall *call)
{
  internal_fn ifn;

  switch (gimple_call_combined_fn (call))
    {
    CASE_CFN_SQRT:
      ifn = IFN_RSQRT;
      break;

    default:
      return IFN_LAST;
    }

  tree_pair types = direct_internal_fn_types (ifn, call);
  if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
    return IFN_LAST;

  return ifn;
}

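/* As a sketch of how this is used below: on a target that supports
   IFN_RSQRT, the sequence

     t = sqrtf (b);
     x = a / t;

   is rewritten by this pass into roughly

     t = .RSQRT (b);
     x = a * t;

   replacing the square root and division with a reciprocal square root
   and a multiplication.  */
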
/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */
namespace {

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals

unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1))
		{
		  bool fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;
		  tree fndecl = NULL_TREE;

		  gcall *call = as_a <gcall *> (stmt1);
		  internal_fn ifn = internal_fn_reciprocal (call);
		  if (ifn == IFN_LAST)
		    {
		      fndecl = gimple_call_fndecl (call);
		      if (!fndecl
			  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
			continue;
		      fndecl = targetm.builtin_reciprocal (fndecl);
		      if (!fndecl)
			continue;
		    }

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (call, arg1);
		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
		    {
		      auto_vec<tree, 4> args;
		      for (unsigned int i = 0;
			   i < gimple_call_num_args (call); i++)
			args.safe_push (gimple_call_arg (call, i));
		      gcall *stmt2;
		      if (ifn == IFN_LAST)
			stmt2 = gimple_build_call_vec (fndecl, args);
		      else
			stmt2 = gimple_build_call_internal_vec (ifn, args);
		      gimple_call_set_lhs (stmt2, arg1);
		      if (gimple_vdef (call))
			{
			  gimple_set_vdef (stmt2, gimple_vdef (call));
			  SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
			}
		      gimple_call_set_nothrow (stmt2,
					       gimple_call_nothrow_p (call));
		      gimple_set_vuse (stmt2, gimple_vuse (call));
		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
		      gsi_replace (&gsi2, stmt2, true);
		    }
		  else
		    {
		      if (ifn == IFN_LAST)
			gimple_call_set_fndecl (call, fndecl);
		      else
			gimple_call_set_internal_fn (call, ifn);
		      update_stmt (call);
		    }
		  reciprocal_stats.rfuncs_inserted++;

		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}

/* Records an occurrence at statement USE_STMT in the vector of
   statements STMTS if it is dominated by *TOP_BB or dominates it or
   *TOP_BB is not yet initialized.  Returns true if the occurrence
   was pushed on the vector.  Adjusts *TOP_BB to be the basic block
   dominating all statements in the vector.  */

static bool
maybe_record_sincos (vec<gimple *> *stmts,
		     basic_block *top_bb, gimple *use_stmt)
{
  basic_block use_bb = gimple_bb (use_stmt);
  if (*top_bb
      && (*top_bb == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
    stmts->safe_push (use_stmt);
  else if (!*top_bb
	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
    {
      stmts->safe_push (use_stmt);
      *top_bb = use_bb;
    }
  else
    return false;

  return true;
}

/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  */

static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  type = TREE_TYPE (name);
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  return cfg_changed;
}

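/* For example, with the transformation above a block computing

     s = sinf (x);
     c = cosf (x);

   becomes, roughly,

     sincostmp = __builtin_cexpif (x);
     s = __imag sincostmp;
     c = __real sincostmp;

   so only one call remains (a sketch of the GIMPLE-level effect; the
   temporary name and the builtin's spelling vary with the type).  */
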
/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3

/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical Algorithms".  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };

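/* As a worked example of the table (the operand order of the generated
   chain may vary): powi_table[13] == 10, so x**13 is split as
   x**10 * x**3.  Recursing through the table yields the chain

     t2  = x  * x
     t3  = x  * t2
     t5  = t2 * t3
     t10 = t5 * t5
     t13 = t3 * t10

   i.e. five multiplications, which is also what powi_cost (13)
   computes below.  */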

/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */

static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  /* If we've already calculated this exponent, then this evaluation
     doesn't require any additional multiplications.  */
  if (cache[n])
    return 0;

  cache[n] = true;
  return powi_lookup_cost (n - powi_table[n], cache)
	 + powi_lookup_cost (powi_table[n], cache) + 1;
}

/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  This
   function needs to be kept in sync with powi_as_mults below.  */

static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = (n < 0) ? -n : n;

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result++;
	}
    }

  return result + powi_lookup_cost (val, cache);
}

/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

static tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  if (n == 0)
    return build_real (type, dconst1);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}

/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */

static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
			    tree arg0, HOST_WIDE_INT n)
{
  /* Avoid largest negative number.  */
  if (n != -n
      && ((n >= -1 && n <= 2)
	  || (optimize_function_for_speed_p (cfun)
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}
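
/* As a sketch of the output: for powi (x, -3) when optimizing for
   speed, powi_cost (-3) is 2, so the call above expands to roughly

     t1 = x * x
     t2 = t1 * x
     t3 = 1.0 / t2

   with t3 holding the result (temporary names are illustrative).  */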

/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */

static tree
build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
		       tree fn, tree arg)
{
  gcall *call_stmt;
  tree ssa_target;

  call_stmt = gimple_build_call (fn, 1, arg);
  ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
  gimple_set_lhs (call_stmt, ssa_target);
  gimple_set_location (call_stmt, loc);
  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Build a gimple binary operation with the given CODE and arguments
   ARG0, ARG1, assigning the result to a new SSA name with NAME.
   Insert the statement prior to GSI's current position, and
   return the fresh SSA name.  */

static tree
build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
			const char *name, enum tree_code code,
			tree arg0, tree arg1)
{
  tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
  gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return result;
}

/* Build a gimple reference operation with the given CODE and argument
   ARG0, assigning the result to a new SSA name of TYPE with NAME.
   Insert the statement prior to GSI's current position, and return
   the fresh SSA name.  */

static inline tree
build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
		      const char *name, enum tree_code code, tree arg0)
{
  tree result = make_temp_ssa_name (type, NULL, name);
  gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return result;
}

/* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
   prior to GSI's current position, and return the fresh SSA name.  */

static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
		       tree type, tree val)
{
  tree result = make_ssa_name (type);
  gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return result;
}

struct pow_synth_sqrt_info
{
  bool *factors;
  unsigned int deepest;
  unsigned int num_mults;
};

/* Return true iff the real value C can be represented as a
   sum of powers of 0.5 up to N.  That is:
   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
   Record in INFO the various parameters of the synthesis algorithm such
   as the factors a[i], the maximum 0.5 power and the number of
   multiplications that will be required.  */

bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened bail out now.  */
      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
	 of powers of 0.5.  */
      if (real_equal (&res, &dconst0))
	{
	  info->factors[i] = true;
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
    }
  return false;
}

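/* For example, 0.625 == 0.5 + 0.125 == 0.5**1 + 0.5**3, so with N >= 3
   the loop above records factors == { 1, 0, 1 }, deepest == 3 and
   num_mults == 1, and pow (x, 0.625) can then be synthesized as
   sqrt (x) * sqrt (sqrt (sqrt (x))) (a worked illustration of the
   bookkeeping, not an additional code path).  */
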
/* Return the tree corresponding to FN being applied
   to ARG N times at GSI and LOC.
   Look up previous results from CACHE if need be.
   cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */

static tree
get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
	      tree fn, location_t loc, tree *cache)
{
  tree res = cache[n];
  if (!res)
    {
      tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
      res = build_and_insert_call (gsi, loc, fn, prev);
      cache[n] = res;
    }

  return res;
}

/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   "foo (foo (x))".  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  if (n == 0)
    fprintf (stream, "%s", arg);
  else
    {
      fprintf (stream, "%s (", fname);
      print_nested_fn (stream, fname, arg, n - 1);
      fprintf (stream, ")");
    }
}

/* Print to STREAM the fractional sequence of sqrt chains
   applied to ARG, described by INFO.  Used for the dump file.  */

static void
dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
			       struct pow_synth_sqrt_info *info)
{
  for (unsigned int i = 0; i < info->deepest; i++)
    {
      bool is_set = info->factors[i];
      if (is_set)
	{
	  print_nested_fn (stream, "sqrt", arg, i + 1);
	  if (i != info->deepest - 1)
	    fprintf (stream, " * ");
	}
    }
}

/* Print to STREAM a representation of raising ARG to an integer
   power N.  Used for the dump file.  */

static void
dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
{
  if (n > 1)
    fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
  else if (n == 1)
    fprintf (stream, "%s", arg);
}

/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
   square roots.  Place at GSI and LOC.  Limit the maximum depth
   of the sqrt chains to MAX_DEPTH.  Return the tree holding the
   result of the expanded sequence or NULL_TREE if the expansion failed.

   This routine assumes that ARG1 is a real number with a fractional part
   (the integer exponent case will have been handled earlier in
   gimple_expand_builtin_pow).

   For ARG1 > 0.0:
   * For ARG1 composed of a whole part WHOLE_PART and a fractional part
     FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
     FRAC_PART == ARG1 - WHOLE_PART:
     Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
     POW (ARG0, FRAC_PART) is expanded as a product of square root chains
     if it can be expressed as such, that is if FRAC_PART satisfies:
     FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
     where integer a[i] is either 0 or 1.

     Example:
     POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
       --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))

   For ARG1 < 0.0 there are two approaches:
   * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
	 is calculated as above.

     Example:
     POW (x, -5.625) == 1.0 / POW (x, 5.625)
       --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))

   * (B) : WHOLE_PART := - ceil (abs (ARG1))
	   FRAC_PART := ARG1 - WHOLE_PART
	   and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
     Example:
     POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
       --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))

   For ARG1 < 0.0 we choose between (A) and (B) depending on
   how many multiplications we'd have to do.
   So, for the example in (B): POW (x, -5.875), if we were to
   follow algorithm (A) we would produce:
   1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
   which contains more multiplications than approach (B).

   Hopefully, this approach will eliminate potentially expensive POW library
   calls when unsafe floating point math is enabled and allow the compiler to
   further optimise the multiplies, square roots and divides produced by this
   function.  */

static tree
expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
{
  tree type = TREE_TYPE (arg0);
  machine_mode mode = TYPE_MODE (type);
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  bool one_over = true;

  if (!sqrtfn)
    return NULL_TREE;

  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);

  gcc_assert (max_depth > 0);
  tree *cache = XALLOCAVEC (tree, max_depth + 1);

  struct pow_synth_sqrt_info synth_info;
  synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
  synth_info.deepest = 0;
  synth_info.num_mults = 0;

  bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
  REAL_VALUE_TYPE exp = real_value_abs (&exp_init);

  /* The whole and fractional parts of exp.  */
  REAL_VALUE_TYPE whole_part;
  REAL_VALUE_TYPE frac_part;

  real_floor (&whole_part, mode, &exp);
  real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);

  REAL_VALUE_TYPE ceil_whole = dconst0;
  REAL_VALUE_TYPE ceil_fract = dconst0;

  if (neg_exp)
    {
      real_ceil (&ceil_whole, mode, &exp);
      real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
    }

  if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
    return NULL_TREE;

  /* Check whether it's more profitable to not use 1.0 / ...  */
  if (neg_exp)
    {
      struct pow_synth_sqrt_info alt_synth_info;
      alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
      alt_synth_info.deepest = 0;
      alt_synth_info.num_mults = 0;

      if (representable_as_half_series_p (ceil_fract, max_depth,
					  &alt_synth_info)
	  && alt_synth_info.deepest <= synth_info.deepest
	  && alt_synth_info.num_mults < synth_info.num_mults)
	{
	  whole_part = ceil_whole;
	  frac_part = ceil_fract;
	  synth_info.deepest = alt_synth_info.deepest;
	  synth_info.num_mults = alt_synth_info.num_mults;
	  memcpy (synth_info.factors, alt_synth_info.factors,
		  (max_depth + 1) * sizeof (bool));
	  one_over = false;
	}
    }

  HOST_WIDE_INT n = real_to_integer (&whole_part);
  REAL_VALUE_TYPE cint;
  real_from_integer (&cint, VOIDmode, n, SIGNED);

  if (!real_identical (&whole_part, &cint))
    return NULL_TREE;

  if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
    return NULL_TREE;

  memset (cache, 0, (max_depth + 1) * sizeof (tree));

  tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;

  /* Calculate the integer part of the exponent.  */
  if (n > 1)
    {
      integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
      if (!integer_res)
	return NULL_TREE;
    }

  if (dump_file)
    {
      char string[64];

      real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
      fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);

      if (neg_exp)
	{
	  if (one_over)
	    {
	      fprintf (dump_file, "1.0 / (");
	      dump_integer_part (dump_file, "x", n);
	      if (n > 0)
		fprintf (dump_file, " * ");
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, ")");
	    }
	  else
	    {
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, " / (");
	      dump_integer_part (dump_file, "x", n);
	      fprintf (dump_file, ")");
	    }
	}
      else
	{
	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	  if (n > 0)
	    fprintf (dump_file, " * ");
	  dump_integer_part (dump_file, "x", n);
	}

      fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
    }

  tree fract_res = NULL_TREE;
  cache[0] = arg0;

  /* Calculate the fractional part of the exponent.  */
  for (unsigned i = 0; i < synth_info.deepest; i++)
    {
      if (synth_info.factors[i])
	{
	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);

	  if (!fract_res)
	    fract_res = sqrt_chain;
	  else
	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
						fract_res, sqrt_chain);
	}
    }

  tree res = NULL_TREE;

  if (neg_exp)
    {
      if (one_over)
	{
	  if (n > 0)
	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					  fract_res, integer_res);
	  else
	    res = fract_res;

	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
					build_real (type, dconst1), res);
	}
      else
	{
	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					fract_res, integer_res);
	}
    }
  else
    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				  fract_res, integer_res);
  return res;
}

/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */

static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
			   tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
  machine_mode mode;
  bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
  bool hw_sqrt_exists, c_is_int, c2_is_int;

  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* Don't perform the operation if flag_signaling_nans is on
     and the operand is a signaling NaN.  */
  if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
      && ((TREE_CODE (arg0) == REAL_CST
	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c_is_int = real_identical (&c, &cint);

  if (c_is_int
      && ((n >= -1 && n <= 2)
	  || (flag_unsafe_math_optimizations
	      && speed_p
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && real_equal (&c, &dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_equal (&c, &dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && speed_p
      && hw_sqrt_exists
      && real_equal (&c, &dconst1_6))
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }

  /* Attempt to expand the POW as a product of square root chains.
     Expand the 0.25 case even when optimising for size.  */
  if (flag_unsafe_math_optimizations
      && sqrtfn
      && hw_sqrt_exists
      && (speed_p || real_equal (&c, &dconst1_4))
      && !HONOR_SIGNED_ZEROS (mode))
    {
      unsigned int max_depth = speed_p
				? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
				: 2;

      tree expand_with_sqrts
	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);

      if (expand_with_sqrts)
	return expand_with_sqrts;
    }

  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c2_is_int = real_identical (&c2, &cint);

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  */
  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_identical (&c2, &c)
      && !c2_is_int
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	{
	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 3));
	  if (!powi_x_ndiv3)
	    return NULL_TREE;
	}

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					      cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}
1710
a5c384c1 1711/* ARG is the argument to a cabs builtin call in GSI with location info
1712 LOC. Create a sequence of statements prior to GSI that calculates
1713 sqrt(R*R + I*I), where R and I are the real and imaginary components
1714 of ARG, respectively. Return an expression holding the result. */
1715
1716static tree
1717gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1718{
03d37e4e 1719 tree real_part, imag_part, addend1, addend2, sum, result;
a5c384c1 1720 tree type = TREE_TYPE (TREE_TYPE (arg));
1721 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
3754d046 1722 machine_mode mode = TYPE_MODE (type);
a5c384c1 1723
1724 if (!flag_unsafe_math_optimizations
1725 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1726 || !sqrtfn
1727 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1728 return NULL_TREE;
1729
03d37e4e 1730 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
a5c384c1 1731 REALPART_EXPR, arg);
03d37e4e 1732 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
a5c384c1 1733 real_part, real_part);
03d37e4e 1734 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
a5c384c1 1735 IMAGPART_EXPR, arg);
03d37e4e 1736 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
a5c384c1 1737 imag_part, imag_part);
03d37e4e 1738 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1739 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
a5c384c1 1740
1741 return result;
1742}
1743
a0315874 1744/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
e9a6c4bc 1745 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
1746 an optimal number of multiplies, when n is a constant. */
a0315874 1747
65b0537f 1748namespace {
1749
1750const pass_data pass_data_cse_sincos =
1751{
1752 GIMPLE_PASS, /* type */
1753 "sincos", /* name */
1754 OPTGROUP_NONE, /* optinfo_flags */
65b0537f 1755 TV_NONE, /* tv_id */
1756 PROP_ssa, /* properties_required */
a153e7b3 1757 PROP_gimple_opt_math, /* properties_provided */
65b0537f 1758 0, /* properties_destroyed */
1759 0, /* todo_flags_start */
8b88439e 1760 TODO_update_ssa, /* todo_flags_finish */
65b0537f 1761};
1762
1763class pass_cse_sincos : public gimple_opt_pass
1764{
1765public:
1766 pass_cse_sincos (gcc::context *ctxt)
1767 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1768 {}
1769
1770 /* opt_pass methods: */
1771 virtual bool gate (function *)
1772 {
1773 /* We no longer require either sincos or cexp, since powi expansion
1774 piggybacks on this pass. */
1775 return optimize;
1776 }
1777
1778 virtual unsigned int execute (function *);
1779
1780}; // class pass_cse_sincos
1781
1782unsigned int
1783pass_cse_sincos::execute (function *fun)
a0315874 1784{
1785 basic_block bb;
4c80086d 1786 bool cfg_changed = false;
a0315874 1787
1788 calculate_dominance_info (CDI_DOMINATORS);
30c4e60d 1789 memset (&sincos_stats, 0, sizeof (sincos_stats));
a0315874 1790
65b0537f 1791 FOR_EACH_BB_FN (bb, fun)
a0315874 1792 {
75a70cf9 1793 gimple_stmt_iterator gsi;
2a155cf0 1794 bool cleanup_eh = false;
a0315874 1795
75a70cf9 1796 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
a0315874 1797 {
42acab1c 1798 gimple *stmt = gsi_stmt (gsi);
a0315874 1799
2a155cf0 1800	  /* Only the last stmt in a bb could throw; no need to call
1801 gimple_purge_dead_eh_edges if we change something in the middle
1802 of a basic block. */
1803 cleanup_eh = false;
1804
fa0793ad 1805 if (is_gimple_call (stmt)
5e8b972c 1806 && gimple_call_lhs (stmt))
a0315874 1807 {
e9a6c4bc 1808 tree arg, arg0, arg1, result;
1809 HOST_WIDE_INT n;
1810 location_t loc;
a0315874 1811
fa0793ad 1812 switch (gimple_call_combined_fn (stmt))
a0315874 1813 {
fa0793ad 1814 CASE_CFN_COS:
1815 CASE_CFN_SIN:
1816 CASE_CFN_CEXPI:
d312d7df 1817 /* Make sure we have either sincos or cexp. */
30f690e0 1818 if (!targetm.libc_has_function (function_c99_math_complex)
1819 && !targetm.libc_has_function (function_sincos))
d312d7df 1820 break;
1821
75a70cf9 1822 arg = gimple_call_arg (stmt, 0);
a0315874 1823 if (TREE_CODE (arg) == SSA_NAME)
4c80086d 1824 cfg_changed |= execute_cse_sincos_1 (arg);
a0315874 1825 break;
1826
fa0793ad 1827 CASE_CFN_POW:
e78306af 1828 arg0 = gimple_call_arg (stmt, 0);
1829 arg1 = gimple_call_arg (stmt, 1);
1830
1831 loc = gimple_location (stmt);
1832 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1833
1834 if (result)
1835 {
1836 tree lhs = gimple_get_lhs (stmt);
1a91d914 1837 gassign *new_stmt = gimple_build_assign (lhs, result);
e78306af 1838 gimple_set_location (new_stmt, loc);
1839 unlink_stmt_vdef (stmt);
1840 gsi_replace (&gsi, new_stmt, true);
2a155cf0 1841 cleanup_eh = true;
bc8a8451 1842 if (gimple_vdef (stmt))
1843 release_ssa_name (gimple_vdef (stmt));
e78306af 1844 }
1845 break;
1846
fa0793ad 1847 CASE_CFN_POWI:
e9a6c4bc 1848 arg0 = gimple_call_arg (stmt, 0);
1849 arg1 = gimple_call_arg (stmt, 1);
e9a6c4bc 1850 loc = gimple_location (stmt);
377db285 1851
6dfe7d53 1852 if (real_minus_onep (arg0))
377db285 1853 {
1854 tree t0, t1, cond, one, minus_one;
1a91d914 1855 gassign *stmt;
377db285 1856
1857 t0 = TREE_TYPE (arg0);
1858 t1 = TREE_TYPE (arg1);
1859 one = build_real (t0, dconst1);
1860 minus_one = build_real (t0, dconstm1);
1861
1862 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
e9cf809e 1863 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1864 arg1, build_int_cst (t1, 1));
377db285 1865 gimple_set_location (stmt, loc);
1866 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1867
1868 result = make_temp_ssa_name (t0, NULL, "powi");
e9cf809e 1869 stmt = gimple_build_assign (result, COND_EXPR, cond,
1870 minus_one, one);
377db285 1871 gimple_set_location (stmt, loc);
1872 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1873 }
1874 else
1875 {
e913b5cd 1876 if (!tree_fits_shwi_p (arg1))
d48be958 1877 break;
1878
e913b5cd 1879 n = tree_to_shwi (arg1);
377db285 1880 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1881 }
e9a6c4bc 1882
1883 if (result)
1884 {
1885 tree lhs = gimple_get_lhs (stmt);
1a91d914 1886 gassign *new_stmt = gimple_build_assign (lhs, result);
e9a6c4bc 1887 gimple_set_location (new_stmt, loc);
a5c384c1 1888 unlink_stmt_vdef (stmt);
1889 gsi_replace (&gsi, new_stmt, true);
2a155cf0 1890 cleanup_eh = true;
bc8a8451 1891 if (gimple_vdef (stmt))
1892 release_ssa_name (gimple_vdef (stmt));
a5c384c1 1893 }
1894 break;
1895
fa0793ad 1896 CASE_CFN_CABS:
a5c384c1 1897 arg0 = gimple_call_arg (stmt, 0);
1898 loc = gimple_location (stmt);
1899 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1900
1901 if (result)
1902 {
1903 tree lhs = gimple_get_lhs (stmt);
1a91d914 1904 gassign *new_stmt = gimple_build_assign (lhs, result);
a5c384c1 1905 gimple_set_location (new_stmt, loc);
e9a6c4bc 1906 unlink_stmt_vdef (stmt);
1907 gsi_replace (&gsi, new_stmt, true);
2a155cf0 1908 cleanup_eh = true;
bc8a8451 1909 if (gimple_vdef (stmt))
1910 release_ssa_name (gimple_vdef (stmt));
e9a6c4bc 1911 }
1912 break;
1913
a0315874 1914 default:;
1915 }
1916 }
1917 }
2a155cf0 1918 if (cleanup_eh)
1919 cfg_changed |= gimple_purge_dead_eh_edges (bb);
a0315874 1920 }
1921
65b0537f 1922 statistics_counter_event (fun, "sincos statements inserted",
30c4e60d 1923 sincos_stats.inserted);
1924
4c80086d 1925 return cfg_changed ? TODO_cleanup_cfg : 0;
a0315874 1926}
1927
cbe8bda8 1928} // anon namespace
1929
1930gimple_opt_pass *
1931make_pass_cse_sincos (gcc::context *ctxt)
1932{
1933 return new pass_cse_sincos (ctxt);
1934}
1935
ee0c17ad 1936/* A symbolic number structure is used to detect byte permutation and selection
1937 patterns of a source. To achieve that, its field N contains an artificial
 1938   number consisting of BITS_PER_MARKER sized markers tracking where each
 1939   byte comes from in the source:
84cc784c 1940
ee0c17ad 1941 0 - target byte has the value 0
1942 FF - target byte has an unknown value (eg. due to sign extension)
 1943   1..size - marker value is the byte index in the source plus one
	     (so the lsb has marker value 1).
470d5bb5 1944
1945 To detect permutations on memory sources (arrays and structures), a symbolic
ee0c17ad 1946 number is also associated:
1947 - a base address BASE_ADDR and an OFFSET giving the address of the source;
1948 - a range which gives the difference between the highest and lowest accessed
1949 memory location to make such a symbolic number;
1950 - the address SRC of the source element of lowest address as a convenience
e20b054d 1951 to easily get BASE_ADDR + offset + lowest bytepos;
 1952   - the number N_OPS of expressions bitwise ORed together, representing
 1953     an approximate cost of the computation.
ee0c17ad 1954
1955 Note 1: the range is different from size as size reflects the size of the
1956 type of the current expression. For instance, for an array char a[],
1957 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
1958 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
1959 time a range of 1.
1960
1961 Note 2: for non-memory sources, range holds the same value as size.
1962
1963 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
84cc784c 1964
1965struct symbolic_number {
3a4303e7 1966 uint64_t n;
7101c0cf 1967 tree type;
470d5bb5 1968 tree base_addr;
1969 tree offset;
1970 HOST_WIDE_INT bytepos;
a9eff296 1971 tree src;
470d5bb5 1972 tree alias_set;
1973 tree vuse;
1974 unsigned HOST_WIDE_INT range;
e20b054d 1975 int n_ops;
84cc784c 1976};
1977
cc76206c 1978#define BITS_PER_MARKER 8
14cbfffb 1979#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
1980#define MARKER_BYTE_UNKNOWN MARKER_MASK
1981#define HEAD_MARKER(n, size) \
1982 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
cc76206c 1983
470d5bb5 1984/* The number which the find_bswap_or_nop_1 result should match in
1985 order to have a nop. The number is masked according to the size of
1986 the symbolic number before using it. */
3a4303e7 1987#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
1988 (uint64_t)0x08070605 << 32 | 0x04030201)
470d5bb5 1989
1990/* The number which the find_bswap_or_nop_1 result should match in
1991 order to have a byte swap. The number is masked according to the
1992 size of the symbolic number before using it. */
3a4303e7 1993#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
1994 (uint64_t)0x01020304 << 32 | 0x05060708)
470d5bb5 1995
84cc784c 1996/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1997 number N. Return false if the requested operation is not permitted
1998 on a symbolic number. */
1999
2000static inline bool
2001do_shift_rotate (enum tree_code code,
2002 struct symbolic_number *n,
2003 int count)
2004{
14cbfffb 2005 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2006 unsigned head_marker;
7101c0cf 2007
cc76206c 2008 if (count % BITS_PER_UNIT != 0)
84cc784c 2009 return false;
cc76206c 2010 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
84cc784c 2011
2012 /* Zero out the extra bits of N in order to avoid them being shifted
2013 into the significant bits. */
cc76206c 2014 if (size < 64 / BITS_PER_MARKER)
2015 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
84cc784c 2016
2017 switch (code)
2018 {
2019 case LSHIFT_EXPR:
2020 n->n <<= count;
2021 break;
2022 case RSHIFT_EXPR:
14cbfffb 2023 head_marker = HEAD_MARKER (n->n, size);
84cc784c 2024 n->n >>= count;
14cbfffb 2025 /* Arithmetic shift of signed type: result is dependent on the value. */
2026 if (!TYPE_UNSIGNED (n->type) && head_marker)
2027 for (i = 0; i < count / BITS_PER_MARKER; i++)
2028 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2029 << ((size - 1 - i) * BITS_PER_MARKER);
84cc784c 2030 break;
2031 case LROTATE_EXPR:
cc76206c 2032 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
84cc784c 2033 break;
2034 case RROTATE_EXPR:
cc76206c 2035 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
84cc784c 2036 break;
2037 default:
2038 return false;
2039 }
0f09ed00 2040 /* Zero unused bits for size. */
cc76206c 2041 if (size < 64 / BITS_PER_MARKER)
2042 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
84cc784c 2043 return true;
2044}
2045
2046/* Perform sanity checking for the symbolic number N and the gimple
2047 statement STMT. */
2048
2049static inline bool
42acab1c 2050verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
84cc784c 2051{
2052 tree lhs_type;
2053
2054 lhs_type = gimple_expr_type (stmt);
2055
2056 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2057 return false;
2058
7101c0cf 2059 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
84cc784c 2060 return false;
2061
2062 return true;
2063}
2064
ee054e83 2065/* Initialize the symbolic number N for the bswap pass from the base element
2066 SRC manipulated by the bitwise OR expression. */
2067
2068static bool
2069init_symbolic_number (struct symbolic_number *n, tree src)
2070{
7101c0cf 2071 int size;
2072
23edc6d1 2073 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
2074 return false;
2075
ee054e83 2076 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
a9eff296 2077 n->src = src;
ee054e83 2078
 2079   /* Set up the symbolic number N by setting each byte to a value between 1 and
 2080      the byte size of SRC.  The highest order byte is set to that size and the
 2081      lowest order byte to 1.  */
7101c0cf 2082 n->type = TREE_TYPE (src);
2083 size = TYPE_PRECISION (n->type);
2084 if (size % BITS_PER_UNIT != 0)
ee054e83 2085 return false;
7101c0cf 2086 size /= BITS_PER_UNIT;
cc76206c 2087 if (size > 64 / BITS_PER_MARKER)
c0dd3a90 2088 return false;
7101c0cf 2089 n->range = size;
ee054e83 2090 n->n = CMPNOP;
e20b054d 2091 n->n_ops = 1;
ee054e83 2092
cc76206c 2093 if (size < 64 / BITS_PER_MARKER)
2094 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
ee054e83 2095
2096 return true;
2097}
2098
470d5bb5 2099/* Check if STMT might be a byte swap or a nop from a memory source and return
2100 the answer. If so, REF is that memory source and the base of the memory area
2101 accessed and the offset of the access from that base are recorded in N. */
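/* A hypothetical example: for a load of the byte at ((char *) p) + 3,
   this would record BASE_ADDR == p and BYTEPOS == 3 in N, so that
   sibling byte loads from P can later be merged by
   perform_symbolic_merge.  */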
2102
2103bool
42acab1c 2104find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
470d5bb5 2105{
2106 /* Leaf node is an array or component ref. Memorize its base and
2107 offset from base to compare to other such leaf node. */
2108 HOST_WIDE_INT bitsize, bitpos;
3754d046 2109 machine_mode mode;
292237f3 2110 int unsignedp, reversep, volatilep;
ee054e83 2111 tree offset, base_addr;
470d5bb5 2112
1e6c1ce6 2113 /* Not prepared to handle PDP endian. */
2114 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
2115 return false;
2116
470d5bb5 2117 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
2118 return false;
2119
ee054e83 2120 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
b3b6e4b5 2121 &unsignedp, &reversep, &volatilep);
470d5bb5 2122
ee054e83 2123 if (TREE_CODE (base_addr) == MEM_REF)
470d5bb5 2124 {
2125 offset_int bit_offset = 0;
ee054e83 2126 tree off = TREE_OPERAND (base_addr, 1);
470d5bb5 2127
2128 if (!integer_zerop (off))
2129 {
ee054e83 2130 offset_int boff, coff = mem_ref_offset (base_addr);
9fdc1ed4 2131 boff = coff << LOG2_BITS_PER_UNIT;
470d5bb5 2132 bit_offset += boff;
2133 }
2134
ee054e83 2135 base_addr = TREE_OPERAND (base_addr, 0);
470d5bb5 2136
2137 /* Avoid returning a negative bitpos as this may wreak havoc later. */
2138 if (wi::neg_p (bit_offset))
2139 {
2140 offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
2141 offset_int tem = bit_offset.and_not (mask);
2142 /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
 2143	     Subtract it from BIT_OFFSET and add it (scaled) to OFFSET.  */
2144 bit_offset -= tem;
9fdc1ed4 2145 tem >>= LOG2_BITS_PER_UNIT;
ee054e83 2146 if (offset)
2147 offset = size_binop (PLUS_EXPR, offset,
470d5bb5 2148 wide_int_to_tree (sizetype, tem));
2149 else
ee054e83 2150 offset = wide_int_to_tree (sizetype, tem);
470d5bb5 2151 }
2152
2153 bitpos += bit_offset.to_shwi ();
2154 }
2155
2156 if (bitpos % BITS_PER_UNIT)
2157 return false;
2158 if (bitsize % BITS_PER_UNIT)
2159 return false;
292237f3 2160 if (reversep)
2161 return false;
470d5bb5 2162
935abe57 2163 if (!init_symbolic_number (n, ref))
2164 return false;
ee054e83 2165 n->base_addr = base_addr;
2166 n->offset = offset;
470d5bb5 2167 n->bytepos = bitpos / BITS_PER_UNIT;
2168 n->alias_set = reference_alias_ptr_type (ref);
2169 n->vuse = gimple_vuse (stmt);
2170 return true;
2171}
2172
f7a40894 2173/* Compute the symbolic number N representing the result of a bitwise OR on 2
2174 symbolic number N1 and N2 whose source statements are respectively
2175 SOURCE_STMT1 and SOURCE_STMT2. */
2176
42acab1c 2177static gimple *
2178perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
2179 gimple *source_stmt2, struct symbolic_number *n2,
f7a40894 2180 struct symbolic_number *n)
2181{
2182 int i, size;
2183 uint64_t mask;
42acab1c 2184 gimple *source_stmt;
f7a40894 2185 struct symbolic_number *n_start;
2186
42333dd9 2187 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
123c61e0 2188 if (TREE_CODE (rhs1) == BIT_FIELD_REF
2189 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
42333dd9 2190 rhs1 = TREE_OPERAND (rhs1, 0);
2191 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
123c61e0 2192 if (TREE_CODE (rhs2) == BIT_FIELD_REF
2193 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
42333dd9 2194 rhs2 = TREE_OPERAND (rhs2, 0);
2195
f7a40894 2196  /* Sources are different, cancel bswap if they are not memory locations with
2197 the same base (array, structure, ...). */
42333dd9 2198 if (rhs1 != rhs2)
f7a40894 2199 {
ddc9c989 2200 uint64_t inc;
f7a40894 2201 HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
2202 struct symbolic_number *toinc_n_ptr, *n_end;
a9eff296 2203 basic_block bb1, bb2;
f7a40894 2204
2205 if (!n1->base_addr || !n2->base_addr
2206 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
2207 return NULL;
2208
1e6c1ce6 2209 if (!n1->offset != !n2->offset
2210 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
f7a40894 2211 return NULL;
2212
2213 if (n1->bytepos < n2->bytepos)
2214 {
2215 n_start = n1;
2216 start_sub = n2->bytepos - n1->bytepos;
f7a40894 2217 }
2218 else
2219 {
2220 n_start = n2;
2221 start_sub = n1->bytepos - n2->bytepos;
f7a40894 2222 }
2223
a9eff296 2224 bb1 = gimple_bb (source_stmt1);
2225 bb2 = gimple_bb (source_stmt2);
2226 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
2227 source_stmt = source_stmt1;
2228 else
2229 source_stmt = source_stmt2;
2230
f7a40894 2231 /* Find the highest address at which a load is performed and
2232 compute related info. */
2233 end1 = n1->bytepos + (n1->range - 1);
2234 end2 = n2->bytepos + (n2->range - 1);
2235 if (end1 < end2)
2236 {
2237 end = end2;
2238 end_sub = end2 - end1;
2239 }
2240 else
2241 {
2242 end = end1;
2243 end_sub = end1 - end2;
2244 }
2245 n_end = (end2 > end1) ? n2 : n1;
2246
2247 /* Find symbolic number whose lsb is the most significant. */
2248 if (BYTES_BIG_ENDIAN)
2249 toinc_n_ptr = (n_end == n1) ? n2 : n1;
2250 else
2251 toinc_n_ptr = (n_start == n1) ? n2 : n1;
2252
2253 n->range = end - n_start->bytepos + 1;
2254
2255 /* Check that the range of memory covered can be represented by
2256 a symbolic number. */
2257 if (n->range > 64 / BITS_PER_MARKER)
2258 return NULL;
2259
2260 /* Reinterpret byte marks in symbolic number holding the value of
2261 bigger weight according to target endianness. */
2262 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
2263 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
2264 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2265 {
1e6c1ce6 2266 unsigned marker
2267 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
f7a40894 2268 if (marker && marker != MARKER_BYTE_UNKNOWN)
2269 toinc_n_ptr->n += inc;
2270 }
2271 }
2272 else
2273 {
2274 n->range = n1->range;
2275 n_start = n1;
2276 source_stmt = source_stmt1;
2277 }
2278
2279 if (!n1->alias_set
2280 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
2281 n->alias_set = n1->alias_set;
2282 else
2283 n->alias_set = ptr_type_node;
2284 n->vuse = n_start->vuse;
2285 n->base_addr = n_start->base_addr;
2286 n->offset = n_start->offset;
a9eff296 2287 n->src = n_start->src;
f7a40894 2288 n->bytepos = n_start->bytepos;
2289 n->type = n_start->type;
2290 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2291
2292 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
2293 {
2294 uint64_t masked1, masked2;
2295
2296 masked1 = n1->n & mask;
2297 masked2 = n2->n & mask;
2298 if (masked1 && masked2 && masked1 != masked2)
2299 return NULL;
2300 }
2301 n->n = n1->n | n2->n;
e20b054d 2302 n->n_ops = n1->n_ops + n2->n_ops;
f7a40894 2303
2304 return source_stmt;
2305}
2306
470d5bb5 2307/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
2308 the operation given by the rhs of STMT on the result. If the operation
29314253 2309   could successfully be executed, the function returns a gimple stmt whose
 2310   rhs's first tree is the expression of the source operand, and NULL
 2311   otherwise.  */
84cc784c 2312
42acab1c 2313static gimple *
2314find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
84cc784c 2315{
2316 enum tree_code code;
2317 tree rhs1, rhs2 = NULL;
42acab1c 2318 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
84cc784c 2319 enum gimple_rhs_class rhs_class;
2320
2321 if (!limit || !is_gimple_assign (stmt))
29314253 2322 return NULL;
84cc784c 2323
2324 rhs1 = gimple_assign_rhs1 (stmt);
2325
470d5bb5 2326 if (find_bswap_or_nop_load (stmt, rhs1, n))
29314253 2327 return stmt;
470d5bb5 2328
42333dd9 2329 /* Handle BIT_FIELD_REF. */
2330 if (TREE_CODE (rhs1) == BIT_FIELD_REF
2331 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
2332 {
2333 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
2334 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
2335 if (bitpos % BITS_PER_UNIT == 0
2336 && bitsize % BITS_PER_UNIT == 0
2337 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
2338 {
e5c957b4 2339 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
2340 if (BYTES_BIG_ENDIAN)
2341 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
2342
42333dd9 2343 /* Shift. */
2344 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
2345 return NULL;
2346
2347 /* Mask. */
2348 uint64_t mask = 0;
2349 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2350 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
2351 i++, tmp <<= BITS_PER_UNIT)
2352 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2353 n->n &= mask;
2354
2355 /* Convert. */
2356 n->type = TREE_TYPE (rhs1);
2357 if (!n->base_addr)
2358 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2359
2360 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
2361 }
2362
2363 return NULL;
2364 }
2365
84cc784c 2366 if (TREE_CODE (rhs1) != SSA_NAME)
29314253 2367 return NULL;
84cc784c 2368
2369 code = gimple_assign_rhs_code (stmt);
2370 rhs_class = gimple_assign_rhs_class (stmt);
2371 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2372
2373 if (rhs_class == GIMPLE_BINARY_RHS)
2374 rhs2 = gimple_assign_rhs2 (stmt);
2375
2376 /* Handle unary rhs and binary rhs with integer constants as second
2377 operand. */
2378
2379 if (rhs_class == GIMPLE_UNARY_RHS
2380 || (rhs_class == GIMPLE_BINARY_RHS
2381 && TREE_CODE (rhs2) == INTEGER_CST))
2382 {
2383 if (code != BIT_AND_EXPR
2384 && code != LSHIFT_EXPR
2385 && code != RSHIFT_EXPR
2386 && code != LROTATE_EXPR
2387 && code != RROTATE_EXPR
d09ef31a 2388 && !CONVERT_EXPR_CODE_P (code))
29314253 2389 return NULL;
84cc784c 2390
29314253 2391 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
84cc784c 2392
470d5bb5 2393 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
2394 we have to initialize the symbolic number. */
29314253 2395 if (!source_stmt1)
84cc784c 2396 {
ee054e83 2397 if (gimple_assign_load_p (stmt)
2398 || !init_symbolic_number (n, rhs1))
29314253 2399 return NULL;
2400 source_stmt1 = stmt;
84cc784c 2401 }
2402
2403 switch (code)
2404 {
2405 case BIT_AND_EXPR:
2406 {
7101c0cf 2407 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
cc76206c 2408 uint64_t val = int_cst_value (rhs2), mask = 0;
2409 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
84cc784c 2410
2411 /* Only constants masking full bytes are allowed. */
cc76206c 2412 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
2413 if ((val & tmp) != 0 && (val & tmp) != tmp)
29314253 2414 return NULL;
cc76206c 2415 else if (val & tmp)
14cbfffb 2416 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
84cc784c 2417
cc76206c 2418 n->n &= mask;
84cc784c 2419 }
2420 break;
2421 case LSHIFT_EXPR:
2422 case RSHIFT_EXPR:
2423 case LROTATE_EXPR:
2424 case RROTATE_EXPR:
1e6c1ce6 2425 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
29314253 2426 return NULL;
84cc784c 2427 break;
2428 CASE_CONVERT:
2429 {
14cbfffb 2430 int i, type_size, old_type_size;
7101c0cf 2431 tree type;
84cc784c 2432
7101c0cf 2433 type = gimple_expr_type (stmt);
2434 type_size = TYPE_PRECISION (type);
84cc784c 2435 if (type_size % BITS_PER_UNIT != 0)
29314253 2436 return NULL;
cc76206c 2437 type_size /= BITS_PER_UNIT;
2438 if (type_size > 64 / BITS_PER_MARKER)
29314253 2439 return NULL;
84cc784c 2440
7101c0cf 2441 /* Sign extension: result is dependent on the value. */
cc76206c 2442 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
14cbfffb 2443 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
2444 && HEAD_MARKER (n->n, old_type_size))
2445 for (i = 0; i < type_size - old_type_size; i++)
a7e676a4 2446 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
409678bf 2447 << ((type_size - 1 - i) * BITS_PER_MARKER);
7101c0cf 2448
cc76206c 2449 if (type_size < 64 / BITS_PER_MARKER)
84cc784c 2450 {
2451 /* If STMT casts to a smaller type mask out the bits not
2452 belonging to the target type. */
cc76206c 2453 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
84cc784c 2454 }
7101c0cf 2455 n->type = type;
470d5bb5 2456 if (!n->base_addr)
cc76206c 2457 n->range = type_size;
84cc784c 2458 }
2459 break;
2460 default:
29314253 2461 return NULL;
84cc784c 2462 };
29314253 2463 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
84cc784c 2464 }
2465
2466 /* Handle binary rhs. */
2467
2468 if (rhs_class == GIMPLE_BINARY_RHS)
2469 {
2470 struct symbolic_number n1, n2;
42acab1c 2471 gimple *source_stmt, *source_stmt2;
84cc784c 2472
2473 if (code != BIT_IOR_EXPR)
29314253 2474 return NULL;
84cc784c 2475
2476 if (TREE_CODE (rhs2) != SSA_NAME)
29314253 2477 return NULL;
84cc784c 2478
2479 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2480
2481 switch (code)
2482 {
2483 case BIT_IOR_EXPR:
29314253 2484 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
84cc784c 2485
29314253 2486 if (!source_stmt1)
2487 return NULL;
84cc784c 2488
29314253 2489 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
470d5bb5 2490
29314253 2491 if (!source_stmt2)
2492 return NULL;
ee054e83 2493
7101c0cf 2494 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
29314253 2495 return NULL;
84cc784c 2496
1e6c1ce6 2497 if (!n1.vuse != !n2.vuse
2498 || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
29314253 2499 return NULL;
84cc784c 2500
1e6c1ce6 2501 source_stmt
2502 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
470d5bb5 2503
f7a40894 2504 if (!source_stmt)
2505 return NULL;
84cc784c 2506
2507 if (!verify_symbolic_number_p (n, stmt))
29314253 2508 return NULL;
84cc784c 2509
2510 break;
2511 default:
29314253 2512 return NULL;
84cc784c 2513 }
f7a40894 2514 return source_stmt;
84cc784c 2515 }
29314253 2516 return NULL;
84cc784c 2517}
2518
470d5bb5 2519/* Check if STMT completes a bswap implementation or a read in a given
2520 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
2521 accordingly. It also sets N to represent the kind of operations
2522 performed: size of the resulting expression and whether it works on
 2523   a memory source, and if so alias-set and vuse.  Finally, the
29314253 2524 function returns a stmt whose rhs's first tree is the source
2525 expression. */
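/* The canonical 32-bit byte swap this detects looks, at the source
   level, something like the following (a sketch; other equivalent
   shift/mask shapes are recognized too):

     uint32_t
     bswap32 (uint32_t x)
     {
       return (x >> 24)
	      | ((x >> 8) & 0x0000ff00)
	      | ((x << 8) & 0x00ff0000)
	      | (x << 24);
     }

   find_bswap_or_nop_1 folds such a sequence to the symbolic number
   CMPXCHG, and *BSWAP is set to true.  */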
84cc784c 2526
42acab1c 2527static gimple *
2528find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
84cc784c 2529{
77dfd3f2 2530 unsigned rsize;
2531 uint64_t tmpn, mask;
 2532  /* The number which the find_bswap_or_nop_1 result should match in order
 2533     to have a full byte swap.  The number is shifted to the right
 2534     according to the size of the symbolic number before using it.  */
3a4303e7 2535 uint64_t cmpxchg = CMPXCHG;
2536 uint64_t cmpnop = CMPNOP;
470d5bb5 2537
a9eff296 2538 gimple *ins_stmt;
0f09ed00 2539 int limit;
84cc784c 2540
9bc1852a 2541  /* The last parameter determines the search depth limit.  It usually
470d5bb5 2542 correlates directly to the number n of bytes to be touched. We
2543 increase that number by log2(n) + 1 here in order to also
2544 cover signed -> unsigned conversions of the src operand as can be seen
0f09ed00 2545 in libgcc, and for initial shift/and operation of the src operand. */
f9ae6f95 2546 limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
0f09ed00 2547 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
a9eff296 2548 ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
84cc784c 2549
a9eff296 2550 if (!ins_stmt)
29314253 2551 return NULL;
84cc784c 2552
1e6c1ce6 2553 /* Find real size of result (highest non-zero byte). */
470d5bb5 2554 if (n->base_addr)
77dfd3f2 2555 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2556 else
2557 rsize = n->range;
50e0cd76 2558
77dfd3f2 2559 /* Zero out the bits corresponding to untouched bytes in original gimple
2560 expression. */
cc76206c 2561 if (n->range < (int) sizeof (int64_t))
470d5bb5 2562 {
cc76206c 2563 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2564 cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
470d5bb5 2565 cmpnop &= mask;
2566 }
2567
77dfd3f2 2568 /* Zero out the bits corresponding to unused bytes in the result of the
2569 gimple expression. */
2570 if (rsize < n->range)
2571 {
2572 if (BYTES_BIG_ENDIAN)
2573 {
2574 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
2575 cmpxchg &= mask;
2576 cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
2577 }
2578 else
2579 {
2580 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
2581 cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
2582 cmpnop &= mask;
2583 }
2584 n->range = rsize;
2585 }
2586
470d5bb5 2587  /* A complete byte swap should make the symbolic number start with
2588 the largest digit in the highest order byte. Unchanged symbolic
89d0fbd4 2589 number indicates a read with same endianness as target architecture. */
470d5bb5 2590 if (n->n == cmpnop)
2591 *bswap = false;
2592 else if (n->n == cmpxchg)
2593 *bswap = true;
2594 else
29314253 2595 return NULL;
470d5bb5 2596
2597 /* Useless bit manipulation performed by code. */
e20b054d 2598 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
29314253 2599 return NULL;
84cc784c 2600
470d5bb5 2601 n->range *= BITS_PER_UNIT;
a9eff296 2602 return ins_stmt;
84cc784c 2603}
2604
65b0537f 2605namespace {
2606
2607const pass_data pass_data_optimize_bswap =
2608{
2609 GIMPLE_PASS, /* type */
2610 "bswap", /* name */
2611 OPTGROUP_NONE, /* optinfo_flags */
65b0537f 2612 TV_NONE, /* tv_id */
2613 PROP_ssa, /* properties_required */
2614 0, /* properties_provided */
2615 0, /* properties_destroyed */
2616 0, /* todo_flags_start */
2617 0, /* todo_flags_finish */
2618};
2619
2620class pass_optimize_bswap : public gimple_opt_pass
2621{
2622public:
2623 pass_optimize_bswap (gcc::context *ctxt)
2624 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2625 {}
2626
2627 /* opt_pass methods: */
2628 virtual bool gate (function *)
2629 {
2630 return flag_expensive_optimizations && optimize;
2631 }
2632
2633 virtual unsigned int execute (function *);
2634
2635}; // class pass_optimize_bswap
2636
4b2d4ce4 2637/* Perform the bswap optimization: replace the expression computed in the rhs
2638 of CUR_STMT by an equivalent bswap, load or load + bswap expression.
 2639   Which of these alternatives replaces the rhs is given by N->base_addr (non
 2640   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
 2641   load to perform are also given in N while the builtin bswap invocation is
 2642   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
 2643   the load statements involved to construct the rhs in CUR_STMT and N->range
 2644   gives the size of the rhs expression for maintaining some statistics.
 2645
 2646   Note that if the replacement involves a load, CUR_STMT is moved just after
 2647   INS_STMT to do the load with the same VUSE which can lead to CUR_STMT
 2648   changing basic block.  */
470d5bb5 2649
2650static bool
a9eff296 2651bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
42acab1c 2652 tree bswap_type, tree load_type, struct symbolic_number *n,
2653 bool bswap)
470d5bb5 2654{
4b2d4ce4 2655 gimple_stmt_iterator gsi;
29314253 2656 tree src, tmp, tgt;
42acab1c 2657 gimple *bswap_stmt;
470d5bb5 2658
4b2d4ce4 2659 gsi = gsi_for_stmt (cur_stmt);
a9eff296 2660 src = n->src;
29314253 2661 tgt = gimple_assign_lhs (cur_stmt);
470d5bb5 2662
2663 /* Need to load the value from memory first. */
2664 if (n->base_addr)
2665 {
a9eff296 2666 gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
470d5bb5 2667 tree addr_expr, addr_tmp, val_expr, val_tmp;
2668 tree load_offset_ptr, aligned_load_type;
42acab1c 2669 gimple *addr_stmt, *load_stmt;
470d5bb5 2670 unsigned align;
1e6c1ce6 2671 HOST_WIDE_INT load_offset = 0;
a9eff296 2672 basic_block ins_bb, cur_bb;
2673
2674 ins_bb = gimple_bb (ins_stmt);
2675 cur_bb = gimple_bb (cur_stmt);
2676 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
2677 return false;
470d5bb5 2678
2679 align = get_object_alignment (src);
1e6c1ce6 2680
4b2d4ce4 2681      /* Move cur_stmt just before one of the loads of the original
2682 to ensure it has the same VUSE. See PR61517 for what could
2683 go wrong. */
a9eff296 2684 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
32f2925a 2685 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
29314253 2686 gsi_move_before (&gsi, &gsi_ins);
2687 gsi = gsi_for_stmt (cur_stmt);
2688
1e6c1ce6 2689 /* Compute address to load from and cast according to the size
2690 of the load. */
470d5bb5 2691 addr_expr = build_fold_addr_expr (unshare_expr (src));
1e6c1ce6 2692 if (is_gimple_mem_ref_addr (addr_expr))
470d5bb5 2693 addr_tmp = addr_expr;
2694 else
2695 {
2696 addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2697 "load_src");
2698 addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
29314253 2699 gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
470d5bb5 2700 }
2701
2702 /* Perform the load. */
2703 aligned_load_type = load_type;
2704 if (align < TYPE_ALIGN (load_type))
2705 aligned_load_type = build_aligned_type (load_type, align);
1e6c1ce6 2706 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
470d5bb5 2707 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2708 load_offset_ptr);
2709
2710 if (!bswap)
2711 {
2712 if (n->range == 16)
2713 nop_stats.found_16bit++;
2714 else if (n->range == 32)
2715 nop_stats.found_32bit++;
2716 else
2717 {
2718 gcc_assert (n->range == 64);
2719 nop_stats.found_64bit++;
2720 }
2721
2722 /* Convert the result of load if necessary. */
2723 if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2724 {
2725 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2726 "load_dst");
2727 load_stmt = gimple_build_assign (val_tmp, val_expr);
2728 gimple_set_vuse (load_stmt, n->vuse);
29314253 2729 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
806413d2 2730 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
470d5bb5 2731 }
2732 else
29314253 2733 {
806413d2 2734 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
29314253 2735 gimple_set_vuse (cur_stmt, n->vuse);
2736 }
2737 update_stmt (cur_stmt);
470d5bb5 2738
2739 if (dump_file)
2740 {
2741 fprintf (dump_file,
89d0fbd4 2742 "%d bit load in target endianness found at: ",
1e6c1ce6 2743 (int) n->range);
1ffa4346 2744 print_gimple_stmt (dump_file, cur_stmt, 0);
470d5bb5 2745 }
2746 return true;
2747 }
2748 else
2749 {
2750 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2751 load_stmt = gimple_build_assign (val_tmp, val_expr);
2752 gimple_set_vuse (load_stmt, n->vuse);
29314253 2753 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
470d5bb5 2754 }
2755 src = val_tmp;
2756 }
e20b054d 2757 else if (!bswap)
2758 {
2759 gimple *g;
2760 if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
2761 {
2762 if (!is_gimple_val (src))
2763 return false;
2764 g = gimple_build_assign (tgt, NOP_EXPR, src);
2765 }
2766 else
2767 g = gimple_build_assign (tgt, src);
2768 if (n->range == 16)
2769 nop_stats.found_16bit++;
2770 else if (n->range == 32)
2771 nop_stats.found_32bit++;
2772 else
2773 {
2774 gcc_assert (n->range == 64);
2775 nop_stats.found_64bit++;
2776 }
2777 if (dump_file)
2778 {
2779 fprintf (dump_file,
2780 "%d bit reshuffle in target endianness found at: ",
2781 (int) n->range);
2782 print_gimple_stmt (dump_file, cur_stmt, 0);
2783 }
2784 gsi_replace (&gsi, g, true);
2785 return true;
2786 }
42333dd9 2787 else if (TREE_CODE (src) == BIT_FIELD_REF)
2788 src = TREE_OPERAND (src, 0);
470d5bb5 2789
2790 if (n->range == 16)
2791 bswap_stats.found_16bit++;
2792 else if (n->range == 32)
2793 bswap_stats.found_32bit++;
2794 else
2795 {
2796 gcc_assert (n->range == 64);
2797 bswap_stats.found_64bit++;
2798 }
2799
2800 tmp = src;
2801
904c73a0 2802 /* Convert the src expression if necessary. */
2803 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2804 {
42acab1c 2805 gimple *convert_stmt;
904c73a0 2806
2807 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2808 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2809 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2810 }
2811
4b2d4ce4 2812  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16-bit values
 2813     are considered, as rotation of 2N-bit values by N bits is generally not
904c73a0 2814 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
2815 gives 0x03040102 while a bswap for that value is 0x04030201. */
0b6c968a 2816 if (bswap && n->range == 16)
470d5bb5 2817 {
0b6c968a 2818 tree count = build_int_cst (NULL, BITS_PER_UNIT);
904c73a0 2819 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
0b6c968a 2820 bswap_stmt = gimple_build_assign (NULL, src);
470d5bb5 2821 }
0b6c968a 2822 else
904c73a0 2823 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
470d5bb5 2824
2825 tmp = tgt;
2826
2827 /* Convert the result if necessary. */
2828 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2829 {
42acab1c 2830 gimple *convert_stmt;
904c73a0 2831
470d5bb5 2832 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
e9cf809e 2833 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
29314253 2834 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
470d5bb5 2835 }
2836
0b6c968a 2837 gimple_set_lhs (bswap_stmt, tmp);
470d5bb5 2838
2839 if (dump_file)
2840 {
2841 fprintf (dump_file, "%d bit bswap implementation found at: ",
1e6c1ce6 2842 (int) n->range);
1ffa4346 2843 print_gimple_stmt (dump_file, cur_stmt, 0);
470d5bb5 2844 }
2845
0b6c968a 2846 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
29314253 2847 gsi_remove (&gsi, true);
470d5bb5 2848 return true;
2849}
2850
 2851/* Find manual byte swap implementations as well as loads in a given
 2852   endianness.  Byte swaps are turned into a bswap builtin invocation
 2853   while endian loads are converted to a bswap builtin invocation or a
89d0fbd4 2854   simple load according to the target endianness.  */
470d5bb5 2855
65b0537f 2856unsigned int
2857pass_optimize_bswap::execute (function *fun)
84cc784c 2858{
2859 basic_block bb;
bbb9b536 2860 bool bswap32_p, bswap64_p;
84cc784c 2861 bool changed = false;
bbb9b536 2862 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
84cc784c 2863
2864 if (BITS_PER_UNIT != 8)
2865 return 0;
2866
b9a16870 2867 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
d6bf3b14 2868 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
b9a16870 2869 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
d6bf3b14 2870 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
3328b1fb 2871 || (bswap32_p && word_mode == SImode)));
84cc784c 2872
0af25806 2873 /* Determine the argument type of the builtins. The code later on
2874 assumes that the return and argument type are the same. */
2875 if (bswap32_p)
2876 {
b9a16870 2877 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
0af25806 2878 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2879 }
2880
2881 if (bswap64_p)
2882 {
b9a16870 2883 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
0af25806 2884 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2885 }
2886
470d5bb5 2887 memset (&nop_stats, 0, sizeof (nop_stats));
30c4e60d 2888 memset (&bswap_stats, 0, sizeof (bswap_stats));
a9eff296 2889 calculate_dominance_info (CDI_DOMINATORS);
30c4e60d 2890
65b0537f 2891 FOR_EACH_BB_FN (bb, fun)
84cc784c 2892 {
2893 gimple_stmt_iterator gsi;
2894
0ec31268 2895 /* We do a reverse scan for bswap patterns to make sure we get the
4b2d4ce4 2896 widest match. As bswap pattern matching doesn't handle previously
2897 inserted smaller bswap replacements as sub-patterns, the wider
2898 variant wouldn't be detected. */
2899 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
84cc784c 2900 {
a9eff296 2901 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
29314253 2902 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
0b6c968a 2903 enum tree_code code;
470d5bb5 2904 struct symbolic_number n;
2905 bool bswap;
84cc784c 2906
4b2d4ce4 2907 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2908 might be moved to a different basic block by bswap_replace and gsi
 2909	     must not point to it if that's the case.  Moving the gsi_prev
 2910	     there makes sure that gsi points to the statement previous to
2911 cur_stmt while still making sure that all statements are
2912 considered in this basic block. */
2913 gsi_prev (&gsi);
2914
0b6c968a 2915 if (!is_gimple_assign (cur_stmt))
84cc784c 2916 continue;
2917
0b6c968a 2918 code = gimple_assign_rhs_code (cur_stmt);
2919 switch (code)
2920 {
2921 case LROTATE_EXPR:
2922 case RROTATE_EXPR:
2923 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2924 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2925 % BITS_PER_UNIT)
2926 continue;
2927 /* Fall through. */
2928 case BIT_IOR_EXPR:
2929 break;
2930 default:
2931 continue;
2932 }
2933
a9eff296 2934 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
470d5bb5 2935
a9eff296 2936 if (!ins_stmt)
470d5bb5 2937 continue;
84cc784c 2938
470d5bb5 2939 switch (n.range)
84cc784c 2940 {
f811051b 2941 case 16:
4b2d4ce4 2942 /* Already in canonical form, nothing to do. */
2943 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2944 continue;
904c73a0 2945 load_type = bswap_type = uint16_type_node;
f811051b 2946 break;
84cc784c 2947 case 32:
470d5bb5 2948 load_type = uint32_type_node;
84cc784c 2949 if (bswap32_p)
0af25806 2950 {
b9a16870 2951 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
0af25806 2952 bswap_type = bswap32_type;
2953 }
84cc784c 2954 break;
2955 case 64:
470d5bb5 2956 load_type = uint64_type_node;
84cc784c 2957 if (bswap64_p)
0af25806 2958 {
b9a16870 2959 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
0af25806 2960 bswap_type = bswap64_type;
2961 }
84cc784c 2962 break;
2963 default:
2964 continue;
2965 }
2966
bbb9b536 2967 if (bswap && !fndecl && n.range != 16)
84cc784c 2968 continue;
2969
a9eff296 2970 if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
4b2d4ce4 2971 &n, bswap))
470d5bb5 2972 changed = true;
84cc784c 2973 }
2974 }
2975
470d5bb5 2976 statistics_counter_event (fun, "16-bit nop implementations found",
2977 nop_stats.found_16bit);
2978 statistics_counter_event (fun, "32-bit nop implementations found",
2979 nop_stats.found_32bit);
2980 statistics_counter_event (fun, "64-bit nop implementations found",
2981 nop_stats.found_64bit);
65b0537f 2982 statistics_counter_event (fun, "16-bit bswap implementations found",
f811051b 2983 bswap_stats.found_16bit);
65b0537f 2984 statistics_counter_event (fun, "32-bit bswap implementations found",
30c4e60d 2985 bswap_stats.found_32bit);
65b0537f 2986 statistics_counter_event (fun, "64-bit bswap implementations found",
30c4e60d 2987 bswap_stats.found_64bit);
2988
8b88439e 2989 return (changed ? TODO_update_ssa : 0);
84cc784c 2990}
2991
cbe8bda8 2992} // anon namespace
2993
2994gimple_opt_pass *
2995make_pass_optimize_bswap (gcc::context *ctxt)
2996{
2997 return new pass_optimize_bswap (ctxt);
2998}
2999
71dbd910 3000/* Return true if stmt is a type conversion operation that can be stripped
3001 when used in a widening multiply operation. */
3002static bool
42acab1c 3003widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
71dbd910 3004{
3005 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
3006
3007 if (TREE_CODE (result_type) == INTEGER_TYPE)
3008 {
3009 tree op_type;
3010 tree inner_op_type;
3011
3012 if (!CONVERT_EXPR_CODE_P (rhs_code))
3013 return false;
3014
3015 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
3016
3017 /* If the type of OP has the same precision as the result, then
3018 we can strip this conversion. The multiply operation will be
3019 selected to create the correct extension as a by-product. */
3020 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
3021 return true;
3022
3023 /* We can also strip a conversion if it preserves the signed-ness of
3024 the operation and doesn't narrow the range. */
3025 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
3026
8f9d1531 3027 /* If the inner-most type is unsigned, then we can strip any
3028 intermediate widening operation. If it's signed, then the
3029 intermediate widening operation must also be signed. */
3030 if ((TYPE_UNSIGNED (inner_op_type)
3031 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
71dbd910 3032 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
3033 return true;
3034
3035 return false;
3036 }
3037
3038 return rhs_code == FIXED_CONVERT_EXPR;
3039}
3040
0989f516 3041/* Return true if RHS is a suitable operand for a widening multiplication,
3042 assuming a target type of TYPE.
7e4c867e 3043 There are two cases:
3044
aff5fb4d 3045 - RHS makes some value at least twice as wide. Store that value
3046 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
7e4c867e 3047
3048 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
3049 but leave *TYPE_OUT untouched. */
00f4f705 3050
3051static bool
0989f516 3052is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
3053 tree *new_rhs_out)
7e4c867e 3054{
42acab1c 3055 gimple *stmt;
0989f516 3056 tree type1, rhs1;
7e4c867e 3057
3058 if (TREE_CODE (rhs) == SSA_NAME)
3059 {
7e4c867e 3060 stmt = SSA_NAME_DEF_STMT (rhs);
0989f516 3061 if (is_gimple_assign (stmt))
3062 {
71dbd910 3063 if (! widening_mult_conversion_strippable_p (type, stmt))
0989f516 3064 rhs1 = rhs;
3065 else
ffebd9c5 3066 {
3067 rhs1 = gimple_assign_rhs1 (stmt);
3068
3069 if (TREE_CODE (rhs1) == INTEGER_CST)
3070 {
3071 *new_rhs_out = rhs1;
3072 *type_out = NULL;
3073 return true;
3074 }
3075 }
0989f516 3076 }
3077 else
3078 rhs1 = rhs;
7e4c867e 3079
7e4c867e 3080 type1 = TREE_TYPE (rhs1);
0989f516 3081
7e4c867e 3082 if (TREE_CODE (type1) != TREE_CODE (type)
aff5fb4d 3083 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
7e4c867e 3084 return false;
3085
3086 *new_rhs_out = rhs1;
3087 *type_out = type1;
3088 return true;
3089 }
3090
3091 if (TREE_CODE (rhs) == INTEGER_CST)
3092 {
3093 *new_rhs_out = rhs;
3094 *type_out = NULL;
3095 return true;
3096 }
3097
3098 return false;
3099}
3100
0989f516 3101/* Return true if STMT performs a widening multiplication, assuming the
3102 output type is TYPE. If so, store the unwidened types of the operands
3103 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
3104 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
3105 and *TYPE2_OUT would give the operands of the multiplication. */
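/* E.g. for "z_1 = (long long) a_2 * (long long) b_3" with 32-bit a_2 and
   b_3 and 64-bit z_1, this returns true with *TYPE1_OUT and *TYPE2_OUT
   set to the 32-bit types and *RHS1_OUT and *RHS2_OUT to the unwidened
   a_2 and b_3 (an illustrative sketch of the intended use).  */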
7e4c867e 3106
3107static bool
42acab1c 3108is_widening_mult_p (gimple *stmt,
7e4c867e 3109 tree *type1_out, tree *rhs1_out,
3110 tree *type2_out, tree *rhs2_out)
00f4f705 3111{
4333b41f 3112 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3113
7e4c867e 3114 if (TREE_CODE (type) != INTEGER_TYPE
3115 && TREE_CODE (type) != FIXED_POINT_TYPE)
3116 return false;
00f4f705 3117
0989f516 3118 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
3119 rhs1_out))
00f4f705 3120 return false;
3121
0989f516 3122 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
3123 rhs2_out))
7e4c867e 3124 return false;
00f4f705 3125
7e4c867e 3126 if (*type1_out == NULL)
00f4f705 3127 {
7e4c867e 3128 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
00f4f705 3129 return false;
7e4c867e 3130 *type1_out = *type2_out;
00f4f705 3131 }
00f4f705 3132
7e4c867e 3133 if (*type2_out == NULL)
00f4f705 3134 {
7e4c867e 3135 if (!int_fits_type_p (*rhs2_out, *type1_out))
00f4f705 3136 return false;
7e4c867e 3137 *type2_out = *type1_out;
00f4f705 3138 }
00f4f705 3139
287c271c 3140 /* Ensure that the larger of the two operands comes first. */
3141 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3142 {
dfcf26a5 3143 std::swap (*type1_out, *type2_out);
3144 std::swap (*rhs1_out, *rhs2_out);
287c271c 3145 }
aff5fb4d 3146
7e4c867e 3147 return true;
3148}
00f4f705 3149
eb728046 3150/* Check to see if the CALL statement is an invocation of copysign
 3151   with 1. as its first argument.  */
3152static bool
3153is_copysign_call_with_1 (gimple *call)
3154{
3155 gcall *c = dyn_cast <gcall *> (call);
3156 if (! c)
3157 return false;
3158
3159 enum combined_fn code = gimple_call_combined_fn (c);
3160
3161 if (code == CFN_LAST)
3162 return false;
3163
3164 if (builtin_fn_p (code))
3165 {
3166 switch (as_builtin_fn (code))
3167 {
3168 CASE_FLT_FN (BUILT_IN_COPYSIGN):
3169 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
3170 return real_onep (gimple_call_arg (c, 0));
3171 default:
3172 return false;
3173 }
3174 }
3175
3176 if (internal_fn_p (code))
3177 {
3178 switch (as_internal_fn (code))
3179 {
3180 case IFN_COPYSIGN:
3181 return real_onep (gimple_call_arg (c, 0));
3182 default:
3183 return false;
3184 }
3185 }
3186
3187 return false;
3188}
3189
3190/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
 3191   This only happens when the xorsign optab is defined.  If the
 3192   pattern is not a xorsign pattern or if expansion fails, FALSE is
 3193   returned; otherwise TRUE is returned.  */
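/* Sketch of the rewrite performed below (assuming the target provides
   the xorsign optab):

     t_1 = __builtin_copysign (1.0, y_2);
     r_3 = x_4 * t_1;

   becomes

     r_3 = .XORSIGN (x_4, y_2);

   where .XORSIGN denotes the IFN_XORSIGN internal function call built
   below.  */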
3194static bool
3195convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
3196{
3197 tree treeop0, treeop1, lhs, type;
3198 location_t loc = gimple_location (stmt);
3199 lhs = gimple_assign_lhs (stmt);
3200 treeop0 = gimple_assign_rhs1 (stmt);
3201 treeop1 = gimple_assign_rhs2 (stmt);
3202 type = TREE_TYPE (lhs);
3203 machine_mode mode = TYPE_MODE (type);
3204
3aa2a10c 3205 if (HONOR_SNANS (type))
eb728046 3206 return false;
3207
3208 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
3209 {
3210 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
3aa2a10c 3211 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
eb728046 3212 {
3213 call0 = SSA_NAME_DEF_STMT (treeop1);
3aa2a10c 3214 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
eb728046 3215 return false;
3216
3217 treeop1 = treeop0;
3218 }
eb728046 3219 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
3220 return false;
3221
3222 gcall *c = as_a<gcall*> (call0);
3223 treeop0 = gimple_call_arg (c, 1);
3224
3225 gcall *call_stmt
3226 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
3227 gimple_set_lhs (call_stmt, lhs);
3228 gimple_set_location (call_stmt, loc);
3229 gsi_replace (gsi, call_stmt, true);
3230 return true;
3231 }
3232
3233 return false;
3234}
3235
7e4c867e 3236/* Process a single gimple statement STMT, which has a MULT_EXPR as
3237 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
3238 value is true iff we converted the statement. */
3239
3240static bool
42acab1c 3241convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
7e4c867e 3242{
03d37e4e 3243 tree lhs, rhs1, rhs2, type, type1, type2;
7e4c867e 3244 enum insn_code handler;
3754d046 3245 machine_mode to_mode, from_mode, actual_mode;
5a574e8b 3246 optab op;
aff5fb4d 3247 int actual_precision;
3248 location_t loc = gimple_location (stmt);
3f2ab719 3249 bool from_unsigned1, from_unsigned2;
7e4c867e 3250
3251 lhs = gimple_assign_lhs (stmt);
3252 type = TREE_TYPE (lhs);
3253 if (TREE_CODE (type) != INTEGER_TYPE)
00f4f705 3254 return false;
3255
4333b41f 3256 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
00f4f705 3257 return false;
3258
5a574e8b 3259 to_mode = TYPE_MODE (type);
3260 from_mode = TYPE_MODE (type1);
3f2ab719 3261 from_unsigned1 = TYPE_UNSIGNED (type1);
3262 from_unsigned2 = TYPE_UNSIGNED (type2);
5a574e8b 3263
3f2ab719 3264 if (from_unsigned1 && from_unsigned2)
5a574e8b 3265 op = umul_widen_optab;
3f2ab719 3266 else if (!from_unsigned1 && !from_unsigned2)
5a574e8b 3267 op = smul_widen_optab;
00f4f705 3268 else
5a574e8b 3269 op = usmul_widen_optab;
3270
aff5fb4d 3271 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3272 0, &actual_mode);
7e4c867e 3273
3274 if (handler == CODE_FOR_nothing)
3f2ab719 3275 {
3276 if (op != smul_widen_optab)
3277 {
22ffd684 3278 /* We can use a signed multiply with unsigned types as long as
3279 there is a wider mode to use, or it is the smaller of the two
3280 types that is unsigned. Note that type1 >= type2, always. */
3281 if ((TYPE_UNSIGNED (type1)
3282 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3283 || (TYPE_UNSIGNED (type2)
3284 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3285 {
28ebc73c 3286 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3287 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
22ffd684 3288 return false;
3289 }
3f2ab719 3290
3291 op = smul_widen_optab;
3292 handler = find_widening_optab_handler_and_mode (op, to_mode,
3293 from_mode, 0,
3294 &actual_mode);
3295
3296 if (handler == CODE_FOR_nothing)
3297 return false;
3298
3299 from_unsigned1 = from_unsigned2 = false;
3300 }
3301 else
3302 return false;
3303 }
7e4c867e 3304
aff5fb4d 3305  /* Ensure that the inputs to the handler are in the correct precision
3306 for the opcode. This will be the full mode size. */
3307 actual_precision = GET_MODE_PRECISION (actual_mode);
b36be69d 3308 if (2 * actual_precision > TYPE_PRECISION (type))
3309 return false;
3f2ab719 3310 if (actual_precision != TYPE_PRECISION (type1)
3311 || from_unsigned1 != TYPE_UNSIGNED (type1))
03d37e4e 3312 rhs1 = build_and_insert_cast (gsi, loc,
3313 build_nonstandard_integer_type
3314 (actual_precision, from_unsigned1), rhs1);
3f2ab719 3315 if (actual_precision != TYPE_PRECISION (type2)
3316 || from_unsigned2 != TYPE_UNSIGNED (type2))
03d37e4e 3317 rhs2 = build_and_insert_cast (gsi, loc,
3318 build_nonstandard_integer_type
3319 (actual_precision, from_unsigned2), rhs2);
aff5fb4d 3320
ffebd9c5 3321 /* Handle constants. */
3322 if (TREE_CODE (rhs1) == INTEGER_CST)
3323 rhs1 = fold_convert (type1, rhs1);
3324 if (TREE_CODE (rhs2) == INTEGER_CST)
3325 rhs2 = fold_convert (type2, rhs2);
3326
aff5fb4d 3327 gimple_assign_set_rhs1 (stmt, rhs1);
3328 gimple_assign_set_rhs2 (stmt, rhs2);
00f4f705 3329 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
3330 update_stmt (stmt);
30c4e60d 3331 widen_mul_stats.widen_mults_inserted++;
00f4f705 3332 return true;
3333}
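
/* A sketch of the effect (illustrative GIMPLE, assuming the target
   provides a widening-multiply pattern such as mulhisi3):

     short a, b;
     _1 = (int) a_2(D);
     _2 = (int) b_3(D);
     c_4 = _1 * _2;

   is rewritten so the multiplication reads the narrow operands
   directly:

     c_4 = a_2(D) w* b_3(D);   <-- WIDEN_MULT_EXPR

   letting a single hardware instruction produce the full-width
   result.  */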
3334
3335/* Process a single gimple statement STMT, which is found at the
3336   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
3337 rhs (given by CODE), and try to convert it into a
3338 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
3339 is true iff we converted the statement. */
3340
3341static bool
42acab1c 3342convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
00f4f705 3343 enum tree_code code)
3344{
42acab1c 3345 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
3346 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
03d37e4e 3347 tree type, type1, type2, optype;
00f4f705 3348 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
3349 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
3350 optab this_optab;
3351 enum tree_code wmult_code;
aff5fb4d 3352 enum insn_code handler;
3754d046 3353 machine_mode to_mode, from_mode, actual_mode;
aff5fb4d 3354 location_t loc = gimple_location (stmt);
3355 int actual_precision;
3f2ab719 3356 bool from_unsigned1, from_unsigned2;
00f4f705 3357
3358 lhs = gimple_assign_lhs (stmt);
3359 type = TREE_TYPE (lhs);
7e4c867e 3360 if (TREE_CODE (type) != INTEGER_TYPE
3361 && TREE_CODE (type) != FIXED_POINT_TYPE)
00f4f705 3362 return false;
3363
3364 if (code == MINUS_EXPR)
3365 wmult_code = WIDEN_MULT_MINUS_EXPR;
3366 else
3367 wmult_code = WIDEN_MULT_PLUS_EXPR;
3368
00f4f705 3369 rhs1 = gimple_assign_rhs1 (stmt);
3370 rhs2 = gimple_assign_rhs2 (stmt);
3371
3372 if (TREE_CODE (rhs1) == SSA_NAME)
3373 {
3374 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3375 if (is_gimple_assign (rhs1_stmt))
3376 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3377 }
00f4f705 3378
3379 if (TREE_CODE (rhs2) == SSA_NAME)
3380 {
3381 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3382 if (is_gimple_assign (rhs2_stmt))
3383 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3384 }
00f4f705 3385
07ea3e5c 3386 /* Allow for one conversion statement between the multiply
3387      and addition/subtraction statement.  If there is more than
3388      one conversion then we assume it would invalidate this
3389 transformation. If that's not the case then they should have
3390 been folded before now. */
3391 if (CONVERT_EXPR_CODE_P (rhs1_code))
3392 {
3393 conv1_stmt = rhs1_stmt;
3394 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3395 if (TREE_CODE (rhs1) == SSA_NAME)
3396 {
3397 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3398 if (is_gimple_assign (rhs1_stmt))
3399 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3400 }
3401 else
3402 return false;
3403 }
3404 if (CONVERT_EXPR_CODE_P (rhs2_code))
3405 {
3406 conv2_stmt = rhs2_stmt;
3407 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3408 if (TREE_CODE (rhs2) == SSA_NAME)
3409 {
3410 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3411 if (is_gimple_assign (rhs2_stmt))
3412 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3413 }
3414 else
3415 return false;
3416 }
3417
aff5fb4d 3418 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
3419      is_widening_mult_p, but we still need the rhs values it returns.
3420
3421 It might also appear that it would be sufficient to use the existing
3422 operands of the widening multiply, but that would limit the choice of
e0df5be0 3423 multiply-and-accumulate instructions.
3424
3425      If the widened-multiplication result has more than one use, it is
3426 probably wiser not to do the conversion. */
aff5fb4d 3427 if (code == PLUS_EXPR
3428 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
00f4f705 3429 {
e0df5be0 3430 if (!has_single_use (rhs1)
3431 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3432 &type2, &mult_rhs2))
00f4f705 3433 return false;
7e4c867e 3434 add_rhs = rhs2;
07ea3e5c 3435 conv_stmt = conv1_stmt;
00f4f705 3436 }
aff5fb4d 3437 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
00f4f705 3438 {
e0df5be0 3439 if (!has_single_use (rhs2)
3440 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3441 &type2, &mult_rhs2))
00f4f705 3442 return false;
7e4c867e 3443 add_rhs = rhs1;
07ea3e5c 3444 conv_stmt = conv2_stmt;
00f4f705 3445 }
00f4f705 3446 else
3447 return false;
3448
aff5fb4d 3449 to_mode = TYPE_MODE (type);
3450 from_mode = TYPE_MODE (type1);
3f2ab719 3451 from_unsigned1 = TYPE_UNSIGNED (type1);
3452 from_unsigned2 = TYPE_UNSIGNED (type2);
4ccf368d 3453 optype = type1;
aff5fb4d 3454
3f2ab719 3455 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
3456 if (from_unsigned1 != from_unsigned2)
3457 {
4ccf368d 3458 if (!INTEGRAL_TYPE_P (type))
3459 return false;
22ffd684 3460 /* We can use a signed multiply with unsigned types as long as
3461 there is a wider mode to use, or it is the smaller of the two
3462 types that is unsigned. Note that type1 >= type2, always. */
3463 if ((from_unsigned1
3464 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3465 || (from_unsigned2
3466 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3f2ab719 3467 {
28ebc73c 3468 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3469 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
22ffd684 3470 return false;
3f2ab719 3471 }
22ffd684 3472
3473 from_unsigned1 = from_unsigned2 = false;
4ccf368d 3474 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3475 false);
3f2ab719 3476 }
815a0224 3477
07ea3e5c 3478 /* If there was a conversion between the multiply and addition
3479 then we need to make sure it fits a multiply-and-accumulate.
3480      There should be a single mode change which does not change the
3481 value. */
3482 if (conv_stmt)
3483 {
3f2ab719 3484 /* We use the original, unmodified data types for this. */
07ea3e5c 3485 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3486 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3487 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3488 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3489
3490 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3491 {
3492 /* Conversion is a truncate. */
3493 if (TYPE_PRECISION (to_type) < data_size)
3494 return false;
3495 }
3496 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3497 {
3498 /* Conversion is an extend. Check it's the right sort. */
3499 if (TYPE_UNSIGNED (from_type) != is_unsigned
3500 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3501 return false;
3502 }
3503 /* else convert is a no-op for our purposes. */
3504 }
3505
815a0224 3506 /* Verify that the machine can perform a widening multiply
3507 accumulate in this mode/signedness combination, otherwise
3508 this transformation is likely to pessimize code. */
3f2ab719 3509 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
aff5fb4d 3510 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3511 from_mode, 0, &actual_mode);
3512
3513 if (handler == CODE_FOR_nothing)
815a0224 3514 return false;
3515
aff5fb4d 3516	  /* Ensure that the inputs to the handler are in the correct precision
3517 for the opcode. This will be the full mode size. */
3518 actual_precision = GET_MODE_PRECISION (actual_mode);
3f2ab719 3519 if (actual_precision != TYPE_PRECISION (type1)
3520 || from_unsigned1 != TYPE_UNSIGNED (type1))
03d37e4e 3521 mult_rhs1 = build_and_insert_cast (gsi, loc,
3522 build_nonstandard_integer_type
3523 (actual_precision, from_unsigned1),
3524 mult_rhs1);
3f2ab719 3525 if (actual_precision != TYPE_PRECISION (type2)
3526 || from_unsigned2 != TYPE_UNSIGNED (type2))
03d37e4e 3527 mult_rhs2 = build_and_insert_cast (gsi, loc,
3528 build_nonstandard_integer_type
3529 (actual_precision, from_unsigned2),
3530 mult_rhs2);
00f4f705 3531
12421545 3532 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
03d37e4e 3533 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
12421545 3534
ffebd9c5 3535 /* Handle constants. */
3536 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
d5a3bb10 3537 mult_rhs1 = fold_convert (type1, mult_rhs1);
ffebd9c5 3538 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
d5a3bb10 3539 mult_rhs2 = fold_convert (type2, mult_rhs2);
ffebd9c5 3540
806413d2 3541 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3542 add_rhs);
00f4f705 3543 update_stmt (gsi_stmt (*gsi));
30c4e60d 3544 widen_mul_stats.maccs_inserted++;
00f4f705 3545 return true;
3546}
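
/* A sketch of the combined form (illustrative, assuming the target
   provides a multiply-accumulate pattern such as maddhisi4):

     _1 = a_2(D) w* b_3(D);
     acc_5 = _1 + acc_4(D);

   is rewritten as the single statement

     acc_5 = WIDEN_MULT_PLUS_EXPR <a_2(D), b_3(D), acc_4(D)>;

   with MINUS_EXPR mapping to WIDEN_MULT_MINUS_EXPR analogously.  */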
3547
15dbdc8f 3548/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3549 with uses in additions and subtractions to form fused multiply-add
3550 operations. Returns true if successful and MUL_STMT should be removed. */
b9be572e 3551
3552static bool
42acab1c 3553convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
b9be572e 3554{
15dbdc8f 3555 tree mul_result = gimple_get_lhs (mul_stmt);
b9be572e 3556 tree type = TREE_TYPE (mul_result);
42acab1c 3557 gimple *use_stmt, *neguse_stmt;
1a91d914 3558 gassign *fma_stmt;
b9be572e 3559 use_operand_p use_p;
3560 imm_use_iterator imm_iter;
3561
3562 if (FLOAT_TYPE_P (type)
3563 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3564 return false;
3565
3566 /* We don't want to do bitfield reduction ops. */
3567 if (INTEGRAL_TYPE_P (type)
654ba22c 3568 && !type_has_mode_precision_p (type))
b9be572e 3569 return false;
3570
3571 /* If the target doesn't support it, don't generate it. We assume that
3572 if fma isn't available then fms, fnma or fnms are not either. */
3573 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3574 return false;
3575
5ed3d3b8 3576 /* If the multiplication has zero uses, it is kept around probably because
3577 of -fnon-call-exceptions. Don't optimize it away in that case,
3578      it is DCE's job.  */
3579 if (has_zero_uses (mul_result))
3580 return false;
3581
b9be572e 3582 /* Make sure that the multiplication statement becomes dead after
3583      the transformation, i.e. that all uses are transformed to FMAs.
3584 This means we assume that an FMA operation has the same cost
3585 as an addition. */
3586 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3587 {
3588 enum tree_code use_code;
44579526 3589 tree result = mul_result;
3590 bool negate_p = false;
b9be572e 3591
3592 use_stmt = USE_STMT (use_p);
3593
17a2c727 3594 if (is_gimple_debug (use_stmt))
3595 continue;
3596
b9be572e 3597       /* For now restrict this operation to single basic blocks.  In theory
3598 we would want to support sinking the multiplication in
3599 m = a*b;
3600 if ()
3601 ma = m + c;
3602 else
3603 d = m;
3604	 to form an fma in the then block and sink the multiplication to the
3605 else block. */
3606 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3607 return false;
3608
44579526 3609 if (!is_gimple_assign (use_stmt))
b9be572e 3610 return false;
3611
44579526 3612 use_code = gimple_assign_rhs_code (use_stmt);
3613
3614 /* A negate on the multiplication leads to FNMA. */
3615 if (use_code == NEGATE_EXPR)
3616 {
805ad414 3617 ssa_op_iter iter;
5715c09b 3618 use_operand_p usep;
805ad414 3619
44579526 3620 result = gimple_assign_lhs (use_stmt);
3621
3622 /* Make sure the negate statement becomes dead with this
3623 single transformation. */
3624 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3625 &use_p, &neguse_stmt))
3626 return false;
3627
805ad414 3628 /* Make sure the multiplication isn't also used on that stmt. */
5715c09b 3629 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3630 if (USE_FROM_PTR (usep) == mul_result)
805ad414 3631 return false;
3632
44579526 3633 /* Re-validate. */
3634 use_stmt = neguse_stmt;
3635 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3636 return false;
3637 if (!is_gimple_assign (use_stmt))
3638 return false;
3639
3640 use_code = gimple_assign_rhs_code (use_stmt);
3641 negate_p = true;
3642 }
b9be572e 3643
44579526 3644 switch (use_code)
3645 {
3646 case MINUS_EXPR:
8a9d0572 3647 if (gimple_assign_rhs2 (use_stmt) == result)
3648 negate_p = !negate_p;
3649 break;
44579526 3650 case PLUS_EXPR:
44579526 3651 break;
44579526 3652 default:
3653 /* FMA can only be formed from PLUS and MINUS. */
3654 return false;
3655 }
b9be572e 3656
b095bd6a 3657 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3658 by a MULT_EXPR that we'll visit later, we might be able to
3659 get a more profitable match with fnma.
3660 OTOH, if we don't, a negate / fma pair has likely lower latency
3661	 than a mult / subtract pair.  */
3662 if (use_code == MINUS_EXPR && !negate_p
3663 && gimple_assign_rhs1 (use_stmt) == result
3664 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3665 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3666 {
3667 tree rhs2 = gimple_assign_rhs2 (use_stmt);
b095bd6a 3668
058e9571 3669 if (TREE_CODE (rhs2) == SSA_NAME)
3670 {
42acab1c 3671 gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
058e9571 3672 if (has_single_use (rhs2)
3673 && is_gimple_assign (stmt2)
3674 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3675 return false;
3676 }
b095bd6a 3677 }
3678
44579526 3679 /* We can't handle a * b + a * b. */
3680 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3681 return false;
8a9d0572 3682
3683 /* While it is possible to validate whether or not the exact form
3684 that we've recognized is available in the backend, the assumption
3685 is that the transformation is never a loss. For instance, suppose
3686 the target only has the plain FMA pattern available. Consider
3687 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3688 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
3689 still have 3 operations, but in the FMA form the two NEGs are
9d75589a 3690 independent and could be run in parallel. */
b9be572e 3691 }
3692
3693 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3694 {
b9be572e 3695 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
17a2c727 3696 enum tree_code use_code;
15dbdc8f 3697 tree addop, mulop1 = op1, result = mul_result;
44579526 3698 bool negate_p = false;
b9be572e 3699
17a2c727 3700 if (is_gimple_debug (use_stmt))
3701 continue;
3702
3703 use_code = gimple_assign_rhs_code (use_stmt);
44579526 3704 if (use_code == NEGATE_EXPR)
3705 {
3706 result = gimple_assign_lhs (use_stmt);
3707 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3708 gsi_remove (&gsi, true);
3709 release_defs (use_stmt);
3710
3711 use_stmt = neguse_stmt;
3712 gsi = gsi_for_stmt (use_stmt);
3713 use_code = gimple_assign_rhs_code (use_stmt);
3714 negate_p = true;
3715 }
3716
3717 if (gimple_assign_rhs1 (use_stmt) == result)
b9be572e 3718 {
3719 addop = gimple_assign_rhs2 (use_stmt);
3720 /* a * b - c -> a * b + (-c) */
3721 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3722 addop = force_gimple_operand_gsi (&gsi,
3723 build1 (NEGATE_EXPR,
3724 type, addop),
3725 true, NULL_TREE, true,
3726 GSI_SAME_STMT);
3727 }
3728 else
3729 {
3730 addop = gimple_assign_rhs1 (use_stmt);
3731 /* a - b * c -> (-b) * c + a */
3732 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
44579526 3733 negate_p = !negate_p;
b9be572e 3734 }
3735
44579526 3736 if (negate_p)
3737 mulop1 = force_gimple_operand_gsi (&gsi,
3738 build1 (NEGATE_EXPR,
3739 type, mulop1),
3740 true, NULL_TREE, true,
3741 GSI_SAME_STMT);
3742
e9cf809e 3743 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
3744 FMA_EXPR, mulop1, op2, addop);
b9be572e 3745 gsi_replace (&gsi, fma_stmt, true);
30c4e60d 3746 widen_mul_stats.fmas_inserted++;
b9be572e 3747 }
3748
3749 return true;
3750}
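
/* The shapes this can produce, sketched in illustrative notation
   (any required negation is inserted as a separate NEGATE_EXPR
   statement feeding the FMA_EXPR):

     _1 = a * b;  x = _1 + c;   ->  x = FMA_EXPR <a, b, c>
     _1 = a * b;  x = _1 - c;   ->  _2 = -c;  x = FMA_EXPR <a, b, _2>
     _1 = a * b;  x = c - _1;   ->  _2 = -a;  x = FMA_EXPR <_2, b, c>

   assuming FP contraction is enabled and the fma optab exists for
   the mode.  */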
3751
e11a63e8 3752
3753/* Helper function of match_uaddsub_overflow. Return 1
3754   if USE_STMT is the unsigned overflow check ovf != 0 for
3755   STMT, -1 if USE_STMT is the unsigned overflow check ovf == 0,
3756   and 0 otherwise.  */
3757
3758static int
3759uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3760{
3761 enum tree_code ccode = ERROR_MARK;
3762 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3763 if (gimple_code (use_stmt) == GIMPLE_COND)
3764 {
3765 ccode = gimple_cond_code (use_stmt);
3766 crhs1 = gimple_cond_lhs (use_stmt);
3767 crhs2 = gimple_cond_rhs (use_stmt);
3768 }
3769 else if (is_gimple_assign (use_stmt))
3770 {
3771 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3772 {
3773 ccode = gimple_assign_rhs_code (use_stmt);
3774 crhs1 = gimple_assign_rhs1 (use_stmt);
3775 crhs2 = gimple_assign_rhs2 (use_stmt);
3776 }
3777 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3778 {
3779 tree cond = gimple_assign_rhs1 (use_stmt);
3780 if (COMPARISON_CLASS_P (cond))
3781 {
3782 ccode = TREE_CODE (cond);
3783 crhs1 = TREE_OPERAND (cond, 0);
3784 crhs2 = TREE_OPERAND (cond, 1);
3785 }
3786 else
3787 return 0;
3788 }
3789 else
3790 return 0;
3791 }
3792 else
3793 return 0;
3794
3795 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3796 return 0;
3797
3798 enum tree_code code = gimple_assign_rhs_code (stmt);
3799 tree lhs = gimple_assign_lhs (stmt);
3800 tree rhs1 = gimple_assign_rhs1 (stmt);
3801 tree rhs2 = gimple_assign_rhs2 (stmt);
3802
3803 switch (ccode)
3804 {
3805 case GT_EXPR:
3806 case LE_EXPR:
3807 /* r = a - b; r > a or r <= a
3808 r = a + b; a > r or a <= r or b > r or b <= r. */
3809 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3810 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3811 && crhs2 == lhs))
3812 return ccode == GT_EXPR ? 1 : -1;
3813 break;
3814 case LT_EXPR:
3815 case GE_EXPR:
3816 /* r = a - b; a < r or a >= r
3817 r = a + b; r < a or r >= a or r < b or r >= b. */
3818 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3819 || (code == PLUS_EXPR && crhs1 == lhs
3820 && (crhs2 == rhs1 || crhs2 == rhs2)))
3821 return ccode == LT_EXPR ? 1 : -1;
3822 break;
3823 default:
3824 break;
3825 }
3826 return 0;
3827}
3828
3829/* Recognize for unsigned x
3830 x = y - z;
3831 if (x > y)
3832 where there are other uses of x and replace it with
3833 _7 = SUB_OVERFLOW (y, z);
3834 x = REALPART_EXPR <_7>;
3835 _8 = IMAGPART_EXPR <_7>;
3836 if (_8)
3837 and similarly for addition. */
3838
3839static bool
3840match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3841 enum tree_code code)
3842{
3843 tree lhs = gimple_assign_lhs (stmt);
3844 tree type = TREE_TYPE (lhs);
3845 use_operand_p use_p;
3846 imm_use_iterator iter;
3847 bool use_seen = false;
3848 bool ovf_use_seen = false;
3849 gimple *use_stmt;
3850
3851 gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3852 if (!INTEGRAL_TYPE_P (type)
3853 || !TYPE_UNSIGNED (type)
3854 || has_zero_uses (lhs)
3855 || has_single_use (lhs)
3856 || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3857 TYPE_MODE (type)) == CODE_FOR_nothing)
3858 return false;
3859
3860 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3861 {
3862 use_stmt = USE_STMT (use_p);
3863 if (is_gimple_debug (use_stmt))
3864 continue;
3865
3866 if (uaddsub_overflow_check_p (stmt, use_stmt))
3867 ovf_use_seen = true;
3868 else
3869 use_seen = true;
3870 if (ovf_use_seen && use_seen)
3871 break;
3872 }
3873
3874 if (!ovf_use_seen || !use_seen)
3875 return false;
3876
3877 tree ctype = build_complex_type (type);
3878 tree rhs1 = gimple_assign_rhs1 (stmt);
3879 tree rhs2 = gimple_assign_rhs2 (stmt);
3880 gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3881 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3882 2, rhs1, rhs2);
3883 tree ctmp = make_ssa_name (ctype);
3884 gimple_call_set_lhs (g, ctmp);
3885 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3886 gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3887 build1 (REALPART_EXPR, type, ctmp));
3888 gsi_replace (gsi, g2, true);
3889 tree ovf = make_ssa_name (type);
3890 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3891 build1 (IMAGPART_EXPR, type, ctmp));
3892 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3893
3894 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3895 {
3896 if (is_gimple_debug (use_stmt))
3897 continue;
3898
3899 int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3900 if (ovf_use == 0)
3901 continue;
3902 if (gimple_code (use_stmt) == GIMPLE_COND)
3903 {
3904 gcond *cond_stmt = as_a <gcond *> (use_stmt);
3905 gimple_cond_set_lhs (cond_stmt, ovf);
3906 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3907 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3908 }
3909 else
3910 {
3911 gcc_checking_assert (is_gimple_assign (use_stmt));
3912 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3913 {
3914 gimple_assign_set_rhs1 (use_stmt, ovf);
3915 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3916 gimple_assign_set_rhs_code (use_stmt,
3917 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3918 }
3919 else
3920 {
3921 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3922 == COND_EXPR);
3923 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3924 boolean_type_node, ovf,
3925 build_int_cst (type, 0));
3926 gimple_assign_set_rhs1 (use_stmt, cond);
3927 }
3928 }
3929 update_stmt (use_stmt);
3930 }
3931 return true;
3932}
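
/* The addition case, as a sketch to complement the example above
   (illustrative names; assumes the target implements the uaddv4
   pattern):

     unsigned r = a + b;
     if (r < a)        // overflow check
       ...
     use (r);

   becomes

     _7 = ADD_OVERFLOW (a, b);
     r = REALPART_EXPR <_7>;
     _8 = IMAGPART_EXPR <_7>;
     if (_8 != 0)
       ...  */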
3933
67f7b566 3934/* Return true if the target has support for divmod.  */
3935
3936static bool
3937target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3938{
3939 /* If target supports hardware divmod insn, use it for divmod. */
3940 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3941 return true;
3942
3943 /* Check if libfunc for divmod is available. */
3944 rtx libfunc = optab_libfunc (divmod_optab, mode);
3945 if (libfunc != NULL_RTX)
3946 {
3947 /* If optab_handler exists for div_optab, perhaps in a wider mode,
3948 we don't want to use the libfunc even if it exists for given mode. */
19a4dce4 3949 machine_mode div_mode;
3950 FOR_EACH_MODE_FROM (div_mode, mode)
67f7b566 3951 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3952 return false;
3953
3954 return targetm.expand_divmod_libfunc != NULL;
3955 }
3956
3957 return false;
3958}
3959
3960/* Check if stmt is a candidate for the divmod transform.  */
3961
3962static bool
3963divmod_candidate_p (gassign *stmt)
3964{
3965 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
582adad1 3966 machine_mode mode = TYPE_MODE (type);
67f7b566 3967 optab divmod_optab, div_optab;
3968
3969 if (TYPE_UNSIGNED (type))
3970 {
3971 divmod_optab = udivmod_optab;
3972 div_optab = udiv_optab;
3973 }
3974 else
3975 {
3976 divmod_optab = sdivmod_optab;
3977 div_optab = sdiv_optab;
3978 }
3979
3980 tree op1 = gimple_assign_rhs1 (stmt);
3981 tree op2 = gimple_assign_rhs2 (stmt);
3982
3983   /* Disable the transform if either operand is a constant, since
3984      division-by-constant may have specialized expansion.  */
3985 if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3986 return false;
3987
3988 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3989 expand using the [su]divv optabs. */
3990 if (TYPE_OVERFLOW_TRAPS (type))
3991 return false;
3992
3993 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3994 return false;
3995
3996 return true;
3997}
3998
3999/* This function looks for:
4000 t1 = a TRUNC_DIV_EXPR b;
4001 t2 = a TRUNC_MOD_EXPR b;
4002 and transforms it to the following sequence:
4003 complex_tmp = DIVMOD (a, b);
4004    t1 = REALPART_EXPR(complex_tmp);
4005    t2 = IMAGPART_EXPR(complex_tmp);
4006 For conditions enabling the transform see divmod_candidate_p().
4007
4008 The pass has three parts:
4009    1) Find top_stmt, which is a trunc_div or trunc_mod stmt that dominates all
4010    other trunc_div_expr and trunc_mod_expr stmts.
4011    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4012    to the stmts vector.
4013    3) Insert a DIVMOD call just before top_stmt and update entries in the
4014    stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
4015    IMAGPART_EXPR for mod).  */
4016
4017static bool
4018convert_to_divmod (gassign *stmt)
4019{
4020 if (stmt_can_throw_internal (stmt)
4021 || !divmod_candidate_p (stmt))
4022 return false;
4023
4024 tree op1 = gimple_assign_rhs1 (stmt);
4025 tree op2 = gimple_assign_rhs2 (stmt);
4026
4027 imm_use_iterator use_iter;
4028 gimple *use_stmt;
4029 auto_vec<gimple *> stmts;
4030
4031 gimple *top_stmt = stmt;
4032 basic_block top_bb = gimple_bb (stmt);
4033
4034  /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4035     at least stmt and possibly other trunc_div/trunc_mod stmts
4036     having the same operands as stmt.  */
4037
4038 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4039 {
4040 if (is_gimple_assign (use_stmt)
4041 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4042 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4043 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4044 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4045 {
4046 if (stmt_can_throw_internal (use_stmt))
4047 continue;
4048
4049 basic_block bb = gimple_bb (use_stmt);
4050
4051 if (bb == top_bb)
4052 {
4053 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4054 top_stmt = use_stmt;
4055 }
4056 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4057 {
4058 top_bb = bb;
4059 top_stmt = use_stmt;
4060 }
4061 }
4062 }
4063
4064 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4065 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4066
4067 stmts.safe_push (top_stmt);
4068 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4069
4070  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4071     to the stmts vector.  The 2nd loop will always add stmt to the stmts
4072     vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so it
4073     ends up adding at least a single trunc_mod_expr stmt.  */
4074
4075 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4076 {
4077 if (is_gimple_assign (use_stmt)
4078 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4079 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4080 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4081 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4082 {
4083 if (use_stmt == top_stmt
4084 || stmt_can_throw_internal (use_stmt)
4085 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4086 continue;
4087
4088 stmts.safe_push (use_stmt);
4089 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4090 div_seen = true;
4091 }
4092 }
4093
4094 if (!div_seen)
4095 return false;
4096
4097  /* Part 3: Create a call to the internal fn DIVMOD:
4098 divmod_tmp = DIVMOD (op1, op2). */
4099
4100 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4101 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4102 call_stmt, "divmod_tmp");
4103 gimple_call_set_lhs (call_stmt, res);
989f02dc 4104 /* We rejected throwing statements above. */
4105 gimple_call_set_nothrow (call_stmt, true);
67f7b566 4106
4107 /* Insert the call before top_stmt. */
4108 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4109 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4110
4111 widen_mul_stats.divmod_calls_inserted++;
4112
4113 /* Update all statements in stmts vector:
4114 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4115 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4116
4117 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4118 {
4119 tree new_rhs;
4120
4121 switch (gimple_assign_rhs_code (use_stmt))
4122 {
4123 case TRUNC_DIV_EXPR:
4124 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4125 break;
4126
4127 case TRUNC_MOD_EXPR:
4128 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4129 break;
4130
4131 default:
4132 gcc_unreachable ();
4133 }
4134
4135 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4136 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4137 update_stmt (use_stmt);
4138 }
4139
4140 return true;
4141}
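
/* A sketch at the source level (illustrative; the div and mod may sit
   in different blocks as long as the topmost statement dominates the
   rest):

     q = a / b;      // top_stmt
     ...
     r = a % b;

   becomes

     divmod_tmp_1 = DIVMOD (a, b);
     q = REALPART_EXPR <divmod_tmp_1>;
     ...
     r = IMAGPART_EXPR <divmod_tmp_1>;  */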
e11a63e8 4142
62be004c 4143/* Find integer multiplications where the operands are extended from
4144 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4145 where appropriate. */
4146
65b0537f 4147namespace {
4148
4149const pass_data pass_data_optimize_widening_mul =
4150{
4151 GIMPLE_PASS, /* type */
4152 "widening_mul", /* name */
4153 OPTGROUP_NONE, /* optinfo_flags */
65b0537f 4154 TV_NONE, /* tv_id */
4155 PROP_ssa, /* properties_required */
4156 0, /* properties_provided */
4157 0, /* properties_destroyed */
4158 0, /* todo_flags_start */
8b88439e 4159 TODO_update_ssa, /* todo_flags_finish */
65b0537f 4160};
4161
4162class pass_optimize_widening_mul : public gimple_opt_pass
4163{
4164public:
4165 pass_optimize_widening_mul (gcc::context *ctxt)
4166 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4167 {}
4168
4169 /* opt_pass methods: */
4170 virtual bool gate (function *)
4171 {
4172 return flag_expensive_optimizations && optimize;
4173 }
4174
4175 virtual unsigned int execute (function *);
4176
4177}; // class pass_optimize_widening_mul
4178
4179unsigned int
4180pass_optimize_widening_mul::execute (function *fun)
62be004c 4181{
62be004c 4182 basic_block bb;
15dbdc8f 4183 bool cfg_changed = false;
62be004c 4184
30c4e60d 4185 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
67f7b566 4186 calculate_dominance_info (CDI_DOMINATORS);
4187 renumber_gimple_stmt_uids ();
30c4e60d 4188
65b0537f 4189 FOR_EACH_BB_FN (bb, fun)
62be004c 4190 {
4191 gimple_stmt_iterator gsi;
4192
b9be572e 4193 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
62be004c 4194 {
42acab1c 4195 gimple *stmt = gsi_stmt (gsi);
00f4f705 4196 enum tree_code code;
62be004c 4197
b9be572e 4198 if (is_gimple_assign (stmt))
4199 {
4200 code = gimple_assign_rhs_code (stmt);
4201 switch (code)
4202 {
4203 case MULT_EXPR:
aff5fb4d 4204 if (!convert_mult_to_widen (stmt, &gsi)
eb728046 4205 && !convert_expand_mult_copysign (stmt, &gsi)
15dbdc8f 4206 && convert_mult_to_fma (stmt,
4207 gimple_assign_rhs1 (stmt),
4208 gimple_assign_rhs2 (stmt)))
b9be572e 4209 {
4210 gsi_remove (&gsi, true);
4211 release_defs (stmt);
4212 continue;
4213 }
4214 break;
4215
4216 case PLUS_EXPR:
4217 case MINUS_EXPR:
e11a63e8 4218 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4219 match_uaddsub_overflow (&gsi, stmt, code);
b9be572e 4220 break;
62be004c 4221
67f7b566 4222 case TRUNC_MOD_EXPR:
4223 convert_to_divmod (as_a<gassign *> (stmt));
4224 break;
4225
b9be572e 4226 default:;
4227 }
4228 }
d4af184a 4229 else if (is_gimple_call (stmt)
4230 && gimple_call_lhs (stmt))
15dbdc8f 4231 {
4232 tree fndecl = gimple_call_fndecl (stmt);
4233 if (fndecl
8ff377a6 4234 && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
15dbdc8f 4235 {
4236 switch (DECL_FUNCTION_CODE (fndecl))
4237 {
4238 case BUILT_IN_POWF:
4239 case BUILT_IN_POW:
4240 case BUILT_IN_POWL:
4241 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
20cb53c9 4242 && real_equal
4243 (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4244 &dconst2)
15dbdc8f 4245 && convert_mult_to_fma (stmt,
4246 gimple_call_arg (stmt, 0),
4247 gimple_call_arg (stmt, 0)))
4248 {
6716f635 4249 unlink_stmt_vdef (stmt);
13ff78a4 4250 if (gsi_remove (&gsi, true)
4251 && gimple_purge_dead_eh_edges (bb))
15dbdc8f 4252 cfg_changed = true;
13ff78a4 4253 release_defs (stmt);
15dbdc8f 4254 continue;
4255 }
4256 break;
4257
4258 default:;
4259 }
4260 }
4261 }
b9be572e 4262 gsi_next (&gsi);
62be004c 4263 }
4264 }
00f4f705 4265
65b0537f 4266 statistics_counter_event (fun, "widening multiplications inserted",
30c4e60d 4267 widen_mul_stats.widen_mults_inserted);
65b0537f 4268 statistics_counter_event (fun, "widening maccs inserted",
30c4e60d 4269 widen_mul_stats.maccs_inserted);
65b0537f 4270 statistics_counter_event (fun, "fused multiply-adds inserted",
30c4e60d 4271 widen_mul_stats.fmas_inserted);
67f7b566 4272 statistics_counter_event (fun, "divmod calls inserted",
4273 widen_mul_stats.divmod_calls_inserted);
30c4e60d 4274
15dbdc8f 4275 return cfg_changed ? TODO_cleanup_cfg : 0;
62be004c 4276}
4277
cbe8bda8 4278} // anon namespace
4279
4280gimple_opt_pass *
4281make_pass_optimize_widening_mul (gcc::context *ctxt)
4282{
4283 return new pass_optimize_widening_mul (ctxt);
4284}