]>
Commit | Line | Data |
---|---|---|
abacb398 | 1 | /* Global, SSA-based optimizations using mathematical identities. |
711789cc | 2 | Copyright (C) 2005-2013 Free Software Foundation, Inc. |
48e1416a | 3 | |
abacb398 | 4 | This file is part of GCC. |
48e1416a | 5 | |
abacb398 | 6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
8c4c00c1 | 8 | Free Software Foundation; either version 3, or (at your option) any |
abacb398 | 9 | later version. |
48e1416a | 10 | |
abacb398 | 11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
48e1416a | 15 | |
abacb398 | 16 | You should have received a copy of the GNU General Public License |
8c4c00c1 | 17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
abacb398 | 19 | |
20 | /* Currently, the only mini-pass in this file tries to CSE reciprocal | |
21 | operations. These are common in sequences such as this one: | |
22 | ||
23 | modulus = sqrt(x*x + y*y + z*z); | |
24 | x = x / modulus; | |
25 | y = y / modulus; | |
26 | z = z / modulus; | |
27 | ||
28 | that can be optimized to | |
29 | ||
30 | modulus = sqrt(x*x + y*y + z*z); | |
31 | rmodulus = 1.0 / modulus; | |
32 | x = x * rmodulus; | |
33 | y = y * rmodulus; | |
34 | z = z * rmodulus; | |
35 | ||
36 | We do this for loop invariant divisors, and with this pass whenever | |
ac70caad | 37 | we notice that a division has the same divisor multiple times. |
38 | ||
39 | Of course, like in PRE, we don't insert a division if a dominator | |
40 | already has one. However, this cannot be done as an extension of | |
41 | PRE for several reasons. | |
42 | ||
43 | First of all, with some experiments it was found out that the | |
44 | transformation is not always useful if there are only two divisions | |
45 | by the same divisor. This is probably because modern processors | |
46 | can pipeline the divisions; on older, in-order processors it should | |
47 | still be effective to optimize two divisions by the same number. | |
48 | We make this a param, and it shall be called N in the remainder of | |
49 | this comment. | |
50 | ||
51 | Second, if trapping math is active, we have less freedom on where | |
52 | to insert divisions: we can only do so in basic blocks that already | |
53 | contain one. (If divisions don't trap, instead, we can insert | |
54 | divisions elsewhere, which will be in blocks that are common dominators | |
55 | of those that have the division). | |
56 | ||
57 | We really don't want to compute the reciprocal unless a division will | |
58 | be found. To do this, we won't insert the division in a basic block | |
59 | that has less than N divisions *post-dominating* it. | |
60 | ||
61 | The algorithm constructs a subset of the dominator tree, holding the | |
62 | blocks containing the divisions and the common dominators to them, | |
63 | and walk it twice. The first walk is in post-order, and it annotates | |
64 | each block with the number of divisions that post-dominate it: this | |
65 | gives information on where divisions can be inserted profitably. | |
66 | The second walk is in pre-order, and it inserts divisions as explained | |
67 | above, and replaces divisions by multiplications. | |
68 | ||
69 | In the best case, the cost of the pass is O(n_statements). In the | |
70 | worst-case, the cost is due to creating the dominator tree subset, | |
71 | with a cost of O(n_basic_blocks ^ 2); however this can only happen | |
72 | for n_statements / n_basic_blocks statements. So, the amortized cost | |
73 | of creating the dominator tree subset is O(n_basic_blocks) and the | |
74 | worst-case cost of the pass is O(n_statements * n_basic_blocks). | |
75 | ||
76 | More practically, the cost will be small because there are few | |
77 | divisions, and they tend to be in the same basic block, so insert_bb | |
78 | is called very few times. | |
79 | ||
80 | If we did this using domwalk.c, an efficient implementation would have | |
81 | to work on all the variables in a single pass, because we could not | |
82 | work on just a subset of the dominator tree, as we do now, and the | |
83 | cost would also be something like O(n_statements * n_basic_blocks). | |
84 | The data structures would be more complex in order to work on all the | |
85 | variables in a single pass. */ | |
abacb398 | 86 | |
87 | #include "config.h" | |
88 | #include "system.h" | |
89 | #include "coretypes.h" | |
90 | #include "tm.h" | |
91 | #include "flags.h" | |
92 | #include "tree.h" | |
bc61cadb | 93 | #include "basic-block.h" |
94 | #include "tree-ssa-alias.h" | |
95 | #include "internal-fn.h" | |
96 | #include "gimple-fold.h" | |
97 | #include "gimple-expr.h" | |
98 | #include "is-a.h" | |
e795d6e1 | 99 | #include "gimple.h" |
dcf1a1ec | 100 | #include "gimple-iterator.h" |
e795d6e1 | 101 | #include "gimplify-me.h" |
9ed99284 | 102 | #include "stor-layout.h" |
073c1fd5 | 103 | #include "gimple-ssa.h" |
104 | #include "tree-cfg.h" | |
105 | #include "tree-phinodes.h" | |
106 | #include "ssa-iterators.h" | |
9ed99284 | 107 | #include "stringpool.h" |
073c1fd5 | 108 | #include "tree-ssanames.h" |
9ed99284 | 109 | #include "expr.h" |
073c1fd5 | 110 | #include "tree-dfa.h" |
69ee5dbb | 111 | #include "tree-ssa.h" |
abacb398 | 112 | #include "tree-pass.h" |
ac70caad | 113 | #include "alloc-pool.h" |
ac70caad | 114 | #include "target.h" |
ce084dfc | 115 | #include "gimple-pretty-print.h" |
a7a46268 | 116 | |
117 | /* FIXME: RTL headers have to be included here for optabs. */ | |
118 | #include "rtl.h" /* Because optabs.h wants enum rtx_code. */ | |
119 | #include "expr.h" /* Because optabs.h wants sepops. */ | |
84cc784c | 120 | #include "optabs.h" |
ac70caad | 121 | |
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  These nodes form a subset of the dominator tree (see
   insert_bb) rooted at occ_head.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};
155 | ||
/* Per-function counters for the reciprocal CSE mini-pass; reported
   through statistics_counter_event at the end of the pass.  */
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

/* Counters for the sin/cos-to-cexpi CSE mini-pass.  */
static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
} sincos_stats;

/* Counters for the bswap recognition mini-pass.  */
static struct
{
  /* Number of hand-written 16-bit bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit bswaps found.  */
  int found_64bit;
} bswap_stats;

/* Counters for the widening-multiply recognition mini-pass.  */
static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;
} widen_mul_stats;
ac70caad | 194 | |
/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  Heads the per-SSA-name
   forest built by register_division_in/insert_bb; reset to NULL by
   execute_cse_reciprocals_1 after freeing.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence";
   created/destroyed once per function in execute_cse_reciprocals.  */
static alloc_pool occ_pool;
201 | ||
202 | ||
203 | ||
204 | /* Allocate and return a new struct occurrence for basic block BB, and | |
205 | whose children list is headed by CHILDREN. */ | |
206 | static struct occurrence * | |
207 | occ_new (basic_block bb, struct occurrence *children) | |
abacb398 | 208 | { |
ac70caad | 209 | struct occurrence *occ; |
210 | ||
f0d6e81c | 211 | bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool); |
ac70caad | 212 | memset (occ, 0, sizeof (struct occurrence)); |
213 | ||
214 | occ->bb = bb; | |
215 | occ->children = children; | |
216 | return occ; | |
abacb398 | 217 | } |
218 | ||
ac70caad | 219 | |
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  /* Walk the sibling list under IDOM, comparing NEW_OCC's block against
     each existing node's block via their nearest common dominator.  */
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  /* DOM is a strict intermediate dominator, so it cannot already
	     have a node (its node would have been found above).  */
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
284 | ||
285 | /* Register that we found a division in BB. */ | |
286 | ||
287 | static inline void | |
288 | register_division_in (basic_block bb) | |
289 | { | |
290 | struct occurrence *occ; | |
291 | ||
292 | occ = (struct occurrence *) bb->aux; | |
293 | if (!occ) | |
294 | { | |
295 | occ = occ_new (bb, NULL); | |
34154e27 | 296 | insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); |
ac70caad | 297 | } |
298 | ||
299 | occ->bb_has_division = true; | |
300 | occ->num_divisions++; | |
301 | } | |
302 | ||
303 | ||
304 | /* Compute the number of divisions that postdominate each block in OCC and | |
305 | its children. */ | |
abacb398 | 306 | |
abacb398 | 307 | static void |
ac70caad | 308 | compute_merit (struct occurrence *occ) |
abacb398 | 309 | { |
ac70caad | 310 | struct occurrence *occ_child; |
311 | basic_block dom = occ->bb; | |
abacb398 | 312 | |
ac70caad | 313 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
abacb398 | 314 | { |
ac70caad | 315 | basic_block bb; |
316 | if (occ_child->children) | |
317 | compute_merit (occ_child); | |
318 | ||
319 | if (flag_exceptions) | |
320 | bb = single_noncomplex_succ (dom); | |
321 | else | |
322 | bb = dom; | |
323 | ||
324 | if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb)) | |
325 | occ->num_divisions += occ_child->num_divisions; | |
326 | } | |
327 | } | |
328 | ||
329 | ||
330 | /* Return whether USE_STMT is a floating-point division by DEF. */ | |
331 | static inline bool | |
75a70cf9 | 332 | is_division_by (gimple use_stmt, tree def) |
ac70caad | 333 | { |
75a70cf9 | 334 | return is_gimple_assign (use_stmt) |
335 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR | |
336 | && gimple_assign_rhs2 (use_stmt) == def | |
119368d7 | 337 | /* Do not recognize x / x as valid division, as we are getting |
338 | confused later by replacing all immediate uses x in such | |
339 | a stmt. */ | |
75a70cf9 | 340 | && gimple_assign_rhs1 (use_stmt) != def; |
ac70caad | 341 | } |
342 | ||
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_GSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.  THRESHOLD is the minimum number of post-dominating
   divisions (per compute_merit) required to make insertion pay off.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, int threshold)
{
  tree type;
  gimple new_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  /* Only insert if no dominator provided one already, the block is a
     legal insertion point under trapping math, and enough divisions
     post-dominate it to be profitable.  */
  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      && occ->num_divisions >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
					       build_one_cst (type), def);

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  /* Record the reciprocal (inherited or freshly made) and propagate it
     to all dominated occurrence nodes.  */
  occ->recip_def = recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
}
404 | ||
405 | ||
406 | /* Replace the division at USE_P with a multiplication by the reciprocal, if | |
407 | possible. */ | |
408 | ||
409 | static inline void | |
410 | replace_reciprocal (use_operand_p use_p) | |
411 | { | |
75a70cf9 | 412 | gimple use_stmt = USE_STMT (use_p); |
413 | basic_block bb = gimple_bb (use_stmt); | |
ac70caad | 414 | struct occurrence *occ = (struct occurrence *) bb->aux; |
415 | ||
0bfd8d5c | 416 | if (optimize_bb_for_speed_p (bb) |
417 | && occ->recip_def && use_stmt != occ->recip_def_stmt) | |
ac70caad | 418 | { |
50aacf4c | 419 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
75a70cf9 | 420 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |
ac70caad | 421 | SET_USE (use_p, occ->recip_def); |
50aacf4c | 422 | fold_stmt_inplace (&gsi); |
ac70caad | 423 | update_stmt (use_stmt); |
424 | } | |
425 | } | |
426 | ||
427 | ||
428 | /* Free OCC and return one more "struct occurrence" to be freed. */ | |
429 | ||
430 | static struct occurrence * | |
431 | free_bb (struct occurrence *occ) | |
432 | { | |
433 | struct occurrence *child, *next; | |
434 | ||
435 | /* First get the two pointers hanging off OCC. */ | |
436 | next = occ->next; | |
437 | child = occ->children; | |
438 | occ->bb->aux = NULL; | |
439 | pool_free (occ_pool, occ); | |
440 | ||
441 | /* Now ensure that we don't recurse unless it is necessary. */ | |
442 | if (!child) | |
443 | return next; | |
9e583fac | 444 | else |
ac70caad | 445 | { |
446 | while (next) | |
447 | next = free_bb (next); | |
448 | ||
449 | return child; | |
450 | } | |
451 | } | |
452 | ||
453 | ||
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.
   DEF_GSI, if non-NULL, is an iterator at DEF's defining statement
   (used to insert the reciprocal right after the definition).  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p;
  imm_use_iterator use_iter;
  struct occurrence *occ;
  int count = 0, threshold;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));

  /* First pass over the immediate uses: record every division by DEF
     in the dominator-tree subset and count them.  */
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt));
	  count++;
	}
    }

  /* Do the expensive part only if we can hope to optimize something.  */
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
  if (count >= threshold)
    {
      gimple use_stmt;
      /* Annotate the forest with post-dominating division counts and
	 insert the 1.0/DEF computations where profitable.  */
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
	}

      /* Second pass: rewrite each division by DEF into a multiply.
	 Use the STMT iteration form since replace_reciprocal edits the
	 use, which would invalidate a FAST walk.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	}
    }

  /* Tear down the occurrence forest for the next SSA name.  */
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
506 | ||
ac70caad | 507 | static bool |
508 | gate_cse_reciprocals (void) | |
509 | { | |
0bfd8d5c | 510 | return optimize && flag_reciprocal_math; |
ac70caad | 511 | } |
512 | ||
/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  Also scans for
   a/func(b) sequences replaceable by a * rfunc(b) when the target
   provides a reciprocal builtin.  Returns 0 (no TODO flags).  */
static unsigned int
execute_cse_reciprocals (void)
{
  basic_block bb;
  tree arg;

  occ_pool = create_alloc_pool ("dominators for recip",
				sizeof (struct occurrence),
				n_basic_blocks_for_fn (cfun) / 3 + 1);

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

#ifdef ENABLE_CHECKING
  /* bb->aux is used as scratch by the occurrence forest; it must start
     out clear in every block.  */
  FOR_EACH_BB (bb)
    gcc_assert (!bb->aux);
#endif

  /* Handle floating-point parameters through their default defs, which
     have no defining statement in the body.  */
  for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (cfun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;
      gimple phi;
      tree def;

      /* Floating-point PHI results: no single defining point to insert
	 after, so pass a NULL iterator.  */
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  phi = gsi_stmt (gsi);
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      /* Floating-point defs from ordinary statements.  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      /* The a/func(b) -> a*rfunc(b) transform is a speed optimization
	 only; skip blocks optimized for size.  */
      if (optimize_bb_for_size_p (bb))
        continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1)
		  && (fndecl = gimple_call_fndecl (stmt1))
		  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
		{
		  enum built_in_function code;
		  bool md_code, fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;

		  code = DECL_FUNCTION_CODE (fndecl);
		  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;

		  /* Ask the target for a reciprocal counterpart, e.g.
		     rsqrt for sqrt; not every target/function has one.  */
		  fndecl = targetm.builtin_reciprocal (code, md_code, false);
		  if (!fndecl)
		    continue;

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  /* Retarget the call to the reciprocal builtin, keeping
		     the same LHS so the uses stay valid.  */
		  gimple_replace_ssa_lhs (stmt1, arg1);
		  gimple_call_set_fndecl (stmt1, fndecl);
		  update_stmt (stmt1);
		  reciprocal_stats.rfuncs_inserted++;

		  /* Turn every a/arg1 into a*arg1.  */
		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (cfun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (cfun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  free_alloc_pool (occ_pool);
  return 0;
}
655 | ||
/* Pass registration boilerplate: descriptor, opt_pass wrapper class and
   the factory function called from passes.c.  */

namespace {

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa | TODO_verify_ssa
    | TODO_verify_stmts ), /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_cse_reciprocals (); }
  unsigned int execute () { return execute_cse_reciprocals (); }

}; // class pass_cse_reciprocals

} // anon namespace

gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}
694 | ||
0d424440 | 695 | /* Records an occurrence at statement USE_STMT in the vector of trees |
a0315874 | 696 | STMTS if it is dominated by *TOP_BB or dominates it or this basic block |
0d424440 | 697 | is not yet initialized. Returns true if the occurrence was pushed on |
a0315874 | 698 | the vector. Adjusts *TOP_BB to be the basic block dominating all |
699 | statements in the vector. */ | |
700 | ||
701 | static bool | |
f1f41a6c | 702 | maybe_record_sincos (vec<gimple> *stmts, |
75a70cf9 | 703 | basic_block *top_bb, gimple use_stmt) |
a0315874 | 704 | { |
75a70cf9 | 705 | basic_block use_bb = gimple_bb (use_stmt); |
a0315874 | 706 | if (*top_bb |
707 | && (*top_bb == use_bb | |
708 | || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) | |
f1f41a6c | 709 | stmts->safe_push (use_stmt); |
a0315874 | 710 | else if (!*top_bb |
711 | || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) | |
712 | { | |
f1f41a6c | 713 | stmts->safe_push (use_stmt); |
a0315874 | 714 | *top_bb = use_bb; |
715 | } | |
716 | else | |
717 | return false; | |
718 | ||
719 | return true; | |
720 | } | |
721 | ||
/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.

   Returns true if the CFG changed (dead EH edges were purged).  */

static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple def_stmt, use_stmt, stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  vec<gimple> stmts = vNULL;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  type = TREE_TYPE (name);
  /* Collect candidate calls, tracking the block that dominates them.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt)
	  || !(fndecl = gimple_call_fndecl (use_stmt))
	  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
	continue;

      switch (DECL_FUNCTION_CODE (fndecl))
	{
	CASE_FLT_FN (BUILT_IN_COS):
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_FLT_FN (BUILT_IN_SIN):
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_FLT_FN (BUILT_IN_CEXPI):
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  /* A single kind of call gains nothing from CSE.  */
  if (seen_cos + seen_sin + seen_cexpi <= 1)
    {
      stmts.release ();
      return false;
    }

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      /* NAME is defined by a real statement in TOP_BB: the cexpi call
	 must go after it, not at the block start.  */
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;
      fndecl = gimple_call_fndecl (use_stmt);

      switch (DECL_FUNCTION_CODE (fndecl))
	{
	CASE_FLT_FN (BUILT_IN_COS):
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_FLT_FN (BUILT_IN_SIN):
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_FLT_FN (BUILT_IN_CEXPI):
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      /* Removing a throwing call may leave dead EH edges behind.  */
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  stmts.release ();

  return cfg_changed;
}
837 | ||
e9a6c4bc | 838 | /* To evaluate powi(x,n), the floating point value x raised to the |
839 | constant integer exponent n, we use a hybrid algorithm that | |
840 | combines the "window method" with look-up tables. For an | |
841 | introduction to exponentiation algorithms and "addition chains", | |
842 | see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth, | |
843 | "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming", | |
844 | 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation | |
845 | Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */ | |
846 | ||
847 | /* Provide a default value for POWI_MAX_MULTS, the maximum number of | |
848 | multiplications to inline before calling the system library's pow | |
849 | function. powi(x,n) requires at worst 2*bits(n)-2 multiplications, | |
850 | so this default never requires calling pow, powf or powl. */ | |
851 | ||
852 | #ifndef POWI_MAX_MULTS | |
853 | #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2) | |
854 | #endif | |
855 | ||
856 | /* The size of the "optimal power tree" lookup table. All | |
857 | exponents less than this value are simply looked up in the | |
858 | powi_table below. This threshold is also used to size the | |
859 | cache of pseudo registers that hold intermediate results. */ | |
860 | #define POWI_TABLE_SIZE 256 | |
861 | ||
862 | /* The size, in bits of the window, used in the "window method" | |
863 | exponentiation algorithm. This is equivalent to a radix of | |
864 | (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */ | |
865 | #define POWI_WINDOW_SIZE 3 | |
866 | ||
867 | /* The following table is an efficient representation of an | |
868 | "optimal power tree". For each value, i, the corresponding | |
869 | value, j, in the table states than an optimal evaluation | |
870 | sequence for calculating pow(x,i) can be found by evaluating | |
871 | pow(x,j)*pow(x,i-j). An optimal power tree for the first | |
872 | 100 integers is given in Knuth's "Seminumerical algorithms". */ | |
873 | ||
/* powi_table[i] holds a j such that pow(x,i) is optimally evaluated
   as pow(x,j) * pow(x,i-j); see the "optimal power tree" commentary
   above.  Entry 0 is unused (powi_cost short-circuits n == 0).  */
static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
909 | ||
910 | ||
911 | /* Return the number of multiplications required to calculate | |
912 | powi(x,n) where n is less than POWI_TABLE_SIZE. This is a | |
913 | subroutine of powi_cost. CACHE is an array indicating | |
914 | which exponents have already been calculated. */ | |
915 | ||
916 | static int | |
917 | powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache) | |
918 | { | |
919 | /* If we've already calculated this exponent, then this evaluation | |
920 | doesn't require any additional multiplications. */ | |
921 | if (cache[n]) | |
922 | return 0; | |
923 | ||
924 | cache[n] = true; | |
925 | return powi_lookup_cost (n - powi_table[n], cache) | |
926 | + powi_lookup_cost (powi_table[n], cache) + 1; | |
927 | } | |
928 | ||
929 | /* Return the number of multiplications required to calculate | |
930 | powi(x,n) for an arbitrary x, given the exponent N. This | |
931 | function needs to be kept in sync with powi_as_mults below. */ | |
932 | ||
933 | static int | |
934 | powi_cost (HOST_WIDE_INT n) | |
935 | { | |
936 | bool cache[POWI_TABLE_SIZE]; | |
937 | unsigned HOST_WIDE_INT digit; | |
938 | unsigned HOST_WIDE_INT val; | |
939 | int result; | |
940 | ||
941 | if (n == 0) | |
942 | return 0; | |
943 | ||
944 | /* Ignore the reciprocal when calculating the cost. */ | |
945 | val = (n < 0) ? -n : n; | |
946 | ||
947 | /* Initialize the exponent cache. */ | |
948 | memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool)); | |
949 | cache[1] = true; | |
950 | ||
951 | result = 0; | |
952 | ||
953 | while (val >= POWI_TABLE_SIZE) | |
954 | { | |
955 | if (val & 1) | |
956 | { | |
957 | digit = val & ((1 << POWI_WINDOW_SIZE) - 1); | |
958 | result += powi_lookup_cost (digit, cache) | |
959 | + POWI_WINDOW_SIZE + 1; | |
960 | val >>= POWI_WINDOW_SIZE; | |
961 | } | |
962 | else | |
963 | { | |
964 | val >>= 1; | |
965 | result++; | |
966 | } | |
967 | } | |
968 | ||
969 | return result + powi_lookup_cost (val, cache); | |
970 | } | |
971 | ||
972 | /* Recursive subroutine of powi_as_mults. This function takes the | |
973 | array, CACHE, of already calculated exponents and an exponent N and | |
974 | returns a tree that corresponds to CACHE[1]**N, with type TYPE. */ | |
975 | ||
976 | static tree | |
977 | powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type, | |
03d37e4e | 978 | HOST_WIDE_INT n, tree *cache) |
e9a6c4bc | 979 | { |
980 | tree op0, op1, ssa_target; | |
981 | unsigned HOST_WIDE_INT digit; | |
982 | gimple mult_stmt; | |
983 | ||
984 | if (n < POWI_TABLE_SIZE && cache[n]) | |
985 | return cache[n]; | |
986 | ||
03d37e4e | 987 | ssa_target = make_temp_ssa_name (type, NULL, "powmult"); |
e9a6c4bc | 988 | |
989 | if (n < POWI_TABLE_SIZE) | |
990 | { | |
991 | cache[n] = ssa_target; | |
03d37e4e | 992 | op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache); |
993 | op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache); | |
e9a6c4bc | 994 | } |
995 | else if (n & 1) | |
996 | { | |
997 | digit = n & ((1 << POWI_WINDOW_SIZE) - 1); | |
03d37e4e | 998 | op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache); |
999 | op1 = powi_as_mults_1 (gsi, loc, type, digit, cache); | |
e9a6c4bc | 1000 | } |
1001 | else | |
1002 | { | |
03d37e4e | 1003 | op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache); |
e9a6c4bc | 1004 | op1 = op0; |
1005 | } | |
1006 | ||
1007 | mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1); | |
ae43b05e | 1008 | gimple_set_location (mult_stmt, loc); |
e9a6c4bc | 1009 | gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT); |
1010 | ||
1011 | return ssa_target; | |
1012 | } | |
1013 | ||
1014 | /* Convert ARG0**N to a tree of multiplications of ARG0 with itself. | |
1015 | This function needs to be kept in sync with powi_cost above. */ | |
1016 | ||
1017 | static tree | |
1018 | powi_as_mults (gimple_stmt_iterator *gsi, location_t loc, | |
1019 | tree arg0, HOST_WIDE_INT n) | |
1020 | { | |
03d37e4e | 1021 | tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0); |
e9a6c4bc | 1022 | gimple div_stmt; |
03d37e4e | 1023 | tree target; |
e9a6c4bc | 1024 | |
1025 | if (n == 0) | |
1026 | return build_real (type, dconst1); | |
1027 | ||
1028 | memset (cache, 0, sizeof (cache)); | |
1029 | cache[1] = arg0; | |
1030 | ||
03d37e4e | 1031 | result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache); |
e9a6c4bc | 1032 | if (n >= 0) |
1033 | return result; | |
1034 | ||
1035 | /* If the original exponent was negative, reciprocate the result. */ | |
03d37e4e | 1036 | target = make_temp_ssa_name (type, NULL, "powmult"); |
e9a6c4bc | 1037 | div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target, |
1038 | build_real (type, dconst1), | |
1039 | result); | |
ae43b05e | 1040 | gimple_set_location (div_stmt, loc); |
e9a6c4bc | 1041 | gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT); |
1042 | ||
1043 | return target; | |
1044 | } | |
1045 | ||
1046 | /* ARG0 and N are the two arguments to a powi builtin in GSI with | |
1047 | location info LOC. If the arguments are appropriate, create an | |
1048 | equivalent sequence of statements prior to GSI using an optimal | |
1049 | number of multiplications, and return an expession holding the | |
1050 | result. */ | |
1051 | ||
1052 | static tree | |
1053 | gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc, | |
1054 | tree arg0, HOST_WIDE_INT n) | |
1055 | { | |
1056 | /* Avoid largest negative number. */ | |
1057 | if (n != -n | |
1058 | && ((n >= -1 && n <= 2) | |
1059 | || (optimize_function_for_speed_p (cfun) | |
1060 | && powi_cost (n) <= POWI_MAX_MULTS))) | |
1061 | return powi_as_mults (gsi, loc, arg0, n); | |
1062 | ||
1063 | return NULL_TREE; | |
1064 | } | |
1065 | ||
ae43b05e | 1066 | /* Build a gimple call statement that calls FN with argument ARG. |
03d37e4e | 1067 | Set the lhs of the call statement to a fresh SSA name. Insert the |
ae43b05e | 1068 | statement prior to GSI's current position, and return the fresh |
1069 | SSA name. */ | |
1070 | ||
1071 | static tree | |
ca12eb68 | 1072 | build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc, |
03d37e4e | 1073 | tree fn, tree arg) |
ae43b05e | 1074 | { |
1075 | gimple call_stmt; | |
1076 | tree ssa_target; | |
1077 | ||
ae43b05e | 1078 | call_stmt = gimple_build_call (fn, 1, arg); |
03d37e4e | 1079 | ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot"); |
ae43b05e | 1080 | gimple_set_lhs (call_stmt, ssa_target); |
1081 | gimple_set_location (call_stmt, loc); | |
1082 | gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT); | |
1083 | ||
1084 | return ssa_target; | |
1085 | } | |
1086 | ||
ca12eb68 | 1087 | /* Build a gimple binary operation with the given CODE and arguments |
1088 | ARG0, ARG1, assigning the result to a new SSA name for variable | |
1089 | TARGET. Insert the statement prior to GSI's current position, and | |
1090 | return the fresh SSA name.*/ | |
1091 | ||
1092 | static tree | |
1093 | build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, | |
03d37e4e | 1094 | const char *name, enum tree_code code, |
1095 | tree arg0, tree arg1) | |
ca12eb68 | 1096 | { |
03d37e4e | 1097 | tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name); |
ca12eb68 | 1098 | gimple stmt = gimple_build_assign_with_ops (code, result, arg0, arg1); |
1099 | gimple_set_location (stmt, loc); | |
1100 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1101 | return result; | |
1102 | } | |
1103 | ||
a5c384c1 | 1104 | /* Build a gimple reference operation with the given CODE and argument |
03d37e4e | 1105 | ARG, assigning the result to a new SSA name of TYPE with NAME. |
a5c384c1 | 1106 | Insert the statement prior to GSI's current position, and return |
1107 | the fresh SSA name. */ | |
1108 | ||
1109 | static inline tree | |
1110 | build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, | |
03d37e4e | 1111 | const char *name, enum tree_code code, tree arg0) |
a5c384c1 | 1112 | { |
03d37e4e | 1113 | tree result = make_temp_ssa_name (type, NULL, name); |
a5c384c1 | 1114 | gimple stmt = gimple_build_assign (result, build1 (code, type, arg0)); |
1115 | gimple_set_location (stmt, loc); | |
1116 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1117 | return result; | |
1118 | } | |
1119 | ||
03d37e4e | 1120 | /* Build a gimple assignment to cast VAL to TYPE. Insert the statement |
aff5fb4d | 1121 | prior to GSI's current position, and return the fresh SSA name. */ |
1122 | ||
1123 | static tree | |
1124 | build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, | |
03d37e4e | 1125 | tree type, tree val) |
aff5fb4d | 1126 | { |
03d37e4e | 1127 | tree result = make_ssa_name (type, NULL); |
1128 | gimple stmt = gimple_build_assign_with_ops (NOP_EXPR, result, val, NULL_TREE); | |
1129 | gimple_set_location (stmt, loc); | |
1130 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1131 | return result; | |
aff5fb4d | 1132 | } |
1133 | ||
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expession holding the result.  Returns NULL_TREE when no cheaper
   expansion applies; the caller then leaves the pow call alone.  */

static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
			   tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
  enum machine_mode mode;
  bool hw_sqrt_exists, c_is_int, c2_is_int;

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  Round-tripping C through
     real_to_integer/real_from_integer and comparing bitwise tells us
     whether it is exactly integral.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
  c_is_int = real_identical (&c, &cint);

  /* NOTE(review): powi_cost is reached here before any n != -n guard,
     so it must itself tolerate n == HOST_WIDE_INT_MIN.  */
  if (c_is_int
      && ((n >= -1 && n <= 2)
	  || (flag_unsafe_math_optimizations
	      && optimize_insn_for_speed_p ()
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && REAL_VALUES_EQUAL (c, dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
     a builtin sqrt instruction is smaller than a call to pow with 0.25,
     so do this optimization even if -Os.  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);  /* 1.0 / 4 */
  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && REAL_VALUES_EQUAL (c, dconst1_4)
      && hw_sqrt_exists)
    {
      /* sqrt(x) */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* sqrt(sqrt(x)) */
      return build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
    }

  /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
     optimizing for space.  Don't do this optimization if we don't have
     a hardware sqrt insn.  */
  real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
  SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);  /* 3.0 / 4 */

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && optimize_function_for_speed_p (cfun)
      && REAL_VALUES_EQUAL (c, dconst3_4)
      && hw_sqrt_exists)
    {
      /* sqrt(x) */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* sqrt(sqrt(x)) */
      sqrt_sqrt = build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);

      /* sqrt(x) * sqrt(sqrt(x)) */
      return build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				     sqrt_arg0, sqrt_sqrt);
    }

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && REAL_VALUES_EQUAL (c, dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);  /* (1/3) / 2 */

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && optimize_function_for_speed_p (cfun)
      && hw_sqrt_exists
      && REAL_VALUES_EQUAL (c, dconst1_6))
    {
      /* sqrt(x) */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x)) */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }

  /* Optimize pow(x,c), where n = 2c for some nonzero integer n
     and c not an integer, into

       sqrt(x) * powi(x, n/2),                n > 0;
       1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.

     Do not calculate the powi factor when n/2 = 0.  */
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
  c2_is_int = real_identical (&c2, &cint);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && c2_is_int
      && !c_is_int
      && optimize_function_for_speed_p (cfun))
    {
      tree powi_x_ndiv2 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/2)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 n is 1 or -1, where the result is always 1.  */
      if (absu_hwi (n) != 1)
	{
	  powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 2));
	  if (!powi_x_ndiv2)
	    return NULL_TREE;
	}

      /* Calculate sqrt(x).  When n is not 1 or -1, multiply it by the
	 result of the optimal multiply sequence just calculated.  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      if (absu_hwi (n) == 1)
	result = sqrt_arg0;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 sqrt_arg0, powi_x_ndiv2);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);
      return result;
    }

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  */
  real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  /* Round 3c to the nearest integer n, then check below (via
     real_identical) that n/3 reproduces c exactly in MODE.  */
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && real_identical (&c2, &c)
      && !c2_is_int
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	{
	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 3));
	  if (!powi_x_ndiv3)
	    return NULL_TREE;
	}

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					      cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}
1375 | ||
a5c384c1 | 1376 | /* ARG is the argument to a cabs builtin call in GSI with location info |
1377 | LOC. Create a sequence of statements prior to GSI that calculates | |
1378 | sqrt(R*R + I*I), where R and I are the real and imaginary components | |
1379 | of ARG, respectively. Return an expression holding the result. */ | |
1380 | ||
1381 | static tree | |
1382 | gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) | |
1383 | { | |
03d37e4e | 1384 | tree real_part, imag_part, addend1, addend2, sum, result; |
a5c384c1 | 1385 | tree type = TREE_TYPE (TREE_TYPE (arg)); |
1386 | tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); | |
1387 | enum machine_mode mode = TYPE_MODE (type); | |
1388 | ||
1389 | if (!flag_unsafe_math_optimizations | |
1390 | || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi))) | |
1391 | || !sqrtfn | |
1392 | || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing) | |
1393 | return NULL_TREE; | |
1394 | ||
03d37e4e | 1395 | real_part = build_and_insert_ref (gsi, loc, type, "cabs", |
a5c384c1 | 1396 | REALPART_EXPR, arg); |
03d37e4e | 1397 | addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |
a5c384c1 | 1398 | real_part, real_part); |
03d37e4e | 1399 | imag_part = build_and_insert_ref (gsi, loc, type, "cabs", |
a5c384c1 | 1400 | IMAGPART_EXPR, arg); |
03d37e4e | 1401 | addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |
a5c384c1 | 1402 | imag_part, imag_part); |
03d37e4e | 1403 | sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2); |
1404 | result = build_and_insert_call (gsi, loc, sqrtfn, sum); | |
a5c384c1 | 1405 | |
1406 | return result; | |
1407 | } | |
1408 | ||
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
   an optimal number of multiplies, when n is a constant.  Returns
   TODO_cleanup_cfg when any replacement may have changed the CFG.  */

static unsigned int
execute_cse_sincos (void)
{
  basic_block bb;
  bool cfg_changed = false;

  calculate_dominance_info (CDI_DOMINATORS);
  memset (&sincos_stats, 0, sizeof (sincos_stats));

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;
      bool cleanup_eh = false;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  /* Only the last stmt in a bb could throw, no need to call
	     gimple_purge_dead_eh_edges if we change something in the middle
	     of a basic block.  */
	  cleanup_eh = false;

	  /* Only consider built-in calls whose result is used.  */
	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt)
	      && (fndecl = gimple_call_fndecl (stmt))
	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
	    {
	      tree arg, arg0, arg1, result;
	      HOST_WIDE_INT n;
	      location_t loc;

	      switch (DECL_FUNCTION_CODE (fndecl))
		{
		CASE_FLT_FN (BUILT_IN_COS):
		CASE_FLT_FN (BUILT_IN_SIN):
		CASE_FLT_FN (BUILT_IN_CEXPI):
		  /* Make sure we have either sincos or cexp.  */
		  if (!targetm.libc_has_function (function_c99_math_complex)
		      && !targetm.libc_has_function (function_sincos))
		    break;

		  arg = gimple_call_arg (stmt, 0);
		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;

		CASE_FLT_FN (BUILT_IN_POW):
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);

		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);

		  if (result)
		    {
		      /* Replace the pow call with a plain copy of the
			 expanded result; the call's VDEF (if any) goes
			 away with the call.  */
		      tree lhs = gimple_get_lhs (stmt);
		      gimple new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_FLT_FN (BUILT_IN_POWI):
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);
		  loc = gimple_location (stmt);

		  if (real_minus_onep (arg0))
		    {
		      /* powi(-1,n) is 1 for even n and -1 for odd n;
			 emit (n & 1) ? -1.0 : 1.0 without needing a
			 constant exponent.  */
		      tree t0, t1, cond, one, minus_one;
		      /* NOTE: this inner STMT deliberately shadows the
			 outer call statement; the outer one is still
			 used by the replacement code below.  */
		      gimple stmt;

		      t0 = TREE_TYPE (arg0);
		      t1 = TREE_TYPE (arg1);
		      one = build_real (t0, dconst1);
		      minus_one = build_real (t0, dconstm1);

		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
		      stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, cond,
							   arg1,
							   build_int_cst (t1,
									  1));
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

		      result = make_temp_ssa_name (t0, NULL, "powi");
		      stmt = gimple_build_assign_with_ops (COND_EXPR, result,
							   cond,
							   minus_one, one);
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		    }
		  else
		    {
		      /* Otherwise the exponent must fit a signed HWI.  */
		      if (!tree_fits_shwi_p (arg1))
			break;

		      n = tree_to_shwi (arg1);
		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
		    }

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gimple new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_FLT_FN (BUILT_IN_CABS):
		  arg0 = gimple_call_arg (stmt, 0);
		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gimple new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		default:;
		}
	    }
	}
      /* Purge dead EH edges once per block, after the last statement
	 (the only one that could throw) may have been replaced.  */
      if (cleanup_eh)
	cfg_changed |= gimple_purge_dead_eh_edges (bb);
    }

  statistics_counter_event (cfun, "sincos statements inserted",
			    sincos_stats.inserted);

  free_dominance_info (CDI_DOMINATORS);
  return cfg_changed ? TODO_cleanup_cfg : 0;
}
1565 | ||
/* Gate for the sincos pass: run whenever we optimize at all; the pass
   itself checks per-call whether sincos/cexp/powi expansion applies.  */
1566 | static bool | |
1567 | gate_cse_sincos (void) | |
1568 | { | |
e9a6c4bc | 1569 | /* We no longer require either sincos or cexp, since powi expansion |
1570 | piggybacks on this pass. */ | |
1571 | return optimize; | |
a0315874 | 1572 | }
1573 | ||
/* Pass-manager boilerplate: static descriptor plus the opt_pass subclass
   wrapping gate_cse_sincos/execute_cse_sincos.  Kept in an anonymous
   namespace so only the factory below is visible outside this file.  */
cbe8bda8 | 1574 | namespace { |
1575 | ||
1576 | const pass_data pass_data_cse_sincos = | |
a0315874 | 1577 | { |
cbe8bda8 | 1578 | GIMPLE_PASS, /* type */ |
1579 | "sincos", /* name */ | |
1580 | OPTGROUP_NONE, /* optinfo_flags */ | |
1581 | true, /* has_gate */ | |
1582 | true, /* has_execute */ | |
1583 | TV_NONE, /* tv_id */ | |
1584 | PROP_ssa, /* properties_required */ | |
1585 | 0, /* properties_provided */ | |
1586 | 0, /* properties_destroyed */ | |
1587 | 0, /* todo_flags_start */ | |
1588 | ( TODO_update_ssa | TODO_verify_ssa | |
1589 | | TODO_verify_stmts ), /* todo_flags_finish */ | |
a0315874 | 1590 | }; |
e174638f | 1591 | |
cbe8bda8 | 1592 | class pass_cse_sincos : public gimple_opt_pass |
1593 | { | |
1594 | public: | |
9af5ce0c | 1595 | pass_cse_sincos (gcc::context *ctxt) |
1596 | : gimple_opt_pass (pass_data_cse_sincos, ctxt) | |
cbe8bda8 | 1597 | {} |
1598 | ||
1599 | /* opt_pass methods: */ | |
1600 | bool gate () { return gate_cse_sincos (); } | |
1601 | unsigned int execute () { return execute_cse_sincos (); } | |
1602 | ||
1603 | }; // class pass_cse_sincos | |
1604 | ||
1605 | } // anon namespace | |
1606 | ||
/* Factory: create a new instance of the sincos pass for CTXT.  */
1607 | gimple_opt_pass * | |
1608 | make_pass_cse_sincos (gcc::context *ctxt) | |
1609 | { | |
1610 | return new pass_cse_sincos (ctxt); | |
1611 | } | |
1612 | ||
84cc784c | 1613 | /* A symbolic number is used to detect byte permutation and selection |
1614 | patterns. Therefore the field N contains an artificial number | |
1615 | consisting of byte size markers: | |
1616 | ||
1617 | 0 - byte has the value 0 | |
1618 | 1..size - byte contains the content of the byte | |
1619 | number indexed with that value minus one */ | |
1620 | ||
1621 | struct symbolic_number { | |
/* The symbolic byte markers, one marker per byte (see the encoding
   described in the comment above: 0 = zero byte, 1..size = index+1 of
   the source byte that ends up here).  */
1622 | unsigned HOST_WIDEST_INT n; | |
/* Size in bytes of the value being tracked.  */
1623 | int size; | |
1624 | }; | |
1625 | ||
1626 | /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic | |
1627 | number N. Return false if the requested operation is not permitted | |
1628 | on a symbolic number. */ | |
1629 | ||
1630 | static inline bool | |
1631 | do_shift_rotate (enum tree_code code, | |
1632 | struct symbolic_number *n, | |
1633 | int count) | |
1634 | { | |
/* Only whole-byte shifts/rotates can be modelled on per-byte markers.  */
1635 | if (count % 8 != 0) | |
1636 | return false; | |
1637 | ||
1638 | /* Zero out the extra bits of N in order to avoid them being shifted | |
1639 | into the significant bits. */ | |
1640 | if (n->size < (int)sizeof (HOST_WIDEST_INT)) | |
1641 | n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1; | |
1642 | ||
1643 | switch (code) | |
1644 | { | |
1645 | case LSHIFT_EXPR: | |
1646 | n->n <<= count; | |
1647 | break; | |
1648 | case RSHIFT_EXPR: | |
1649 | n->n >>= count; | |
1650 | break; | |
/* Rotates are simulated on the (byte-masked) value; COUNT bits out of
   one end come back in at the other within the n->size-byte window.  */
1651 | case LROTATE_EXPR: | |
1652 | n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count)); | |
1653 | break; | |
1654 | case RROTATE_EXPR: | |
1655 | n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count)); | |
1656 | break; | |
1657 | default: | |
1658 | return false; | |
1659 | } | |
0f09ed00 | 1660 | /* Zero unused bits for size. */ |
1661 | if (n->size < (int)sizeof (HOST_WIDEST_INT)) | |
1662 | n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1; | |
84cc784c | 1663 | return true; |
1664 | } | |
1665 | ||
1666 | /* Perform sanity checking for the symbolic number N and the gimple | |
1667 | statement STMT. */ | |
1668 | ||
1669 | static inline bool | |
1670 | verify_symbolic_number_p (struct symbolic_number *n, gimple stmt) | |
1671 | { | |
1672 | tree lhs_type; | |
1673 | ||
1674 | lhs_type = gimple_expr_type (stmt); | |
1675 | ||
/* STMT's result must be a plain integer ...  */
1676 | if (TREE_CODE (lhs_type) != INTEGER_TYPE) | |
1677 | return false; | |
1678 | ||
/* ... whose precision matches the byte count tracked in N, so each
   byte marker corresponds to exactly one byte of the result.  */
1679 | if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT) | |
1680 | return false; | |
1681 | ||
1682 | return true; | |
1683 | } | |
1684 | ||
1685 | /* find_bswap_1 invokes itself recursively with N and tries to perform | |
1686 | the operation given by the rhs of STMT on the result. If the | |
1687 | operation could successfully be executed the function returns the | |
1688 | tree expression of the source operand and NULL otherwise. */ | |
1689 | ||
1690 | static tree | |
1691 | find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit) | |
1692 | { | |
1693 | enum tree_code code; | |
1694 | tree rhs1, rhs2 = NULL; | |
1695 | gimple rhs1_stmt, rhs2_stmt; | |
1696 | tree source_expr1; | |
1697 | enum gimple_rhs_class rhs_class; | |
1698 | ||
/* LIMIT bounds the recursion depth; bail out when exhausted or when
   STMT is not an assignment we can symbolically execute.  */
1699 | if (!limit || !is_gimple_assign (stmt)) | |
1700 | return NULL_TREE; | |
1701 | ||
1702 | rhs1 = gimple_assign_rhs1 (stmt); | |
1703 | ||
1704 | if (TREE_CODE (rhs1) != SSA_NAME) | |
1705 | return NULL_TREE; | |
1706 | ||
1707 | code = gimple_assign_rhs_code (stmt); | |
1708 | rhs_class = gimple_assign_rhs_class (stmt); | |
1709 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
1710 | ||
1711 | if (rhs_class == GIMPLE_BINARY_RHS) | |
1712 | rhs2 = gimple_assign_rhs2 (stmt); | |
1713 | ||
1714 | /* Handle unary rhs and binary rhs with integer constants as second | |
1715 | operand. */ | |
1716 | ||
1717 | if (rhs_class == GIMPLE_UNARY_RHS | |
1718 | || (rhs_class == GIMPLE_BINARY_RHS | |
1719 | && TREE_CODE (rhs2) == INTEGER_CST)) | |
1720 | { | |
/* Only byte-preserving operations are admitted: masks, shifts,
   rotates and conversions.  Anything else defeats the tracking.  */
1721 | if (code != BIT_AND_EXPR | |
1722 | && code != LSHIFT_EXPR | |
1723 | && code != RSHIFT_EXPR | |
1724 | && code != LROTATE_EXPR | |
1725 | && code != RROTATE_EXPR | |
1726 | && code != NOP_EXPR | |
1727 | && code != CONVERT_EXPR) | |
1728 | return NULL_TREE; | |
1729 | ||
1730 | source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1); | |
1731 | ||
1732 | /* If find_bswap_1 returned NULL STMT is a leaf node and we have | |
1733 | to initialize the symbolic number. */ | |
1734 | if (!source_expr1) | |
1735 | { | |
1736 | /* Set up the symbolic number N by setting each byte to a | |
1737 | value between 1 and the byte size of rhs1. The highest | |
f9a210c9 | 1738 | order byte is set to n->size and the lowest order |
1739 | byte to 1. */ | |
84cc784c | 1740 | n->size = TYPE_PRECISION (TREE_TYPE (rhs1)); |
1741 | if (n->size % BITS_PER_UNIT != 0) | |
1742 | return NULL_TREE; | |
1743 | n->size /= BITS_PER_UNIT; | |
1744 | n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 : | |
f9a210c9 | 1745 | (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201); |
1746 | ||
/* Mask off marker bytes beyond the value's own size.  */
1747 | if (n->size < (int)sizeof (HOST_WIDEST_INT)) | |
1748 | n->n &= ((unsigned HOST_WIDEST_INT)1 << | |
1749 | (n->size * BITS_PER_UNIT)) - 1; | |
84cc784c | 1750 | |
1751 | source_expr1 = rhs1; | |
1752 | } | |
1753 | ||
/* Apply STMT's own operation to the symbolic number.  */
1754 | switch (code) | |
1755 | { | |
1756 | case BIT_AND_EXPR: | |
1757 | { | |
1758 | int i; | |
1759 | unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2); | |
1760 | unsigned HOST_WIDEST_INT tmp = val; | |
1761 | ||
1762 | /* Only constants masking full bytes are allowed. */ | |
1763 | for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT) | |
1764 | if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff) | |
1765 | return NULL_TREE; | |
1766 | ||
1767 | n->n &= val; | |
1768 | } | |
1769 | break; | |
1770 | case LSHIFT_EXPR: | |
1771 | case RSHIFT_EXPR: | |
1772 | case LROTATE_EXPR: | |
1773 | case RROTATE_EXPR: | |
1774 | if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2))) | |
1775 | return NULL_TREE; | |
1776 | break; | |
1777 | CASE_CONVERT: | |
1778 | { | |
1779 | int type_size; | |
1780 | ||
1781 | type_size = TYPE_PRECISION (gimple_expr_type (stmt)); | |
1782 | if (type_size % BITS_PER_UNIT != 0) | |
1783 | return NULL_TREE; | |
1784 | ||
84cc784c | 1785 | if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT))) |
1786 | { | |
1787 | /* If STMT casts to a smaller type mask out the bits not | |
1788 | belonging to the target type. */ | |
84cc784c | 1789 | n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1; |
1790 | } | |
f9a210c9 | 1791 | n->size = type_size / BITS_PER_UNIT; |
84cc784c | 1792 | } |
1793 | break; | |
1794 | default: | |
1795 | return NULL_TREE; | |
1796 | }; | |
1797 | return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL; | |
1798 | } | |
1799 | ||
1800 | /* Handle binary rhs. */ | |
1801 | ||
1802 | if (rhs_class == GIMPLE_BINARY_RHS) | |
1803 | { | |
1804 | struct symbolic_number n1, n2; | |
1805 | tree source_expr2; | |
1806 | ||
/* Only BIT_IOR_EXPR merges two byte streams in a way we can model.  */
1807 | if (code != BIT_IOR_EXPR) | |
1808 | return NULL_TREE; | |
1809 | ||
1810 | if (TREE_CODE (rhs2) != SSA_NAME) | |
1811 | return NULL_TREE; | |
1812 | ||
1813 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
1814 | ||
1815 | switch (code) | |
1816 | { | |
1817 | case BIT_IOR_EXPR: | |
/* Both operands must trace back to the same source expression
   with equal sizes; the result is the union of their markers.  */
1818 | source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1); | |
1819 | ||
1820 | if (!source_expr1) | |
1821 | return NULL_TREE; | |
1822 | ||
1823 | source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1); | |
1824 | ||
1825 | if (source_expr1 != source_expr2 | |
1826 | || n1.size != n2.size) | |
1827 | return NULL_TREE; | |
1828 | ||
1829 | n->size = n1.size; | |
1830 | n->n = n1.n | n2.n; | |
1831 | ||
1832 | if (!verify_symbolic_number_p (n, stmt)) | |
1833 | return NULL_TREE; | |
1834 | ||
1835 | break; | |
1836 | default: | |
1837 | return NULL_TREE; | |
1838 | } | |
1839 | return source_expr1; | |
1840 | } | |
1841 | return NULL_TREE; | |
1842 | } | |
1843 | ||
1844 | /* Check if STMT completes a bswap implementation consisting of ORs, | |
1845 | SHIFTs and ANDs. Return the source tree expression on which the | |
1846 | byte swap is performed and NULL if no bswap was found. */ | |
1847 | ||
1848 | static tree | |
1849 | find_bswap (gimple stmt) | |
1850 | { | |
1851 | /* The number which the find_bswap result should match in order to | |
f9a210c9 | 1852 | have a full byte swap. The number is shifted to the left according |
1853 | to the size of the symbolic number before using it. */ | |
84cc784c | 1854 | unsigned HOST_WIDEST_INT cmp = |
1855 | sizeof (HOST_WIDEST_INT) < 8 ? 0 : | |
f9a210c9 | 1856 | (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708; |
84cc784c | 1857 | |
1858 | struct symbolic_number n; | |
1859 | tree source_expr; | |
0f09ed00 | 1860 | int limit; |
84cc784c | 1861 | |
9bc1852a | 1862 | /* The last parameter determines the depth search limit. It usually |
1863 | correlates directly to the number of bytes to be touched. We | |
0f09ed00 | 1864 | increase that number by three here in order to also |
1865 | cover signed -> unsigned conversions of the src operand as can be seen | |
1866 | in libgcc, and for initial shift/and operation of the src operand. */ | |
1867 | limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); | |
1868 | limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); | |
1869 | source_expr = find_bswap_1 (stmt, &n, limit); | |
84cc784c | 1870 | |
/* No coherent source expression found: not a bswap pattern.  */
1871 | if (!source_expr) | |
1872 | return NULL_TREE; | |
1873 | ||
1874 | /* Zero out the extra bits of N and CMP. */ | |
1875 | if (n.size < (int)sizeof (HOST_WIDEST_INT)) | |
1876 | { | |
1877 | unsigned HOST_WIDEST_INT mask = | |
1878 | ((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1; | |
1879 | ||
1880 | n.n &= mask; | |
f9a210c9 | 1881 | cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT; |
84cc784c | 1882 | } |
1883 | ||
1884 | /* A complete byte swap should make the symbolic number to start | |
1885 | with the largest digit in the highest order byte. */ | |
1886 | if (cmp != n.n) | |
1887 | return NULL_TREE; | |
1888 | ||
1889 | return source_expr; | |
1890 | } | |
1891 | ||
1892 | /* Find manual byte swap implementations and turn them into a bswap | |
1893 | builtin invocation. */ | |
1894 | ||
1895 | static unsigned int | |
1896 | execute_optimize_bswap (void) | |
1897 | { | |
1898 | basic_block bb; | |
f811051b | 1899 | bool bswap16_p, bswap32_p, bswap64_p; |
84cc784c | 1900 | bool changed = false; |
f811051b | 1901 | tree bswap16_type = NULL_TREE, bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; |
84cc784c | 1902 | |
/* The symbolic-number machinery assumes 8-bit bytes and a host wide
   enough to hold a 64-bit marker value; give up otherwise.  */
1903 | if (BITS_PER_UNIT != 8) | |
1904 | return 0; | |
1905 | ||
1906 | if (sizeof (HOST_WIDEST_INT) < 8) | |
1907 | return 0; | |
1908 | ||
/* A width is usable only if both the builtin and a target optab
   (or, for 64 bit, a 32-bit fallback on SImode-word targets) exist.  */
f811051b | 1909 | bswap16_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP16) |
1910 | && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing); | |
b9a16870 | 1911 | bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32) |
d6bf3b14 | 1912 | && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing); |
b9a16870 | 1913 | bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64) |
d6bf3b14 | 1914 | && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing |
3328b1fb | 1915 | || (bswap32_p && word_mode == SImode))); |
84cc784c | 1916 | |
f811051b | 1917 | if (!bswap16_p && !bswap32_p && !bswap64_p) |
84cc784c | 1918 | return 0; |
1919 | ||
0af25806 | 1920 | /* Determine the argument type of the builtins. The code later on |
1921 | assumes that the return and argument type are the same. */ | |
f811051b | 1922 | if (bswap16_p) |
1923 | { | |
1924 | tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16); | |
1925 | bswap16_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); | |
1926 | } | |
1927 | ||
0af25806 | 1928 | if (bswap32_p) |
1929 | { | |
b9a16870 | 1930 | tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); |
0af25806 | 1931 | bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); |
1932 | } | |
1933 | ||
1934 | if (bswap64_p) | |
1935 | { | |
b9a16870 | 1936 | tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); |
0af25806 | 1937 | bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); |
1938 | } | |
1939 | ||
30c4e60d | 1940 | memset (&bswap_stats, 0, sizeof (bswap_stats)); |
1941 | ||
84cc784c | 1942 | FOR_EACH_BB (bb) |
1943 | { | |
1944 | gimple_stmt_iterator gsi; | |
1945 | ||
0ec31268 | 1946 | /* We do a reverse scan for bswap patterns to make sure we get the |
1947 | widest match. As bswap pattern matching doesn't handle | |
1948 | previously inserted smaller bswap replacements as sub- | |
1949 | patterns, the wider variant wouldn't be detected. */ | |
1950 | for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) | |
84cc784c | 1951 | { |
1952 | gimple stmt = gsi_stmt (gsi); | |
0af25806 | 1953 | tree bswap_src, bswap_type; |
1954 | tree bswap_tmp; | |
84cc784c | 1955 | tree fndecl = NULL_TREE; |
1956 | int type_size; | |
1957 | gimple call; | |
1958 | ||
/* A hand-written bswap always ends in the OR that merges the
   relocated bytes, so only BIT_IOR_EXPR roots are candidates.  */
1959 | if (!is_gimple_assign (stmt) | |
1960 | || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR) | |
1961 | continue; | |
1962 | ||
1963 | type_size = TYPE_PRECISION (gimple_expr_type (stmt)); | |
1964 | ||
1965 | switch (type_size) | |
1966 | { | |
f811051b | 1967 | case 16: |
1968 | if (bswap16_p) | |
1969 | { | |
1970 | fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16); | |
1971 | bswap_type = bswap16_type; | |
1972 | } | |
1973 | break; | |
84cc784c | 1974 | case 32: |
1975 | if (bswap32_p) | |
0af25806 | 1976 | { |
b9a16870 | 1977 | fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); |
0af25806 | 1978 | bswap_type = bswap32_type; |
1979 | } | |
84cc784c | 1980 | break; |
1981 | case 64: | |
1982 | if (bswap64_p) | |
0af25806 | 1983 | { |
b9a16870 | 1984 | fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); |
0af25806 | 1985 | bswap_type = bswap64_type; |
1986 | } | |
84cc784c | 1987 | break; |
1988 | default: | |
1989 | continue; | |
1990 | } | |
1991 | ||
1992 | if (!fndecl) | |
1993 | continue; | |
1994 | ||
/* Symbolically execute the def chain; non-NULL means STMT computes
   a full byte swap of BSWAP_SRC.  */
1995 | bswap_src = find_bswap (stmt); | |
1996 | ||
1997 | if (!bswap_src) | |
1998 | continue; | |
1999 | ||
2000 | changed = true; | |
f811051b | 2001 | if (type_size == 16) |
2002 | bswap_stats.found_16bit++; | |
2003 | else if (type_size == 32) | |
30c4e60d | 2004 | bswap_stats.found_32bit++; |
2005 | else | |
2006 | bswap_stats.found_64bit++; | |
0af25806 | 2007 | |
2008 | bswap_tmp = bswap_src; | |
2009 | ||
2010 | /* Convert the src expression if necessary. */ | |
2011 | if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type)) | |
2012 | { | |
2013 | gimple convert_stmt; | |
03d37e4e | 2014 | bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc"); |
2015 | convert_stmt = gimple_build_assign_with_ops | |
2016 | (NOP_EXPR, bswap_tmp, bswap_src, NULL); | |
0af25806 | 2017 | gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT); |
2018 | } | |
2019 | ||
2020 | call = gimple_build_call (fndecl, 1, bswap_tmp); | |
2021 | ||
2022 | bswap_tmp = gimple_assign_lhs (stmt); | |
2023 | ||
2024 | /* Convert the result if necessary. */ | |
2025 | if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type)) | |
2026 | { | |
2027 | gimple convert_stmt; | |
03d37e4e | 2028 | bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst"); |
2029 | convert_stmt = gimple_build_assign_with_ops | |
2030 | (NOP_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL); | |
0af25806 | 2031 | gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT); |
2032 | } | |
2033 | ||
2034 | gimple_call_set_lhs (call, bswap_tmp); | |
84cc784c | 2035 | |
2036 | if (dump_file) | |
2037 | { | |
2038 | fprintf (dump_file, "%d bit bswap implementation found at: ", | |
2039 | (int)type_size); | |
2040 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
2041 | } | |
2042 | ||
/* Replace the pattern's root statement with the builtin call.  */
2043 | gsi_insert_after (&gsi, call, GSI_SAME_STMT); | |
2044 | gsi_remove (&gsi, true); | |
2045 | } | |
2046 | } | |
2047 | ||
f811051b | 2048 | statistics_counter_event (cfun, "16-bit bswap implementations found", |
2049 | bswap_stats.found_16bit); | |
30c4e60d | 2050 | statistics_counter_event (cfun, "32-bit bswap implementations found", |
2051 | bswap_stats.found_32bit); | |
2052 | statistics_counter_event (cfun, "64-bit bswap implementations found", | |
2053 | bswap_stats.found_64bit); | |
2054 | ||
771e2890 | 2055 | return (changed ? TODO_update_ssa | TODO_verify_ssa |
84cc784c | 2056 | | TODO_verify_stmts : 0); |
2057 | } | |
2058 | ||
/* Gate for the bswap pass: only worthwhile with expensive
   optimizations enabled.  */
2059 | static bool | |
2060 | gate_optimize_bswap (void) | |
2061 | { | |
2062 | return flag_expensive_optimizations && optimize; | |
2063 | } | |
2064 | ||
/* Pass-manager boilerplate for the "bswap" pass, mirroring the
   sincos pass registration above.  */
cbe8bda8 | 2065 | namespace { |
2066 | ||
2067 | const pass_data pass_data_optimize_bswap = | |
84cc784c | 2068 | { |
cbe8bda8 | 2069 | GIMPLE_PASS, /* type */ |
2070 | "bswap", /* name */ | |
2071 | OPTGROUP_NONE, /* optinfo_flags */ | |
2072 | true, /* has_gate */ | |
2073 | true, /* has_execute */ | |
2074 | TV_NONE, /* tv_id */ | |
2075 | PROP_ssa, /* properties_required */ | |
2076 | 0, /* properties_provided */ | |
2077 | 0, /* properties_destroyed */ | |
2078 | 0, /* todo_flags_start */ | |
2079 | 0, /* todo_flags_finish */ | |
84cc784c | 2080 | }; |
62be004c | 2081 | |
cbe8bda8 | 2082 | class pass_optimize_bswap : public gimple_opt_pass |
2083 | { | |
2084 | public: | |
9af5ce0c | 2085 | pass_optimize_bswap (gcc::context *ctxt) |
2086 | : gimple_opt_pass (pass_data_optimize_bswap, ctxt) | |
cbe8bda8 | 2087 | {} |
2088 | ||
2089 | /* opt_pass methods: */ | |
2090 | bool gate () { return gate_optimize_bswap (); } | |
2091 | unsigned int execute () { return execute_optimize_bswap (); } | |
2092 | ||
2093 | }; // class pass_optimize_bswap | |
2094 | ||
2095 | } // anon namespace | |
2096 | ||
/* Factory: create a new instance of the bswap pass for CTXT.  */
2097 | gimple_opt_pass * | |
2098 | make_pass_optimize_bswap (gcc::context *ctxt) | |
2099 | { | |
2100 | return new pass_optimize_bswap (ctxt); | |
2101 | } | |
2102 | ||
71dbd910 | 2103 | /* Return true if stmt is a type conversion operation that can be stripped |
2104 | when used in a widening multiply operation. */ | |
2105 | static bool | |
2106 | widening_mult_conversion_strippable_p (tree result_type, gimple stmt) | |
2107 | { | |
2108 | enum tree_code rhs_code = gimple_assign_rhs_code (stmt); | |
2109 | ||
2110 | if (TREE_CODE (result_type) == INTEGER_TYPE) | |
2111 | { | |
2112 | tree op_type; | |
2113 | tree inner_op_type; | |
2114 | ||
/* For integer results only conversions can be stripped at all.  */
2115 | if (!CONVERT_EXPR_CODE_P (rhs_code)) | |
2116 | return false; | |
2117 | ||
2118 | op_type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
2119 | ||
2120 | /* If the type of OP has the same precision as the result, then | |
2121 | we can strip this conversion. The multiply operation will be | |
2122 | selected to create the correct extension as a by-product. */ | |
2123 | if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type)) | |
2124 | return true; | |
2125 | ||
2126 | /* We can also strip a conversion if it preserves the signed-ness of | |
2127 | the operation and doesn't narrow the range. */ | |
2128 | inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); | |
2129 | ||
8f9d1531 | 2130 | /* If the inner-most type is unsigned, then we can strip any |
2131 | intermediate widening operation. If it's signed, then the | |
2132 | intermediate widening operation must also be signed. */ | |
2133 | if ((TYPE_UNSIGNED (inner_op_type) | |
2134 | || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type)) | |
71dbd910 | 2135 | && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type)) |
2136 | return true; | |
2137 | ||
2138 | return false; | |
2139 | } | |
2140 | ||
/* Non-integer (fixed-point) results: only FIXED_CONVERT_EXPR may go.  */
2141 | return rhs_code == FIXED_CONVERT_EXPR; | |
2142 | } | |
2143 | ||
0989f516 | 2144 | /* Return true if RHS is a suitable operand for a widening multiplication, |
2145 | assuming a target type of TYPE. | |
7e4c867e | 2146 | There are two cases: |
2147 | ||
aff5fb4d | 2148 | - RHS makes some value at least twice as wide. Store that value |
2149 | in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. | |
7e4c867e | 2150 | |
2151 | - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, | |
2152 | but leave *TYPE_OUT untouched. */ | |
00f4f705 | 2153 | |
static bool | |
0989f516 | 2155 | is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, |
2156 | tree *new_rhs_out) | |
7e4c867e | 2157 | { |
2158 | gimple stmt; | |
0989f516 | 2159 | tree type1, rhs1; |
7e4c867e | 2160 | |
2161 | if (TREE_CODE (rhs) == SSA_NAME) | |
2162 | { | |
/* Look through a strippable conversion on RHS's defining statement;
   otherwise analyze RHS itself.  */
7e4c867e | 2163 | stmt = SSA_NAME_DEF_STMT (rhs); |
0989f516 | 2164 | if (is_gimple_assign (stmt)) |
2165 | { | |
71dbd910 | 2166 | if (! widening_mult_conversion_strippable_p (type, stmt)) |
0989f516 | 2167 | rhs1 = rhs; |
2168 | else | |
ffebd9c5 | 2169 | { |
2170 | rhs1 = gimple_assign_rhs1 (stmt); | |
2171 | ||
/* A constant behind the conversion: report it with no type,
   the caller infers the type from the other operand.  */
2172 | if (TREE_CODE (rhs1) == INTEGER_CST) | |
2173 | { | |
2174 | *new_rhs_out = rhs1; | |
2175 | *type_out = NULL; | |
2176 | return true; | |
2177 | } | |
2178 | } | |
0989f516 | 2179 | } |
2180 | else | |
2181 | rhs1 = rhs; | |
7e4c867e | 2182 | |
7e4c867e | 2183 | type1 = TREE_TYPE (rhs1); |
0989f516 | 2184 | |
/* The unwidened operand must be at most half as wide as TYPE.  */
7e4c867e | 2185 | if (TREE_CODE (type1) != TREE_CODE (type) |
aff5fb4d | 2186 | || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) |
7e4c867e | 2187 | return false; |
2188 | ||
2189 | *new_rhs_out = rhs1; | |
2190 | *type_out = type1; | |
2191 | return true; | |
2192 | } | |
2193 | ||
2194 | if (TREE_CODE (rhs) == INTEGER_CST) | |
2195 | { | |
2196 | *new_rhs_out = rhs; | |
2197 | *type_out = NULL; | |
2198 | return true; | |
2199 | } | |
2200 | ||
2201 | return false; | |
2202 | } | |
2203 | ||
0989f516 | 2204 | /* Return true if STMT performs a widening multiplication, assuming the |
2205 | output type is TYPE. If so, store the unwidened types of the operands | |
2206 | in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and | |
2207 | *RHS2_OUT such that converting those operands to types *TYPE1_OUT | |
2208 | and *TYPE2_OUT would give the operands of the multiplication. */ | |
7e4c867e | 2209 | |
static bool | |
4333b41f | 2211 | is_widening_mult_p (gimple stmt, |
7e4c867e | 2212 | tree *type1_out, tree *rhs1_out, |
2213 | tree *type2_out, tree *rhs2_out) | |
00f4f705 | 2214 | { |
4333b41f | 2215 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
2216 | ||
7e4c867e | 2217 | if (TREE_CODE (type) != INTEGER_TYPE |
2218 | && TREE_CODE (type) != FIXED_POINT_TYPE) | |
2219 | return false; | |
00f4f705 | 2220 | |
/* Both multiplication operands must qualify individually.  */
0989f516 | 2221 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, |
2222 | rhs1_out)) | |
00f4f705 | 2223 | return false; |
2224 | ||
0989f516 | 2225 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, |
2226 | rhs2_out)) | |
7e4c867e | 2227 | return false; |
00f4f705 | 2228 | |
/* A NULL type means that operand is an INTEGER_CST; it adopts the
   other operand's type provided the constant fits in it.  */
7e4c867e | 2229 | if (*type1_out == NULL) |
00f4f705 | 2230 | { |
7e4c867e | 2231 | if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out)) |
00f4f705 | 2232 | return false; |
7e4c867e | 2233 | *type1_out = *type2_out; |
00f4f705 | 2234 | } |
00f4f705 | 2235 | |
7e4c867e | 2236 | if (*type2_out == NULL) |
00f4f705 | 2237 | { |
7e4c867e | 2238 | if (!int_fits_type_p (*rhs2_out, *type1_out)) |
00f4f705 | 2239 | return false; |
7e4c867e | 2240 | *type2_out = *type1_out; |
00f4f705 | 2241 | } |
00f4f705 | 2242 | |
287c271c | 2243 | /* Ensure that the larger of the two operands comes first. */ |
2244 | if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) | |
2245 | { | |
2246 | tree tmp; | |
2247 | tmp = *type1_out; | |
2248 | *type1_out = *type2_out; | |
2249 | *type2_out = tmp; | |
2250 | tmp = *rhs1_out; | |
2251 | *rhs1_out = *rhs2_out; | |
2252 | *rhs2_out = tmp; | |
2253 | } | |
aff5fb4d | 2254 | |
7e4c867e | 2255 | return true; |
2256 | } | |
00f4f705 | 2257 | |
7e4c867e | 2258 | /* Process a single gimple statement STMT, which has a MULT_EXPR as |
2259 | its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return | |
2260 | value is true iff we converted the statement. */ | |
2261 | ||
static bool | |
aff5fb4d | 2263 | convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) |
7e4c867e | 2264 | { |
03d37e4e | 2265 | tree lhs, rhs1, rhs2, type, type1, type2; |
7e4c867e | 2266 | enum insn_code handler; |
aff5fb4d | 2267 | enum machine_mode to_mode, from_mode, actual_mode; |
5a574e8b | 2268 | optab op; |
aff5fb4d | 2269 | int actual_precision; |
2270 | location_t loc = gimple_location (stmt); | |
3f2ab719 | 2271 | bool from_unsigned1, from_unsigned2; |
7e4c867e | 2272 | |
2273 | lhs = gimple_assign_lhs (stmt); | |
2274 | type = TREE_TYPE (lhs); | |
2275 | if (TREE_CODE (type) != INTEGER_TYPE) | |
00f4f705 | 2276 | return false; |
2277 | ||
4333b41f | 2278 | if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) |
00f4f705 | 2279 | return false; |
2280 | ||
/* Select the widening-multiply optab by the signedness of the two
   unwidened operands.  */
5a574e8b | 2281 | to_mode = TYPE_MODE (type); |
2282 | from_mode = TYPE_MODE (type1); | |
3f2ab719 | 2283 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2284 | from_unsigned2 = TYPE_UNSIGNED (type2); | |
5a574e8b | 2285 | |
3f2ab719 | 2286 | if (from_unsigned1 && from_unsigned2) |
5a574e8b | 2287 | op = umul_widen_optab; |
3f2ab719 | 2288 | else if (!from_unsigned1 && !from_unsigned2) |
5a574e8b | 2289 | op = smul_widen_optab; |
00f4f705 | 2290 | else |
5a574e8b | 2291 | op = usmul_widen_optab; |
2292 | ||
aff5fb4d | 2293 | handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, |
2294 | 0, &actual_mode); | |
7e4c867e | 2295 | |
2296 | if (handler == CODE_FOR_nothing) | |
3f2ab719 | 2297 | { |
2298 | if (op != smul_widen_optab) | |
2299 | { | |
22ffd684 | 2300 | /* We can use a signed multiply with unsigned types as long as |
2301 | there is a wider mode to use, or it is the smaller of the two | |
2302 | types that is unsigned. Note that type1 >= type2, always. */ | |
2303 | if ((TYPE_UNSIGNED (type1) | |
2304 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) | |
2305 | || (TYPE_UNSIGNED (type2) | |
2306 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) | |
2307 | { | |
2308 | from_mode = GET_MODE_WIDER_MODE (from_mode); | |
2309 | if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) | |
2310 | return false; | |
2311 | } | |
3f2ab719 | 2312 | |
2313 | op = smul_widen_optab; | |
2314 | handler = find_widening_optab_handler_and_mode (op, to_mode, | |
2315 | from_mode, 0, | |
2316 | &actual_mode); | |
2317 | ||
2318 | if (handler == CODE_FOR_nothing) | |
2319 | return false; | |
2320 | ||
2321 | from_unsigned1 = from_unsigned2 = false; | |
2322 | } | |
2323 | else | |
2324 | return false; | |
2325 | } | |
7e4c867e | 2326 | |
aff5fb4d | 2327 | /* Ensure that the inputs to the handler are in the correct precision |
2328 | for the opcode. This will be the full mode size. */ | |
2329 | actual_precision = GET_MODE_PRECISION (actual_mode); | |
b36be69d | 2330 | if (2 * actual_precision > TYPE_PRECISION (type)) |
2331 | return false; | |
3f2ab719 | 2332 | if (actual_precision != TYPE_PRECISION (type1) |
2333 | || from_unsigned1 != TYPE_UNSIGNED (type1)) | |
03d37e4e | 2334 | rhs1 = build_and_insert_cast (gsi, loc, |
2335 | build_nonstandard_integer_type | |
2336 | (actual_precision, from_unsigned1), rhs1); | |
3f2ab719 | 2337 | if (actual_precision != TYPE_PRECISION (type2) |
2338 | || from_unsigned2 != TYPE_UNSIGNED (type2)) | |
03d37e4e | 2339 | rhs2 = build_and_insert_cast (gsi, loc, |
2340 | build_nonstandard_integer_type | |
2341 | (actual_precision, from_unsigned2), rhs2); | |
aff5fb4d | 2342 | |
ffebd9c5 | 2343 | /* Handle constants. */ |
2344 | if (TREE_CODE (rhs1) == INTEGER_CST) | |
2345 | rhs1 = fold_convert (type1, rhs1); | |
2346 | if (TREE_CODE (rhs2) == INTEGER_CST) | |
2347 | rhs2 = fold_convert (type2, rhs2); | |
2348 | ||
/* Rewrite STMT in place as a WIDEN_MULT_EXPR of the narrow operands.  */
aff5fb4d | 2349 | gimple_assign_set_rhs1 (stmt, rhs1); |
2350 | gimple_assign_set_rhs2 (stmt, rhs2); | |
00f4f705 | 2351 | gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); |
2352 | update_stmt (stmt); | |
30c4e60d | 2353 | widen_mul_stats.widen_mults_inserted++; |
00f4f705 | 2354 | return true; |
2355 | } | |
2356 | ||
2357 | /* Process a single gimple statement STMT, which is found at the | |
2358 | iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its | |
2359 | rhs (given by CODE), and try to convert it into a | |
2360 | WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value | |
2361 | is true iff we converted the statement. */ | |
2362 | ||
2363 | static bool | |
2364 | convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, | |
2365 | enum tree_code code) | |
2366 | { | |
2367 | gimple rhs1_stmt = NULL, rhs2_stmt = NULL; | |
07ea3e5c | 2368 | gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt; |
03d37e4e | 2369 | tree type, type1, type2, optype; |
00f4f705 | 2370 | tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; |
2371 | enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; | |
2372 | optab this_optab; | |
2373 | enum tree_code wmult_code; | |
aff5fb4d | 2374 | enum insn_code handler; |
2375 | enum machine_mode to_mode, from_mode, actual_mode; | |
2376 | location_t loc = gimple_location (stmt); | |
2377 | int actual_precision; | |
3f2ab719 | 2378 | bool from_unsigned1, from_unsigned2; |
00f4f705 | 2379 | |
2380 | lhs = gimple_assign_lhs (stmt); | |
2381 | type = TREE_TYPE (lhs); | |
7e4c867e | 2382 | if (TREE_CODE (type) != INTEGER_TYPE |
2383 | && TREE_CODE (type) != FIXED_POINT_TYPE) | |
00f4f705 | 2384 | return false; |
2385 | ||
2386 | if (code == MINUS_EXPR) | |
2387 | wmult_code = WIDEN_MULT_MINUS_EXPR; | |
2388 | else | |
2389 | wmult_code = WIDEN_MULT_PLUS_EXPR; | |
2390 | ||
00f4f705 | 2391 | rhs1 = gimple_assign_rhs1 (stmt); |
2392 | rhs2 = gimple_assign_rhs2 (stmt); | |
2393 | ||
2394 | if (TREE_CODE (rhs1) == SSA_NAME) | |
2395 | { | |
2396 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
2397 | if (is_gimple_assign (rhs1_stmt)) | |
2398 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); | |
2399 | } | |
00f4f705 | 2400 | |
2401 | if (TREE_CODE (rhs2) == SSA_NAME) | |
2402 | { | |
2403 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
2404 | if (is_gimple_assign (rhs2_stmt)) | |
2405 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); | |
2406 | } | |
00f4f705 | 2407 | |
07ea3e5c | 2408 | /* Allow for one conversion statement between the multiply |
2409 | and addition/subtraction statement. If there are more than | |
2410 | one conversions then we assume they would invalidate this | |
2411 | transformation. If that's not the case then they should have | |
2412 | been folded before now. */ | |
2413 | if (CONVERT_EXPR_CODE_P (rhs1_code)) | |
2414 | { | |
2415 | conv1_stmt = rhs1_stmt; | |
2416 | rhs1 = gimple_assign_rhs1 (rhs1_stmt); | |
2417 | if (TREE_CODE (rhs1) == SSA_NAME) | |
2418 | { | |
2419 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
2420 | if (is_gimple_assign (rhs1_stmt)) | |
2421 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); | |
2422 | } | |
2423 | else | |
2424 | return false; | |
2425 | } | |
2426 | if (CONVERT_EXPR_CODE_P (rhs2_code)) | |
2427 | { | |
2428 | conv2_stmt = rhs2_stmt; | |
2429 | rhs2 = gimple_assign_rhs1 (rhs2_stmt); | |
2430 | if (TREE_CODE (rhs2) == SSA_NAME) | |
2431 | { | |
2432 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
2433 | if (is_gimple_assign (rhs2_stmt)) | |
2434 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); | |
2435 | } | |
2436 | else | |
2437 | return false; | |
2438 | } | |
2439 | ||
aff5fb4d | 2440 | /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call |
2441 | is_widening_mult_p, but we still need the rhs returns. | |
2442 | ||
2443 | It might also appear that it would be sufficient to use the existing | |
2444 | operands of the widening multiply, but that would limit the choice of | |
e0df5be0 | 2445 | multiply-and-accumulate instructions. |
2446 | ||
2447 | If the widened-multiplication result has more than one use, it is | 
2448 | probably wiser not to do the conversion. */ | |
aff5fb4d | 2449 | if (code == PLUS_EXPR |
2450 | && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) | |
00f4f705 | 2451 | { |
e0df5be0 | 2452 | if (!has_single_use (rhs1) |
2453 | || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, | |
2454 | &type2, &mult_rhs2)) | |
00f4f705 | 2455 | return false; |
7e4c867e | 2456 | add_rhs = rhs2; |
07ea3e5c | 2457 | conv_stmt = conv1_stmt; |
00f4f705 | 2458 | } |
aff5fb4d | 2459 | else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) |
00f4f705 | 2460 | { |
e0df5be0 | 2461 | if (!has_single_use (rhs2) |
2462 | || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, | |
2463 | &type2, &mult_rhs2)) | |
00f4f705 | 2464 | return false; |
7e4c867e | 2465 | add_rhs = rhs1; |
07ea3e5c | 2466 | conv_stmt = conv2_stmt; |
00f4f705 | 2467 | } |
00f4f705 | 2468 | else |
2469 | return false; | |
2470 | ||
aff5fb4d | 2471 | to_mode = TYPE_MODE (type); |
2472 | from_mode = TYPE_MODE (type1); | |
3f2ab719 | 2473 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2474 | from_unsigned2 = TYPE_UNSIGNED (type2); | |
4ccf368d | 2475 | optype = type1; |
aff5fb4d | 2476 | |
3f2ab719 | 2477 | /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ |
2478 | if (from_unsigned1 != from_unsigned2) | |
2479 | { | |
4ccf368d | 2480 | if (!INTEGRAL_TYPE_P (type)) |
2481 | return false; | |
22ffd684 | 2482 | /* We can use a signed multiply with unsigned types as long as |
2483 | there is a wider mode to use, or it is the smaller of the two | |
2484 | types that is unsigned. Note that type1 >= type2, always. */ | |
2485 | if ((from_unsigned1 | |
2486 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) | |
2487 | || (from_unsigned2 | |
2488 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) | |
3f2ab719 | 2489 | { |
22ffd684 | 2490 | from_mode = GET_MODE_WIDER_MODE (from_mode); |
2491 | if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode)) | |
2492 | return false; | |
3f2ab719 | 2493 | } |
22ffd684 | 2494 | |
2495 | from_unsigned1 = from_unsigned2 = false; | |
4ccf368d | 2496 | optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode), |
2497 | false); | |
3f2ab719 | 2498 | } |
815a0224 | 2499 | |
07ea3e5c | 2500 | /* If there was a conversion between the multiply and addition |
2501 | then we need to make sure it fits a multiply-and-accumulate. | |
2502 | There should be a single mode change which does not change the | 
2503 | value. */ | |
2504 | if (conv_stmt) | |
2505 | { | |
3f2ab719 | 2506 | /* We use the original, unmodified data types for this. */ |
07ea3e5c | 2507 | tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); |
2508 | tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); | |
2509 | int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); | |
2510 | bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); | |
2511 | ||
2512 | if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) | |
2513 | { | |
2514 | /* Conversion is a truncate. */ | |
2515 | if (TYPE_PRECISION (to_type) < data_size) | |
2516 | return false; | |
2517 | } | |
2518 | else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) | |
2519 | { | |
2520 | /* Conversion is an extend. Check it's the right sort. */ | |
2521 | if (TYPE_UNSIGNED (from_type) != is_unsigned | |
2522 | && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) | |
2523 | return false; | |
2524 | } | |
2525 | /* else convert is a no-op for our purposes. */ | |
2526 | } | |
2527 | ||
815a0224 | 2528 | /* Verify that the machine can perform a widening multiply |
2529 | accumulate in this mode/signedness combination, otherwise | |
2530 | this transformation is likely to pessimize code. */ | |
3f2ab719 | 2531 | this_optab = optab_for_tree_code (wmult_code, optype, optab_default); |
aff5fb4d | 2532 | handler = find_widening_optab_handler_and_mode (this_optab, to_mode, |
2533 | from_mode, 0, &actual_mode); | |
2534 | ||
2535 | if (handler == CODE_FOR_nothing) | |
815a0224 | 2536 | return false; |
2537 | ||
aff5fb4d | 2538 | /* Ensure that the inputs to the handler are in the correct precision | 
2539 | for the opcode. This will be the full mode size. */ | |
2540 | actual_precision = GET_MODE_PRECISION (actual_mode); | |
3f2ab719 | 2541 | if (actual_precision != TYPE_PRECISION (type1) |
2542 | || from_unsigned1 != TYPE_UNSIGNED (type1)) | |
03d37e4e | 2543 | mult_rhs1 = build_and_insert_cast (gsi, loc, |
2544 | build_nonstandard_integer_type | |
2545 | (actual_precision, from_unsigned1), | |
2546 | mult_rhs1); | |
3f2ab719 | 2547 | if (actual_precision != TYPE_PRECISION (type2) |
2548 | || from_unsigned2 != TYPE_UNSIGNED (type2)) | |
03d37e4e | 2549 | mult_rhs2 = build_and_insert_cast (gsi, loc, |
2550 | build_nonstandard_integer_type | |
2551 | (actual_precision, from_unsigned2), | |
2552 | mult_rhs2); | |
00f4f705 | 2553 | |
12421545 | 2554 | if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) |
03d37e4e | 2555 | add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs); |
12421545 | 2556 | |
ffebd9c5 | 2557 | /* Handle constants. */ |
2558 | if (TREE_CODE (mult_rhs1) == INTEGER_CST) | |
d5a3bb10 | 2559 | mult_rhs1 = fold_convert (type1, mult_rhs1); |
ffebd9c5 | 2560 | if (TREE_CODE (mult_rhs2) == INTEGER_CST) |
d5a3bb10 | 2561 | mult_rhs2 = fold_convert (type2, mult_rhs2); |
ffebd9c5 | 2562 | |
aff5fb4d | 2563 | gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2, |
00f4f705 | 2564 | add_rhs); |
2565 | update_stmt (gsi_stmt (*gsi)); | |
30c4e60d | 2566 | widen_mul_stats.maccs_inserted++; |
00f4f705 | 2567 | return true; |
2568 | } | |
2569 | ||
15dbdc8f | 2570 | /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 |
2571 | with uses in additions and subtractions to form fused multiply-add | |
2572 | operations. Returns true if successful and MUL_STMT should be removed. */ | |
b9be572e | 2573 | |
2574 | static bool | |
15dbdc8f | 2575 | convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2) |
b9be572e | 2576 | { |
15dbdc8f | 2577 | tree mul_result = gimple_get_lhs (mul_stmt); |
b9be572e | 2578 | tree type = TREE_TYPE (mul_result); |
44579526 | 2579 | gimple use_stmt, neguse_stmt, fma_stmt; |
b9be572e | 2580 | use_operand_p use_p; |
2581 | imm_use_iterator imm_iter; | |
2582 | ||
2583 | if (FLOAT_TYPE_P (type) | |
2584 | && flag_fp_contract_mode == FP_CONTRACT_OFF) | |
2585 | return false; | |
2586 | ||
2587 | /* We don't want to do bitfield reduction ops. */ | |
2588 | if (INTEGRAL_TYPE_P (type) | |
2589 | && (TYPE_PRECISION (type) | |
2590 | != GET_MODE_PRECISION (TYPE_MODE (type)))) | |
2591 | return false; | |
2592 | ||
2593 | /* If the target doesn't support it, don't generate it. We assume that | |
2594 | if fma isn't available then fms, fnma or fnms are not either. */ | |
2595 | if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing) | |
2596 | return false; | |
2597 | ||
5ed3d3b8 | 2598 | /* If the multiplication has zero uses, it is kept around probably because |
2599 | of -fnon-call-exceptions. Don't optimize it away in that case, | |
2600 | it is DCE job. */ | |
2601 | if (has_zero_uses (mul_result)) | |
2602 | return false; | |
2603 | ||
b9be572e | 2604 | /* Make sure that the multiplication statement becomes dead after |
2605 | the transformation, thus that all uses are transformed to FMAs. | |
2606 | This means we assume that an FMA operation has the same cost | |
2607 | as an addition. */ | |
2608 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) | |
2609 | { | |
2610 | enum tree_code use_code; | |
44579526 | 2611 | tree result = mul_result; |
2612 | bool negate_p = false; | |
b9be572e | 2613 | |
2614 | use_stmt = USE_STMT (use_p); | |
2615 | ||
17a2c727 | 2616 | if (is_gimple_debug (use_stmt)) |
2617 | continue; | |
2618 | ||
b9be572e | 2619 | /* For now restrict this operations to single basic blocks. In theory |
2620 | we would want to support sinking the multiplication in | |
2621 | m = a*b; | |
2622 | if () | |
2623 | ma = m + c; | |
2624 | else | |
2625 | d = m; | |
2626 | to form a fma in the then block and sink the multiplication to the | |
2627 | else block. */ | |
2628 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | |
2629 | return false; | |
2630 | ||
44579526 | 2631 | if (!is_gimple_assign (use_stmt)) |
b9be572e | 2632 | return false; |
2633 | ||
44579526 | 2634 | use_code = gimple_assign_rhs_code (use_stmt); |
2635 | ||
2636 | /* A negate on the multiplication leads to FNMA. */ | |
2637 | if (use_code == NEGATE_EXPR) | |
2638 | { | |
805ad414 | 2639 | ssa_op_iter iter; |
5715c09b | 2640 | use_operand_p usep; |
805ad414 | 2641 | |
44579526 | 2642 | result = gimple_assign_lhs (use_stmt); |
2643 | ||
2644 | /* Make sure the negate statement becomes dead with this | |
2645 | single transformation. */ | |
2646 | if (!single_imm_use (gimple_assign_lhs (use_stmt), | |
2647 | &use_p, &neguse_stmt)) | |
2648 | return false; | |
2649 | ||
805ad414 | 2650 | /* Make sure the multiplication isn't also used on that stmt. */ |
5715c09b | 2651 | FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE) |
2652 | if (USE_FROM_PTR (usep) == mul_result) | |
805ad414 | 2653 | return false; |
2654 | ||
44579526 | 2655 | /* Re-validate. */ |
2656 | use_stmt = neguse_stmt; | |
2657 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | |
2658 | return false; | |
2659 | if (!is_gimple_assign (use_stmt)) | |
2660 | return false; | |
2661 | ||
2662 | use_code = gimple_assign_rhs_code (use_stmt); | |
2663 | negate_p = true; | |
2664 | } | |
b9be572e | 2665 | |
44579526 | 2666 | switch (use_code) |
2667 | { | |
2668 | case MINUS_EXPR: | |
8a9d0572 | 2669 | if (gimple_assign_rhs2 (use_stmt) == result) |
2670 | negate_p = !negate_p; | |
2671 | break; | |
44579526 | 2672 | case PLUS_EXPR: |
44579526 | 2673 | break; |
44579526 | 2674 | default: |
2675 | /* FMA can only be formed from PLUS and MINUS. */ | |
2676 | return false; | |
2677 | } | |
b9be572e | 2678 | |
b095bd6a | 2679 | /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed |
2680 | by a MULT_EXPR that we'll visit later, we might be able to | |
2681 | get a more profitable match with fnma. | |
2682 | OTOH, if we don't, a negate / fma pair has likely lower latency | |
2683 | that a mult / subtract pair. */ | |
2684 | if (use_code == MINUS_EXPR && !negate_p | |
2685 | && gimple_assign_rhs1 (use_stmt) == result | |
2686 | && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing | |
2687 | && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing) | |
2688 | { | |
2689 | tree rhs2 = gimple_assign_rhs2 (use_stmt); | |
b095bd6a | 2690 | |
058e9571 | 2691 | if (TREE_CODE (rhs2) == SSA_NAME) |
2692 | { | |
2693 | gimple stmt2 = SSA_NAME_DEF_STMT (rhs2); | |
2694 | if (has_single_use (rhs2) | |
2695 | && is_gimple_assign (stmt2) | |
2696 | && gimple_assign_rhs_code (stmt2) == MULT_EXPR) | |
2697 | return false; | |
2698 | } | |
b095bd6a | 2699 | } |
2700 | ||
44579526 | 2701 | /* We can't handle a * b + a * b. */ |
2702 | if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt)) | |
2703 | return false; | |
8a9d0572 | 2704 | |
2705 | /* While it is possible to validate whether or not the exact form | |
2706 | that we've recognized is available in the backend, the assumption | |
2707 | is that the transformation is never a loss. For instance, suppose | |
2708 | the target only has the plain FMA pattern available. Consider | |
2709 | a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which | |
2710 | is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we | |
2711 | still have 3 operations, but in the FMA form the two NEGs are | |
9d75589a | 2712 | independent and could be run in parallel. */ |
b9be572e | 2713 | } |
2714 | ||
2715 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) | |
2716 | { | |
b9be572e | 2717 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
17a2c727 | 2718 | enum tree_code use_code; |
15dbdc8f | 2719 | tree addop, mulop1 = op1, result = mul_result; |
44579526 | 2720 | bool negate_p = false; |
b9be572e | 2721 | |
17a2c727 | 2722 | if (is_gimple_debug (use_stmt)) |
2723 | continue; | |
2724 | ||
2725 | use_code = gimple_assign_rhs_code (use_stmt); | |
44579526 | 2726 | if (use_code == NEGATE_EXPR) |
2727 | { | |
2728 | result = gimple_assign_lhs (use_stmt); | |
2729 | single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt); | |
2730 | gsi_remove (&gsi, true); | |
2731 | release_defs (use_stmt); | |
2732 | ||
2733 | use_stmt = neguse_stmt; | |
2734 | gsi = gsi_for_stmt (use_stmt); | |
2735 | use_code = gimple_assign_rhs_code (use_stmt); | |
2736 | negate_p = true; | |
2737 | } | |
2738 | ||
2739 | if (gimple_assign_rhs1 (use_stmt) == result) | |
b9be572e | 2740 | { |
2741 | addop = gimple_assign_rhs2 (use_stmt); | |
2742 | /* a * b - c -> a * b + (-c) */ | |
2743 | if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | |
2744 | addop = force_gimple_operand_gsi (&gsi, | |
2745 | build1 (NEGATE_EXPR, | |
2746 | type, addop), | |
2747 | true, NULL_TREE, true, | |
2748 | GSI_SAME_STMT); | |
2749 | } | |
2750 | else | |
2751 | { | |
2752 | addop = gimple_assign_rhs1 (use_stmt); | |
2753 | /* a - b * c -> (-b) * c + a */ | |
2754 | if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | |
44579526 | 2755 | negate_p = !negate_p; |
b9be572e | 2756 | } |
2757 | ||
44579526 | 2758 | if (negate_p) |
2759 | mulop1 = force_gimple_operand_gsi (&gsi, | |
2760 | build1 (NEGATE_EXPR, | |
2761 | type, mulop1), | |
2762 | true, NULL_TREE, true, | |
2763 | GSI_SAME_STMT); | |
2764 | ||
446e85eb | 2765 | fma_stmt = gimple_build_assign_with_ops (FMA_EXPR, |
2766 | gimple_assign_lhs (use_stmt), | |
2767 | mulop1, op2, | |
2768 | addop); | |
b9be572e | 2769 | gsi_replace (&gsi, fma_stmt, true); |
30c4e60d | 2770 | widen_mul_stats.fmas_inserted++; |
b9be572e | 2771 | } |
2772 | ||
2773 | return true; | |
2774 | } | |
2775 | ||
62be004c | 2776 | /* Find integer multiplications where the operands are extended from |
2777 | smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR | |
2778 | where appropriate. */ | |
2779 | ||
2780 | static unsigned int | |
2781 | execute_optimize_widening_mul (void) | |
2782 | { | |
62be004c | 2783 | basic_block bb; |
15dbdc8f | 2784 | bool cfg_changed = false; |
62be004c | 2785 | |
30c4e60d | 2786 | memset (&widen_mul_stats, 0, sizeof (widen_mul_stats)); |
2787 | ||
62be004c | 2788 | FOR_EACH_BB (bb) |
2789 | { | |
2790 | gimple_stmt_iterator gsi; | |
2791 | ||
b9be572e | 2792 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) |
62be004c | 2793 | { |
2794 | gimple stmt = gsi_stmt (gsi); | |
00f4f705 | 2795 | enum tree_code code; |
62be004c | 2796 | |
b9be572e | 2797 | if (is_gimple_assign (stmt)) |
2798 | { | |
2799 | code = gimple_assign_rhs_code (stmt); | |
2800 | switch (code) | |
2801 | { | |
2802 | case MULT_EXPR: | |
aff5fb4d | 2803 | if (!convert_mult_to_widen (stmt, &gsi) |
15dbdc8f | 2804 | && convert_mult_to_fma (stmt, |
2805 | gimple_assign_rhs1 (stmt), | |
2806 | gimple_assign_rhs2 (stmt))) | |
b9be572e | 2807 | { |
2808 | gsi_remove (&gsi, true); | |
2809 | release_defs (stmt); | |
2810 | continue; | |
2811 | } | |
2812 | break; | |
2813 | ||
2814 | case PLUS_EXPR: | |
2815 | case MINUS_EXPR: | |
2816 | convert_plusminus_to_widen (&gsi, stmt, code); | |
2817 | break; | |
62be004c | 2818 | |
b9be572e | 2819 | default:; |
2820 | } | |
2821 | } | |
d4af184a | 2822 | else if (is_gimple_call (stmt) |
2823 | && gimple_call_lhs (stmt)) | |
15dbdc8f | 2824 | { |
2825 | tree fndecl = gimple_call_fndecl (stmt); | |
2826 | if (fndecl | |
2827 | && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) | |
2828 | { | |
2829 | switch (DECL_FUNCTION_CODE (fndecl)) | |
2830 | { | |
2831 | case BUILT_IN_POWF: | |
2832 | case BUILT_IN_POW: | |
2833 | case BUILT_IN_POWL: | |
2834 | if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST | |
2835 | && REAL_VALUES_EQUAL | |
2836 | (TREE_REAL_CST (gimple_call_arg (stmt, 1)), | |
2837 | dconst2) | |
2838 | && convert_mult_to_fma (stmt, | |
2839 | gimple_call_arg (stmt, 0), | |
2840 | gimple_call_arg (stmt, 0))) | |
2841 | { | |
6716f635 | 2842 | unlink_stmt_vdef (stmt); |
13ff78a4 | 2843 | if (gsi_remove (&gsi, true) |
2844 | && gimple_purge_dead_eh_edges (bb)) | |
15dbdc8f | 2845 | cfg_changed = true; |
13ff78a4 | 2846 | release_defs (stmt); |
15dbdc8f | 2847 | continue; |
2848 | } | |
2849 | break; | |
2850 | ||
2851 | default:; | |
2852 | } | |
2853 | } | |
2854 | } | |
b9be572e | 2855 | gsi_next (&gsi); |
62be004c | 2856 | } |
2857 | } | |
00f4f705 | 2858 | |
30c4e60d | 2859 | statistics_counter_event (cfun, "widening multiplications inserted", |
2860 | widen_mul_stats.widen_mults_inserted); | |
2861 | statistics_counter_event (cfun, "widening maccs inserted", | |
2862 | widen_mul_stats.maccs_inserted); | |
2863 | statistics_counter_event (cfun, "fused multiply-adds inserted", | |
2864 | widen_mul_stats.fmas_inserted); | |
2865 | ||
15dbdc8f | 2866 | return cfg_changed ? TODO_cleanup_cfg : 0; |
62be004c | 2867 | } |
2868 | ||
2869 | static bool | |
2870 | gate_optimize_widening_mul (void) | |
2871 | { | |
2872 | return flag_expensive_optimizations && optimize; | |
2873 | } | |
2874 | ||
cbe8bda8 | 2875 | namespace { |
2876 | ||
2877 | const pass_data pass_data_optimize_widening_mul = | |
62be004c | 2878 | { |
cbe8bda8 | 2879 | GIMPLE_PASS, /* type */ |
2880 | "widening_mul", /* name */ | |
2881 | OPTGROUP_NONE, /* optinfo_flags */ | |
2882 | true, /* has_gate */ | |
2883 | true, /* has_execute */ | |
2884 | TV_NONE, /* tv_id */ | |
2885 | PROP_ssa, /* properties_required */ | |
2886 | 0, /* properties_provided */ | |
2887 | 0, /* properties_destroyed */ | |
2888 | 0, /* todo_flags_start */ | |
2889 | ( TODO_verify_ssa | TODO_verify_stmts | |
2890 | | TODO_update_ssa ), /* todo_flags_finish */ | |
62be004c | 2891 | }; |
cbe8bda8 | 2892 | |
2893 | class pass_optimize_widening_mul : public gimple_opt_pass | |
2894 | { | |
2895 | public: | |
9af5ce0c | 2896 | pass_optimize_widening_mul (gcc::context *ctxt) |
2897 | : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt) | |
cbe8bda8 | 2898 | {} |
2899 | ||
2900 | /* opt_pass methods: */ | |
2901 | bool gate () { return gate_optimize_widening_mul (); } | |
2902 | unsigned int execute () { return execute_optimize_widening_mul (); } | |
2903 | ||
2904 | }; // class pass_optimize_widening_mul | |
2905 | ||
2906 | } // anon namespace | |
2907 | ||
2908 | gimple_opt_pass * | |
2909 | make_pass_optimize_widening_mul (gcc::context *ctxt) | |
2910 | { | |
2911 | return new pass_optimize_widening_mul (ctxt); | |
2912 | } |