]>
Commit | Line | Data |
---|---|---|
abacb398 | 1 | /* Global, SSA-based optimizations using mathematical identities. |
fbd26352 | 2 | Copyright (C) 2005-2019 Free Software Foundation, Inc. |
48e1416a | 3 | |
abacb398 | 4 | This file is part of GCC. |
48e1416a | 5 | |
abacb398 | 6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
8c4c00c1 | 8 | Free Software Foundation; either version 3, or (at your option) any |
abacb398 | 9 | later version. |
48e1416a | 10 | |
abacb398 | 11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
48e1416a | 15 | |
abacb398 | 16 | You should have received a copy of the GNU General Public License |
8c4c00c1 | 17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
abacb398 | 19 | |
20 | /* Currently, the only mini-pass in this file tries to CSE reciprocal | |
21 | operations. These are common in sequences such as this one: | |
22 | ||
23 | modulus = sqrt(x*x + y*y + z*z); | |
24 | x = x / modulus; | |
25 | y = y / modulus; | |
26 | z = z / modulus; | |
27 | ||
28 | that can be optimized to | |
29 | ||
30 | modulus = sqrt(x*x + y*y + z*z); | |
31 | rmodulus = 1.0 / modulus; | |
32 | x = x * rmodulus; | |
33 | y = y * rmodulus; | |
34 | z = z * rmodulus; | |
35 | ||
36 | We do this for loop invariant divisors, and with this pass whenever | |
ac70caad | 37 | we notice that a division has the same divisor multiple times. |
38 | ||
39 | Of course, like in PRE, we don't insert a division if a dominator | |
40 | already has one. However, this cannot be done as an extension of | |
41 | PRE for several reasons. | |
42 | ||
43 | First of all, with some experiments it was found out that the | |
44 | transformation is not always useful if there are only two divisions | |
24794e79 | 45 | by the same divisor. This is probably because modern processors |
ac70caad | 46 | can pipeline the divisions; on older, in-order processors it should |
47 | still be effective to optimize two divisions by the same number. | |
48 | We make this a param, and it shall be called N in the remainder of | |
49 | this comment. | |
50 | ||
51 | Second, if trapping math is active, we have less freedom on where | |
52 | to insert divisions: we can only do so in basic blocks that already | |
53 | contain one. (If divisions don't trap, instead, we can insert | |
54 | divisions elsewhere, which will be in blocks that are common dominators | |
55 | of those that have the division). | |
56 | ||
57 | We really don't want to compute the reciprocal unless a division will | |
58 | be found. To do this, we won't insert the division in a basic block | |
59 | that has less than N divisions *post-dominating* it. | |
60 | ||
61 | The algorithm constructs a subset of the dominator tree, holding the | |
62 | blocks containing the divisions and the common dominators to them, | |
63 | and walks it twice. The first walk is in post-order, and it annotates | |
64 | each block with the number of divisions that post-dominate it: this | |
65 | gives information on where divisions can be inserted profitably. | |
66 | The second walk is in pre-order, and it inserts divisions as explained | |
67 | above, and replaces divisions by multiplications. | |
68 | ||
69 | In the best case, the cost of the pass is O(n_statements). In the | |
70 | worst-case, the cost is due to creating the dominator tree subset, | |
71 | with a cost of O(n_basic_blocks ^ 2); however this can only happen | |
72 | for n_statements / n_basic_blocks statements. So, the amortized cost | |
73 | of creating the dominator tree subset is O(n_basic_blocks) and the | |
74 | worst-case cost of the pass is O(n_statements * n_basic_blocks). | |
75 | ||
76 | More practically, the cost will be small because there are few | |
77 | divisions, and they tend to be in the same basic block, so insert_bb | |
78 | is called very few times. | |
79 | ||
80 | If we did this using domwalk.c, an efficient implementation would have | |
81 | to work on all the variables in a single pass, because we could not | |
82 | work on just a subset of the dominator tree, as we do now, and the | |
83 | cost would also be something like O(n_statements * n_basic_blocks). | |
84 | The data structures would be more complex in order to work on all the | |
85 | variables in a single pass. */ | |
abacb398 | 86 | |
87 | #include "config.h" | |
88 | #include "system.h" | |
89 | #include "coretypes.h" | |
9ef16211 | 90 | #include "backend.h" |
7c29e30e | 91 | #include "target.h" |
92 | #include "rtl.h" | |
9ef16211 | 93 | #include "tree.h" |
94 | #include "gimple.h" | |
7c29e30e | 95 | #include "predict.h" |
96 | #include "alloc-pool.h" | |
97 | #include "tree-pass.h" | |
9ef16211 | 98 | #include "ssa.h" |
7c29e30e | 99 | #include "optabs-tree.h" |
100 | #include "gimple-pretty-print.h" | |
b20a8bb4 | 101 | #include "alias.h" |
b20a8bb4 | 102 | #include "fold-const.h" |
bc61cadb | 103 | #include "gimple-fold.h" |
dcf1a1ec | 104 | #include "gimple-iterator.h" |
470d5bb5 | 105 | #include "gimplify.h" |
e795d6e1 | 106 | #include "gimplify-me.h" |
9ed99284 | 107 | #include "stor-layout.h" |
073c1fd5 | 108 | #include "tree-cfg.h" |
073c1fd5 | 109 | #include "tree-dfa.h" |
69ee5dbb | 110 | #include "tree-ssa.h" |
f7715905 | 111 | #include "builtins.h" |
c3206272 | 112 | #include "params.h" |
4cfd27a5 | 113 | #include "internal-fn.h" |
fa0793ad | 114 | #include "case-cfn-macros.h" |
67f7b566 | 115 | #include "optabs-libfuncs.h" |
116 | #include "tree-eh.h" | |
117 | #include "targhooks.h" | |
ed306e55 | 118 | #include "domwalk.h" |
ac70caad | 119 | |
120 | /* This structure represents one basic block that either computes a | |
121 | division, or is a common dominator for basic block that compute a | |
122 | division. */ | |
123 | struct occurrence { | |
124 | /* The basic block represented by this structure. */ | |
125 | basic_block bb; | |
126 | ||
127 | /* If non-NULL, the SSA_NAME holding the definition for a reciprocal | |
128 | inserted in BB. */ | |
129 | tree recip_def; | |
130 | ||
472f3f23 | 131 | /* If non-NULL, the SSA_NAME holding the definition for a squared |
132 | reciprocal inserted in BB. */ | |
133 | tree square_recip_def; | |
134 | ||
75a70cf9 | 135 | /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that |
ac70caad | 136 | was inserted in BB. */ |
42acab1c | 137 | gimple *recip_def_stmt; |
ac70caad | 138 | |
139 | /* Pointer to a list of "struct occurrence"s for blocks dominated | |
140 | by BB. */ | |
141 | struct occurrence *children; | |
142 | ||
143 | /* Pointer to the next "struct occurrence"s in the list of blocks | |
144 | sharing a common dominator. */ | |
145 | struct occurrence *next; | |
146 | ||
147 | /* The number of divisions that are in BB before compute_merit. The | |
148 | number of divisions that are in BB or post-dominate it after | |
149 | compute_merit. */ | |
150 | int num_divisions; | |
151 | ||
152 | /* True if the basic block has a division, false if it is a common | |
153 | dominator for basic blocks that do. If it is false and trapping | |
154 | math is active, BB is not a candidate for inserting a reciprocal. */ | |
155 | bool bb_has_division; | |
156 | }; | |
157 | ||
/* Counters for the reciprocal CSE transformation.  */
static struct
{
  /* How many 1.0/X operations were inserted.  */
  int rdivs_inserted;

  /* How many 1.0/FUNC operations were inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

/* Counters for the sincos transformation.  */
static struct
{
  /* How many cexpi calls were inserted.  */
  int inserted;
} sincos_stats;

/* Counters for the widening-multiply family of transformations.  */
static struct
{
  /* How many widening multiplications were inserted.  */
  int widen_mults_inserted;

  /* How many integer multiply-and-accumulate operations were inserted.  */
  int maccs_inserted;

  /* How many floating-point fused multiply-add operations were
     inserted.  */
  int fmas_inserted;

  /* How many divmod calls were inserted.  */
  int divmod_calls_inserted;
} widen_mul_stats;
ac70caad | 187 | |
188 | /* The instance of "struct occurrence" representing the highest | |
189 | interesting block in the dominator tree. */ | |
190 | static struct occurrence *occ_head; | |
191 | ||
192 | /* Allocation pool for getting instances of "struct occurrence". */ | |
e16712b1 | 193 | static object_allocator<occurrence> *occ_pool; |
ac70caad | 194 | |
195 | ||
196 | ||
197 | /* Allocate and return a new struct occurrence for basic block BB, and | |
198 | whose children list is headed by CHILDREN. */ | |
199 | static struct occurrence * | |
200 | occ_new (basic_block bb, struct occurrence *children) | |
abacb398 | 201 | { |
ac70caad | 202 | struct occurrence *occ; |
203 | ||
d8e7268c | 204 | bb->aux = occ = occ_pool->allocate (); |
ac70caad | 205 | memset (occ, 0, sizeof (struct occurrence)); |
206 | ||
207 | occ->bb = bb; | |
208 | occ->children = children; | |
209 | return occ; | |
abacb398 | 210 | } |
211 | ||
ac70caad | 212 | |
213 | /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a | |
214 | list of "struct occurrence"s, one per basic block, having IDOM as | |
215 | their common dominator. | |
216 | ||
217 | We try to insert NEW_OCC as deep as possible in the tree, and we also | |
218 | insert any other block that is a common dominator for BB and one | |
219 | block already in the tree. */ | |
220 | ||
221 | static void | |
222 | insert_bb (struct occurrence *new_occ, basic_block idom, | |
223 | struct occurrence **p_head) | |
9e583fac | 224 | { |
ac70caad | 225 | struct occurrence *occ, **p_occ; |
9e583fac | 226 | |
ac70caad | 227 | for (p_occ = p_head; (occ = *p_occ) != NULL; ) |
228 | { | |
229 | basic_block bb = new_occ->bb, occ_bb = occ->bb; | |
230 | basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb); | |
231 | if (dom == bb) | |
232 | { | |
233 | /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC | |
234 | from its list. */ | |
235 | *p_occ = occ->next; | |
236 | occ->next = new_occ->children; | |
237 | new_occ->children = occ; | |
238 | ||
239 | /* Try the next block (it may as well be dominated by BB). */ | |
240 | } | |
241 | ||
242 | else if (dom == occ_bb) | |
243 | { | |
244 | /* OCC_BB dominates BB. Tail recurse to look deeper. */ | |
245 | insert_bb (new_occ, dom, &occ->children); | |
246 | return; | |
247 | } | |
248 | ||
249 | else if (dom != idom) | |
250 | { | |
251 | gcc_assert (!dom->aux); | |
252 | ||
253 | /* There is a dominator between IDOM and BB, add it and make | |
254 | two children out of NEW_OCC and OCC. First, remove OCC from | |
255 | its list. */ | |
256 | *p_occ = occ->next; | |
257 | new_occ->next = occ; | |
258 | occ->next = NULL; | |
259 | ||
260 | /* None of the previous blocks has DOM as a dominator: if we tail | |
261 | recursed, we would reexamine them uselessly. Just switch BB with | |
262 | DOM, and go on looking for blocks dominated by DOM. */ | |
263 | new_occ = occ_new (dom, new_occ); | |
264 | } | |
265 | ||
266 | else | |
267 | { | |
268 | /* Nothing special, go on with the next element. */ | |
269 | p_occ = &occ->next; | |
270 | } | |
271 | } | |
272 | ||
273 | /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */ | |
274 | new_occ->next = *p_head; | |
275 | *p_head = new_occ; | |
276 | } | |
277 | ||
472f3f23 | 278 | /* Register that we found a division in BB. |
279 | IMPORTANCE is a measure of how much weighting to give | |
280 | that division. Use IMPORTANCE = 2 to register a single | |
281 | division. If the division is going to be found multiple | |
282 | times use 1 (as it is with squares). */ | |
ac70caad | 283 | |
284 | static inline void | |
472f3f23 | 285 | register_division_in (basic_block bb, int importance) |
ac70caad | 286 | { |
287 | struct occurrence *occ; | |
288 | ||
289 | occ = (struct occurrence *) bb->aux; | |
290 | if (!occ) | |
291 | { | |
292 | occ = occ_new (bb, NULL); | |
34154e27 | 293 | insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); |
ac70caad | 294 | } |
295 | ||
296 | occ->bb_has_division = true; | |
472f3f23 | 297 | occ->num_divisions += importance; |
ac70caad | 298 | } |
299 | ||
300 | ||
301 | /* Compute the number of divisions that postdominate each block in OCC and | |
302 | its children. */ | |
abacb398 | 303 | |
abacb398 | 304 | static void |
ac70caad | 305 | compute_merit (struct occurrence *occ) |
abacb398 | 306 | { |
ac70caad | 307 | struct occurrence *occ_child; |
308 | basic_block dom = occ->bb; | |
abacb398 | 309 | |
ac70caad | 310 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
abacb398 | 311 | { |
ac70caad | 312 | basic_block bb; |
313 | if (occ_child->children) | |
314 | compute_merit (occ_child); | |
315 | ||
316 | if (flag_exceptions) | |
317 | bb = single_noncomplex_succ (dom); | |
318 | else | |
319 | bb = dom; | |
320 | ||
321 | if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb)) | |
322 | occ->num_divisions += occ_child->num_divisions; | |
323 | } | |
324 | } | |
325 | ||
326 | ||
327 | /* Return whether USE_STMT is a floating-point division by DEF. */ | |
328 | static inline bool | |
42acab1c | 329 | is_division_by (gimple *use_stmt, tree def) |
ac70caad | 330 | { |
75a70cf9 | 331 | return is_gimple_assign (use_stmt) |
332 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR | |
333 | && gimple_assign_rhs2 (use_stmt) == def | |
119368d7 | 334 | /* Do not recognize x / x as valid division, as we are getting |
335 | confused later by replacing all immediate uses x in such | |
336 | a stmt. */ | |
61c8e77a | 337 | && gimple_assign_rhs1 (use_stmt) != def |
338 | && !stmt_can_throw_internal (cfun, use_stmt); | |
ac70caad | 339 | } |
340 | ||
3cb2785e | 341 | /* Return TRUE if USE_STMT is a multiplication of DEF by A. */ |
472f3f23 | 342 | static inline bool |
3cb2785e | 343 | is_mult_by (gimple *use_stmt, tree def, tree a) |
472f3f23 | 344 | { |
345 | if (gimple_code (use_stmt) == GIMPLE_ASSIGN | |
346 | && gimple_assign_rhs_code (use_stmt) == MULT_EXPR) | |
347 | { | |
348 | tree op0 = gimple_assign_rhs1 (use_stmt); | |
349 | tree op1 = gimple_assign_rhs2 (use_stmt); | |
350 | ||
3cb2785e | 351 | return (op0 == def && op1 == a) |
352 | || (op0 == a && op1 == def); | |
472f3f23 | 353 | } |
354 | return 0; | |
355 | } | |
356 | ||
3cb2785e | 357 | /* Return whether USE_STMT is DEF * DEF. */ |
358 | static inline bool | |
359 | is_square_of (gimple *use_stmt, tree def) | |
360 | { | |
361 | return is_mult_by (use_stmt, def, def); | |
362 | } | |
363 | ||
472f3f23 | 364 | /* Return whether USE_STMT is a floating-point division by |
365 | DEF * DEF. */ | |
366 | static inline bool | |
367 | is_division_by_square (gimple *use_stmt, tree def) | |
368 | { | |
369 | if (gimple_code (use_stmt) == GIMPLE_ASSIGN | |
370 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR | |
61c8e77a | 371 | && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt) |
372 | && !stmt_can_throw_internal (cfun, use_stmt)) | |
472f3f23 | 373 | { |
374 | tree denominator = gimple_assign_rhs2 (use_stmt); | |
375 | if (TREE_CODE (denominator) == SSA_NAME) | |
61c8e77a | 376 | return is_square_of (SSA_NAME_DEF_STMT (denominator), def); |
472f3f23 | 377 | } |
378 | return 0; | |
379 | } | |
380 | ||
ac70caad | 381 | /* Walk the subset of the dominator tree rooted at OCC, setting the |
382 | RECIP_DEF field to a definition of 1.0 / DEF that can be used in | |
383 | the given basic block. The field may be left NULL, of course, | |
384 | if it is not possible or profitable to do the optimization. | |
385 | ||
386 | DEF_BSI is an iterator pointing at the statement defining DEF. | |
387 | If RECIP_DEF is set, a dominator already has a computation that can | |
472f3f23 | 388 | be used. |
389 | ||
390 | If should_insert_square_recip is set, then this also inserts | |
391 | the square of the reciprocal immediately after the definition | |
392 | of the reciprocal. */ | |
ac70caad | 393 | |
394 | static void | |
75a70cf9 | 395 | insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, |
472f3f23 | 396 | tree def, tree recip_def, tree square_recip_def, |
397 | int should_insert_square_recip, int threshold) | |
ac70caad | 398 | { |
75a70cf9 | 399 | tree type; |
472f3f23 | 400 | gassign *new_stmt, *new_square_stmt; |
75a70cf9 | 401 | gimple_stmt_iterator gsi; |
ac70caad | 402 | struct occurrence *occ_child; |
403 | ||
404 | if (!recip_def | |
405 | && (occ->bb_has_division || !flag_trapping_math) | |
472f3f23 | 406 | /* Divide by two as all divisions are counted twice in |
407 | the costing loop. */ | |
408 | && occ->num_divisions / 2 >= threshold) | |
ac70caad | 409 | { |
410 | /* Make a variable with the replacement and substitute it. */ | |
411 | type = TREE_TYPE (def); | |
072f7ab1 | 412 | recip_def = create_tmp_reg (type, "reciptmp"); |
e9cf809e | 413 | new_stmt = gimple_build_assign (recip_def, RDIV_EXPR, |
414 | build_one_cst (type), def); | |
48e1416a | 415 | |
472f3f23 | 416 | if (should_insert_square_recip) |
417 | { | |
418 | square_recip_def = create_tmp_reg (type, "powmult_reciptmp"); | |
419 | new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR, | |
420 | recip_def, recip_def); | |
421 | } | |
422 | ||
ac70caad | 423 | if (occ->bb_has_division) |
472f3f23 | 424 | { |
425 | /* Case 1: insert before an existing division. */ | |
426 | gsi = gsi_after_labels (occ->bb); | |
427 | while (!gsi_end_p (gsi) | |
428 | && (!is_division_by (gsi_stmt (gsi), def)) | |
429 | && (!is_division_by_square (gsi_stmt (gsi), def))) | |
75a70cf9 | 430 | gsi_next (&gsi); |
ac70caad | 431 | |
472f3f23 | 432 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
922f606b | 433 | if (should_insert_square_recip) |
434 | gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); | |
472f3f23 | 435 | } |
75a70cf9 | 436 | else if (def_gsi && occ->bb == def_gsi->bb) |
472f3f23 | 437 | { |
438 | /* Case 2: insert right after the definition. Note that this will | |
ac70caad | 439 | never happen if the definition statement can throw, because in |
440 | that case the sole successor of the statement's basic block will | |
441 | dominate all the uses as well. */ | |
472f3f23 | 442 | gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); |
922f606b | 443 | if (should_insert_square_recip) |
444 | gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT); | |
472f3f23 | 445 | } |
ac70caad | 446 | else |
472f3f23 | 447 | { |
448 | /* Case 3: insert in a basic block not containing defs/uses. */ | |
449 | gsi = gsi_after_labels (occ->bb); | |
450 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); | |
922f606b | 451 | if (should_insert_square_recip) |
452 | gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); | |
472f3f23 | 453 | } |
454 | ||
30c4e60d | 455 | reciprocal_stats.rdivs_inserted++; |
456 | ||
ac70caad | 457 | occ->recip_def_stmt = new_stmt; |
abacb398 | 458 | } |
459 | ||
ac70caad | 460 | occ->recip_def = recip_def; |
472f3f23 | 461 | occ->square_recip_def = square_recip_def; |
ac70caad | 462 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
472f3f23 | 463 | insert_reciprocals (def_gsi, occ_child, def, recip_def, |
464 | square_recip_def, should_insert_square_recip, | |
465 | threshold); | |
466 | } | |
467 | ||
468 | /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)). | |
469 | Take as argument the use for (x * x). */ | |
470 | static inline void | |
471 | replace_reciprocal_squares (use_operand_p use_p) | |
472 | { | |
473 | gimple *use_stmt = USE_STMT (use_p); | |
474 | basic_block bb = gimple_bb (use_stmt); | |
475 | struct occurrence *occ = (struct occurrence *) bb->aux; | |
476 | ||
477 | if (optimize_bb_for_speed_p (bb) && occ->square_recip_def | |
478 | && occ->recip_def) | |
479 | { | |
480 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); | |
481 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); | |
482 | gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def); | |
483 | SET_USE (use_p, occ->square_recip_def); | |
484 | fold_stmt_inplace (&gsi); | |
485 | update_stmt (use_stmt); | |
486 | } | |
ac70caad | 487 | } |
488 | ||
489 | ||
490 | /* Replace the division at USE_P with a multiplication by the reciprocal, if | |
491 | possible. */ | |
492 | ||
493 | static inline void | |
494 | replace_reciprocal (use_operand_p use_p) | |
495 | { | |
42acab1c | 496 | gimple *use_stmt = USE_STMT (use_p); |
75a70cf9 | 497 | basic_block bb = gimple_bb (use_stmt); |
ac70caad | 498 | struct occurrence *occ = (struct occurrence *) bb->aux; |
499 | ||
0bfd8d5c | 500 | if (optimize_bb_for_speed_p (bb) |
501 | && occ->recip_def && use_stmt != occ->recip_def_stmt) | |
ac70caad | 502 | { |
50aacf4c | 503 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
75a70cf9 | 504 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |
ac70caad | 505 | SET_USE (use_p, occ->recip_def); |
50aacf4c | 506 | fold_stmt_inplace (&gsi); |
ac70caad | 507 | update_stmt (use_stmt); |
508 | } | |
509 | } | |
510 | ||
511 | ||
512 | /* Free OCC and return one more "struct occurrence" to be freed. */ | |
513 | ||
514 | static struct occurrence * | |
515 | free_bb (struct occurrence *occ) | |
516 | { | |
517 | struct occurrence *child, *next; | |
518 | ||
519 | /* First get the two pointers hanging off OCC. */ | |
520 | next = occ->next; | |
521 | child = occ->children; | |
522 | occ->bb->aux = NULL; | |
d8e7268c | 523 | occ_pool->remove (occ); |
ac70caad | 524 | |
525 | /* Now ensure that we don't recurse unless it is necessary. */ | |
526 | if (!child) | |
527 | return next; | |
9e583fac | 528 | else |
ac70caad | 529 | { |
530 | while (next) | |
531 | next = free_bb (next); | |
532 | ||
533 | return child; | |
534 | } | |
535 | } | |
536 | ||
3cb2785e | 537 | /* Transform sequences like |
538 | t = sqrt (a) | |
539 | x = 1.0 / t; | |
540 | r1 = x * x; | |
541 | r2 = a * x; | |
542 | into: | |
543 | t = sqrt (a) | |
544 | r1 = 1.0 / a; | |
545 | r2 = t; | |
546 | x = r1 * r2; | |
547 | depending on the uses of x, r1, r2. This removes one multiplication and | |
548 | allows the sqrt and division operations to execute in parallel. | |
549 | DEF_GSI is the gsi of the initial division by sqrt that defines | |
4552b6fc | 550 | DEF (x in the example above). */ |
3cb2785e | 551 | |
552 | static void | |
553 | optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def) | |
554 | { | |
555 | gimple *use_stmt; | |
556 | imm_use_iterator use_iter; | |
557 | gimple *stmt = gsi_stmt (*def_gsi); | |
558 | tree x = def; | |
559 | tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt); | |
560 | tree div_rhs1 = gimple_assign_rhs1 (stmt); | |
561 | ||
562 | if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME | |
563 | || TREE_CODE (div_rhs1) != REAL_CST | |
564 | || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1)) | |
565 | return; | |
566 | ||
567 | gcall *sqrt_stmt | |
568 | = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name)); | |
569 | ||
570 | if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt)) | |
571 | return; | |
572 | ||
573 | switch (gimple_call_combined_fn (sqrt_stmt)) | |
574 | { | |
575 | CASE_CFN_SQRT: | |
576 | CASE_CFN_SQRT_FN: | |
577 | break; | |
578 | ||
579 | default: | |
580 | return; | |
581 | } | |
582 | tree a = gimple_call_arg (sqrt_stmt, 0); | |
583 | ||
584 | /* We have 'a' and 'x'. Now analyze the uses of 'x'. */ | |
585 | ||
586 | /* Statements that use x in x * x. */ | |
587 | auto_vec<gimple *> sqr_stmts; | |
588 | /* Statements that use x in a * x. */ | |
589 | auto_vec<gimple *> mult_stmts; | |
590 | bool has_other_use = false; | |
591 | bool mult_on_main_path = false; | |
592 | ||
593 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x) | |
594 | { | |
595 | if (is_gimple_debug (use_stmt)) | |
596 | continue; | |
597 | if (is_square_of (use_stmt, x)) | |
598 | { | |
599 | sqr_stmts.safe_push (use_stmt); | |
600 | if (gimple_bb (use_stmt) == gimple_bb (stmt)) | |
601 | mult_on_main_path = true; | |
602 | } | |
603 | else if (is_mult_by (use_stmt, x, a)) | |
604 | { | |
605 | mult_stmts.safe_push (use_stmt); | |
606 | if (gimple_bb (use_stmt) == gimple_bb (stmt)) | |
607 | mult_on_main_path = true; | |
608 | } | |
609 | else | |
610 | has_other_use = true; | |
611 | } | |
612 | ||
613 | /* In the x * x and a * x cases we just rewire stmt operands or | |
614 | remove multiplications. In the has_other_use case we introduce | |
615 | a multiplication so make sure we don't introduce a multiplication | |
616 | on a path where there was none. */ | |
617 | if (has_other_use && !mult_on_main_path) | |
618 | return; | |
619 | ||
620 | if (sqr_stmts.is_empty () && mult_stmts.is_empty ()) | |
621 | return; | |
622 | ||
623 | /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want | |
624 | to be able to compose it from the sqr and mult cases. */ | |
625 | if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ())) | |
626 | return; | |
627 | ||
628 | if (dump_file) | |
629 | { | |
630 | fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n"); | |
631 | print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE); | |
632 | print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); | |
633 | fprintf (dump_file, "\n"); | |
634 | } | |
635 | ||
636 | bool delete_div = !has_other_use; | |
637 | tree sqr_ssa_name = NULL_TREE; | |
638 | if (!sqr_stmts.is_empty ()) | |
639 | { | |
640 | /* r1 = x * x. Transform the original | |
641 | x = 1.0 / t | |
642 | into | |
643 | tmp1 = 1.0 / a | |
644 | r1 = tmp1. */ | |
645 | ||
646 | sqr_ssa_name | |
647 | = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr"); | |
648 | ||
649 | if (dump_file) | |
650 | { | |
651 | fprintf (dump_file, "Replacing original division\n"); | |
652 | print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); | |
653 | fprintf (dump_file, "with new division\n"); | |
654 | } | |
b9feec79 | 655 | stmt |
656 | = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt), | |
657 | gimple_assign_rhs1 (stmt), a); | |
658 | gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT); | |
659 | gsi_remove (def_gsi, true); | |
660 | *def_gsi = gsi_for_stmt (stmt); | |
3cb2785e | 661 | fold_stmt_inplace (def_gsi); |
662 | update_stmt (stmt); | |
663 | ||
664 | if (dump_file) | |
665 | print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); | |
666 | ||
667 | delete_div = false; | |
668 | gimple *sqr_stmt; | |
669 | unsigned int i; | |
670 | FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt) | |
671 | { | |
672 | gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt); | |
673 | gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name); | |
674 | update_stmt (sqr_stmt); | |
675 | } | |
676 | } | |
677 | if (!mult_stmts.is_empty ()) | |
678 | { | |
679 | /* r2 = a * x. Transform this into: | |
680 | r2 = t (The original sqrt (a)). */ | |
681 | unsigned int i; | |
682 | gimple *mult_stmt = NULL; | |
683 | FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt) | |
684 | { | |
685 | gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt); | |
686 | ||
687 | if (dump_file) | |
688 | { | |
689 | fprintf (dump_file, "Replacing squaring multiplication\n"); | |
690 | print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE); | |
691 | fprintf (dump_file, "with assignment\n"); | |
692 | } | |
693 | gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name); | |
694 | fold_stmt_inplace (&gsi2); | |
695 | update_stmt (mult_stmt); | |
696 | if (dump_file) | |
697 | print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE); | |
698 | } | |
699 | } | |
700 | ||
701 | if (has_other_use) | |
702 | { | |
703 | /* Using the two temporaries tmp1, tmp2 from above | |
704 | the original x is now: | |
705 | x = tmp1 * tmp2. */ | |
706 | gcc_assert (orig_sqrt_ssa_name); | |
707 | gcc_assert (sqr_ssa_name); | |
708 | ||
709 | gimple *new_stmt | |
710 | = gimple_build_assign (x, MULT_EXPR, | |
b9feec79 | 711 | orig_sqrt_ssa_name, sqr_ssa_name); |
3cb2785e | 712 | gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); |
713 | update_stmt (stmt); | |
714 | } | |
715 | else if (delete_div) | |
716 | { | |
717 | /* Remove the original division. */ | |
718 | gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt); | |
719 | gsi_remove (&gsi2, true); | |
720 | release_defs (stmt); | |
721 | } | |
b9feec79 | 722 | else |
723 | release_ssa_name (x); | |
3cb2785e | 724 | } |
ac70caad | 725 | |
726 | /* Look for floating-point divisions among DEF's uses, and try to | |
727 | replace them by multiplications with the reciprocal. Add | |
728 | as many statements computing the reciprocal as needed. | |
729 | ||
730 | DEF must be a GIMPLE register of a floating-point type. */ | |
731 | ||
732 | static void | |
75a70cf9 | 733 | execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) |
ac70caad | 734 | { |
472f3f23 | 735 | use_operand_p use_p, square_use_p; |
736 | imm_use_iterator use_iter, square_use_iter; | |
737 | tree square_def; | |
ac70caad | 738 | struct occurrence *occ; |
472f3f23 | 739 | int count = 0; |
740 | int threshold; | |
741 | int square_recip_count = 0; | |
742 | int sqrt_recip_count = 0; | |
abacb398 | 743 | |
56c4f422 | 744 | gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME); |
472f3f23 | 745 | threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); |
746 | ||
41e37ac9 | 747 | /* If DEF is a square (x * x), count the number of divisions by x. |
748 | If there are more divisions by x than by (DEF * DEF), prefer to optimize | |
749 | the reciprocal of x instead of DEF. This improves cases like: | |
750 | def = x * x | |
751 | t0 = a / def | |
752 | t1 = b / def | |
753 | t2 = c / x | |
754 | Reciprocal optimization of x results in 1 division rather than 2 or 3. */ | |
755 | gimple *def_stmt = SSA_NAME_DEF_STMT (def); | |
756 | ||
757 | if (is_gimple_assign (def_stmt) | |
758 | && gimple_assign_rhs_code (def_stmt) == MULT_EXPR | |
759 | && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME | |
760 | && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt)) | |
472f3f23 | 761 | { |
41e37ac9 | 762 | tree op0 = gimple_assign_rhs1 (def_stmt); |
472f3f23 | 763 | |
41e37ac9 | 764 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0) |
472f3f23 | 765 | { |
41e37ac9 | 766 | gimple *use_stmt = USE_STMT (use_p); |
767 | if (is_division_by (use_stmt, op0)) | |
768 | sqrt_recip_count++; | |
472f3f23 | 769 | } |
770 | } | |
ac70caad | 771 | |
772 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) | |
abacb398 | 773 | { |
42acab1c | 774 | gimple *use_stmt = USE_STMT (use_p); |
ac70caad | 775 | if (is_division_by (use_stmt, def)) |
abacb398 | 776 | { |
472f3f23 | 777 | register_division_in (gimple_bb (use_stmt), 2); |
ac70caad | 778 | count++; |
abacb398 | 779 | } |
472f3f23 | 780 | |
781 | if (is_square_of (use_stmt, def)) | |
782 | { | |
783 | square_def = gimple_assign_lhs (use_stmt); | |
784 | FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def) | |
785 | { | |
786 | gimple *square_use_stmt = USE_STMT (square_use_p); | |
787 | if (is_division_by (square_use_stmt, square_def)) | |
788 | { | |
41e37ac9 | 789 | /* This is executed twice for each division by a square. */ |
472f3f23 | 790 | register_division_in (gimple_bb (square_use_stmt), 1); |
41e37ac9 | 791 | square_recip_count++; |
472f3f23 | 792 | } |
793 | } | |
794 | } | |
abacb398 | 795 | } |
48e1416a | 796 | |
41e37ac9 | 797 | /* Square reciprocals were counted twice above. */ |
472f3f23 | 798 | square_recip_count /= 2; |
799 | ||
41e37ac9 | 800 | /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */ |
472f3f23 | 801 | if (sqrt_recip_count > square_recip_count) |
d5e9136f | 802 | goto out; |
472f3f23 | 803 | |
ac70caad | 804 | /* Do the expensive part only if we can hope to optimize something. */ |
41e37ac9 | 805 | if (count + square_recip_count >= threshold && count >= 1) |
ac70caad | 806 | { |
42acab1c | 807 | gimple *use_stmt; |
ac70caad | 808 | for (occ = occ_head; occ; occ = occ->next) |
809 | { | |
810 | compute_merit (occ); | |
472f3f23 | 811 | insert_reciprocals (def_gsi, occ, def, NULL, NULL, |
812 | square_recip_count, threshold); | |
ac70caad | 813 | } |
814 | ||
09aca5bc | 815 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def) |
ac70caad | 816 | { |
ac70caad | 817 | if (is_division_by (use_stmt, def)) |
09aca5bc | 818 | { |
819 | FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) | |
820 | replace_reciprocal (use_p); | |
821 | } | |
41e37ac9 | 822 | else if (square_recip_count > 0 && is_square_of (use_stmt, def)) |
472f3f23 | 823 | { |
824 | FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) | |
825 | { | |
826 | /* Find all uses of the square that are divisions and | |
827 | * replace them by multiplications with the inverse. */ | |
828 | imm_use_iterator square_iterator; | |
829 | gimple *powmult_use_stmt = USE_STMT (use_p); | |
830 | tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt); | |
831 | ||
832 | FOR_EACH_IMM_USE_STMT (powmult_use_stmt, | |
833 | square_iterator, powmult_def_name) | |
834 | FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator) | |
835 | { | |
836 | gimple *powmult_use_stmt = USE_STMT (square_use_p); | |
837 | if (is_division_by (powmult_use_stmt, powmult_def_name)) | |
838 | replace_reciprocal_squares (square_use_p); | |
839 | } | |
840 | } | |
841 | } | |
ac70caad | 842 | } |
843 | } | |
844 | ||
d5e9136f | 845 | out: |
ac70caad | 846 | for (occ = occ_head; occ; ) |
847 | occ = free_bb (occ); | |
848 | ||
849 | occ_head = NULL; | |
abacb398 | 850 | } |
851 | ||
4cfd27a5 | 852 | /* Return an internal function that implements the reciprocal of CALL, |
853 | or IFN_LAST if there is no such function that the target supports. */ | |
854 | ||
855 | internal_fn | |
856 | internal_fn_reciprocal (gcall *call) | |
857 | { | |
858 | internal_fn ifn; | |
859 | ||
860 | switch (gimple_call_combined_fn (call)) | |
861 | { | |
862 | CASE_CFN_SQRT: | |
8c32188e | 863 | CASE_CFN_SQRT_FN: |
4cfd27a5 | 864 | ifn = IFN_RSQRT; |
865 | break; | |
866 | ||
867 | default: | |
868 | return IFN_LAST; | |
869 | } | |
870 | ||
871 | tree_pair types = direct_internal_fn_types (ifn, call); | |
872 | if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED)) | |
873 | return IFN_LAST; | |
874 | ||
875 | return ifn; | |
876 | } | |
877 | ||
ac70caad | 878 | /* Go through all the floating-point SSA_NAMEs, and call |
879 | execute_cse_reciprocals_1 on each of them. */ | |
65b0537f | 880 | namespace { |
881 | ||
/* Static descriptor of the reciprocal pass: a GIMPLE pass named "recip",
   timed under TV_TREE_RECIP, that requires PROP_ssa and asks for
   TODO_update_ssa once it has finished.  */
882 | const pass_data pass_data_cse_reciprocals = | |
883 | { | |
884 | GIMPLE_PASS, /* type */ | |
885 | "recip", /* name */ | |
886 | OPTGROUP_NONE, /* optinfo_flags */ | |
8ed378fe | 887 | TV_TREE_RECIP, /* tv_id */ |
65b0537f | 888 | PROP_ssa, /* properties_required */ |
889 | 0, /* properties_provided */ | |
890 | 0, /* properties_destroyed */ | |
891 | 0, /* todo_flags_start */ | |
8b88439e | 892 | TODO_update_ssa, /* todo_flags_finish */ |
65b0537f | 893 | }; |
894 | ||
/* The reciprocal pass object.  It is constructed from
   pass_data_cse_reciprocals and the compiler context CTXT.  */
895 | class pass_cse_reciprocals : public gimple_opt_pass | |
896 | { | |
897 | public: | |
898 | pass_cse_reciprocals (gcc::context *ctxt) | |
899 | : gimple_opt_pass (pass_data_cse_reciprocals, ctxt) | |
900 | {} | |
901 | ||
902 | /* opt_pass methods: gate lets the pass run only when both optimize and flag_reciprocal_math are nonzero; execute is defined out of line. */ | |
903 | virtual bool gate (function *) { return optimize && flag_reciprocal_math; } | |
904 | virtual unsigned int execute (function *); | |
905 | ||
906 | }; // class pass_cse_reciprocals | |
907 | ||
908 | unsigned int | |
909 | pass_cse_reciprocals::execute (function *fun) | |
abacb398 | 910 | { |
911 | basic_block bb; | |
51b60a11 | 912 | tree arg; |
685b24f5 | 913 | |
1dc6c44d | 914 | occ_pool = new object_allocator<occurrence> ("dominators for recip"); |
685b24f5 | 915 | |
30c4e60d | 916 | memset (&reciprocal_stats, 0, sizeof (reciprocal_stats)); |
c136ae61 | 917 | calculate_dominance_info (CDI_DOMINATORS); |
918 | calculate_dominance_info (CDI_POST_DOMINATORS); | |
ac70caad | 919 | |
382ecba7 | 920 | if (flag_checking) |
921 | FOR_EACH_BB_FN (bb, fun) | |
922 | gcc_assert (!bb->aux); | |
ac70caad | 923 | |
65b0537f | 924 | for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg)) |
c6dfe037 | 925 | if (FLOAT_TYPE_P (TREE_TYPE (arg)) |
ac70caad | 926 | && is_gimple_reg (arg)) |
c6dfe037 | 927 | { |
65b0537f | 928 | tree name = ssa_default_def (fun, arg); |
c6dfe037 | 929 | if (name) |
930 | execute_cse_reciprocals_1 (NULL, name); | |
931 | } | |
51b60a11 | 932 | |
65b0537f | 933 | FOR_EACH_BB_FN (bb, fun) |
abacb398 | 934 | { |
75a70cf9 | 935 | tree def; |
abacb398 | 936 | |
1a91d914 | 937 | for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); |
938 | gsi_next (&gsi)) | |
abacb398 | 939 | { |
1a91d914 | 940 | gphi *phi = gsi.phi (); |
abacb398 | 941 | def = PHI_RESULT (phi); |
7c782c9b | 942 | if (! virtual_operand_p (def) |
943 | && FLOAT_TYPE_P (TREE_TYPE (def))) | |
ac70caad | 944 | execute_cse_reciprocals_1 (NULL, def); |
abacb398 | 945 | } |
946 | ||
1a91d914 | 947 | for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); |
948 | gsi_next (&gsi)) | |
abacb398 | 949 | { |
42acab1c | 950 | gimple *stmt = gsi_stmt (gsi); |
a0315874 | 951 | |
75a70cf9 | 952 | if (gimple_has_lhs (stmt) |
abacb398 | 953 | && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL |
954 | && FLOAT_TYPE_P (TREE_TYPE (def)) | |
51b60a11 | 955 | && TREE_CODE (def) == SSA_NAME) |
3cb2785e | 956 | { |
4552b6fc | 957 | execute_cse_reciprocals_1 (&gsi, def); |
958 | stmt = gsi_stmt (gsi); | |
3cb2785e | 959 | if (flag_unsafe_math_optimizations |
960 | && is_gimple_assign (stmt) | |
b9feec79 | 961 | && gimple_assign_lhs (stmt) == def |
aac19106 | 962 | && !stmt_can_throw_internal (cfun, stmt) |
3cb2785e | 963 | && gimple_assign_rhs_code (stmt) == RDIV_EXPR) |
964 | optimize_recip_sqrt (&gsi, def); | |
3cb2785e | 965 | } |
abacb398 | 966 | } |
e174638f | 967 | |
0bfd8d5c | 968 | if (optimize_bb_for_size_p (bb)) |
969 | continue; | |
970 | ||
e174638f | 971 | /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */ |
1a91d914 | 972 | for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); |
973 | gsi_next (&gsi)) | |
e174638f | 974 | { |
42acab1c | 975 | gimple *stmt = gsi_stmt (gsi); |
e174638f | 976 | |
75a70cf9 | 977 | if (is_gimple_assign (stmt) |
978 | && gimple_assign_rhs_code (stmt) == RDIV_EXPR) | |
e174638f | 979 | { |
75a70cf9 | 980 | tree arg1 = gimple_assign_rhs2 (stmt); |
42acab1c | 981 | gimple *stmt1; |
2cd360b6 | 982 | |
983 | if (TREE_CODE (arg1) != SSA_NAME) | |
984 | continue; | |
985 | ||
986 | stmt1 = SSA_NAME_DEF_STMT (arg1); | |
e174638f | 987 | |
75a70cf9 | 988 | if (is_gimple_call (stmt1) |
4cfd27a5 | 989 | && gimple_call_lhs (stmt1)) |
e174638f | 990 | { |
851c1b0c | 991 | bool fail; |
774b1cdd | 992 | imm_use_iterator ui; |
993 | use_operand_p use_p; | |
4cfd27a5 | 994 | tree fndecl = NULL_TREE; |
e174638f | 995 | |
4cfd27a5 | 996 | gcall *call = as_a <gcall *> (stmt1); |
997 | internal_fn ifn = internal_fn_reciprocal (call); | |
998 | if (ifn == IFN_LAST) | |
999 | { | |
1000 | fndecl = gimple_call_fndecl (call); | |
1001 | if (!fndecl | |
a0e9bfbb | 1002 | || !fndecl_built_in_p (fndecl, BUILT_IN_MD)) |
4cfd27a5 | 1003 | continue; |
1004 | fndecl = targetm.builtin_reciprocal (fndecl); | |
1005 | if (!fndecl) | |
1006 | continue; | |
1007 | } | |
e174638f | 1008 | |
774b1cdd | 1009 | /* Check that all uses of the SSA name are divisions, |
1010 | otherwise replacing the defining statement will do | |
1011 | the wrong thing. */ | |
1012 | fail = false; | |
1013 | FOR_EACH_IMM_USE_FAST (use_p, ui, arg1) | |
1014 | { | |
42acab1c | 1015 | gimple *stmt2 = USE_STMT (use_p); |
774b1cdd | 1016 | if (is_gimple_debug (stmt2)) |
1017 | continue; | |
1018 | if (!is_gimple_assign (stmt2) | |
1019 | || gimple_assign_rhs_code (stmt2) != RDIV_EXPR | |
1020 | || gimple_assign_rhs1 (stmt2) == arg1 | |
1021 | || gimple_assign_rhs2 (stmt2) != arg1) | |
1022 | { | |
1023 | fail = true; | |
1024 | break; | |
1025 | } | |
1026 | } | |
1027 | if (fail) | |
1028 | continue; | |
1029 | ||
4cfd27a5 | 1030 | gimple_replace_ssa_lhs (call, arg1); |
1031 | if (gimple_call_internal_p (call) != (ifn != IFN_LAST)) | |
851c1b0c | 1032 | { |
1033 | auto_vec<tree, 4> args; | |
1034 | for (unsigned int i = 0; | |
4cfd27a5 | 1035 | i < gimple_call_num_args (call); i++) |
1036 | args.safe_push (gimple_call_arg (call, i)); | |
1037 | gcall *stmt2; | |
1038 | if (ifn == IFN_LAST) | |
1039 | stmt2 = gimple_build_call_vec (fndecl, args); | |
1040 | else | |
1041 | stmt2 = gimple_build_call_internal_vec (ifn, args); | |
851c1b0c | 1042 | gimple_call_set_lhs (stmt2, arg1); |
1263a9e1 | 1043 | gimple_move_vops (stmt2, call); |
989f02dc | 1044 | gimple_call_set_nothrow (stmt2, |
1045 | gimple_call_nothrow_p (call)); | |
4cfd27a5 | 1046 | gimple_stmt_iterator gsi2 = gsi_for_stmt (call); |
851c1b0c | 1047 | gsi_replace (&gsi2, stmt2, true); |
1048 | } | |
1049 | else | |
1050 | { | |
4cfd27a5 | 1051 | if (ifn == IFN_LAST) |
1052 | gimple_call_set_fndecl (call, fndecl); | |
1053 | else | |
1054 | gimple_call_set_internal_fn (call, ifn); | |
1055 | update_stmt (call); | |
851c1b0c | 1056 | } |
30c4e60d | 1057 | reciprocal_stats.rfuncs_inserted++; |
e174638f | 1058 | |
774b1cdd | 1059 | FOR_EACH_IMM_USE_STMT (stmt, ui, arg1) |
1060 | { | |
50aacf4c | 1061 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
774b1cdd | 1062 | gimple_assign_set_rhs_code (stmt, MULT_EXPR); |
50aacf4c | 1063 | fold_stmt_inplace (&gsi); |
774b1cdd | 1064 | update_stmt (stmt); |
1065 | } | |
e174638f | 1066 | } |
1067 | } | |
1068 | } | |
abacb398 | 1069 | } |
685b24f5 | 1070 | |
65b0537f | 1071 | statistics_counter_event (fun, "reciprocal divs inserted", |
30c4e60d | 1072 | reciprocal_stats.rdivs_inserted); |
65b0537f | 1073 | statistics_counter_event (fun, "reciprocal functions inserted", |
30c4e60d | 1074 | reciprocal_stats.rfuncs_inserted); |
1075 | ||
c136ae61 | 1076 | free_dominance_info (CDI_DOMINATORS); |
1077 | free_dominance_info (CDI_POST_DOMINATORS); | |
d8e7268c | 1078 | delete occ_pool; |
2a1990e9 | 1079 | return 0; |
abacb398 | 1080 | } |
1081 | ||
cbe8bda8 | 1082 | } // anon namespace |
1083 | ||
1084 | gimple_opt_pass * | |
1085 | make_pass_cse_reciprocals (gcc::context *ctxt) | |
1086 | { | |
1087 | return new pass_cse_reciprocals (ctxt); | |
1088 | } | |
1089 | ||
0d424440 | 1090 | /* Records an occurrence at statement USE_STMT in the vector of trees |
a0315874 | 1091 | STMTS if it is dominated by *TOP_BB or dominates it or this basic block |
0d424440 | 1092 | is not yet initialized. Returns true if the occurrence was pushed on |
a0315874 | 1093 | the vector. Adjusts *TOP_BB to be the basic block dominating all |
1094 | statements in the vector. */ | |
1095 | ||
1096 | static bool | |
42acab1c | 1097 | maybe_record_sincos (vec<gimple *> *stmts, |
1098 | basic_block *top_bb, gimple *use_stmt) | |
a0315874 | 1099 | { |
75a70cf9 | 1100 | basic_block use_bb = gimple_bb (use_stmt); |
a0315874 | 1101 | if (*top_bb |
1102 | && (*top_bb == use_bb | |
1103 | || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) | |
f1f41a6c | 1104 | stmts->safe_push (use_stmt); |
a0315874 | 1105 | else if (!*top_bb |
1106 | || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) | |
1107 | { | |
f1f41a6c | 1108 | stmts->safe_push (use_stmt); |
a0315874 | 1109 | *top_bb = use_bb; |
1110 | } | |
1111 | else | |
1112 | return false; | |
1113 | ||
1114 | return true; | |
1115 | } | |
1116 | ||
1117 | /* Look for sin, cos and cexpi calls with the same argument NAME and | |
1118 | create a single call to cexpi CSEing the result in this case. | |
1119 | We first walk over all immediate uses of the argument collecting | |
1120 | statements that we can CSE in a vector and in a second pass replace | |
1121 | the statement rhs with a REALPART or IMAGPART expression on the | |
1122 | result of the cexpi call we insert before the use statement that | |
1123 | dominates all other candidates. */ | |
1124 | ||
4c80086d | 1125 | static bool |
a0315874 | 1126 | execute_cse_sincos_1 (tree name) |
1127 | { | |
75a70cf9 | 1128 | gimple_stmt_iterator gsi; |
a0315874 | 1129 | imm_use_iterator use_iter; |
75a70cf9 | 1130 | tree fndecl, res, type; |
42acab1c | 1131 | gimple *def_stmt, *use_stmt, *stmt; |
a0315874 | 1132 | int seen_cos = 0, seen_sin = 0, seen_cexpi = 0; |
42acab1c | 1133 | auto_vec<gimple *> stmts; |
a0315874 | 1134 | basic_block top_bb = NULL; |
1135 | int i; | |
4c80086d | 1136 | bool cfg_changed = false; |
a0315874 | 1137 | |
1138 | type = TREE_TYPE (name); | |
1139 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name) | |
1140 | { | |
75a70cf9 | 1141 | if (gimple_code (use_stmt) != GIMPLE_CALL |
fa0793ad | 1142 | || !gimple_call_lhs (use_stmt)) |
a0315874 | 1143 | continue; |
1144 | ||
fa0793ad | 1145 | switch (gimple_call_combined_fn (use_stmt)) |
a0315874 | 1146 | { |
fa0793ad | 1147 | CASE_CFN_COS: |
a0315874 | 1148 | seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; |
1149 | break; | |
1150 | ||
fa0793ad | 1151 | CASE_CFN_SIN: |
a0315874 | 1152 | seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; |
1153 | break; | |
1154 | ||
fa0793ad | 1155 | CASE_CFN_CEXPI: |
a0315874 | 1156 | seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; |
1157 | break; | |
1158 | ||
1159 | default:; | |
1160 | } | |
1161 | } | |
1162 | ||
1163 | if (seen_cos + seen_sin + seen_cexpi <= 1) | |
6702d09a | 1164 | return false; |
a0315874 | 1165 | |
1166 | /* Simply insert cexpi at the beginning of top_bb but not earlier than | |
1167 | the name def statement. */ | |
1168 | fndecl = mathfn_built_in (type, BUILT_IN_CEXPI); | |
1169 | if (!fndecl) | |
4c80086d | 1170 | return false; |
75a70cf9 | 1171 | stmt = gimple_build_call (fndecl, 1, name); |
03d37e4e | 1172 | res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp"); |
75a70cf9 | 1173 | gimple_call_set_lhs (stmt, res); |
1174 | ||
a0315874 | 1175 | def_stmt = SSA_NAME_DEF_STMT (name); |
8090c12d | 1176 | if (!SSA_NAME_IS_DEFAULT_DEF (name) |
75a70cf9 | 1177 | && gimple_code (def_stmt) != GIMPLE_PHI |
1178 | && gimple_bb (def_stmt) == top_bb) | |
a0315874 | 1179 | { |
75a70cf9 | 1180 | gsi = gsi_for_stmt (def_stmt); |
1181 | gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); | |
a0315874 | 1182 | } |
1183 | else | |
1184 | { | |
75a70cf9 | 1185 | gsi = gsi_after_labels (top_bb); |
1186 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
a0315874 | 1187 | } |
30c4e60d | 1188 | sincos_stats.inserted++; |
a0315874 | 1189 | |
1190 | /* And adjust the recorded old call sites. */ | |
f1f41a6c | 1191 | for (i = 0; stmts.iterate (i, &use_stmt); ++i) |
a0315874 | 1192 | { |
75a70cf9 | 1193 | tree rhs = NULL; |
75a70cf9 | 1194 | |
fa0793ad | 1195 | switch (gimple_call_combined_fn (use_stmt)) |
a0315874 | 1196 | { |
fa0793ad | 1197 | CASE_CFN_COS: |
75a70cf9 | 1198 | rhs = fold_build1 (REALPART_EXPR, type, res); |
a0315874 | 1199 | break; |
1200 | ||
fa0793ad | 1201 | CASE_CFN_SIN: |
75a70cf9 | 1202 | rhs = fold_build1 (IMAGPART_EXPR, type, res); |
a0315874 | 1203 | break; |
1204 | ||
fa0793ad | 1205 | CASE_CFN_CEXPI: |
75a70cf9 | 1206 | rhs = res; |
a0315874 | 1207 | break; |
1208 | ||
1209 | default:; | |
1210 | gcc_unreachable (); | |
1211 | } | |
1212 | ||
75a70cf9 | 1213 | /* Replace call with a copy. */ |
1214 | stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs); | |
1215 | ||
1216 | gsi = gsi_for_stmt (use_stmt); | |
4c80086d | 1217 | gsi_replace (&gsi, stmt, true); |
1218 | if (gimple_purge_dead_eh_edges (gimple_bb (stmt))) | |
1219 | cfg_changed = true; | |
a0315874 | 1220 | } |
1221 | ||
4c80086d | 1222 | return cfg_changed; |
a0315874 | 1223 | } |
1224 | ||
e9a6c4bc | 1225 | /* To evaluate powi(x,n), the floating point value x raised to the |
1226 | constant integer exponent n, we use a hybrid algorithm that | |
1227 | combines the "window method" with look-up tables. For an | |
1228 | introduction to exponentiation algorithms and "addition chains", | |
1229 | see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth, | |
1230 | "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming", | |
1231 | 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation | |
1232 | Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */ | |
1233 | ||
1234 | /* Provide a default value for POWI_MAX_MULTS, the maximum number of | |
1235 | multiplications to inline before calling the system library's pow | |
1236 | function. powi(x,n) requires at worst 2*bits(n)-2 multiplications, | |
1237 | so this default never requires calling pow, powf or powl. The #ifndef lets an earlier definition of the macro take precedence. */ | |
1238 | ||
1239 | #ifndef POWI_MAX_MULTS | |
1240 | #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2) | |
1241 | #endif | |
1242 | ||
1243 | /* The size of the "optimal power tree" lookup table. All | |
1244 | exponents less than this value are simply looked up in the | |
1245 | powi_table below. This threshold is also used to size the | |
1246 | cache of pseudo registers that hold intermediate results. */ | |
1247 | #define POWI_TABLE_SIZE 256 | |
1248 | ||
1249 | /* The size, in bits, of the window used in the "window method" | |
1250 | exponentiation algorithm. This is equivalent to a radix of | |
1251 | (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */ | |
1252 | #define POWI_WINDOW_SIZE 3 | |
1253 | ||
1254 | /* The following table is an efficient representation of an | |
1255 | "optimal power tree". For each value, i, the corresponding | |
1256 | value, j, in the table states that an optimal evaluation | |
1257 | sequence for calculating pow(x,i) can be found by evaluating | |
1258 | pow(x,j)*pow(x,i-j). An optimal power tree for the first | |
1259 | 100 integers is given in Knuth's "Seminumerical algorithms". */ | |
1260 | ||
1261 | static const unsigned char powi_table[POWI_TABLE_SIZE] = | |
1262 | { | |
1263 | 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */ | |
1264 | 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */ | |
1265 | 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */ | |
1266 | 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */ | |
1267 | 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */ | |
1268 | 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */ | |
1269 | 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */ | |
1270 | 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */ | |
1271 | 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */ | |
1272 | 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */ | |
1273 | 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */ | |
1274 | 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */ | |
1275 | 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */ | |
1276 | 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */ | |
1277 | 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */ | |
1278 | 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */ | |
1279 | 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */ | |
1280 | 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */ | |
1281 | 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */ | |
1282 | 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */ | |
1283 | 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */ | |
1284 | 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */ | |
1285 | 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */ | |
1286 | 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */ | |
1287 | 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */ | |
1288 | 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */ | |
1289 | 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */ | |
1290 | 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */ | |
1291 | 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */ | |
1292 | 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */ | |
1293 | 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */ | |
1294 | 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */ | |
1295 | }; | |
1296 | ||
1297 | ||
1298 | /* Return the number of multiplications required to calculate | |
1299 | powi(x,n) where n is less than POWI_TABLE_SIZE. This is a | |
1300 | subroutine of powi_cost. CACHE is an array indicating | |
1301 | which exponents have already been calculated. */ | |
1302 | ||
1303 | static int | |
1304 | powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache) | |
1305 | { | |
1306 | /* If we've already calculated this exponent, then this evaluation | |
1307 | doesn't require any additional multiplications. */ | |
1308 | if (cache[n]) | |
1309 | return 0; | |
1310 | ||
1311 | cache[n] = true; | |
1312 | return powi_lookup_cost (n - powi_table[n], cache) | |
1313 | + powi_lookup_cost (powi_table[n], cache) + 1; | |
1314 | } | |
1315 | ||
1316 | /* Return the number of multiplications required to calculate | |
1317 | powi(x,n) for an arbitrary x, given the exponent N. This | |
1318 | function needs to be kept in sync with powi_as_mults below. */ | |
1319 | ||
1320 | static int | |
1321 | powi_cost (HOST_WIDE_INT n) | |
1322 | { | |
1323 | bool cache[POWI_TABLE_SIZE]; | |
1324 | unsigned HOST_WIDE_INT digit; | |
1325 | unsigned HOST_WIDE_INT val; | |
1326 | int result; | |
1327 | ||
1328 | if (n == 0) | |
1329 | return 0; | |
1330 | ||
1331 | /* Ignore the reciprocal when calculating the cost. */ | |
1332 | val = (n < 0) ? -n : n; | |
1333 | ||
1334 | /* Initialize the exponent cache. */ | |
1335 | memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool)); | |
1336 | cache[1] = true; | |
1337 | ||
1338 | result = 0; | |
1339 | ||
1340 | while (val >= POWI_TABLE_SIZE) | |
1341 | { | |
1342 | if (val & 1) | |
1343 | { | |
1344 | digit = val & ((1 << POWI_WINDOW_SIZE) - 1); | |
1345 | result += powi_lookup_cost (digit, cache) | |
1346 | + POWI_WINDOW_SIZE + 1; | |
1347 | val >>= POWI_WINDOW_SIZE; | |
1348 | } | |
1349 | else | |
1350 | { | |
1351 | val >>= 1; | |
1352 | result++; | |
1353 | } | |
1354 | } | |
1355 | ||
1356 | return result + powi_lookup_cost (val, cache); | |
1357 | } | |
1358 | ||
1359 | /* Recursive subroutine of powi_as_mults. This function takes the | |
1360 | array, CACHE, of already calculated exponents and an exponent N and | |
1361 | returns a tree that corresponds to CACHE[1]**N, with type TYPE. */ | |
1362 | ||
1363 | static tree | |
1364 | powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type, | |
03d37e4e | 1365 | HOST_WIDE_INT n, tree *cache) |
e9a6c4bc | 1366 | { |
1367 | tree op0, op1, ssa_target; | |
1368 | unsigned HOST_WIDE_INT digit; | |
1a91d914 | 1369 | gassign *mult_stmt; |
e9a6c4bc | 1370 | |
1371 | if (n < POWI_TABLE_SIZE && cache[n]) | |
1372 | return cache[n]; | |
1373 | ||
03d37e4e | 1374 | ssa_target = make_temp_ssa_name (type, NULL, "powmult"); |
e9a6c4bc | 1375 | |
1376 | if (n < POWI_TABLE_SIZE) | |
1377 | { | |
1378 | cache[n] = ssa_target; | |
03d37e4e | 1379 | op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache); |
1380 | op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache); | |
e9a6c4bc | 1381 | } |
1382 | else if (n & 1) | |
1383 | { | |
1384 | digit = n & ((1 << POWI_WINDOW_SIZE) - 1); | |
03d37e4e | 1385 | op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache); |
1386 | op1 = powi_as_mults_1 (gsi, loc, type, digit, cache); | |
e9a6c4bc | 1387 | } |
1388 | else | |
1389 | { | |
03d37e4e | 1390 | op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache); |
e9a6c4bc | 1391 | op1 = op0; |
1392 | } | |
1393 | ||
e9cf809e | 1394 | mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1); |
ae43b05e | 1395 | gimple_set_location (mult_stmt, loc); |
e9a6c4bc | 1396 | gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT); |
1397 | ||
1398 | return ssa_target; | |
1399 | } | |
1400 | ||
1401 | /* Convert ARG0**N to a tree of multiplications of ARG0 with itself. | |
1402 | This function needs to be kept in sync with powi_cost above. */ | |
1403 | ||
1404 | static tree | |
1405 | powi_as_mults (gimple_stmt_iterator *gsi, location_t loc, | |
1406 | tree arg0, HOST_WIDE_INT n) | |
1407 | { | |
03d37e4e | 1408 | tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0); |
1a91d914 | 1409 | gassign *div_stmt; |
03d37e4e | 1410 | tree target; |
e9a6c4bc | 1411 | |
1412 | if (n == 0) | |
1413 | return build_real (type, dconst1); | |
1414 | ||
1415 | memset (cache, 0, sizeof (cache)); | |
1416 | cache[1] = arg0; | |
1417 | ||
03d37e4e | 1418 | result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache); |
e9a6c4bc | 1419 | if (n >= 0) |
1420 | return result; | |
1421 | ||
1422 | /* If the original exponent was negative, reciprocate the result. */ | |
03d37e4e | 1423 | target = make_temp_ssa_name (type, NULL, "powmult"); |
e9cf809e | 1424 | div_stmt = gimple_build_assign (target, RDIV_EXPR, |
1425 | build_real (type, dconst1), result); | |
ae43b05e | 1426 | gimple_set_location (div_stmt, loc); |
e9a6c4bc | 1427 | gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT); |
1428 | ||
1429 | return target; | |
1430 | } | |
1431 | ||
1432 | /* ARG0 and N are the two arguments to a powi builtin in GSI with | |
1433 | location info LOC. If the arguments are appropriate, create an | |
1434 | equivalent sequence of statements prior to GSI using an optimal | |
1435 | number of multiplications, and return an expession holding the | |
1436 | result. */ | |
1437 | ||
1438 | static tree | |
1439 | gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc, | |
1440 | tree arg0, HOST_WIDE_INT n) | |
1441 | { | |
1442 | /* Avoid largest negative number. */ | |
1443 | if (n != -n | |
1444 | && ((n >= -1 && n <= 2) | |
1445 | || (optimize_function_for_speed_p (cfun) | |
1446 | && powi_cost (n) <= POWI_MAX_MULTS))) | |
1447 | return powi_as_mults (gsi, loc, arg0, n); | |
1448 | ||
1449 | return NULL_TREE; | |
1450 | } | |
1451 | ||
ae43b05e | 1452 | /* Build a gimple call statement that calls FN with argument ARG. |
03d37e4e | 1453 | Set the lhs of the call statement to a fresh SSA name. Insert the |
ae43b05e | 1454 | statement prior to GSI's current position, and return the fresh |
1455 | SSA name. */ | |
1456 | ||
1457 | static tree | |
ca12eb68 | 1458 | build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc, |
03d37e4e | 1459 | tree fn, tree arg) |
ae43b05e | 1460 | { |
1a91d914 | 1461 | gcall *call_stmt; |
ae43b05e | 1462 | tree ssa_target; |
1463 | ||
ae43b05e | 1464 | call_stmt = gimple_build_call (fn, 1, arg); |
03d37e4e | 1465 | ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot"); |
ae43b05e | 1466 | gimple_set_lhs (call_stmt, ssa_target); |
1467 | gimple_set_location (call_stmt, loc); | |
1468 | gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT); | |
1469 | ||
1470 | return ssa_target; | |
1471 | } | |
1472 | ||
ca12eb68 | 1473 | /* Build a gimple binary operation with the given CODE and arguments |
1474 | ARG0, ARG1, assigning the result to a new SSA name for variable | |
1475 | TARGET. Insert the statement prior to GSI's current position, and | |
1476 | return the fresh SSA name.*/ | |
1477 | ||
1478 | static tree | |
1479 | build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, | |
03d37e4e | 1480 | const char *name, enum tree_code code, |
1481 | tree arg0, tree arg1) | |
ca12eb68 | 1482 | { |
03d37e4e | 1483 | tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name); |
e9cf809e | 1484 | gassign *stmt = gimple_build_assign (result, code, arg0, arg1); |
ca12eb68 | 1485 | gimple_set_location (stmt, loc); |
1486 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1487 | return result; | |
1488 | } | |
1489 | ||
a5c384c1 | 1490 | /* Build a gimple reference operation with the given CODE and argument |
03d37e4e | 1491 | ARG, assigning the result to a new SSA name of TYPE with NAME. |
a5c384c1 | 1492 | Insert the statement prior to GSI's current position, and return |
1493 | the fresh SSA name. */ | |
1494 | ||
1495 | static inline tree | |
1496 | build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, | |
03d37e4e | 1497 | const char *name, enum tree_code code, tree arg0) |
a5c384c1 | 1498 | { |
03d37e4e | 1499 | tree result = make_temp_ssa_name (type, NULL, name); |
42acab1c | 1500 | gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0)); |
a5c384c1 | 1501 | gimple_set_location (stmt, loc); |
1502 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1503 | return result; | |
1504 | } | |
1505 | ||
03d37e4e | 1506 | /* Build a gimple assignment to cast VAL to TYPE. Insert the statement |
aff5fb4d | 1507 | prior to GSI's current position, and return the fresh SSA name. */ |
1508 | ||
1509 | static tree | |
1510 | build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, | |
03d37e4e | 1511 | tree type, tree val) |
aff5fb4d | 1512 | { |
f9e245b2 | 1513 | tree result = make_ssa_name (type); |
e9cf809e | 1514 | gassign *stmt = gimple_build_assign (result, NOP_EXPR, val); |
03d37e4e | 1515 | gimple_set_location (stmt, loc); |
1516 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1517 | return result; | |
aff5fb4d | 1518 | } |
1519 | ||
/* Description of how an exponent decomposes into a sum of powers of
   0.5, as filled in by representable_as_half_series_p.  */
c3206272 | 1520 | struct pow_synth_sqrt_info |
1521 | { | |
1522 | bool *factors; /* factors[i] is true if 0.5**(i+1) is part of the sum; the caller provides the storage. */ | |
1523 | unsigned int deepest; /* One more than the index of the last power of 0.5 in the sum. */ | |
1524 | unsigned int num_mults; /* Number of terms in the sum other than the last one. */ | |
1525 | }; | |
1526 | ||
1527 | /* Return true iff the real value C can be represented as a | |
1528 | sum of powers of 0.5 up to N. That is: | |
1529 | C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1. | |
1530 | Record in INFO the various parameters of the synthesis algorithm such | |
1531 | as the factors a[i], the maximum 0.5 power and the number of | |
1532 | multiplications that will be required. */ | |
1533 | ||
1534 | bool | |
1535 | representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n, | |
1536 | struct pow_synth_sqrt_info *info) | |
1537 | { | |
1538 | REAL_VALUE_TYPE factor = dconsthalf; | |
1539 | REAL_VALUE_TYPE remainder = c; | |
1540 | ||
1541 | info->deepest = 0; | |
1542 | info->num_mults = 0; | |
1543 | memset (info->factors, 0, n * sizeof (bool)); | |
1544 | ||
1545 | for (unsigned i = 0; i < n; i++) | |
1546 | { | |
1547 | REAL_VALUE_TYPE res; | |
1548 | ||
1549 | /* If something inexact happened bail out now. */ | |
f2ad9e38 | 1550 | if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor)) |
c3206272 | 1551 | return false; |
1552 | ||
1553 | /* We have hit zero. The number is representable as a sum | |
1554 | of powers of 0.5. */ | |
20cb53c9 | 1555 | if (real_equal (&res, &dconst0)) |
c3206272 | 1556 | { |
1557 | info->factors[i] = true; | |
1558 | info->deepest = i + 1; | |
1559 | return true; | |
1560 | } | |
1561 | else if (!REAL_VALUE_NEGATIVE (res)) | |
1562 | { | |
1563 | remainder = res; | |
1564 | info->factors[i] = true; | |
1565 | info->num_mults++; | |
1566 | } | |
1567 | else | |
1568 | info->factors[i] = false; | |
1569 | ||
f2ad9e38 | 1570 | real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf); |
c3206272 | 1571 | } |
1572 | return false; | |
1573 | } | |
1574 | ||
1575 | /* Return the tree corresponding to FN being applied | |
1576 | to ARG N times at GSI and LOC. | |
1577 | Look up previous results from CACHE if need be. | |
1578 | cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */ | |
1579 | ||
1580 | static tree | |
1581 | get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi, | |
1582 | tree fn, location_t loc, tree *cache) | |
1583 | { | |
1584 | tree res = cache[n]; | |
1585 | if (!res) | |
1586 | { | |
1587 | tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache); | |
1588 | res = build_and_insert_call (gsi, loc, fn, prev); | |
1589 | cache[n] = res; | |
1590 | } | |
1591 | ||
1592 | return res; | |
1593 | } | |
1594 | ||
/* Write to STREAM the textual form of FNAME applied N times to ARG.
   For FNAME = "foo", ARG = "x" and N = 2 the output is
   "foo (foo (x))"; for N = 0 it is just ARG.  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  /* Opening part of every nesting level, outermost first.  */
  for (unsigned int level = 0; level < n; level++)
    fprintf (stream, "%s (", fname);

  fprintf (stream, "%s", arg);

  /* One closing parenthesis per level.  */
  for (unsigned int level = 0; level < n; level++)
    fprintf (stream, ")");
}
1612 | ||
1613 | /* Print to STREAM the fractional sequence of sqrt chains | |
1614 | applied to ARG, described by INFO. Used for the dump file. */ | |
1615 | ||
1616 | static void | |
1617 | dump_fractional_sqrt_sequence (FILE *stream, const char *arg, | |
1618 | struct pow_synth_sqrt_info *info) | |
1619 | { | |
1620 | for (unsigned int i = 0; i < info->deepest; i++) | |
1621 | { | |
1622 | bool is_set = info->factors[i]; | |
1623 | if (is_set) | |
1624 | { | |
1625 | print_nested_fn (stream, "sqrt", arg, i + 1); | |
1626 | if (i != info->deepest - 1) | |
1627 | fprintf (stream, " * "); | |
1628 | } | |
1629 | } | |
1630 | } | |
1631 | ||
1632 | /* Print to STREAM a representation of raising ARG to an integer | |
1633 | power N. Used for the dump file. */ | |
1634 | ||
1635 | static void | |
1636 | dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n) | |
1637 | { | |
1638 | if (n > 1) | |
1639 | fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n); | |
1640 | else if (n == 1) | |
1641 | fprintf (stream, "%s", arg); | |
1642 | } | |
1643 | ||
1644 | /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of | |
1645 | square roots. Place at GSI and LOC. Limit the maximum depth | |
1646 | of the sqrt chains to MAX_DEPTH. Return the tree holding the | |
1647 | result of the expanded sequence or NULL_TREE if the expansion failed. | |
1648 | ||
1649 | This routine assumes that ARG1 is a real number with a fractional part | |
1650 | (the integer exponent case will have been handled earlier in | |
1651 | gimple_expand_builtin_pow). | |
1652 | ||
1653 | For ARG1 > 0.0: | |
1654 | * For ARG1 composed of a whole part WHOLE_PART and a fractional part | |
1655 | FRAC_PART i.e. WHOLE_PART == floor (ARG1) and | |
1656 | FRAC_PART == ARG1 - WHOLE_PART: | |
1657 | Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where | |
1658 | POW (ARG0, FRAC_PART) is expanded as a product of square root chains | |
1659 | if it can be expressed as such, that is if FRAC_PART satisfies: | |
1660 | FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i)) | |
1661 | where integer a[i] is either 0 or 1. | |
1662 | ||
1663 | Example: | |
1664 | POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625) | |
1665 | --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x))) | |
1666 | ||
1667 | For ARG1 < 0.0 there are two approaches: | |
1668 | * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1) | |
1669 | is calculated as above. | |
1670 | ||
1671 | Example: | |
1672 | POW (x, -5.625) == 1.0 / POW (x, 5.625) | |
1673 | --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x)))) | |
1674 | ||
1675 | * (B) : WHOLE_PART := - ceil (abs (ARG1)) | |
1676 | FRAC_PART := ARG1 - WHOLE_PART | |
1677 | and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART). | |
1678 | Example: | |
1679 | POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6) | |
1680 | --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6)) | |
1681 | ||
1682 | For ARG1 < 0.0 we choose between (A) and (B) depending on | |
1683 | how many multiplications we'd have to do. | |
1684 | So, for the example in (B): POW (x, -5.875), if we were to | |
1685 | follow algorithm (A) we would produce: | |
1686 | 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))) | |
1687 | which contains more multiplications than approach (B). | |
1688 | ||
1689 | Hopefully, this approach will eliminate potentially expensive POW library | |
1690 | calls when unsafe floating point math is enabled and allow the compiler to | |
1691 | further optimise the multiplies, square roots and divides produced by this | |
1692 | function. */ | |
1693 | ||
1694 | static tree | |
1695 | expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc, | |
1696 | tree arg0, tree arg1, HOST_WIDE_INT max_depth) | |
1697 | { | |
1698 | tree type = TREE_TYPE (arg0); | |
1699 | machine_mode mode = TYPE_MODE (type); | |
1700 | tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); | |
1701 | bool one_over = true; | |
1702 | ||
1703 | if (!sqrtfn) | |
1704 | return NULL_TREE; | |
1705 | ||
1706 | if (TREE_CODE (arg1) != REAL_CST) | |
1707 | return NULL_TREE; | |
1708 | ||
1709 | REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1); | |
1710 | ||
1711 | gcc_assert (max_depth > 0); | |
1712 | tree *cache = XALLOCAVEC (tree, max_depth + 1); | |
1713 | ||
1714 | struct pow_synth_sqrt_info synth_info; | |
1715 | synth_info.factors = XALLOCAVEC (bool, max_depth + 1); | |
1716 | synth_info.deepest = 0; | |
1717 | synth_info.num_mults = 0; | |
1718 | ||
1719 | bool neg_exp = REAL_VALUE_NEGATIVE (exp_init); | |
1720 | REAL_VALUE_TYPE exp = real_value_abs (&exp_init); | |
1721 | ||
1722 | /* The whole and fractional parts of exp. */ | |
1723 | REAL_VALUE_TYPE whole_part; | |
1724 | REAL_VALUE_TYPE frac_part; | |
1725 | ||
1726 | real_floor (&whole_part, mode, &exp); | |
f2ad9e38 | 1727 | real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part); |
c3206272 | 1728 | |
1729 | ||
1730 | REAL_VALUE_TYPE ceil_whole = dconst0; | |
1731 | REAL_VALUE_TYPE ceil_fract = dconst0; | |
1732 | ||
1733 | if (neg_exp) | |
1734 | { | |
1735 | real_ceil (&ceil_whole, mode, &exp); | |
f2ad9e38 | 1736 | real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp); |
c3206272 | 1737 | } |
1738 | ||
1739 | if (!representable_as_half_series_p (frac_part, max_depth, &synth_info)) | |
1740 | return NULL_TREE; | |
1741 | ||
1742 | /* Check whether it's more profitable to not use 1.0 / ... */ | |
1743 | if (neg_exp) | |
1744 | { | |
1745 | struct pow_synth_sqrt_info alt_synth_info; | |
1746 | alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1); | |
1747 | alt_synth_info.deepest = 0; | |
1748 | alt_synth_info.num_mults = 0; | |
1749 | ||
1750 | if (representable_as_half_series_p (ceil_fract, max_depth, | |
1751 | &alt_synth_info) | |
1752 | && alt_synth_info.deepest <= synth_info.deepest | |
1753 | && alt_synth_info.num_mults < synth_info.num_mults) | |
1754 | { | |
1755 | whole_part = ceil_whole; | |
1756 | frac_part = ceil_fract; | |
1757 | synth_info.deepest = alt_synth_info.deepest; | |
1758 | synth_info.num_mults = alt_synth_info.num_mults; | |
1759 | memcpy (synth_info.factors, alt_synth_info.factors, | |
1760 | (max_depth + 1) * sizeof (bool)); | |
1761 | one_over = false; | |
1762 | } | |
1763 | } | |
1764 | ||
1765 | HOST_WIDE_INT n = real_to_integer (&whole_part); | |
1766 | REAL_VALUE_TYPE cint; | |
1767 | real_from_integer (&cint, VOIDmode, n, SIGNED); | |
1768 | ||
1769 | if (!real_identical (&whole_part, &cint)) | |
1770 | return NULL_TREE; | |
1771 | ||
1772 | if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS) | |
1773 | return NULL_TREE; | |
1774 | ||
1775 | memset (cache, 0, (max_depth + 1) * sizeof (tree)); | |
1776 | ||
1777 | tree integer_res = n == 0 ? build_real (type, dconst1) : arg0; | |
1778 | ||
1779 | /* Calculate the integer part of the exponent. */ | |
1780 | if (n > 1) | |
1781 | { | |
1782 | integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n); | |
1783 | if (!integer_res) | |
1784 | return NULL_TREE; | |
1785 | } | |
1786 | ||
1787 | if (dump_file) | |
1788 | { | |
1789 | char string[64]; | |
1790 | ||
1791 | real_to_decimal (string, &exp_init, sizeof (string), 0, 1); | |
1792 | fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string); | |
1793 | ||
1794 | if (neg_exp) | |
1795 | { | |
1796 | if (one_over) | |
1797 | { | |
1798 | fprintf (dump_file, "1.0 / ("); | |
1799 | dump_integer_part (dump_file, "x", n); | |
1800 | if (n > 0) | |
1801 | fprintf (dump_file, " * "); | |
1802 | dump_fractional_sqrt_sequence (dump_file, "x", &synth_info); | |
1803 | fprintf (dump_file, ")"); | |
1804 | } | |
1805 | else | |
1806 | { | |
1807 | dump_fractional_sqrt_sequence (dump_file, "x", &synth_info); | |
1808 | fprintf (dump_file, " / ("); | |
1809 | dump_integer_part (dump_file, "x", n); | |
1810 | fprintf (dump_file, ")"); | |
1811 | } | |
1812 | } | |
1813 | else | |
1814 | { | |
1815 | dump_fractional_sqrt_sequence (dump_file, "x", &synth_info); | |
1816 | if (n > 0) | |
1817 | fprintf (dump_file, " * "); | |
1818 | dump_integer_part (dump_file, "x", n); | |
1819 | } | |
1820 | ||
1821 | fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest); | |
1822 | } | |
1823 | ||
1824 | ||
1825 | tree fract_res = NULL_TREE; | |
1826 | cache[0] = arg0; | |
1827 | ||
1828 | /* Calculate the fractional part of the exponent. */ | |
1829 | for (unsigned i = 0; i < synth_info.deepest; i++) | |
1830 | { | |
1831 | if (synth_info.factors[i]) | |
1832 | { | |
1833 | tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache); | |
1834 | ||
1835 | if (!fract_res) | |
1836 | fract_res = sqrt_chain; | |
1837 | ||
1838 | else | |
1839 | fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, | |
1840 | fract_res, sqrt_chain); | |
1841 | } | |
1842 | } | |
1843 | ||
1844 | tree res = NULL_TREE; | |
1845 | ||
1846 | if (neg_exp) | |
1847 | { | |
1848 | if (one_over) | |
1849 | { | |
1850 | if (n > 0) | |
1851 | res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, | |
1852 | fract_res, integer_res); | |
1853 | else | |
1854 | res = fract_res; | |
1855 | ||
1856 | res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR, | |
1857 | build_real (type, dconst1), res); | |
1858 | } | |
1859 | else | |
1860 | { | |
1861 | res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR, | |
1862 | fract_res, integer_res); | |
1863 | } | |
1864 | } | |
1865 | else | |
1866 | res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, | |
1867 | fract_res, integer_res); | |
1868 | return res; | |
1869 | } | |
1870 | ||
e78306af | 1871 | /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI |
1872 | with location info LOC. If possible, create an equivalent and | |
1873 | less expensive sequence of statements prior to GSI, and return an | |
1874 | expession holding the result. */ | |
1875 | ||
1876 | static tree | |
1877 | gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, | |
1878 | tree arg0, tree arg1) | |
1879 | { | |
c3206272 | 1880 | REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6; |
ca12eb68 | 1881 | REAL_VALUE_TYPE c2, dconst3; |
e78306af | 1882 | HOST_WIDE_INT n; |
c3206272 | 1883 | tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x; |
3754d046 | 1884 | machine_mode mode; |
c3206272 | 1885 | bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi)); |
0190fe95 | 1886 | bool hw_sqrt_exists, c_is_int, c2_is_int; |
e78306af | 1887 | |
c3206272 | 1888 | dconst1_4 = dconst1; |
1889 | SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); | |
1890 | ||
e78306af | 1891 | /* If the exponent isn't a constant, there's nothing of interest |
1892 | to be done. */ | |
1893 | if (TREE_CODE (arg1) != REAL_CST) | |
1894 | return NULL_TREE; | |
1895 | ||
9f27d92a | 1896 | /* Don't perform the operation if flag_signaling_nans is on |
1897 | and the operand is a signaling NaN. */ | |
1898 | if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1))) | |
2a659064 | 1899 | && ((TREE_CODE (arg0) == REAL_CST |
1900 | && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0))) | |
9f27d92a | 1901 | || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1)))) |
1902 | return NULL_TREE; | |
1903 | ||
ae43b05e | 1904 | /* If the exponent is equivalent to an integer, expand to an optimal |
1905 | multiplication sequence when profitable. */ | |
e78306af | 1906 | c = TREE_REAL_CST (arg1); |
1907 | n = real_to_integer (&c); | |
e913b5cd | 1908 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
0190fe95 | 1909 | c_is_int = real_identical (&c, &cint); |
e78306af | 1910 | |
0190fe95 | 1911 | if (c_is_int |
e78306af | 1912 | && ((n >= -1 && n <= 2) |
1913 | || (flag_unsafe_math_optimizations | |
c3206272 | 1914 | && speed_p |
e78306af | 1915 | && powi_cost (n) <= POWI_MAX_MULTS))) |
1916 | return gimple_expand_builtin_powi (gsi, loc, arg0, n); | |
1917 | ||
ae43b05e | 1918 | /* Attempt various optimizations using sqrt and cbrt. */ |
1919 | type = TREE_TYPE (arg0); | |
1920 | mode = TYPE_MODE (type); | |
1921 | sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); | |
1922 | ||
1923 | /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe | |
1924 | unless signed zeros must be maintained. pow(-0,0.5) = +0, while | |
1925 | sqrt(-0) = -0. */ | |
1926 | if (sqrtfn | |
20cb53c9 | 1927 | && real_equal (&c, &dconsthalf) |
ae43b05e | 1928 | && !HONOR_SIGNED_ZEROS (mode)) |
03d37e4e | 1929 | return build_and_insert_call (gsi, loc, sqrtfn, arg0); |
ae43b05e | 1930 | |
a5c384c1 | 1931 | hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing; |
ae43b05e | 1932 | |
ae43b05e | 1933 | /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math |
1934 | optimizations since 1./3. is not exactly representable. If x | |
1935 | is negative and finite, the correct value of pow(x,1./3.) is | |
1936 | a NaN with the "invalid" exception raised, because the value | |
1937 | of 1./3. actually has an even denominator. The correct value | |
1938 | of cbrt(x) is a negative real value. */ | |
1939 | cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT); | |
1940 | dconst1_3 = real_value_truncate (mode, dconst_third ()); | |
1941 | ||
1942 | if (flag_unsafe_math_optimizations | |
1943 | && cbrtfn | |
ee230333 | 1944 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
20cb53c9 | 1945 | && real_equal (&c, &dconst1_3)) |
03d37e4e | 1946 | return build_and_insert_call (gsi, loc, cbrtfn, arg0); |
ae43b05e | 1947 | |
1948 | /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization | |
1949 | if we don't have a hardware sqrt insn. */ | |
1950 | dconst1_6 = dconst1_3; | |
1951 | SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1); | |
1952 | ||
1953 | if (flag_unsafe_math_optimizations | |
1954 | && sqrtfn | |
1955 | && cbrtfn | |
ee230333 | 1956 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
c3206272 | 1957 | && speed_p |
ae43b05e | 1958 | && hw_sqrt_exists |
20cb53c9 | 1959 | && real_equal (&c, &dconst1_6)) |
ae43b05e | 1960 | { |
1961 | /* sqrt(x) */ | |
03d37e4e | 1962 | sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0); |
ae43b05e | 1963 | |
1964 | /* cbrt(sqrt(x)) */ | |
03d37e4e | 1965 | return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0); |
ca12eb68 | 1966 | } |
1967 | ||
ca12eb68 | 1968 | |
c3206272 | 1969 | /* Attempt to expand the POW as a product of square root chains. |
1970 | Expand the 0.25 case even when otpimising for size. */ | |
ca12eb68 | 1971 | if (flag_unsafe_math_optimizations |
1972 | && sqrtfn | |
c3206272 | 1973 | && hw_sqrt_exists |
20cb53c9 | 1974 | && (speed_p || real_equal (&c, &dconst1_4)) |
c3206272 | 1975 | && !HONOR_SIGNED_ZEROS (mode)) |
ca12eb68 | 1976 | { |
c3206272 | 1977 | unsigned int max_depth = speed_p |
1978 | ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH) | |
1979 | : 2; | |
ca12eb68 | 1980 | |
c3206272 | 1981 | tree expand_with_sqrts |
1982 | = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth); | |
ca12eb68 | 1983 | |
c3206272 | 1984 | if (expand_with_sqrts) |
1985 | return expand_with_sqrts; | |
ca12eb68 | 1986 | } |
1987 | ||
c3206272 | 1988 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); |
1989 | n = real_to_integer (&c2); | |
1990 | real_from_integer (&cint, VOIDmode, n, SIGNED); | |
1991 | c2_is_int = real_identical (&c2, &cint); | |
1992 | ||
ca12eb68 | 1993 | /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into |
1994 | ||
1995 | powi(x, n/3) * powi(cbrt(x), n%3), n > 0; | |
1996 | 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0. | |
1997 | ||
1998 | Do not calculate the first factor when n/3 = 0. As cbrt(x) is | |
1999 | different from pow(x, 1./3.) due to rounding and behavior with | |
2000 | negative x, we need to constrain this transformation to unsafe | |
2001 | math and positive x or finite math. */ | |
e913b5cd | 2002 | real_from_integer (&dconst3, VOIDmode, 3, SIGNED); |
ca12eb68 | 2003 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst3); |
2004 | real_round (&c2, mode, &c2); | |
2005 | n = real_to_integer (&c2); | |
e913b5cd | 2006 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
ca12eb68 | 2007 | real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3); |
2008 | real_convert (&c2, mode, &c2); | |
2009 | ||
2010 | if (flag_unsafe_math_optimizations | |
2011 | && cbrtfn | |
ee230333 | 2012 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
ca12eb68 | 2013 | && real_identical (&c2, &c) |
0190fe95 | 2014 | && !c2_is_int |
ca12eb68 | 2015 | && optimize_function_for_speed_p (cfun) |
2016 | && powi_cost (n / 3) <= POWI_MAX_MULTS) | |
2017 | { | |
2018 | tree powi_x_ndiv3 = NULL_TREE; | |
2019 | ||
2020 | /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not | |
2021 | possible or profitable, give up. Skip the degenerate case when | |
2022 | abs(n) < 3, where the result is always 1. */ | |
b1757d46 | 2023 | if (absu_hwi (n) >= 3) |
ca12eb68 | 2024 | { |
2025 | powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0, | |
5ebd604f | 2026 | abs_hwi (n / 3)); |
ca12eb68 | 2027 | if (!powi_x_ndiv3) |
2028 | return NULL_TREE; | |
2029 | } | |
2030 | ||
2031 | /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi | |
2032 | as that creates an unnecessary variable. Instead, just produce | |
2033 | either cbrt(x) or cbrt(x) * cbrt(x). */ | |
03d37e4e | 2034 | cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0); |
ca12eb68 | 2035 | |
b1757d46 | 2036 | if (absu_hwi (n) % 3 == 1) |
ca12eb68 | 2037 | powi_cbrt_x = cbrt_x; |
2038 | else | |
03d37e4e | 2039 | powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, |
ca12eb68 | 2040 | cbrt_x, cbrt_x); |
2041 | ||
2042 | /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */ | |
b1757d46 | 2043 | if (absu_hwi (n) < 3) |
ca12eb68 | 2044 | result = powi_cbrt_x; |
2045 | else | |
03d37e4e | 2046 | result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, |
ca12eb68 | 2047 | powi_x_ndiv3, powi_cbrt_x); |
2048 | ||
2049 | /* If n is negative, reciprocate the result. */ | |
2050 | if (n < 0) | |
03d37e4e | 2051 | result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR, |
ca12eb68 | 2052 | build_real (type, dconst1), result); |
2053 | ||
2054 | return result; | |
ae43b05e | 2055 | } |
2056 | ||
ca12eb68 | 2057 | /* No optimizations succeeded. */ |
e78306af | 2058 | return NULL_TREE; |
2059 | } | |
2060 | ||
a5c384c1 | 2061 | /* ARG is the argument to a cabs builtin call in GSI with location info |
2062 | LOC. Create a sequence of statements prior to GSI that calculates | |
2063 | sqrt(R*R + I*I), where R and I are the real and imaginary components | |
2064 | of ARG, respectively. Return an expression holding the result. */ | |
2065 | ||
2066 | static tree | |
2067 | gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) | |
2068 | { | |
03d37e4e | 2069 | tree real_part, imag_part, addend1, addend2, sum, result; |
a5c384c1 | 2070 | tree type = TREE_TYPE (TREE_TYPE (arg)); |
2071 | tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); | |
3754d046 | 2072 | machine_mode mode = TYPE_MODE (type); |
a5c384c1 | 2073 | |
2074 | if (!flag_unsafe_math_optimizations | |
2075 | || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi))) | |
2076 | || !sqrtfn | |
2077 | || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing) | |
2078 | return NULL_TREE; | |
2079 | ||
03d37e4e | 2080 | real_part = build_and_insert_ref (gsi, loc, type, "cabs", |
a5c384c1 | 2081 | REALPART_EXPR, arg); |
03d37e4e | 2082 | addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |
a5c384c1 | 2083 | real_part, real_part); |
03d37e4e | 2084 | imag_part = build_and_insert_ref (gsi, loc, type, "cabs", |
a5c384c1 | 2085 | IMAGPART_EXPR, arg); |
03d37e4e | 2086 | addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |
a5c384c1 | 2087 | imag_part, imag_part); |
03d37e4e | 2088 | sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2); |
2089 | result = build_and_insert_call (gsi, loc, sqrtfn, sum); | |
a5c384c1 | 2090 | |
2091 | return result; | |
2092 | } | |
2093 | ||
a0315874 | 2094 | /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 |
e9a6c4bc | 2095 | on the SSA_NAME argument of each of them. Also expand powi(x,n) into |
2096 | an optimal number of multiplies, when n is a constant. */ | |
a0315874 | 2097 | |
65b0537f | 2098 | namespace { |
2099 | ||
2100 | const pass_data pass_data_cse_sincos = | |
2101 | { | |
2102 | GIMPLE_PASS, /* type */ | |
2103 | "sincos", /* name */ | |
2104 | OPTGROUP_NONE, /* optinfo_flags */ | |
8ed378fe | 2105 | TV_TREE_SINCOS, /* tv_id */ |
65b0537f | 2106 | PROP_ssa, /* properties_required */ |
a153e7b3 | 2107 | PROP_gimple_opt_math, /* properties_provided */ |
65b0537f | 2108 | 0, /* properties_destroyed */ |
2109 | 0, /* todo_flags_start */ | |
8b88439e | 2110 | TODO_update_ssa, /* todo_flags_finish */ |
65b0537f | 2111 | }; |
2112 | ||
2113 | class pass_cse_sincos : public gimple_opt_pass | |
2114 | { | |
2115 | public: | |
2116 | pass_cse_sincos (gcc::context *ctxt) | |
2117 | : gimple_opt_pass (pass_data_cse_sincos, ctxt) | |
2118 | {} | |
2119 | ||
2120 | /* opt_pass methods: */ | |
2121 | virtual bool gate (function *) | |
2122 | { | |
2123 | /* We no longer require either sincos or cexp, since powi expansion | |
2124 | piggybacks on this pass. */ | |
2125 | return optimize; | |
2126 | } | |
2127 | ||
2128 | virtual unsigned int execute (function *); | |
2129 | ||
2130 | }; // class pass_cse_sincos | |
2131 | ||
2132 | unsigned int | |
2133 | pass_cse_sincos::execute (function *fun) | |
a0315874 | 2134 | { |
2135 | basic_block bb; | |
4c80086d | 2136 | bool cfg_changed = false; |
a0315874 | 2137 | |
2138 | calculate_dominance_info (CDI_DOMINATORS); | |
30c4e60d | 2139 | memset (&sincos_stats, 0, sizeof (sincos_stats)); |
a0315874 | 2140 | |
65b0537f | 2141 | FOR_EACH_BB_FN (bb, fun) |
a0315874 | 2142 | { |
75a70cf9 | 2143 | gimple_stmt_iterator gsi; |
2a155cf0 | 2144 | bool cleanup_eh = false; |
a0315874 | 2145 | |
75a70cf9 | 2146 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
a0315874 | 2147 | { |
42acab1c | 2148 | gimple *stmt = gsi_stmt (gsi); |
a0315874 | 2149 | |
2a155cf0 | 2150 | /* Only the last stmt in a bb could throw, no need to call |
2151 | gimple_purge_dead_eh_edges if we change something in the middle | |
2152 | of a basic block. */ | |
2153 | cleanup_eh = false; | |
2154 | ||
fa0793ad | 2155 | if (is_gimple_call (stmt) |
5e8b972c | 2156 | && gimple_call_lhs (stmt)) |
a0315874 | 2157 | { |
e9a6c4bc | 2158 | tree arg, arg0, arg1, result; |
2159 | HOST_WIDE_INT n; | |
2160 | location_t loc; | |
a0315874 | 2161 | |
fa0793ad | 2162 | switch (gimple_call_combined_fn (stmt)) |
a0315874 | 2163 | { |
fa0793ad | 2164 | CASE_CFN_COS: |
2165 | CASE_CFN_SIN: | |
2166 | CASE_CFN_CEXPI: | |
d312d7df | 2167 | /* Make sure we have either sincos or cexp. */ |
30f690e0 | 2168 | if (!targetm.libc_has_function (function_c99_math_complex) |
2169 | && !targetm.libc_has_function (function_sincos)) | |
d312d7df | 2170 | break; |
2171 | ||
75a70cf9 | 2172 | arg = gimple_call_arg (stmt, 0); |
a0315874 | 2173 | if (TREE_CODE (arg) == SSA_NAME) |
4c80086d | 2174 | cfg_changed |= execute_cse_sincos_1 (arg); |
a0315874 | 2175 | break; |
2176 | ||
fa0793ad | 2177 | CASE_CFN_POW: |
e78306af | 2178 | arg0 = gimple_call_arg (stmt, 0); |
2179 | arg1 = gimple_call_arg (stmt, 1); | |
2180 | ||
2181 | loc = gimple_location (stmt); | |
2182 | result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1); | |
2183 | ||
2184 | if (result) | |
2185 | { | |
2186 | tree lhs = gimple_get_lhs (stmt); | |
1a91d914 | 2187 | gassign *new_stmt = gimple_build_assign (lhs, result); |
e78306af | 2188 | gimple_set_location (new_stmt, loc); |
2189 | unlink_stmt_vdef (stmt); | |
2190 | gsi_replace (&gsi, new_stmt, true); | |
2a155cf0 | 2191 | cleanup_eh = true; |
bc8a8451 | 2192 | if (gimple_vdef (stmt)) |
2193 | release_ssa_name (gimple_vdef (stmt)); | |
e78306af | 2194 | } |
2195 | break; | |
2196 | ||
fa0793ad | 2197 | CASE_CFN_POWI: |
e9a6c4bc | 2198 | arg0 = gimple_call_arg (stmt, 0); |
2199 | arg1 = gimple_call_arg (stmt, 1); | |
e9a6c4bc | 2200 | loc = gimple_location (stmt); |
377db285 | 2201 | |
6dfe7d53 | 2202 | if (real_minus_onep (arg0)) |
377db285 | 2203 | { |
2204 | tree t0, t1, cond, one, minus_one; | |
1a91d914 | 2205 | gassign *stmt; |
377db285 | 2206 | |
2207 | t0 = TREE_TYPE (arg0); | |
2208 | t1 = TREE_TYPE (arg1); | |
2209 | one = build_real (t0, dconst1); | |
2210 | minus_one = build_real (t0, dconstm1); | |
2211 | ||
2212 | cond = make_temp_ssa_name (t1, NULL, "powi_cond"); | |
e9cf809e | 2213 | stmt = gimple_build_assign (cond, BIT_AND_EXPR, |
2214 | arg1, build_int_cst (t1, 1)); | |
377db285 | 2215 | gimple_set_location (stmt, loc); |
2216 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
2217 | ||
2218 | result = make_temp_ssa_name (t0, NULL, "powi"); | |
e9cf809e | 2219 | stmt = gimple_build_assign (result, COND_EXPR, cond, |
2220 | minus_one, one); | |
377db285 | 2221 | gimple_set_location (stmt, loc); |
2222 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
2223 | } | |
2224 | else | |
2225 | { | |
e913b5cd | 2226 | if (!tree_fits_shwi_p (arg1)) |
d48be958 | 2227 | break; |
2228 | ||
e913b5cd | 2229 | n = tree_to_shwi (arg1); |
377db285 | 2230 | result = gimple_expand_builtin_powi (&gsi, loc, arg0, n); |
2231 | } | |
e9a6c4bc | 2232 | |
2233 | if (result) | |
2234 | { | |
2235 | tree lhs = gimple_get_lhs (stmt); | |
1a91d914 | 2236 | gassign *new_stmt = gimple_build_assign (lhs, result); |
e9a6c4bc | 2237 | gimple_set_location (new_stmt, loc); |
a5c384c1 | 2238 | unlink_stmt_vdef (stmt); |
2239 | gsi_replace (&gsi, new_stmt, true); | |
2a155cf0 | 2240 | cleanup_eh = true; |
bc8a8451 | 2241 | if (gimple_vdef (stmt)) |
2242 | release_ssa_name (gimple_vdef (stmt)); | |
a5c384c1 | 2243 | } |
2244 | break; | |
2245 | ||
fa0793ad | 2246 | CASE_CFN_CABS: |
a5c384c1 | 2247 | arg0 = gimple_call_arg (stmt, 0); |
2248 | loc = gimple_location (stmt); | |
2249 | result = gimple_expand_builtin_cabs (&gsi, loc, arg0); | |
2250 | ||
2251 | if (result) | |
2252 | { | |
2253 | tree lhs = gimple_get_lhs (stmt); | |
1a91d914 | 2254 | gassign *new_stmt = gimple_build_assign (lhs, result); |
a5c384c1 | 2255 | gimple_set_location (new_stmt, loc); |
e9a6c4bc | 2256 | unlink_stmt_vdef (stmt); |
2257 | gsi_replace (&gsi, new_stmt, true); | |
2a155cf0 | 2258 | cleanup_eh = true; |
bc8a8451 | 2259 | if (gimple_vdef (stmt)) |
2260 | release_ssa_name (gimple_vdef (stmt)); | |
e9a6c4bc | 2261 | } |
2262 | break; | |
2263 | ||
a0315874 | 2264 | default:; |
2265 | } | |
2266 | } | |
2267 | } | |
2a155cf0 | 2268 | if (cleanup_eh) |
2269 | cfg_changed |= gimple_purge_dead_eh_edges (bb); | |
a0315874 | 2270 | } |
2271 | ||
65b0537f | 2272 | statistics_counter_event (fun, "sincos statements inserted", |
30c4e60d | 2273 | sincos_stats.inserted); |
2274 | ||
4c80086d | 2275 | return cfg_changed ? TODO_cleanup_cfg : 0; |
a0315874 | 2276 | } |
2277 | ||
cbe8bda8 | 2278 | } // anon namespace |
2279 | ||
2280 | gimple_opt_pass * | |
2281 | make_pass_cse_sincos (gcc::context *ctxt) | |
2282 | { | |
2283 | return new pass_cse_sincos (ctxt); | |
2284 | } | |
2285 | ||
71dbd910 | 2286 | /* Return true if stmt is a type conversion operation that can be stripped |
2287 | when used in a widening multiply operation. */ | |
2288 | static bool | |
42acab1c | 2289 | widening_mult_conversion_strippable_p (tree result_type, gimple *stmt) |
71dbd910 | 2290 | { |
2291 | enum tree_code rhs_code = gimple_assign_rhs_code (stmt); | |
2292 | ||
2293 | if (TREE_CODE (result_type) == INTEGER_TYPE) | |
2294 | { | |
2295 | tree op_type; | |
2296 | tree inner_op_type; | |
2297 | ||
2298 | if (!CONVERT_EXPR_CODE_P (rhs_code)) | |
2299 | return false; | |
2300 | ||
2301 | op_type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
2302 | ||
2303 | /* If the type of OP has the same precision as the result, then | |
2304 | we can strip this conversion. The multiply operation will be | |
2305 | selected to create the correct extension as a by-product. */ | |
2306 | if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type)) | |
2307 | return true; | |
2308 | ||
2309 | /* We can also strip a conversion if it preserves the signed-ness of | |
2310 | the operation and doesn't narrow the range. */ | |
2311 | inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); | |
2312 | ||
8f9d1531 | 2313 | /* If the inner-most type is unsigned, then we can strip any |
2314 | intermediate widening operation. If it's signed, then the | |
2315 | intermediate widening operation must also be signed. */ | |
2316 | if ((TYPE_UNSIGNED (inner_op_type) | |
2317 | || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type)) | |
71dbd910 | 2318 | && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type)) |
2319 | return true; | |
2320 | ||
2321 | return false; | |
2322 | } | |
2323 | ||
2324 | return rhs_code == FIXED_CONVERT_EXPR; | |
2325 | } | |
2326 | ||
0989f516 | 2327 | /* Return true if RHS is a suitable operand for a widening multiplication, |
2328 | assuming a target type of TYPE. | |
7e4c867e | 2329 | There are two cases: |
2330 | ||
aff5fb4d | 2331 | - RHS makes some value at least twice as wide. Store that value |
2332 | in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. | |
7e4c867e | 2333 | |
2334 | - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, | |
2335 | but leave *TYPE_OUT untouched. */ | |
00f4f705 | 2336 | |
2337 | static bool | |
0989f516 | 2338 | is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, |
2339 | tree *new_rhs_out) | |
7e4c867e | 2340 | { |
42acab1c | 2341 | gimple *stmt; |
0989f516 | 2342 | tree type1, rhs1; |
7e4c867e | 2343 | |
2344 | if (TREE_CODE (rhs) == SSA_NAME) | |
2345 | { | |
7e4c867e | 2346 | stmt = SSA_NAME_DEF_STMT (rhs); |
0989f516 | 2347 | if (is_gimple_assign (stmt)) |
2348 | { | |
71dbd910 | 2349 | if (! widening_mult_conversion_strippable_p (type, stmt)) |
0989f516 | 2350 | rhs1 = rhs; |
2351 | else | |
ffebd9c5 | 2352 | { |
2353 | rhs1 = gimple_assign_rhs1 (stmt); | |
2354 | ||
2355 | if (TREE_CODE (rhs1) == INTEGER_CST) | |
2356 | { | |
2357 | *new_rhs_out = rhs1; | |
2358 | *type_out = NULL; | |
2359 | return true; | |
2360 | } | |
2361 | } | |
0989f516 | 2362 | } |
2363 | else | |
2364 | rhs1 = rhs; | |
7e4c867e | 2365 | |
7e4c867e | 2366 | type1 = TREE_TYPE (rhs1); |
0989f516 | 2367 | |
7e4c867e | 2368 | if (TREE_CODE (type1) != TREE_CODE (type) |
aff5fb4d | 2369 | || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) |
7e4c867e | 2370 | return false; |
2371 | ||
2372 | *new_rhs_out = rhs1; | |
2373 | *type_out = type1; | |
2374 | return true; | |
2375 | } | |
2376 | ||
2377 | if (TREE_CODE (rhs) == INTEGER_CST) | |
2378 | { | |
2379 | *new_rhs_out = rhs; | |
2380 | *type_out = NULL; | |
2381 | return true; | |
2382 | } | |
2383 | ||
2384 | return false; | |
2385 | } | |
2386 | ||
0989f516 | 2387 | /* Return true if STMT performs a widening multiplication, where the |
2388 | output type is the type of the lhs of STMT. If so, store the unwidened types of the operands | |
2389 | in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and | |
2390 | *RHS2_OUT such that converting those operands to types *TYPE1_OUT | |
2391 | and *TYPE2_OUT would give the operands of the multiplication. On success the outputs are ordered so that TYPE_PRECISION (*TYPE1_OUT) is not smaller than TYPE_PRECISION (*TYPE2_OUT), which may swap the two operands. */ | |
7e4c867e | 2392 | |
2393 | static bool | |
42acab1c | 2394 | is_widening_mult_p (gimple *stmt, |
7e4c867e | 2395 | tree *type1_out, tree *rhs1_out, |
2396 | tree *type2_out, tree *rhs2_out) | |
00f4f705 | 2397 | { |
4333b41f | 2398 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
2399 | ||
f2ef7276 | 2400 | if (TREE_CODE (type) == INTEGER_TYPE) |
2401 | { | |
2402 | if (TYPE_OVERFLOW_TRAPS (type)) /* Integer types with trapping overflow are rejected. */ | |
2403 | return false; | |
2404 | } | |
2405 | else if (TREE_CODE (type) != FIXED_POINT_TYPE) /* Only integer and fixed-point results are handled. */ | |
7e4c867e | 2406 | return false; |
00f4f705 | 2407 | |
0989f516 | 2408 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, |
2409 | rhs1_out)) | |
00f4f705 | 2410 | return false; |
2411 | ||
0989f516 | 2412 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, |
2413 | rhs2_out)) | |
7e4c867e | 2414 | return false; |
00f4f705 | 2415 | |
7e4c867e | 2416 | if (*type1_out == NULL) /* No type was recorded for operand 1 (is_widening_mult_rhs_p does this for an INTEGER_CST). */ |
00f4f705 | 2417 | { |
7e4c867e | 2418 | if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out)) /* Fail if operand 2 has no type either, or *RHS1_OUT fails int_fits_type_p for it. */ |
00f4f705 | 2419 | return false; |
7e4c867e | 2420 | *type1_out = *type2_out; /* Borrow the other operand's type. */ |
00f4f705 | 2421 | } |
00f4f705 | 2422 | |
7e4c867e | 2423 | if (*type2_out == NULL) /* Same treatment for operand 2; *TYPE1_OUT is known to be non-NULL here. */ |
00f4f705 | 2424 | { |
7e4c867e | 2425 | if (!int_fits_type_p (*rhs2_out, *type1_out)) |
00f4f705 | 2426 | return false; |
7e4c867e | 2427 | *type2_out = *type1_out; |
00f4f705 | 2428 | } |
00f4f705 | 2429 | |
287c271c | 2430 | /* Ensure that the larger of the two operands comes first. */ |
2431 | if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) | |
2432 | { | |
dfcf26a5 | 2433 | std::swap (*type1_out, *type2_out); |
2434 | std::swap (*rhs1_out, *rhs2_out); | |
287c271c | 2435 | } |
aff5fb4d | 2436 | |
7e4c867e | 2437 | return true; |
2438 | } | |
00f4f705 | 2439 | |
eb728046 | 2440 | /* Check to see if the CALL statement is an invocation of copysign |
2441 | with 1. being the first argument. Both the copysign built-ins and IFN_COPYSIGN are recognized; anything else, including a statement that is not a call, yields false. */ | |
2442 | static bool | |
2443 | is_copysign_call_with_1 (gimple *call) | |
2444 | { | |
2445 | gcall *c = dyn_cast <gcall *> (call); | |
2446 | if (! c) /* CALL is not a gcall. */ | |
2447 | return false; | |
2448 | ||
2449 | enum combined_fn code = gimple_call_combined_fn (c); | |
2450 | ||
2451 | if (code == CFN_LAST) /* NOTE(review): presumably means the callee is neither a known built-in nor an internal function; confirm. */ | |
2452 | return false; | |
2453 | ||
2454 | if (builtin_fn_p (code)) | |
2455 | { | |
2456 | switch (as_builtin_fn (code)) | |
2457 | { | |
2458 | CASE_FLT_FN (BUILT_IN_COPYSIGN): | |
2459 | CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN): | |
2460 | return real_onep (gimple_call_arg (c, 0)); /* The answer depends only on the first argument. */ | |
2461 | default: | |
2462 | return false; | |
2463 | } | |
2464 | } | |
2465 | ||
2466 | if (internal_fn_p (code)) | |
2467 | { | |
2468 | switch (as_internal_fn (code)) | |
2469 | { | |
2470 | case IFN_COPYSIGN: | |
2471 | return real_onep (gimple_call_arg (c, 0)); /* Same first-argument test as for the built-ins. */ | |
2472 | default: | |
2473 | return false; | |
2474 | } | |
2475 | } | |
2476 | ||
2477 | return false; | |
2478 | } | |
2479 | ||
2480 | /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y). | |
2481 | This only happens when the xorsign optab is defined; if the | |
2482 | pattern is not a xorsign pattern or if expansion fails FALSE is | |
2483 | returned, otherwise TRUE is returned and the statement at GSI has been replaced by an IFN_XORSIGN call that defines the lhs of STMT. */ | |
2484 | static bool | |
2485 | convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi) | |
2486 | { | |
2487 | tree treeop0, treeop1, lhs, type; | |
2488 | location_t loc = gimple_location (stmt); | |
2489 | lhs = gimple_assign_lhs (stmt); | |
2490 | treeop0 = gimple_assign_rhs1 (stmt); | |
2491 | treeop1 = gimple_assign_rhs2 (stmt); | |
2492 | type = TREE_TYPE (lhs); | |
2493 | machine_mode mode = TYPE_MODE (type); | |
2494 | ||
3aa2a10c | 2495 | if (HONOR_SNANS (type)) /* The transformation is not attempted when HONOR_SNANS holds for TYPE. */ |
eb728046 | 2496 | return false; |
2497 | ||
2498 | if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME) | |
2499 | { | |
2500 | gimple *call0 = SSA_NAME_DEF_STMT (treeop0); /* First try operand 0 as the copysign (1, y) call. */ | |
3aa2a10c | 2501 | if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0)) |
eb728046 | 2502 | { |
2503 | call0 = SSA_NAME_DEF_STMT (treeop1); /* Otherwise try operand 1. */ | |
3aa2a10c | 2504 | if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0)) |
eb728046 | 2505 | return false; |
2506 | ||
2507 | treeop1 = treeop0; /* From here on treeop1 is the operand that is not the copysign call (x). */ | |
2508 | } | |
eb728046 | 2509 | if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing) |
2510 | return false; | |
2511 | ||
2512 | gcall *c = as_a<gcall*> (call0); | |
2513 | treeop0 = gimple_call_arg (c, 1); /* The second argument of the copysign call (y). */ | |
2514 | ||
2515 | gcall *call_stmt | |
2516 | = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0); | |
2517 | gimple_set_lhs (call_stmt, lhs); | |
2518 | gimple_set_location (call_stmt, loc); | |
2519 | gsi_replace (gsi, call_stmt, true); | |
2520 | return true; | |
2521 | } | |
2522 | ||
2523 | return false; | |
2524 | } | |
2525 | ||
7e4c867e | 2526 | /* Process a single gimple statement STMT, which has a MULT_EXPR as |
2527 | its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return | |
2528 | value is true iff we converted the statement. Only INTEGER_TYPE results are handled. GSI is passed to build_and_insert_cast when an operand has to be cast to the precision or signedness chosen for the instruction. */ | |
2529 | ||
2530 | static bool | |
42acab1c | 2531 | convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi) |
7e4c867e | 2532 | { |
03d37e4e | 2533 | tree lhs, rhs1, rhs2, type, type1, type2; |
7e4c867e | 2534 | enum insn_code handler; |
d2a1b453 | 2535 | scalar_int_mode to_mode, from_mode, actual_mode; |
5a574e8b | 2536 | optab op; |
aff5fb4d | 2537 | int actual_precision; |
2538 | location_t loc = gimple_location (stmt); | |
3f2ab719 | 2539 | bool from_unsigned1, from_unsigned2; |
7e4c867e | 2540 | |
2541 | lhs = gimple_assign_lhs (stmt); | |
2542 | type = TREE_TYPE (lhs); | |
2543 | if (TREE_CODE (type) != INTEGER_TYPE) | |
00f4f705 | 2544 | return false; |
2545 | ||
4333b41f | 2546 | if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) |
00f4f705 | 2547 | return false; |
2548 | ||
03b7a719 | 2549 | to_mode = SCALAR_INT_TYPE_MODE (type); |
2550 | from_mode = SCALAR_INT_TYPE_MODE (type1); | |
f90f6ff1 | 2551 | if (to_mode == from_mode) /* Result and operand share a mode: give up. */ |
2552 | return false; | |
2553 | ||
3f2ab719 | 2554 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2555 | from_unsigned2 = TYPE_UNSIGNED (type2); | |
5a574e8b | 2556 | |
3f2ab719 | 2557 | if (from_unsigned1 && from_unsigned2) /* Choose the optab from the signedness of the two operands. */ |
5a574e8b | 2558 | op = umul_widen_optab; |
3f2ab719 | 2559 | else if (!from_unsigned1 && !from_unsigned2) |
5a574e8b | 2560 | op = smul_widen_optab; |
00f4f705 | 2561 | else |
5a574e8b | 2562 | op = usmul_widen_optab; |
2563 | ||
aff5fb4d | 2564 | handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, |
d2a1b453 | 2565 | &actual_mode); |
7e4c867e | 2566 | |
2567 | if (handler == CODE_FOR_nothing) | |
3f2ab719 | 2568 | { |
2569 | if (op != smul_widen_optab) | |
2570 | { | |
22ffd684 | 2571 | /* We can use a signed multiply with unsigned types as long as |
2572 | there is a wider mode to use, or it is the smaller of the two | |
2573 | types that is unsigned. Note that type1 >= type2, always. */ | |
2574 | if ((TYPE_UNSIGNED (type1) | |
2575 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) | |
2576 | || (TYPE_UNSIGNED (type2) | |
2577 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) | |
2578 | { | |
28ebc73c | 2579 | if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode) /* On success this updates from_mode to the wider mode. */ |
2580 | || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) | |
22ffd684 | 2581 | return false; |
2582 | } | |
3f2ab719 | 2583 | |
2584 | op = smul_widen_optab; /* Retry with the signed widening multiply. */ | |
2585 | handler = find_widening_optab_handler_and_mode (op, to_mode, | |
d2a1b453 | 2586 | from_mode, |
3f2ab719 | 2587 | &actual_mode); |
2588 | ||
2589 | if (handler == CODE_FOR_nothing) | |
2590 | return false; | |
2591 | ||
2592 | from_unsigned1 = from_unsigned2 = false; /* Both operands are treated as signed from here on. */ | |
2593 | } | |
2594 | else | |
2595 | return false; | |
2596 | } | |
7e4c867e | 2597 | |
aff5fb4d | 2598 | /* Ensure that the inputs to the handler are in the correct precision |
2599 | for the opcode. This will be the full mode size. */ | |
2600 | actual_precision = GET_MODE_PRECISION (actual_mode); | |
b36be69d | 2601 | if (2 * actual_precision > TYPE_PRECISION (type)) /* The result type must be at least twice as wide as the operands. */ |
2602 | return false; | |
3f2ab719 | 2603 | if (actual_precision != TYPE_PRECISION (type1) |
2604 | || from_unsigned1 != TYPE_UNSIGNED (type1)) | |
03d37e4e | 2605 | rhs1 = build_and_insert_cast (gsi, loc, |
2606 | build_nonstandard_integer_type | |
2607 | (actual_precision, from_unsigned1), rhs1); | |
3f2ab719 | 2608 | if (actual_precision != TYPE_PRECISION (type2) |
2609 | || from_unsigned2 != TYPE_UNSIGNED (type2)) | |
03d37e4e | 2610 | rhs2 = build_and_insert_cast (gsi, loc, |
2611 | build_nonstandard_integer_type | |
2612 | (actual_precision, from_unsigned2), rhs2); | |
aff5fb4d | 2613 | |
ffebd9c5 | 2614 | /* Handle constants: an INTEGER_CST operand is folded to its operand type. */ |
2615 | if (TREE_CODE (rhs1) == INTEGER_CST) | |
2616 | rhs1 = fold_convert (type1, rhs1); | |
2617 | if (TREE_CODE (rhs2) == INTEGER_CST) | |
2618 | rhs2 = fold_convert (type2, rhs2); | |
2619 | ||
aff5fb4d | 2620 | gimple_assign_set_rhs1 (stmt, rhs1); /* Rewrite STMT in place as a WIDEN_MULT_EXPR. */ |
2621 | gimple_assign_set_rhs2 (stmt, rhs2); | |
00f4f705 | 2622 | gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); |
2623 | update_stmt (stmt); | |
30c4e60d | 2624 | widen_mul_stats.widen_mults_inserted++; |
00f4f705 | 2625 | return true; |
2626 | } | |
2627 | ||
2628 | /* Process a single gimple statement STMT, which is found at the | |
2629 | iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its | |
2630 | rhs (given by CODE), and try to convert it into a | |
2631 | WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value | |
2632 | is true iff we converted the statement. Only INTEGER_TYPE and FIXED_POINT_TYPE results are handled. */ | |
2633 | ||
2634 | static bool | |
42acab1c | 2635 | convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, |
00f4f705 | 2636 | enum tree_code code) |
2637 | { | |
42acab1c | 2638 | gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL; |
2639 | gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt; | |
03d37e4e | 2640 | tree type, type1, type2, optype; |
00f4f705 | 2641 | tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; |
2642 | enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; | |
2643 | optab this_optab; | |
2644 | enum tree_code wmult_code; | |
aff5fb4d | 2645 | enum insn_code handler; |
d2a1b453 | 2646 | scalar_mode to_mode, from_mode, actual_mode; |
aff5fb4d | 2647 | location_t loc = gimple_location (stmt); |
2648 | int actual_precision; | |
3f2ab719 | 2649 | bool from_unsigned1, from_unsigned2; |
00f4f705 | 2650 | |
2651 | lhs = gimple_assign_lhs (stmt); | |
2652 | type = TREE_TYPE (lhs); | |
7e4c867e | 2653 | if (TREE_CODE (type) != INTEGER_TYPE |
2654 | && TREE_CODE (type) != FIXED_POINT_TYPE) | |
00f4f705 | 2655 | return false; |
2656 | ||
2657 | if (code == MINUS_EXPR) | |
2658 | wmult_code = WIDEN_MULT_MINUS_EXPR; | |
2659 | else | |
2660 | wmult_code = WIDEN_MULT_PLUS_EXPR; | |
2661 | ||
00f4f705 | 2662 | rhs1 = gimple_assign_rhs1 (stmt); |
2663 | rhs2 = gimple_assign_rhs2 (stmt); | |
2664 | ||
2665 | if (TREE_CODE (rhs1) == SSA_NAME) /* Record the defining statement and rhs code of each SSA operand. */ | |
2666 | { | |
2667 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
2668 | if (is_gimple_assign (rhs1_stmt)) | |
2669 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); | |
2670 | } | |
00f4f705 | 2671 | |
2672 | if (TREE_CODE (rhs2) == SSA_NAME) | |
2673 | { | |
2674 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
2675 | if (is_gimple_assign (rhs2_stmt)) | |
2676 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); | |
2677 | } | |
00f4f705 | 2678 | |
07ea3e5c | 2679 | /* Allow for one conversion statement between the multiply |
2680 | and addition/subtraction statement. If there is more than | |
2681 | one conversion then we assume they would invalidate this | |
2682 | transformation. If that's not the case then they should have | |
2683 | been folded before now. */ | |
2684 | if (CONVERT_EXPR_CODE_P (rhs1_code)) | |
2685 | { | |
2686 | conv1_stmt = rhs1_stmt; | |
2687 | rhs1 = gimple_assign_rhs1 (rhs1_stmt); /* Look through the conversion at its source operand. */ | |
2688 | if (TREE_CODE (rhs1) == SSA_NAME) | |
2689 | { | |
2690 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
2691 | if (is_gimple_assign (rhs1_stmt)) | |
2692 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); | |
2693 | } | |
2694 | else | |
2695 | return false; | |
2696 | } | |
2697 | if (CONVERT_EXPR_CODE_P (rhs2_code)) | |
2698 | { | |
2699 | conv2_stmt = rhs2_stmt; | |
2700 | rhs2 = gimple_assign_rhs1 (rhs2_stmt); | |
2701 | if (TREE_CODE (rhs2) == SSA_NAME) | |
2702 | { | |
2703 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
2704 | if (is_gimple_assign (rhs2_stmt)) | |
2705 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); | |
2706 | } | |
2707 | else | |
2708 | return false; | |
2709 | } | |
2710 | ||
aff5fb4d | 2711 | /* If the rhs code is WIDEN_MULT_EXPR then it would seem unnecessary to call |
2712 | is_widening_mult_p, but we still need the rhs returns. | |
2713 | ||
2714 | It might also appear that it would be sufficient to use the existing | |
2715 | operands of the widening multiply, but that would limit the choice of | |
e0df5be0 | 2716 | multiply-and-accumulate instructions. |
2717 | ||
2718 | If the widened-multiplication result has more than one use, it is | |
2719 | probably wiser not to do the conversion. For MINUS_EXPR only the second operand is considered as the multiplication. */ | |
aff5fb4d | 2720 | if (code == PLUS_EXPR |
2721 | && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) | |
00f4f705 | 2722 | { |
e0df5be0 | 2723 | if (!has_single_use (rhs1) |
2724 | || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, | |
2725 | &type2, &mult_rhs2)) | |
00f4f705 | 2726 | return false; |
7e4c867e | 2727 | add_rhs = rhs2; |
07ea3e5c | 2728 | conv_stmt = conv1_stmt; |
00f4f705 | 2729 | } |
aff5fb4d | 2730 | else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) |
00f4f705 | 2731 | { |
e0df5be0 | 2732 | if (!has_single_use (rhs2) |
2733 | || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, | |
2734 | &type2, &mult_rhs2)) | |
00f4f705 | 2735 | return false; |
7e4c867e | 2736 | add_rhs = rhs1; |
07ea3e5c | 2737 | conv_stmt = conv2_stmt; |
00f4f705 | 2738 | } |
00f4f705 | 2739 | else |
2740 | return false; | |
2741 | ||
3d2b0034 | 2742 | to_mode = SCALAR_TYPE_MODE (type); |
2743 | from_mode = SCALAR_TYPE_MODE (type1); | |
f90f6ff1 | 2744 | if (to_mode == from_mode) /* Result and operand share a mode: give up. */ |
2745 | return false; | |
2746 | ||
3f2ab719 | 2747 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2748 | from_unsigned2 = TYPE_UNSIGNED (type2); | |
4ccf368d | 2749 | optype = type1; |
aff5fb4d | 2750 | |
3f2ab719 | 2751 | /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ |
2752 | if (from_unsigned1 != from_unsigned2) | |
2753 | { | |
4ccf368d | 2754 | if (!INTEGRAL_TYPE_P (type)) |
2755 | return false; | |
22ffd684 | 2756 | /* We can use a signed multiply with unsigned types as long as |
2757 | there is a wider mode to use, or it is the smaller of the two | |
2758 | types that is unsigned. Note that type1 >= type2, always. */ | |
2759 | if ((from_unsigned1 | |
2760 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) | |
2761 | || (from_unsigned2 | |
2762 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) | |
3f2ab719 | 2763 | { |
28ebc73c | 2764 | if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode) /* On success this updates from_mode to the wider mode. */ |
2765 | || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode)) | |
22ffd684 | 2766 | return false; |
3f2ab719 | 2767 | } |
22ffd684 | 2768 | |
2769 | from_unsigned1 = from_unsigned2 = false; /* Both operands are treated as signed from here on. */ | |
4ccf368d | 2770 | optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode), |
2771 | false); | |
3f2ab719 | 2772 | } |
815a0224 | 2773 | |
07ea3e5c | 2774 | /* If there was a conversion between the multiply and addition |
2775 | then we need to make sure it fits a multiply-and-accumulate. | |
2776 | There should be a single mode change which does not change the | |
2777 | value. */ | |
2778 | if (conv_stmt) | |
2779 | { | |
3f2ab719 | 2780 | /* We use the original, unmodified data types for this. */ |
07ea3e5c | 2781 | tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); |
2782 | tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); | |
2783 | int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); /* Sum of the two operand precisions. */ | |
2784 | bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); | |
2785 | ||
2786 | if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) | |
2787 | { | |
2788 | /* Conversion is a truncate. */ | |
2789 | if (TYPE_PRECISION (to_type) < data_size) | |
2790 | return false; | |
2791 | } | |
2792 | else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) | |
2793 | { | |
2794 | /* Conversion is an extend. Check it's the right sort. */ | |
2795 | if (TYPE_UNSIGNED (from_type) != is_unsigned | |
2796 | && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) | |
2797 | return false; | |
2798 | } | |
2799 | /* else convert is a no-op for our purposes. */ | |
2800 | } | |
2801 | ||
815a0224 | 2802 | /* Verify that the machine can perform a widening multiply |
2803 | accumulate in this mode/signedness combination, otherwise | |
2804 | this transformation is likely to pessimize code. */ | |
3f2ab719 | 2805 | this_optab = optab_for_tree_code (wmult_code, optype, optab_default); |
aff5fb4d | 2806 | handler = find_widening_optab_handler_and_mode (this_optab, to_mode, |
d2a1b453 | 2807 | from_mode, &actual_mode); |
aff5fb4d | 2808 | |
2809 | if (handler == CODE_FOR_nothing) | |
815a0224 | 2810 | return false; |
2811 | ||
aff5fb4d | 2812 | /* Ensure that the inputs to the handler are in the correct precision |
2813 | for the opcode. This will be the full mode size. */ | |
2814 | actual_precision = GET_MODE_PRECISION (actual_mode); | |
3f2ab719 | 2815 | if (actual_precision != TYPE_PRECISION (type1) |
2816 | || from_unsigned1 != TYPE_UNSIGNED (type1)) | |
03d37e4e | 2817 | mult_rhs1 = build_and_insert_cast (gsi, loc, |
2818 | build_nonstandard_integer_type | |
2819 | (actual_precision, from_unsigned1), | |
2820 | mult_rhs1); | |
3f2ab719 | 2821 | if (actual_precision != TYPE_PRECISION (type2) |
2822 | || from_unsigned2 != TYPE_UNSIGNED (type2)) | |
03d37e4e | 2823 | mult_rhs2 = build_and_insert_cast (gsi, loc, |
2824 | build_nonstandard_integer_type | |
2825 | (actual_precision, from_unsigned2), | |
2826 | mult_rhs2); | |
00f4f705 | 2827 | |
12421545 | 2828 | if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) /* Cast the addend to the result type when the types differ in a way that matters. */ |
03d37e4e | 2829 | add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs); |
12421545 | 2830 | |
ffebd9c5 | 2831 | /* Handle constants: an INTEGER_CST operand is folded to its operand type. */ |
2832 | if (TREE_CODE (mult_rhs1) == INTEGER_CST) | |
d5a3bb10 | 2833 | mult_rhs1 = fold_convert (type1, mult_rhs1); |
ffebd9c5 | 2834 | if (TREE_CODE (mult_rhs2) == INTEGER_CST) |
d5a3bb10 | 2835 | mult_rhs2 = fold_convert (type2, mult_rhs2); |
ffebd9c5 | 2836 | |
806413d2 | 2837 | gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2, |
2838 | add_rhs); | |
00f4f705 | 2839 | update_stmt (gsi_stmt (*gsi)); |
30c4e60d | 2840 | widen_mul_stats.maccs_inserted++; |
00f4f705 | 2841 | return true; |
2842 | } | |
2843 | ||
ed306e55 | 2844 | /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and |
2845 | OP2 and which we know is used in statements that can be, together with the | |
2846 | multiplication, converted to FMAs, perform the transformation. Every non-debug use of MUL_RESULT is rewritten; the multiplication statement itself is not removed by this function. */ | |
2847 | ||
2848 | static void | |
2849 | convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2) | |
2850 | { | |
2851 | tree type = TREE_TYPE (mul_result); | |
2852 | gimple *use_stmt; | |
2853 | imm_use_iterator imm_iter; | |
143c3c9a | 2854 | gcall *fma_stmt; |
ed306e55 | 2855 | |
2856 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) | |
2857 | { | |
2858 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); | |
ed306e55 | 2859 | tree addop, mulop1 = op1, result = mul_result; |
2860 | bool negate_p = false; | |
143c3c9a | 2861 | gimple_seq seq = NULL; |
ed306e55 | 2862 | |
2863 | if (is_gimple_debug (use_stmt)) | |
2864 | continue; | |
2865 | ||
e3798ed9 | 2866 | if (is_gimple_assign (use_stmt) /* The use is a negation of the product: remove it and continue with the statement that uses the negated value. */ |
2867 | && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR) | |
ed306e55 | 2868 | { |
2869 | result = gimple_assign_lhs (use_stmt); | |
2870 | use_operand_p use_p; | |
2871 | gimple *neguse_stmt; | |
2872 | single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt); | |
2873 | gsi_remove (&gsi, true); | |
2874 | release_defs (use_stmt); | |
2875 | ||
2876 | use_stmt = neguse_stmt; | |
2877 | gsi = gsi_for_stmt (use_stmt); | |
ed306e55 | 2878 | negate_p = true; |
2879 | } | |
2880 | ||
e3798ed9 | 2881 | tree cond, else_value, ops[3]; |
2882 | tree_code code; | |
2883 | if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, | |
2884 | ops, &else_value)) | |
2885 | gcc_unreachable (); /* Reaching this means the use is not of the form the function header says the caller has established. */ | |
2886 | addop = ops[0] == result ? ops[1] : ops[0]; /* The operand that is not the (possibly negated) product. */ | |
2887 | ||
2888 | if (code == MINUS_EXPR) | |
ed306e55 | 2889 | { |
e3798ed9 | 2890 | if (ops[0] == result) |
2891 | /* a * b - c -> a * b + (-c) */ | |
143c3c9a | 2892 | addop = gimple_build (&seq, NEGATE_EXPR, type, addop); |
e3798ed9 | 2893 | else |
2894 | /* a - b * c -> (-b) * c + a */ | |
ed306e55 | 2895 | negate_p = !negate_p; |
2896 | } | |
2897 | ||
2898 | if (negate_p) | |
143c3c9a | 2899 | mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1); |
ed306e55 | 2900 | |
143c3c9a | 2901 | if (seq) /* Emit any negations built above before the use statement. */ |
2902 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); | |
e3798ed9 | 2903 | |
2904 | if (cond) | |
2905 | fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1, | |
2906 | op2, addop, else_value); | |
2907 | else | |
2908 | fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop); | |
2909 | gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt)); | |
aac19106 | 2910 | gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun, |
2911 | use_stmt)); | |
143c3c9a | 2912 | gsi_replace (&gsi, fma_stmt, true); |
2913 | /* Follow all SSA edges so that we generate FMS, FNMA and FNMS | |
2914 | regardless of where the negation occurs. */ | |
43a607ba | 2915 | gimple *orig_stmt = gsi_stmt (gsi); |
143c3c9a | 2916 | if (fold_stmt (&gsi, follow_all_ssa_edges)) |
43a607ba | 2917 | { |
2918 | if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi))) | |
2919 | gcc_unreachable (); | |
2920 | update_stmt (gsi_stmt (gsi)); | |
2921 | } | |
ed306e55 | 2922 | |
2923 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2924 | { | |
2925 | fprintf (dump_file, "Generated FMA "); | |
54e7de93 | 2926 | print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE); |
ed306e55 | 2927 | fprintf (dump_file, "\n"); |
2928 | } | |
2929 | ||
ed306e55 | 2930 | widen_mul_stats.fmas_inserted++; |
2931 | } | |
2932 | } | |
2933 | ||
2934 | /* Data necessary to perform the actual transformation from a multiplication | |
2935 | and an addition to an FMA once the decision has been taken to do so, and to | |
2936 | then delete the multiplication statement from the function IL. */ | |
2937 | ||
2938 | struct fma_transformation_info | |
2939 | { | |
2940 | gimple *mul_stmt; /* The multiplication statement; cancel_fma_deferring removes it after the conversion. */ | |
2941 | tree mul_result; /* Passed to convert_mult_to_fma_1 as MUL_RESULT. */ | |
2942 | tree op1; /* Passed to convert_mult_to_fma_1 as OP1. */ | |
2943 | tree op2; /* Passed to convert_mult_to_fma_1 as OP2. */ | |
2944 | }; | |
2945 | ||
2946 | /* Structure containing the current state of FMA deferring, i.e. whether we are | |
2947 | deferring, whether to continue deferring, and all data necessary to come | |
2948 | back and perform all deferred transformations. */ | |
2949 | ||
2950 | class fma_deferring_state | |
2951 | { | |
2952 | public: | |
2953 | /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually | |
2954 | do any deferring. All other members start out empty or NULL. */ | |
2955 | ||
2956 | fma_deferring_state (bool perform_deferring) | |
2957 | : m_candidates (), m_mul_result_set (), m_initial_phi (NULL), | |
2958 | m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {} | |
2959 | ||
2960 | /* List of FMA candidates for which the transformation has been determined | |
2961 | possible but which, at this point in BB analysis, we do not consider | |
2962 | beneficial. */ | |
2963 | auto_vec<fma_transformation_info, 8> m_candidates; | |
2964 | ||
2965 | /* Set of results of multiplications that are part of already deferred FMA | |
2966 | candidates. */ | |
2967 | hash_set<tree> m_mul_result_set; | |
2968 | ||
2969 | /* The PHI that supposedly feeds back the result of one FMA to another over a loop | |
2970 | boundary. */ | |
2971 | gphi *m_initial_phi; | |
2972 | ||
2973 | /* Result of the last produced FMA candidate or NULL if there has not been | |
2974 | one. */ | |
2975 | tree m_last_result; | |
2976 | ||
2977 | /* If true, deferring might still be profitable. If false, transform all | |
2978 | candidates and no longer defer. */ | |
2979 | bool m_deferring_p; | |
2980 | }; | |
2981 | ||
2982 | /* Transform all deferred FMA candidates and mark STATE as no longer | |
2983 | deferring. Does nothing if STATE is already not deferring. */ | |
2984 | ||
2985 | static void | |
2986 | cancel_fma_deferring (fma_deferring_state *state) | |
2987 | { | |
2988 | if (!state->m_deferring_p) | |
2989 | return; | |
2990 | ||
2991 | for (unsigned i = 0; i < state->m_candidates.length (); i++) | |
2992 | { | |
2993 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2994 | fprintf (dump_file, "Generating deferred FMA\n"); | |
2995 | ||
2996 | const fma_transformation_info &fti = state->m_candidates[i]; | |
2997 | convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2); | |
2998 | ||
2999 | gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt); /* The uses have been converted; now remove the multiplication itself. */ | |
3000 | gsi_remove (&gsi, true); | |
3001 | release_defs (fti.mul_stmt); | |
3002 | } | |
3003 | state->m_deferring_p = false; | |
3004 | } | |
3005 | ||
3006 | /* If OP is an SSA name defined by a PHI node, return the PHI statement. | |
3007 | Otherwise (OP is not an SSA name, or its definition is not a PHI) return NULL. */ | |
3008 | ||
3009 | static gphi * | |
3010 | result_of_phi (tree op) | |
3011 | { | |
3012 | if (TREE_CODE (op) != SSA_NAME) | |
3013 | return NULL; | |
3014 | ||
3015 | return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op)); /* dyn_cast yields NULL when the defining statement is not a gphi. */ | |
3016 | } | |
3017 | ||
3018 | /* After processing statements of a BB and recording STATE, return true if the | |
3019 | initial phi is fed by the last FMA candidate result or one such result from | |
3020 | previously processed BBs marked in LAST_RESULT_SET. */ | |
3021 | ||
3022 | static bool | |
3023 | last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, | |
3024 | hash_set<tree> *last_result_set) | |
3025 | { | |
3026 | ssa_op_iter iter; | |
3027 | use_operand_p use; | |
3028 | FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE) /* Check each argument of STATE's initial PHI. */ | |
3029 | { | |
3030 | tree t = USE_FROM_PTR (use); | |
3031 | if (t == state->m_last_result | |
3032 | || last_result_set->contains (t)) | |
3033 | return true; | |
3034 | } | |
3035 | ||
3036 | return false; | |
3037 | } | |
3038 | ||
15dbdc8f | 3039 | /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 |
3040 | with uses in additions and subtractions to form fused multiply-add | |
ed306e55 | 3041 | operations. Returns true if successful and MUL_STMT should be removed. |
9c19fd8a | 3042 | If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional |
3043 | on MUL_COND, otherwise it is unconditional. | |
ed306e55 | 3044 | |
3045 | If STATE indicates that we are deferring FMA transformation, that means | |
3046 | that we do not produce FMAs for basic blocks which look like: | |
3047 | ||
3048 | <bb 6> | |
3049 | # accumulator_111 = PHI <0.0(5), accumulator_66(6)> | |
3050 | _65 = _14 * _16; | |
3051 | accumulator_66 = _65 + accumulator_111; | |
3052 | ||
3053 | or its unrolled version, i.e. with several FMA candidates that feed result | |
3054 | of one into the addend of another. Instead, we add them to a list in STATE | |
3055 | and if we later discover an FMA candidate that is not part of such a chain, | |
3056 | we go back and perform all deferred past candidates. */ | |
b9be572e | 3057 | |
3058 | static bool | |
ed306e55 | 3059 | convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, |
9c19fd8a | 3060 | fma_deferring_state *state, tree mul_cond = NULL_TREE) |
b9be572e | 3061 | { |
15dbdc8f | 3062 | tree mul_result = gimple_get_lhs (mul_stmt); |
b9be572e | 3063 | tree type = TREE_TYPE (mul_result); |
42acab1c | 3064 | gimple *use_stmt, *neguse_stmt; |
b9be572e | 3065 | use_operand_p use_p; |
3066 | imm_use_iterator imm_iter; | |
3067 | ||
3068 | if (FLOAT_TYPE_P (type) | |
3069 | && flag_fp_contract_mode == FP_CONTRACT_OFF) | |
3070 | return false; | |
3071 | ||
3072 | /* We don't want to do bitfield reduction ops. */ | |
3073 | if (INTEGRAL_TYPE_P (type) | |
f2ef7276 | 3074 | && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type))) |
b9be572e | 3075 | return false; |
3076 | ||
3077 | /* If the target doesn't support it, don't generate it. We assume that | |
3078 | if fma isn't available then fms, fnma or fnms are not either. */ | |
143c3c9a | 3079 | optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt)); |
3080 | if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type)) | |
b9be572e | 3081 | return false; |
3082 | ||
5ed3d3b8 | 3083 | /* If the multiplication has zero uses, it is kept around probably because |
3084 | of -fnon-call-exceptions. Don't optimize it away in that case, | |
3085 | it is DCE's job. */ | |
3086 | if (has_zero_uses (mul_result)) | |
3087 | return false; | |
3088 | ||
ed306e55 | 3089 | bool check_defer |
3090 | = (state->m_deferring_p | |
3091 | && (tree_to_shwi (TYPE_SIZE (type)) | |
3092 | <= PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS))); | |
3093 | bool defer = check_defer; | |
9cbde7ad | 3094 | bool seen_negate_p = false; |
b9be572e | 3095 | /* Make sure that the multiplication statement becomes dead after |
3096 | the transformation, thus that all uses are transformed to FMAs. | |
3097 | This means we assume that an FMA operation has the same cost | |
3098 | as an addition. */ | |
3099 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) | |
3100 | { | |
44579526 | 3101 | tree result = mul_result; |
3102 | bool negate_p = false; | |
b9be572e | 3103 | |
3104 | use_stmt = USE_STMT (use_p); | |
3105 | ||
17a2c727 | 3106 | if (is_gimple_debug (use_stmt)) |
3107 | continue; | |
3108 | ||
b9be572e | 3109 | /* For now restrict these operations to single basic blocks. In theory |
3110 | we would want to support sinking the multiplication in | |
3111 | m = a*b; | |
3112 | if () | |
3113 | ma = m + c; | |
3114 | else | |
3115 | d = m; | |
3116 | to form a fma in the then block and sink the multiplication to the | |
3117 | else block. */ | |
3118 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | |
3119 | return false; | |
3120 | ||
44579526 | 3121 | /* A negate on the multiplication leads to FNMA. */ |
e3798ed9 | 3122 | if (is_gimple_assign (use_stmt) |
3123 | && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR) | |
44579526 | 3124 | { |
805ad414 | 3125 | ssa_op_iter iter; |
5715c09b | 3126 | use_operand_p usep; |
805ad414 | 3127 | |
9cbde7ad | 3128 | /* If (due to earlier missed optimizations) we have two |
3129 | negates of the same value, treat them as equivalent | |
3130 | to a single negate with multiple uses. */ | |
3131 | if (seen_negate_p) | |
3132 | return false; | |
3133 | ||
44579526 | 3134 | result = gimple_assign_lhs (use_stmt); |
3135 | ||
3136 | /* Make sure the negate statement becomes dead with this | |
3137 | single transformation. */ | |
3138 | if (!single_imm_use (gimple_assign_lhs (use_stmt), | |
3139 | &use_p, &neguse_stmt)) | |
3140 | return false; | |
3141 | ||
805ad414 | 3142 | /* Make sure the multiplication isn't also used on that stmt. */ |
5715c09b | 3143 | FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE) |
3144 | if (USE_FROM_PTR (usep) == mul_result) | |
805ad414 | 3145 | return false; |
3146 | ||
44579526 | 3147 | /* Re-validate. */ |
3148 | use_stmt = neguse_stmt; | |
3149 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | |
3150 | return false; | |
44579526 | 3151 | |
9cbde7ad | 3152 | negate_p = seen_negate_p = true; |
44579526 | 3153 | } |
b9be572e | 3154 | |
e3798ed9 | 3155 | tree cond, else_value, ops[3]; |
3156 | tree_code code; | |
3157 | if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops, | |
3158 | &else_value)) | |
3159 | return false; | |
3160 | ||
3161 | switch (code) | |
44579526 | 3162 | { |
3163 | case MINUS_EXPR: | |
e3798ed9 | 3164 | if (ops[1] == result) |
8a9d0572 | 3165 | negate_p = !negate_p; |
3166 | break; | |
44579526 | 3167 | case PLUS_EXPR: |
44579526 | 3168 | break; |
44579526 | 3169 | default: |
3170 | /* FMA can only be formed from PLUS and MINUS. */ | |
3171 | return false; | |
3172 | } | |
b9be572e | 3173 | |
9c19fd8a | 3174 | if (mul_cond && cond != mul_cond) |
3175 | return false; | |
3176 | ||
e3798ed9 | 3177 | if (cond) |
3178 | { | |
3179 | if (cond == result || else_value == result) | |
3180 | return false; | |
3181 | if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type)) | |
3182 | return false; | |
3183 | } | |
3184 | ||
3185 | /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that | |
3186 | we'll visit later, we might be able to get a more profitable | |
3187 | match with fnma. | |
b095bd6a | 3188 | OTOH, if we don't, a negate / fma pair has likely lower latency |
3189 | than a mult / subtract pair. */ | |
e3798ed9 | 3190 | if (code == MINUS_EXPR |
3191 | && !negate_p | |
3192 | && ops[0] == result | |
143c3c9a | 3193 | && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type) |
e3798ed9 | 3194 | && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type) |
3195 | && TREE_CODE (ops[1]) == SSA_NAME | |
3196 | && has_single_use (ops[1])) | |
b095bd6a | 3197 | { |
e3798ed9 | 3198 | gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]); |
3199 | if (is_gimple_assign (stmt2) | |
3200 | && gimple_assign_rhs_code (stmt2) == MULT_EXPR) | |
3201 | return false; | |
b095bd6a | 3202 | } |
3203 | ||
44579526 | 3204 | /* We can't handle a * b + a * b. */ |
e3798ed9 | 3205 | if (ops[0] == ops[1]) |
ed306e55 | 3206 | return false; |
3207 | /* If deferring, make sure we are not looking at an instruction that | |
3208 | wouldn't have existed if we were not. */ | |
3209 | if (state->m_deferring_p | |
e3798ed9 | 3210 | && (state->m_mul_result_set.contains (ops[0]) |
3211 | || state->m_mul_result_set.contains (ops[1]))) | |
44579526 | 3212 | return false; |
8a9d0572 | 3213 | |
ed306e55 | 3214 | if (check_defer) |
44579526 | 3215 | { |
e3798ed9 | 3216 | tree use_lhs = gimple_get_lhs (use_stmt); |
ed306e55 | 3217 | if (state->m_last_result) |
3218 | { | |
e3798ed9 | 3219 | if (ops[1] == state->m_last_result |
3220 | || ops[0] == state->m_last_result) | |
ed306e55 | 3221 | defer = true; |
3222 | else | |
3223 | defer = false; | |
3224 | } | |
3225 | else | |
3226 | { | |
3227 | gcc_checking_assert (!state->m_initial_phi); | |
3228 | gphi *phi; | |
e3798ed9 | 3229 | if (ops[0] == result) |
3230 | phi = result_of_phi (ops[1]); | |
ed306e55 | 3231 | else |
3232 | { | |
e3798ed9 | 3233 | gcc_assert (ops[1] == result); |
3234 | phi = result_of_phi (ops[0]); | |
ed306e55 | 3235 | } |
44579526 | 3236 | |
ed306e55 | 3237 | if (phi) |
3238 | { | |
3239 | state->m_initial_phi = phi; | |
3240 | defer = true; | |
3241 | } | |
3242 | else | |
3243 | defer = false; | |
3244 | } | |
44579526 | 3245 | |
ed306e55 | 3246 | state->m_last_result = use_lhs; |
3247 | check_defer = false; | |
b9be572e | 3248 | } |
3249 | else | |
ed306e55 | 3250 | defer = false; |
3251 | ||
3252 | /* While it is possible to validate whether or not the exact form that | |
3253 | we've recognized is available in the backend, the assumption is that | |
3254 | if the deferring logic above did not trigger, the transformation is | |
3255 | never a loss. For instance, suppose the target only has the plain FMA | |
3256 | pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged | |
3257 | MUL+SUB for FMA+NEG, which is still two operations. Consider | |
3258 | -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA | |
3259 | form the two NEGs are independent and could be run in parallel. */ | |
3260 | } | |
3261 | ||
3262 | if (defer) | |
3263 | { | |
3264 | fma_transformation_info fti; | |
3265 | fti.mul_stmt = mul_stmt; | |
3266 | fti.mul_result = mul_result; | |
3267 | fti.op1 = op1; | |
3268 | fti.op2 = op2; | |
3269 | state->m_candidates.safe_push (fti); | |
3270 | state->m_mul_result_set.add (mul_result); | |
3271 | ||
3272 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
b9be572e | 3273 | { |
ed306e55 | 3274 | fprintf (dump_file, "Deferred generating FMA for multiplication "); |
54e7de93 | 3275 | print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE); |
ed306e55 | 3276 | fprintf (dump_file, "\n"); |
b9be572e | 3277 | } |
3278 | ||
ed306e55 | 3279 | return false; |
3280 | } | |
3281 | else | |
3282 | { | |
3283 | if (state->m_deferring_p) | |
3284 | cancel_fma_deferring (state); | |
3285 | convert_mult_to_fma_1 (mul_result, op1, op2); | |
3286 | return true; | |
b9be572e | 3287 | } |
b9be572e | 3288 | } |
3289 | ||
e11a63e8 | 3290 | |
3291 | /* Helper function of match_uaddsub_overflow. Return 1 | |
3292 | if USE_STMT is unsigned overflow check ovf != 0 for | |
3293 | STMT, -1 if USE_STMT is unsigned overflow check ovf == 0 | |
3294 | and 0 otherwise. */ | |
3295 | ||
3296 | static int | |
3297 | uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt) | |
3298 | { | |
3299 | enum tree_code ccode = ERROR_MARK; | |
3300 | tree crhs1 = NULL_TREE, crhs2 = NULL_TREE; | |
3301 | if (gimple_code (use_stmt) == GIMPLE_COND) | |
3302 | { | |
3303 | ccode = gimple_cond_code (use_stmt); | |
3304 | crhs1 = gimple_cond_lhs (use_stmt); | |
3305 | crhs2 = gimple_cond_rhs (use_stmt); | |
3306 | } | |
3307 | else if (is_gimple_assign (use_stmt)) | |
3308 | { | |
3309 | if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) | |
3310 | { | |
3311 | ccode = gimple_assign_rhs_code (use_stmt); | |
3312 | crhs1 = gimple_assign_rhs1 (use_stmt); | |
3313 | crhs2 = gimple_assign_rhs2 (use_stmt); | |
3314 | } | |
3315 | else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR) | |
3316 | { | |
3317 | tree cond = gimple_assign_rhs1 (use_stmt); | |
3318 | if (COMPARISON_CLASS_P (cond)) | |
3319 | { | |
3320 | ccode = TREE_CODE (cond); | |
3321 | crhs1 = TREE_OPERAND (cond, 0); | |
3322 | crhs2 = TREE_OPERAND (cond, 1); | |
3323 | } | |
3324 | else | |
3325 | return 0; | |
3326 | } | |
3327 | else | |
3328 | return 0; | |
3329 | } | |
3330 | else | |
3331 | return 0; | |
3332 | ||
3333 | if (TREE_CODE_CLASS (ccode) != tcc_comparison) | |
3334 | return 0; | |
3335 | ||
3336 | enum tree_code code = gimple_assign_rhs_code (stmt); | |
3337 | tree lhs = gimple_assign_lhs (stmt); | |
3338 | tree rhs1 = gimple_assign_rhs1 (stmt); | |
3339 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
3340 | ||
3341 | switch (ccode) | |
3342 | { | |
3343 | case GT_EXPR: | |
3344 | case LE_EXPR: | |
3345 | /* r = a - b; r > a or r <= a | |
3346 | r = a + b; a > r or a <= r or b > r or b <= r. */ | |
3347 | if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1) | |
3348 | || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2) | |
3349 | && crhs2 == lhs)) | |
3350 | return ccode == GT_EXPR ? 1 : -1; | |
3351 | break; | |
3352 | case LT_EXPR: | |
3353 | case GE_EXPR: | |
3354 | /* r = a - b; a < r or a >= r | |
3355 | r = a + b; r < a or r >= a or r < b or r >= b. */ | |
3356 | if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs) | |
3357 | || (code == PLUS_EXPR && crhs1 == lhs | |
3358 | && (crhs2 == rhs1 || crhs2 == rhs2))) | |
3359 | return ccode == LT_EXPR ? 1 : -1; | |
3360 | break; | |
3361 | default: | |
3362 | break; | |
3363 | } | |
3364 | return 0; | |
3365 | } | |
3366 | ||
3367 | /* Recognize for unsigned x | |
3368 | x = y - z; | |
3369 | if (x > y) | |
3370 | where there are other uses of x and replace it with | |
3371 | _7 = SUB_OVERFLOW (y, z); | |
3372 | x = REALPART_EXPR <_7>; | |
3373 | _8 = IMAGPART_EXPR <_7>; | |
3374 | if (_8) | |
3375 | and similarly for addition. */ | |
3376 | ||
3377 | static bool | |
3378 | match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, | |
3379 | enum tree_code code) | |
3380 | { | |
3381 | tree lhs = gimple_assign_lhs (stmt); | |
3382 | tree type = TREE_TYPE (lhs); | |
3383 | use_operand_p use_p; | |
3384 | imm_use_iterator iter; | |
3385 | bool use_seen = false; | |
3386 | bool ovf_use_seen = false; | |
3387 | gimple *use_stmt; | |
3388 | ||
3389 | gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR); | |
3390 | if (!INTEGRAL_TYPE_P (type) | |
3391 | || !TYPE_UNSIGNED (type) | |
3392 | || has_zero_uses (lhs) | |
3393 | || has_single_use (lhs) | |
3394 | || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab, | |
3395 | TYPE_MODE (type)) == CODE_FOR_nothing) | |
3396 | return false; | |
3397 | ||
3398 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) | |
3399 | { | |
3400 | use_stmt = USE_STMT (use_p); | |
3401 | if (is_gimple_debug (use_stmt)) | |
3402 | continue; | |
3403 | ||
3404 | if (uaddsub_overflow_check_p (stmt, use_stmt)) | |
3405 | ovf_use_seen = true; | |
3406 | else | |
3407 | use_seen = true; | |
3408 | if (ovf_use_seen && use_seen) | |
3409 | break; | |
3410 | } | |
3411 | ||
3412 | if (!ovf_use_seen || !use_seen) | |
3413 | return false; | |
3414 | ||
3415 | tree ctype = build_complex_type (type); | |
3416 | tree rhs1 = gimple_assign_rhs1 (stmt); | |
3417 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
3418 | gcall *g = gimple_build_call_internal (code == PLUS_EXPR | |
3419 | ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW, | |
3420 | 2, rhs1, rhs2); | |
3421 | tree ctmp = make_ssa_name (ctype); | |
3422 | gimple_call_set_lhs (g, ctmp); | |
3423 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
3424 | gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR, | |
3425 | build1 (REALPART_EXPR, type, ctmp)); | |
3426 | gsi_replace (gsi, g2, true); | |
3427 | tree ovf = make_ssa_name (type); | |
3428 | g2 = gimple_build_assign (ovf, IMAGPART_EXPR, | |
3429 | build1 (IMAGPART_EXPR, type, ctmp)); | |
3430 | gsi_insert_after (gsi, g2, GSI_NEW_STMT); | |
3431 | ||
3432 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) | |
3433 | { | |
3434 | if (is_gimple_debug (use_stmt)) | |
3435 | continue; | |
3436 | ||
3437 | int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt); | |
3438 | if (ovf_use == 0) | |
3439 | continue; | |
3440 | if (gimple_code (use_stmt) == GIMPLE_COND) | |
3441 | { | |
3442 | gcond *cond_stmt = as_a <gcond *> (use_stmt); | |
3443 | gimple_cond_set_lhs (cond_stmt, ovf); | |
3444 | gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0)); | |
3445 | gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR); | |
3446 | } | |
3447 | else | |
3448 | { | |
3449 | gcc_checking_assert (is_gimple_assign (use_stmt)); | |
3450 | if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) | |
3451 | { | |
3452 | gimple_assign_set_rhs1 (use_stmt, ovf); | |
3453 | gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0)); | |
3454 | gimple_assign_set_rhs_code (use_stmt, | |
3455 | ovf_use == 1 ? NE_EXPR : EQ_EXPR); | |
3456 | } | |
3457 | else | |
3458 | { | |
3459 | gcc_checking_assert (gimple_assign_rhs_code (use_stmt) | |
3460 | == COND_EXPR); | |
3461 | tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR, | |
3462 | boolean_type_node, ovf, | |
3463 | build_int_cst (type, 0)); | |
3464 | gimple_assign_set_rhs1 (use_stmt, cond); | |
3465 | } | |
3466 | } | |
3467 | update_stmt (use_stmt); | |
3468 | } | |
3469 | return true; | |
3470 | } | |
3471 | ||
67f7b566 | 3472 | /* Return true if target has support for divmod. */ |
3473 | ||
3474 | static bool | |
3475 | target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) | |
3476 | { | |
3477 | /* If target supports hardware divmod insn, use it for divmod. */ | |
3478 | if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing) | |
3479 | return true; | |
3480 | ||
3481 | /* Check if libfunc for divmod is available. */ | |
3482 | rtx libfunc = optab_libfunc (divmod_optab, mode); | |
3483 | if (libfunc != NULL_RTX) | |
3484 | { | |
3485 | /* If optab_handler exists for div_optab, perhaps in a wider mode, | |
3486 | we don't want to use the libfunc even if it exists for given mode. */ | |
19a4dce4 | 3487 | machine_mode div_mode; |
3488 | FOR_EACH_MODE_FROM (div_mode, mode) | |
67f7b566 | 3489 | if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing) |
3490 | return false; | |
3491 | ||
3492 | return targetm.expand_divmod_libfunc != NULL; | |
3493 | } | |
3494 | ||
3495 | return false; | |
3496 | } | |
3497 | ||
3498 | /* Check if stmt is candidate for divmod transform. */ | |
3499 | ||
3500 | static bool | |
3501 | divmod_candidate_p (gassign *stmt) | |
3502 | { | |
3503 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
582adad1 | 3504 | machine_mode mode = TYPE_MODE (type); |
67f7b566 | 3505 | optab divmod_optab, div_optab; |
3506 | ||
3507 | if (TYPE_UNSIGNED (type)) | |
3508 | { | |
3509 | divmod_optab = udivmod_optab; | |
3510 | div_optab = udiv_optab; | |
3511 | } | |
3512 | else | |
3513 | { | |
3514 | divmod_optab = sdivmod_optab; | |
3515 | div_optab = sdiv_optab; | |
3516 | } | |
3517 | ||
3518 | tree op1 = gimple_assign_rhs1 (stmt); | |
3519 | tree op2 = gimple_assign_rhs2 (stmt); | |
3520 | ||
3521 | /* Disable the transform if either is a constant, since division-by-constant | |
3522 | may have specialized expansion. */ | |
3523 | if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2)) | |
3524 | return false; | |
3525 | ||
3526 | /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should | |
3527 | expand using the [su]divv optabs. */ | |
3528 | if (TYPE_OVERFLOW_TRAPS (type)) | |
3529 | return false; | |
3530 | ||
3531 | if (!target_supports_divmod_p (divmod_optab, div_optab, mode)) | |
3532 | return false; | |
3533 | ||
3534 | return true; | |
3535 | } | |
3536 | ||
3537 | /* This function looks for: | |
3538 | t1 = a TRUNC_DIV_EXPR b; | |
3539 | t2 = a TRUNC_MOD_EXPR b; | |
3540 | and transforms it to the following sequence: | |
3541 | complex_tmp = DIVMOD (a, b); | |
3542 | t1 = REALPART_EXPR(a); | |
3543 | t2 = IMAGPART_EXPR(b); | |
3544 | For conditions enabling the transform see divmod_candidate_p(). | |
3545 | ||
3546 | The pass has three parts: | |
3547 | 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all | |
3548 | other trunc_div_expr and trunc_mod_expr stmts. | |
3549 | 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt | |
3550 | to stmts vector. | |
3551 | 3) Insert DIVMOD call just before top_stmt and update entries in | |
3552 | stmts vector to use return value of DIMOVD (REALEXPR_PART for div, | |
3553 | IMAGPART_EXPR for mod). */ | |
3554 | ||
3555 | static bool | |
3556 | convert_to_divmod (gassign *stmt) | |
3557 | { | |
aac19106 | 3558 | if (stmt_can_throw_internal (cfun, stmt) |
67f7b566 | 3559 | || !divmod_candidate_p (stmt)) |
3560 | return false; | |
3561 | ||
3562 | tree op1 = gimple_assign_rhs1 (stmt); | |
3563 | tree op2 = gimple_assign_rhs2 (stmt); | |
3564 | ||
3565 | imm_use_iterator use_iter; | |
3566 | gimple *use_stmt; | |
3567 | auto_vec<gimple *> stmts; | |
3568 | ||
3569 | gimple *top_stmt = stmt; | |
3570 | basic_block top_bb = gimple_bb (stmt); | |
3571 | ||
3572 | /* Part 1: Try to set top_stmt to "topmost" stmt that dominates | |
3573 | at-least stmt and possibly other trunc_div/trunc_mod stmts | |
3574 | having same operands as stmt. */ | |
3575 | ||
3576 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1) | |
3577 | { | |
3578 | if (is_gimple_assign (use_stmt) | |
3579 | && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR | |
3580 | || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) | |
3581 | && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0) | |
3582 | && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0)) | |
3583 | { | |
aac19106 | 3584 | if (stmt_can_throw_internal (cfun, use_stmt)) |
67f7b566 | 3585 | continue; |
3586 | ||
3587 | basic_block bb = gimple_bb (use_stmt); | |
3588 | ||
3589 | if (bb == top_bb) | |
3590 | { | |
3591 | if (gimple_uid (use_stmt) < gimple_uid (top_stmt)) | |
3592 | top_stmt = use_stmt; | |
3593 | } | |
3594 | else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb)) | |
3595 | { | |
3596 | top_bb = bb; | |
3597 | top_stmt = use_stmt; | |
3598 | } | |
3599 | } | |
3600 | } | |
3601 | ||
3602 | tree top_op1 = gimple_assign_rhs1 (top_stmt); | |
3603 | tree top_op2 = gimple_assign_rhs2 (top_stmt); | |
3604 | ||
3605 | stmts.safe_push (top_stmt); | |
3606 | bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR); | |
3607 | ||
3608 | /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb | |
3609 | to stmts vector. The 2nd loop will always add stmt to stmts vector, since | |
3610 | gimple_bb (top_stmt) dominates gimple_bb (stmt), so the | |
3611 | 2nd loop ends up adding at-least single trunc_mod_expr stmt. */ | |
3612 | ||
3613 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1) | |
3614 | { | |
3615 | if (is_gimple_assign (use_stmt) | |
3616 | && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR | |
3617 | || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) | |
3618 | && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0) | |
3619 | && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0)) | |
3620 | { | |
3621 | if (use_stmt == top_stmt | |
aac19106 | 3622 | || stmt_can_throw_internal (cfun, use_stmt) |
67f7b566 | 3623 | || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb)) |
3624 | continue; | |
3625 | ||
3626 | stmts.safe_push (use_stmt); | |
3627 | if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR) | |
3628 | div_seen = true; | |
3629 | } | |
3630 | } | |
3631 | ||
3632 | if (!div_seen) | |
3633 | return false; | |
3634 | ||
3635 | /* Part 3: Create libcall to internal fn DIVMOD: | |
3636 | divmod_tmp = DIVMOD (op1, op2). */ | |
3637 | ||
3638 | gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2); | |
3639 | tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)), | |
3640 | call_stmt, "divmod_tmp"); | |
3641 | gimple_call_set_lhs (call_stmt, res); | |
989f02dc | 3642 | /* We rejected throwing statements above. */ |
3643 | gimple_call_set_nothrow (call_stmt, true); | |
67f7b566 | 3644 | |
3645 | /* Insert the call before top_stmt. */ | |
3646 | gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt); | |
3647 | gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT); | |
3648 | ||
3649 | widen_mul_stats.divmod_calls_inserted++; | |
3650 | ||
3651 | /* Update all statements in stmts vector: | |
3652 | lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp> | |
3653 | lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */ | |
3654 | ||
3655 | for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i) | |
3656 | { | |
3657 | tree new_rhs; | |
3658 | ||
3659 | switch (gimple_assign_rhs_code (use_stmt)) | |
3660 | { | |
3661 | case TRUNC_DIV_EXPR: | |
3662 | new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res); | |
3663 | break; | |
3664 | ||
3665 | case TRUNC_MOD_EXPR: | |
3666 | new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res); | |
3667 | break; | |
3668 | ||
3669 | default: | |
3670 | gcc_unreachable (); | |
3671 | } | |
3672 | ||
3673 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); | |
3674 | gimple_assign_set_rhs_from_tree (&gsi, new_rhs); | |
3675 | update_stmt (use_stmt); | |
3676 | } | |
3677 | ||
3678 | return true; | |
3679 | } | |
e11a63e8 | 3680 | |
62be004c | 3681 | /* Find integer multiplications where the operands are extended from |
3682 | smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR | |
3683 | where appropriate. */ | |
3684 | ||
65b0537f | 3685 | namespace { |
3686 | ||
3687 | const pass_data pass_data_optimize_widening_mul = | |
3688 | { | |
3689 | GIMPLE_PASS, /* type */ | |
3690 | "widening_mul", /* name */ | |
3691 | OPTGROUP_NONE, /* optinfo_flags */ | |
8ed378fe | 3692 | TV_TREE_WIDEN_MUL, /* tv_id */ |
65b0537f | 3693 | PROP_ssa, /* properties_required */ |
3694 | 0, /* properties_provided */ | |
3695 | 0, /* properties_destroyed */ | |
3696 | 0, /* todo_flags_start */ | |
8b88439e | 3697 | TODO_update_ssa, /* todo_flags_finish */ |
65b0537f | 3698 | }; |
3699 | ||
3700 | class pass_optimize_widening_mul : public gimple_opt_pass | |
3701 | { | |
3702 | public: | |
3703 | pass_optimize_widening_mul (gcc::context *ctxt) | |
3704 | : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt) | |
3705 | {} | |
3706 | ||
3707 | /* opt_pass methods: */ | |
3708 | virtual bool gate (function *) | |
3709 | { | |
3710 | return flag_expensive_optimizations && optimize; | |
3711 | } | |
3712 | ||
3713 | virtual unsigned int execute (function *); | |
3714 | ||
3715 | }; // class pass_optimize_widening_mul | |
3716 | ||
ed306e55 | 3717 | /* Walker class to perform the transformation in reverse dominance order. */ |
3718 | ||
3719 | class math_opts_dom_walker : public dom_walker | |
62be004c | 3720 | { |
ed306e55 | 3721 | public: |
3722 | /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set | |
3723 | if walking modidifes the CFG. */ | |
62be004c | 3724 | |
ed306e55 | 3725 | math_opts_dom_walker (bool *cfg_changed_p) |
3726 | : dom_walker (CDI_DOMINATORS), m_last_result_set (), | |
3727 | m_cfg_changed_p (cfg_changed_p) {} | |
30c4e60d | 3728 | |
ed306e55 | 3729 | /* The actual actions performed in the walk. */ |
3730 | ||
3731 | virtual void after_dom_children (basic_block); | |
3732 | ||
3733 | /* Set of results of chains of multiply and add statement combinations that | |
3734 | were not transformed into FMAs because of active deferring. */ | |
3735 | hash_set<tree> m_last_result_set; | |
3736 | ||
3737 | /* Pointer to a flag of the user that needs to be set if CFG has been | |
3738 | modified. */ | |
3739 | bool *m_cfg_changed_p; | |
3740 | }; | |
3741 | ||
3742 | void | |
3743 | math_opts_dom_walker::after_dom_children (basic_block bb) | |
3744 | { | |
3745 | gimple_stmt_iterator gsi; | |
3746 | ||
3747 | fma_deferring_state fma_state (PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS) > 0); | |
3748 | ||
3749 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) | |
62be004c | 3750 | { |
ed306e55 | 3751 | gimple *stmt = gsi_stmt (gsi); |
3752 | enum tree_code code; | |
62be004c | 3753 | |
ed306e55 | 3754 | if (is_gimple_assign (stmt)) |
3755 | { | |
3756 | code = gimple_assign_rhs_code (stmt); | |
3757 | switch (code) | |
3758 | { | |
3759 | case MULT_EXPR: | |
3760 | if (!convert_mult_to_widen (stmt, &gsi) | |
3761 | && !convert_expand_mult_copysign (stmt, &gsi) | |
3762 | && convert_mult_to_fma (stmt, | |
3763 | gimple_assign_rhs1 (stmt), | |
3764 | gimple_assign_rhs2 (stmt), | |
3765 | &fma_state)) | |
3766 | { | |
3767 | gsi_remove (&gsi, true); | |
3768 | release_defs (stmt); | |
3769 | continue; | |
3770 | } | |
3771 | break; | |
3772 | ||
3773 | case PLUS_EXPR: | |
3774 | case MINUS_EXPR: | |
3775 | if (!convert_plusminus_to_widen (&gsi, stmt, code)) | |
3776 | match_uaddsub_overflow (&gsi, stmt, code); | |
3777 | break; | |
62be004c | 3778 | |
ed306e55 | 3779 | case TRUNC_MOD_EXPR: |
3780 | convert_to_divmod (as_a<gassign *> (stmt)); | |
3781 | break; | |
3782 | ||
3783 | default:; | |
3784 | } | |
3785 | } | |
3786 | else if (is_gimple_call (stmt)) | |
3787 | { | |
9c19fd8a | 3788 | switch (gimple_call_combined_fn (stmt)) |
b9be572e | 3789 | { |
9c19fd8a | 3790 | CASE_CFN_POW: |
3791 | if (gimple_call_lhs (stmt) | |
3792 | && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST | |
3793 | && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), | |
3794 | &dconst2) | |
3795 | && convert_mult_to_fma (stmt, | |
3796 | gimple_call_arg (stmt, 0), | |
3797 | gimple_call_arg (stmt, 0), | |
3798 | &fma_state)) | |
b9be572e | 3799 | { |
9c19fd8a | 3800 | unlink_stmt_vdef (stmt); |
3801 | if (gsi_remove (&gsi, true) | |
3802 | && gimple_purge_dead_eh_edges (bb)) | |
3803 | *m_cfg_changed_p = true; | |
3804 | release_defs (stmt); | |
3805 | continue; | |
3806 | } | |
3807 | break; | |
b9be572e | 3808 | |
9c19fd8a | 3809 | case CFN_COND_MUL: |
3810 | if (convert_mult_to_fma (stmt, | |
3811 | gimple_call_arg (stmt, 1), | |
3812 | gimple_call_arg (stmt, 2), | |
3813 | &fma_state, | |
3814 | gimple_call_arg (stmt, 0))) | |
3815 | ||
3816 | { | |
3817 | gsi_remove (&gsi, true); | |
3818 | release_defs (stmt); | |
3819 | continue; | |
b9be572e | 3820 | } |
9c19fd8a | 3821 | break; |
3822 | ||
3823 | case CFN_LAST: | |
3824 | cancel_fma_deferring (&fma_state); | |
3825 | break; | |
3826 | ||
3827 | default: | |
3828 | break; | |
b9be572e | 3829 | } |
62be004c | 3830 | } |
ed306e55 | 3831 | gsi_next (&gsi); |
62be004c | 3832 | } |
ed306e55 | 3833 | if (fma_state.m_deferring_p |
3834 | && fma_state.m_initial_phi) | |
3835 | { | |
3836 | gcc_checking_assert (fma_state.m_last_result); | |
3837 | if (!last_fma_candidate_feeds_initial_phi (&fma_state, | |
3838 | &m_last_result_set)) | |
3839 | cancel_fma_deferring (&fma_state); | |
3840 | else | |
3841 | m_last_result_set.add (fma_state.m_last_result); | |
3842 | } | |
3843 | } | |
3844 | ||
3845 | ||
3846 | unsigned int | |
3847 | pass_optimize_widening_mul::execute (function *fun) | |
3848 | { | |
3849 | bool cfg_changed = false; | |
3850 | ||
3851 | memset (&widen_mul_stats, 0, sizeof (widen_mul_stats)); | |
3852 | calculate_dominance_info (CDI_DOMINATORS); | |
3853 | renumber_gimple_stmt_uids (); | |
3854 | ||
3855 | math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun)); | |
00f4f705 | 3856 | |
65b0537f | 3857 | statistics_counter_event (fun, "widening multiplications inserted", |
30c4e60d | 3858 | widen_mul_stats.widen_mults_inserted); |
65b0537f | 3859 | statistics_counter_event (fun, "widening maccs inserted", |
30c4e60d | 3860 | widen_mul_stats.maccs_inserted); |
65b0537f | 3861 | statistics_counter_event (fun, "fused multiply-adds inserted", |
30c4e60d | 3862 | widen_mul_stats.fmas_inserted); |
67f7b566 | 3863 | statistics_counter_event (fun, "divmod calls inserted", |
3864 | widen_mul_stats.divmod_calls_inserted); | |
30c4e60d | 3865 | |
15dbdc8f | 3866 | return cfg_changed ? TODO_cleanup_cfg : 0; |
62be004c | 3867 | } |
3868 | ||
cbe8bda8 | 3869 | } // anon namespace |
3870 | ||
3871 | gimple_opt_pass * | |
3872 | make_pass_optimize_widening_mul (gcc::context *ctxt) | |
3873 | { | |
3874 | return new pass_optimize_widening_mul (ctxt); | |
3875 | } |