]>
Commit | Line | Data |
---|---|---|
abacb398 | 1 | /* Global, SSA-based optimizations using mathematical identities. |
aad93da1 | 2 | Copyright (C) 2005-2017 Free Software Foundation, Inc. |
48e1416a | 3 | |
abacb398 | 4 | This file is part of GCC. |
48e1416a | 5 | |
abacb398 | 6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
8c4c00c1 | 8 | Free Software Foundation; either version 3, or (at your option) any |
abacb398 | 9 | later version. |
48e1416a | 10 | |
abacb398 | 11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
48e1416a | 15 | |
abacb398 | 16 | You should have received a copy of the GNU General Public License |
8c4c00c1 | 17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
abacb398 | 19 | |
20 | /* Currently, the only mini-pass in this file tries to CSE reciprocal | |
21 | operations. These are common in sequences such as this one: | |
22 | ||
23 | modulus = sqrt(x*x + y*y + z*z); | |
24 | x = x / modulus; | |
25 | y = y / modulus; | |
26 | z = z / modulus; | |
27 | ||
28 | that can be optimized to | |
29 | ||
30 | modulus = sqrt(x*x + y*y + z*z); | |
31 | rmodulus = 1.0 / modulus; | |
32 | x = x * rmodulus; | |
33 | y = y * rmodulus; | |
34 | z = z * rmodulus; | |
35 | ||
36 | We do this for loop invariant divisors, and with this pass whenever | |
ac70caad | 37 | we notice that a division has the same divisor multiple times. |
38 | ||
39 | Of course, like in PRE, we don't insert a division if a dominator | |
40 | already has one. However, this cannot be done as an extension of | |
41 | PRE for several reasons. | |
42 | ||
43 | First of all, with some experiments it was found out that the | |
44 | transformation is not always useful if there are only two divisions | |
24794e79 | 45 | by the same divisor. This is probably because modern processors |
ac70caad | 46 | can pipeline the divisions; on older, in-order processors it should |
47 | still be effective to optimize two divisions by the same number. | |
48 | We make this a param, and it shall be called N in the remainder of | |
49 | this comment. | |
50 | ||
51 | Second, if trapping math is active, we have less freedom on where | |
52 | to insert divisions: we can only do so in basic blocks that already | |
53 | contain one. (If divisions don't trap, instead, we can insert | |
54 | divisions elsewhere, which will be in blocks that are common dominators | |
55 | of those that have the division). | |
56 | ||
57 | We really don't want to compute the reciprocal unless a division will | |
58 | be found. To do this, we won't insert the division in a basic block | |
59 | that has less than N divisions *post-dominating* it. | |
60 | ||
61 | The algorithm constructs a subset of the dominator tree, holding the | |
62 | blocks containing the divisions and the common dominators to them, | |
63 | and walk it twice. The first walk is in post-order, and it annotates | |
64 | each block with the number of divisions that post-dominate it: this | |
65 | gives information on where divisions can be inserted profitably. | |
66 | The second walk is in pre-order, and it inserts divisions as explained | |
67 | above, and replaces divisions by multiplications. | |
68 | ||
69 | In the best case, the cost of the pass is O(n_statements). In the | |
70 | worst-case, the cost is due to creating the dominator tree subset, | |
71 | with a cost of O(n_basic_blocks ^ 2); however this can only happen | |
72 | for n_statements / n_basic_blocks statements. So, the amortized cost | |
73 | of creating the dominator tree subset is O(n_basic_blocks) and the | |
74 | worst-case cost of the pass is O(n_statements * n_basic_blocks). | |
75 | ||
76 | More practically, the cost will be small because there are few | |
77 | divisions, and they tend to be in the same basic block, so insert_bb | |
78 | is called very few times. | |
79 | ||
80 | If we did this using domwalk.c, an efficient implementation would have | |
81 | to work on all the variables in a single pass, because we could not | |
82 | work on just a subset of the dominator tree, as we do now, and the | |
83 | cost would also be something like O(n_statements * n_basic_blocks). | |
84 | The data structures would be more complex in order to work on all the | |
85 | variables in a single pass. */ | |
abacb398 | 86 | |
87 | #include "config.h" | |
88 | #include "system.h" | |
89 | #include "coretypes.h" | |
9ef16211 | 90 | #include "backend.h" |
7c29e30e | 91 | #include "target.h" |
92 | #include "rtl.h" | |
9ef16211 | 93 | #include "tree.h" |
94 | #include "gimple.h" | |
7c29e30e | 95 | #include "predict.h" |
96 | #include "alloc-pool.h" | |
97 | #include "tree-pass.h" | |
9ef16211 | 98 | #include "ssa.h" |
7c29e30e | 99 | #include "optabs-tree.h" |
100 | #include "gimple-pretty-print.h" | |
b20a8bb4 | 101 | #include "alias.h" |
b20a8bb4 | 102 | #include "fold-const.h" |
bc61cadb | 103 | #include "gimple-fold.h" |
dcf1a1ec | 104 | #include "gimple-iterator.h" |
470d5bb5 | 105 | #include "gimplify.h" |
e795d6e1 | 106 | #include "gimplify-me.h" |
9ed99284 | 107 | #include "stor-layout.h" |
073c1fd5 | 108 | #include "tree-cfg.h" |
073c1fd5 | 109 | #include "tree-dfa.h" |
69ee5dbb | 110 | #include "tree-ssa.h" |
f7715905 | 111 | #include "builtins.h" |
c3206272 | 112 | #include "params.h" |
4cfd27a5 | 113 | #include "internal-fn.h" |
fa0793ad | 114 | #include "case-cfn-macros.h" |
67f7b566 | 115 | #include "optabs-libfuncs.h" |
116 | #include "tree-eh.h" | |
117 | #include "targhooks.h" | |
ac70caad | 118 | |
119 | /* This structure represents one basic block that either computes a | |
120 | division, or is a common dominator for basic block that compute a | |
121 | division. */ | |
122 | struct occurrence { | |
123 | /* The basic block represented by this structure. */ | |
124 | basic_block bb; | |
125 | ||
126 | /* If non-NULL, the SSA_NAME holding the definition for a reciprocal | |
127 | inserted in BB. */ | |
128 | tree recip_def; | |
129 | ||
75a70cf9 | 130 | /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that |
ac70caad | 131 | was inserted in BB. */ |
42acab1c | 132 | gimple *recip_def_stmt; |
ac70caad | 133 | |
134 | /* Pointer to a list of "struct occurrence"s for blocks dominated | |
135 | by BB. */ | |
136 | struct occurrence *children; | |
137 | ||
138 | /* Pointer to the next "struct occurrence"s in the list of blocks | |
139 | sharing a common dominator. */ | |
140 | struct occurrence *next; | |
141 | ||
142 | /* The number of divisions that are in BB before compute_merit. The | |
143 | number of divisions that are in BB or post-dominate it after | |
144 | compute_merit. */ | |
145 | int num_divisions; | |
146 | ||
147 | /* True if the basic block has a division, false if it is a common | |
148 | dominator for basic blocks that do. If it is false and trapping | |
149 | math is active, BB is not a candidate for inserting a reciprocal. */ | |
150 | bool bb_has_division; | |
151 | }; | |
152 | ||
30c4e60d | 153 | static struct |
154 | { | |
155 | /* Number of 1.0/X ops inserted. */ | |
156 | int rdivs_inserted; | |
157 | ||
158 | /* Number of 1.0/FUNC ops inserted. */ | |
159 | int rfuncs_inserted; | |
160 | } reciprocal_stats; | |
161 | ||
162 | static struct | |
163 | { | |
164 | /* Number of cexpi calls inserted. */ | |
165 | int inserted; | |
166 | } sincos_stats; | |
167 | ||
168 | static struct | |
169 | { | |
470d5bb5 | 170 | /* Number of hand-written 16-bit nop / bswaps found. */ |
f811051b | 171 | int found_16bit; |
172 | ||
470d5bb5 | 173 | /* Number of hand-written 32-bit nop / bswaps found. */ |
30c4e60d | 174 | int found_32bit; |
175 | ||
470d5bb5 | 176 | /* Number of hand-written 64-bit nop / bswaps found. */ |
30c4e60d | 177 | int found_64bit; |
470d5bb5 | 178 | } nop_stats, bswap_stats; |
30c4e60d | 179 | |
180 | static struct | |
181 | { | |
182 | /* Number of widening multiplication ops inserted. */ | |
183 | int widen_mults_inserted; | |
184 | ||
185 | /* Number of integer multiply-and-accumulate ops inserted. */ | |
186 | int maccs_inserted; | |
187 | ||
188 | /* Number of fp fused multiply-add ops inserted. */ | |
189 | int fmas_inserted; | |
67f7b566 | 190 | |
191 | /* Number of divmod calls inserted. */ | |
192 | int divmod_calls_inserted; | |
30c4e60d | 193 | } widen_mul_stats; |
ac70caad | 194 | |
195 | /* The instance of "struct occurrence" representing the highest | |
196 | interesting block in the dominator tree. */ | |
197 | static struct occurrence *occ_head; | |
198 | ||
199 | /* Allocation pool for getting instances of "struct occurrence". */ | |
e16712b1 | 200 | static object_allocator<occurrence> *occ_pool; |
ac70caad | 201 | |
202 | ||
203 | ||
204 | /* Allocate and return a new struct occurrence for basic block BB, and | |
205 | whose children list is headed by CHILDREN. */ | |
206 | static struct occurrence * | |
207 | occ_new (basic_block bb, struct occurrence *children) | |
abacb398 | 208 | { |
ac70caad | 209 | struct occurrence *occ; |
210 | ||
d8e7268c | 211 | bb->aux = occ = occ_pool->allocate (); |
ac70caad | 212 | memset (occ, 0, sizeof (struct occurrence)); |
213 | ||
214 | occ->bb = bb; | |
215 | occ->children = children; | |
216 | return occ; | |
abacb398 | 217 | } |
218 | ||
ac70caad | 219 | |
220 | /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a | |
221 | list of "struct occurrence"s, one per basic block, having IDOM as | |
222 | their common dominator. | |
223 | ||
224 | We try to insert NEW_OCC as deep as possible in the tree, and we also | |
225 | insert any other block that is a common dominator for BB and one | |
226 | block already in the tree. */ | |
227 | ||
228 | static void | |
229 | insert_bb (struct occurrence *new_occ, basic_block idom, | |
230 | struct occurrence **p_head) | |
9e583fac | 231 | { |
ac70caad | 232 | struct occurrence *occ, **p_occ; |
9e583fac | 233 | |
ac70caad | 234 | for (p_occ = p_head; (occ = *p_occ) != NULL; ) |
235 | { | |
236 | basic_block bb = new_occ->bb, occ_bb = occ->bb; | |
237 | basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb); | |
238 | if (dom == bb) | |
239 | { | |
240 | /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC | |
241 | from its list. */ | |
242 | *p_occ = occ->next; | |
243 | occ->next = new_occ->children; | |
244 | new_occ->children = occ; | |
245 | ||
246 | /* Try the next block (it may as well be dominated by BB). */ | |
247 | } | |
248 | ||
249 | else if (dom == occ_bb) | |
250 | { | |
251 | /* OCC_BB dominates BB. Tail recurse to look deeper. */ | |
252 | insert_bb (new_occ, dom, &occ->children); | |
253 | return; | |
254 | } | |
255 | ||
256 | else if (dom != idom) | |
257 | { | |
258 | gcc_assert (!dom->aux); | |
259 | ||
260 | /* There is a dominator between IDOM and BB, add it and make | |
261 | two children out of NEW_OCC and OCC. First, remove OCC from | |
262 | its list. */ | |
263 | *p_occ = occ->next; | |
264 | new_occ->next = occ; | |
265 | occ->next = NULL; | |
266 | ||
267 | /* None of the previous blocks has DOM as a dominator: if we tail | |
268 | recursed, we would reexamine them uselessly. Just switch BB with | |
269 | DOM, and go on looking for blocks dominated by DOM. */ | |
270 | new_occ = occ_new (dom, new_occ); | |
271 | } | |
272 | ||
273 | else | |
274 | { | |
275 | /* Nothing special, go on with the next element. */ | |
276 | p_occ = &occ->next; | |
277 | } | |
278 | } | |
279 | ||
280 | /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */ | |
281 | new_occ->next = *p_head; | |
282 | *p_head = new_occ; | |
283 | } | |
284 | ||
285 | /* Register that we found a division in BB. */ | |
286 | ||
287 | static inline void | |
288 | register_division_in (basic_block bb) | |
289 | { | |
290 | struct occurrence *occ; | |
291 | ||
292 | occ = (struct occurrence *) bb->aux; | |
293 | if (!occ) | |
294 | { | |
295 | occ = occ_new (bb, NULL); | |
34154e27 | 296 | insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); |
ac70caad | 297 | } |
298 | ||
299 | occ->bb_has_division = true; | |
300 | occ->num_divisions++; | |
301 | } | |
302 | ||
303 | ||
304 | /* Compute the number of divisions that postdominate each block in OCC and | |
305 | its children. */ | |
abacb398 | 306 | |
abacb398 | 307 | static void |
ac70caad | 308 | compute_merit (struct occurrence *occ) |
abacb398 | 309 | { |
ac70caad | 310 | struct occurrence *occ_child; |
311 | basic_block dom = occ->bb; | |
abacb398 | 312 | |
ac70caad | 313 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
abacb398 | 314 | { |
ac70caad | 315 | basic_block bb; |
316 | if (occ_child->children) | |
317 | compute_merit (occ_child); | |
318 | ||
319 | if (flag_exceptions) | |
320 | bb = single_noncomplex_succ (dom); | |
321 | else | |
322 | bb = dom; | |
323 | ||
324 | if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb)) | |
325 | occ->num_divisions += occ_child->num_divisions; | |
326 | } | |
327 | } | |
328 | ||
329 | ||
330 | /* Return whether USE_STMT is a floating-point division by DEF. */ | |
331 | static inline bool | |
42acab1c | 332 | is_division_by (gimple *use_stmt, tree def) |
ac70caad | 333 | { |
75a70cf9 | 334 | return is_gimple_assign (use_stmt) |
335 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR | |
336 | && gimple_assign_rhs2 (use_stmt) == def | |
119368d7 | 337 | /* Do not recognize x / x as valid division, as we are getting |
338 | confused later by replacing all immediate uses x in such | |
339 | a stmt. */ | |
75a70cf9 | 340 | && gimple_assign_rhs1 (use_stmt) != def; |
ac70caad | 341 | } |
342 | ||
343 | /* Walk the subset of the dominator tree rooted at OCC, setting the | |
344 | RECIP_DEF field to a definition of 1.0 / DEF that can be used in | |
345 | the given basic block. The field may be left NULL, of course, | |
346 | if it is not possible or profitable to do the optimization. | |
347 | ||
348 | DEF_BSI is an iterator pointing at the statement defining DEF. | |
349 | If RECIP_DEF is set, a dominator already has a computation that can | |
350 | be used. */ | |
351 | ||
352 | static void | |
75a70cf9 | 353 | insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, |
ac70caad | 354 | tree def, tree recip_def, int threshold) |
355 | { | |
75a70cf9 | 356 | tree type; |
1a91d914 | 357 | gassign *new_stmt; |
75a70cf9 | 358 | gimple_stmt_iterator gsi; |
ac70caad | 359 | struct occurrence *occ_child; |
360 | ||
361 | if (!recip_def | |
362 | && (occ->bb_has_division || !flag_trapping_math) | |
363 | && occ->num_divisions >= threshold) | |
364 | { | |
365 | /* Make a variable with the replacement and substitute it. */ | |
366 | type = TREE_TYPE (def); | |
072f7ab1 | 367 | recip_def = create_tmp_reg (type, "reciptmp"); |
e9cf809e | 368 | new_stmt = gimple_build_assign (recip_def, RDIV_EXPR, |
369 | build_one_cst (type), def); | |
48e1416a | 370 | |
ac70caad | 371 | if (occ->bb_has_division) |
372 | { | |
373 | /* Case 1: insert before an existing division. */ | |
75a70cf9 | 374 | gsi = gsi_after_labels (occ->bb); |
375 | while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def)) | |
376 | gsi_next (&gsi); | |
ac70caad | 377 | |
75a70cf9 | 378 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
ac70caad | 379 | } |
75a70cf9 | 380 | else if (def_gsi && occ->bb == def_gsi->bb) |
685b24f5 | 381 | { |
ac70caad | 382 | /* Case 2: insert right after the definition. Note that this will |
383 | never happen if the definition statement can throw, because in | |
384 | that case the sole successor of the statement's basic block will | |
385 | dominate all the uses as well. */ | |
75a70cf9 | 386 | gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); |
685b24f5 | 387 | } |
ac70caad | 388 | else |
389 | { | |
390 | /* Case 3: insert in a basic block not containing defs/uses. */ | |
75a70cf9 | 391 | gsi = gsi_after_labels (occ->bb); |
392 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); | |
ac70caad | 393 | } |
394 | ||
30c4e60d | 395 | reciprocal_stats.rdivs_inserted++; |
396 | ||
ac70caad | 397 | occ->recip_def_stmt = new_stmt; |
abacb398 | 398 | } |
399 | ||
ac70caad | 400 | occ->recip_def = recip_def; |
401 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) | |
75a70cf9 | 402 | insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold); |
ac70caad | 403 | } |
404 | ||
405 | ||
406 | /* Replace the division at USE_P with a multiplication by the reciprocal, if | |
407 | possible. */ | |
408 | ||
409 | static inline void | |
410 | replace_reciprocal (use_operand_p use_p) | |
411 | { | |
42acab1c | 412 | gimple *use_stmt = USE_STMT (use_p); |
75a70cf9 | 413 | basic_block bb = gimple_bb (use_stmt); |
ac70caad | 414 | struct occurrence *occ = (struct occurrence *) bb->aux; |
415 | ||
0bfd8d5c | 416 | if (optimize_bb_for_speed_p (bb) |
417 | && occ->recip_def && use_stmt != occ->recip_def_stmt) | |
ac70caad | 418 | { |
50aacf4c | 419 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
75a70cf9 | 420 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |
ac70caad | 421 | SET_USE (use_p, occ->recip_def); |
50aacf4c | 422 | fold_stmt_inplace (&gsi); |
ac70caad | 423 | update_stmt (use_stmt); |
424 | } | |
425 | } | |
426 | ||
427 | ||
428 | /* Free OCC and return one more "struct occurrence" to be freed. */ | |
429 | ||
430 | static struct occurrence * | |
431 | free_bb (struct occurrence *occ) | |
432 | { | |
433 | struct occurrence *child, *next; | |
434 | ||
435 | /* First get the two pointers hanging off OCC. */ | |
436 | next = occ->next; | |
437 | child = occ->children; | |
438 | occ->bb->aux = NULL; | |
d8e7268c | 439 | occ_pool->remove (occ); |
ac70caad | 440 | |
441 | /* Now ensure that we don't recurse unless it is necessary. */ | |
442 | if (!child) | |
443 | return next; | |
9e583fac | 444 | else |
ac70caad | 445 | { |
446 | while (next) | |
447 | next = free_bb (next); | |
448 | ||
449 | return child; | |
450 | } | |
451 | } | |
452 | ||
453 | ||
454 | /* Look for floating-point divisions among DEF's uses, and try to | |
455 | replace them by multiplications with the reciprocal. Add | |
456 | as many statements computing the reciprocal as needed. | |
457 | ||
458 | DEF must be a GIMPLE register of a floating-point type. */ | |
459 | ||
460 | static void | |
75a70cf9 | 461 | execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) |
ac70caad | 462 | { |
463 | use_operand_p use_p; | |
464 | imm_use_iterator use_iter; | |
465 | struct occurrence *occ; | |
466 | int count = 0, threshold; | |
abacb398 | 467 | |
ac70caad | 468 | gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def)); |
469 | ||
470 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) | |
abacb398 | 471 | { |
42acab1c | 472 | gimple *use_stmt = USE_STMT (use_p); |
ac70caad | 473 | if (is_division_by (use_stmt, def)) |
abacb398 | 474 | { |
75a70cf9 | 475 | register_division_in (gimple_bb (use_stmt)); |
ac70caad | 476 | count++; |
abacb398 | 477 | } |
478 | } | |
48e1416a | 479 | |
ac70caad | 480 | /* Do the expensive part only if we can hope to optimize something. */ |
481 | threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); | |
482 | if (count >= threshold) | |
483 | { | |
42acab1c | 484 | gimple *use_stmt; |
ac70caad | 485 | for (occ = occ_head; occ; occ = occ->next) |
486 | { | |
487 | compute_merit (occ); | |
75a70cf9 | 488 | insert_reciprocals (def_gsi, occ, def, NULL, threshold); |
ac70caad | 489 | } |
490 | ||
09aca5bc | 491 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def) |
ac70caad | 492 | { |
ac70caad | 493 | if (is_division_by (use_stmt, def)) |
09aca5bc | 494 | { |
495 | FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) | |
496 | replace_reciprocal (use_p); | |
497 | } | |
ac70caad | 498 | } |
499 | } | |
500 | ||
501 | for (occ = occ_head; occ; ) | |
502 | occ = free_bb (occ); | |
503 | ||
504 | occ_head = NULL; | |
abacb398 | 505 | } |
506 | ||
4cfd27a5 | 507 | /* Return an internal function that implements the reciprocal of CALL, |
508 | or IFN_LAST if there is no such function that the target supports. */ | |
509 | ||
510 | internal_fn | |
511 | internal_fn_reciprocal (gcall *call) | |
512 | { | |
513 | internal_fn ifn; | |
514 | ||
515 | switch (gimple_call_combined_fn (call)) | |
516 | { | |
517 | CASE_CFN_SQRT: | |
518 | ifn = IFN_RSQRT; | |
519 | break; | |
520 | ||
521 | default: | |
522 | return IFN_LAST; | |
523 | } | |
524 | ||
525 | tree_pair types = direct_internal_fn_types (ifn, call); | |
526 | if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED)) | |
527 | return IFN_LAST; | |
528 | ||
529 | return ifn; | |
530 | } | |
531 | ||
ac70caad | 532 | /* Go through all the floating-point SSA_NAMEs, and call |
533 | execute_cse_reciprocals_1 on each of them. */ | |
65b0537f | 534 | namespace { |
535 | ||
536 | const pass_data pass_data_cse_reciprocals = | |
537 | { | |
538 | GIMPLE_PASS, /* type */ | |
539 | "recip", /* name */ | |
540 | OPTGROUP_NONE, /* optinfo_flags */ | |
65b0537f | 541 | TV_NONE, /* tv_id */ |
542 | PROP_ssa, /* properties_required */ | |
543 | 0, /* properties_provided */ | |
544 | 0, /* properties_destroyed */ | |
545 | 0, /* todo_flags_start */ | |
8b88439e | 546 | TODO_update_ssa, /* todo_flags_finish */ |
65b0537f | 547 | }; |
548 | ||
549 | class pass_cse_reciprocals : public gimple_opt_pass | |
550 | { | |
551 | public: | |
552 | pass_cse_reciprocals (gcc::context *ctxt) | |
553 | : gimple_opt_pass (pass_data_cse_reciprocals, ctxt) | |
554 | {} | |
555 | ||
556 | /* opt_pass methods: */ | |
557 | virtual bool gate (function *) { return optimize && flag_reciprocal_math; } | |
558 | virtual unsigned int execute (function *); | |
559 | ||
560 | }; // class pass_cse_reciprocals | |
561 | ||
562 | unsigned int | |
563 | pass_cse_reciprocals::execute (function *fun) | |
abacb398 | 564 | { |
565 | basic_block bb; | |
51b60a11 | 566 | tree arg; |
685b24f5 | 567 | |
1dc6c44d | 568 | occ_pool = new object_allocator<occurrence> ("dominators for recip"); |
685b24f5 | 569 | |
30c4e60d | 570 | memset (&reciprocal_stats, 0, sizeof (reciprocal_stats)); |
c136ae61 | 571 | calculate_dominance_info (CDI_DOMINATORS); |
572 | calculate_dominance_info (CDI_POST_DOMINATORS); | |
ac70caad | 573 | |
382ecba7 | 574 | if (flag_checking) |
575 | FOR_EACH_BB_FN (bb, fun) | |
576 | gcc_assert (!bb->aux); | |
ac70caad | 577 | |
65b0537f | 578 | for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg)) |
c6dfe037 | 579 | if (FLOAT_TYPE_P (TREE_TYPE (arg)) |
ac70caad | 580 | && is_gimple_reg (arg)) |
c6dfe037 | 581 | { |
65b0537f | 582 | tree name = ssa_default_def (fun, arg); |
c6dfe037 | 583 | if (name) |
584 | execute_cse_reciprocals_1 (NULL, name); | |
585 | } | |
51b60a11 | 586 | |
65b0537f | 587 | FOR_EACH_BB_FN (bb, fun) |
abacb398 | 588 | { |
75a70cf9 | 589 | tree def; |
abacb398 | 590 | |
1a91d914 | 591 | for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); |
592 | gsi_next (&gsi)) | |
abacb398 | 593 | { |
1a91d914 | 594 | gphi *phi = gsi.phi (); |
abacb398 | 595 | def = PHI_RESULT (phi); |
7c782c9b | 596 | if (! virtual_operand_p (def) |
597 | && FLOAT_TYPE_P (TREE_TYPE (def))) | |
ac70caad | 598 | execute_cse_reciprocals_1 (NULL, def); |
abacb398 | 599 | } |
600 | ||
1a91d914 | 601 | for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); |
602 | gsi_next (&gsi)) | |
abacb398 | 603 | { |
42acab1c | 604 | gimple *stmt = gsi_stmt (gsi); |
a0315874 | 605 | |
75a70cf9 | 606 | if (gimple_has_lhs (stmt) |
abacb398 | 607 | && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL |
608 | && FLOAT_TYPE_P (TREE_TYPE (def)) | |
51b60a11 | 609 | && TREE_CODE (def) == SSA_NAME) |
75a70cf9 | 610 | execute_cse_reciprocals_1 (&gsi, def); |
abacb398 | 611 | } |
e174638f | 612 | |
0bfd8d5c | 613 | if (optimize_bb_for_size_p (bb)) |
614 | continue; | |
615 | ||
e174638f | 616 | /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */ |
1a91d914 | 617 | for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); |
618 | gsi_next (&gsi)) | |
e174638f | 619 | { |
42acab1c | 620 | gimple *stmt = gsi_stmt (gsi); |
e174638f | 621 | |
75a70cf9 | 622 | if (is_gimple_assign (stmt) |
623 | && gimple_assign_rhs_code (stmt) == RDIV_EXPR) | |
e174638f | 624 | { |
75a70cf9 | 625 | tree arg1 = gimple_assign_rhs2 (stmt); |
42acab1c | 626 | gimple *stmt1; |
2cd360b6 | 627 | |
628 | if (TREE_CODE (arg1) != SSA_NAME) | |
629 | continue; | |
630 | ||
631 | stmt1 = SSA_NAME_DEF_STMT (arg1); | |
e174638f | 632 | |
75a70cf9 | 633 | if (is_gimple_call (stmt1) |
4cfd27a5 | 634 | && gimple_call_lhs (stmt1)) |
e174638f | 635 | { |
851c1b0c | 636 | bool fail; |
774b1cdd | 637 | imm_use_iterator ui; |
638 | use_operand_p use_p; | |
4cfd27a5 | 639 | tree fndecl = NULL_TREE; |
e174638f | 640 | |
4cfd27a5 | 641 | gcall *call = as_a <gcall *> (stmt1); |
642 | internal_fn ifn = internal_fn_reciprocal (call); | |
643 | if (ifn == IFN_LAST) | |
644 | { | |
645 | fndecl = gimple_call_fndecl (call); | |
646 | if (!fndecl | |
647 | || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD) | |
648 | continue; | |
649 | fndecl = targetm.builtin_reciprocal (fndecl); | |
650 | if (!fndecl) | |
651 | continue; | |
652 | } | |
e174638f | 653 | |
774b1cdd | 654 | /* Check that all uses of the SSA name are divisions, |
655 | otherwise replacing the defining statement will do | |
656 | the wrong thing. */ | |
657 | fail = false; | |
658 | FOR_EACH_IMM_USE_FAST (use_p, ui, arg1) | |
659 | { | |
42acab1c | 660 | gimple *stmt2 = USE_STMT (use_p); |
774b1cdd | 661 | if (is_gimple_debug (stmt2)) |
662 | continue; | |
663 | if (!is_gimple_assign (stmt2) | |
664 | || gimple_assign_rhs_code (stmt2) != RDIV_EXPR | |
665 | || gimple_assign_rhs1 (stmt2) == arg1 | |
666 | || gimple_assign_rhs2 (stmt2) != arg1) | |
667 | { | |
668 | fail = true; | |
669 | break; | |
670 | } | |
671 | } | |
672 | if (fail) | |
673 | continue; | |
674 | ||
4cfd27a5 | 675 | gimple_replace_ssa_lhs (call, arg1); |
676 | if (gimple_call_internal_p (call) != (ifn != IFN_LAST)) | |
851c1b0c | 677 | { |
678 | auto_vec<tree, 4> args; | |
679 | for (unsigned int i = 0; | |
4cfd27a5 | 680 | i < gimple_call_num_args (call); i++) |
681 | args.safe_push (gimple_call_arg (call, i)); | |
682 | gcall *stmt2; | |
683 | if (ifn == IFN_LAST) | |
684 | stmt2 = gimple_build_call_vec (fndecl, args); | |
685 | else | |
686 | stmt2 = gimple_build_call_internal_vec (ifn, args); | |
851c1b0c | 687 | gimple_call_set_lhs (stmt2, arg1); |
4cfd27a5 | 688 | if (gimple_vdef (call)) |
851c1b0c | 689 | { |
4cfd27a5 | 690 | gimple_set_vdef (stmt2, gimple_vdef (call)); |
851c1b0c | 691 | SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2; |
692 | } | |
989f02dc | 693 | gimple_call_set_nothrow (stmt2, |
694 | gimple_call_nothrow_p (call)); | |
4cfd27a5 | 695 | gimple_set_vuse (stmt2, gimple_vuse (call)); |
696 | gimple_stmt_iterator gsi2 = gsi_for_stmt (call); | |
851c1b0c | 697 | gsi_replace (&gsi2, stmt2, true); |
698 | } | |
699 | else | |
700 | { | |
4cfd27a5 | 701 | if (ifn == IFN_LAST) |
702 | gimple_call_set_fndecl (call, fndecl); | |
703 | else | |
704 | gimple_call_set_internal_fn (call, ifn); | |
705 | update_stmt (call); | |
851c1b0c | 706 | } |
30c4e60d | 707 | reciprocal_stats.rfuncs_inserted++; |
e174638f | 708 | |
774b1cdd | 709 | FOR_EACH_IMM_USE_STMT (stmt, ui, arg1) |
710 | { | |
50aacf4c | 711 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
774b1cdd | 712 | gimple_assign_set_rhs_code (stmt, MULT_EXPR); |
50aacf4c | 713 | fold_stmt_inplace (&gsi); |
774b1cdd | 714 | update_stmt (stmt); |
715 | } | |
e174638f | 716 | } |
717 | } | |
718 | } | |
abacb398 | 719 | } |
685b24f5 | 720 | |
65b0537f | 721 | statistics_counter_event (fun, "reciprocal divs inserted", |
30c4e60d | 722 | reciprocal_stats.rdivs_inserted); |
65b0537f | 723 | statistics_counter_event (fun, "reciprocal functions inserted", |
30c4e60d | 724 | reciprocal_stats.rfuncs_inserted); |
725 | ||
c136ae61 | 726 | free_dominance_info (CDI_DOMINATORS); |
727 | free_dominance_info (CDI_POST_DOMINATORS); | |
d8e7268c | 728 | delete occ_pool; |
2a1990e9 | 729 | return 0; |
abacb398 | 730 | } |
731 | ||
cbe8bda8 | 732 | } // anon namespace |
733 | ||
734 | gimple_opt_pass * | |
735 | make_pass_cse_reciprocals (gcc::context *ctxt) | |
736 | { | |
737 | return new pass_cse_reciprocals (ctxt); | |
738 | } | |
739 | ||
0d424440 | 740 | /* Records an occurrence at statement USE_STMT in the vector of trees |
a0315874 | 741 | STMTS if it is dominated by *TOP_BB or dominates it or this basic block |
0d424440 | 742 | is not yet initialized. Returns true if the occurrence was pushed on |
a0315874 | 743 | the vector. Adjusts *TOP_BB to be the basic block dominating all |
744 | statements in the vector. */ | |
745 | ||
746 | static bool | |
42acab1c | 747 | maybe_record_sincos (vec<gimple *> *stmts, |
748 | basic_block *top_bb, gimple *use_stmt) | |
a0315874 | 749 | { |
75a70cf9 | 750 | basic_block use_bb = gimple_bb (use_stmt); |
a0315874 | 751 | if (*top_bb |
752 | && (*top_bb == use_bb | |
753 | || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) | |
f1f41a6c | 754 | stmts->safe_push (use_stmt); |
a0315874 | 755 | else if (!*top_bb |
756 | || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) | |
757 | { | |
f1f41a6c | 758 | stmts->safe_push (use_stmt); |
a0315874 | 759 | *top_bb = use_bb; |
760 | } | |
761 | else | |
762 | return false; | |
763 | ||
764 | return true; | |
765 | } | |
766 | ||
767 | /* Look for sin, cos and cexpi calls with the same argument NAME and | |
768 | create a single call to cexpi CSEing the result in this case. | |
769 | We first walk over all immediate uses of the argument collecting | |
770 | statements that we can CSE in a vector and in a second pass replace | |
771 | the statement rhs with a REALPART or IMAGPART expression on the | |
772 | result of the cexpi call we insert before the use statement that | |
773 | dominates all other candidates. */ | |
774 | ||
static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  type = TREE_TYPE (name);
  /* First pass: collect the sin/cos/cexpi calls that use NAME as
     argument and let maybe_record_sincos track the basic block
     (TOP_BB) dominating all of them.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      /* Only calls whose result is actually used are interesting.  */
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  /* With at most one kind of call recorded there is nothing to share.  */
  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      /* NAME is defined by a normal statement inside TOP_BB; place the
	 cexpi call immediately after that definition.  */
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      /* NAME is a default def, a PHI result, or defined outside TOP_BB;
	 the start of TOP_BB is then always dominated by its definition.  */
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      /* Removing a throwing call may make EH edges dead; purging them
	 changes the CFG, which the caller must know about.  */
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  return cfg_changed;
}
874 | ||
e9a6c4bc | 875 | /* To evaluate powi(x,n), the floating point value x raised to the |
876 | constant integer exponent n, we use a hybrid algorithm that | |
877 | combines the "window method" with look-up tables. For an | |
878 | introduction to exponentiation algorithms and "addition chains", | |
879 | see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth, | |
880 | "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming", | |
881 | 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation | |
882 | Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */ | |
883 | ||
/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".
   powi_cost charges POWI_WINDOW_SIZE + 1 multiplies per consumed
   window of exponent bits; keep the two in sync.  */
#define POWI_WINDOW_SIZE 3
903 | ||
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states than an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".
   Consulted by powi_lookup_cost and powi_as_mults_1 below.  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
946 | ||
947 | ||
948 | /* Return the number of multiplications required to calculate | |
949 | powi(x,n) where n is less than POWI_TABLE_SIZE. This is a | |
950 | subroutine of powi_cost. CACHE is an array indicating | |
951 | which exponents have already been calculated. */ | |
952 | ||
953 | static int | |
954 | powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache) | |
955 | { | |
956 | /* If we've already calculated this exponent, then this evaluation | |
957 | doesn't require any additional multiplications. */ | |
958 | if (cache[n]) | |
959 | return 0; | |
960 | ||
961 | cache[n] = true; | |
962 | return powi_lookup_cost (n - powi_table[n], cache) | |
963 | + powi_lookup_cost (powi_table[n], cache) + 1; | |
964 | } | |
965 | ||
966 | /* Return the number of multiplications required to calculate | |
967 | powi(x,n) for an arbitrary x, given the exponent N. This | |
968 | function needs to be kept in sync with powi_as_mults below. */ | |
969 | ||
970 | static int | |
971 | powi_cost (HOST_WIDE_INT n) | |
972 | { | |
973 | bool cache[POWI_TABLE_SIZE]; | |
974 | unsigned HOST_WIDE_INT digit; | |
975 | unsigned HOST_WIDE_INT val; | |
976 | int result; | |
977 | ||
978 | if (n == 0) | |
979 | return 0; | |
980 | ||
981 | /* Ignore the reciprocal when calculating the cost. */ | |
982 | val = (n < 0) ? -n : n; | |
983 | ||
984 | /* Initialize the exponent cache. */ | |
985 | memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool)); | |
986 | cache[1] = true; | |
987 | ||
988 | result = 0; | |
989 | ||
990 | while (val >= POWI_TABLE_SIZE) | |
991 | { | |
992 | if (val & 1) | |
993 | { | |
994 | digit = val & ((1 << POWI_WINDOW_SIZE) - 1); | |
995 | result += powi_lookup_cost (digit, cache) | |
996 | + POWI_WINDOW_SIZE + 1; | |
997 | val >>= POWI_WINDOW_SIZE; | |
998 | } | |
999 | else | |
1000 | { | |
1001 | val >>= 1; | |
1002 | result++; | |
1003 | } | |
1004 | } | |
1005 | ||
1006 | return result + powi_lookup_cost (val, cache); | |
1007 | } | |
1008 | ||
/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  /* Reuse a previously computed power when available.  */
  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      /* Small exponent: split per the optimal power tree.  Cache the
	 result BEFORE recursing so shared subtrees are reused.  */
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      /* Odd large exponent: peel off a POWI_WINDOW_SIZE-bit digit
	 (window method; mirrors the odd branch of powi_cost).  */
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      /* Even large exponent: x**n = (x**(n/2))**2.  */
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}
1050 | ||
/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

static tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  /* x**0 is 1.0; no statements needed.  */
  if (n == 0)
    return build_real (type, dconst1);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  /* NOTE(review): -n is undefined for the most negative HOST_WIDE_INT;
     the visible caller (gimple_expand_builtin_powi) rejects that value
     via its n != -n check -- confirm for any new callers.  */
  result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}
1081 | ||
1082 | /* ARG0 and N are the two arguments to a powi builtin in GSI with | |
1083 | location info LOC. If the arguments are appropriate, create an | |
1084 | equivalent sequence of statements prior to GSI using an optimal | |
1085 | number of multiplications, and return an expession holding the | |
1086 | result. */ | |
1087 | ||
1088 | static tree | |
1089 | gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc, | |
1090 | tree arg0, HOST_WIDE_INT n) | |
1091 | { | |
1092 | /* Avoid largest negative number. */ | |
1093 | if (n != -n | |
1094 | && ((n >= -1 && n <= 2) | |
1095 | || (optimize_function_for_speed_p (cfun) | |
1096 | && powi_cost (n) <= POWI_MAX_MULTS))) | |
1097 | return powi_as_mults (gsi, loc, arg0, n); | |
1098 | ||
1099 | return NULL_TREE; | |
1100 | } | |
1101 | ||
ae43b05e | 1102 | /* Build a gimple call statement that calls FN with argument ARG. |
03d37e4e | 1103 | Set the lhs of the call statement to a fresh SSA name. Insert the |
ae43b05e | 1104 | statement prior to GSI's current position, and return the fresh |
1105 | SSA name. */ | |
1106 | ||
1107 | static tree | |
ca12eb68 | 1108 | build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc, |
03d37e4e | 1109 | tree fn, tree arg) |
ae43b05e | 1110 | { |
1a91d914 | 1111 | gcall *call_stmt; |
ae43b05e | 1112 | tree ssa_target; |
1113 | ||
ae43b05e | 1114 | call_stmt = gimple_build_call (fn, 1, arg); |
03d37e4e | 1115 | ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot"); |
ae43b05e | 1116 | gimple_set_lhs (call_stmt, ssa_target); |
1117 | gimple_set_location (call_stmt, loc); | |
1118 | gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT); | |
1119 | ||
1120 | return ssa_target; | |
1121 | } | |
1122 | ||
ca12eb68 | 1123 | /* Build a gimple binary operation with the given CODE and arguments |
1124 | ARG0, ARG1, assigning the result to a new SSA name for variable | |
1125 | TARGET. Insert the statement prior to GSI's current position, and | |
1126 | return the fresh SSA name.*/ | |
1127 | ||
1128 | static tree | |
1129 | build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, | |
03d37e4e | 1130 | const char *name, enum tree_code code, |
1131 | tree arg0, tree arg1) | |
ca12eb68 | 1132 | { |
03d37e4e | 1133 | tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name); |
e9cf809e | 1134 | gassign *stmt = gimple_build_assign (result, code, arg0, arg1); |
ca12eb68 | 1135 | gimple_set_location (stmt, loc); |
1136 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1137 | return result; | |
1138 | } | |
1139 | ||
a5c384c1 | 1140 | /* Build a gimple reference operation with the given CODE and argument |
03d37e4e | 1141 | ARG, assigning the result to a new SSA name of TYPE with NAME. |
a5c384c1 | 1142 | Insert the statement prior to GSI's current position, and return |
1143 | the fresh SSA name. */ | |
1144 | ||
1145 | static inline tree | |
1146 | build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, | |
03d37e4e | 1147 | const char *name, enum tree_code code, tree arg0) |
a5c384c1 | 1148 | { |
03d37e4e | 1149 | tree result = make_temp_ssa_name (type, NULL, name); |
42acab1c | 1150 | gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0)); |
a5c384c1 | 1151 | gimple_set_location (stmt, loc); |
1152 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1153 | return result; | |
1154 | } | |
1155 | ||
03d37e4e | 1156 | /* Build a gimple assignment to cast VAL to TYPE. Insert the statement |
aff5fb4d | 1157 | prior to GSI's current position, and return the fresh SSA name. */ |
1158 | ||
1159 | static tree | |
1160 | build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, | |
03d37e4e | 1161 | tree type, tree val) |
aff5fb4d | 1162 | { |
f9e245b2 | 1163 | tree result = make_ssa_name (type); |
e9cf809e | 1164 | gassign *stmt = gimple_build_assign (result, NOP_EXPR, val); |
03d37e4e | 1165 | gimple_set_location (stmt, loc); |
1166 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1167 | return result; | |
aff5fb4d | 1168 | } |
1169 | ||
/* Bookkeeping for synthesizing pow (x, c) as a product of square-root
   chains; filled in by representable_as_half_series_p.  */
struct pow_synth_sqrt_info
{
  /* factors[i] is true iff the term (0.5)**(i+1) participates in the
     sum that decomposes the fractional exponent.  */
  bool *factors;
  /* Depth of the longest sqrt chain required, i.e. the largest i + 1
     with factors[i] set.  */
  unsigned int deepest;
  /* Number of multiplications needed to combine the chains: one less
     than the number of terms in the sum.  */
  unsigned int num_mults;
};
1176 | ||
/* Return true iff the real value C can be represented as a
   sum of powers of 0.5 up to N.  That is:
   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
   Record in INFO the various parameters of the synthesis algorithm such
   as the factors a[i], the maximum 0.5 power and the number of
   multiplications that will be required.  */

bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				 struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  /* Greedily subtract descending powers of 0.5 (held in FACTOR) from
     the remainder, recording which powers were used.  */
  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened bail out now.  */
      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
         of powers of 0.5.  */
      if (real_equal (&res, &dconst0))
	{
	  info->factors[i] = true;
	  /* The terminating term is not counted in num_mults:
	     combining k chains takes k - 1 multiplications.  */
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  /* This power fits under the remainder: keep it.  */
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      /* Halve FACTOR for the next, smaller power of 0.5.  */
      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
    }
  return false;
}
1224 | ||
1225 | /* Return the tree corresponding to FN being applied | |
1226 | to ARG N times at GSI and LOC. | |
1227 | Look up previous results from CACHE if need be. | |
1228 | cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */ | |
1229 | ||
1230 | static tree | |
1231 | get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi, | |
1232 | tree fn, location_t loc, tree *cache) | |
1233 | { | |
1234 | tree res = cache[n]; | |
1235 | if (!res) | |
1236 | { | |
1237 | tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache); | |
1238 | res = build_and_insert_call (gsi, loc, fn, prev); | |
1239 | cache[n] = res; | |
1240 | } | |
1241 | ||
1242 | return res; | |
1243 | } | |
1244 | ||
/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   "foo (foo (x))".  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  /* Emit N opening applications, the argument, then N closing parens.  */
  for (unsigned int i = 0; i < n; i++)
    fprintf (stream, "%s (", fname);
  fprintf (stream, "%s", arg);
  for (unsigned int i = 0; i < n; i++)
    fprintf (stream, ")");
}
1262 | ||
1263 | /* Print to STREAM the fractional sequence of sqrt chains | |
1264 | applied to ARG, described by INFO. Used for the dump file. */ | |
1265 | ||
1266 | static void | |
1267 | dump_fractional_sqrt_sequence (FILE *stream, const char *arg, | |
1268 | struct pow_synth_sqrt_info *info) | |
1269 | { | |
1270 | for (unsigned int i = 0; i < info->deepest; i++) | |
1271 | { | |
1272 | bool is_set = info->factors[i]; | |
1273 | if (is_set) | |
1274 | { | |
1275 | print_nested_fn (stream, "sqrt", arg, i + 1); | |
1276 | if (i != info->deepest - 1) | |
1277 | fprintf (stream, " * "); | |
1278 | } | |
1279 | } | |
1280 | } | |
1281 | ||
1282 | /* Print to STREAM a representation of raising ARG to an integer | |
1283 | power N. Used for the dump file. */ | |
1284 | ||
1285 | static void | |
1286 | dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n) | |
1287 | { | |
1288 | if (n > 1) | |
1289 | fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n); | |
1290 | else if (n == 1) | |
1291 | fprintf (stream, "%s", arg); | |
1292 | } | |
1293 | ||
1294 | /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of | |
1295 | square roots. Place at GSI and LOC. Limit the maximum depth | |
1296 | of the sqrt chains to MAX_DEPTH. Return the tree holding the | |
1297 | result of the expanded sequence or NULL_TREE if the expansion failed. | |
1298 | ||
1299 | This routine assumes that ARG1 is a real number with a fractional part | |
1300 | (the integer exponent case will have been handled earlier in | |
1301 | gimple_expand_builtin_pow). | |
1302 | ||
1303 | For ARG1 > 0.0: | |
1304 | * For ARG1 composed of a whole part WHOLE_PART and a fractional part | |
1305 | FRAC_PART i.e. WHOLE_PART == floor (ARG1) and | |
1306 | FRAC_PART == ARG1 - WHOLE_PART: | |
1307 | Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where | |
1308 | POW (ARG0, FRAC_PART) is expanded as a product of square root chains | |
1309 | if it can be expressed as such, that is if FRAC_PART satisfies: | |
1310 | FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i)) | |
1311 | where integer a[i] is either 0 or 1. | |
1312 | ||
1313 | Example: | |
1314 | POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625) | |
1315 | --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x))) | |
1316 | ||
1317 | For ARG1 < 0.0 there are two approaches: | |
1318 | * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1) | |
1319 | is calculated as above. | |
1320 | ||
1321 | Example: | |
1322 | POW (x, -5.625) == 1.0 / POW (x, 5.625) | |
1323 | --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x)))) | |
1324 | ||
1325 | * (B) : WHOLE_PART := - ceil (abs (ARG1)) | |
1326 | FRAC_PART := ARG1 - WHOLE_PART | |
1327 | and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART). | |
1328 | Example: | |
1329 | POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6) | |
1330 | --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6)) | |
1331 | ||
1332 | For ARG1 < 0.0 we choose between (A) and (B) depending on | |
1333 | how many multiplications we'd have to do. | |
1334 | So, for the example in (B): POW (x, -5.875), if we were to | |
1335 | follow algorithm (A) we would produce: | |
1336 | 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))) | |
1337 | which contains more multiplications than approach (B). | |
1338 | ||
1339 | Hopefully, this approach will eliminate potentially expensive POW library | |
1340 | calls when unsafe floating point math is enabled and allow the compiler to | |
1341 | further optimise the multiplies, square roots and divides produced by this | |
1342 | function. */ | |
1343 | ||
static tree
expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
{
  tree type = TREE_TYPE (arg0);
  machine_mode mode = TYPE_MODE (type);
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  bool one_over = true;

  /* Without a sqrt builtin for TYPE there is nothing to synthesize.  */
  if (!sqrtfn)
    return NULL_TREE;

  /* Only constant exponents are handled here.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);

  gcc_assert (max_depth > 0);
  tree *cache = XALLOCAVEC (tree, max_depth + 1);

  struct pow_synth_sqrt_info synth_info;
  synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
  synth_info.deepest = 0;
  synth_info.num_mults = 0;

  bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
  REAL_VALUE_TYPE exp = real_value_abs (&exp_init);

  /* The whole and fractional parts of exp.  */
  REAL_VALUE_TYPE whole_part;
  REAL_VALUE_TYPE frac_part;

  real_floor (&whole_part, mode, &exp);
  real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);


  /* For negative exponents, approach (B) of the function comment uses
     ceil instead of floor: exp == ceil_whole - ceil_fract.  */
  REAL_VALUE_TYPE ceil_whole = dconst0;
  REAL_VALUE_TYPE ceil_fract = dconst0;

  if (neg_exp)
    {
      real_ceil (&ceil_whole, mode, &exp);
      real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
    }

  if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
    return NULL_TREE;

  /* Check whether it's more profitable to not use 1.0 / ...  */
  if (neg_exp)
    {
      struct pow_synth_sqrt_info alt_synth_info;
      alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
      alt_synth_info.deepest = 0;
      alt_synth_info.num_mults = 0;

      /* Prefer approach (B) when it needs no deeper sqrt chains and
	 strictly fewer multiplications.  */
      if (representable_as_half_series_p (ceil_fract, max_depth,
					  &alt_synth_info)
	  && alt_synth_info.deepest <= synth_info.deepest
	  && alt_synth_info.num_mults < synth_info.num_mults)
	{
	  whole_part = ceil_whole;
	  frac_part = ceil_fract;
	  synth_info.deepest = alt_synth_info.deepest;
	  synth_info.num_mults = alt_synth_info.num_mults;
	  memcpy (synth_info.factors, alt_synth_info.factors,
		  (max_depth + 1) * sizeof (bool));
	  one_over = false;
	}
    }

  HOST_WIDE_INT n = real_to_integer (&whole_part);
  REAL_VALUE_TYPE cint;
  real_from_integer (&cint, VOIDmode, n, SIGNED);

  /* Give up if the whole part does not round-trip through
     HOST_WIDE_INT.  */
  if (!real_identical (&whole_part, &cint))
    return NULL_TREE;

  /* Respect the overall multiplication budget.  */
  if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
    return NULL_TREE;

  memset (cache, 0, (max_depth + 1) * sizeof (tree));

  tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;

  /* Calculate the integer part of the exponent.  */
  if (n > 1)
    {
      integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
      if (!integer_res)
	return NULL_TREE;
    }

  if (dump_file)
    {
      char string[64];

      real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
      fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);

      if (neg_exp)
	{
	  if (one_over)
	    {
	      /* Approach (A): 1.0 / (powi * sqrt chains).  */
	      fprintf (dump_file, "1.0 / (");
	      dump_integer_part (dump_file, "x", n);
	      if (n > 0)
	        fprintf (dump_file, " * ");
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, ")");
	    }
	  else
	    {
	      /* Approach (B): sqrt chains / powi.  */
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, " / (");
	      dump_integer_part (dump_file, "x", n);
	      fprintf (dump_file, ")");
	    }
	}
      else
	{
	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	  if (n > 0)
	    fprintf (dump_file, " * ");
	  dump_integer_part (dump_file, "x", n);
	}

      fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
    }


  tree fract_res = NULL_TREE;
  cache[0] = arg0;

  /* Calculate the fractional part of the exponent.  Multiply together
     the sqrt chains selected by the synthesis (sharing prefixes via
     CACHE inside get_fn_chain).  */
  for (unsigned i = 0; i < synth_info.deepest; i++)
    {
      if (synth_info.factors[i])
	{
	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);

	  if (!fract_res)
	    fract_res = sqrt_chain;

	  else
	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
						fract_res, sqrt_chain);
	}
    }

  tree res = NULL_TREE;

  if (neg_exp)
    {
      if (one_over)
	{
	  /* Approach (A): reciprocate the combined result.  */
	  if (n > 0)
	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					  fract_res, integer_res);
	  else
	    res = fract_res;

	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
					build_real (type, dconst1), res);
	}
      else
	{
	  /* Approach (B): divide the fractional part by the powi.  */
	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					fract_res, integer_res);
	}
    }
  else
    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				  fract_res, integer_res);
  return res;
}
1520 | ||
e78306af | 1521 | /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI |
1522 | with location info LOC. If possible, create an equivalent and | |
1523 | less expensive sequence of statements prior to GSI, and return an | |
1524 | expession holding the result. */ | |
1525 | ||
1526 | static tree | |
1527 | gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, | |
1528 | tree arg0, tree arg1) | |
1529 | { | |
c3206272 | 1530 | REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6; |
ca12eb68 | 1531 | REAL_VALUE_TYPE c2, dconst3; |
e78306af | 1532 | HOST_WIDE_INT n; |
c3206272 | 1533 | tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x; |
3754d046 | 1534 | machine_mode mode; |
c3206272 | 1535 | bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi)); |
0190fe95 | 1536 | bool hw_sqrt_exists, c_is_int, c2_is_int; |
e78306af | 1537 | |
c3206272 | 1538 | dconst1_4 = dconst1; |
1539 | SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); | |
1540 | ||
e78306af | 1541 | /* If the exponent isn't a constant, there's nothing of interest |
1542 | to be done. */ | |
1543 | if (TREE_CODE (arg1) != REAL_CST) | |
1544 | return NULL_TREE; | |
1545 | ||
9f27d92a | 1546 | /* Don't perform the operation if flag_signaling_nans is on |
1547 | and the operand is a signaling NaN. */ | |
1548 | if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1))) | |
2a659064 | 1549 | && ((TREE_CODE (arg0) == REAL_CST |
1550 | && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0))) | |
9f27d92a | 1551 | || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1)))) |
1552 | return NULL_TREE; | |
1553 | ||
ae43b05e | 1554 | /* If the exponent is equivalent to an integer, expand to an optimal |
1555 | multiplication sequence when profitable. */ | |
e78306af | 1556 | c = TREE_REAL_CST (arg1); |
1557 | n = real_to_integer (&c); | |
e913b5cd | 1558 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
0190fe95 | 1559 | c_is_int = real_identical (&c, &cint); |
e78306af | 1560 | |
0190fe95 | 1561 | if (c_is_int |
e78306af | 1562 | && ((n >= -1 && n <= 2) |
1563 | || (flag_unsafe_math_optimizations | |
c3206272 | 1564 | && speed_p |
e78306af | 1565 | && powi_cost (n) <= POWI_MAX_MULTS))) |
1566 | return gimple_expand_builtin_powi (gsi, loc, arg0, n); | |
1567 | ||
ae43b05e | 1568 | /* Attempt various optimizations using sqrt and cbrt. */ |
1569 | type = TREE_TYPE (arg0); | |
1570 | mode = TYPE_MODE (type); | |
1571 | sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); | |
1572 | ||
1573 | /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe | |
1574 | unless signed zeros must be maintained. pow(-0,0.5) = +0, while | |
1575 | sqrt(-0) = -0. */ | |
1576 | if (sqrtfn | |
20cb53c9 | 1577 | && real_equal (&c, &dconsthalf) |
ae43b05e | 1578 | && !HONOR_SIGNED_ZEROS (mode)) |
03d37e4e | 1579 | return build_and_insert_call (gsi, loc, sqrtfn, arg0); |
ae43b05e | 1580 | |
a5c384c1 | 1581 | hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing; |
ae43b05e | 1582 | |
ae43b05e | 1583 | /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math |
1584 | optimizations since 1./3. is not exactly representable. If x | |
1585 | is negative and finite, the correct value of pow(x,1./3.) is | |
1586 | a NaN with the "invalid" exception raised, because the value | |
1587 | of 1./3. actually has an even denominator. The correct value | |
1588 | of cbrt(x) is a negative real value. */ | |
1589 | cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT); | |
1590 | dconst1_3 = real_value_truncate (mode, dconst_third ()); | |
1591 | ||
1592 | if (flag_unsafe_math_optimizations | |
1593 | && cbrtfn | |
ee230333 | 1594 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
20cb53c9 | 1595 | && real_equal (&c, &dconst1_3)) |
03d37e4e | 1596 | return build_and_insert_call (gsi, loc, cbrtfn, arg0); |
ae43b05e | 1597 | |
1598 | /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization | |
1599 | if we don't have a hardware sqrt insn. */ | |
1600 | dconst1_6 = dconst1_3; | |
1601 | SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1); | |
1602 | ||
1603 | if (flag_unsafe_math_optimizations | |
1604 | && sqrtfn | |
1605 | && cbrtfn | |
ee230333 | 1606 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
c3206272 | 1607 | && speed_p |
ae43b05e | 1608 | && hw_sqrt_exists |
20cb53c9 | 1609 | && real_equal (&c, &dconst1_6)) |
ae43b05e | 1610 | { |
1611 | /* sqrt(x) */ | |
03d37e4e | 1612 | sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0); |
ae43b05e | 1613 | |
1614 | /* cbrt(sqrt(x)) */ | |
03d37e4e | 1615 | return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0); |
ca12eb68 | 1616 | } |
1617 | ||
ca12eb68 | 1618 | |
c3206272 | 1619 | /* Attempt to expand the POW as a product of square root chains. |
1620 | Expand the 0.25 case even when otpimising for size. */ | |
ca12eb68 | 1621 | if (flag_unsafe_math_optimizations |
1622 | && sqrtfn | |
c3206272 | 1623 | && hw_sqrt_exists |
20cb53c9 | 1624 | && (speed_p || real_equal (&c, &dconst1_4)) |
c3206272 | 1625 | && !HONOR_SIGNED_ZEROS (mode)) |
ca12eb68 | 1626 | { |
c3206272 | 1627 | unsigned int max_depth = speed_p |
1628 | ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH) | |
1629 | : 2; | |
ca12eb68 | 1630 | |
c3206272 | 1631 | tree expand_with_sqrts |
1632 | = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth); | |
ca12eb68 | 1633 | |
c3206272 | 1634 | if (expand_with_sqrts) |
1635 | return expand_with_sqrts; | |
ca12eb68 | 1636 | } |
1637 | ||
c3206272 | 1638 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); |
1639 | n = real_to_integer (&c2); | |
1640 | real_from_integer (&cint, VOIDmode, n, SIGNED); | |
1641 | c2_is_int = real_identical (&c2, &cint); | |
1642 | ||
ca12eb68 | 1643 | /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into |
1644 | ||
1645 | powi(x, n/3) * powi(cbrt(x), n%3), n > 0; | |
1646 | 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0. | |
1647 | ||
1648 | Do not calculate the first factor when n/3 = 0. As cbrt(x) is | |
1649 | different from pow(x, 1./3.) due to rounding and behavior with | |
1650 | negative x, we need to constrain this transformation to unsafe | |
1651 | math and positive x or finite math. */ | |
e913b5cd | 1652 | real_from_integer (&dconst3, VOIDmode, 3, SIGNED); |
ca12eb68 | 1653 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst3); |
1654 | real_round (&c2, mode, &c2); | |
1655 | n = real_to_integer (&c2); | |
e913b5cd | 1656 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
ca12eb68 | 1657 | real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3); |
1658 | real_convert (&c2, mode, &c2); | |
1659 | ||
1660 | if (flag_unsafe_math_optimizations | |
1661 | && cbrtfn | |
ee230333 | 1662 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
ca12eb68 | 1663 | && real_identical (&c2, &c) |
0190fe95 | 1664 | && !c2_is_int |
ca12eb68 | 1665 | && optimize_function_for_speed_p (cfun) |
1666 | && powi_cost (n / 3) <= POWI_MAX_MULTS) | |
1667 | { | |
1668 | tree powi_x_ndiv3 = NULL_TREE; | |
1669 | ||
1670 | /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not | |
1671 | possible or profitable, give up. Skip the degenerate case when | |
1672 | abs(n) < 3, where the result is always 1. */ | |
b1757d46 | 1673 | if (absu_hwi (n) >= 3) |
ca12eb68 | 1674 | { |
1675 | powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0, | |
5ebd604f | 1676 | abs_hwi (n / 3)); |
ca12eb68 | 1677 | if (!powi_x_ndiv3) |
1678 | return NULL_TREE; | |
1679 | } | |
1680 | ||
1681 | /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi | |
1682 | as that creates an unnecessary variable. Instead, just produce | |
1683 | either cbrt(x) or cbrt(x) * cbrt(x). */ | |
03d37e4e | 1684 | cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0); |
ca12eb68 | 1685 | |
b1757d46 | 1686 | if (absu_hwi (n) % 3 == 1) |
ca12eb68 | 1687 | powi_cbrt_x = cbrt_x; |
1688 | else | |
03d37e4e | 1689 | powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, |
ca12eb68 | 1690 | cbrt_x, cbrt_x); |
1691 | ||
1692 | /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */ | |
b1757d46 | 1693 | if (absu_hwi (n) < 3) |
ca12eb68 | 1694 | result = powi_cbrt_x; |
1695 | else | |
03d37e4e | 1696 | result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, |
ca12eb68 | 1697 | powi_x_ndiv3, powi_cbrt_x); |
1698 | ||
1699 | /* If n is negative, reciprocate the result. */ | |
1700 | if (n < 0) | |
03d37e4e | 1701 | result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR, |
ca12eb68 | 1702 | build_real (type, dconst1), result); |
1703 | ||
1704 | return result; | |
ae43b05e | 1705 | } |
1706 | ||
ca12eb68 | 1707 | /* No optimizations succeeded. */ |
e78306af | 1708 | return NULL_TREE; |
1709 | } | |
1710 | ||
a5c384c1 | 1711 | /* ARG is the argument to a cabs builtin call in GSI with location info |
1712 | LOC. Create a sequence of statements prior to GSI that calculates | |
1713 | sqrt(R*R + I*I), where R and I are the real and imaginary components | |
1714 | of ARG, respectively. Return an expression holding the result. */ | |
1715 | ||
1716 | static tree | |
1717 | gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) | |
1718 | { | |
03d37e4e | 1719 | tree real_part, imag_part, addend1, addend2, sum, result; |
a5c384c1 | 1720 | tree type = TREE_TYPE (TREE_TYPE (arg)); |
1721 | tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); | |
3754d046 | 1722 | machine_mode mode = TYPE_MODE (type); |
a5c384c1 | 1723 | |
1724 | if (!flag_unsafe_math_optimizations | |
1725 | || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi))) | |
1726 | || !sqrtfn | |
1727 | || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing) | |
1728 | return NULL_TREE; | |
1729 | ||
03d37e4e | 1730 | real_part = build_and_insert_ref (gsi, loc, type, "cabs", |
a5c384c1 | 1731 | REALPART_EXPR, arg); |
03d37e4e | 1732 | addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |
a5c384c1 | 1733 | real_part, real_part); |
03d37e4e | 1734 | imag_part = build_and_insert_ref (gsi, loc, type, "cabs", |
a5c384c1 | 1735 | IMAGPART_EXPR, arg); |
03d37e4e | 1736 | addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |
a5c384c1 | 1737 | imag_part, imag_part); |
03d37e4e | 1738 | sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2); |
1739 | result = build_and_insert_call (gsi, loc, sqrtfn, sum); | |
a5c384c1 | 1740 | |
1741 | return result; | |
1742 | } | |
1743 | ||
a0315874 | 1744 | /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 |
e9a6c4bc | 1745 | on the SSA_NAME argument of each of them. Also expand powi(x,n) into |
1746 | an optimal number of multiplies, when n is a constant. */ | |
a0315874 | 1747 | |
65b0537f | 1748 | namespace { |
1749 | ||
1750 | const pass_data pass_data_cse_sincos = | |
1751 | { | |
1752 | GIMPLE_PASS, /* type */ | |
1753 | "sincos", /* name */ | |
1754 | OPTGROUP_NONE, /* optinfo_flags */ | |
65b0537f | 1755 | TV_NONE, /* tv_id */ |
1756 | PROP_ssa, /* properties_required */ | |
a153e7b3 | 1757 | PROP_gimple_opt_math, /* properties_provided */ |
65b0537f | 1758 | 0, /* properties_destroyed */ |
1759 | 0, /* todo_flags_start */ | |
8b88439e | 1760 | TODO_update_ssa, /* todo_flags_finish */ |
65b0537f | 1761 | }; |
1762 | ||
1763 | class pass_cse_sincos : public gimple_opt_pass | |
1764 | { | |
1765 | public: | |
1766 | pass_cse_sincos (gcc::context *ctxt) | |
1767 | : gimple_opt_pass (pass_data_cse_sincos, ctxt) | |
1768 | {} | |
1769 | ||
1770 | /* opt_pass methods: */ | |
1771 | virtual bool gate (function *) | |
1772 | { | |
1773 | /* We no longer require either sincos or cexp, since powi expansion | |
1774 | piggybacks on this pass. */ | |
1775 | return optimize; | |
1776 | } | |
1777 | ||
1778 | virtual unsigned int execute (function *); | |
1779 | ||
1780 | }; // class pass_cse_sincos | |
1781 | ||
1782 | unsigned int | |
1783 | pass_cse_sincos::execute (function *fun) | |
a0315874 | 1784 | { |
1785 | basic_block bb; | |
4c80086d | 1786 | bool cfg_changed = false; |
a0315874 | 1787 | |
1788 | calculate_dominance_info (CDI_DOMINATORS); | |
30c4e60d | 1789 | memset (&sincos_stats, 0, sizeof (sincos_stats)); |
a0315874 | 1790 | |
65b0537f | 1791 | FOR_EACH_BB_FN (bb, fun) |
a0315874 | 1792 | { |
75a70cf9 | 1793 | gimple_stmt_iterator gsi; |
2a155cf0 | 1794 | bool cleanup_eh = false; |
a0315874 | 1795 | |
75a70cf9 | 1796 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
a0315874 | 1797 | { |
42acab1c | 1798 | gimple *stmt = gsi_stmt (gsi); |
a0315874 | 1799 | |
2a155cf0 | 1800 | /* Only the last stmt in a bb could throw, no need to call |
1801 | gimple_purge_dead_eh_edges if we change something in the middle | |
1802 | of a basic block. */ | |
1803 | cleanup_eh = false; | |
1804 | ||
fa0793ad | 1805 | if (is_gimple_call (stmt) |
5e8b972c | 1806 | && gimple_call_lhs (stmt)) |
a0315874 | 1807 | { |
e9a6c4bc | 1808 | tree arg, arg0, arg1, result; |
1809 | HOST_WIDE_INT n; | |
1810 | location_t loc; | |
a0315874 | 1811 | |
fa0793ad | 1812 | switch (gimple_call_combined_fn (stmt)) |
a0315874 | 1813 | { |
fa0793ad | 1814 | CASE_CFN_COS: |
1815 | CASE_CFN_SIN: | |
1816 | CASE_CFN_CEXPI: | |
d312d7df | 1817 | /* Make sure we have either sincos or cexp. */ |
30f690e0 | 1818 | if (!targetm.libc_has_function (function_c99_math_complex) |
1819 | && !targetm.libc_has_function (function_sincos)) | |
d312d7df | 1820 | break; |
1821 | ||
75a70cf9 | 1822 | arg = gimple_call_arg (stmt, 0); |
a0315874 | 1823 | if (TREE_CODE (arg) == SSA_NAME) |
4c80086d | 1824 | cfg_changed |= execute_cse_sincos_1 (arg); |
a0315874 | 1825 | break; |
1826 | ||
fa0793ad | 1827 | CASE_CFN_POW: |
e78306af | 1828 | arg0 = gimple_call_arg (stmt, 0); |
1829 | arg1 = gimple_call_arg (stmt, 1); | |
1830 | ||
1831 | loc = gimple_location (stmt); | |
1832 | result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1); | |
1833 | ||
1834 | if (result) | |
1835 | { | |
1836 | tree lhs = gimple_get_lhs (stmt); | |
1a91d914 | 1837 | gassign *new_stmt = gimple_build_assign (lhs, result); |
e78306af | 1838 | gimple_set_location (new_stmt, loc); |
1839 | unlink_stmt_vdef (stmt); | |
1840 | gsi_replace (&gsi, new_stmt, true); | |
2a155cf0 | 1841 | cleanup_eh = true; |
bc8a8451 | 1842 | if (gimple_vdef (stmt)) |
1843 | release_ssa_name (gimple_vdef (stmt)); | |
e78306af | 1844 | } |
1845 | break; | |
1846 | ||
fa0793ad | 1847 | CASE_CFN_POWI: |
e9a6c4bc | 1848 | arg0 = gimple_call_arg (stmt, 0); |
1849 | arg1 = gimple_call_arg (stmt, 1); | |
e9a6c4bc | 1850 | loc = gimple_location (stmt); |
377db285 | 1851 | |
6dfe7d53 | 1852 | if (real_minus_onep (arg0)) |
377db285 | 1853 | { |
1854 | tree t0, t1, cond, one, minus_one; | |
1a91d914 | 1855 | gassign *stmt; |
377db285 | 1856 | |
1857 | t0 = TREE_TYPE (arg0); | |
1858 | t1 = TREE_TYPE (arg1); | |
1859 | one = build_real (t0, dconst1); | |
1860 | minus_one = build_real (t0, dconstm1); | |
1861 | ||
1862 | cond = make_temp_ssa_name (t1, NULL, "powi_cond"); | |
e9cf809e | 1863 | stmt = gimple_build_assign (cond, BIT_AND_EXPR, |
1864 | arg1, build_int_cst (t1, 1)); | |
377db285 | 1865 | gimple_set_location (stmt, loc); |
1866 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
1867 | ||
1868 | result = make_temp_ssa_name (t0, NULL, "powi"); | |
e9cf809e | 1869 | stmt = gimple_build_assign (result, COND_EXPR, cond, |
1870 | minus_one, one); | |
377db285 | 1871 | gimple_set_location (stmt, loc); |
1872 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
1873 | } | |
1874 | else | |
1875 | { | |
e913b5cd | 1876 | if (!tree_fits_shwi_p (arg1)) |
d48be958 | 1877 | break; |
1878 | ||
e913b5cd | 1879 | n = tree_to_shwi (arg1); |
377db285 | 1880 | result = gimple_expand_builtin_powi (&gsi, loc, arg0, n); |
1881 | } | |
e9a6c4bc | 1882 | |
1883 | if (result) | |
1884 | { | |
1885 | tree lhs = gimple_get_lhs (stmt); | |
1a91d914 | 1886 | gassign *new_stmt = gimple_build_assign (lhs, result); |
e9a6c4bc | 1887 | gimple_set_location (new_stmt, loc); |
a5c384c1 | 1888 | unlink_stmt_vdef (stmt); |
1889 | gsi_replace (&gsi, new_stmt, true); | |
2a155cf0 | 1890 | cleanup_eh = true; |
bc8a8451 | 1891 | if (gimple_vdef (stmt)) |
1892 | release_ssa_name (gimple_vdef (stmt)); | |
a5c384c1 | 1893 | } |
1894 | break; | |
1895 | ||
fa0793ad | 1896 | CASE_CFN_CABS: |
a5c384c1 | 1897 | arg0 = gimple_call_arg (stmt, 0); |
1898 | loc = gimple_location (stmt); | |
1899 | result = gimple_expand_builtin_cabs (&gsi, loc, arg0); | |
1900 | ||
1901 | if (result) | |
1902 | { | |
1903 | tree lhs = gimple_get_lhs (stmt); | |
1a91d914 | 1904 | gassign *new_stmt = gimple_build_assign (lhs, result); |
a5c384c1 | 1905 | gimple_set_location (new_stmt, loc); |
e9a6c4bc | 1906 | unlink_stmt_vdef (stmt); |
1907 | gsi_replace (&gsi, new_stmt, true); | |
2a155cf0 | 1908 | cleanup_eh = true; |
bc8a8451 | 1909 | if (gimple_vdef (stmt)) |
1910 | release_ssa_name (gimple_vdef (stmt)); | |
e9a6c4bc | 1911 | } |
1912 | break; | |
1913 | ||
a0315874 | 1914 | default:; |
1915 | } | |
1916 | } | |
1917 | } | |
2a155cf0 | 1918 | if (cleanup_eh) |
1919 | cfg_changed |= gimple_purge_dead_eh_edges (bb); | |
a0315874 | 1920 | } |
1921 | ||
65b0537f | 1922 | statistics_counter_event (fun, "sincos statements inserted", |
30c4e60d | 1923 | sincos_stats.inserted); |
1924 | ||
4c80086d | 1925 | return cfg_changed ? TODO_cleanup_cfg : 0; |
a0315874 | 1926 | } |
1927 | ||
cbe8bda8 | 1928 | } // anon namespace |
1929 | ||
1930 | gimple_opt_pass * | |
1931 | make_pass_cse_sincos (gcc::context *ctxt) | |
1932 | { | |
1933 | return new pass_cse_sincos (ctxt); | |
1934 | } | |
1935 | ||
ee0c17ad | 1936 | /* A symbolic number structure is used to detect byte permutation and selection |
1937 | patterns of a source. To achieve that, its field N contains an artificial | |
1938 | number consisting of BITS_PER_MARKER sized markers tracking where does each | |
1939 | byte come from in the source: | |
84cc784c | 1940 | |
ee0c17ad | 1941 | 0 - target byte has the value 0 |
1942 | FF - target byte has an unknown value (eg. due to sign extension) | |
1943 | 1..size - marker value is the byte index in the source (0 for lsb). | |
470d5bb5 | 1944 | |
1945 | To detect permutations on memory sources (arrays and structures), a symbolic | |
ee0c17ad | 1946 | number is also associated: |
1947 | - a base address BASE_ADDR and an OFFSET giving the address of the source; | |
1948 | - a range which gives the difference between the highest and lowest accessed | |
1949 | memory location to make such a symbolic number; | |
1950 | - the address SRC of the source element of lowest address as a convenience | |
e20b054d | 1951 | to easily get BASE_ADDR + offset + lowest bytepos; |
1952 | - number of expressions N_OPS bitwise ored together to represent | |
1953 | approximate cost of the computation. | |
ee0c17ad | 1954 | |
1955 | Note 1: the range is different from size as size reflects the size of the | |
1956 | type of the current expression. For instance, for an array char a[], | |
1957 | (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while | |
1958 | (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this | |
1959 | time a range of 1. | |
1960 | ||
1961 | Note 2: for non-memory sources, range holds the same value as size. | |
1962 | ||
1963 | Note 3: SRC points to the SSA_NAME in case of non-memory source. */ | |
84cc784c | 1964 | |
1965 | struct symbolic_number { | |
3a4303e7 | 1966 | uint64_t n; |
7101c0cf | 1967 | tree type; |
470d5bb5 | 1968 | tree base_addr; |
1969 | tree offset; | |
1970 | HOST_WIDE_INT bytepos; | |
a9eff296 | 1971 | tree src; |
470d5bb5 | 1972 | tree alias_set; |
1973 | tree vuse; | |
1974 | unsigned HOST_WIDE_INT range; | |
e20b054d | 1975 | int n_ops; |
84cc784c | 1976 | }; |
1977 | ||
cc76206c | 1978 | #define BITS_PER_MARKER 8 |
14cbfffb | 1979 | #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1) |
1980 | #define MARKER_BYTE_UNKNOWN MARKER_MASK | |
1981 | #define HEAD_MARKER(n, size) \ | |
1982 | ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER))) | |
cc76206c | 1983 | |
470d5bb5 | 1984 | /* The number which the find_bswap_or_nop_1 result should match in |
1985 | order to have a nop. The number is masked according to the size of | |
1986 | the symbolic number before using it. */ | |
3a4303e7 | 1987 | #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \ |
1988 | (uint64_t)0x08070605 << 32 | 0x04030201) | |
470d5bb5 | 1989 | |
1990 | /* The number which the find_bswap_or_nop_1 result should match in | |
1991 | order to have a byte swap. The number is masked according to the | |
1992 | size of the symbolic number before using it. */ | |
3a4303e7 | 1993 | #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \ |
1994 | (uint64_t)0x01020304 << 32 | 0x05060708) | |
470d5bb5 | 1995 | |
84cc784c | 1996 | /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic |
1997 | number N. Return false if the requested operation is not permitted | |
1998 | on a symbolic number. */ | |
1999 | ||
2000 | static inline bool | |
2001 | do_shift_rotate (enum tree_code code, | |
2002 | struct symbolic_number *n, | |
2003 | int count) | |
2004 | { | |
14cbfffb | 2005 | int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; |
2006 | unsigned head_marker; | |
7101c0cf | 2007 | |
cc76206c | 2008 | if (count % BITS_PER_UNIT != 0) |
84cc784c | 2009 | return false; |
cc76206c | 2010 | count = (count / BITS_PER_UNIT) * BITS_PER_MARKER; |
84cc784c | 2011 | |
2012 | /* Zero out the extra bits of N in order to avoid them being shifted | |
2013 | into the significant bits. */ | |
cc76206c | 2014 | if (size < 64 / BITS_PER_MARKER) |
2015 | n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; | |
84cc784c | 2016 | |
2017 | switch (code) | |
2018 | { | |
2019 | case LSHIFT_EXPR: | |
2020 | n->n <<= count; | |
2021 | break; | |
2022 | case RSHIFT_EXPR: | |
14cbfffb | 2023 | head_marker = HEAD_MARKER (n->n, size); |
84cc784c | 2024 | n->n >>= count; |
14cbfffb | 2025 | /* Arithmetic shift of signed type: result is dependent on the value. */ |
2026 | if (!TYPE_UNSIGNED (n->type) && head_marker) | |
2027 | for (i = 0; i < count / BITS_PER_MARKER; i++) | |
2028 | n->n |= (uint64_t) MARKER_BYTE_UNKNOWN | |
2029 | << ((size - 1 - i) * BITS_PER_MARKER); | |
84cc784c | 2030 | break; |
2031 | case LROTATE_EXPR: | |
cc76206c | 2032 | n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count)); |
84cc784c | 2033 | break; |
2034 | case RROTATE_EXPR: | |
cc76206c | 2035 | n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count)); |
84cc784c | 2036 | break; |
2037 | default: | |
2038 | return false; | |
2039 | } | |
0f09ed00 | 2040 | /* Zero unused bits for size. */ |
cc76206c | 2041 | if (size < 64 / BITS_PER_MARKER) |
2042 | n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; | |
84cc784c | 2043 | return true; |
2044 | } | |
2045 | ||
2046 | /* Perform sanity checking for the symbolic number N and the gimple | |
2047 | statement STMT. */ | |
2048 | ||
2049 | static inline bool | |
42acab1c | 2050 | verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt) |
84cc784c | 2051 | { |
2052 | tree lhs_type; | |
2053 | ||
2054 | lhs_type = gimple_expr_type (stmt); | |
2055 | ||
2056 | if (TREE_CODE (lhs_type) != INTEGER_TYPE) | |
2057 | return false; | |
2058 | ||
7101c0cf | 2059 | if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type)) |
84cc784c | 2060 | return false; |
2061 | ||
2062 | return true; | |
2063 | } | |
2064 | ||
ee054e83 | 2065 | /* Initialize the symbolic number N for the bswap pass from the base element |
2066 | SRC manipulated by the bitwise OR expression. */ | |
2067 | ||
2068 | static bool | |
2069 | init_symbolic_number (struct symbolic_number *n, tree src) | |
2070 | { | |
7101c0cf | 2071 | int size; |
2072 | ||
23edc6d1 | 2073 | if (! INTEGRAL_TYPE_P (TREE_TYPE (src))) |
2074 | return false; | |
2075 | ||
ee054e83 | 2076 | n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; |
a9eff296 | 2077 | n->src = src; |
ee054e83 | 2078 | |
2079 | /* Set up the symbolic number N by setting each byte to a value between 1 and | |
2080 | the byte size of rhs1. The highest order byte is set to n->size and the | |
2081 | lowest order byte to 1. */ | |
7101c0cf | 2082 | n->type = TREE_TYPE (src); |
2083 | size = TYPE_PRECISION (n->type); | |
2084 | if (size % BITS_PER_UNIT != 0) | |
ee054e83 | 2085 | return false; |
7101c0cf | 2086 | size /= BITS_PER_UNIT; |
cc76206c | 2087 | if (size > 64 / BITS_PER_MARKER) |
c0dd3a90 | 2088 | return false; |
7101c0cf | 2089 | n->range = size; |
ee054e83 | 2090 | n->n = CMPNOP; |
e20b054d | 2091 | n->n_ops = 1; |
ee054e83 | 2092 | |
cc76206c | 2093 | if (size < 64 / BITS_PER_MARKER) |
2094 | n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; | |
ee054e83 | 2095 | |
2096 | return true; | |
2097 | } | |
2098 | ||
470d5bb5 | 2099 | /* Check if STMT might be a byte swap or a nop from a memory source and returns |
2100 | the answer. If so, REF is that memory source and the base of the memory area | |
2101 | accessed and the offset of the access from that base are recorded in N. */ | |
2102 | ||
2103 | bool | |
42acab1c | 2104 | find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n) |
470d5bb5 | 2105 | { |
2106 | /* Leaf node is an array or component ref. Memorize its base and | |
2107 | offset from base to compare to other such leaf node. */ | |
2108 | HOST_WIDE_INT bitsize, bitpos; | |
3754d046 | 2109 | machine_mode mode; |
292237f3 | 2110 | int unsignedp, reversep, volatilep; |
ee054e83 | 2111 | tree offset, base_addr; |
470d5bb5 | 2112 | |
1e6c1ce6 | 2113 | /* Not prepared to handle PDP endian. */ |
2114 | if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
2115 | return false; | |
2116 | ||
470d5bb5 | 2117 | if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt)) |
2118 | return false; | |
2119 | ||
ee054e83 | 2120 | base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode, |
b3b6e4b5 | 2121 | &unsignedp, &reversep, &volatilep); |
470d5bb5 | 2122 | |
ee054e83 | 2123 | if (TREE_CODE (base_addr) == MEM_REF) |
470d5bb5 | 2124 | { |
2125 | offset_int bit_offset = 0; | |
ee054e83 | 2126 | tree off = TREE_OPERAND (base_addr, 1); |
470d5bb5 | 2127 | |
2128 | if (!integer_zerop (off)) | |
2129 | { | |
ee054e83 | 2130 | offset_int boff, coff = mem_ref_offset (base_addr); |
9fdc1ed4 | 2131 | boff = coff << LOG2_BITS_PER_UNIT; |
470d5bb5 | 2132 | bit_offset += boff; |
2133 | } | |
2134 | ||
ee054e83 | 2135 | base_addr = TREE_OPERAND (base_addr, 0); |
470d5bb5 | 2136 | |
2137 | /* Avoid returning a negative bitpos as this may wreak havoc later. */ | |
2138 | if (wi::neg_p (bit_offset)) | |
2139 | { | |
2140 | offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false); | |
2141 | offset_int tem = bit_offset.and_not (mask); | |
2142 | /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf. | |
2143 | Subtract it to BIT_OFFSET and add it (scaled) to OFFSET. */ | |
2144 | bit_offset -= tem; | |
9fdc1ed4 | 2145 | tem >>= LOG2_BITS_PER_UNIT; |
ee054e83 | 2146 | if (offset) |
2147 | offset = size_binop (PLUS_EXPR, offset, | |
470d5bb5 | 2148 | wide_int_to_tree (sizetype, tem)); |
2149 | else | |
ee054e83 | 2150 | offset = wide_int_to_tree (sizetype, tem); |
470d5bb5 | 2151 | } |
2152 | ||
2153 | bitpos += bit_offset.to_shwi (); | |
2154 | } | |
2155 | ||
2156 | if (bitpos % BITS_PER_UNIT) | |
2157 | return false; | |
2158 | if (bitsize % BITS_PER_UNIT) | |
2159 | return false; | |
292237f3 | 2160 | if (reversep) |
2161 | return false; | |
470d5bb5 | 2162 | |
935abe57 | 2163 | if (!init_symbolic_number (n, ref)) |
2164 | return false; | |
ee054e83 | 2165 | n->base_addr = base_addr; |
2166 | n->offset = offset; | |
470d5bb5 | 2167 | n->bytepos = bitpos / BITS_PER_UNIT; |
2168 | n->alias_set = reference_alias_ptr_type (ref); | |
2169 | n->vuse = gimple_vuse (stmt); | |
2170 | return true; | |
2171 | } | |
2172 | ||
f7a40894 | 2173 | /* Compute the symbolic number N representing the result of a bitwise OR on 2 |
2174 | symbolic number N1 and N2 whose source statements are respectively | |
2175 | SOURCE_STMT1 and SOURCE_STMT2. */ | |
2176 | ||
42acab1c | 2177 | static gimple * |
2178 | perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1, | |
2179 | gimple *source_stmt2, struct symbolic_number *n2, | |
f7a40894 | 2180 | struct symbolic_number *n) |
2181 | { | |
2182 | int i, size; | |
2183 | uint64_t mask; | |
42acab1c | 2184 | gimple *source_stmt; |
f7a40894 | 2185 | struct symbolic_number *n_start; |
2186 | ||
42333dd9 | 2187 | tree rhs1 = gimple_assign_rhs1 (source_stmt1); |
123c61e0 | 2188 | if (TREE_CODE (rhs1) == BIT_FIELD_REF |
2189 | && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) | |
42333dd9 | 2190 | rhs1 = TREE_OPERAND (rhs1, 0); |
2191 | tree rhs2 = gimple_assign_rhs1 (source_stmt2); | |
123c61e0 | 2192 | if (TREE_CODE (rhs2) == BIT_FIELD_REF |
2193 | && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME) | |
42333dd9 | 2194 | rhs2 = TREE_OPERAND (rhs2, 0); |
2195 | ||
f7a40894 | 2196 | /* Sources are different, cancel bswap if they are not memory location with |
2197 | the same base (array, structure, ...). */ | |
42333dd9 | 2198 | if (rhs1 != rhs2) |
f7a40894 | 2199 | { |
ddc9c989 | 2200 | uint64_t inc; |
f7a40894 | 2201 | HOST_WIDE_INT start_sub, end_sub, end1, end2, end; |
2202 | struct symbolic_number *toinc_n_ptr, *n_end; | |
a9eff296 | 2203 | basic_block bb1, bb2; |
f7a40894 | 2204 | |
2205 | if (!n1->base_addr || !n2->base_addr | |
2206 | || !operand_equal_p (n1->base_addr, n2->base_addr, 0)) | |
2207 | return NULL; | |
2208 | ||
1e6c1ce6 | 2209 | if (!n1->offset != !n2->offset |
2210 | || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0))) | |
f7a40894 | 2211 | return NULL; |
2212 | ||
2213 | if (n1->bytepos < n2->bytepos) | |
2214 | { | |
2215 | n_start = n1; | |
2216 | start_sub = n2->bytepos - n1->bytepos; | |
f7a40894 | 2217 | } |
2218 | else | |
2219 | { | |
2220 | n_start = n2; | |
2221 | start_sub = n1->bytepos - n2->bytepos; | |
f7a40894 | 2222 | } |
2223 | ||
a9eff296 | 2224 | bb1 = gimple_bb (source_stmt1); |
2225 | bb2 = gimple_bb (source_stmt2); | |
2226 | if (dominated_by_p (CDI_DOMINATORS, bb1, bb2)) | |
2227 | source_stmt = source_stmt1; | |
2228 | else | |
2229 | source_stmt = source_stmt2; | |
2230 | ||
f7a40894 | 2231 | /* Find the highest address at which a load is performed and |
2232 | compute related info. */ | |
2233 | end1 = n1->bytepos + (n1->range - 1); | |
2234 | end2 = n2->bytepos + (n2->range - 1); | |
2235 | if (end1 < end2) | |
2236 | { | |
2237 | end = end2; | |
2238 | end_sub = end2 - end1; | |
2239 | } | |
2240 | else | |
2241 | { | |
2242 | end = end1; | |
2243 | end_sub = end1 - end2; | |
2244 | } | |
2245 | n_end = (end2 > end1) ? n2 : n1; | |
2246 | ||
2247 | /* Find symbolic number whose lsb is the most significant. */ | |
2248 | if (BYTES_BIG_ENDIAN) | |
2249 | toinc_n_ptr = (n_end == n1) ? n2 : n1; | |
2250 | else | |
2251 | toinc_n_ptr = (n_start == n1) ? n2 : n1; | |
2252 | ||
2253 | n->range = end - n_start->bytepos + 1; | |
2254 | ||
2255 | /* Check that the range of memory covered can be represented by | |
2256 | a symbolic number. */ | |
2257 | if (n->range > 64 / BITS_PER_MARKER) | |
2258 | return NULL; | |
2259 | ||
2260 | /* Reinterpret byte marks in symbolic number holding the value of | |
2261 | bigger weight according to target endianness. */ | |
2262 | inc = BYTES_BIG_ENDIAN ? end_sub : start_sub; | |
2263 | size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT; | |
2264 | for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER) | |
2265 | { | |
1e6c1ce6 | 2266 | unsigned marker |
2267 | = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; | |
f7a40894 | 2268 | if (marker && marker != MARKER_BYTE_UNKNOWN) |
2269 | toinc_n_ptr->n += inc; | |
2270 | } | |
2271 | } | |
2272 | else | |
2273 | { | |
2274 | n->range = n1->range; | |
2275 | n_start = n1; | |
2276 | source_stmt = source_stmt1; | |
2277 | } | |
2278 | ||
2279 | if (!n1->alias_set | |
2280 | || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set)) | |
2281 | n->alias_set = n1->alias_set; | |
2282 | else | |
2283 | n->alias_set = ptr_type_node; | |
2284 | n->vuse = n_start->vuse; | |
2285 | n->base_addr = n_start->base_addr; | |
2286 | n->offset = n_start->offset; | |
a9eff296 | 2287 | n->src = n_start->src; |
f7a40894 | 2288 | n->bytepos = n_start->bytepos; |
2289 | n->type = n_start->type; | |
2290 | size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; | |
2291 | ||
2292 | for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER) | |
2293 | { | |
2294 | uint64_t masked1, masked2; | |
2295 | ||
2296 | masked1 = n1->n & mask; | |
2297 | masked2 = n2->n & mask; | |
2298 | if (masked1 && masked2 && masked1 != masked2) | |
2299 | return NULL; | |
2300 | } | |
2301 | n->n = n1->n | n2->n; | |
e20b054d | 2302 | n->n_ops = n1->n_ops + n2->n_ops; |
f7a40894 | 2303 | |
2304 | return source_stmt; | |
2305 | } | |
2306 | ||
470d5bb5 | 2307 | /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform |
2308 | the operation given by the rhs of STMT on the result. If the operation | |
29314253 | 2309 | could successfully be executed the function returns a gimple stmt whose |
2310 | rhs's first tree is the expression of the source operand and NULL | |
2311 | otherwise. */ | |
84cc784c | 2312 | |
42acab1c | 2313 | static gimple * |
2314 | find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) | |
84cc784c | 2315 | { |
2316 | enum tree_code code; | |
2317 | tree rhs1, rhs2 = NULL; | |
42acab1c | 2318 | gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1; |
84cc784c | 2319 | enum gimple_rhs_class rhs_class; |
2320 | ||
2321 | if (!limit || !is_gimple_assign (stmt)) | |
29314253 | 2322 | return NULL; |
84cc784c | 2323 | |
2324 | rhs1 = gimple_assign_rhs1 (stmt); | |
2325 | ||
470d5bb5 | 2326 | if (find_bswap_or_nop_load (stmt, rhs1, n)) |
29314253 | 2327 | return stmt; |
470d5bb5 | 2328 | |
42333dd9 | 2329 | /* Handle BIT_FIELD_REF. */ |
2330 | if (TREE_CODE (rhs1) == BIT_FIELD_REF | |
2331 | && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) | |
2332 | { | |
2333 | unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1)); | |
2334 | unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2)); | |
2335 | if (bitpos % BITS_PER_UNIT == 0 | |
2336 | && bitsize % BITS_PER_UNIT == 0 | |
2337 | && init_symbolic_number (n, TREE_OPERAND (rhs1, 0))) | |
2338 | { | |
e5c957b4 | 2339 | /* Handle big-endian bit numbering in BIT_FIELD_REF. */ |
2340 | if (BYTES_BIG_ENDIAN) | |
2341 | bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize; | |
2342 | ||
42333dd9 | 2343 | /* Shift. */ |
2344 | if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos)) | |
2345 | return NULL; | |
2346 | ||
2347 | /* Mask. */ | |
2348 | uint64_t mask = 0; | |
2349 | uint64_t tmp = (1 << BITS_PER_UNIT) - 1; | |
2350 | for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; | |
2351 | i++, tmp <<= BITS_PER_UNIT) | |
2352 | mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); | |
2353 | n->n &= mask; | |
2354 | ||
2355 | /* Convert. */ | |
2356 | n->type = TREE_TYPE (rhs1); | |
2357 | if (!n->base_addr) | |
2358 | n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT; | |
2359 | ||
2360 | return verify_symbolic_number_p (n, stmt) ? stmt : NULL; | |
2361 | } | |
2362 | ||
2363 | return NULL; | |
2364 | } | |
2365 | ||
84cc784c | 2366 | if (TREE_CODE (rhs1) != SSA_NAME) |
29314253 | 2367 | return NULL; |
84cc784c | 2368 | |
2369 | code = gimple_assign_rhs_code (stmt); | |
2370 | rhs_class = gimple_assign_rhs_class (stmt); | |
2371 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
2372 | ||
2373 | if (rhs_class == GIMPLE_BINARY_RHS) | |
2374 | rhs2 = gimple_assign_rhs2 (stmt); | |
2375 | ||
2376 | /* Handle unary rhs and binary rhs with integer constants as second | |
2377 | operand. */ | |
2378 | ||
2379 | if (rhs_class == GIMPLE_UNARY_RHS | |
2380 | || (rhs_class == GIMPLE_BINARY_RHS | |
2381 | && TREE_CODE (rhs2) == INTEGER_CST)) | |
2382 | { | |
2383 | if (code != BIT_AND_EXPR | |
2384 | && code != LSHIFT_EXPR | |
2385 | && code != RSHIFT_EXPR | |
2386 | && code != LROTATE_EXPR | |
2387 | && code != RROTATE_EXPR | |
d09ef31a | 2388 | && !CONVERT_EXPR_CODE_P (code)) |
29314253 | 2389 | return NULL; |
84cc784c | 2390 | |
29314253 | 2391 | source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1); |
84cc784c | 2392 | |
470d5bb5 | 2393 | /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and |
2394 | we have to initialize the symbolic number. */ | |
29314253 | 2395 | if (!source_stmt1) |
84cc784c | 2396 | { |
ee054e83 | 2397 | if (gimple_assign_load_p (stmt) |
2398 | || !init_symbolic_number (n, rhs1)) | |
29314253 | 2399 | return NULL; |
2400 | source_stmt1 = stmt; | |
84cc784c | 2401 | } |
2402 | ||
2403 | switch (code) | |
2404 | { | |
2405 | case BIT_AND_EXPR: | |
2406 | { | |
7101c0cf | 2407 | int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; |
cc76206c | 2408 | uint64_t val = int_cst_value (rhs2), mask = 0; |
2409 | uint64_t tmp = (1 << BITS_PER_UNIT) - 1; | |
84cc784c | 2410 | |
2411 | /* Only constants masking full bytes are allowed. */ | |
cc76206c | 2412 | for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT) |
2413 | if ((val & tmp) != 0 && (val & tmp) != tmp) | |
29314253 | 2414 | return NULL; |
cc76206c | 2415 | else if (val & tmp) |
14cbfffb | 2416 | mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); |
84cc784c | 2417 | |
cc76206c | 2418 | n->n &= mask; |
84cc784c | 2419 | } |
2420 | break; | |
2421 | case LSHIFT_EXPR: | |
2422 | case RSHIFT_EXPR: | |
2423 | case LROTATE_EXPR: | |
2424 | case RROTATE_EXPR: | |
1e6c1ce6 | 2425 | if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2))) |
29314253 | 2426 | return NULL; |
84cc784c | 2427 | break; |
2428 | CASE_CONVERT: | |
2429 | { | |
14cbfffb | 2430 | int i, type_size, old_type_size; |
7101c0cf | 2431 | tree type; |
84cc784c | 2432 | |
7101c0cf | 2433 | type = gimple_expr_type (stmt); |
2434 | type_size = TYPE_PRECISION (type); | |
84cc784c | 2435 | if (type_size % BITS_PER_UNIT != 0) |
29314253 | 2436 | return NULL; |
cc76206c | 2437 | type_size /= BITS_PER_UNIT; |
2438 | if (type_size > 64 / BITS_PER_MARKER) | |
29314253 | 2439 | return NULL; |
84cc784c | 2440 | |
7101c0cf | 2441 | /* Sign extension: result is dependent on the value. */ |
cc76206c | 2442 | old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; |
14cbfffb | 2443 | if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size |
2444 | && HEAD_MARKER (n->n, old_type_size)) | |
2445 | for (i = 0; i < type_size - old_type_size; i++) | |
a7e676a4 | 2446 | n->n |= (uint64_t) MARKER_BYTE_UNKNOWN |
409678bf | 2447 | << ((type_size - 1 - i) * BITS_PER_MARKER); |
7101c0cf | 2448 | |
cc76206c | 2449 | if (type_size < 64 / BITS_PER_MARKER) |
84cc784c | 2450 | { |
2451 | /* If STMT casts to a smaller type mask out the bits not | |
2452 | belonging to the target type. */ | |
cc76206c | 2453 | n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1; |
84cc784c | 2454 | } |
7101c0cf | 2455 | n->type = type; |
470d5bb5 | 2456 | if (!n->base_addr) |
cc76206c | 2457 | n->range = type_size; |
84cc784c | 2458 | } |
2459 | break; | |
2460 | default: | |
29314253 | 2461 | return NULL; |
84cc784c | 2462 | }; |
29314253 | 2463 | return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL; |
84cc784c | 2464 | } |
2465 | ||
2466 | /* Handle binary rhs. */ | |
2467 | ||
2468 | if (rhs_class == GIMPLE_BINARY_RHS) | |
2469 | { | |
2470 | struct symbolic_number n1, n2; | |
42acab1c | 2471 | gimple *source_stmt, *source_stmt2; |
84cc784c | 2472 | |
2473 | if (code != BIT_IOR_EXPR) | |
29314253 | 2474 | return NULL; |
84cc784c | 2475 | |
2476 | if (TREE_CODE (rhs2) != SSA_NAME) | |
29314253 | 2477 | return NULL; |
84cc784c | 2478 | |
2479 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
2480 | ||
2481 | switch (code) | |
2482 | { | |
2483 | case BIT_IOR_EXPR: | |
29314253 | 2484 | source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1); |
84cc784c | 2485 | |
29314253 | 2486 | if (!source_stmt1) |
2487 | return NULL; | |
84cc784c | 2488 | |
29314253 | 2489 | source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1); |
470d5bb5 | 2490 | |
29314253 | 2491 | if (!source_stmt2) |
2492 | return NULL; | |
ee054e83 | 2493 | |
7101c0cf | 2494 | if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type)) |
29314253 | 2495 | return NULL; |
84cc784c | 2496 | |
1e6c1ce6 | 2497 | if (!n1.vuse != !n2.vuse |
2498 | || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0))) | |
29314253 | 2499 | return NULL; |
84cc784c | 2500 | |
1e6c1ce6 | 2501 | source_stmt |
2502 | = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n); | |
470d5bb5 | 2503 | |
f7a40894 | 2504 | if (!source_stmt) |
2505 | return NULL; | |
84cc784c | 2506 | |
2507 | if (!verify_symbolic_number_p (n, stmt)) | |
29314253 | 2508 | return NULL; |
84cc784c | 2509 | |
2510 | break; | |
2511 | default: | |
29314253 | 2512 | return NULL; |
84cc784c | 2513 | } |
f7a40894 | 2514 | return source_stmt; |
84cc784c | 2515 | } |
29314253 | 2516 | return NULL; |
84cc784c | 2517 | } |
2518 | ||
470d5bb5 | 2519 | /* Check if STMT completes a bswap implementation or a read in a given |
2520 | endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP | |
2521 | accordingly. It also sets N to represent the kind of operations | |
2522 | performed: size of the resulting expression and whether it works on | |
2523 | a memory source, and if so alias-set and vuse. At last, the | |
29314253 | 2524 | function returns a stmt whose rhs's first tree is the source |
2525 | expression. */ | |
84cc784c | 2526 | |
42acab1c | 2527 | static gimple * |
2528 | find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) | |
84cc784c | 2529 | { |
77dfd3f2 | 2530 | unsigned rsize; |
2531 | uint64_t tmpn, mask; | |
2532 | /* The number which the find_bswap_or_nop_1 result should match in order | |
2533 | to have a full byte swap. The number is shifted to the right | |
2534 | according to the size of the symbolic number before using it. */ | |
3a4303e7 | 2535 | uint64_t cmpxchg = CMPXCHG; |
2536 | uint64_t cmpnop = CMPNOP; | |
470d5bb5 | 2537 | |
a9eff296 | 2538 | gimple *ins_stmt; |
0f09ed00 | 2539 | int limit; |
84cc784c | 2540 | |
9bc1852a | 2541 | /* The last parameter determines the depth search limit. It usually |
470d5bb5 | 2542 | correlates directly to the number n of bytes to be touched. We |
2543 | increase that number by log2(n) + 1 here in order to also | |
2544 | cover signed -> unsigned conversions of the src operand as can be seen | |
0f09ed00 | 2545 | in libgcc, and for initial shift/and operation of the src operand. */ |
f9ae6f95 | 2546 | limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); |
0f09ed00 | 2547 | limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); |
a9eff296 | 2548 | ins_stmt = find_bswap_or_nop_1 (stmt, n, limit); |
84cc784c | 2549 | |
a9eff296 | 2550 | if (!ins_stmt) |
29314253 | 2551 | return NULL; |
84cc784c | 2552 | |
1e6c1ce6 | 2553 | /* Find real size of result (highest non-zero byte). */ |
470d5bb5 | 2554 | if (n->base_addr) |
77dfd3f2 | 2555 | for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++); |
2556 | else | |
2557 | rsize = n->range; | |
50e0cd76 | 2558 | |
77dfd3f2 | 2559 | /* Zero out the bits corresponding to untouched bytes in original gimple |
2560 | expression. */ | |
cc76206c | 2561 | if (n->range < (int) sizeof (int64_t)) |
470d5bb5 | 2562 | { |
cc76206c | 2563 | mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1; |
2564 | cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; | |
470d5bb5 | 2565 | cmpnop &= mask; |
2566 | } | |
2567 | ||
77dfd3f2 | 2568 | /* Zero out the bits corresponding to unused bytes in the result of the |
2569 | gimple expression. */ | |
2570 | if (rsize < n->range) | |
2571 | { | |
2572 | if (BYTES_BIG_ENDIAN) | |
2573 | { | |
2574 | mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; | |
2575 | cmpxchg &= mask; | |
2576 | cmpnop >>= (n->range - rsize) * BITS_PER_MARKER; | |
2577 | } | |
2578 | else | |
2579 | { | |
2580 | mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; | |
2581 | cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER; | |
2582 | cmpnop &= mask; | |
2583 | } | |
2584 | n->range = rsize; | |
2585 | } | |
2586 | ||
470d5bb5 | 2587 | /* A complete byte swap should make the symbolic number to start with |
2588 | the largest digit in the highest order byte. Unchanged symbolic | |
89d0fbd4 | 2589 | number indicates a read with same endianness as target architecture. */ |
470d5bb5 | 2590 | if (n->n == cmpnop) |
2591 | *bswap = false; | |
2592 | else if (n->n == cmpxchg) | |
2593 | *bswap = true; | |
2594 | else | |
29314253 | 2595 | return NULL; |
470d5bb5 | 2596 | |
2597 | /* Useless bit manipulation performed by code. */ | |
e20b054d | 2598 | if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) |
29314253 | 2599 | return NULL; |
84cc784c | 2600 | |
470d5bb5 | 2601 | n->range *= BITS_PER_UNIT; |
a9eff296 | 2602 | return ins_stmt; |
84cc784c | 2603 | } |
2604 | ||
65b0537f | 2605 | namespace { |
2606 | ||
2607 | const pass_data pass_data_optimize_bswap = | |
2608 | { | |
2609 | GIMPLE_PASS, /* type */ | |
2610 | "bswap", /* name */ | |
2611 | OPTGROUP_NONE, /* optinfo_flags */ | |
65b0537f | 2612 | TV_NONE, /* tv_id */ |
2613 | PROP_ssa, /* properties_required */ | |
2614 | 0, /* properties_provided */ | |
2615 | 0, /* properties_destroyed */ | |
2616 | 0, /* todo_flags_start */ | |
2617 | 0, /* todo_flags_finish */ | |
2618 | }; | |
2619 | ||
2620 | class pass_optimize_bswap : public gimple_opt_pass | |
2621 | { | |
2622 | public: | |
2623 | pass_optimize_bswap (gcc::context *ctxt) | |
2624 | : gimple_opt_pass (pass_data_optimize_bswap, ctxt) | |
2625 | {} | |
2626 | ||
2627 | /* opt_pass methods: */ | |
2628 | virtual bool gate (function *) | |
2629 | { | |
2630 | return flag_expensive_optimizations && optimize; | |
2631 | } | |
2632 | ||
2633 | virtual unsigned int execute (function *); | |
2634 | ||
2635 | }; // class pass_optimize_bswap | |
2636 | ||
4b2d4ce4 | 2637 | /* Perform the bswap optimization: replace the expression computed in the rhs |
2638 | of CUR_STMT by an equivalent bswap, load or load + bswap expression. | |
2639 | Which of these alternatives replace the rhs is given by N->base_addr (non | |
2640 | null if a load is needed) and BSWAP. The type, VUSE and set-alias of the | |
2641 | load to perform are also given in N while the builtin bswap invoke is given | |
2642 | in FNDEL. Finally, if a load is involved, SRC_STMT refers to one of the | |
2643 | load statements involved to construct the rhs in CUR_STMT and N->range gives | |
2644 | the size of the rhs expression for maintaining some statistics. | |
2645 | ||
2646 | Note that if the replacement involve a load, CUR_STMT is moved just after | |
2647 | SRC_STMT to do the load with the same VUSE which can lead to CUR_STMT | |
2648 | changing of basic block. */ | |
470d5bb5 | 2649 | |
2650 | static bool | |
a9eff296 | 2651 | bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl, |
42acab1c | 2652 | tree bswap_type, tree load_type, struct symbolic_number *n, |
2653 | bool bswap) | |
470d5bb5 | 2654 | { |
4b2d4ce4 | 2655 | gimple_stmt_iterator gsi; |
29314253 | 2656 | tree src, tmp, tgt; |
42acab1c | 2657 | gimple *bswap_stmt; |
470d5bb5 | 2658 | |
4b2d4ce4 | 2659 | gsi = gsi_for_stmt (cur_stmt); |
a9eff296 | 2660 | src = n->src; |
29314253 | 2661 | tgt = gimple_assign_lhs (cur_stmt); |
470d5bb5 | 2662 | |
2663 | /* Need to load the value from memory first. */ | |
2664 | if (n->base_addr) | |
2665 | { | |
a9eff296 | 2666 | gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt); |
470d5bb5 | 2667 | tree addr_expr, addr_tmp, val_expr, val_tmp; |
2668 | tree load_offset_ptr, aligned_load_type; | |
42acab1c | 2669 | gimple *addr_stmt, *load_stmt; |
470d5bb5 | 2670 | unsigned align; |
1e6c1ce6 | 2671 | HOST_WIDE_INT load_offset = 0; |
a9eff296 | 2672 | basic_block ins_bb, cur_bb; |
2673 | ||
2674 | ins_bb = gimple_bb (ins_stmt); | |
2675 | cur_bb = gimple_bb (cur_stmt); | |
2676 | if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb)) | |
2677 | return false; | |
470d5bb5 | 2678 | |
2679 | align = get_object_alignment (src); | |
1e6c1ce6 | 2680 | |
4b2d4ce4 | 2681 | /* Move cur_stmt just before one of the load of the original |
2682 | to ensure it has the same VUSE. See PR61517 for what could | |
2683 | go wrong. */ | |
a9eff296 | 2684 | if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt)) |
32f2925a | 2685 | reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt)); |
29314253 | 2686 | gsi_move_before (&gsi, &gsi_ins); |
2687 | gsi = gsi_for_stmt (cur_stmt); | |
2688 | ||
1e6c1ce6 | 2689 | /* Compute address to load from and cast according to the size |
2690 | of the load. */ | |
470d5bb5 | 2691 | addr_expr = build_fold_addr_expr (unshare_expr (src)); |
1e6c1ce6 | 2692 | if (is_gimple_mem_ref_addr (addr_expr)) |
470d5bb5 | 2693 | addr_tmp = addr_expr; |
2694 | else | |
2695 | { | |
2696 | addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL, | |
2697 | "load_src"); | |
2698 | addr_stmt = gimple_build_assign (addr_tmp, addr_expr); | |
29314253 | 2699 | gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT); |
470d5bb5 | 2700 | } |
2701 | ||
2702 | /* Perform the load. */ | |
2703 | aligned_load_type = load_type; | |
2704 | if (align < TYPE_ALIGN (load_type)) | |
2705 | aligned_load_type = build_aligned_type (load_type, align); | |
1e6c1ce6 | 2706 | load_offset_ptr = build_int_cst (n->alias_set, load_offset); |
470d5bb5 | 2707 | val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp, |
2708 | load_offset_ptr); | |
2709 | ||
2710 | if (!bswap) | |
2711 | { | |
2712 | if (n->range == 16) | |
2713 | nop_stats.found_16bit++; | |
2714 | else if (n->range == 32) | |
2715 | nop_stats.found_32bit++; | |
2716 | else | |
2717 | { | |
2718 | gcc_assert (n->range == 64); | |
2719 | nop_stats.found_64bit++; | |
2720 | } | |
2721 | ||
2722 | /* Convert the result of load if necessary. */ | |
2723 | if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type)) | |
2724 | { | |
2725 | val_tmp = make_temp_ssa_name (aligned_load_type, NULL, | |
2726 | "load_dst"); | |
2727 | load_stmt = gimple_build_assign (val_tmp, val_expr); | |
2728 | gimple_set_vuse (load_stmt, n->vuse); | |
29314253 | 2729 | gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); |
806413d2 | 2730 | gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp); |
470d5bb5 | 2731 | } |
2732 | else | |
29314253 | 2733 | { |
806413d2 | 2734 | gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr); |
29314253 | 2735 | gimple_set_vuse (cur_stmt, n->vuse); |
2736 | } | |
2737 | update_stmt (cur_stmt); | |
470d5bb5 | 2738 | |
2739 | if (dump_file) | |
2740 | { | |
2741 | fprintf (dump_file, | |
89d0fbd4 | 2742 | "%d bit load in target endianness found at: ", |
1e6c1ce6 | 2743 | (int) n->range); |
1ffa4346 | 2744 | print_gimple_stmt (dump_file, cur_stmt, 0); |
470d5bb5 | 2745 | } |
2746 | return true; | |
2747 | } | |
2748 | else | |
2749 | { | |
2750 | val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst"); | |
2751 | load_stmt = gimple_build_assign (val_tmp, val_expr); | |
2752 | gimple_set_vuse (load_stmt, n->vuse); | |
29314253 | 2753 | gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); |
470d5bb5 | 2754 | } |
2755 | src = val_tmp; | |
2756 | } | |
e20b054d | 2757 | else if (!bswap) |
2758 | { | |
2759 | gimple *g; | |
2760 | if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src))) | |
2761 | { | |
2762 | if (!is_gimple_val (src)) | |
2763 | return false; | |
2764 | g = gimple_build_assign (tgt, NOP_EXPR, src); | |
2765 | } | |
2766 | else | |
2767 | g = gimple_build_assign (tgt, src); | |
2768 | if (n->range == 16) | |
2769 | nop_stats.found_16bit++; | |
2770 | else if (n->range == 32) | |
2771 | nop_stats.found_32bit++; | |
2772 | else | |
2773 | { | |
2774 | gcc_assert (n->range == 64); | |
2775 | nop_stats.found_64bit++; | |
2776 | } | |
2777 | if (dump_file) | |
2778 | { | |
2779 | fprintf (dump_file, | |
2780 | "%d bit reshuffle in target endianness found at: ", | |
2781 | (int) n->range); | |
2782 | print_gimple_stmt (dump_file, cur_stmt, 0); | |
2783 | } | |
2784 | gsi_replace (&gsi, g, true); | |
2785 | return true; | |
2786 | } | |
42333dd9 | 2787 | else if (TREE_CODE (src) == BIT_FIELD_REF) |
2788 | src = TREE_OPERAND (src, 0); | |
470d5bb5 | 2789 | |
2790 | if (n->range == 16) | |
2791 | bswap_stats.found_16bit++; | |
2792 | else if (n->range == 32) | |
2793 | bswap_stats.found_32bit++; | |
2794 | else | |
2795 | { | |
2796 | gcc_assert (n->range == 64); | |
2797 | bswap_stats.found_64bit++; | |
2798 | } | |
2799 | ||
2800 | tmp = src; | |
2801 | ||
904c73a0 | 2802 | /* Convert the src expression if necessary. */ |
2803 | if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type)) | |
2804 | { | |
42acab1c | 2805 | gimple *convert_stmt; |
904c73a0 | 2806 | |
2807 | tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc"); | |
2808 | convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src); | |
2809 | gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT); | |
2810 | } | |
2811 | ||
4b2d4ce4 | 2812 | /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values |
2813 | are considered as rotation of 2N bit values by N bits is generally not | |
904c73a0 | 2814 | equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which |
2815 | gives 0x03040102 while a bswap for that value is 0x04030201. */ | |
0b6c968a | 2816 | if (bswap && n->range == 16) |
470d5bb5 | 2817 | { |
0b6c968a | 2818 | tree count = build_int_cst (NULL, BITS_PER_UNIT); |
904c73a0 | 2819 | src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count); |
0b6c968a | 2820 | bswap_stmt = gimple_build_assign (NULL, src); |
470d5bb5 | 2821 | } |
0b6c968a | 2822 | else |
904c73a0 | 2823 | bswap_stmt = gimple_build_call (fndecl, 1, tmp); |
470d5bb5 | 2824 | |
2825 | tmp = tgt; | |
2826 | ||
2827 | /* Convert the result if necessary. */ | |
2828 | if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type)) | |
2829 | { | |
42acab1c | 2830 | gimple *convert_stmt; |
904c73a0 | 2831 | |
470d5bb5 | 2832 | tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst"); |
e9cf809e | 2833 | convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp); |
29314253 | 2834 | gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT); |
470d5bb5 | 2835 | } |
2836 | ||
0b6c968a | 2837 | gimple_set_lhs (bswap_stmt, tmp); |
470d5bb5 | 2838 | |
2839 | if (dump_file) | |
2840 | { | |
2841 | fprintf (dump_file, "%d bit bswap implementation found at: ", | |
1e6c1ce6 | 2842 | (int) n->range); |
1ffa4346 | 2843 | print_gimple_stmt (dump_file, cur_stmt, 0); |
470d5bb5 | 2844 | } |
2845 | ||
0b6c968a | 2846 | gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT); |
29314253 | 2847 | gsi_remove (&gsi, true); |
470d5bb5 | 2848 | return true; |
2849 | } | |
2850 | ||
2851 | /* Find manual byte swap implementations as well as load in a given | |
2852 | endianness. Byte swaps are turned into a bswap builtin invokation | |
2853 | while endian loads are converted to bswap builtin invokation or | |
89d0fbd4 | 2854 | simple load according to the target endianness. */ |
470d5bb5 | 2855 | |
65b0537f | 2856 | unsigned int |
2857 | pass_optimize_bswap::execute (function *fun) | |
84cc784c | 2858 | { |
2859 | basic_block bb; | |
bbb9b536 | 2860 | bool bswap32_p, bswap64_p; |
84cc784c | 2861 | bool changed = false; |
bbb9b536 | 2862 | tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; |
84cc784c | 2863 | |
2864 | if (BITS_PER_UNIT != 8) | |
2865 | return 0; | |
2866 | ||
b9a16870 | 2867 | bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32) |
d6bf3b14 | 2868 | && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing); |
b9a16870 | 2869 | bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64) |
d6bf3b14 | 2870 | && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing |
3328b1fb | 2871 | || (bswap32_p && word_mode == SImode))); |
84cc784c | 2872 | |
0af25806 | 2873 | /* Determine the argument type of the builtins. The code later on |
2874 | assumes that the return and argument type are the same. */ | |
2875 | if (bswap32_p) | |
2876 | { | |
b9a16870 | 2877 | tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); |
0af25806 | 2878 | bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); |
2879 | } | |
2880 | ||
2881 | if (bswap64_p) | |
2882 | { | |
b9a16870 | 2883 | tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); |
0af25806 | 2884 | bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); |
2885 | } | |
2886 | ||
470d5bb5 | 2887 | memset (&nop_stats, 0, sizeof (nop_stats)); |
30c4e60d | 2888 | memset (&bswap_stats, 0, sizeof (bswap_stats)); |
a9eff296 | 2889 | calculate_dominance_info (CDI_DOMINATORS); |
30c4e60d | 2890 | |
65b0537f | 2891 | FOR_EACH_BB_FN (bb, fun) |
84cc784c | 2892 | { |
2893 | gimple_stmt_iterator gsi; | |
2894 | ||
0ec31268 | 2895 | /* We do a reverse scan for bswap patterns to make sure we get the |
4b2d4ce4 | 2896 | widest match. As bswap pattern matching doesn't handle previously |
2897 | inserted smaller bswap replacements as sub-patterns, the wider | |
2898 | variant wouldn't be detected. */ | |
2899 | for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);) | |
84cc784c | 2900 | { |
a9eff296 | 2901 | gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi); |
29314253 | 2902 | tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; |
0b6c968a | 2903 | enum tree_code code; |
470d5bb5 | 2904 | struct symbolic_number n; |
2905 | bool bswap; | |
84cc784c | 2906 | |
4b2d4ce4 | 2907 | /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt |
2908 | might be moved to a different basic block by bswap_replace and gsi | |
2909 | must not points to it if that's the case. Moving the gsi_prev | |
2910 | there make sure that gsi points to the statement previous to | |
2911 | cur_stmt while still making sure that all statements are | |
2912 | considered in this basic block. */ | |
2913 | gsi_prev (&gsi); | |
2914 | ||
0b6c968a | 2915 | if (!is_gimple_assign (cur_stmt)) |
84cc784c | 2916 | continue; |
2917 | ||
0b6c968a | 2918 | code = gimple_assign_rhs_code (cur_stmt); |
2919 | switch (code) | |
2920 | { | |
2921 | case LROTATE_EXPR: | |
2922 | case RROTATE_EXPR: | |
2923 | if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt)) | |
2924 | || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt)) | |
2925 | % BITS_PER_UNIT) | |
2926 | continue; | |
2927 | /* Fall through. */ | |
2928 | case BIT_IOR_EXPR: | |
2929 | break; | |
2930 | default: | |
2931 | continue; | |
2932 | } | |
2933 | ||
a9eff296 | 2934 | ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); |
470d5bb5 | 2935 | |
a9eff296 | 2936 | if (!ins_stmt) |
470d5bb5 | 2937 | continue; |
84cc784c | 2938 | |
470d5bb5 | 2939 | switch (n.range) |
84cc784c | 2940 | { |
f811051b | 2941 | case 16: |
4b2d4ce4 | 2942 | /* Already in canonical form, nothing to do. */ |
2943 | if (code == LROTATE_EXPR || code == RROTATE_EXPR) | |
2944 | continue; | |
904c73a0 | 2945 | load_type = bswap_type = uint16_type_node; |
f811051b | 2946 | break; |
84cc784c | 2947 | case 32: |
470d5bb5 | 2948 | load_type = uint32_type_node; |
84cc784c | 2949 | if (bswap32_p) |
0af25806 | 2950 | { |
b9a16870 | 2951 | fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); |
0af25806 | 2952 | bswap_type = bswap32_type; |
2953 | } | |
84cc784c | 2954 | break; |
2955 | case 64: | |
470d5bb5 | 2956 | load_type = uint64_type_node; |
84cc784c | 2957 | if (bswap64_p) |
0af25806 | 2958 | { |
b9a16870 | 2959 | fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); |
0af25806 | 2960 | bswap_type = bswap64_type; |
2961 | } | |
84cc784c | 2962 | break; |
2963 | default: | |
2964 | continue; | |
2965 | } | |
2966 | ||
bbb9b536 | 2967 | if (bswap && !fndecl && n.range != 16) |
84cc784c | 2968 | continue; |
2969 | ||
a9eff296 | 2970 | if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type, |
4b2d4ce4 | 2971 | &n, bswap)) |
470d5bb5 | 2972 | changed = true; |
84cc784c | 2973 | } |
2974 | } | |
2975 | ||
470d5bb5 | 2976 | statistics_counter_event (fun, "16-bit nop implementations found", |
2977 | nop_stats.found_16bit); | |
2978 | statistics_counter_event (fun, "32-bit nop implementations found", | |
2979 | nop_stats.found_32bit); | |
2980 | statistics_counter_event (fun, "64-bit nop implementations found", | |
2981 | nop_stats.found_64bit); | |
65b0537f | 2982 | statistics_counter_event (fun, "16-bit bswap implementations found", |
f811051b | 2983 | bswap_stats.found_16bit); |
65b0537f | 2984 | statistics_counter_event (fun, "32-bit bswap implementations found", |
30c4e60d | 2985 | bswap_stats.found_32bit); |
65b0537f | 2986 | statistics_counter_event (fun, "64-bit bswap implementations found", |
30c4e60d | 2987 | bswap_stats.found_64bit); |
2988 | ||
8b88439e | 2989 | return (changed ? TODO_update_ssa : 0); |
84cc784c | 2990 | } |
2991 | ||
cbe8bda8 | 2992 | } // anon namespace |
2993 | ||
2994 | gimple_opt_pass * | |
2995 | make_pass_optimize_bswap (gcc::context *ctxt) | |
2996 | { | |
2997 | return new pass_optimize_bswap (ctxt); | |
2998 | } | |
2999 | ||
71dbd910 | 3000 | /* Return true if stmt is a type conversion operation that can be stripped |
3001 | when used in a widening multiply operation. */ | |
3002 | static bool | |
42acab1c | 3003 | widening_mult_conversion_strippable_p (tree result_type, gimple *stmt) |
71dbd910 | 3004 | { |
3005 | enum tree_code rhs_code = gimple_assign_rhs_code (stmt); | |
3006 | ||
3007 | if (TREE_CODE (result_type) == INTEGER_TYPE) | |
3008 | { | |
3009 | tree op_type; | |
3010 | tree inner_op_type; | |
3011 | ||
3012 | if (!CONVERT_EXPR_CODE_P (rhs_code)) | |
3013 | return false; | |
3014 | ||
3015 | op_type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
3016 | ||
3017 | /* If the type of OP has the same precision as the result, then | |
3018 | we can strip this conversion. The multiply operation will be | |
3019 | selected to create the correct extension as a by-product. */ | |
3020 | if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type)) | |
3021 | return true; | |
3022 | ||
3023 | /* We can also strip a conversion if it preserves the signed-ness of | |
3024 | the operation and doesn't narrow the range. */ | |
3025 | inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); | |
3026 | ||
8f9d1531 | 3027 | /* If the inner-most type is unsigned, then we can strip any |
3028 | intermediate widening operation. If it's signed, then the | |
3029 | intermediate widening operation must also be signed. */ | |
3030 | if ((TYPE_UNSIGNED (inner_op_type) | |
3031 | || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type)) | |
71dbd910 | 3032 | && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type)) |
3033 | return true; | |
3034 | ||
3035 | return false; | |
3036 | } | |
3037 | ||
3038 | return rhs_code == FIXED_CONVERT_EXPR; | |
3039 | } | |
3040 | ||
0989f516 | 3041 | /* Return true if RHS is a suitable operand for a widening multiplication, |
3042 | assuming a target type of TYPE. | |
7e4c867e | 3043 | There are two cases: |
3044 | ||
aff5fb4d | 3045 | - RHS makes some value at least twice as wide. Store that value |
3046 | in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. | |
7e4c867e | 3047 | |
3048 | - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, | |
3049 | but leave *TYPE_OUT untouched. */ | |
00f4f705 | 3050 | |
3051 | static bool | |
0989f516 | 3052 | is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, |
3053 | tree *new_rhs_out) | |
7e4c867e | 3054 | { |
42acab1c | 3055 | gimple *stmt; |
0989f516 | 3056 | tree type1, rhs1; |
7e4c867e | 3057 | |
3058 | if (TREE_CODE (rhs) == SSA_NAME) | |
3059 | { | |
7e4c867e | 3060 | stmt = SSA_NAME_DEF_STMT (rhs); |
0989f516 | 3061 | if (is_gimple_assign (stmt)) |
3062 | { | |
71dbd910 | 3063 | if (! widening_mult_conversion_strippable_p (type, stmt)) |
0989f516 | 3064 | rhs1 = rhs; |
3065 | else | |
ffebd9c5 | 3066 | { |
3067 | rhs1 = gimple_assign_rhs1 (stmt); | |
3068 | ||
3069 | if (TREE_CODE (rhs1) == INTEGER_CST) | |
3070 | { | |
3071 | *new_rhs_out = rhs1; | |
3072 | *type_out = NULL; | |
3073 | return true; | |
3074 | } | |
3075 | } | |
0989f516 | 3076 | } |
3077 | else | |
3078 | rhs1 = rhs; | |
7e4c867e | 3079 | |
7e4c867e | 3080 | type1 = TREE_TYPE (rhs1); |
0989f516 | 3081 | |
7e4c867e | 3082 | if (TREE_CODE (type1) != TREE_CODE (type) |
aff5fb4d | 3083 | || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) |
7e4c867e | 3084 | return false; |
3085 | ||
3086 | *new_rhs_out = rhs1; | |
3087 | *type_out = type1; | |
3088 | return true; | |
3089 | } | |
3090 | ||
3091 | if (TREE_CODE (rhs) == INTEGER_CST) | |
3092 | { | |
3093 | *new_rhs_out = rhs; | |
3094 | *type_out = NULL; | |
3095 | return true; | |
3096 | } | |
3097 | ||
3098 | return false; | |
3099 | } | |
3100 | ||
0989f516 | 3101 | /* Return true if STMT performs a widening multiplication, assuming the |
3102 | output type is TYPE. If so, store the unwidened types of the operands | |
3103 | in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and | |
3104 | *RHS2_OUT such that converting those operands to types *TYPE1_OUT | |
3105 | and *TYPE2_OUT would give the operands of the multiplication. */ | |
7e4c867e | 3106 | |
3107 | static bool | |
42acab1c | 3108 | is_widening_mult_p (gimple *stmt, |
7e4c867e | 3109 | tree *type1_out, tree *rhs1_out, |
3110 | tree *type2_out, tree *rhs2_out) | |
00f4f705 | 3111 | { |
4333b41f | 3112 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
3113 | ||
7e4c867e | 3114 | if (TREE_CODE (type) != INTEGER_TYPE |
3115 | && TREE_CODE (type) != FIXED_POINT_TYPE) | |
3116 | return false; | |
00f4f705 | 3117 | |
0989f516 | 3118 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, |
3119 | rhs1_out)) | |
00f4f705 | 3120 | return false; |
3121 | ||
0989f516 | 3122 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, |
3123 | rhs2_out)) | |
7e4c867e | 3124 | return false; |
00f4f705 | 3125 | |
7e4c867e | 3126 | if (*type1_out == NULL) |
00f4f705 | 3127 | { |
7e4c867e | 3128 | if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out)) |
00f4f705 | 3129 | return false; |
7e4c867e | 3130 | *type1_out = *type2_out; |
00f4f705 | 3131 | } |
00f4f705 | 3132 | |
7e4c867e | 3133 | if (*type2_out == NULL) |
00f4f705 | 3134 | { |
7e4c867e | 3135 | if (!int_fits_type_p (*rhs2_out, *type1_out)) |
00f4f705 | 3136 | return false; |
7e4c867e | 3137 | *type2_out = *type1_out; |
00f4f705 | 3138 | } |
00f4f705 | 3139 | |
287c271c | 3140 | /* Ensure that the larger of the two operands comes first. */ |
3141 | if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) | |
3142 | { | |
dfcf26a5 | 3143 | std::swap (*type1_out, *type2_out); |
3144 | std::swap (*rhs1_out, *rhs2_out); | |
287c271c | 3145 | } |
aff5fb4d | 3146 | |
7e4c867e | 3147 | return true; |
3148 | } | |
00f4f705 | 3149 | |
eb728046 | 3150 | /* Check to see if the CALL statement is an invocation of copysign |
3151 | with a real constant 1.0 as its first argument. */ | |
3152 | static bool | |
3153 | is_copysign_call_with_1 (gimple *call) | |
3154 | { | |
3155 | gcall *c = dyn_cast <gcall *> (call); | |
3156 | if (! c) | |
3157 | return false; | |
3158 | ||
3159 | enum combined_fn code = gimple_call_combined_fn (c); | |
3160 | ||
3161 | if (code == CFN_LAST) | |
3162 | return false; | |
3163 | ||
3164 | if (builtin_fn_p (code)) | |
3165 | { | |
3166 | switch (as_builtin_fn (code)) | |
3167 | { | |
3168 | CASE_FLT_FN (BUILT_IN_COPYSIGN): | |
3169 | CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN): | |
3170 | return real_onep (gimple_call_arg (c, 0)); | |
3171 | default: | |
3172 | return false; | |
3173 | } | |
3174 | } | |
3175 | ||
3176 | if (internal_fn_p (code)) | |
3177 | { | |
3178 | switch (as_internal_fn (code)) | |
3179 | { | |
3180 | case IFN_COPYSIGN: | |
3181 | return real_onep (gimple_call_arg (c, 0)); | |
3182 | default: | |
3183 | return false; | |
3184 | } | |
3185 | } | |
3186 | ||
3187 | return false; | |
3188 | } | |
3189 | ||
3190 | /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y). | |
3191 | This only happens when the xorsign optab is defined; if the | |
3192 | pattern is not a xorsign pattern or if expansion fails FALSE is | |
3193 | returned, otherwise TRUE is returned. */ | |
3194 | static bool | |
3195 | convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi) | |
3196 | { | |
3197 | tree treeop0, treeop1, lhs, type; | |
3198 | location_t loc = gimple_location (stmt); | |
3199 | lhs = gimple_assign_lhs (stmt); | |
3200 | treeop0 = gimple_assign_rhs1 (stmt); | |
3201 | treeop1 = gimple_assign_rhs2 (stmt); | |
3202 | type = TREE_TYPE (lhs); | |
3203 | machine_mode mode = TYPE_MODE (type); | |
3204 | ||
3aa2a10c | 3205 | if (HONOR_SNANS (type)) |
eb728046 | 3206 | return false; |
3207 | ||
3208 | if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME) | |
3209 | { | |
3210 | gimple *call0 = SSA_NAME_DEF_STMT (treeop0); | |
3aa2a10c | 3211 | if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0)) |
eb728046 | 3212 | { |
3213 | call0 = SSA_NAME_DEF_STMT (treeop1); | |
3aa2a10c | 3214 | if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0)) |
eb728046 | 3215 | return false; |
3216 | ||
3217 | treeop1 = treeop0; /* The copysign was treeop1; keep the other operand. */ | |
3218 | } | |
eb728046 | 3219 | if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing) |
3220 | return false; | |
3221 | ||
3222 | gcall *c = as_a<gcall*> (call0); | |
3223 | treeop0 = gimple_call_arg (c, 1); | |
3224 | ||
3225 | gcall *call_stmt | |
3226 | = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0); | |
3227 | gimple_set_lhs (call_stmt, lhs); | |
3228 | gimple_set_location (call_stmt, loc); | |
3229 | gsi_replace (gsi, call_stmt, true); | |
3230 | return true; | |
3231 | } | |
3232 | ||
3233 | return false; | |
3234 | } | |
3235 | ||
7e4c867e | 3236 | /* Process a single gimple statement STMT, which has a MULT_EXPR as |
3237 | its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return | |
3238 | value is true iff we converted the statement. */ | |
3239 | ||
3240 | static bool | |
42acab1c | 3241 | convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi) |
7e4c867e | 3242 | { |
03d37e4e | 3243 | tree lhs, rhs1, rhs2, type, type1, type2; |
7e4c867e | 3244 | enum insn_code handler; |
3754d046 | 3245 | machine_mode to_mode, from_mode, actual_mode; |
5a574e8b | 3246 | optab op; |
aff5fb4d | 3247 | int actual_precision; |
3248 | location_t loc = gimple_location (stmt); | |
3f2ab719 | 3249 | bool from_unsigned1, from_unsigned2; |
7e4c867e | 3250 | |
3251 | lhs = gimple_assign_lhs (stmt); | |
3252 | type = TREE_TYPE (lhs); | |
3253 | if (TREE_CODE (type) != INTEGER_TYPE) | |
00f4f705 | 3254 | return false; |
3255 | ||
4333b41f | 3256 | if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) |
00f4f705 | 3257 | return false; |
3258 | ||
5a574e8b | 3259 | to_mode = TYPE_MODE (type); |
3260 | from_mode = TYPE_MODE (type1); | |
3f2ab719 | 3261 | from_unsigned1 = TYPE_UNSIGNED (type1); |
3262 | from_unsigned2 = TYPE_UNSIGNED (type2); | |
5a574e8b | 3263 | |
3f2ab719 | 3264 | if (from_unsigned1 && from_unsigned2) |
5a574e8b | 3265 | op = umul_widen_optab; |
3f2ab719 | 3266 | else if (!from_unsigned1 && !from_unsigned2) |
5a574e8b | 3267 | op = smul_widen_optab; |
00f4f705 | 3268 | else |
5a574e8b | 3269 | op = usmul_widen_optab; |
3270 | ||
aff5fb4d | 3271 | handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, |
3272 | 0, &actual_mode); | |
7e4c867e | 3273 | |
3274 | if (handler == CODE_FOR_nothing) | |
3f2ab719 | 3275 | { |
3276 | if (op != smul_widen_optab) | |
3277 | { | |
22ffd684 | 3278 | /* We can use a signed multiply with unsigned types as long as |
3279 | there is a wider mode to use, or it is the smaller of the two | |
3280 | types that is unsigned. Note that type1 >= type2, always. */ | |
3281 | if ((TYPE_UNSIGNED (type1) | |
3282 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) | |
3283 | || (TYPE_UNSIGNED (type2) | |
3284 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) | |
3285 | { | |
28ebc73c | 3286 | if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode) |
3287 | || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) | |
22ffd684 | 3288 | return false; |
3289 | } | |
3f2ab719 | 3290 | |
3291 | op = smul_widen_optab; | |
3292 | handler = find_widening_optab_handler_and_mode (op, to_mode, | |
3293 | from_mode, 0, | |
3294 | &actual_mode); | |
3295 | ||
3296 | if (handler == CODE_FOR_nothing) | |
3297 | return false; | |
3298 | ||
3299 | from_unsigned1 = from_unsigned2 = false; | |
3300 | } | |
3301 | else | |
3302 | return false; | |
3303 | } | |
7e4c867e | 3304 | |
aff5fb4d | 3305 | /* Ensure that the inputs to the handler are in the correct precision |
3306 | for the opcode. This will be the full mode size. */ | |
3307 | actual_precision = GET_MODE_PRECISION (actual_mode); | |
b36be69d | 3308 | if (2 * actual_precision > TYPE_PRECISION (type)) |
3309 | return false; | |
3f2ab719 | 3310 | if (actual_precision != TYPE_PRECISION (type1) |
3311 | || from_unsigned1 != TYPE_UNSIGNED (type1)) | |
03d37e4e | 3312 | rhs1 = build_and_insert_cast (gsi, loc, |
3313 | build_nonstandard_integer_type | |
3314 | (actual_precision, from_unsigned1), rhs1); | |
3f2ab719 | 3315 | if (actual_precision != TYPE_PRECISION (type2) |
3316 | || from_unsigned2 != TYPE_UNSIGNED (type2)) | |
03d37e4e | 3317 | rhs2 = build_and_insert_cast (gsi, loc, |
3318 | build_nonstandard_integer_type | |
3319 | (actual_precision, from_unsigned2), rhs2); | |
aff5fb4d | 3320 | |
ffebd9c5 | 3321 | /* Handle constants. */ |
3322 | if (TREE_CODE (rhs1) == INTEGER_CST) | |
3323 | rhs1 = fold_convert (type1, rhs1); | |
3324 | if (TREE_CODE (rhs2) == INTEGER_CST) | |
3325 | rhs2 = fold_convert (type2, rhs2); | |
3326 | ||
aff5fb4d | 3327 | gimple_assign_set_rhs1 (stmt, rhs1); |
3328 | gimple_assign_set_rhs2 (stmt, rhs2); | |
00f4f705 | 3329 | gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); |
3330 | update_stmt (stmt); | |
30c4e60d | 3331 | widen_mul_stats.widen_mults_inserted++; |
00f4f705 | 3332 | return true; |
3333 | } | |
3334 | ||
3335 | /* Process a single gimple statement STMT, which is found at the | |
3336 | iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its | |
3337 | rhs (given by CODE), and try to convert it into a | |
3338 | WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value | |
3339 | is true iff we converted the statement. */ | |
3340 | ||
3341 | static bool | |
42acab1c | 3342 | convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, |
00f4f705 | 3343 | enum tree_code code) |
3344 | { | |
42acab1c | 3345 | gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL; |
3346 | gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt; | |
03d37e4e | 3347 | tree type, type1, type2, optype; |
00f4f705 | 3348 | tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; |
3349 | enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; | |
3350 | optab this_optab; | |
3351 | enum tree_code wmult_code; | |
aff5fb4d | 3352 | enum insn_code handler; |
3754d046 | 3353 | machine_mode to_mode, from_mode, actual_mode; |
aff5fb4d | 3354 | location_t loc = gimple_location (stmt); |
3355 | int actual_precision; | |
3f2ab719 | 3356 | bool from_unsigned1, from_unsigned2; |
00f4f705 | 3357 | |
3358 | lhs = gimple_assign_lhs (stmt); | |
3359 | type = TREE_TYPE (lhs); | |
7e4c867e | 3360 | if (TREE_CODE (type) != INTEGER_TYPE |
3361 | && TREE_CODE (type) != FIXED_POINT_TYPE) | |
00f4f705 | 3362 | return false; |
3363 | ||
3364 | if (code == MINUS_EXPR) | |
3365 | wmult_code = WIDEN_MULT_MINUS_EXPR; | |
3366 | else | |
3367 | wmult_code = WIDEN_MULT_PLUS_EXPR; | |
3368 | ||
00f4f705 | 3369 | rhs1 = gimple_assign_rhs1 (stmt); |
3370 | rhs2 = gimple_assign_rhs2 (stmt); | |
3371 | ||
3372 | if (TREE_CODE (rhs1) == SSA_NAME) | |
3373 | { | |
3374 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
3375 | if (is_gimple_assign (rhs1_stmt)) | |
3376 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); | |
3377 | } | |
00f4f705 | 3378 | |
3379 | if (TREE_CODE (rhs2) == SSA_NAME) | |
3380 | { | |
3381 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
3382 | if (is_gimple_assign (rhs2_stmt)) | |
3383 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); | |
3384 | } | |
00f4f705 | 3385 | |
07ea3e5c | 3386 | /* Allow for one conversion statement between the multiply |
3387 | and addition/subtraction statement. If there are more than | |
3388 | one conversions then we assume they would invalidate this | |
3389 | transformation. If that's not the case then they should have | |
3390 | been folded before now. */ | |
3391 | if (CONVERT_EXPR_CODE_P (rhs1_code)) | |
3392 | { | |
3393 | conv1_stmt = rhs1_stmt; | |
3394 | rhs1 = gimple_assign_rhs1 (rhs1_stmt); | |
3395 | if (TREE_CODE (rhs1) == SSA_NAME) | |
3396 | { | |
3397 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
3398 | if (is_gimple_assign (rhs1_stmt)) | |
3399 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); | |
3400 | } | |
3401 | else | |
3402 | return false; | |
3403 | } | |
3404 | if (CONVERT_EXPR_CODE_P (rhs2_code)) | |
3405 | { | |
3406 | conv2_stmt = rhs2_stmt; | |
3407 | rhs2 = gimple_assign_rhs1 (rhs2_stmt); | |
3408 | if (TREE_CODE (rhs2) == SSA_NAME) | |
3409 | { | |
3410 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
3411 | if (is_gimple_assign (rhs2_stmt)) | |
3412 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); | |
3413 | } | |
3414 | else | |
3415 | return false; | |
3416 | } | |
3417 | ||
aff5fb4d | 3418 | /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call |
3419 | is_widening_mult_p, but we still need the rhs returns. | |
3420 | ||
3421 | It might also appear that it would be sufficient to use the existing | |
3422 | operands of the widening multiply, but that would limit the choice of | |
e0df5be0 | 3423 | multiply-and-accumulate instructions. |
3424 | ||
3425 | If the widened-multiplication result has more than one use, it is | |
3426 | probably wiser not to do the conversion. */ | |
aff5fb4d | 3427 | if (code == PLUS_EXPR |
3428 | && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) | |
00f4f705 | 3429 | { |
e0df5be0 | 3430 | if (!has_single_use (rhs1) |
3431 | || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, | |
3432 | &type2, &mult_rhs2)) | |
00f4f705 | 3433 | return false; |
7e4c867e | 3434 | add_rhs = rhs2; |
07ea3e5c | 3435 | conv_stmt = conv1_stmt; |
00f4f705 | 3436 | } |
aff5fb4d | 3437 | else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) |
00f4f705 | 3438 | { |
e0df5be0 | 3439 | if (!has_single_use (rhs2) |
3440 | || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, | |
3441 | &type2, &mult_rhs2)) | |
00f4f705 | 3442 | return false; |
7e4c867e | 3443 | add_rhs = rhs1; |
07ea3e5c | 3444 | conv_stmt = conv2_stmt; |
00f4f705 | 3445 | } |
00f4f705 | 3446 | else |
3447 | return false; | |
3448 | ||
aff5fb4d | 3449 | to_mode = TYPE_MODE (type); |
3450 | from_mode = TYPE_MODE (type1); | |
3f2ab719 | 3451 | from_unsigned1 = TYPE_UNSIGNED (type1); |
3452 | from_unsigned2 = TYPE_UNSIGNED (type2); | |
4ccf368d | 3453 | optype = type1; |
aff5fb4d | 3454 | |
3f2ab719 | 3455 | /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ |
3456 | if (from_unsigned1 != from_unsigned2) | |
3457 | { | |
4ccf368d | 3458 | if (!INTEGRAL_TYPE_P (type)) |
3459 | return false; | |
22ffd684 | 3460 | /* We can use a signed multiply with unsigned types as long as |
3461 | there is a wider mode to use, or it is the smaller of the two | |
3462 | types that is unsigned. Note that type1 >= type2, always. */ | |
3463 | if ((from_unsigned1 | |
3464 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) | |
3465 | || (from_unsigned2 | |
3466 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) | |
3f2ab719 | 3467 | { |
28ebc73c | 3468 | if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode) |
3469 | || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode)) | |
22ffd684 | 3470 | return false; |
3f2ab719 | 3471 | } |
22ffd684 | 3472 | |
3473 | from_unsigned1 = from_unsigned2 = false; | |
4ccf368d | 3474 | optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode), |
3475 | false); | |
3f2ab719 | 3476 | } |
815a0224 | 3477 | |
07ea3e5c | 3478 | /* If there was a conversion between the multiply and addition |
3479 | then we need to make sure it fits a multiply-and-accumulate. | |
3480 | There should be a single mode change which does not change the | |
3481 | value. */ | |
3482 | if (conv_stmt) | |
3483 | { | |
3f2ab719 | 3484 | /* We use the original, unmodified data types for this. */ |
07ea3e5c | 3485 | tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); |
3486 | tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); | |
3487 | int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); | |
3488 | bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); | |
3489 | ||
3490 | if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) | |
3491 | { | |
3492 | /* Conversion is a truncate. */ | |
3493 | if (TYPE_PRECISION (to_type) < data_size) | |
3494 | return false; | |
3495 | } | |
3496 | else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) | |
3497 | { | |
3498 | /* Conversion is an extend. Check it's the right sort. */ | |
3499 | if (TYPE_UNSIGNED (from_type) != is_unsigned | |
3500 | && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) | |
3501 | return false; | |
3502 | } | |
3503 | /* else convert is a no-op for our purposes. */ | |
3504 | } | |
3505 | ||
815a0224 | 3506 | /* Verify that the machine can perform a widening multiply |
3507 | accumulate in this mode/signedness combination, otherwise | |
3508 | this transformation is likely to pessimize code. */ | |
3f2ab719 | 3509 | this_optab = optab_for_tree_code (wmult_code, optype, optab_default); |
aff5fb4d | 3510 | handler = find_widening_optab_handler_and_mode (this_optab, to_mode, |
3511 | from_mode, 0, &actual_mode); | |
3512 | ||
3513 | if (handler == CODE_FOR_nothing) | |
815a0224 | 3514 | return false; |
3515 | ||
aff5fb4d | 3516 | /* Ensure that the inputs to the handler are in the correct precision |
3517 | for the opcode. This will be the full mode size. */ | |
3518 | actual_precision = GET_MODE_PRECISION (actual_mode); | |
3f2ab719 | 3519 | if (actual_precision != TYPE_PRECISION (type1) |
3520 | || from_unsigned1 != TYPE_UNSIGNED (type1)) | |
03d37e4e | 3521 | mult_rhs1 = build_and_insert_cast (gsi, loc, |
3522 | build_nonstandard_integer_type | |
3523 | (actual_precision, from_unsigned1), | |
3524 | mult_rhs1); | |
3f2ab719 | 3525 | if (actual_precision != TYPE_PRECISION (type2) |
3526 | || from_unsigned2 != TYPE_UNSIGNED (type2)) | |
03d37e4e | 3527 | mult_rhs2 = build_and_insert_cast (gsi, loc, |
3528 | build_nonstandard_integer_type | |
3529 | (actual_precision, from_unsigned2), | |
3530 | mult_rhs2); | |
00f4f705 | 3531 | |
12421545 | 3532 | if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) |
03d37e4e | 3533 | add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs); |
12421545 | 3534 | |
ffebd9c5 | 3535 | /* Handle constants. */ |
3536 | if (TREE_CODE (mult_rhs1) == INTEGER_CST) | |
d5a3bb10 | 3537 | mult_rhs1 = fold_convert (type1, mult_rhs1); |
ffebd9c5 | 3538 | if (TREE_CODE (mult_rhs2) == INTEGER_CST) |
d5a3bb10 | 3539 | mult_rhs2 = fold_convert (type2, mult_rhs2); |
ffebd9c5 | 3540 | |
806413d2 | 3541 | gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2, |
3542 | add_rhs); | |
00f4f705 | 3543 | update_stmt (gsi_stmt (*gsi)); |
30c4e60d | 3544 | widen_mul_stats.maccs_inserted++; |
00f4f705 | 3545 | return true; |
3546 | } | |
3547 | ||
15dbdc8f | 3548 | /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 |
3549 | with uses in additions and subtractions to form fused multiply-add | |
3550 | operations. Returns true if successful and MUL_STMT should be removed. */ | |
b9be572e | 3551 | |
3552 | static bool | |
42acab1c | 3553 | convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2) |
b9be572e | 3554 | { |
15dbdc8f | 3555 | tree mul_result = gimple_get_lhs (mul_stmt); |
b9be572e | 3556 | tree type = TREE_TYPE (mul_result); |
42acab1c | 3557 | gimple *use_stmt, *neguse_stmt; |
1a91d914 | 3558 | gassign *fma_stmt; |
b9be572e | 3559 | use_operand_p use_p; |
3560 | imm_use_iterator imm_iter; | |
3561 | ||
3562 | if (FLOAT_TYPE_P (type) | |
3563 | && flag_fp_contract_mode == FP_CONTRACT_OFF) | |
3564 | return false; | |
3565 | ||
3566 | /* We don't want to do bitfield reduction ops. */ | |
3567 | if (INTEGRAL_TYPE_P (type) | |
654ba22c | 3568 | && !type_has_mode_precision_p (type)) |
b9be572e | 3569 | return false; |
3570 | ||
3571 | /* If the target doesn't support it, don't generate it. We assume that | |
3572 | if fma isn't available then fms, fnma or fnms are not either. */ | |
3573 | if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing) | |
3574 | return false; | |
3575 | ||
5ed3d3b8 | 3576 | /* If the multiplication has zero uses, it is kept around probably because |
3577 | of -fnon-call-exceptions. Don't optimize it away in that case, | |
3578 | it is the job of DCE. */ | |
3579 | if (has_zero_uses (mul_result)) | |
3580 | return false; | |
3581 | ||
b9be572e | 3582 | /* Make sure that the multiplication statement becomes dead after |
3583 | the transformation, thus that all uses are transformed to FMAs. | |
3584 | This means we assume that an FMA operation has the same cost | |
3585 | as an addition. */ | |
3586 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) | |
3587 | { | |
3588 | enum tree_code use_code; | |
44579526 | 3589 | tree result = mul_result; |
3590 | bool negate_p = false; | |
b9be572e | 3591 | |
3592 | use_stmt = USE_STMT (use_p); | |
3593 | ||
17a2c727 | 3594 | if (is_gimple_debug (use_stmt)) |
3595 | continue; | |
3596 | ||
b9be572e | 3597 | /* For now restrict these operations to single basic blocks. In theory |
3598 | we would want to support sinking the multiplication in | |
3599 | m = a*b; | |
3600 | if () | |
3601 | ma = m + c; | |
3602 | else | |
3603 | d = m; | |
3604 | to form a fma in the then block and sink the multiplication to the | |
3605 | else block. */ | |
3606 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | |
3607 | return false; | |
3608 | ||
44579526 | 3609 | if (!is_gimple_assign (use_stmt)) |
b9be572e | 3610 | return false; |
3611 | ||
44579526 | 3612 | use_code = gimple_assign_rhs_code (use_stmt); |
3613 | ||
3614 | /* A negate on the multiplication leads to FNMA. */ | |
3615 | if (use_code == NEGATE_EXPR) | |
3616 | { | |
805ad414 | 3617 | ssa_op_iter iter; |
5715c09b | 3618 | use_operand_p usep; |
805ad414 | 3619 | |
44579526 | 3620 | result = gimple_assign_lhs (use_stmt); |
3621 | ||
3622 | /* Make sure the negate statement becomes dead with this | |
3623 | single transformation. */ | |
3624 | if (!single_imm_use (gimple_assign_lhs (use_stmt), | |
3625 | &use_p, &neguse_stmt)) | |
3626 | return false; | |
3627 | ||
805ad414 | 3628 | /* Make sure the multiplication isn't also used on that stmt. */ |
5715c09b | 3629 | FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE) |
3630 | if (USE_FROM_PTR (usep) == mul_result) | |
805ad414 | 3631 | return false; |
3632 | ||
44579526 | 3633 | /* Re-validate. */ |
3634 | use_stmt = neguse_stmt; | |
3635 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | |
3636 | return false; | |
3637 | if (!is_gimple_assign (use_stmt)) | |
3638 | return false; | |
3639 | ||
3640 | use_code = gimple_assign_rhs_code (use_stmt); | |
3641 | negate_p = true; | |
3642 | } | |
b9be572e | 3643 | |
44579526 | 3644 | switch (use_code) |
3645 | { | |
3646 | case MINUS_EXPR: | |
8a9d0572 | 3647 | if (gimple_assign_rhs2 (use_stmt) == result) |
3648 | negate_p = !negate_p; | |
3649 | break; | |
44579526 | 3650 | case PLUS_EXPR: |
44579526 | 3651 | break; |
44579526 | 3652 | default: |
3653 | /* FMA can only be formed from PLUS and MINUS. */ | |
3654 | return false; | |
3655 | } | |
b9be572e | 3656 | |
b095bd6a | 3657 | /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed |
3658 | by a MULT_EXPR that we'll visit later, we might be able to | |
3659 | get a more profitable match with fnma. | |
3660 | OTOH, if we don't, a negate / fma pair has likely lower latency | |
3661 | than a mult / subtract pair. */ | |
3662 | if (use_code == MINUS_EXPR && !negate_p | |
3663 | && gimple_assign_rhs1 (use_stmt) == result | |
3664 | && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing | |
3665 | && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing) | |
3666 | { | |
3667 | tree rhs2 = gimple_assign_rhs2 (use_stmt); | |
b095bd6a | 3668 | |
058e9571 | 3669 | if (TREE_CODE (rhs2) == SSA_NAME) |
3670 | { | |
42acab1c | 3671 | gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2); |
058e9571 | 3672 | if (has_single_use (rhs2) |
3673 | && is_gimple_assign (stmt2) | |
3674 | && gimple_assign_rhs_code (stmt2) == MULT_EXPR) | |
3675 | return false; | |
3676 | } | |
b095bd6a | 3677 | } |
3678 | ||
44579526 | 3679 | /* We can't handle a * b + a * b. */ |
3680 | if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt)) | |
3681 | return false; | |
8a9d0572 | 3682 | |
3683 | /* While it is possible to validate whether or not the exact form | |
3684 | that we've recognized is available in the backend, the assumption | |
3685 | is that the transformation is never a loss. For instance, suppose | |
3686 | the target only has the plain FMA pattern available. Consider | |
3687 | a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which | |
3688 | is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we | |
3689 | still have 3 operations, but in the FMA form the two NEGs are | |
9d75589a | 3690 | independent and could be run in parallel. */ |
b9be572e | 3691 | } |
3692 | ||
3693 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) | |
3694 | { | |
b9be572e | 3695 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
17a2c727 | 3696 | enum tree_code use_code; |
15dbdc8f | 3697 | tree addop, mulop1 = op1, result = mul_result; |
44579526 | 3698 | bool negate_p = false; |
b9be572e | 3699 | |
17a2c727 | 3700 | if (is_gimple_debug (use_stmt)) |
3701 | continue; | |
3702 | ||
3703 | use_code = gimple_assign_rhs_code (use_stmt); | |
44579526 | 3704 | if (use_code == NEGATE_EXPR) |
3705 | { | |
3706 | result = gimple_assign_lhs (use_stmt); | |
3707 | single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt); | |
3708 | gsi_remove (&gsi, true); | |
3709 | release_defs (use_stmt); | |
3710 | ||
3711 | use_stmt = neguse_stmt; | |
3712 | gsi = gsi_for_stmt (use_stmt); | |
3713 | use_code = gimple_assign_rhs_code (use_stmt); | |
3714 | negate_p = true; | |
3715 | } | |
3716 | ||
3717 | if (gimple_assign_rhs1 (use_stmt) == result) | |
b9be572e | 3718 | { |
3719 | addop = gimple_assign_rhs2 (use_stmt); | |
3720 | /* a * b - c -> a * b + (-c) */ | |
3721 | if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | |
3722 | addop = force_gimple_operand_gsi (&gsi, | |
3723 | build1 (NEGATE_EXPR, | |
3724 | type, addop), | |
3725 | true, NULL_TREE, true, | |
3726 | GSI_SAME_STMT); | |
3727 | } | |
3728 | else | |
3729 | { | |
3730 | addop = gimple_assign_rhs1 (use_stmt); | |
3731 | /* a - b * c -> (-b) * c + a */ | |
3732 | if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | |
44579526 | 3733 | negate_p = !negate_p; |
b9be572e | 3734 | } |
3735 | ||
44579526 | 3736 | if (negate_p) |
3737 | mulop1 = force_gimple_operand_gsi (&gsi, | |
3738 | build1 (NEGATE_EXPR, | |
3739 | type, mulop1), | |
3740 | true, NULL_TREE, true, | |
3741 | GSI_SAME_STMT); | |
3742 | ||
e9cf809e | 3743 | fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt), |
3744 | FMA_EXPR, mulop1, op2, addop); | |
b9be572e | 3745 | gsi_replace (&gsi, fma_stmt, true); |
30c4e60d | 3746 | widen_mul_stats.fmas_inserted++; |
b9be572e | 3747 | } |
3748 | ||
3749 | return true; | |
3750 | } | |
3751 | ||
e11a63e8 | 3752 | |
3753 | /* Helper function of match_uaddsub_overflow. Return 1 | |
3754 | if USE_STMT is an unsigned overflow check ovf != 0 for | |
3755 | STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0 | |
3756 | and 0 otherwise. */ | |
3757 | ||
3758 | static int | |
3759 | uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt) | |
3760 | { | |
3761 | enum tree_code ccode = ERROR_MARK; | |
3762 | tree crhs1 = NULL_TREE, crhs2 = NULL_TREE; | |
3763 | if (gimple_code (use_stmt) == GIMPLE_COND) | |
3764 | { | |
3765 | ccode = gimple_cond_code (use_stmt); | |
3766 | crhs1 = gimple_cond_lhs (use_stmt); | |
3767 | crhs2 = gimple_cond_rhs (use_stmt); | |
3768 | } | |
3769 | else if (is_gimple_assign (use_stmt)) | |
3770 | { | |
3771 | if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) | |
3772 | { | |
3773 | ccode = gimple_assign_rhs_code (use_stmt); | |
3774 | crhs1 = gimple_assign_rhs1 (use_stmt); | |
3775 | crhs2 = gimple_assign_rhs2 (use_stmt); | |
3776 | } | |
3777 | else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR) | |
3778 | { | |
3779 | tree cond = gimple_assign_rhs1 (use_stmt); | |
3780 | if (COMPARISON_CLASS_P (cond)) | |
3781 | { | |
3782 | ccode = TREE_CODE (cond); | |
3783 | crhs1 = TREE_OPERAND (cond, 0); | |
3784 | crhs2 = TREE_OPERAND (cond, 1); | |
3785 | } | |
3786 | else | |
3787 | return 0; | |
3788 | } | |
3789 | else | |
3790 | return 0; | |
3791 | } | |
3792 | else | |
3793 | return 0; | |
3794 | ||
3795 | if (TREE_CODE_CLASS (ccode) != tcc_comparison) | |
3796 | return 0; | |
3797 | ||
3798 | enum tree_code code = gimple_assign_rhs_code (stmt); | |
3799 | tree lhs = gimple_assign_lhs (stmt); | |
3800 | tree rhs1 = gimple_assign_rhs1 (stmt); | |
3801 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
3802 | ||
3803 | switch (ccode) | |
3804 | { | |
3805 | case GT_EXPR: | |
3806 | case LE_EXPR: | |
3807 | /* r = a - b; r > a or r <= a | |
3808 | r = a + b; a > r or a <= r or b > r or b <= r. */ | |
3809 | if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1) | |
3810 | || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2) | |
3811 | && crhs2 == lhs)) | |
3812 | return ccode == GT_EXPR ? 1 : -1; | |
3813 | break; | |
3814 | case LT_EXPR: | |
3815 | case GE_EXPR: | |
3816 | /* r = a - b; a < r or a >= r | |
3817 | r = a + b; r < a or r >= a or r < b or r >= b. */ | |
3818 | if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs) | |
3819 | || (code == PLUS_EXPR && crhs1 == lhs | |
3820 | && (crhs2 == rhs1 || crhs2 == rhs2))) | |
3821 | return ccode == LT_EXPR ? 1 : -1; | |
3822 | break; | |
3823 | default: | |
3824 | break; | |
3825 | } | |
3826 | return 0; | |
3827 | } | |
3828 | ||
3829 | /* Recognize for unsigned x | |
3830 | x = y - z; | |
3831 | if (x > y) | |
3832 | where there are other uses of x and replace it with | |
3833 | _7 = SUB_OVERFLOW (y, z); | |
3834 | x = REALPART_EXPR <_7>; | |
3835 | _8 = IMAGPART_EXPR <_7>; | |
3836 | if (_8) | |
3837 | and similarly for addition (using ADD_OVERFLOW). */ | |
3838 | ||
3839 | static bool | |
3840 | match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, | |
3841 | enum tree_code code) | |
3842 | { | |
3843 | tree lhs = gimple_assign_lhs (stmt); | |
3844 | tree type = TREE_TYPE (lhs); | |
3845 | use_operand_p use_p; | |
3846 | imm_use_iterator iter; | |
3847 | bool use_seen = false; | |
3848 | bool ovf_use_seen = false; | |
3849 | gimple *use_stmt; | |
3850 | ||
3851 | gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR); | |
3852 | if (!INTEGRAL_TYPE_P (type) | |
3853 | || !TYPE_UNSIGNED (type) | |
3854 | || has_zero_uses (lhs) | |
3855 | || has_single_use (lhs) | |
3856 | || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab, | |
3857 | TYPE_MODE (type)) == CODE_FOR_nothing) | |
3858 | return false; | |
3859 | ||
3860 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) | |
3861 | { | |
3862 | use_stmt = USE_STMT (use_p); | |
3863 | if (is_gimple_debug (use_stmt)) | |
3864 | continue; | |
3865 | ||
3866 | if (uaddsub_overflow_check_p (stmt, use_stmt)) | |
3867 | ovf_use_seen = true; | |
3868 | else | |
3869 | use_seen = true; | |
3870 | if (ovf_use_seen && use_seen) | |
3871 | break; | |
3872 | } | |
3873 | ||
3874 | if (!ovf_use_seen || !use_seen) | |
3875 | return false; | |
3876 | ||
3877 | tree ctype = build_complex_type (type); | |
3878 | tree rhs1 = gimple_assign_rhs1 (stmt); | |
3879 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
3880 | gcall *g = gimple_build_call_internal (code == PLUS_EXPR | |
3881 | ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW, | |
3882 | 2, rhs1, rhs2); | |
3883 | tree ctmp = make_ssa_name (ctype); | |
3884 | gimple_call_set_lhs (g, ctmp); | |
3885 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
3886 | gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR, | |
3887 | build1 (REALPART_EXPR, type, ctmp)); | |
3888 | gsi_replace (gsi, g2, true); | |
3889 | tree ovf = make_ssa_name (type); | |
3890 | g2 = gimple_build_assign (ovf, IMAGPART_EXPR, | |
3891 | build1 (IMAGPART_EXPR, type, ctmp)); | |
3892 | gsi_insert_after (gsi, g2, GSI_NEW_STMT); | |
3893 | ||
3894 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) | |
3895 | { | |
3896 | if (is_gimple_debug (use_stmt)) | |
3897 | continue; | |
3898 | ||
3899 | int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt); | |
3900 | if (ovf_use == 0) | |
3901 | continue; | |
3902 | if (gimple_code (use_stmt) == GIMPLE_COND) | |
3903 | { | |
3904 | gcond *cond_stmt = as_a <gcond *> (use_stmt); | |
3905 | gimple_cond_set_lhs (cond_stmt, ovf); | |
3906 | gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0)); | |
3907 | gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR); | |
3908 | } | |
3909 | else | |
3910 | { | |
3911 | gcc_checking_assert (is_gimple_assign (use_stmt)); | |
3912 | if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) | |
3913 | { | |
3914 | gimple_assign_set_rhs1 (use_stmt, ovf); | |
3915 | gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0)); | |
3916 | gimple_assign_set_rhs_code (use_stmt, | |
3917 | ovf_use == 1 ? NE_EXPR : EQ_EXPR); | |
3918 | } | |
3919 | else | |
3920 | { | |
3921 | gcc_checking_assert (gimple_assign_rhs_code (use_stmt) | |
3922 | == COND_EXPR); | |
3923 | tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR, | |
3924 | boolean_type_node, ovf, | |
3925 | build_int_cst (type, 0)); | |
3926 | gimple_assign_set_rhs1 (use_stmt, cond); | |
3927 | } | |
3928 | } | |
3929 | update_stmt (use_stmt); | |
3930 | } | |
3931 | return true; | |
3932 | } | |
3933 | ||
67f7b566 | 3934 | /* Return true if target has support for divmod in MODE, via insn or libfunc. */ |
3935 | ||
3936 | static bool | |
3937 | target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) | |
3938 | { | |
3939 | /* If target supports hardware divmod insn, use it for divmod. */ | |
3940 | if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing) | |
3941 | return true; | |
3942 | ||
3943 | /* Check if libfunc for divmod is available. */ | |
3944 | rtx libfunc = optab_libfunc (divmod_optab, mode); | |
3945 | if (libfunc != NULL_RTX) | |
3946 | { | |
3947 | /* If optab_handler exists for div_optab, perhaps in a wider mode, | |
3948 | we don't want to use the libfunc even if it exists for given mode. */ | |
19a4dce4 | 3949 | machine_mode div_mode; |
3950 | FOR_EACH_MODE_FROM (div_mode, mode) | |
67f7b566 | 3951 | if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing) |
3952 | return false; | |
3953 | ||
3954 | return targetm.expand_divmod_libfunc != NULL; | |
3955 | } | |
3956 | ||
3957 | return false; | |
3958 | } | |
3959 | ||
3960 | /* Check if STMT is a candidate for the divmod transform: neither operand | |
3961 | is constant, the type does not trap on overflow, and the target supports | |
3962 | divmod. */ | |
3963 | ||
3964 | static bool | |
3965 | divmod_candidate_p (gassign *stmt) | |
3966 | { | |
3967 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
3968 | machine_mode mode = TYPE_MODE (type); | |
3969 | optab divmod_optab, div_optab; | |
3970 | ||
3971 | if (TYPE_UNSIGNED (type)) | |
3972 | { | |
3973 | divmod_optab = udivmod_optab; | |
3974 | div_optab = udiv_optab; | |
3975 | } | |
3976 | else | |
3977 | { | |
3978 | divmod_optab = sdivmod_optab; | |
3979 | div_optab = sdiv_optab; | |
3980 | } | |
3981 | ||
3982 | tree op1 = gimple_assign_rhs1 (stmt); | |
3983 | tree op2 = gimple_assign_rhs2 (stmt); | |
3984 | ||
3985 | /* Disable the transform if either is a constant, since division-by-constant | |
3986 | may have specialized expansion. */ | |
3987 | if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2)) | |
3988 | return false; | |
3989 | ||
3990 | /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should | |
3991 | expand using the [su]divv optabs. */ | |
3992 | if (TYPE_OVERFLOW_TRAPS (type)) | |
3993 | return false; | |
3994 | ||
3995 | if (!target_supports_divmod_p (divmod_optab, div_optab, mode)) | |
3996 | return false; | |
3997 | ||
3998 | return true; | |
3999 | } | |
3998 | ||
/* This function looks for:
     t1 = a TRUNC_DIV_EXPR b;
     t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
     complex_tmp = DIVMOD (a, b);
     t1 = REALPART_EXPR (complex_tmp);
     t2 = IMAGPART_EXPR (complex_tmp);
   For conditions enabling the transform see divmod_candidate_p().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */

static bool
convert_to_divmod (gassign *stmt)
{
  /* Give up on statements that may throw internally and on
     non-candidates (constant operands, trapping overflow types,
     no target support).  */
  if (stmt_can_throw_internal (stmt)
      || !divmod_candidate_p (stmt))
    return false;

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  imm_use_iterator use_iter;
  gimple *use_stmt;
  auto_vec<gimple *> stmts;

  gimple *top_stmt = stmt;
  basic_block top_bb = gimple_bb (stmt);

  /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
     at-least stmt and possibly other trunc_div/trunc_mod stmts
     having same operands as stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (stmt_can_throw_internal (use_stmt))
	    continue;

	  basic_block bb = gimple_bb (use_stmt);

	  if (bb == top_bb)
	    {
	      /* Same block: the smaller uid comes first; uids were
		 renumbered by the caller (execute).  */
	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
		top_stmt = use_stmt;
	    }
	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
	    {
	      top_bb = bb;
	      top_stmt = use_stmt;
	    }
	}
    }

  tree top_op1 = gimple_assign_rhs1 (top_stmt);
  tree top_op2 = gimple_assign_rhs2 (top_stmt);

  stmts.safe_push (top_stmt);
  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);

  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
     to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
     gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
     2nd loop ends up adding at-least single trunc_mod_expr stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (use_stmt == top_stmt
	      || stmt_can_throw_internal (use_stmt)
	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
	    continue;

	  stmts.safe_push (use_stmt);
	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
	    div_seen = true;
	}
    }

  /* The transform only pays off when at least one division shares the
     DIVMOD result; a lone modulo is left untouched.  */
  if (!div_seen)
    return false;

  /* Part 3: Create libcall to internal fn DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  */

  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
				 call_stmt, "divmod_tmp");
  gimple_call_set_lhs (call_stmt, res);
  /* We rejected throwing statements above.  */
  gimple_call_set_nothrow (call_stmt, true);

  /* Insert the call before top_stmt.  */
  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);

  widen_mul_stats.divmod_calls_inserted++;

  /* Update all statements in stmts vector:
     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */

  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree new_rhs;

      switch (gimple_assign_rhs_code (use_stmt))
	{
	case TRUNC_DIV_EXPR:
	  new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
	  break;

	case TRUNC_MOD_EXPR:
	  new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
	  break;

	default:
	  gcc_unreachable ();
	}

      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
      update_stmt (use_stmt);
    }

  return true;
}
e11a63e8 | 4142 | |
/* Find integer multiplications where the operands are extended from
   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
   where appropriate.  */

namespace {

/* Pass descriptor for the widening_mul pass.  */
const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
4161 | ||
/* The widening_mul pass: converts multiplications and related operations
   to widening multiplies, FMAs, overflow-check patterns and DIVMOD
   calls (see execute below).  */

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* Only run when paying extra compile time is acceptable.  */
    return flag_expensive_optimizations && optimize;
  }

  virtual unsigned int execute (function *);

}; // class pass_optimize_widening_mul
4178 | ||
/* Main driver: scan every statement of FUN and apply the transforms.
   Returns TODO_cleanup_cfg if removing dead pow() calls changed the
   CFG, 0 otherwise.  */

unsigned int
pass_optimize_widening_mul::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
  /* convert_to_divmod relies on dominator info and on statement uids
     being renumbered in within-block order.  */
  calculate_dominance_info (CDI_DOMINATORS);
  renumber_gimple_stmt_uids ();

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);
	  enum tree_code code;

	  if (is_gimple_assign (stmt))
	    {
	      code = gimple_assign_rhs_code (stmt);
	      switch (code)
		{
		case MULT_EXPR:
		  /* Try widening mult, then copysign expansion, then FMA.
		     A successful FMA conversion leaves STMT dead, so remove
		     it and continue without advancing GSI.  */
		  if (!convert_mult_to_widen (stmt, &gsi)
		      && !convert_expand_mult_copysign (stmt, &gsi)
		      && convert_mult_to_fma (stmt,
					      gimple_assign_rhs1 (stmt),
					      gimple_assign_rhs2 (stmt)))
		    {
		      gsi_remove (&gsi, true);
		      release_defs (stmt);
		      continue;
		    }
		  break;

		case PLUS_EXPR:
		case MINUS_EXPR:
		  /* Only look for an overflow pattern if this did not
		     become a widening multiply-accumulate.  */
		  if (!convert_plusminus_to_widen (&gsi, stmt, code))
		    match_uaddsub_overflow (&gsi, stmt, code);
		  break;

		case TRUNC_MOD_EXPR:
		  convert_to_divmod (as_a<gassign *> (stmt));
		  break;

		default:;
		}
	    }
	  else if (is_gimple_call (stmt)
		   && gimple_call_lhs (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);
	      if (fndecl
		  && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
		{
		  switch (DECL_FUNCTION_CODE (fndecl))
		    {
		    case BUILT_IN_POWF:
		    case BUILT_IN_POW:
		    case BUILT_IN_POWL:
		      /* pow (x, 2.0) is x * x, which may fuse into an FMA;
			 on success the pow call is dead, so remove it.  */
		      if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
			  && real_equal
			       (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
				&dconst2)
			  && convert_mult_to_fma (stmt,
						  gimple_call_arg (stmt, 0),
						  gimple_call_arg (stmt, 0)))
			{
			  unlink_stmt_vdef (stmt);
			  /* Removing the call may leave dead EH edges;
			     purging them changes the CFG.  */
			  if (gsi_remove (&gsi, true)
			      && gimple_purge_dead_eh_edges (bb))
			    cfg_changed = true;
			  release_defs (stmt);
			  continue;
			}
		      break;

		    default:;
		    }
		}
	    }
	  gsi_next (&gsi);
	}
    }

  statistics_counter_event (fun, "widening multiplications inserted",
			    widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
			    widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
			    widen_mul_stats.fmas_inserted);
  statistics_counter_event (fun, "divmod calls inserted",
			    widen_mul_stats.divmod_calls_inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
4277 | ||
cbe8bda8 | 4278 | } // anon namespace |
4279 | ||
4280 | gimple_opt_pass * | |
4281 | make_pass_optimize_widening_mul (gcc::context *ctxt) | |
4282 | { | |
4283 | return new pass_optimize_widening_mul (ctxt); | |
4284 | } |