]>
Commit | Line | Data |
---|---|---|
6c2a63a3 | 1 | /* Global, SSA-based optimizations using mathematical identities. |
5624e564 | 2 | Copyright (C) 2005-2015 Free Software Foundation, Inc. |
b8698a0f | 3 | |
6c2a63a3 | 4 | This file is part of GCC. |
b8698a0f | 5 | |
6c2a63a3 PB |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
9dcd6f09 | 8 | Free Software Foundation; either version 3, or (at your option) any |
6c2a63a3 | 9 | later version. |
b8698a0f | 10 | |
6c2a63a3 PB |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
b8698a0f | 15 | |
6c2a63a3 | 16 | You should have received a copy of the GNU General Public License |
9dcd6f09 NC |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
6c2a63a3 PB |
19 | |
20 | /* Currently, the only mini-pass in this file tries to CSE reciprocal | |
21 | operations. These are common in sequences such as this one: | |
22 | ||
23 | modulus = sqrt(x*x + y*y + z*z); | |
24 | x = x / modulus; | |
25 | y = y / modulus; | |
26 | z = z / modulus; | |
27 | ||
28 | that can be optimized to | |
29 | ||
30 | modulus = sqrt(x*x + y*y + z*z); | |
31 | rmodulus = 1.0 / modulus; | |
32 | x = x * rmodulus; | |
33 | y = y * rmodulus; | |
34 | z = z * rmodulus; | |
35 | ||
36 | We do this for loop invariant divisors, and with this pass whenever | |
bc23502b PB |
37 | we notice that a division has the same divisor multiple times. |
38 | ||
39 | Of course, like in PRE, we don't insert a division if a dominator | |
40 | already has one. However, this cannot be done as an extension of | |
41 | PRE for several reasons. | |
42 | ||
43 | First of all, with some experiments it was found out that the | |
44 | transformation is not always useful if there are only two divisions | |
   by the same divisor.  This is probably because modern processors
46 | can pipeline the divisions; on older, in-order processors it should | |
47 | still be effective to optimize two divisions by the same number. | |
48 | We make this a param, and it shall be called N in the remainder of | |
49 | this comment. | |
50 | ||
51 | Second, if trapping math is active, we have less freedom on where | |
52 | to insert divisions: we can only do so in basic blocks that already | |
53 | contain one. (If divisions don't trap, instead, we can insert | |
54 | divisions elsewhere, which will be in blocks that are common dominators | |
55 | of those that have the division). | |
56 | ||
57 | We really don't want to compute the reciprocal unless a division will | |
58 | be found. To do this, we won't insert the division in a basic block | |
59 | that has less than N divisions *post-dominating* it. | |
60 | ||
61 | The algorithm constructs a subset of the dominator tree, holding the | |
62 | blocks containing the divisions and the common dominators to them, | |
63 | and walk it twice. The first walk is in post-order, and it annotates | |
64 | each block with the number of divisions that post-dominate it: this | |
65 | gives information on where divisions can be inserted profitably. | |
66 | The second walk is in pre-order, and it inserts divisions as explained | |
67 | above, and replaces divisions by multiplications. | |
68 | ||
69 | In the best case, the cost of the pass is O(n_statements). In the | |
70 | worst-case, the cost is due to creating the dominator tree subset, | |
71 | with a cost of O(n_basic_blocks ^ 2); however this can only happen | |
72 | for n_statements / n_basic_blocks statements. So, the amortized cost | |
73 | of creating the dominator tree subset is O(n_basic_blocks) and the | |
74 | worst-case cost of the pass is O(n_statements * n_basic_blocks). | |
75 | ||
76 | More practically, the cost will be small because there are few | |
77 | divisions, and they tend to be in the same basic block, so insert_bb | |
78 | is called very few times. | |
79 | ||
80 | If we did this using domwalk.c, an efficient implementation would have | |
81 | to work on all the variables in a single pass, because we could not | |
82 | work on just a subset of the dominator tree, as we do now, and the | |
83 | cost would also be something like O(n_statements * n_basic_blocks). | |
84 | The data structures would be more complex in order to work on all the | |
85 | variables in a single pass. */ | |
6c2a63a3 PB |
86 | |
87 | #include "config.h" | |
88 | #include "system.h" | |
89 | #include "coretypes.h" | |
90 | #include "tm.h" | |
91 | #include "flags.h" | |
60393bbc AM |
92 | #include "hash-set.h" |
93 | #include "machmode.h" | |
40e23961 MC |
94 | #include "vec.h" |
95 | #include "double-int.h" | |
96 | #include "input.h" | |
97 | #include "alias.h" | |
98 | #include "symtab.h" | |
99 | #include "wide-int.h" | |
100 | #include "inchash.h" | |
101 | #include "tree.h" | |
102 | #include "fold-const.h" | |
103 | #include "predict.h" | |
60393bbc AM |
104 | #include "hard-reg-set.h" |
105 | #include "input.h" | |
106 | #include "function.h" | |
107 | #include "dominance.h" | |
108 | #include "cfg.h" | |
2fb9a547 AM |
109 | #include "basic-block.h" |
110 | #include "tree-ssa-alias.h" | |
111 | #include "internal-fn.h" | |
112 | #include "gimple-fold.h" | |
113 | #include "gimple-expr.h" | |
114 | #include "is-a.h" | |
18f429e2 | 115 | #include "gimple.h" |
5be5c238 | 116 | #include "gimple-iterator.h" |
73984f84 | 117 | #include "gimplify.h" |
18f429e2 | 118 | #include "gimplify-me.h" |
d8a2d370 | 119 | #include "stor-layout.h" |
442b4905 AM |
120 | #include "gimple-ssa.h" |
121 | #include "tree-cfg.h" | |
122 | #include "tree-phinodes.h" | |
123 | #include "ssa-iterators.h" | |
d8a2d370 | 124 | #include "stringpool.h" |
442b4905 | 125 | #include "tree-ssanames.h" |
d8a2d370 | 126 | #include "expr.h" |
442b4905 | 127 | #include "tree-dfa.h" |
7a300452 | 128 | #include "tree-ssa.h" |
6c2a63a3 | 129 | #include "tree-pass.h" |
bc23502b | 130 | #include "alloc-pool.h" |
bc23502b | 131 | #include "target.h" |
cf835838 | 132 | #include "gimple-pretty-print.h" |
9b2b7279 | 133 | #include "builtins.h" |
40013784 SB |
134 | |
135 | /* FIXME: RTL headers have to be included here for optabs. */ | |
136 | #include "rtl.h" /* Because optabs.h wants enum rtx_code. */ | |
137 | #include "expr.h" /* Because optabs.h wants sepops. */ | |
b0710fe1 | 138 | #include "insn-codes.h" |
03bd2f1a | 139 | #include "optabs.h" |
bc23502b PB |
140 | |
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};
174 | ||
/* Statistics for the reciprocal CSE mini-pass; reported through
   statistics_counter_event when the pass finishes.  */
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

/* Statistics for the sin/cos -> cexpi CSE mini-pass.  */
static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
} sincos_stats;

/* Statistics for recognition of hand-written byte-swap (and no-op
   byte permutation) idioms.  */
static struct
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

/* Statistics for the widening-multiply mini-pass.  */
static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;
} widen_mul_stats;

/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static alloc_pool occ_pool;
220 | ||
221 | ||
222 | ||
223 | /* Allocate and return a new struct occurrence for basic block BB, and | |
224 | whose children list is headed by CHILDREN. */ | |
225 | static struct occurrence * | |
226 | occ_new (basic_block bb, struct occurrence *children) | |
6c2a63a3 | 227 | { |
bc23502b PB |
228 | struct occurrence *occ; |
229 | ||
c22940cd | 230 | bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool); |
bc23502b PB |
231 | memset (occ, 0, sizeof (struct occurrence)); |
232 | ||
233 | occ->bb = bb; | |
234 | occ->children = children; | |
235 | return occ; | |
6c2a63a3 PB |
236 | } |
237 | ||
bc23502b PB |
238 | |
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  /* Walk the sibling list, comparing NEW_OCC's block against each
     recorded block via their nearest common dominator.  */
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  /* A fresh intermediate dominator must not already be in the
	     tree, or it would have been found by the cases above.  */
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.	*/
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly. Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
303 | ||
304 | /* Register that we found a division in BB. */ | |
305 | ||
306 | static inline void | |
307 | register_division_in (basic_block bb) | |
308 | { | |
309 | struct occurrence *occ; | |
310 | ||
311 | occ = (struct occurrence *) bb->aux; | |
312 | if (!occ) | |
313 | { | |
314 | occ = occ_new (bb, NULL); | |
fefa31b5 | 315 | insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); |
bc23502b PB |
316 | } |
317 | ||
318 | occ->bb_has_division = true; | |
319 | occ->num_divisions++; | |
320 | } | |
321 | ||
322 | ||
323 | /* Compute the number of divisions that postdominate each block in OCC and | |
324 | its children. */ | |
6c2a63a3 | 325 | |
6c2a63a3 | 326 | static void |
bc23502b | 327 | compute_merit (struct occurrence *occ) |
6c2a63a3 | 328 | { |
bc23502b PB |
329 | struct occurrence *occ_child; |
330 | basic_block dom = occ->bb; | |
6c2a63a3 | 331 | |
bc23502b | 332 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
6c2a63a3 | 333 | { |
bc23502b PB |
334 | basic_block bb; |
335 | if (occ_child->children) | |
336 | compute_merit (occ_child); | |
337 | ||
338 | if (flag_exceptions) | |
339 | bb = single_noncomplex_succ (dom); | |
340 | else | |
341 | bb = dom; | |
342 | ||
343 | if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb)) | |
344 | occ->num_divisions += occ_child->num_divisions; | |
345 | } | |
346 | } | |
347 | ||
348 | ||
349 | /* Return whether USE_STMT is a floating-point division by DEF. */ | |
350 | static inline bool | |
726a989a | 351 | is_division_by (gimple use_stmt, tree def) |
bc23502b | 352 | { |
726a989a RB |
353 | return is_gimple_assign (use_stmt) |
354 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR | |
355 | && gimple_assign_rhs2 (use_stmt) == def | |
8a5b57cd RG |
356 | /* Do not recognize x / x as valid division, as we are getting |
357 | confused later by replacing all immediate uses x in such | |
358 | a stmt. */ | |
726a989a | 359 | && gimple_assign_rhs1 (use_stmt) != def; |
bc23502b PB |
360 | } |
361 | ||
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_BSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, int threshold)
{
  tree type;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  /* Insert a reciprocal here only if none is inherited from a dominator,
     insertion is legal (trapping math restricts us to blocks that already
     divide), and enough post-dominating divisions make it profitable.  */
  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      && occ->num_divisions >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  /* Propagate whichever reciprocal (ours or an inherited one) down to
     the dominated blocks.  */
  occ->recip_def = recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
}
423 | ||
424 | ||
425 | /* Replace the division at USE_P with a multiplication by the reciprocal, if | |
426 | possible. */ | |
427 | ||
428 | static inline void | |
429 | replace_reciprocal (use_operand_p use_p) | |
430 | { | |
726a989a RB |
431 | gimple use_stmt = USE_STMT (use_p); |
432 | basic_block bb = gimple_bb (use_stmt); | |
bc23502b PB |
433 | struct occurrence *occ = (struct occurrence *) bb->aux; |
434 | ||
efd8f750 JH |
435 | if (optimize_bb_for_speed_p (bb) |
436 | && occ->recip_def && use_stmt != occ->recip_def_stmt) | |
bc23502b | 437 | { |
59401b92 | 438 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
726a989a | 439 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |
bc23502b | 440 | SET_USE (use_p, occ->recip_def); |
59401b92 | 441 | fold_stmt_inplace (&gsi); |
bc23502b PB |
442 | update_stmt (use_stmt); |
443 | } | |
444 | } | |
445 | ||
446 | ||
447 | /* Free OCC and return one more "struct occurrence" to be freed. */ | |
448 | ||
449 | static struct occurrence * | |
450 | free_bb (struct occurrence *occ) | |
451 | { | |
452 | struct occurrence *child, *next; | |
453 | ||
454 | /* First get the two pointers hanging off OCC. */ | |
455 | next = occ->next; | |
456 | child = occ->children; | |
457 | occ->bb->aux = NULL; | |
458 | pool_free (occ_pool, occ); | |
459 | ||
460 | /* Now ensure that we don't recurse unless it is necessary. */ | |
461 | if (!child) | |
462 | return next; | |
2ef571e2 | 463 | else |
bc23502b PB |
464 | { |
465 | while (next) | |
466 | next = free_bb (next); | |
467 | ||
468 | return child; | |
469 | } | |
470 | } | |
471 | ||
472 | ||
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p;
  imm_use_iterator use_iter;
  struct occurrence *occ;
  int count = 0, threshold;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));

  /* Pass 1: count divisions by DEF and build the dominator-tree subset
     of the blocks that contain them (via bb->aux / occ_head).  */
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt));
	  count++;
	}
    }

  /* Do the expensive part only if we can hope to optimize something.  */
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
  if (count >= threshold)
    {
      gimple use_stmt;
      /* Pass 2: annotate the tree with post-dominating division counts
	 and insert reciprocals where profitable.  */
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
	}

      /* Pass 3: rewrite the original divisions into multiplications.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	}
    }

  /* Tear down the occurrence tree (this also clears bb->aux).  */
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
525 | ||
bc23502b PB |
/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */
namespace {

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing and -freciprocal-math is enabled.  */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals
555 | ||
/* Main entry point of the "recip" pass: CSE reciprocals of SSA names
   that divide more than once, then turn a/func(b) into a*rfunc(b) for
   targets providing reciprocal builtins.  Returns 0 (no extra TODOs).  */
unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  /* Size the pool on a heuristic of roughly one occurrence node per
     three basic blocks.  */
  occ_pool = create_alloc_pool ("dominators for recip",
				sizeof (struct occurrence),
				n_basic_blocks_for_fn (fun) / 3 + 1);

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

#ifdef ENABLE_CHECKING
  /* The pass uses bb->aux for occurrence nodes; it must start clean.  */
  FOR_EACH_BB_FN (bb, fun)
    gcc_assert (!bb->aux);
#endif

  /* First handle floating-point parameters through their SSA default
     definitions (they have no defining statement to insert after).  */
  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      /* Floating-point PHI results: like parameters, there is no single
	 insertion point after the definition.  */
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      /* Floating-point definitions by ordinary statements; pass the
	 iterator so a reciprocal can be inserted right after the def.  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      if (optimize_bb_for_size_p (bb))
        continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
        {
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1)
		  && (fndecl = gimple_call_fndecl (stmt1))
		  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
		{
		  enum built_in_function code;
		  bool md_code, fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;

		  code = DECL_FUNCTION_CODE (fndecl);
		  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;

		  /* Ask the target for a reciprocal counterpart of the
		     called builtin; skip if there is none.  */
		  fndecl = targetm.builtin_reciprocal (code, md_code, false);
		  if (!fndecl)
		    continue;

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  /* Retarget the call to the reciprocal builtin, keeping
		     the same SSA name as its result.  */
		  gimple_replace_ssa_lhs (stmt1, arg1);
		  gimple_call_set_fndecl (stmt1, fndecl);
		  update_stmt (stmt1);
		  reciprocal_stats.rfuncs_inserted++;

		  /* Turn every a / rfunc(b) use into a * rfunc(b).  */
		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  free_alloc_pool (occ_pool);
  return 0;
}
697 | ||
27a4cd48 DM |
698 | } // anon namespace |
699 | ||
/* Factory entry point used by the pass manager to instantiate the
   "recip" pass.  */
gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}
705 | ||
88512ba0 | 706 | /* Records an occurrence at statement USE_STMT in the vector of trees |
2f397a93 | 707 | STMTS if it is dominated by *TOP_BB or dominates it or this basic block |
88512ba0 | 708 | is not yet initialized. Returns true if the occurrence was pushed on |
2f397a93 RG |
709 | the vector. Adjusts *TOP_BB to be the basic block dominating all |
710 | statements in the vector. */ | |
711 | ||
712 | static bool | |
9771b263 | 713 | maybe_record_sincos (vec<gimple> *stmts, |
726a989a | 714 | basic_block *top_bb, gimple use_stmt) |
2f397a93 | 715 | { |
726a989a | 716 | basic_block use_bb = gimple_bb (use_stmt); |
2f397a93 RG |
717 | if (*top_bb |
718 | && (*top_bb == use_bb | |
719 | || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) | |
9771b263 | 720 | stmts->safe_push (use_stmt); |
2f397a93 RG |
721 | else if (!*top_bb |
722 | || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) | |
723 | { | |
9771b263 | 724 | stmts->safe_push (use_stmt); |
2f397a93 RG |
725 | *top_bb = use_bb; |
726 | } | |
727 | else | |
728 | return false; | |
729 | ||
730 | return true; | |
731 | } | |
732 | ||
/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  Returns true if the CFG changed
   (dead EH edges were purged).  */

static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple def_stmt, use_stmt, stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  /* Phase 1: collect sin/cos/cexpi calls on NAME, tracking the
     dominating block where the shared cexpi can be placed.  */
  type = TREE_TYPE (name);
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt)
	  || !(fndecl = gimple_call_fndecl (use_stmt))
	  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
	continue;

      switch (DECL_FUNCTION_CODE (fndecl))
	{
	CASE_FLT_FN (BUILT_IN_COS):
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_FLT_FN (BUILT_IN_SIN):
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_FLT_FN (BUILT_IN_CEXPI):
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  /* Nothing to CSE unless at least two distinct kinds of calls exist.  */
  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      /* NAME is defined by a plain statement in TOP_BB: insert the
	 cexpi call right after it.  */
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;
      fndecl = gimple_call_fndecl (use_stmt);

      switch (DECL_FUNCTION_CODE (fndecl))
	{
	CASE_FLT_FN (BUILT_IN_COS):
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_FLT_FN (BUILT_IN_SIN):
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_FLT_FN (BUILT_IN_CEXPI):
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

	/* Replace call with a copy.  */
	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

	gsi = gsi_for_stmt (use_stmt);
	gsi_replace (&gsi, stmt, true);
	/* Removing a throwing call can leave dead EH edges behind.  */
	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	  cfg_changed = true;
    }

  return cfg_changed;
}
843 | ||
78be79d5 BS |
844 | /* To evaluate powi(x,n), the floating point value x raised to the |
845 | constant integer exponent n, we use a hybrid algorithm that | |
846 | combines the "window method" with look-up tables. For an | |
847 | introduction to exponentiation algorithms and "addition chains", | |
848 | see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth, | |
849 | "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming", | |
850 | 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation | |
851 | Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */ | |
852 | ||
/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3

/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
915 | ||
916 | ||
917 | /* Return the number of multiplications required to calculate | |
918 | powi(x,n) where n is less than POWI_TABLE_SIZE. This is a | |
919 | subroutine of powi_cost. CACHE is an array indicating | |
920 | which exponents have already been calculated. */ | |
921 | ||
922 | static int | |
923 | powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache) | |
924 | { | |
925 | /* If we've already calculated this exponent, then this evaluation | |
926 | doesn't require any additional multiplications. */ | |
927 | if (cache[n]) | |
928 | return 0; | |
929 | ||
930 | cache[n] = true; | |
931 | return powi_lookup_cost (n - powi_table[n], cache) | |
932 | + powi_lookup_cost (powi_table[n], cache) + 1; | |
933 | } | |
934 | ||
935 | /* Return the number of multiplications required to calculate | |
936 | powi(x,n) for an arbitrary x, given the exponent N. This | |
937 | function needs to be kept in sync with powi_as_mults below. */ | |
938 | ||
939 | static int | |
940 | powi_cost (HOST_WIDE_INT n) | |
941 | { | |
942 | bool cache[POWI_TABLE_SIZE]; | |
943 | unsigned HOST_WIDE_INT digit; | |
944 | unsigned HOST_WIDE_INT val; | |
945 | int result; | |
946 | ||
947 | if (n == 0) | |
948 | return 0; | |
949 | ||
950 | /* Ignore the reciprocal when calculating the cost. */ | |
951 | val = (n < 0) ? -n : n; | |
952 | ||
953 | /* Initialize the exponent cache. */ | |
954 | memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool)); | |
955 | cache[1] = true; | |
956 | ||
957 | result = 0; | |
958 | ||
959 | while (val >= POWI_TABLE_SIZE) | |
960 | { | |
961 | if (val & 1) | |
962 | { | |
963 | digit = val & ((1 << POWI_WINDOW_SIZE) - 1); | |
964 | result += powi_lookup_cost (digit, cache) | |
965 | + POWI_WINDOW_SIZE + 1; | |
966 | val >>= POWI_WINDOW_SIZE; | |
967 | } | |
968 | else | |
969 | { | |
970 | val >>= 1; | |
971 | result++; | |
972 | } | |
973 | } | |
974 | ||
975 | return result + powi_lookup_cost (val, cache); | |
976 | } | |
977 | ||
/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  /* Reuse a previously emitted SSA name for this exponent, if any.  */
  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  /* Fresh SSA temporary that will hold pow(x,n).  */
  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      /* Small exponent: split per the optimal power tree.  Record the
	 target in the cache BEFORE recursing so shared subexpressions
	 of the two recursive calls find it.  */
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      /* Large odd exponent: peel off the low POWI_WINDOW_SIZE bits
	 (window method) and combine the two partial powers.  */
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      /* Large even exponent: pow(x,n) = pow(x,n/2) squared.  */
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  /* Emit ssa_target = op0 * op1 before GSI and return the result.  */
  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}
1019 | ||
1020 | /* Convert ARG0**N to a tree of multiplications of ARG0 with itself. | |
1021 | This function needs to be kept in sync with powi_cost above. */ | |
1022 | ||
1023 | static tree | |
1024 | powi_as_mults (gimple_stmt_iterator *gsi, location_t loc, | |
1025 | tree arg0, HOST_WIDE_INT n) | |
1026 | { | |
83d5977e | 1027 | tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0); |
538dd0b7 | 1028 | gassign *div_stmt; |
83d5977e | 1029 | tree target; |
78be79d5 BS |
1030 | |
1031 | if (n == 0) | |
1032 | return build_real (type, dconst1); | |
1033 | ||
1034 | memset (cache, 0, sizeof (cache)); | |
1035 | cache[1] = arg0; | |
1036 | ||
83d5977e | 1037 | result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache); |
78be79d5 BS |
1038 | if (n >= 0) |
1039 | return result; | |
1040 | ||
1041 | /* If the original exponent was negative, reciprocate the result. */ | |
83d5977e | 1042 | target = make_temp_ssa_name (type, NULL, "powmult"); |
0d0e4a03 JJ |
1043 | div_stmt = gimple_build_assign (target, RDIV_EXPR, |
1044 | build_real (type, dconst1), result); | |
ba869341 | 1045 | gimple_set_location (div_stmt, loc); |
78be79d5 BS |
1046 | gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT); |
1047 | ||
1048 | return target; | |
1049 | } | |
1050 | ||
1051 | /* ARG0 and N are the two arguments to a powi builtin in GSI with | |
1052 | location info LOC. If the arguments are appropriate, create an | |
1053 | equivalent sequence of statements prior to GSI using an optimal | |
1054 | number of multiplications, and return an expession holding the | |
1055 | result. */ | |
1056 | ||
1057 | static tree | |
1058 | gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc, | |
1059 | tree arg0, HOST_WIDE_INT n) | |
1060 | { | |
1061 | /* Avoid largest negative number. */ | |
1062 | if (n != -n | |
1063 | && ((n >= -1 && n <= 2) | |
1064 | || (optimize_function_for_speed_p (cfun) | |
1065 | && powi_cost (n) <= POWI_MAX_MULTS))) | |
1066 | return powi_as_mults (gsi, loc, arg0, n); | |
1067 | ||
1068 | return NULL_TREE; | |
1069 | } | |
1070 | ||
ba869341 | 1071 | /* Build a gimple call statement that calls FN with argument ARG. |
83d5977e | 1072 | Set the lhs of the call statement to a fresh SSA name. Insert the |
ba869341 BS |
1073 | statement prior to GSI's current position, and return the fresh |
1074 | SSA name. */ | |
1075 | ||
1076 | static tree | |
6e96f98a | 1077 | build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc, |
83d5977e | 1078 | tree fn, tree arg) |
ba869341 | 1079 | { |
538dd0b7 | 1080 | gcall *call_stmt; |
ba869341 BS |
1081 | tree ssa_target; |
1082 | ||
ba869341 | 1083 | call_stmt = gimple_build_call (fn, 1, arg); |
83d5977e | 1084 | ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot"); |
ba869341 BS |
1085 | gimple_set_lhs (call_stmt, ssa_target); |
1086 | gimple_set_location (call_stmt, loc); | |
1087 | gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT); | |
1088 | ||
1089 | return ssa_target; | |
1090 | } | |
1091 | ||
6e96f98a BS |
1092 | /* Build a gimple binary operation with the given CODE and arguments |
1093 | ARG0, ARG1, assigning the result to a new SSA name for variable | |
1094 | TARGET. Insert the statement prior to GSI's current position, and | |
1095 | return the fresh SSA name.*/ | |
1096 | ||
1097 | static tree | |
1098 | build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, | |
83d5977e RG |
1099 | const char *name, enum tree_code code, |
1100 | tree arg0, tree arg1) | |
6e96f98a | 1101 | { |
83d5977e | 1102 | tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name); |
0d0e4a03 | 1103 | gassign *stmt = gimple_build_assign (result, code, arg0, arg1); |
6e96f98a BS |
1104 | gimple_set_location (stmt, loc); |
1105 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1106 | return result; | |
1107 | } | |
1108 | ||
d7e2a1c1 | 1109 | /* Build a gimple reference operation with the given CODE and argument |
83d5977e | 1110 | ARG, assigning the result to a new SSA name of TYPE with NAME. |
d7e2a1c1 BS |
1111 | Insert the statement prior to GSI's current position, and return |
1112 | the fresh SSA name. */ | |
1113 | ||
1114 | static inline tree | |
1115 | build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, | |
83d5977e | 1116 | const char *name, enum tree_code code, tree arg0) |
d7e2a1c1 | 1117 | { |
83d5977e | 1118 | tree result = make_temp_ssa_name (type, NULL, name); |
d7e2a1c1 BS |
1119 | gimple stmt = gimple_build_assign (result, build1 (code, type, arg0)); |
1120 | gimple_set_location (stmt, loc); | |
1121 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1122 | return result; | |
1123 | } | |
1124 | ||
83d5977e | 1125 | /* Build a gimple assignment to cast VAL to TYPE. Insert the statement |
5dfe80ba AS |
1126 | prior to GSI's current position, and return the fresh SSA name. */ |
1127 | ||
1128 | static tree | |
1129 | build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, | |
83d5977e | 1130 | tree type, tree val) |
5dfe80ba | 1131 | { |
b731b390 | 1132 | tree result = make_ssa_name (type); |
0d0e4a03 | 1133 | gassign *stmt = gimple_build_assign (result, NOP_EXPR, val); |
83d5977e RG |
1134 | gimple_set_location (stmt, loc); |
1135 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
1136 | return result; | |
5dfe80ba AS |
1137 | } |
1138 | ||
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */

static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
			   tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
  machine_mode mode;
  bool hw_sqrt_exists, c_is_int, c2_is_int;

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  c_is_int records whether
     the exponent round-trips exactly through an integer.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c_is_int = real_identical (&c, &cint);

  if (c_is_int
      && ((n >= -1 && n <= 2)
	  || (flag_unsafe_math_optimizations
	      && optimize_bb_for_speed_p (gsi_bb (*gsi))
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && REAL_VALUES_EQUAL (c, dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
     a builtin sqrt instruction is smaller than a call to pow with 0.25,
     so do this optimization even if -Os.  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && REAL_VALUES_EQUAL (c, dconst1_4)
      && hw_sqrt_exists)
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* sqrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
    }

  /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
     optimizing for space.  Don't do this optimization if we don't have
     a hardware sqrt insn.  */
  real_from_integer (&dconst3_4, VOIDmode, 3, SIGNED);
  SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && optimize_function_for_speed_p (cfun)
      && REAL_VALUES_EQUAL (c, dconst3_4)
      && hw_sqrt_exists)
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* sqrt(sqrt(x))  */
      sqrt_sqrt = build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);

      /* sqrt(x) * sqrt(sqrt(x))  */
      return build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				     sqrt_arg0, sqrt_sqrt);
    }

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && REAL_VALUES_EQUAL (c, dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && optimize_function_for_speed_p (cfun)
      && hw_sqrt_exists
      && REAL_VALUES_EQUAL (c, dconst1_6))
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }

  /* Optimize pow(x,c), where n = 2c for some nonzero integer n
     and c not an integer, into

       sqrt(x) * powi(x, n/2),                n > 0;
       1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.

     Do not calculate the powi factor when n/2 = 0.  Note: n and cint
     are reused here for the doubled exponent 2c.  */
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c2_is_int = real_identical (&c2, &cint);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && c2_is_int
      && !c_is_int
      && optimize_function_for_speed_p (cfun))
    {
      tree powi_x_ndiv2 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/2)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 n is 1 or -1, where the result is always 1.  */
      if (absu_hwi (n) != 1)
	{
	  powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 2));
	  if (!powi_x_ndiv2)
	    return NULL_TREE;
	}

      /* Calculate sqrt(x).  When n is not 1 or -1, multiply it by the
	 result of the optimal multiply sequence just calculated.  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      if (absu_hwi (n) == 1)
	result = sqrt_arg0;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 sqrt_arg0, powi_x_ndiv2);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);
      return result;
    }

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

       powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
       1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  The round-trip through
     real_round/real_arithmetic below verifies that c is exactly n/3.  */
  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && real_identical (&c2, &c)
      && !c2_is_int
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	{
	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 3));
	  if (!powi_x_ndiv3)
	    return NULL_TREE;
	}

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					      cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}
1380 | ||
d7e2a1c1 BS |
1381 | /* ARG is the argument to a cabs builtin call in GSI with location info |
1382 | LOC. Create a sequence of statements prior to GSI that calculates | |
1383 | sqrt(R*R + I*I), where R and I are the real and imaginary components | |
1384 | of ARG, respectively. Return an expression holding the result. */ | |
1385 | ||
1386 | static tree | |
1387 | gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) | |
1388 | { | |
83d5977e | 1389 | tree real_part, imag_part, addend1, addend2, sum, result; |
d7e2a1c1 BS |
1390 | tree type = TREE_TYPE (TREE_TYPE (arg)); |
1391 | tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); | |
ef4bddc2 | 1392 | machine_mode mode = TYPE_MODE (type); |
d7e2a1c1 BS |
1393 | |
1394 | if (!flag_unsafe_math_optimizations | |
1395 | || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi))) | |
1396 | || !sqrtfn | |
1397 | || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing) | |
1398 | return NULL_TREE; | |
1399 | ||
83d5977e | 1400 | real_part = build_and_insert_ref (gsi, loc, type, "cabs", |
d7e2a1c1 | 1401 | REALPART_EXPR, arg); |
83d5977e | 1402 | addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |
d7e2a1c1 | 1403 | real_part, real_part); |
83d5977e | 1404 | imag_part = build_and_insert_ref (gsi, loc, type, "cabs", |
d7e2a1c1 | 1405 | IMAGPART_EXPR, arg); |
83d5977e | 1406 | addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |
d7e2a1c1 | 1407 | imag_part, imag_part); |
83d5977e RG |
1408 | sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2); |
1409 | result = build_and_insert_call (gsi, loc, sqrtfn, sum); | |
d7e2a1c1 BS |
1410 | |
1411 | return result; | |
1412 | } | |
1413 | ||
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
   an optimal number of multiplies, when n is a constant.  */

namespace {

/* Pass descriptor for the sincos pass.  */
const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* GIMPLE optimization pass wrapper; the actual work is done in the
   execute method defined below the class.  */
class pass_cse_sincos : public gimple_opt_pass
{
public:
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* We no longer require either sincos or cexp, since powi expansion
	 piggybacks on this pass.  */
      return optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_cse_sincos
1451 | ||
1452 | unsigned int | |
1453 | pass_cse_sincos::execute (function *fun) | |
2f397a93 RG |
1454 | { |
1455 | basic_block bb; | |
90bc1cb8 | 1456 | bool cfg_changed = false; |
2f397a93 RG |
1457 | |
1458 | calculate_dominance_info (CDI_DOMINATORS); | |
4da3b811 | 1459 | memset (&sincos_stats, 0, sizeof (sincos_stats)); |
2f397a93 | 1460 | |
be55bfe6 | 1461 | FOR_EACH_BB_FN (bb, fun) |
2f397a93 | 1462 | { |
726a989a | 1463 | gimple_stmt_iterator gsi; |
3b9ee1cc | 1464 | bool cleanup_eh = false; |
2f397a93 | 1465 | |
726a989a | 1466 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
2f397a93 | 1467 | { |
726a989a | 1468 | gimple stmt = gsi_stmt (gsi); |
2f397a93 RG |
1469 | tree fndecl; |
1470 | ||
3b9ee1cc JJ |
1471 | /* Only the last stmt in a bb could throw, no need to call |
1472 | gimple_purge_dead_eh_edges if we change something in the middle | |
1473 | of a basic block. */ | |
1474 | cleanup_eh = false; | |
1475 | ||
726a989a RB |
1476 | if (is_gimple_call (stmt) |
1477 | && gimple_call_lhs (stmt) | |
1478 | && (fndecl = gimple_call_fndecl (stmt)) | |
2f397a93 RG |
1479 | && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) |
1480 | { | |
78be79d5 BS |
1481 | tree arg, arg0, arg1, result; |
1482 | HOST_WIDE_INT n; | |
1483 | location_t loc; | |
2f397a93 RG |
1484 | |
1485 | switch (DECL_FUNCTION_CODE (fndecl)) | |
1486 | { | |
1487 | CASE_FLT_FN (BUILT_IN_COS): | |
1488 | CASE_FLT_FN (BUILT_IN_SIN): | |
1489 | CASE_FLT_FN (BUILT_IN_CEXPI): | |
fa65a9cf | 1490 | /* Make sure we have either sincos or cexp. */ |
d33d9e47 AI |
1491 | if (!targetm.libc_has_function (function_c99_math_complex) |
1492 | && !targetm.libc_has_function (function_sincos)) | |
fa65a9cf BS |
1493 | break; |
1494 | ||
726a989a | 1495 | arg = gimple_call_arg (stmt, 0); |
2f397a93 | 1496 | if (TREE_CODE (arg) == SSA_NAME) |
90bc1cb8 | 1497 | cfg_changed |= execute_cse_sincos_1 (arg); |
2f397a93 RG |
1498 | break; |
1499 | ||
d24ad7d6 BS |
1500 | CASE_FLT_FN (BUILT_IN_POW): |
1501 | arg0 = gimple_call_arg (stmt, 0); | |
1502 | arg1 = gimple_call_arg (stmt, 1); | |
1503 | ||
1504 | loc = gimple_location (stmt); | |
1505 | result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1); | |
1506 | ||
1507 | if (result) | |
1508 | { | |
1509 | tree lhs = gimple_get_lhs (stmt); | |
538dd0b7 | 1510 | gassign *new_stmt = gimple_build_assign (lhs, result); |
d24ad7d6 BS |
1511 | gimple_set_location (new_stmt, loc); |
1512 | unlink_stmt_vdef (stmt); | |
1513 | gsi_replace (&gsi, new_stmt, true); | |
3b9ee1cc | 1514 | cleanup_eh = true; |
3d3f2249 RG |
1515 | if (gimple_vdef (stmt)) |
1516 | release_ssa_name (gimple_vdef (stmt)); | |
d24ad7d6 BS |
1517 | } |
1518 | break; | |
1519 | ||
78be79d5 BS |
1520 | CASE_FLT_FN (BUILT_IN_POWI): |
1521 | arg0 = gimple_call_arg (stmt, 0); | |
1522 | arg1 = gimple_call_arg (stmt, 1); | |
78be79d5 | 1523 | loc = gimple_location (stmt); |
0fa6e0ef | 1524 | |
e3530904 | 1525 | if (real_minus_onep (arg0)) |
0fa6e0ef TB |
1526 | { |
1527 | tree t0, t1, cond, one, minus_one; | |
538dd0b7 | 1528 | gassign *stmt; |
0fa6e0ef TB |
1529 | |
1530 | t0 = TREE_TYPE (arg0); | |
1531 | t1 = TREE_TYPE (arg1); | |
1532 | one = build_real (t0, dconst1); | |
1533 | minus_one = build_real (t0, dconstm1); | |
1534 | ||
1535 | cond = make_temp_ssa_name (t1, NULL, "powi_cond"); | |
0d0e4a03 JJ |
1536 | stmt = gimple_build_assign (cond, BIT_AND_EXPR, |
1537 | arg1, build_int_cst (t1, 1)); | |
0fa6e0ef TB |
1538 | gimple_set_location (stmt, loc); |
1539 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
1540 | ||
1541 | result = make_temp_ssa_name (t0, NULL, "powi"); | |
0d0e4a03 JJ |
1542 | stmt = gimple_build_assign (result, COND_EXPR, cond, |
1543 | minus_one, one); | |
0fa6e0ef TB |
1544 | gimple_set_location (stmt, loc); |
1545 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
1546 | } | |
1547 | else | |
1548 | { | |
9541ffee | 1549 | if (!tree_fits_shwi_p (arg1)) |
daf4e940 TB |
1550 | break; |
1551 | ||
eb1ce453 | 1552 | n = tree_to_shwi (arg1); |
0fa6e0ef TB |
1553 | result = gimple_expand_builtin_powi (&gsi, loc, arg0, n); |
1554 | } | |
78be79d5 BS |
1555 | |
1556 | if (result) | |
1557 | { | |
1558 | tree lhs = gimple_get_lhs (stmt); | |
538dd0b7 | 1559 | gassign *new_stmt = gimple_build_assign (lhs, result); |
78be79d5 | 1560 | gimple_set_location (new_stmt, loc); |
d7e2a1c1 BS |
1561 | unlink_stmt_vdef (stmt); |
1562 | gsi_replace (&gsi, new_stmt, true); | |
3b9ee1cc | 1563 | cleanup_eh = true; |
3d3f2249 RG |
1564 | if (gimple_vdef (stmt)) |
1565 | release_ssa_name (gimple_vdef (stmt)); | |
d7e2a1c1 BS |
1566 | } |
1567 | break; | |
1568 | ||
1569 | CASE_FLT_FN (BUILT_IN_CABS): | |
1570 | arg0 = gimple_call_arg (stmt, 0); | |
1571 | loc = gimple_location (stmt); | |
1572 | result = gimple_expand_builtin_cabs (&gsi, loc, arg0); | |
1573 | ||
1574 | if (result) | |
1575 | { | |
1576 | tree lhs = gimple_get_lhs (stmt); | |
538dd0b7 | 1577 | gassign *new_stmt = gimple_build_assign (lhs, result); |
d7e2a1c1 | 1578 | gimple_set_location (new_stmt, loc); |
78be79d5 BS |
1579 | unlink_stmt_vdef (stmt); |
1580 | gsi_replace (&gsi, new_stmt, true); | |
3b9ee1cc | 1581 | cleanup_eh = true; |
3d3f2249 RG |
1582 | if (gimple_vdef (stmt)) |
1583 | release_ssa_name (gimple_vdef (stmt)); | |
78be79d5 BS |
1584 | } |
1585 | break; | |
1586 | ||
2f397a93 RG |
1587 | default:; |
1588 | } | |
1589 | } | |
1590 | } | |
3b9ee1cc JJ |
1591 | if (cleanup_eh) |
1592 | cfg_changed |= gimple_purge_dead_eh_edges (bb); | |
2f397a93 RG |
1593 | } |
1594 | ||
be55bfe6 | 1595 | statistics_counter_event (fun, "sincos statements inserted", |
4da3b811 NF |
1596 | sincos_stats.inserted); |
1597 | ||
2f397a93 | 1598 | free_dominance_info (CDI_DOMINATORS); |
90bc1cb8 | 1599 | return cfg_changed ? TODO_cleanup_cfg : 0; |
2f397a93 RG |
1600 | } |
1601 | ||
27a4cd48 DM |
1602 | } // anon namespace |
1603 | ||
/* Pass-manager factory: allocate a fresh instance of the sincos CSE
   pass bound to compiler context CTXT.  */

gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}
1609 | ||
03bd2f1a AK |
/* A symbolic number is used to detect byte permutation and selection
   patterns.  Therefore the field N contains an artificial number
   consisting of octet sized markers:

     0	      - target byte has the value 0
     FF	      - target byte has an unknown value (eg. due to sign extension)
     1..size  - marker value is the target byte index minus one.

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated a base address (the array or structure the load is
   made from), an offset from the base address and a range which gives the
   difference between the highest and lowest accessed memory location to make
   such a symbolic number.  The range is thus different from size which reflects
   the size of the type of current expression.  Note that for non memory source,
   range holds the same value as size.

   For instance, for an array char a[], (short) a[0] | (short) a[3] would have
   a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
   still have a size of 2 but this time a range of 1.  */

struct symbolic_number {
  uint64_t n;			/* One octet marker per byte of TYPE.  */
  tree type;			/* Type of the expression tracked so far.  */
  tree base_addr;		/* Base of the memory access, or NULL for
				   non-memory sources.  */
  tree offset;			/* Variable offset from BASE_ADDR, or NULL.  */
  HOST_WIDE_INT bytepos;	/* Constant byte offset from BASE_ADDR.  */
  tree alias_set;		/* Alias pointer type of the access.  */
  tree vuse;			/* Virtual use of the load statement.  */
  unsigned HOST_WIDE_INT range;	/* Span in bytes of the accessed memory.  */
};

/* Number of bits used by one byte marker inside symbolic_number::n.  */
#define BITS_PER_MARKER 8
/* Mask isolating a single marker.  */
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
/* Marker value denoting a byte with unknown content.  */
#define MARKER_BYTE_UNKNOWN MARKER_MASK
/* Extract the highest-order marker of an N covering SIZE bytes.  */
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)
73984f84 | 1658 | |
03bd2f1a AK |
1659 | /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic |
1660 | number N. Return false if the requested operation is not permitted | |
1661 | on a symbolic number. */ | |
1662 | ||
1663 | static inline bool | |
1664 | do_shift_rotate (enum tree_code code, | |
1665 | struct symbolic_number *n, | |
1666 | int count) | |
1667 | { | |
aa29ea0c TP |
1668 | int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; |
1669 | unsigned head_marker; | |
698ff107 | 1670 | |
e3ef4162 | 1671 | if (count % BITS_PER_UNIT != 0) |
03bd2f1a | 1672 | return false; |
e3ef4162 | 1673 | count = (count / BITS_PER_UNIT) * BITS_PER_MARKER; |
03bd2f1a AK |
1674 | |
1675 | /* Zero out the extra bits of N in order to avoid them being shifted | |
1676 | into the significant bits. */ | |
e3ef4162 TP |
1677 | if (size < 64 / BITS_PER_MARKER) |
1678 | n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; | |
03bd2f1a AK |
1679 | |
1680 | switch (code) | |
1681 | { | |
1682 | case LSHIFT_EXPR: | |
1683 | n->n <<= count; | |
1684 | break; | |
1685 | case RSHIFT_EXPR: | |
aa29ea0c | 1686 | head_marker = HEAD_MARKER (n->n, size); |
03bd2f1a | 1687 | n->n >>= count; |
aa29ea0c TP |
1688 | /* Arithmetic shift of signed type: result is dependent on the value. */ |
1689 | if (!TYPE_UNSIGNED (n->type) && head_marker) | |
1690 | for (i = 0; i < count / BITS_PER_MARKER; i++) | |
1691 | n->n |= (uint64_t) MARKER_BYTE_UNKNOWN | |
1692 | << ((size - 1 - i) * BITS_PER_MARKER); | |
03bd2f1a AK |
1693 | break; |
1694 | case LROTATE_EXPR: | |
e3ef4162 | 1695 | n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count)); |
03bd2f1a AK |
1696 | break; |
1697 | case RROTATE_EXPR: | |
e3ef4162 | 1698 | n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count)); |
03bd2f1a AK |
1699 | break; |
1700 | default: | |
1701 | return false; | |
1702 | } | |
5da49a9d | 1703 | /* Zero unused bits for size. */ |
e3ef4162 TP |
1704 | if (size < 64 / BITS_PER_MARKER) |
1705 | n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; | |
03bd2f1a AK |
1706 | return true; |
1707 | } | |
1708 | ||
1709 | /* Perform sanity checking for the symbolic number N and the gimple | |
1710 | statement STMT. */ | |
1711 | ||
1712 | static inline bool | |
1713 | verify_symbolic_number_p (struct symbolic_number *n, gimple stmt) | |
1714 | { | |
1715 | tree lhs_type; | |
1716 | ||
1717 | lhs_type = gimple_expr_type (stmt); | |
1718 | ||
1719 | if (TREE_CODE (lhs_type) != INTEGER_TYPE) | |
1720 | return false; | |
1721 | ||
698ff107 | 1722 | if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type)) |
03bd2f1a AK |
1723 | return false; |
1724 | ||
1725 | return true; | |
1726 | } | |
1727 | ||
3cc272c1 TP |
1728 | /* Initialize the symbolic number N for the bswap pass from the base element |
1729 | SRC manipulated by the bitwise OR expression. */ | |
1730 | ||
1731 | static bool | |
1732 | init_symbolic_number (struct symbolic_number *n, tree src) | |
1733 | { | |
698ff107 TP |
1734 | int size; |
1735 | ||
3cc272c1 TP |
1736 | n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; |
1737 | ||
1738 | /* Set up the symbolic number N by setting each byte to a value between 1 and | |
1739 | the byte size of rhs1. The highest order byte is set to n->size and the | |
1740 | lowest order byte to 1. */ | |
698ff107 TP |
1741 | n->type = TREE_TYPE (src); |
1742 | size = TYPE_PRECISION (n->type); | |
1743 | if (size % BITS_PER_UNIT != 0) | |
3cc272c1 | 1744 | return false; |
698ff107 | 1745 | size /= BITS_PER_UNIT; |
e3ef4162 | 1746 | if (size > 64 / BITS_PER_MARKER) |
ca6cbdca | 1747 | return false; |
698ff107 | 1748 | n->range = size; |
3cc272c1 TP |
1749 | n->n = CMPNOP; |
1750 | ||
e3ef4162 TP |
1751 | if (size < 64 / BITS_PER_MARKER) |
1752 | n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; | |
3cc272c1 TP |
1753 | |
1754 | return true; | |
1755 | } | |
1756 | ||
73984f84 TP |
/* Check if STMT might be a byte swap or a nop from a memory source and returns
   the answer.  If so, REF is that memory source and the base of the memory area
   accessed and the offset of the access from that base are recorded in N,
   together with the alias pointer type and the VUSE of the load.  */

bool
find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  HOST_WIDE_INT bitsize, bitpos;
  machine_mode mode;
  int unsignedp, volatilep;
  tree offset, base_addr;

  /* Only plain, non-volatile loads can be modelled.  */
  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
				   &unsignedp, &volatilep, false);

  if (TREE_CODE (base_addr) == MEM_REF)
    {
      offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      /* Fold the MEM_REF's constant offset into BIT_OFFSET.  */
      if (!integer_zerop (off))
	{
	  offset_int boff, coff = mem_ref_offset (base_addr);
	  boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
	  bit_offset += boff;
	}

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (wi::neg_p (bit_offset))
	{
	  offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
	  offset_int tem = bit_offset.and_not (mask);
	  /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
	     Subtract it to BIT_OFFSET and add it (scaled) to OFFSET.  */
	  bit_offset -= tem;
	  tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
	  if (offset)
	    offset = size_binop (PLUS_EXPR, offset,
				 wide_int_to_tree (sizetype, tem));
	  else
	    offset = wide_int_to_tree (sizetype, tem);
	}

      bitpos += bit_offset.to_shwi ();
    }

  /* Reject accesses that do not start and end on byte boundaries:
     the symbolic number only tracks whole bytes.  */
  if (bitpos % BITS_PER_UNIT)
    return false;
  if (bitsize % BITS_PER_UNIT)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bitpos / BITS_PER_UNIT;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}
1824 | ||
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand and NULL
   otherwise.  LIMIT bounds the recursion depth so pathological chains of
   statements are not walked indefinitely.  */

static gimple
find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple rhs1_stmt, rhs2_stmt, source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  /* A load terminates the walk: the symbolic number is seeded from
     the memory reference.  */
  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && !CONVERT_EXPR_CODE_P (code))
	return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
	 we have to initialize the symbolic number.  */
      if (!source_stmt1)
	{
	  if (gimple_assign_load_p (stmt)
	      || !init_symbolic_number (n, rhs1))
	    return NULL;
	  source_stmt1 = stmt;
	}

      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    uint64_t val = int_cst_value (rhs2), mask = 0;
	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
	      if ((val & tmp) != 0 && (val & tmp) != tmp)
		return NULL;
	      else if (val & tmp)
		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

	    /* Masked-out bytes become 0 markers.  */
	    n->n &= mask;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
	    return NULL;
	  break;
	CASE_CONVERT:
	  {
	    int i, type_size, old_type_size;
	    tree type;

	    type = gimple_expr_type (stmt);
	    type_size = TYPE_PRECISION (type);
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL;
	    type_size /= BITS_PER_UNIT;
	    if (type_size > 64 / BITS_PER_MARKER)
	      return NULL;

	    /* Sign extension: result is dependent on the value.  */
	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
		&& HEAD_MARKER (n->n, old_type_size))
	      for (i = 0; i < type_size - old_type_size; i++)
		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
			<< ((type_size - 1 - i) * BITS_PER_MARKER);

	    if (type_size < 64 / BITS_PER_MARKER)
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
	      }
	    n->type = type;
	    /* For non-memory sources the range follows the type size.  */
	    if (!n->base_addr)
	      n->range = type_size;
	  }
	  break;
	default:
	  return NULL;
	};
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      int i, size;
      struct symbolic_number n1, n2;
      uint64_t mask;
      gimple source_stmt2;

      if (code != BIT_IOR_EXPR)
	return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
	return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
	{
	case BIT_IOR_EXPR:
	  /* Recurse into both operands; each must itself be a valid
	     byte-manipulation chain.  */
	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

	  if (!source_stmt1)
	    return NULL;

	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

	  if (!source_stmt2)
	    return NULL;

	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
	    return NULL;

	  /* Memory operands must see the same memory state.  */
	  if (!n1.vuse != !n2.vuse ||
	      (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
	    return NULL;

	  if (gimple_assign_rhs1 (source_stmt1)
	      != gimple_assign_rhs1 (source_stmt2))
	    {
	      /* The two operands load from different locations: they must
		 share the same base and (variable) offset, so only the
		 constant byte position may differ.  */
	      int64_t inc;
	      HOST_WIDE_INT off_sub;
	      struct symbolic_number *n_ptr;

	      if (!n1.base_addr || !n2.base_addr
		  || !operand_equal_p (n1.base_addr, n2.base_addr, 0))
		return NULL;
	      if (!n1.offset != !n2.offset ||
		  (n1.offset && !operand_equal_p (n1.offset, n2.offset, 0)))
		return NULL;

	      /* We swap n1 with n2 to have n1 < n2.  */
	      if (n2.bytepos < n1.bytepos)
		{
		  struct symbolic_number tmpn;

		  tmpn = n2;
		  n2 = n1;
		  n1 = tmpn;
		  source_stmt1 = source_stmt2;
		}

	      off_sub = n2.bytepos - n1.bytepos;

	      /* Check that the range of memory covered can be represented by
		 a symbolic number.  */
	      if (off_sub + n2.range > 64 / BITS_PER_MARKER)
		return NULL;
	      n->range = n2.range + off_sub;

	      /* Reinterpret byte marks in symbolic number holding the value of
		 bigger weight according to target endianness.  */
	      inc = BYTES_BIG_ENDIAN ? off_sub + n2.range - n1.range : off_sub;
	      size = TYPE_PRECISION (n1.type) / BITS_PER_UNIT;
	      if (BYTES_BIG_ENDIAN)
		n_ptr = &n1;
	      else
		n_ptr = &n2;
	      /* Renumber the markers of the higher-weight operand; 0 and
		 unknown markers are position-independent and stay put.  */
	      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
		{
		  unsigned marker =
		    (n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
		  if (marker && marker != MARKER_BYTE_UNKNOWN)
		    n_ptr->n += inc;
		}
	    }
	  else
	    n->range = n1.range;

	  if (!n1.alias_set
	      || alias_ptr_types_compatible_p (n1.alias_set, n2.alias_set))
	    n->alias_set = n1.alias_set;
	  else
	    n->alias_set = ptr_type_node;
	  n->vuse = n1.vuse;
	  n->base_addr = n1.base_addr;
	  n->offset = n1.offset;
	  n->bytepos = n1.bytepos;
	  n->type = n1.type;
	  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	  /* The OR is only a byte permutation if no byte is claimed with
	     two different markers by the two operands.  */
	  for (i = 0, mask = MARKER_MASK; i < size;
	       i++, mask <<= BITS_PER_MARKER)
	    {
	      uint64_t masked1, masked2;

	      masked1 = n1.n & mask;
	      masked2 = n2.n & mask;
	      if (masked1 && masked2 && masked1 != masked2)
		return NULL;
	    }
	  n->n = n1.n | n2.n;

	  if (!verify_symbolic_number_p (n, stmt))
	    return NULL;

	  break;
	default:
	  return NULL;
	}
      return source_stmt1;
    }
  return NULL;
}
2070 | ||
73984f84 TP |
/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  At last, the
   function returns a stmt whose rhs's first tree is the source
   expression.  Note that N->range is in bytes throughout this function
   and only converted to bits just before returning.  */

static gimple
find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
{
  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  uint64_t cmpxchg = CMPXCHG;
  uint64_t cmpnop = CMPNOP;

  gimple source_stmt;
  int limit;

  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by log2(n) + 1 here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
  limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!source_stmt)
    return NULL;

  /* Find real size of result (highest non zero byte).  */
  if (n->base_addr)
    {
      int rsize;
      uint64_t tmpn;

      for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
      n->range = rsize;
    }

  /* Zero out the extra bits of N and CMP*.  */
  if (n->range < (int) sizeof (int64_t))
    {
      uint64_t mask;

      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      /* CMPXCHG's significant markers sit in the high-order bytes, so it
	 is shifted down rather than masked.  */
      cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      cmpnop &= mask;
    }

  /* A complete byte swap should make the symbolic number to start with
     the largest digit in the highest order byte.  Unchanged symbolic
     number indicates a read with same endianness as target architecture.  */
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    return NULL;

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop)
    return NULL;

  n->range *= BITS_PER_UNIT;
  return source_stmt;
}
2140 | ||
be55bfe6 TS |
2141 | namespace { |
2142 | ||
/* Static descriptor of the bswap pass consumed by the pass manager.  */

const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2155 | ||
/* Pass wrapper class for the bswap optimization; the real work is done
   in the out-of-line execute method.  */

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fexpensive-optimizations and optimization are
     both enabled.  */
  virtual bool gate (function *)
  {
    return flag_expensive_optimizations && optimize;
  }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap
2172 | ||
f351abd6 TP |
/* Perform the bswap optimization: replace the expression computed in the rhs
   of CUR_STMT by an equivalent bswap, load or load + bswap expression.
   Which of these alternatives replace the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and set-alias of the
   load to perform are also given in N while the builtin bswap invoke is given
   in FNDECL.  Finally, if a load is involved, SRC_STMT refers to one of the
   load statements involved to construct the rhs in CUR_STMT and N->range gives
   the size of the rhs expression for maintaining some statistics.

   Note that if the replacement involve a load, CUR_STMT is moved just after
   SRC_STMT to do the load with the same VUSE which can lead to CUR_STMT
   changing of basic block.  */

static bool
bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
	       tree load_type, struct symbolic_number *n, bool bswap)
{
  gimple_stmt_iterator gsi;
  tree src, tmp, tgt;
  gimple bswap_stmt;

  gsi = gsi_for_stmt (cur_stmt);
  src = gimple_assign_rhs1 (src_stmt);
  tgt = gimple_assign_lhs (cur_stmt);

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple addr_stmt, load_stmt;
      unsigned align;

      /* Give up when a bswap would require a wide load the target only
	 supports slowly at this alignment.  */
      align = get_object_alignment (src);
      if (bswap
	  && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
	  && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
	return false;

      /* Move CUR_STMT just before one of the original load statements
	 to ensure it has the same VUSE.  See PR61517 for what could
	 go wrong otherwise.  */
      gsi_move_before (&gsi, &gsi_ins);
      gsi = gsi_for_stmt (cur_stmt);

      /* Compute address to load from and cast according to the size
	 of the load.  */
      addr_expr = build_fold_addr_expr (unshare_expr (src));
      if (is_gimple_min_invariant (addr_expr))
	addr_tmp = addr_expr;
      else
	{
	  addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
					 "load_src");
	  addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
	  gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
	}

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
	aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, 0);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
			      load_offset_ptr);

      if (!bswap)
	{
	  /* Plain load in target endianness: record statistics by
	     access width.  */
	  if (n->range == 16)
	    nop_stats.found_16bit++;
	  else if (n->range == 32)
	    nop_stats.found_32bit++;
	  else
	    {
	      gcc_assert (n->range == 64);
	      nop_stats.found_64bit++;
	    }

	  /* Convert the result of load if necessary.  */
	  if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
	    {
	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
					    "load_dst");
	      load_stmt = gimple_build_assign (val_tmp, val_expr);
	      gimple_set_vuse (load_stmt, n->vuse);
	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
	    }
	  else
	    {
	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	    }
	  update_stmt (cur_stmt);

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "%d bit load in target endianness found at: ",
		       (int)n->range);
	      print_gimple_stmt (dump_file, cur_stmt, 0, 0);
	    }
	  return true;
	}
      else
	{
	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
	  load_stmt = gimple_build_assign (val_tmp, val_expr);
	  gimple_set_vuse (load_stmt, n->vuse);
	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	}
      src = val_tmp;
    }

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
     are considered as rotation of 2N bit values by N bits is generally not
     equivalent to a bswap.  Consider for instance 0x01020304 >> 16 which gives
     0x03040102 while a bswap for that value is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      bswap_type = TREE_TYPE (src);
      src = fold_build2 (LROTATE_EXPR, bswap_type, src, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    {
      /* Convert the src expression if necessary.  */
      if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
	{
	  gimple convert_stmt;
	  tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
	  convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
	  gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
	}

      bswap_stmt = gimple_build_call (fndecl, 1, tmp);
    }

  tmp = tgt;

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      gimple convert_stmt;
      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
	       (int)n->range);
      print_gimple_stmt (dump_file, cur_stmt, 0, 0);
    }

  gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);
  return true;
}
2349 | ||
2350 | /* Find manual byte swap implementations as well as load in a given | |
2351 | endianness. Byte swaps are turned into a bswap builtin invokation | |
2352 | while endian loads are converted to bswap builtin invokation or | |
58126368 | 2353 | simple load according to the target endianness. */ |
73984f84 | 2354 | |
be55bfe6 TS |
2355 | unsigned int |
2356 | pass_optimize_bswap::execute (function *fun) | |
03bd2f1a AK |
2357 | { |
2358 | basic_block bb; | |
e4a57350 | 2359 | bool bswap32_p, bswap64_p; |
03bd2f1a | 2360 | bool changed = false; |
e4a57350 | 2361 | tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; |
03bd2f1a AK |
2362 | |
2363 | if (BITS_PER_UNIT != 8) | |
2364 | return 0; | |
2365 | ||
e79983f4 | 2366 | bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32) |
947131ba | 2367 | && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing); |
e79983f4 | 2368 | bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64) |
947131ba | 2369 | && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing |
ba1ee228 | 2370 | || (bswap32_p && word_mode == SImode))); |
03bd2f1a | 2371 | |
fb6234e0 AK |
2372 | /* Determine the argument type of the builtins. The code later on |
2373 | assumes that the return and argument type are the same. */ | |
2374 | if (bswap32_p) | |
2375 | { | |
e79983f4 | 2376 | tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); |
fb6234e0 AK |
2377 | bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); |
2378 | } | |
2379 | ||
2380 | if (bswap64_p) | |
2381 | { | |
e79983f4 | 2382 | tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); |
fb6234e0 AK |
2383 | bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); |
2384 | } | |
2385 | ||
73984f84 | 2386 | memset (&nop_stats, 0, sizeof (nop_stats)); |
4da3b811 NF |
2387 | memset (&bswap_stats, 0, sizeof (bswap_stats)); |
2388 | ||
be55bfe6 | 2389 | FOR_EACH_BB_FN (bb, fun) |
03bd2f1a AK |
2390 | { |
2391 | gimple_stmt_iterator gsi; | |
2392 | ||
72a32729 | 2393 | /* We do a reverse scan for bswap patterns to make sure we get the |
f351abd6 TP |
2394 | widest match. As bswap pattern matching doesn't handle previously |
2395 | inserted smaller bswap replacements as sub-patterns, the wider | |
2396 | variant wouldn't be detected. */ | |
2397 | for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);) | |
03bd2f1a | 2398 | { |
a31d2741 TP |
2399 | gimple src_stmt, cur_stmt = gsi_stmt (gsi); |
2400 | tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; | |
c6e3a931 | 2401 | enum tree_code code; |
73984f84 TP |
2402 | struct symbolic_number n; |
2403 | bool bswap; | |
03bd2f1a | 2404 | |
f351abd6 TP |
2405 | /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt |
2406 | might be moved to a different basic block by bswap_replace and gsi | |
2407 | must not points to it if that's the case. Moving the gsi_prev | |
2408 | there make sure that gsi points to the statement previous to | |
2409 | cur_stmt while still making sure that all statements are | |
2410 | considered in this basic block. */ | |
2411 | gsi_prev (&gsi); | |
2412 | ||
c6e3a931 | 2413 | if (!is_gimple_assign (cur_stmt)) |
03bd2f1a AK |
2414 | continue; |
2415 | ||
c6e3a931 TP |
2416 | code = gimple_assign_rhs_code (cur_stmt); |
2417 | switch (code) | |
2418 | { | |
2419 | case LROTATE_EXPR: | |
2420 | case RROTATE_EXPR: | |
2421 | if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt)) | |
2422 | || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt)) | |
2423 | % BITS_PER_UNIT) | |
2424 | continue; | |
2425 | /* Fall through. */ | |
2426 | case BIT_IOR_EXPR: | |
2427 | break; | |
2428 | default: | |
2429 | continue; | |
2430 | } | |
2431 | ||
a31d2741 | 2432 | src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); |
73984f84 | 2433 | |
a31d2741 | 2434 | if (!src_stmt) |
73984f84 | 2435 | continue; |
03bd2f1a | 2436 | |
73984f84 | 2437 | switch (n.range) |
03bd2f1a | 2438 | { |
1df855ce | 2439 | case 16: |
f351abd6 TP |
2440 | /* Already in canonical form, nothing to do. */ |
2441 | if (code == LROTATE_EXPR || code == RROTATE_EXPR) | |
2442 | continue; | |
73984f84 | 2443 | load_type = uint16_type_node; |
1df855ce | 2444 | break; |
03bd2f1a | 2445 | case 32: |
73984f84 | 2446 | load_type = uint32_type_node; |
03bd2f1a | 2447 | if (bswap32_p) |
fb6234e0 | 2448 | { |
e79983f4 | 2449 | fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); |
fb6234e0 AK |
2450 | bswap_type = bswap32_type; |
2451 | } | |
03bd2f1a AK |
2452 | break; |
2453 | case 64: | |
73984f84 | 2454 | load_type = uint64_type_node; |
03bd2f1a | 2455 | if (bswap64_p) |
fb6234e0 | 2456 | { |
e79983f4 | 2457 | fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); |
fb6234e0 AK |
2458 | bswap_type = bswap64_type; |
2459 | } | |
03bd2f1a AK |
2460 | break; |
2461 | default: | |
2462 | continue; | |
2463 | } | |
2464 | ||
e4a57350 | 2465 | if (bswap && !fndecl && n.range != 16) |
03bd2f1a AK |
2466 | continue; |
2467 | ||
f351abd6 TP |
2468 | if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type, |
2469 | &n, bswap)) | |
73984f84 | 2470 | changed = true; |
03bd2f1a AK |
2471 | } |
2472 | } | |
2473 | ||
73984f84 TP |
2474 | statistics_counter_event (fun, "16-bit nop implementations found", |
2475 | nop_stats.found_16bit); | |
2476 | statistics_counter_event (fun, "32-bit nop implementations found", | |
2477 | nop_stats.found_32bit); | |
2478 | statistics_counter_event (fun, "64-bit nop implementations found", | |
2479 | nop_stats.found_64bit); | |
be55bfe6 | 2480 | statistics_counter_event (fun, "16-bit bswap implementations found", |
1df855ce | 2481 | bswap_stats.found_16bit); |
be55bfe6 | 2482 | statistics_counter_event (fun, "32-bit bswap implementations found", |
4da3b811 | 2483 | bswap_stats.found_32bit); |
be55bfe6 | 2484 | statistics_counter_event (fun, "64-bit bswap implementations found", |
4da3b811 NF |
2485 | bswap_stats.found_64bit); |
2486 | ||
3bea341f | 2487 | return (changed ? TODO_update_ssa : 0); |
03bd2f1a AK |
2488 | } |
2489 | ||
27a4cd48 DM |
2490 | } // anon namespace |
2491 | ||
2492 | gimple_opt_pass * | |
2493 | make_pass_optimize_bswap (gcc::context *ctxt) | |
2494 | { | |
2495 | return new pass_optimize_bswap (ctxt); | |
2496 | } | |
2497 | ||
7ab6a828 RE |
2498 | /* Return true if stmt is a type conversion operation that can be stripped |
2499 | when used in a widening multiply operation. */ | |
2500 | static bool | |
2501 | widening_mult_conversion_strippable_p (tree result_type, gimple stmt) | |
2502 | { | |
2503 | enum tree_code rhs_code = gimple_assign_rhs_code (stmt); | |
2504 | ||
2505 | if (TREE_CODE (result_type) == INTEGER_TYPE) | |
2506 | { | |
2507 | tree op_type; | |
2508 | tree inner_op_type; | |
2509 | ||
2510 | if (!CONVERT_EXPR_CODE_P (rhs_code)) | |
2511 | return false; | |
2512 | ||
2513 | op_type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
2514 | ||
2515 | /* If the type of OP has the same precision as the result, then | |
2516 | we can strip this conversion. The multiply operation will be | |
2517 | selected to create the correct extension as a by-product. */ | |
2518 | if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type)) | |
2519 | return true; | |
2520 | ||
2521 | /* We can also strip a conversion if it preserves the signed-ness of | |
2522 | the operation and doesn't narrow the range. */ | |
2523 | inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); | |
2524 | ||
e919e5bf RE |
2525 | /* If the inner-most type is unsigned, then we can strip any |
2526 | intermediate widening operation. If it's signed, then the | |
2527 | intermediate widening operation must also be signed. */ | |
2528 | if ((TYPE_UNSIGNED (inner_op_type) | |
2529 | || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type)) | |
7ab6a828 RE |
2530 | && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type)) |
2531 | return true; | |
2532 | ||
2533 | return false; | |
2534 | } | |
2535 | ||
2536 | return rhs_code == FIXED_CONVERT_EXPR; | |
2537 | } | |
2538 | ||
26a855d7 AS |
2539 | /* Return true if RHS is a suitable operand for a widening multiplication, |
2540 | assuming a target type of TYPE. | |
1a39adae RS |
2541 | There are two cases: |
2542 | ||
5dfe80ba AS |
2543 | - RHS makes some value at least twice as wide. Store that value |
2544 | in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. | |
1a39adae RS |
2545 | |
2546 | - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, | |
2547 | but leave *TYPE_OUT untouched. */ | |
0354c0c7 BS |
2548 | |
2549 | static bool | |
26a855d7 AS |
2550 | is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, |
2551 | tree *new_rhs_out) | |
1a39adae RS |
2552 | { |
2553 | gimple stmt; | |
26a855d7 | 2554 | tree type1, rhs1; |
1a39adae RS |
2555 | |
2556 | if (TREE_CODE (rhs) == SSA_NAME) | |
2557 | { | |
1a39adae | 2558 | stmt = SSA_NAME_DEF_STMT (rhs); |
26a855d7 AS |
2559 | if (is_gimple_assign (stmt)) |
2560 | { | |
7ab6a828 | 2561 | if (! widening_mult_conversion_strippable_p (type, stmt)) |
26a855d7 AS |
2562 | rhs1 = rhs; |
2563 | else | |
a6f969f4 AS |
2564 | { |
2565 | rhs1 = gimple_assign_rhs1 (stmt); | |
2566 | ||
2567 | if (TREE_CODE (rhs1) == INTEGER_CST) | |
2568 | { | |
2569 | *new_rhs_out = rhs1; | |
2570 | *type_out = NULL; | |
2571 | return true; | |
2572 | } | |
2573 | } | |
26a855d7 AS |
2574 | } |
2575 | else | |
2576 | rhs1 = rhs; | |
1a39adae | 2577 | |
1a39adae | 2578 | type1 = TREE_TYPE (rhs1); |
26a855d7 | 2579 | |
1a39adae | 2580 | if (TREE_CODE (type1) != TREE_CODE (type) |
5dfe80ba | 2581 | || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) |
1a39adae RS |
2582 | return false; |
2583 | ||
2584 | *new_rhs_out = rhs1; | |
2585 | *type_out = type1; | |
2586 | return true; | |
2587 | } | |
2588 | ||
2589 | if (TREE_CODE (rhs) == INTEGER_CST) | |
2590 | { | |
2591 | *new_rhs_out = rhs; | |
2592 | *type_out = NULL; | |
2593 | return true; | |
2594 | } | |
2595 | ||
2596 | return false; | |
2597 | } | |
2598 | ||
26a855d7 AS |
2599 | /* Return true if STMT performs a widening multiplication, assuming the |
2600 | output type is TYPE. If so, store the unwidened types of the operands | |
2601 | in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and | |
2602 | *RHS2_OUT such that converting those operands to types *TYPE1_OUT | |
2603 | and *TYPE2_OUT would give the operands of the multiplication. */ | |
1a39adae RS |
2604 | |
2605 | static bool | |
3d71881d | 2606 | is_widening_mult_p (gimple stmt, |
1a39adae RS |
2607 | tree *type1_out, tree *rhs1_out, |
2608 | tree *type2_out, tree *rhs2_out) | |
0354c0c7 | 2609 | { |
3d71881d AS |
2610 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
2611 | ||
1a39adae RS |
2612 | if (TREE_CODE (type) != INTEGER_TYPE |
2613 | && TREE_CODE (type) != FIXED_POINT_TYPE) | |
2614 | return false; | |
0354c0c7 | 2615 | |
26a855d7 AS |
2616 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, |
2617 | rhs1_out)) | |
0354c0c7 BS |
2618 | return false; |
2619 | ||
26a855d7 AS |
2620 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, |
2621 | rhs2_out)) | |
1a39adae | 2622 | return false; |
0354c0c7 | 2623 | |
1a39adae | 2624 | if (*type1_out == NULL) |
0354c0c7 | 2625 | { |
1a39adae | 2626 | if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out)) |
0354c0c7 | 2627 | return false; |
1a39adae | 2628 | *type1_out = *type2_out; |
0354c0c7 | 2629 | } |
0354c0c7 | 2630 | |
1a39adae | 2631 | if (*type2_out == NULL) |
0354c0c7 | 2632 | { |
1a39adae | 2633 | if (!int_fits_type_p (*rhs2_out, *type1_out)) |
0354c0c7 | 2634 | return false; |
1a39adae | 2635 | *type2_out = *type1_out; |
0354c0c7 | 2636 | } |
0354c0c7 | 2637 | |
ff63d754 AS |
2638 | /* Ensure that the larger of the two operands comes first. */ |
2639 | if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) | |
2640 | { | |
2641 | tree tmp; | |
2642 | tmp = *type1_out; | |
2643 | *type1_out = *type2_out; | |
2644 | *type2_out = tmp; | |
2645 | tmp = *rhs1_out; | |
2646 | *rhs1_out = *rhs2_out; | |
2647 | *rhs2_out = tmp; | |
2648 | } | |
5dfe80ba | 2649 | |
1a39adae RS |
2650 | return true; |
2651 | } | |
0354c0c7 | 2652 | |
1a39adae RS |
2653 | /* Process a single gimple statement STMT, which has a MULT_EXPR as |
2654 | its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return | |
2655 | value is true iff we converted the statement. */ | |
2656 | ||
2657 | static bool | |
5dfe80ba | 2658 | convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) |
1a39adae | 2659 | { |
83d5977e | 2660 | tree lhs, rhs1, rhs2, type, type1, type2; |
1a39adae | 2661 | enum insn_code handler; |
ef4bddc2 | 2662 | machine_mode to_mode, from_mode, actual_mode; |
a484f6ba | 2663 | optab op; |
5dfe80ba AS |
2664 | int actual_precision; |
2665 | location_t loc = gimple_location (stmt); | |
db719f50 | 2666 | bool from_unsigned1, from_unsigned2; |
1a39adae RS |
2667 | |
2668 | lhs = gimple_assign_lhs (stmt); | |
2669 | type = TREE_TYPE (lhs); | |
2670 | if (TREE_CODE (type) != INTEGER_TYPE) | |
0354c0c7 BS |
2671 | return false; |
2672 | ||
3d71881d | 2673 | if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) |
0354c0c7 BS |
2674 | return false; |
2675 | ||
a484f6ba AS |
2676 | to_mode = TYPE_MODE (type); |
2677 | from_mode = TYPE_MODE (type1); | |
db719f50 AS |
2678 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2679 | from_unsigned2 = TYPE_UNSIGNED (type2); | |
a484f6ba | 2680 | |
db719f50 | 2681 | if (from_unsigned1 && from_unsigned2) |
a484f6ba | 2682 | op = umul_widen_optab; |
db719f50 | 2683 | else if (!from_unsigned1 && !from_unsigned2) |
a484f6ba | 2684 | op = smul_widen_optab; |
0354c0c7 | 2685 | else |
a484f6ba AS |
2686 | op = usmul_widen_optab; |
2687 | ||
5dfe80ba AS |
2688 | handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, |
2689 | 0, &actual_mode); | |
1a39adae RS |
2690 | |
2691 | if (handler == CODE_FOR_nothing) | |
db719f50 AS |
2692 | { |
2693 | if (op != smul_widen_optab) | |
2694 | { | |
6a228c2c AS |
2695 | /* We can use a signed multiply with unsigned types as long as |
2696 | there is a wider mode to use, or it is the smaller of the two | |
2697 | types that is unsigned. Note that type1 >= type2, always. */ | |
2698 | if ((TYPE_UNSIGNED (type1) | |
2699 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) | |
2700 | || (TYPE_UNSIGNED (type2) | |
2701 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) | |
2702 | { | |
2703 | from_mode = GET_MODE_WIDER_MODE (from_mode); | |
2704 | if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) | |
2705 | return false; | |
2706 | } | |
db719f50 AS |
2707 | |
2708 | op = smul_widen_optab; | |
2709 | handler = find_widening_optab_handler_and_mode (op, to_mode, | |
2710 | from_mode, 0, | |
2711 | &actual_mode); | |
2712 | ||
2713 | if (handler == CODE_FOR_nothing) | |
2714 | return false; | |
2715 | ||
2716 | from_unsigned1 = from_unsigned2 = false; | |
2717 | } | |
2718 | else | |
2719 | return false; | |
2720 | } | |
1a39adae | 2721 | |
5dfe80ba AS |
2722 | /* Ensure that the inputs to the handler are in the correct precison |
2723 | for the opcode. This will be the full mode size. */ | |
2724 | actual_precision = GET_MODE_PRECISION (actual_mode); | |
f409d239 RG |
2725 | if (2 * actual_precision > TYPE_PRECISION (type)) |
2726 | return false; | |
db719f50 AS |
2727 | if (actual_precision != TYPE_PRECISION (type1) |
2728 | || from_unsigned1 != TYPE_UNSIGNED (type1)) | |
83d5977e RG |
2729 | rhs1 = build_and_insert_cast (gsi, loc, |
2730 | build_nonstandard_integer_type | |
2731 | (actual_precision, from_unsigned1), rhs1); | |
db719f50 AS |
2732 | if (actual_precision != TYPE_PRECISION (type2) |
2733 | || from_unsigned2 != TYPE_UNSIGNED (type2)) | |
83d5977e RG |
2734 | rhs2 = build_and_insert_cast (gsi, loc, |
2735 | build_nonstandard_integer_type | |
2736 | (actual_precision, from_unsigned2), rhs2); | |
5dfe80ba | 2737 | |
a6f969f4 AS |
2738 | /* Handle constants. */ |
2739 | if (TREE_CODE (rhs1) == INTEGER_CST) | |
2740 | rhs1 = fold_convert (type1, rhs1); | |
2741 | if (TREE_CODE (rhs2) == INTEGER_CST) | |
2742 | rhs2 = fold_convert (type2, rhs2); | |
2743 | ||
5dfe80ba AS |
2744 | gimple_assign_set_rhs1 (stmt, rhs1); |
2745 | gimple_assign_set_rhs2 (stmt, rhs2); | |
0354c0c7 BS |
2746 | gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); |
2747 | update_stmt (stmt); | |
4da3b811 | 2748 | widen_mul_stats.widen_mults_inserted++; |
0354c0c7 BS |
2749 | return true; |
2750 | } | |
2751 | ||
2752 | /* Process a single gimple statement STMT, which is found at the | |
2753 | iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its | |
2754 | rhs (given by CODE), and try to convert it into a | |
2755 | WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value | |
2756 | is true iff we converted the statement. */ | |
2757 | ||
2758 | static bool | |
2759 | convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, | |
2760 | enum tree_code code) | |
2761 | { | |
2762 | gimple rhs1_stmt = NULL, rhs2_stmt = NULL; | |
cefb4d4f | 2763 | gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt; |
83d5977e | 2764 | tree type, type1, type2, optype; |
0354c0c7 BS |
2765 | tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; |
2766 | enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; | |
2767 | optab this_optab; | |
2768 | enum tree_code wmult_code; | |
5dfe80ba | 2769 | enum insn_code handler; |
ef4bddc2 | 2770 | machine_mode to_mode, from_mode, actual_mode; |
5dfe80ba AS |
2771 | location_t loc = gimple_location (stmt); |
2772 | int actual_precision; | |
db719f50 | 2773 | bool from_unsigned1, from_unsigned2; |
0354c0c7 BS |
2774 | |
2775 | lhs = gimple_assign_lhs (stmt); | |
2776 | type = TREE_TYPE (lhs); | |
1a39adae RS |
2777 | if (TREE_CODE (type) != INTEGER_TYPE |
2778 | && TREE_CODE (type) != FIXED_POINT_TYPE) | |
0354c0c7 BS |
2779 | return false; |
2780 | ||
2781 | if (code == MINUS_EXPR) | |
2782 | wmult_code = WIDEN_MULT_MINUS_EXPR; | |
2783 | else | |
2784 | wmult_code = WIDEN_MULT_PLUS_EXPR; | |
2785 | ||
0354c0c7 BS |
2786 | rhs1 = gimple_assign_rhs1 (stmt); |
2787 | rhs2 = gimple_assign_rhs2 (stmt); | |
2788 | ||
2789 | if (TREE_CODE (rhs1) == SSA_NAME) | |
2790 | { | |
2791 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
2792 | if (is_gimple_assign (rhs1_stmt)) | |
2793 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); | |
2794 | } | |
0354c0c7 BS |
2795 | |
2796 | if (TREE_CODE (rhs2) == SSA_NAME) | |
2797 | { | |
2798 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
2799 | if (is_gimple_assign (rhs2_stmt)) | |
2800 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); | |
2801 | } | |
0354c0c7 | 2802 | |
cefb4d4f AS |
2803 | /* Allow for one conversion statement between the multiply |
2804 | and addition/subtraction statement. If there are more than | |
2805 | one conversions then we assume they would invalidate this | |
2806 | transformation. If that's not the case then they should have | |
2807 | been folded before now. */ | |
2808 | if (CONVERT_EXPR_CODE_P (rhs1_code)) | |
2809 | { | |
2810 | conv1_stmt = rhs1_stmt; | |
2811 | rhs1 = gimple_assign_rhs1 (rhs1_stmt); | |
2812 | if (TREE_CODE (rhs1) == SSA_NAME) | |
2813 | { | |
2814 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
2815 | if (is_gimple_assign (rhs1_stmt)) | |
2816 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); | |
2817 | } | |
2818 | else | |
2819 | return false; | |
2820 | } | |
2821 | if (CONVERT_EXPR_CODE_P (rhs2_code)) | |
2822 | { | |
2823 | conv2_stmt = rhs2_stmt; | |
2824 | rhs2 = gimple_assign_rhs1 (rhs2_stmt); | |
2825 | if (TREE_CODE (rhs2) == SSA_NAME) | |
2826 | { | |
2827 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
2828 | if (is_gimple_assign (rhs2_stmt)) | |
2829 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); | |
2830 | } | |
2831 | else | |
2832 | return false; | |
2833 | } | |
2834 | ||
5dfe80ba AS |
2835 | /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call |
2836 | is_widening_mult_p, but we still need the rhs returns. | |
2837 | ||
2838 | It might also appear that it would be sufficient to use the existing | |
2839 | operands of the widening multiply, but that would limit the choice of | |
42917d01 YZ |
2840 | multiply-and-accumulate instructions. |
2841 | ||
2842 | If the widened-multiplication result has more than one uses, it is | |
2843 | probably wiser not to do the conversion. */ | |
5dfe80ba AS |
2844 | if (code == PLUS_EXPR |
2845 | && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) | |
0354c0c7 | 2846 | { |
42917d01 YZ |
2847 | if (!has_single_use (rhs1) |
2848 | || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, | |
2849 | &type2, &mult_rhs2)) | |
0354c0c7 | 2850 | return false; |
1a39adae | 2851 | add_rhs = rhs2; |
cefb4d4f | 2852 | conv_stmt = conv1_stmt; |
0354c0c7 | 2853 | } |
5dfe80ba | 2854 | else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) |
0354c0c7 | 2855 | { |
42917d01 YZ |
2856 | if (!has_single_use (rhs2) |
2857 | || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, | |
2858 | &type2, &mult_rhs2)) | |
0354c0c7 | 2859 | return false; |
1a39adae | 2860 | add_rhs = rhs1; |
cefb4d4f | 2861 | conv_stmt = conv2_stmt; |
0354c0c7 | 2862 | } |
0354c0c7 BS |
2863 | else |
2864 | return false; | |
2865 | ||
5dfe80ba AS |
2866 | to_mode = TYPE_MODE (type); |
2867 | from_mode = TYPE_MODE (type1); | |
db719f50 AS |
2868 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2869 | from_unsigned2 = TYPE_UNSIGNED (type2); | |
3752b2ab | 2870 | optype = type1; |
5dfe80ba | 2871 | |
db719f50 AS |
2872 | /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ |
2873 | if (from_unsigned1 != from_unsigned2) | |
2874 | { | |
3752b2ab RS |
2875 | if (!INTEGRAL_TYPE_P (type)) |
2876 | return false; | |
6a228c2c AS |
2877 | /* We can use a signed multiply with unsigned types as long as |
2878 | there is a wider mode to use, or it is the smaller of the two | |
2879 | types that is unsigned. Note that type1 >= type2, always. */ | |
2880 | if ((from_unsigned1 | |
2881 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) | |
2882 | || (from_unsigned2 | |
2883 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) | |
db719f50 | 2884 | { |
6a228c2c AS |
2885 | from_mode = GET_MODE_WIDER_MODE (from_mode); |
2886 | if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode)) | |
2887 | return false; | |
db719f50 | 2888 | } |
6a228c2c AS |
2889 | |
2890 | from_unsigned1 = from_unsigned2 = false; | |
3752b2ab RS |
2891 | optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode), |
2892 | false); | |
db719f50 | 2893 | } |
9eab7f91 | 2894 | |
cefb4d4f AS |
2895 | /* If there was a conversion between the multiply and addition |
2896 | then we need to make sure it fits a multiply-and-accumulate. | |
2897 | The should be a single mode change which does not change the | |
2898 | value. */ | |
2899 | if (conv_stmt) | |
2900 | { | |
db719f50 | 2901 | /* We use the original, unmodified data types for this. */ |
cefb4d4f AS |
2902 | tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); |
2903 | tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); | |
2904 | int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); | |
2905 | bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); | |
2906 | ||
2907 | if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) | |
2908 | { | |
2909 | /* Conversion is a truncate. */ | |
2910 | if (TYPE_PRECISION (to_type) < data_size) | |
2911 | return false; | |
2912 | } | |
2913 | else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) | |
2914 | { | |
2915 | /* Conversion is an extend. Check it's the right sort. */ | |
2916 | if (TYPE_UNSIGNED (from_type) != is_unsigned | |
2917 | && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) | |
2918 | return false; | |
2919 | } | |
2920 | /* else convert is a no-op for our purposes. */ | |
2921 | } | |
2922 | ||
9eab7f91 RS |
2923 | /* Verify that the machine can perform a widening multiply |
2924 | accumulate in this mode/signedness combination, otherwise | |
2925 | this transformation is likely to pessimize code. */ | |
db719f50 | 2926 | this_optab = optab_for_tree_code (wmult_code, optype, optab_default); |
5dfe80ba AS |
2927 | handler = find_widening_optab_handler_and_mode (this_optab, to_mode, |
2928 | from_mode, 0, &actual_mode); | |
2929 | ||
2930 | if (handler == CODE_FOR_nothing) | |
9eab7f91 RS |
2931 | return false; |
2932 | ||
5dfe80ba AS |
2933 | /* Ensure that the inputs to the handler are in the correct precison |
2934 | for the opcode. This will be the full mode size. */ | |
2935 | actual_precision = GET_MODE_PRECISION (actual_mode); | |
db719f50 AS |
2936 | if (actual_precision != TYPE_PRECISION (type1) |
2937 | || from_unsigned1 != TYPE_UNSIGNED (type1)) | |
83d5977e RG |
2938 | mult_rhs1 = build_and_insert_cast (gsi, loc, |
2939 | build_nonstandard_integer_type | |
2940 | (actual_precision, from_unsigned1), | |
2941 | mult_rhs1); | |
db719f50 AS |
2942 | if (actual_precision != TYPE_PRECISION (type2) |
2943 | || from_unsigned2 != TYPE_UNSIGNED (type2)) | |
83d5977e RG |
2944 | mult_rhs2 = build_and_insert_cast (gsi, loc, |
2945 | build_nonstandard_integer_type | |
2946 | (actual_precision, from_unsigned2), | |
2947 | mult_rhs2); | |
0354c0c7 | 2948 | |
75161d2c | 2949 | if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) |
83d5977e | 2950 | add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs); |
75161d2c | 2951 | |
a6f969f4 AS |
2952 | /* Handle constants. */ |
2953 | if (TREE_CODE (mult_rhs1) == INTEGER_CST) | |
c3c5a1cc | 2954 | mult_rhs1 = fold_convert (type1, mult_rhs1); |
a6f969f4 | 2955 | if (TREE_CODE (mult_rhs2) == INTEGER_CST) |
c3c5a1cc | 2956 | mult_rhs2 = fold_convert (type2, mult_rhs2); |
a6f969f4 | 2957 | |
00d66391 JJ |
2958 | gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2, |
2959 | add_rhs); | |
0354c0c7 | 2960 | update_stmt (gsi_stmt (*gsi)); |
4da3b811 | 2961 | widen_mul_stats.maccs_inserted++; |
0354c0c7 BS |
2962 | return true; |
2963 | } | |
2964 | ||
4dbed5f6 RG |
2965 | /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 |
2966 | with uses in additions and subtractions to form fused multiply-add | |
2967 | operations. Returns true if successful and MUL_STMT should be removed. */ | |
16949072 RG |
2968 | |
2969 | static bool | |
4dbed5f6 | 2970 | convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2) |
16949072 | 2971 | { |
4dbed5f6 | 2972 | tree mul_result = gimple_get_lhs (mul_stmt); |
16949072 | 2973 | tree type = TREE_TYPE (mul_result); |
538dd0b7 DM |
2974 | gimple use_stmt, neguse_stmt; |
2975 | gassign *fma_stmt; | |
16949072 RG |
2976 | use_operand_p use_p; |
2977 | imm_use_iterator imm_iter; | |
2978 | ||
2979 | if (FLOAT_TYPE_P (type) | |
2980 | && flag_fp_contract_mode == FP_CONTRACT_OFF) | |
2981 | return false; | |
2982 | ||
2983 | /* We don't want to do bitfield reduction ops. */ | |
2984 | if (INTEGRAL_TYPE_P (type) | |
2985 | && (TYPE_PRECISION (type) | |
2986 | != GET_MODE_PRECISION (TYPE_MODE (type)))) | |
2987 | return false; | |
2988 | ||
2989 | /* If the target doesn't support it, don't generate it. We assume that | |
2990 | if fma isn't available then fms, fnma or fnms are not either. */ | |
2991 | if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing) | |
2992 | return false; | |
2993 | ||
0fb808ea JJ |
2994 | /* If the multiplication has zero uses, it is kept around probably because |
2995 | of -fnon-call-exceptions. Don't optimize it away in that case, | |
2996 | it is DCE job. */ | |
2997 | if (has_zero_uses (mul_result)) | |
2998 | return false; | |
2999 | ||
16949072 RG |
3000 | /* Make sure that the multiplication statement becomes dead after |
3001 | the transformation, thus that all uses are transformed to FMAs. | |
3002 | This means we assume that an FMA operation has the same cost | |
3003 | as an addition. */ | |
3004 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) | |
3005 | { | |
3006 | enum tree_code use_code; | |
a5f09e73 RH |
3007 | tree result = mul_result; |
3008 | bool negate_p = false; | |
16949072 RG |
3009 | |
3010 | use_stmt = USE_STMT (use_p); | |
3011 | ||
76b14c29 RG |
3012 | if (is_gimple_debug (use_stmt)) |
3013 | continue; | |
3014 | ||
16949072 RG |
3015 | /* For now restrict this operations to single basic blocks. In theory |
3016 | we would want to support sinking the multiplication in | |
3017 | m = a*b; | |
3018 | if () | |
3019 | ma = m + c; | |
3020 | else | |
3021 | d = m; | |
3022 | to form a fma in the then block and sink the multiplication to the | |
3023 | else block. */ | |
3024 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | |
3025 | return false; | |
3026 | ||
a5f09e73 | 3027 | if (!is_gimple_assign (use_stmt)) |
16949072 RG |
3028 | return false; |
3029 | ||
a5f09e73 RH |
3030 | use_code = gimple_assign_rhs_code (use_stmt); |
3031 | ||
3032 | /* A negate on the multiplication leads to FNMA. */ | |
3033 | if (use_code == NEGATE_EXPR) | |
3034 | { | |
a758fd67 | 3035 | ssa_op_iter iter; |
dae957ae | 3036 | use_operand_p usep; |
a758fd67 | 3037 | |
a5f09e73 RH |
3038 | result = gimple_assign_lhs (use_stmt); |
3039 | ||
3040 | /* Make sure the negate statement becomes dead with this | |
3041 | single transformation. */ | |
3042 | if (!single_imm_use (gimple_assign_lhs (use_stmt), | |
3043 | &use_p, &neguse_stmt)) | |
3044 | return false; | |
3045 | ||
a758fd67 | 3046 | /* Make sure the multiplication isn't also used on that stmt. */ |
dae957ae RG |
3047 | FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE) |
3048 | if (USE_FROM_PTR (usep) == mul_result) | |
a758fd67 RG |
3049 | return false; |
3050 | ||
a5f09e73 RH |
3051 | /* Re-validate. */ |
3052 | use_stmt = neguse_stmt; | |
3053 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | |
3054 | return false; | |
3055 | if (!is_gimple_assign (use_stmt)) | |
3056 | return false; | |
3057 | ||
3058 | use_code = gimple_assign_rhs_code (use_stmt); | |
3059 | negate_p = true; | |
3060 | } | |
16949072 | 3061 | |
a5f09e73 RH |
3062 | switch (use_code) |
3063 | { | |
3064 | case MINUS_EXPR: | |
a1d8aa4b RH |
3065 | if (gimple_assign_rhs2 (use_stmt) == result) |
3066 | negate_p = !negate_p; | |
3067 | break; | |
a5f09e73 | 3068 | case PLUS_EXPR: |
a5f09e73 | 3069 | break; |
a5f09e73 RH |
3070 | default: |
3071 | /* FMA can only be formed from PLUS and MINUS. */ | |
3072 | return false; | |
3073 | } | |
16949072 | 3074 | |
ee8a9b7b JR |
3075 | /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed |
3076 | by a MULT_EXPR that we'll visit later, we might be able to | |
3077 | get a more profitable match with fnma. | |
3078 | OTOH, if we don't, a negate / fma pair has likely lower latency | |
3079 | than a mult / subtract pair. */ | |
3080 | if (use_code == MINUS_EXPR && !negate_p | |
3081 | && gimple_assign_rhs1 (use_stmt) == result | |
3082 | && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing | |
3083 | && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing) | |
3084 | { | |
3085 | tree rhs2 = gimple_assign_rhs2 (use_stmt); | |
ee8a9b7b | 3086 | |
95c03b36 JR |
3087 | if (TREE_CODE (rhs2) == SSA_NAME) |
3088 | { | |
3089 | gimple stmt2 = SSA_NAME_DEF_STMT (rhs2); | |
3090 | if (has_single_use (rhs2) | |
3091 | && is_gimple_assign (stmt2) | |
3092 | && gimple_assign_rhs_code (stmt2) == MULT_EXPR) | |
3093 | return false; | |
3094 | } | |
ee8a9b7b JR |
3095 | } |
3096 | ||
a5f09e73 RH |
3097 | /* We can't handle a * b + a * b. */ |
3098 | if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt)) | |
3099 | return false; | |
a1d8aa4b RH |
3100 | |
3101 | /* While it is possible to validate whether or not the exact form | |
3102 | that we've recognized is available in the backend, the assumption | |
3103 | is that the transformation is never a loss. For instance, suppose | |
3104 | the target only has the plain FMA pattern available. Consider | |
3105 | a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which | |
3106 | is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we | |
3107 | still have 3 operations, but in the FMA form the two NEGs are | |
073a8998 | 3108 | independent and could be run in parallel. */ |
16949072 RG |
3109 | } |
3110 | ||
3111 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) | |
3112 | { | |
16949072 | 3113 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
76b14c29 | 3114 | enum tree_code use_code; |
4dbed5f6 | 3115 | tree addop, mulop1 = op1, result = mul_result; |
a5f09e73 | 3116 | bool negate_p = false; |
16949072 | 3117 | |
76b14c29 RG |
3118 | if (is_gimple_debug (use_stmt)) |
3119 | continue; | |
3120 | ||
3121 | use_code = gimple_assign_rhs_code (use_stmt); | |
a5f09e73 RH |
3122 | if (use_code == NEGATE_EXPR) |
3123 | { | |
3124 | result = gimple_assign_lhs (use_stmt); | |
3125 | single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt); | |
3126 | gsi_remove (&gsi, true); | |
3127 | release_defs (use_stmt); | |
3128 | ||
3129 | use_stmt = neguse_stmt; | |
3130 | gsi = gsi_for_stmt (use_stmt); | |
3131 | use_code = gimple_assign_rhs_code (use_stmt); | |
3132 | negate_p = true; | |
3133 | } | |
3134 | ||
3135 | if (gimple_assign_rhs1 (use_stmt) == result) | |
16949072 RG |
3136 | { |
3137 | addop = gimple_assign_rhs2 (use_stmt); | |
3138 | /* a * b - c -> a * b + (-c) */ | |
3139 | if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | |
3140 | addop = force_gimple_operand_gsi (&gsi, | |
3141 | build1 (NEGATE_EXPR, | |
3142 | type, addop), | |
3143 | true, NULL_TREE, true, | |
3144 | GSI_SAME_STMT); | |
3145 | } | |
3146 | else | |
3147 | { | |
3148 | addop = gimple_assign_rhs1 (use_stmt); | |
3149 | /* a - b * c -> (-b) * c + a */ | |
3150 | if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | |
a5f09e73 | 3151 | negate_p = !negate_p; |
16949072 RG |
3152 | } |
3153 | ||
a5f09e73 RH |
3154 | if (negate_p) |
3155 | mulop1 = force_gimple_operand_gsi (&gsi, | |
3156 | build1 (NEGATE_EXPR, | |
3157 | type, mulop1), | |
3158 | true, NULL_TREE, true, | |
3159 | GSI_SAME_STMT); | |
3160 | ||
0d0e4a03 JJ |
3161 | fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt), |
3162 | FMA_EXPR, mulop1, op2, addop); | |
16949072 | 3163 | gsi_replace (&gsi, fma_stmt, true); |
4da3b811 | 3164 | widen_mul_stats.fmas_inserted++; |
16949072 RG |
3165 | } |
3166 | ||
3167 | return true; | |
3168 | } | |
3169 | ||
5b58b39b BS |
3170 | /* Find integer multiplications where the operands are extended from |
3171 | smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR | |
3172 | where appropriate. */ | |
3173 | ||
be55bfe6 TS |
3174 | namespace { |
3175 | ||
3176 | const pass_data pass_data_optimize_widening_mul = | |
3177 | { | |
3178 | GIMPLE_PASS, /* type */ | |
3179 | "widening_mul", /* name */ | |
3180 | OPTGROUP_NONE, /* optinfo_flags */ | |
be55bfe6 TS |
3181 | TV_NONE, /* tv_id */ |
3182 | PROP_ssa, /* properties_required */ | |
3183 | 0, /* properties_provided */ | |
3184 | 0, /* properties_destroyed */ | |
3185 | 0, /* todo_flags_start */ | |
3bea341f | 3186 | TODO_update_ssa, /* todo_flags_finish */ |
be55bfe6 TS |
3187 | }; |
3188 | ||
3189 | class pass_optimize_widening_mul : public gimple_opt_pass | |
3190 | { | |
3191 | public: | |
3192 | pass_optimize_widening_mul (gcc::context *ctxt) | |
3193 | : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt) | |
3194 | {} | |
3195 | ||
3196 | /* opt_pass methods: */ | |
3197 | virtual bool gate (function *) | |
3198 | { | |
3199 | return flag_expensive_optimizations && optimize; | |
3200 | } | |
3201 | ||
3202 | virtual unsigned int execute (function *); | |
3203 | ||
3204 | }; // class pass_optimize_widening_mul | |
3205 | ||
3206 | unsigned int | |
3207 | pass_optimize_widening_mul::execute (function *fun) | |
5b58b39b | 3208 | { |
5b58b39b | 3209 | basic_block bb; |
4dbed5f6 | 3210 | bool cfg_changed = false; |
5b58b39b | 3211 | |
4da3b811 NF |
3212 | memset (&widen_mul_stats, 0, sizeof (widen_mul_stats)); |
3213 | ||
be55bfe6 | 3214 | FOR_EACH_BB_FN (bb, fun) |
5b58b39b BS |
3215 | { |
3216 | gimple_stmt_iterator gsi; | |
3217 | ||
16949072 | 3218 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) |
5b58b39b BS |
3219 | { |
3220 | gimple stmt = gsi_stmt (gsi); | |
0354c0c7 | 3221 | enum tree_code code; |
5b58b39b | 3222 | |
16949072 RG |
3223 | if (is_gimple_assign (stmt)) |
3224 | { | |
3225 | code = gimple_assign_rhs_code (stmt); | |
3226 | switch (code) | |
3227 | { | |
3228 | case MULT_EXPR: | |
5dfe80ba | 3229 | if (!convert_mult_to_widen (stmt, &gsi) |
4dbed5f6 RG |
3230 | && convert_mult_to_fma (stmt, |
3231 | gimple_assign_rhs1 (stmt), | |
3232 | gimple_assign_rhs2 (stmt))) | |
16949072 RG |
3233 | { |
3234 | gsi_remove (&gsi, true); | |
3235 | release_defs (stmt); | |
3236 | continue; | |
3237 | } | |
3238 | break; | |
3239 | ||
3240 | case PLUS_EXPR: | |
3241 | case MINUS_EXPR: | |
3242 | convert_plusminus_to_widen (&gsi, stmt, code); | |
3243 | break; | |
5b58b39b | 3244 | |
16949072 RG |
3245 | default:; |
3246 | } | |
3247 | } | |
85a47bed RG |
3248 | else if (is_gimple_call (stmt) |
3249 | && gimple_call_lhs (stmt)) | |
4dbed5f6 RG |
3250 | { |
3251 | tree fndecl = gimple_call_fndecl (stmt); | |
3252 | if (fndecl | |
3253 | && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) | |
3254 | { | |
3255 | switch (DECL_FUNCTION_CODE (fndecl)) | |
3256 | { | |
3257 | case BUILT_IN_POWF: | |
3258 | case BUILT_IN_POW: | |
3259 | case BUILT_IN_POWL: | |
3260 | if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST | |
3261 | && REAL_VALUES_EQUAL | |
3262 | (TREE_REAL_CST (gimple_call_arg (stmt, 1)), | |
3263 | dconst2) | |
3264 | && convert_mult_to_fma (stmt, | |
3265 | gimple_call_arg (stmt, 0), | |
3266 | gimple_call_arg (stmt, 0))) | |
3267 | { | |
0b238a9b | 3268 | unlink_stmt_vdef (stmt); |
b5b3ec3e RG |
3269 | if (gsi_remove (&gsi, true) |
3270 | && gimple_purge_dead_eh_edges (bb)) | |
4dbed5f6 | 3271 | cfg_changed = true; |
b5b3ec3e | 3272 | release_defs (stmt); |
4dbed5f6 RG |
3273 | continue; |
3274 | } | |
3275 | break; | |
3276 | ||
3277 | default:; | |
3278 | } | |
3279 | } | |
3280 | } | |
16949072 | 3281 | gsi_next (&gsi); |
5b58b39b BS |
3282 | } |
3283 | } | |
0354c0c7 | 3284 | |
be55bfe6 | 3285 | statistics_counter_event (fun, "widening multiplications inserted", |
4da3b811 | 3286 | widen_mul_stats.widen_mults_inserted); |
be55bfe6 | 3287 | statistics_counter_event (fun, "widening maccs inserted", |
4da3b811 | 3288 | widen_mul_stats.maccs_inserted); |
be55bfe6 | 3289 | statistics_counter_event (fun, "fused multiply-adds inserted", |
4da3b811 NF |
3290 | widen_mul_stats.fmas_inserted); |
3291 | ||
4dbed5f6 | 3292 | return cfg_changed ? TODO_cleanup_cfg : 0; |
5b58b39b BS |
3293 | } |
3294 | ||
27a4cd48 DM |
3295 | } // anon namespace |
3296 | ||
3297 | gimple_opt_pass * | |
3298 | make_pass_optimize_widening_mul (gcc::context *ctxt) | |
3299 | { | |
3300 | return new pass_optimize_widening_mul (ctxt); | |
3301 | } |