]>
Commit | Line | Data |
---|---|---|
abacb398 | 1 | /* Global, SSA-based optimizations using mathematical identities. |
7cf0dbf3 | 2 | Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 |
3 | Free Software Foundation, Inc. | |
48e1416a | 4 | |
abacb398 | 5 | This file is part of GCC. |
48e1416a | 6 | |
abacb398 | 7 | GCC is free software; you can redistribute it and/or modify it |
8 | under the terms of the GNU General Public License as published by the | |
8c4c00c1 | 9 | Free Software Foundation; either version 3, or (at your option) any |
abacb398 | 10 | later version. |
48e1416a | 11 | |
abacb398 | 12 | GCC is distributed in the hope that it will be useful, but WITHOUT |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
48e1416a | 16 | |
abacb398 | 17 | You should have received a copy of the GNU General Public License |
8c4c00c1 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
abacb398 | 20 | |
21 | /* Currently, the only mini-pass in this file tries to CSE reciprocal | |
22 | operations. These are common in sequences such as this one: | |
23 | ||
24 | modulus = sqrt(x*x + y*y + z*z); | |
25 | x = x / modulus; | |
26 | y = y / modulus; | |
27 | z = z / modulus; | |
28 | ||
29 | that can be optimized to | |
30 | ||
31 | modulus = sqrt(x*x + y*y + z*z); | |
32 | rmodulus = 1.0 / modulus; | |
33 | x = x * rmodulus; | |
34 | y = y * rmodulus; | |
35 | z = z * rmodulus; | |
36 | ||
37 | We do this for loop invariant divisors, and with this pass whenever | |
ac70caad | 38 | we notice that a division has the same divisor multiple times. |
39 | ||
40 | Of course, like in PRE, we don't insert a division if a dominator | |
41 | already has one. However, this cannot be done as an extension of | |
42 | PRE for several reasons. | |
43 | ||
44 | First of all, with some experiments it was found out that the | |
45 | transformation is not always useful if there are only two divisions | |
46 | by the same divisor.  This is probably because modern processors | |
47 | can pipeline the divisions; on older, in-order processors it should | |
48 | still be effective to optimize two divisions by the same number. | |
49 | We make this a param, and it shall be called N in the remainder of | |
50 | this comment. | |
51 | ||
52 | Second, if trapping math is active, we have less freedom on where | |
53 | to insert divisions: we can only do so in basic blocks that already | |
54 | contain one. (If divisions don't trap, instead, we can insert | |
55 | divisions elsewhere, which will be in blocks that are common dominators | |
56 | of those that have the division). | |
57 | ||
58 | We really don't want to compute the reciprocal unless a division will | |
59 | be found. To do this, we won't insert the division in a basic block | |
60 | that has less than N divisions *post-dominating* it. | |
61 | ||
62 | The algorithm constructs a subset of the dominator tree, holding the | |
63 | blocks containing the divisions and the common dominators to them, | |
64 | and walk it twice. The first walk is in post-order, and it annotates | |
65 | each block with the number of divisions that post-dominate it: this | |
66 | gives information on where divisions can be inserted profitably. | |
67 | The second walk is in pre-order, and it inserts divisions as explained | |
68 | above, and replaces divisions by multiplications. | |
69 | ||
70 | In the best case, the cost of the pass is O(n_statements). In the | |
71 | worst-case, the cost is due to creating the dominator tree subset, | |
72 | with a cost of O(n_basic_blocks ^ 2); however this can only happen | |
73 | for n_statements / n_basic_blocks statements. So, the amortized cost | |
74 | of creating the dominator tree subset is O(n_basic_blocks) and the | |
75 | worst-case cost of the pass is O(n_statements * n_basic_blocks). | |
76 | ||
77 | More practically, the cost will be small because there are few | |
78 | divisions, and they tend to be in the same basic block, so insert_bb | |
79 | is called very few times. | |
80 | ||
81 | If we did this using domwalk.c, an efficient implementation would have | |
82 | to work on all the variables in a single pass, because we could not | |
83 | work on just a subset of the dominator tree, as we do now, and the | |
84 | cost would also be something like O(n_statements * n_basic_blocks). | |
85 | The data structures would be more complex in order to work on all the | |
86 | variables in a single pass. */ | |
abacb398 | 87 | |
88 | #include "config.h" | |
89 | #include "system.h" | |
90 | #include "coretypes.h" | |
91 | #include "tm.h" | |
92 | #include "flags.h" | |
93 | #include "tree.h" | |
94 | #include "tree-flow.h" | |
abacb398 | 95 | #include "timevar.h" |
96 | #include "tree-pass.h" | |
ac70caad | 97 | #include "alloc-pool.h" |
98 | #include "basic-block.h" | |
99 | #include "target.h" | |
ce084dfc | 100 | #include "gimple-pretty-print.h" |
a7a46268 | 101 | |
102 | /* FIXME: RTL headers have to be included here for optabs. */ | |
103 | #include "rtl.h" /* Because optabs.h wants enum rtx_code. */ | |
104 | #include "expr.h" /* Because optabs.h wants sepops. */ | |
84cc784c | 105 | #include "optabs.h" |
ac70caad | 106 | |
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  One instance exists per interesting block; it lives in
   BB->aux while the pass runs.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};
140 | ||
141 | ||
/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  Head of the sibling list
   built by insert_bb and torn down by free_bb.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static alloc_pool occ_pool;
148 | ||
149 | ||
150 | ||
151 | /* Allocate and return a new struct occurrence for basic block BB, and | |
152 | whose children list is headed by CHILDREN. */ | |
153 | static struct occurrence * | |
154 | occ_new (basic_block bb, struct occurrence *children) | |
abacb398 | 155 | { |
ac70caad | 156 | struct occurrence *occ; |
157 | ||
f0d6e81c | 158 | bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool); |
ac70caad | 159 | memset (occ, 0, sizeof (struct occurrence)); |
160 | ||
161 | occ->bb = bb; | |
162 | occ->children = children; | |
163 | return occ; | |
abacb398 | 164 | } |
165 | ||
ac70caad | 166 | |
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  /* Walk the sibling list; P_OCC always points to the link that holds
     OCC, so OCC can be unlinked in O(1).  */
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
231 | ||
/* Register that we found a division in BB.  Creates the block's
   "struct occurrence" on first sight and links it into the dominator
   tree subset rooted at occ_head.  */

static inline void
register_division_in (basic_block bb)
{
  struct occurrence *occ;

  /* BB->aux caches the occurrence node for this block, so repeated
     divisions in the same block only bump the counter.  */
  occ = (struct occurrence *) bb->aux;
  if (!occ)
    {
      occ = occ_new (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions++;
}
249 | ||
250 | ||
251 | /* Compute the number of divisions that postdominate each block in OCC and | |
252 | its children. */ | |
abacb398 | 253 | |
abacb398 | 254 | static void |
ac70caad | 255 | compute_merit (struct occurrence *occ) |
abacb398 | 256 | { |
ac70caad | 257 | struct occurrence *occ_child; |
258 | basic_block dom = occ->bb; | |
abacb398 | 259 | |
ac70caad | 260 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
abacb398 | 261 | { |
ac70caad | 262 | basic_block bb; |
263 | if (occ_child->children) | |
264 | compute_merit (occ_child); | |
265 | ||
266 | if (flag_exceptions) | |
267 | bb = single_noncomplex_succ (dom); | |
268 | else | |
269 | bb = dom; | |
270 | ||
271 | if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb)) | |
272 | occ->num_divisions += occ_child->num_divisions; | |
273 | } | |
274 | } | |
275 | ||
276 | ||
277 | /* Return whether USE_STMT is a floating-point division by DEF. */ | |
278 | static inline bool | |
75a70cf9 | 279 | is_division_by (gimple use_stmt, tree def) |
ac70caad | 280 | { |
75a70cf9 | 281 | return is_gimple_assign (use_stmt) |
282 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR | |
283 | && gimple_assign_rhs2 (use_stmt) == def | |
119368d7 | 284 | /* Do not recognize x / x as valid division, as we are getting |
285 | confused later by replacing all immediate uses x in such | |
286 | a stmt. */ | |
75a70cf9 | 287 | && gimple_assign_rhs1 (use_stmt) != def; |
ac70caad | 288 | } |
289 | ||
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_GSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.  THRESHOLD is the minimum number of divisions that must
   post-dominate a block for inserting the reciprocal there to pay off.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, int threshold)
{
  tree type;
  gimple new_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  /* Insert a new reciprocal only if no dominator provides one already,
     the block is legal for insertion (it has a division, or math does
     not trap), and enough divisions are post-dominated.  */
  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      && occ->num_divisions >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = make_rename_temp (type, "reciptmp");
      new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
					       build_one_cst (type), def);

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}

      occ->recip_def_stmt = new_stmt;
    }

  /* Propagate the available reciprocal (possibly the one inherited from
     a dominator) to all dominated blocks.  */
  occ->recip_def = recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
}
349 | ||
350 | ||
351 | /* Replace the division at USE_P with a multiplication by the reciprocal, if | |
352 | possible. */ | |
353 | ||
354 | static inline void | |
355 | replace_reciprocal (use_operand_p use_p) | |
356 | { | |
75a70cf9 | 357 | gimple use_stmt = USE_STMT (use_p); |
358 | basic_block bb = gimple_bb (use_stmt); | |
ac70caad | 359 | struct occurrence *occ = (struct occurrence *) bb->aux; |
360 | ||
0bfd8d5c | 361 | if (optimize_bb_for_speed_p (bb) |
362 | && occ->recip_def && use_stmt != occ->recip_def_stmt) | |
ac70caad | 363 | { |
75a70cf9 | 364 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |
ac70caad | 365 | SET_USE (use_p, occ->recip_def); |
366 | fold_stmt_inplace (use_stmt); | |
367 | update_stmt (use_stmt); | |
368 | } | |
369 | } | |
370 | ||
371 | ||
372 | /* Free OCC and return one more "struct occurrence" to be freed. */ | |
373 | ||
374 | static struct occurrence * | |
375 | free_bb (struct occurrence *occ) | |
376 | { | |
377 | struct occurrence *child, *next; | |
378 | ||
379 | /* First get the two pointers hanging off OCC. */ | |
380 | next = occ->next; | |
381 | child = occ->children; | |
382 | occ->bb->aux = NULL; | |
383 | pool_free (occ_pool, occ); | |
384 | ||
385 | /* Now ensure that we don't recurse unless it is necessary. */ | |
386 | if (!child) | |
387 | return next; | |
9e583fac | 388 | else |
ac70caad | 389 | { |
390 | while (next) | |
391 | next = free_bb (next); | |
392 | ||
393 | return child; | |
394 | } | |
395 | } | |
396 | ||
397 | ||
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.
   DEF_GSI, if non-NULL, is an iterator at DEF's defining statement.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p;
  imm_use_iterator use_iter;
  struct occurrence *occ;
  int count = 0, threshold;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));

  /* First pass: count divisions by DEF and record their blocks in the
     dominator-tree subset rooted at occ_head.  */
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt));
	  count++;
	}
    }

  /* Do the expensive part only if we can hope to optimize something.  */
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
  if (count >= threshold)
    {
      gimple use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
	}

      /* Second pass: rewrite the divisions into multiplications by the
	 inserted reciprocals.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	}
    }

  /* Tear down the occurrence tree; free_bb also clears BB->aux.  */
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
450 | ||
ac70caad | 451 | static bool |
452 | gate_cse_reciprocals (void) | |
453 | { | |
0bfd8d5c | 454 | return optimize && flag_reciprocal_math; |
ac70caad | 455 | } |
456 | ||
/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  Also converts
   a/func(b) into a*rfunc(b) where the target provides a reciprocal
   builtin (e.g. rsqrt).  */
static unsigned int
execute_cse_reciprocals (void)
{
  basic_block bb;
  tree arg;

  occ_pool = create_alloc_pool ("dominators for recip",
				sizeof (struct occurrence),
				n_basic_blocks / 3 + 1);

  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

#ifdef ENABLE_CHECKING
  FOR_EACH_BB (bb)
    gcc_assert (!bb->aux);
#endif

  /* Process the default definitions of floating-point parameters.  */
  for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
    if (gimple_default_def (cfun, arg)
	&& FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg));

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;
      gimple phi;
      tree def;

      /* Floating-point PHI results.  */
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  phi = gsi_stmt (gsi);
	  def = PHI_RESULT (phi);
	  if (FLOAT_TYPE_P (TREE_TYPE (def))
	      && is_gimple_reg (def))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      /* Floating-point SSA names defined by ordinary statements.  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      /* The builtin-reciprocal transformation trades accuracy for speed,
	 so skip blocks optimized for size.  */
      if (optimize_bb_for_size_p (bb))
        continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1)
		  && (fndecl = gimple_call_fndecl (stmt1))
		  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
		{
		  enum built_in_function code;
		  bool md_code, fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;

		  code = DECL_FUNCTION_CODE (fndecl);
		  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;

		  /* Ask the target for a reciprocal variant of the call.  */
		  fndecl = targetm.builtin_reciprocal (code, md_code, false);
		  if (!fndecl)
		    continue;

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  /* Retarget the call to the reciprocal builtin and turn
		     every division by its result into a multiplication.  */
		  gimple_replace_lhs (stmt1, arg1);
		  gimple_call_set_fndecl (stmt1, fndecl);
		  update_stmt (stmt1);

		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (stmt);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  free_alloc_pool (occ_pool);
  return 0;
}
588 | ||
/* Pass descriptor for the reciprocal-CSE mini-pass ("recip").  */
struct gimple_opt_pass pass_cse_reciprocals =
{
 {
  GIMPLE_PASS,
  "recip",				/* name */
  gate_cse_reciprocals,			/* gate */
  execute_cse_reciprocals,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_ssa,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
    | TODO_verify_stmts			/* todo_flags_finish */
 }
};
a0315874 | 608 | |
0d424440 | 609 | /* Records an occurrence at statement USE_STMT in the vector of trees |
a0315874 | 610 | STMTS if it is dominated by *TOP_BB or dominates it or this basic block |
0d424440 | 611 | is not yet initialized. Returns true if the occurrence was pushed on |
a0315874 | 612 | the vector. Adjusts *TOP_BB to be the basic block dominating all |
613 | statements in the vector. */ | |
614 | ||
615 | static bool | |
75a70cf9 | 616 | maybe_record_sincos (VEC(gimple, heap) **stmts, |
617 | basic_block *top_bb, gimple use_stmt) | |
a0315874 | 618 | { |
75a70cf9 | 619 | basic_block use_bb = gimple_bb (use_stmt); |
a0315874 | 620 | if (*top_bb |
621 | && (*top_bb == use_bb | |
622 | || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) | |
75a70cf9 | 623 | VEC_safe_push (gimple, heap, *stmts, use_stmt); |
a0315874 | 624 | else if (!*top_bb |
625 | || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) | |
626 | { | |
75a70cf9 | 627 | VEC_safe_push (gimple, heap, *stmts, use_stmt); |
a0315874 | 628 | *top_bb = use_bb; |
629 | } | |
630 | else | |
631 | return false; | |
632 | ||
633 | return true; | |
634 | } | |
635 | ||
636 | /* Look for sin, cos and cexpi calls with the same argument NAME and | |
637 | create a single call to cexpi CSEing the result in this case. | |
638 | We first walk over all immediate uses of the argument collecting | |
639 | statements that we can CSE in a vector and in a second pass replace | |
640 | the statement rhs with a REALPART or IMAGPART expression on the | |
641 | result of the cexpi call we insert before the use statement that | |
642 | dominates all other candidates. */ | |
643 | ||
4c80086d | 644 | static bool |
a0315874 | 645 | execute_cse_sincos_1 (tree name) |
646 | { | |
75a70cf9 | 647 | gimple_stmt_iterator gsi; |
a0315874 | 648 | imm_use_iterator use_iter; |
75a70cf9 | 649 | tree fndecl, res, type; |
650 | gimple def_stmt, use_stmt, stmt; | |
a0315874 | 651 | int seen_cos = 0, seen_sin = 0, seen_cexpi = 0; |
75a70cf9 | 652 | VEC(gimple, heap) *stmts = NULL; |
a0315874 | 653 | basic_block top_bb = NULL; |
654 | int i; | |
4c80086d | 655 | bool cfg_changed = false; |
a0315874 | 656 | |
657 | type = TREE_TYPE (name); | |
658 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name) | |
659 | { | |
75a70cf9 | 660 | if (gimple_code (use_stmt) != GIMPLE_CALL |
661 | || !gimple_call_lhs (use_stmt) | |
662 | || !(fndecl = gimple_call_fndecl (use_stmt)) | |
a0315874 | 663 | || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) |
664 | continue; | |
665 | ||
666 | switch (DECL_FUNCTION_CODE (fndecl)) | |
667 | { | |
668 | CASE_FLT_FN (BUILT_IN_COS): | |
669 | seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; | |
670 | break; | |
671 | ||
672 | CASE_FLT_FN (BUILT_IN_SIN): | |
673 | seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; | |
674 | break; | |
675 | ||
676 | CASE_FLT_FN (BUILT_IN_CEXPI): | |
677 | seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; | |
678 | break; | |
679 | ||
680 | default:; | |
681 | } | |
682 | } | |
683 | ||
684 | if (seen_cos + seen_sin + seen_cexpi <= 1) | |
685 | { | |
75a70cf9 | 686 | VEC_free(gimple, heap, stmts); |
4c80086d | 687 | return false; |
a0315874 | 688 | } |
689 | ||
690 | /* Simply insert cexpi at the beginning of top_bb but not earlier than | |
691 | the name def statement. */ | |
692 | fndecl = mathfn_built_in (type, BUILT_IN_CEXPI); | |
693 | if (!fndecl) | |
4c80086d | 694 | return false; |
695 | res = create_tmp_reg (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp"); | |
75a70cf9 | 696 | stmt = gimple_build_call (fndecl, 1, name); |
4c80086d | 697 | res = make_ssa_name (res, stmt); |
75a70cf9 | 698 | gimple_call_set_lhs (stmt, res); |
699 | ||
a0315874 | 700 | def_stmt = SSA_NAME_DEF_STMT (name); |
8090c12d | 701 | if (!SSA_NAME_IS_DEFAULT_DEF (name) |
75a70cf9 | 702 | && gimple_code (def_stmt) != GIMPLE_PHI |
703 | && gimple_bb (def_stmt) == top_bb) | |
a0315874 | 704 | { |
75a70cf9 | 705 | gsi = gsi_for_stmt (def_stmt); |
706 | gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); | |
a0315874 | 707 | } |
708 | else | |
709 | { | |
75a70cf9 | 710 | gsi = gsi_after_labels (top_bb); |
711 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
a0315874 | 712 | } |
713 | update_stmt (stmt); | |
714 | ||
715 | /* And adjust the recorded old call sites. */ | |
75a70cf9 | 716 | for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i) |
a0315874 | 717 | { |
75a70cf9 | 718 | tree rhs = NULL; |
719 | fndecl = gimple_call_fndecl (use_stmt); | |
720 | ||
a0315874 | 721 | switch (DECL_FUNCTION_CODE (fndecl)) |
722 | { | |
723 | CASE_FLT_FN (BUILT_IN_COS): | |
75a70cf9 | 724 | rhs = fold_build1 (REALPART_EXPR, type, res); |
a0315874 | 725 | break; |
726 | ||
727 | CASE_FLT_FN (BUILT_IN_SIN): | |
75a70cf9 | 728 | rhs = fold_build1 (IMAGPART_EXPR, type, res); |
a0315874 | 729 | break; |
730 | ||
731 | CASE_FLT_FN (BUILT_IN_CEXPI): | |
75a70cf9 | 732 | rhs = res; |
a0315874 | 733 | break; |
734 | ||
735 | default:; | |
736 | gcc_unreachable (); | |
737 | } | |
738 | ||
75a70cf9 | 739 | /* Replace call with a copy. */ |
740 | stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs); | |
741 | ||
742 | gsi = gsi_for_stmt (use_stmt); | |
4c80086d | 743 | gsi_replace (&gsi, stmt, true); |
744 | if (gimple_purge_dead_eh_edges (gimple_bb (stmt))) | |
745 | cfg_changed = true; | |
a0315874 | 746 | } |
747 | ||
75a70cf9 | 748 | VEC_free(gimple, heap, stmts); |
4c80086d | 749 | |
750 | return cfg_changed; | |
a0315874 | 751 | } |
752 | ||
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Returns TODO_cleanup_cfg
   when any replacement purged dead EH edges.  */

static unsigned int
execute_cse_sincos (void)
{
  basic_block bb;
  bool cfg_changed = false;

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt)
	      && (fndecl = gimple_call_fndecl (stmt))
	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
	    {
	      tree arg;

	      switch (DECL_FUNCTION_CODE (fndecl))
		{
		CASE_FLT_FN (BUILT_IN_COS):
		CASE_FLT_FN (BUILT_IN_SIN):
		CASE_FLT_FN (BUILT_IN_CEXPI):
		  /* Only SSA_NAME arguments can be CSEd across uses.  */
		  arg = gimple_call_arg (stmt, 0);
		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;

		default:;
		}
	    }
	}
    }

  free_dominance_info (CDI_DOMINATORS);
  return cfg_changed ? TODO_cleanup_cfg : 0;
}
799 | ||
800 | static bool | |
801 | gate_cse_sincos (void) | |
802 | { | |
803 | /* Make sure we have either sincos or cexp. */ | |
804 | return (TARGET_HAS_SINCOS | |
805 | || TARGET_C99_FUNCTIONS) | |
806 | && optimize; | |
807 | } | |
808 | ||
/* Pass descriptor for the sin/cos/cexpi CSE mini-pass ("sincos").  */
struct gimple_opt_pass pass_cse_sincos =
{
 {
  GIMPLE_PASS,
  "sincos",				/* name */
  gate_cse_sincos,			/* gate */
  execute_cse_sincos,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_ssa,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
    | TODO_verify_stmts			/* todo_flags_finish */
 }
};
e174638f | 828 | |
84cc784c | 829 | /* A symbolic number is used to detect byte permutation and selection |
830 | patterns. Therefore the field N contains an artificial number | |
831 | consisting of byte size markers: | |
832 | ||
833 | 0 - byte has the value 0 | |
834 | 1..size - byte contains the content of the byte | |
835 | number indexed with that value minus one */ | |
836 | ||
struct symbolic_number {
  unsigned HOST_WIDEST_INT n;	/* One marker byte per tracked byte;
				   see the encoding described above.  */
  int size;			/* Size in bytes of the value being tracked.  */
};
841 | ||
842 | /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic | |
843 | number N. Return false if the requested operation is not permitted | |
844 | on a symbolic number. */ | |
845 | ||
846 | static inline bool | |
847 | do_shift_rotate (enum tree_code code, | |
848 | struct symbolic_number *n, | |
849 | int count) | |
850 | { | |
851 | if (count % 8 != 0) | |
852 | return false; | |
853 | ||
854 | /* Zero out the extra bits of N in order to avoid them being shifted | |
855 | into the significant bits. */ | |
856 | if (n->size < (int)sizeof (HOST_WIDEST_INT)) | |
857 | n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1; | |
858 | ||
859 | switch (code) | |
860 | { | |
861 | case LSHIFT_EXPR: | |
862 | n->n <<= count; | |
863 | break; | |
864 | case RSHIFT_EXPR: | |
865 | n->n >>= count; | |
866 | break; | |
867 | case LROTATE_EXPR: | |
868 | n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count)); | |
869 | break; | |
870 | case RROTATE_EXPR: | |
871 | n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count)); | |
872 | break; | |
873 | default: | |
874 | return false; | |
875 | } | |
876 | return true; | |
877 | } | |
878 | ||
879 | /* Perform sanity checking for the symbolic number N and the gimple | |
880 | statement STMT. */ | |
881 | ||
882 | static inline bool | |
883 | verify_symbolic_number_p (struct symbolic_number *n, gimple stmt) | |
884 | { | |
885 | tree lhs_type; | |
886 | ||
887 | lhs_type = gimple_expr_type (stmt); | |
888 | ||
889 | if (TREE_CODE (lhs_type) != INTEGER_TYPE) | |
890 | return false; | |
891 | ||
892 | if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT) | |
893 | return false; | |
894 | ||
895 | return true; | |
896 | } | |
897 | ||
/* find_bswap_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the
   operation could successfully be executed the function returns the
   tree expression of the source operand and NULL otherwise.

   LIMIT bounds the recursion depth; when it reaches zero the walk
   stops and the statement is treated as unrecognizable.  */

static tree
find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple rhs1_stmt, rhs2_stmt;
  tree source_expr1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL_TREE;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL_TREE;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      /* Only operations that move or mask whole bytes can be part of
	 a bswap pattern.  */
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && code != NOP_EXPR
	  && code != CONVERT_EXPR)
	return NULL_TREE;

      source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_1 returned NULL STMT is a leaf node and we have
	 to initialize the symbolic number.  */
      if (!source_expr1)
	{
	  /* Set up the symbolic number N by setting each byte to a
	     value between 1 and the byte size of rhs1.  The highest
	     order byte is set to n->size and the lowest order
	     byte to 1.  */
	  n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
	  if (n->size % BITS_PER_UNIT != 0)
	    return NULL_TREE;
	  n->size /= BITS_PER_UNIT;
	  /* 0x08070605_04030201 encodes "byte i holds source byte i".  */
	  n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
		  (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);

	  /* Drop marker bytes beyond the actual width of the leaf.  */
	  if (n->size < (int)sizeof (HOST_WIDEST_INT))
	    n->n &= ((unsigned HOST_WIDEST_INT)1 <<
		     (n->size * BITS_PER_UNIT)) - 1;

	  source_expr1 = rhs1;
	}

      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i;
	    unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
	    unsigned HOST_WIDEST_INT tmp = val;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
	      if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
		return NULL_TREE;

	    n->n &= val;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
	    return NULL_TREE;
	  break;
	CASE_CONVERT:
	  {
	    int type_size;

	    type_size = TYPE_PRECISION (gimple_expr_type (stmt));
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL_TREE;

	    if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
	      }
	    n->size = type_size / BITS_PER_UNIT;
	  }
	  break;
	default:
	  return NULL_TREE;
	};
      return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      tree source_expr2;

      /* Only BIT_IOR_EXPR combines two partial byte patterns.  */
      if (code != BIT_IOR_EXPR)
	return NULL_TREE;

      if (TREE_CODE (rhs2) != SSA_NAME)
	return NULL_TREE;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
	{
	case BIT_IOR_EXPR:
	  source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);

	  if (!source_expr1)
	    return NULL_TREE;

	  source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);

	  /* Both halves must originate from the same source value and
	     track the same width for the OR to merge meaningfully.  */
	  if (source_expr1 != source_expr2
	      || n1.size != n2.size)
	    return NULL_TREE;

	  n->size = n1.size;
	  n->n = n1.n | n2.n;

	  if (!verify_symbolic_number_p (n, stmt))
	    return NULL_TREE;

	  break;
	default:
	  return NULL_TREE;
	}
      return source_expr1;
    }
  return NULL_TREE;
}
1056 | ||
/* Check if STMT completes a bswap implementation consisting of ORs,
   SHIFTs and ANDs.  Return the source tree expression on which the
   byte swap is performed and NULL if no bswap was found.  */

static tree
find_bswap (gimple stmt)
{
  /* The number which the find_bswap result should match in order to
     have a full byte swap.  The number is shifted to the left according
     to the size of the symbolic number before using it.  */
  unsigned HOST_WIDEST_INT cmp =
    sizeof (HOST_WIDEST_INT) < 8 ? 0 :
    (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;

  struct symbolic_number n;
  tree source_expr;

  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number of bytes to be touched.  We
     increase that number by one here in order to also cover signed ->
     unsigned conversions of the src operand as can be seen in
     libgcc.  */
  source_expr = find_bswap_1 (stmt, &n,
			      TREE_INT_CST_LOW (
				TYPE_SIZE_UNIT (gimple_expr_type (stmt))) + 1);

  if (!source_expr)
    return NULL_TREE;

  /* Zero out the extra bits of N and CMP so that only the bytes that
     are actually tracked take part in the comparison below.  */
  if (n.size < (int)sizeof (HOST_WIDEST_INT))
    {
      unsigned HOST_WIDEST_INT mask =
	((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;

      n.n &= mask;
      cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
    }

  /* A complete byte swap should make the symbolic number to start
     with the largest digit in the highest order byte.  */
  if (cmp != n.n)
    return NULL_TREE;

  return source_expr;
}
1103 | ||
/* Find manual byte swap implementations and turn them into a bswap
   builtin invocation.  */
1106 | ||
static unsigned int
execute_optimize_bswap (void)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  /* The symbolic-number machinery assumes 8-bit bytes and needs a
     HOST_WIDEST_INT of at least 64 bits to hold 8 byte markers.  */
  if (BITS_PER_UNIT != 8)
    return 0;

  if (sizeof (HOST_WIDEST_INT) < 8)
    return 0;

  /* Only rewrite to __builtin_bswap32/64 when the target actually
     has a bswap instruction for the mode (a 64-bit swap can also be
     composed from 32-bit swaps when word_mode is SImode).  */
  bswap32_p = (built_in_decls[BUILT_IN_BSWAP32]
	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (built_in_decls[BUILT_IN_BSWAP64]
	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		   || (bswap32_p && word_mode == SImode)));

  if (!bswap32_p && !bswap64_p)
    return 0;

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = built_in_decls[BUILT_IN_BSWAP32];
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = built_in_decls[BUILT_IN_BSWAP64];
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree bswap_src, bswap_type;
	  tree bswap_tmp;
	  tree fndecl = NULL_TREE;
	  int type_size;
	  gimple call;

	  /* A hand-written bswap always terminates in a BIT_IOR_EXPR
	     combining the last two halves.  */
	  if (!is_gimple_assign (stmt)
	      || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
	    continue;

	  type_size = TYPE_PRECISION (gimple_expr_type (stmt));

	  switch (type_size)
	    {
	    case 32:
	      if (bswap32_p)
		{
		  fndecl = built_in_decls[BUILT_IN_BSWAP32];
		  bswap_type = bswap32_type;
		}
	      break;
	    case 64:
	      if (bswap64_p)
		{
		  fndecl = built_in_decls[BUILT_IN_BSWAP64];
		  bswap_type = bswap64_type;
		}
	      break;
	    default:
	      continue;
	    }

	  if (!fndecl)
	    continue;

	  bswap_src = find_bswap (stmt);

	  if (!bswap_src)
	    continue;

	  changed = true;

	  bswap_tmp = bswap_src;

	  /* Convert the src expression if necessary.  */
	  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
	    {
	      gimple convert_stmt;

	      bswap_tmp = create_tmp_var (bswap_type, "bswapsrc");
	      add_referenced_var (bswap_tmp);
	      bswap_tmp = make_ssa_name (bswap_tmp, NULL);

	      convert_stmt = gimple_build_assign_with_ops (
			       CONVERT_EXPR, bswap_tmp, bswap_src, NULL);
	      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
	    }

	  call = gimple_build_call (fndecl, 1, bswap_tmp);

	  bswap_tmp = gimple_assign_lhs (stmt);

	  /* Convert the result if necessary.  The conversion statement
	     is inserted after GSI first, then the call before it, so
	     the final order is call -> conversion.  */
	  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
	    {
	      gimple convert_stmt;

	      bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
	      add_referenced_var (bswap_tmp);
	      bswap_tmp = make_ssa_name (bswap_tmp, NULL);
	      convert_stmt = gimple_build_assign_with_ops (
			       CONVERT_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
	      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
	    }

	  gimple_call_set_lhs (call, bswap_tmp);

	  if (dump_file)
	    {
	      fprintf (dump_file, "%d bit bswap implementation found at: ",
		       (int)type_size);
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	  gsi_insert_after (&gsi, call, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}
    }

  /* TODO flags are returned here instead of being set in the pass
     descriptor so that nothing is rebuilt when no bswap was found.  */
  return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
	  | TODO_verify_stmts : 0);
}
1243 | ||
1244 | static bool | |
1245 | gate_optimize_bswap (void) | |
1246 | { | |
1247 | return flag_expensive_optimizations && optimize; | |
1248 | } | |
1249 | ||
/* Pass descriptor for the bswap recognition pass.  Runs on SSA form;
   no TODO flags are requested here because execute_optimize_bswap
   returns them itself only when it changed something.  */
struct gimple_opt_pass pass_optimize_bswap =
{
 {
  GIMPLE_PASS,
  "bswap",				/* name */
  gate_optimize_bswap,			/* gate */
  execute_optimize_bswap,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_ssa,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  0					/* todo_flags_finish */
 }
};
62be004c | 1268 | |
7e4c867e | 1269 | /* Return true if RHS is a suitable operand for a widening multiplication. |
1270 | There are two cases: | |
1271 | ||
1272 | - RHS makes some value twice as wide. Store that value in *NEW_RHS_OUT | |
1273 | if so, and store its type in *TYPE_OUT. | |
1274 | ||
1275 | - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, | |
1276 | but leave *TYPE_OUT untouched. */ | |
00f4f705 | 1277 | |
1278 | static bool | |
7e4c867e | 1279 | is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out) |
1280 | { | |
1281 | gimple stmt; | |
1282 | tree type, type1, rhs1; | |
1283 | enum tree_code rhs_code; | |
1284 | ||
1285 | if (TREE_CODE (rhs) == SSA_NAME) | |
1286 | { | |
1287 | type = TREE_TYPE (rhs); | |
1288 | stmt = SSA_NAME_DEF_STMT (rhs); | |
1289 | if (!is_gimple_assign (stmt)) | |
1290 | return false; | |
1291 | ||
1292 | rhs_code = gimple_assign_rhs_code (stmt); | |
1293 | if (TREE_CODE (type) == INTEGER_TYPE | |
1294 | ? !CONVERT_EXPR_CODE_P (rhs_code) | |
1295 | : rhs_code != FIXED_CONVERT_EXPR) | |
1296 | return false; | |
1297 | ||
1298 | rhs1 = gimple_assign_rhs1 (stmt); | |
1299 | type1 = TREE_TYPE (rhs1); | |
1300 | if (TREE_CODE (type1) != TREE_CODE (type) | |
1301 | || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type)) | |
1302 | return false; | |
1303 | ||
1304 | *new_rhs_out = rhs1; | |
1305 | *type_out = type1; | |
1306 | return true; | |
1307 | } | |
1308 | ||
1309 | if (TREE_CODE (rhs) == INTEGER_CST) | |
1310 | { | |
1311 | *new_rhs_out = rhs; | |
1312 | *type_out = NULL; | |
1313 | return true; | |
1314 | } | |
1315 | ||
1316 | return false; | |
1317 | } | |
1318 | ||
/* Return true if STMT performs a widening multiplication.  If so,
   store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT
   respectively.  Also fill *RHS1_OUT and *RHS2_OUT such that converting
   those operands to types *TYPE1_OUT and *TYPE2_OUT would give the
   operands of the multiplication.  */

static bool
is_widening_mult_p (gimple stmt,
		    tree *type1_out, tree *rhs1_out,
		    tree *type2_out, tree *rhs2_out)
{
  tree type;

  /* Widening multiplies are only formed for integer and fixed-point
     results.  */
  type = TREE_TYPE (gimple_assign_lhs (stmt));
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out))
    return false;

  if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out))
    return false;

  /* A NULL *TYPE1_OUT means rhs1 was an integer constant; it may take
     the unwidened type of the other operand, provided it fits.  */
  if (*type1_out == NULL)
    {
      if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
	return false;
      *type1_out = *type2_out;
    }

  /* Likewise for a constant rhs2.  */
  if (*type2_out == NULL)
    {
      if (!int_fits_type_p (*rhs2_out, *type1_out))
	return false;
      *type2_out = *type1_out;
    }

  return true;
}
00f4f705 | 1359 | |
7e4c867e | 1360 | /* Process a single gimple statement STMT, which has a MULT_EXPR as |
1361 | its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return | |
1362 | value is true iff we converted the statement. */ | |
1363 | ||
1364 | static bool | |
1365 | convert_mult_to_widen (gimple stmt) | |
1366 | { | |
1367 | tree lhs, rhs1, rhs2, type, type1, type2; | |
1368 | enum insn_code handler; | |
1369 | ||
1370 | lhs = gimple_assign_lhs (stmt); | |
1371 | type = TREE_TYPE (lhs); | |
1372 | if (TREE_CODE (type) != INTEGER_TYPE) | |
00f4f705 | 1373 | return false; |
1374 | ||
7e4c867e | 1375 | if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) |
00f4f705 | 1376 | return false; |
1377 | ||
7e4c867e | 1378 | if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2)) |
1379 | handler = optab_handler (umul_widen_optab, TYPE_MODE (type)); | |
1380 | else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2)) | |
1381 | handler = optab_handler (smul_widen_optab, TYPE_MODE (type)); | |
00f4f705 | 1382 | else |
7e4c867e | 1383 | handler = optab_handler (usmul_widen_optab, TYPE_MODE (type)); |
1384 | ||
1385 | if (handler == CODE_FOR_nothing) | |
1386 | return false; | |
1387 | ||
1388 | gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1)); | |
1389 | gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2)); | |
00f4f705 | 1390 | gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); |
1391 | update_stmt (stmt); | |
1392 | return true; | |
1393 | } | |
1394 | ||
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */

static bool
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
			    enum tree_code code)
{
  gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
  tree type, type1, type2;
  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
  optab this_optab;
  enum tree_code wmult_code;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (code == MINUS_EXPR)
    wmult_code = WIDEN_MULT_MINUS_EXPR;
  else
    wmult_code = WIDEN_MULT_PLUS_EXPR;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  /* Both operands must be SSA names; record the codes of their
     defining statements to find the multiply below.  */
  if (TREE_CODE (rhs1) == SSA_NAME)
    {
      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (is_gimple_assign (rhs1_stmt))
	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
    }
  else
    return false;

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
      if (is_gimple_assign (rhs2_stmt))
	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
    }
  else
    return false;

  /* Look for a (widening) multiply feeding either operand.  For
     MINUS_EXPR the multiply may only be the second operand (the
     subtrahend), hence the "code == PLUS_EXPR" guards on rhs1.  */
  if (code == PLUS_EXPR && rhs1_code == MULT_EXPR)
    {
      if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
			       &type2, &mult_rhs2))
	return false;
      add_rhs = rhs2;
    }
  else if (rhs2_code == MULT_EXPR)
    {
      if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
			       &type2, &mult_rhs2))
	return false;
      add_rhs = rhs1;
    }
  else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR)
    {
      /* An already-widened multiply: its operands carry the unwidened
	 types directly.  */
      mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt);
      mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt);
      type1 = TREE_TYPE (mult_rhs1);
      type2 = TREE_TYPE (mult_rhs2);
      add_rhs = rhs2;
    }
  else if (rhs2_code == WIDEN_MULT_EXPR)
    {
      mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt);
      mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt);
      type1 = TREE_TYPE (mult_rhs1);
      type2 = TREE_TYPE (mult_rhs2);
      add_rhs = rhs1;
    }
  else
    return false;

  /* Mixed-signedness multiply-accumulate is not supported.  */
  if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
    return false;

  /* Verify that the machine can perform a widening multiply
     accumulate in this mode/signedness combination, otherwise
     this transformation is likely to pessimize code.  */
  this_optab = optab_for_tree_code (wmult_code, type1, optab_default);
  if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
    return false;

  /* ??? May need some type verification here?  */

  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code,
				    fold_convert (type1, mult_rhs1),
				    fold_convert (type2, mult_rhs2),
				    add_rhs);
  update_stmt (gsi_stmt (*gsi));
  return true;
}
1496 | ||
/* Combine the multiplication at MUL_STMT with uses in additions and
   subtractions to form fused multiply-add operations.  Returns true
   if successful and MUL_STMT should be removed.

   The function makes two walks over the immediate uses of the
   multiplication result: the first only validates that every use can
   become an FMA, the second performs the rewrite.  */

static bool
convert_mult_to_fma (gimple mul_stmt)
{
  tree mul_result = gimple_assign_lhs (mul_stmt);
  tree type = TREE_TYPE (mul_result);
  gimple use_stmt, neguse_stmt, fma_stmt;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* Floating-point contraction must be permitted by the user.  */
  if (FLOAT_TYPE_P (type)
      && flag_fp_contract_mode == FP_CONTRACT_OFF)
    return false;

  /* We don't want to do bitfield reduction ops.  */
  if (INTEGRAL_TYPE_P (type)
      && (TYPE_PRECISION (type)
	  != GET_MODE_PRECISION (TYPE_MODE (type))))
    return false;

  /* If the target doesn't support it, don't generate it.  We assume that
     if fma isn't available then fms, fnma or fnms are not either.  */
  if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
    return false;

  /* Make sure that the multiplication statement becomes dead after
     the transformation, thus that all uses are transformed to FMAs.
     This means we assume that an FMA operation has the same cost
     as an addition.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
    {
      enum tree_code use_code;
      tree result = mul_result;
      bool negate_p = false;

      use_stmt = USE_STMT (use_p);

      /* For now restrict these operations to single basic blocks.  In theory
	 we would want to support sinking the multiplication in
	 m = a*b;
	 if ()
	   ma = m + c;
	 else
	   d = m;
	 to form a fma in the then block and sink the multiplication to the
	 else block.  */
      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	return false;

      if (!is_gimple_assign (use_stmt))
	return false;

      use_code = gimple_assign_rhs_code (use_stmt);

      /* A negate on the multiplication leads to FNMA.  */
      if (use_code == NEGATE_EXPR)
	{
	  result = gimple_assign_lhs (use_stmt);

	  /* Make sure the negate statement becomes dead with this
	     single transformation.  */
	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
			       &use_p, &neguse_stmt))
	    return false;

	  /* Re-validate.  */
	  use_stmt = neguse_stmt;
	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	    return false;
	  if (!is_gimple_assign (use_stmt))
	    return false;

	  use_code = gimple_assign_rhs_code (use_stmt);
	  negate_p = true;
	}

      switch (use_code)
	{
	case MINUS_EXPR:
	  /* Subtracting the multiply result flips the sign once more.  */
	  if (gimple_assign_rhs2 (use_stmt) == result)
	    negate_p = !negate_p;
	  break;
	case PLUS_EXPR:
	  break;
	default:
	  /* FMA can only be formed from PLUS and MINUS.  */
	  return false;
	}

      /* We can't handle a * b + a * b.  */
      if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
	return false;

      /* While it is possible to validate whether or not the exact form
	 that we've recognized is available in the backend, the assumption
	 is that the transformation is never a loss.  For instance, suppose
	 the target only has the plain FMA pattern available.  Consider
	 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
	 is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
	 still have 3 operations, but in the FMA form the two NEGs are
	 independent and could be run in parallel.  */
    }

  /* Second walk: actually rewrite every use into an FMA_EXPR.  All
     checks already passed above, so no bail-outs here.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      enum tree_code use_code = gimple_assign_rhs_code (use_stmt);
      tree addop, mulop1, result = mul_result;
      bool negate_p = false;

      if (use_code == NEGATE_EXPR)
	{
	  /* Fold the intermediate negate into the FMA operand and
	     delete the now-dead negate statement.  */
	  result = gimple_assign_lhs (use_stmt);
	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
	  gsi_remove (&gsi, true);
	  release_defs (use_stmt);

	  use_stmt = neguse_stmt;
	  gsi = gsi_for_stmt (use_stmt);
	  use_code = gimple_assign_rhs_code (use_stmt);
	  negate_p = true;
	}

      if (gimple_assign_rhs1 (use_stmt) == result)
	{
	  addop = gimple_assign_rhs2 (use_stmt);
	  /* a * b - c -> a * b + (-c)  */
	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
	    addop = force_gimple_operand_gsi (&gsi,
					      build1 (NEGATE_EXPR,
						      type, addop),
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
	}
      else
	{
	  addop = gimple_assign_rhs1 (use_stmt);
	  /* a - b * c -> (-b) * c + a */
	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
	    negate_p = !negate_p;
	}

      mulop1 = gimple_assign_rhs1 (mul_stmt);
      if (negate_p)
	mulop1 = force_gimple_operand_gsi (&gsi,
					   build1 (NEGATE_EXPR,
						   type, mulop1),
					   true, NULL_TREE, true,
					   GSI_SAME_STMT);

      fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
						gimple_assign_lhs (use_stmt),
						mulop1,
						gimple_assign_rhs2 (mul_stmt),
						addop);
      gsi_replace (&gsi, fma_stmt, true);
    }

  return true;
}
1660 | ||
/* Find integer multiplications where the operands are extended from
   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
   where appropriate.  Also forms FMA_EXPRs from multiply/add pairs
   and WIDEN_MULT_{PLUS,MINUS}_EXPRs via the helpers above.  */

static unsigned int
execute_optimize_widening_mul (void)
{
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      /* The iterator is advanced manually at the bottom of the loop:
	 when a statement is removed (FMA case) "continue" skips
	 gsi_next so the statement after the removed one is visited.  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
	{
	  gimple stmt = gsi_stmt (gsi);
	  enum tree_code code;

	  if (is_gimple_assign (stmt))
	    {
	      code = gimple_assign_rhs_code (stmt);
	      switch (code)
		{
		case MULT_EXPR:
		  /* Prefer a widening multiply; only if that fails,
		     try to fuse the multiply into its uses as FMAs,
		     in which case the multiply itself becomes dead
		     and is removed here.  */
		  if (!convert_mult_to_widen (stmt)
		      && convert_mult_to_fma (stmt))
		    {
		      gsi_remove (&gsi, true);
		      release_defs (stmt);
		      continue;
		    }
		  break;

		case PLUS_EXPR:
		case MINUS_EXPR:
		  convert_plusminus_to_widen (&gsi, stmt, code);
		  break;

		default:;
		}
	    }
	  gsi_next (&gsi);
	}
    }

  return 0;
}
1708 | ||
1709 | static bool | |
1710 | gate_optimize_widening_mul (void) | |
1711 | { | |
1712 | return flag_expensive_optimizations && optimize; | |
1713 | } | |
1714 | ||
/* Pass descriptor for the widening-multiply / FMA formation pass.
   Requires SSA form and re-verifies/updates SSA after running.  */
struct gimple_opt_pass pass_optimize_widening_mul =
{
 {
  GIMPLE_PASS,
  "widening_mul",			/* name */
  gate_optimize_widening_mul,		/* gate */
  execute_optimize_widening_mul,	/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_ssa,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_verify_ssa
  | TODO_verify_stmts
  | TODO_dump_func
  | TODO_update_ssa			/* todo_flags_finish */
 }
};