]>
Commit | Line | Data |
---|---|---|
6c2a63a3 | 1 | /* Global, SSA-based optimizations using mathematical identities. |
ebb07520 | 2 | Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. |
6c2a63a3 PB |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by the | |
9dcd6f09 | 8 | Free Software Foundation; either version 3, or (at your option) any |
6c2a63a3 PB |
9 | later version. |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
6c2a63a3 PB |
19 | |
20 | /* Currently, the only mini-pass in this file tries to CSE reciprocal | |
21 | operations. These are common in sequences such as this one: | |
22 | ||
23 | modulus = sqrt(x*x + y*y + z*z); | |
24 | x = x / modulus; | |
25 | y = y / modulus; | |
26 | z = z / modulus; | |
27 | ||
28 | that can be optimized to | |
29 | ||
30 | modulus = sqrt(x*x + y*y + z*z); | |
31 | rmodulus = 1.0 / modulus; | |
32 | x = x * rmodulus; | |
33 | y = y * rmodulus; | |
34 | z = z * rmodulus; | |
35 | ||
36 | We do this for loop invariant divisors, and with this pass whenever | |
bc23502b PB |
37 | we notice that a division has the same divisor multiple times. |
38 | ||
39 | Of course, like in PRE, we don't insert a division if a dominator | |
40 | already has one. However, this cannot be done as an extension of | |
41 | PRE for several reasons. | |
42 | ||
43 | First of all, with some experiments it was found out that the | |
44 | transformation is not always useful if there are only two divisions | |
45 | hy the same divisor. This is probably because modern processors | |
46 | can pipeline the divisions; on older, in-order processors it should | |
47 | still be effective to optimize two divisions by the same number. | |
48 | We make this a param, and it shall be called N in the remainder of | |
49 | this comment. | |
50 | ||
51 | Second, if trapping math is active, we have less freedom on where | |
52 | to insert divisions: we can only do so in basic blocks that already | |
53 | contain one. (If divisions don't trap, instead, we can insert | |
54 | divisions elsewhere, which will be in blocks that are common dominators | |
55 | of those that have the division). | |
56 | ||
57 | We really don't want to compute the reciprocal unless a division will | |
58 | be found. To do this, we won't insert the division in a basic block | |
59 | that has less than N divisions *post-dominating* it. | |
60 | ||
61 | The algorithm constructs a subset of the dominator tree, holding the | |
62 | blocks containing the divisions and the common dominators to them, | |
63 | and walk it twice. The first walk is in post-order, and it annotates | |
64 | each block with the number of divisions that post-dominate it: this | |
65 | gives information on where divisions can be inserted profitably. | |
66 | The second walk is in pre-order, and it inserts divisions as explained | |
67 | above, and replaces divisions by multiplications. | |
68 | ||
69 | In the best case, the cost of the pass is O(n_statements). In the | |
70 | worst-case, the cost is due to creating the dominator tree subset, | |
71 | with a cost of O(n_basic_blocks ^ 2); however this can only happen | |
72 | for n_statements / n_basic_blocks statements. So, the amortized cost | |
73 | of creating the dominator tree subset is O(n_basic_blocks) and the | |
74 | worst-case cost of the pass is O(n_statements * n_basic_blocks). | |
75 | ||
76 | More practically, the cost will be small because there are few | |
77 | divisions, and they tend to be in the same basic block, so insert_bb | |
78 | is called very few times. | |
79 | ||
80 | If we did this using domwalk.c, an efficient implementation would have | |
81 | to work on all the variables in a single pass, because we could not | |
82 | work on just a subset of the dominator tree, as we do now, and the | |
83 | cost would also be something like O(n_statements * n_basic_blocks). | |
84 | The data structures would be more complex in order to work on all the | |
85 | variables in a single pass. */ | |
6c2a63a3 PB |
86 | |
87 | #include "config.h" | |
88 | #include "system.h" | |
89 | #include "coretypes.h" | |
90 | #include "tm.h" | |
91 | #include "flags.h" | |
92 | #include "tree.h" | |
93 | #include "tree-flow.h" | |
94 | #include "real.h" | |
95 | #include "timevar.h" | |
96 | #include "tree-pass.h" | |
bc23502b PB |
97 | #include "alloc-pool.h" |
98 | #include "basic-block.h" | |
99 | #include "target.h" | |
6c2a63a3 | 100 | |
bc23502b PB |
101 | |
102 | /* This structure represents one basic block that either computes a | |
103 | division, or is a common dominator for basic block that compute a | |
104 | division. */ | |
105 | struct occurrence { | |
106 | /* The basic block represented by this structure. */ | |
107 | basic_block bb; | |
108 | ||
109 | /* If non-NULL, the SSA_NAME holding the definition for a reciprocal | |
110 | inserted in BB. */ | |
111 | tree recip_def; | |
112 | ||
726a989a | 113 | /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that |
bc23502b | 114 | was inserted in BB. */ |
726a989a | 115 | gimple recip_def_stmt; |
bc23502b PB |
116 | |
117 | /* Pointer to a list of "struct occurrence"s for blocks dominated | |
118 | by BB. */ | |
119 | struct occurrence *children; | |
120 | ||
121 | /* Pointer to the next "struct occurrence"s in the list of blocks | |
122 | sharing a common dominator. */ | |
123 | struct occurrence *next; | |
124 | ||
125 | /* The number of divisions that are in BB before compute_merit. The | |
126 | number of divisions that are in BB or post-dominate it after | |
127 | compute_merit. */ | |
128 | int num_divisions; | |
129 | ||
130 | /* True if the basic block has a division, false if it is a common | |
131 | dominator for basic blocks that do. If it is false and trapping | |
132 | math is active, BB is not a candidate for inserting a reciprocal. */ | |
133 | bool bb_has_division; | |
134 | }; | |
135 | ||
136 | ||
137 | /* The instance of "struct occurrence" representing the highest | |
138 | interesting block in the dominator tree. */ | |
139 | static struct occurrence *occ_head; | |
140 | ||
141 | /* Allocation pool for getting instances of "struct occurrence". */ | |
142 | static alloc_pool occ_pool; | |
143 | ||
144 | ||
145 | ||
146 | /* Allocate and return a new struct occurrence for basic block BB, and | |
147 | whose children list is headed by CHILDREN. */ | |
148 | static struct occurrence * | |
149 | occ_new (basic_block bb, struct occurrence *children) | |
6c2a63a3 | 150 | { |
bc23502b PB |
151 | struct occurrence *occ; |
152 | ||
c22940cd | 153 | bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool); |
bc23502b PB |
154 | memset (occ, 0, sizeof (struct occurrence)); |
155 | ||
156 | occ->bb = bb; | |
157 | occ->children = children; | |
158 | return occ; | |
6c2a63a3 PB |
159 | } |
160 | ||
bc23502b PB |
161 | |
162 | /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a | |
163 | list of "struct occurrence"s, one per basic block, having IDOM as | |
164 | their common dominator. | |
165 | ||
166 | We try to insert NEW_OCC as deep as possible in the tree, and we also | |
167 | insert any other block that is a common dominator for BB and one | |
168 | block already in the tree. */ | |
169 | ||
170 | static void | |
171 | insert_bb (struct occurrence *new_occ, basic_block idom, | |
172 | struct occurrence **p_head) | |
2ef571e2 | 173 | { |
bc23502b | 174 | struct occurrence *occ, **p_occ; |
2ef571e2 | 175 | |
bc23502b PB |
176 | for (p_occ = p_head; (occ = *p_occ) != NULL; ) |
177 | { | |
178 | basic_block bb = new_occ->bb, occ_bb = occ->bb; | |
179 | basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb); | |
180 | if (dom == bb) | |
181 | { | |
182 | /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC | |
183 | from its list. */ | |
184 | *p_occ = occ->next; | |
185 | occ->next = new_occ->children; | |
186 | new_occ->children = occ; | |
187 | ||
188 | /* Try the next block (it may as well be dominated by BB). */ | |
189 | } | |
190 | ||
191 | else if (dom == occ_bb) | |
192 | { | |
193 | /* OCC_BB dominates BB. Tail recurse to look deeper. */ | |
194 | insert_bb (new_occ, dom, &occ->children); | |
195 | return; | |
196 | } | |
197 | ||
198 | else if (dom != idom) | |
199 | { | |
200 | gcc_assert (!dom->aux); | |
201 | ||
202 | /* There is a dominator between IDOM and BB, add it and make | |
203 | two children out of NEW_OCC and OCC. First, remove OCC from | |
204 | its list. */ | |
205 | *p_occ = occ->next; | |
206 | new_occ->next = occ; | |
207 | occ->next = NULL; | |
208 | ||
209 | /* None of the previous blocks has DOM as a dominator: if we tail | |
210 | recursed, we would reexamine them uselessly. Just switch BB with | |
211 | DOM, and go on looking for blocks dominated by DOM. */ | |
212 | new_occ = occ_new (dom, new_occ); | |
213 | } | |
214 | ||
215 | else | |
216 | { | |
217 | /* Nothing special, go on with the next element. */ | |
218 | p_occ = &occ->next; | |
219 | } | |
220 | } | |
221 | ||
222 | /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */ | |
223 | new_occ->next = *p_head; | |
224 | *p_head = new_occ; | |
225 | } | |
226 | ||
227 | /* Register that we found a division in BB. */ | |
228 | ||
229 | static inline void | |
230 | register_division_in (basic_block bb) | |
231 | { | |
232 | struct occurrence *occ; | |
233 | ||
234 | occ = (struct occurrence *) bb->aux; | |
235 | if (!occ) | |
236 | { | |
237 | occ = occ_new (bb, NULL); | |
238 | insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head); | |
239 | } | |
240 | ||
241 | occ->bb_has_division = true; | |
242 | occ->num_divisions++; | |
243 | } | |
244 | ||
245 | ||
246 | /* Compute the number of divisions that postdominate each block in OCC and | |
247 | its children. */ | |
6c2a63a3 | 248 | |
6c2a63a3 | 249 | static void |
bc23502b | 250 | compute_merit (struct occurrence *occ) |
6c2a63a3 | 251 | { |
bc23502b PB |
252 | struct occurrence *occ_child; |
253 | basic_block dom = occ->bb; | |
6c2a63a3 | 254 | |
bc23502b | 255 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
6c2a63a3 | 256 | { |
bc23502b PB |
257 | basic_block bb; |
258 | if (occ_child->children) | |
259 | compute_merit (occ_child); | |
260 | ||
261 | if (flag_exceptions) | |
262 | bb = single_noncomplex_succ (dom); | |
263 | else | |
264 | bb = dom; | |
265 | ||
266 | if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb)) | |
267 | occ->num_divisions += occ_child->num_divisions; | |
268 | } | |
269 | } | |
270 | ||
271 | ||
272 | /* Return whether USE_STMT is a floating-point division by DEF. */ | |
273 | static inline bool | |
726a989a | 274 | is_division_by (gimple use_stmt, tree def) |
bc23502b | 275 | { |
726a989a RB |
276 | return is_gimple_assign (use_stmt) |
277 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR | |
278 | && gimple_assign_rhs2 (use_stmt) == def | |
8a5b57cd RG |
279 | /* Do not recognize x / x as valid division, as we are getting |
280 | confused later by replacing all immediate uses x in such | |
281 | a stmt. */ | |
726a989a | 282 | && gimple_assign_rhs1 (use_stmt) != def; |
bc23502b PB |
283 | } |
284 | ||
285 | /* Walk the subset of the dominator tree rooted at OCC, setting the | |
286 | RECIP_DEF field to a definition of 1.0 / DEF that can be used in | |
287 | the given basic block. The field may be left NULL, of course, | |
288 | if it is not possible or profitable to do the optimization. | |
289 | ||
290 | DEF_BSI is an iterator pointing at the statement defining DEF. | |
291 | If RECIP_DEF is set, a dominator already has a computation that can | |
292 | be used. */ | |
293 | ||
294 | static void | |
726a989a | 295 | insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, |
bc23502b PB |
296 | tree def, tree recip_def, int threshold) |
297 | { | |
726a989a RB |
298 | tree type; |
299 | gimple new_stmt; | |
300 | gimple_stmt_iterator gsi; | |
bc23502b PB |
301 | struct occurrence *occ_child; |
302 | ||
303 | if (!recip_def | |
304 | && (occ->bb_has_division || !flag_trapping_math) | |
305 | && occ->num_divisions >= threshold) | |
306 | { | |
307 | /* Make a variable with the replacement and substitute it. */ | |
308 | type = TREE_TYPE (def); | |
309 | recip_def = make_rename_temp (type, "reciptmp"); | |
726a989a RB |
310 | new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def, |
311 | build_one_cst (type), def); | |
bc23502b PB |
312 | |
313 | if (occ->bb_has_division) | |
314 | { | |
315 | /* Case 1: insert before an existing division. */ | |
726a989a RB |
316 | gsi = gsi_after_labels (occ->bb); |
317 | while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def)) | |
318 | gsi_next (&gsi); | |
bc23502b | 319 | |
726a989a | 320 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
bc23502b | 321 | } |
726a989a | 322 | else if (def_gsi && occ->bb == def_gsi->bb) |
ac264fef | 323 | { |
bc23502b PB |
324 | /* Case 2: insert right after the definition. Note that this will |
325 | never happen if the definition statement can throw, because in | |
326 | that case the sole successor of the statement's basic block will | |
327 | dominate all the uses as well. */ | |
726a989a | 328 | gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); |
ac264fef | 329 | } |
bc23502b PB |
330 | else |
331 | { | |
332 | /* Case 3: insert in a basic block not containing defs/uses. */ | |
726a989a RB |
333 | gsi = gsi_after_labels (occ->bb); |
334 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); | |
bc23502b PB |
335 | } |
336 | ||
337 | occ->recip_def_stmt = new_stmt; | |
6c2a63a3 PB |
338 | } |
339 | ||
bc23502b PB |
340 | occ->recip_def = recip_def; |
341 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) | |
726a989a | 342 | insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold); |
bc23502b PB |
343 | } |
344 | ||
345 | ||
346 | /* Replace the division at USE_P with a multiplication by the reciprocal, if | |
347 | possible. */ | |
348 | ||
349 | static inline void | |
350 | replace_reciprocal (use_operand_p use_p) | |
351 | { | |
726a989a RB |
352 | gimple use_stmt = USE_STMT (use_p); |
353 | basic_block bb = gimple_bb (use_stmt); | |
bc23502b PB |
354 | struct occurrence *occ = (struct occurrence *) bb->aux; |
355 | ||
efd8f750 JH |
356 | if (optimize_bb_for_speed_p (bb) |
357 | && occ->recip_def && use_stmt != occ->recip_def_stmt) | |
bc23502b | 358 | { |
726a989a | 359 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |
bc23502b PB |
360 | SET_USE (use_p, occ->recip_def); |
361 | fold_stmt_inplace (use_stmt); | |
362 | update_stmt (use_stmt); | |
363 | } | |
364 | } | |
365 | ||
366 | ||
367 | /* Free OCC and return one more "struct occurrence" to be freed. */ | |
368 | ||
369 | static struct occurrence * | |
370 | free_bb (struct occurrence *occ) | |
371 | { | |
372 | struct occurrence *child, *next; | |
373 | ||
374 | /* First get the two pointers hanging off OCC. */ | |
375 | next = occ->next; | |
376 | child = occ->children; | |
377 | occ->bb->aux = NULL; | |
378 | pool_free (occ_pool, occ); | |
379 | ||
380 | /* Now ensure that we don't recurse unless it is necessary. */ | |
381 | if (!child) | |
382 | return next; | |
2ef571e2 | 383 | else |
bc23502b PB |
384 | { |
385 | while (next) | |
386 | next = free_bb (next); | |
387 | ||
388 | return child; | |
389 | } | |
390 | } | |
391 | ||
392 | ||
393 | /* Look for floating-point divisions among DEF's uses, and try to | |
394 | replace them by multiplications with the reciprocal. Add | |
395 | as many statements computing the reciprocal as needed. | |
396 | ||
397 | DEF must be a GIMPLE register of a floating-point type. */ | |
398 | ||
399 | static void | |
726a989a | 400 | execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) |
bc23502b PB |
401 | { |
402 | use_operand_p use_p; | |
403 | imm_use_iterator use_iter; | |
404 | struct occurrence *occ; | |
405 | int count = 0, threshold; | |
6c2a63a3 | 406 | |
bc23502b PB |
407 | gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def)); |
408 | ||
409 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) | |
6c2a63a3 | 410 | { |
726a989a | 411 | gimple use_stmt = USE_STMT (use_p); |
bc23502b | 412 | if (is_division_by (use_stmt, def)) |
6c2a63a3 | 413 | { |
726a989a | 414 | register_division_in (gimple_bb (use_stmt)); |
bc23502b | 415 | count++; |
6c2a63a3 PB |
416 | } |
417 | } | |
bc23502b PB |
418 | |
419 | /* Do the expensive part only if we can hope to optimize something. */ | |
420 | threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); | |
421 | if (count >= threshold) | |
422 | { | |
726a989a | 423 | gimple use_stmt; |
bc23502b PB |
424 | for (occ = occ_head; occ; occ = occ->next) |
425 | { | |
426 | compute_merit (occ); | |
726a989a | 427 | insert_reciprocals (def_gsi, occ, def, NULL, threshold); |
bc23502b PB |
428 | } |
429 | ||
6c00f606 | 430 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def) |
bc23502b | 431 | { |
bc23502b | 432 | if (is_division_by (use_stmt, def)) |
6c00f606 AM |
433 | { |
434 | FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) | |
435 | replace_reciprocal (use_p); | |
436 | } | |
bc23502b PB |
437 | } |
438 | } | |
439 | ||
440 | for (occ = occ_head; occ; ) | |
441 | occ = free_bb (occ); | |
442 | ||
443 | occ_head = NULL; | |
6c2a63a3 PB |
444 | } |
445 | ||
bc23502b PB |
446 | static bool |
447 | gate_cse_reciprocals (void) | |
448 | { | |
efd8f750 | 449 | return optimize && flag_reciprocal_math; |
bc23502b PB |
450 | } |
451 | ||
bc23502b PB |
452 | /* Go through all the floating-point SSA_NAMEs, and call |
453 | execute_cse_reciprocals_1 on each of them. */ | |
c2924966 | 454 | static unsigned int |
6c2a63a3 PB |
455 | execute_cse_reciprocals (void) |
456 | { | |
457 | basic_block bb; | |
a8f82ec4 | 458 | tree arg; |
ac264fef | 459 | |
bc23502b PB |
460 | occ_pool = create_alloc_pool ("dominators for recip", |
461 | sizeof (struct occurrence), | |
462 | n_basic_blocks / 3 + 1); | |
ac264fef | 463 | |
d898f3ce PB |
464 | calculate_dominance_info (CDI_DOMINATORS); |
465 | calculate_dominance_info (CDI_POST_DOMINATORS); | |
bc23502b PB |
466 | |
467 | #ifdef ENABLE_CHECKING | |
468 | FOR_EACH_BB (bb) | |
469 | gcc_assert (!bb->aux); | |
470 | #endif | |
471 | ||
472 | for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg)) | |
5cd4ec7f | 473 | if (gimple_default_def (cfun, arg) |
bc23502b PB |
474 | && FLOAT_TYPE_P (TREE_TYPE (arg)) |
475 | && is_gimple_reg (arg)) | |
5cd4ec7f | 476 | execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg)); |
a8f82ec4 | 477 | |
6c2a63a3 PB |
478 | FOR_EACH_BB (bb) |
479 | { | |
726a989a RB |
480 | gimple_stmt_iterator gsi; |
481 | gimple phi; | |
482 | tree def; | |
6c2a63a3 | 483 | |
726a989a | 484 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
6c2a63a3 | 485 | { |
726a989a | 486 | phi = gsi_stmt (gsi); |
6c2a63a3 PB |
487 | def = PHI_RESULT (phi); |
488 | if (FLOAT_TYPE_P (TREE_TYPE (def)) | |
489 | && is_gimple_reg (def)) | |
bc23502b | 490 | execute_cse_reciprocals_1 (NULL, def); |
6c2a63a3 PB |
491 | } |
492 | ||
726a989a | 493 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
6c2a63a3 | 494 | { |
726a989a | 495 | gimple stmt = gsi_stmt (gsi); |
2f397a93 | 496 | |
726a989a | 497 | if (gimple_has_lhs (stmt) |
6c2a63a3 PB |
498 | && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL |
499 | && FLOAT_TYPE_P (TREE_TYPE (def)) | |
a8f82ec4 | 500 | && TREE_CODE (def) == SSA_NAME) |
726a989a | 501 | execute_cse_reciprocals_1 (&gsi, def); |
6c2a63a3 | 502 | } |
6b889d89 | 503 | |
efd8f750 JH |
504 | if (optimize_bb_for_size_p (bb)) |
505 | continue; | |
506 | ||
6b889d89 | 507 | /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */ |
726a989a | 508 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
6b889d89 | 509 | { |
726a989a | 510 | gimple stmt = gsi_stmt (gsi); |
6b889d89 UB |
511 | tree fndecl; |
512 | ||
726a989a RB |
513 | if (is_gimple_assign (stmt) |
514 | && gimple_assign_rhs_code (stmt) == RDIV_EXPR) | |
6b889d89 | 515 | { |
726a989a RB |
516 | tree arg1 = gimple_assign_rhs2 (stmt); |
517 | gimple stmt1; | |
ac10986f UB |
518 | |
519 | if (TREE_CODE (arg1) != SSA_NAME) | |
520 | continue; | |
521 | ||
522 | stmt1 = SSA_NAME_DEF_STMT (arg1); | |
6b889d89 | 523 | |
726a989a RB |
524 | if (is_gimple_call (stmt1) |
525 | && gimple_call_lhs (stmt1) | |
526 | && (fndecl = gimple_call_fndecl (stmt1)) | |
6b889d89 UB |
527 | && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL |
528 | || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)) | |
529 | { | |
530 | enum built_in_function code; | |
ac10986f | 531 | bool md_code; |
6b889d89 UB |
532 | |
533 | code = DECL_FUNCTION_CODE (fndecl); | |
ac10986f UB |
534 | md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD; |
535 | ||
536 | fndecl = targetm.builtin_reciprocal (code, md_code, false); | |
6b889d89 UB |
537 | if (!fndecl) |
538 | continue; | |
539 | ||
7c9577be | 540 | gimple_call_set_fndecl (stmt1, fndecl); |
6b889d89 UB |
541 | update_stmt (stmt1); |
542 | ||
726a989a | 543 | gimple_assign_set_rhs_code (stmt, MULT_EXPR); |
6b889d89 UB |
544 | fold_stmt_inplace (stmt); |
545 | update_stmt (stmt); | |
546 | } | |
547 | } | |
548 | } | |
6c2a63a3 | 549 | } |
ac264fef | 550 | |
d898f3ce PB |
551 | free_dominance_info (CDI_DOMINATORS); |
552 | free_dominance_info (CDI_POST_DOMINATORS); | |
bc23502b | 553 | free_alloc_pool (occ_pool); |
c2924966 | 554 | return 0; |
6c2a63a3 PB |
555 | } |
556 | ||
8ddbbcae | 557 | struct gimple_opt_pass pass_cse_reciprocals = |
6c2a63a3 | 558 | { |
8ddbbcae JH |
559 | { |
560 | GIMPLE_PASS, | |
6c2a63a3 PB |
561 | "recip", /* name */ |
562 | gate_cse_reciprocals, /* gate */ | |
563 | execute_cse_reciprocals, /* execute */ | |
564 | NULL, /* sub */ | |
565 | NULL, /* next */ | |
566 | 0, /* static_pass_number */ | |
567 | 0, /* tv_id */ | |
568 | PROP_ssa, /* properties_required */ | |
569 | 0, /* properties_provided */ | |
570 | 0, /* properties_destroyed */ | |
571 | 0, /* todo_flags_start */ | |
572 | TODO_dump_func | TODO_update_ssa | TODO_verify_ssa | |
8ddbbcae JH |
573 | | TODO_verify_stmts /* todo_flags_finish */ |
574 | } | |
6c2a63a3 | 575 | }; |
2f397a93 | 576 | |
88512ba0 | 577 | /* Records an occurrence at statement USE_STMT in the vector of trees |
2f397a93 | 578 | STMTS if it is dominated by *TOP_BB or dominates it or this basic block |
88512ba0 | 579 | is not yet initialized. Returns true if the occurrence was pushed on |
2f397a93 RG |
580 | the vector. Adjusts *TOP_BB to be the basic block dominating all |
581 | statements in the vector. */ | |
582 | ||
583 | static bool | |
726a989a RB |
584 | maybe_record_sincos (VEC(gimple, heap) **stmts, |
585 | basic_block *top_bb, gimple use_stmt) | |
2f397a93 | 586 | { |
726a989a | 587 | basic_block use_bb = gimple_bb (use_stmt); |
2f397a93 RG |
588 | if (*top_bb |
589 | && (*top_bb == use_bb | |
590 | || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) | |
726a989a | 591 | VEC_safe_push (gimple, heap, *stmts, use_stmt); |
2f397a93 RG |
592 | else if (!*top_bb |
593 | || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) | |
594 | { | |
726a989a | 595 | VEC_safe_push (gimple, heap, *stmts, use_stmt); |
2f397a93 RG |
596 | *top_bb = use_bb; |
597 | } | |
598 | else | |
599 | return false; | |
600 | ||
601 | return true; | |
602 | } | |
603 | ||
604 | /* Look for sin, cos and cexpi calls with the same argument NAME and | |
605 | create a single call to cexpi CSEing the result in this case. | |
606 | We first walk over all immediate uses of the argument collecting | |
607 | statements that we can CSE in a vector and in a second pass replace | |
608 | the statement rhs with a REALPART or IMAGPART expression on the | |
609 | result of the cexpi call we insert before the use statement that | |
610 | dominates all other candidates. */ | |
611 | ||
612 | static void | |
613 | execute_cse_sincos_1 (tree name) | |
614 | { | |
726a989a | 615 | gimple_stmt_iterator gsi; |
2f397a93 | 616 | imm_use_iterator use_iter; |
726a989a RB |
617 | tree fndecl, res, type; |
618 | gimple def_stmt, use_stmt, stmt; | |
2f397a93 | 619 | int seen_cos = 0, seen_sin = 0, seen_cexpi = 0; |
726a989a | 620 | VEC(gimple, heap) *stmts = NULL; |
2f397a93 RG |
621 | basic_block top_bb = NULL; |
622 | int i; | |
623 | ||
624 | type = TREE_TYPE (name); | |
625 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name) | |
626 | { | |
726a989a RB |
627 | if (gimple_code (use_stmt) != GIMPLE_CALL |
628 | || !gimple_call_lhs (use_stmt) | |
629 | || !(fndecl = gimple_call_fndecl (use_stmt)) | |
2f397a93 RG |
630 | || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) |
631 | continue; | |
632 | ||
633 | switch (DECL_FUNCTION_CODE (fndecl)) | |
634 | { | |
635 | CASE_FLT_FN (BUILT_IN_COS): | |
636 | seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; | |
637 | break; | |
638 | ||
639 | CASE_FLT_FN (BUILT_IN_SIN): | |
640 | seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; | |
641 | break; | |
642 | ||
643 | CASE_FLT_FN (BUILT_IN_CEXPI): | |
644 | seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; | |
645 | break; | |
646 | ||
647 | default:; | |
648 | } | |
649 | } | |
650 | ||
651 | if (seen_cos + seen_sin + seen_cexpi <= 1) | |
652 | { | |
726a989a | 653 | VEC_free(gimple, heap, stmts); |
2f397a93 RG |
654 | return; |
655 | } | |
656 | ||
657 | /* Simply insert cexpi at the beginning of top_bb but not earlier than | |
658 | the name def statement. */ | |
659 | fndecl = mathfn_built_in (type, BUILT_IN_CEXPI); | |
660 | if (!fndecl) | |
661 | return; | |
662 | res = make_rename_temp (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp"); | |
726a989a RB |
663 | stmt = gimple_build_call (fndecl, 1, name); |
664 | gimple_call_set_lhs (stmt, res); | |
665 | ||
2f397a93 | 666 | def_stmt = SSA_NAME_DEF_STMT (name); |
59805c3b | 667 | if (!SSA_NAME_IS_DEFAULT_DEF (name) |
726a989a RB |
668 | && gimple_code (def_stmt) != GIMPLE_PHI |
669 | && gimple_bb (def_stmt) == top_bb) | |
2f397a93 | 670 | { |
726a989a RB |
671 | gsi = gsi_for_stmt (def_stmt); |
672 | gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); | |
2f397a93 RG |
673 | } |
674 | else | |
675 | { | |
726a989a RB |
676 | gsi = gsi_after_labels (top_bb); |
677 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
2f397a93 RG |
678 | } |
679 | update_stmt (stmt); | |
680 | ||
681 | /* And adjust the recorded old call sites. */ | |
726a989a | 682 | for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i) |
2f397a93 | 683 | { |
726a989a RB |
684 | tree rhs = NULL; |
685 | fndecl = gimple_call_fndecl (use_stmt); | |
686 | ||
2f397a93 RG |
687 | switch (DECL_FUNCTION_CODE (fndecl)) |
688 | { | |
689 | CASE_FLT_FN (BUILT_IN_COS): | |
726a989a | 690 | rhs = fold_build1 (REALPART_EXPR, type, res); |
2f397a93 RG |
691 | break; |
692 | ||
693 | CASE_FLT_FN (BUILT_IN_SIN): | |
726a989a | 694 | rhs = fold_build1 (IMAGPART_EXPR, type, res); |
2f397a93 RG |
695 | break; |
696 | ||
697 | CASE_FLT_FN (BUILT_IN_CEXPI): | |
726a989a | 698 | rhs = res; |
2f397a93 RG |
699 | break; |
700 | ||
701 | default:; | |
702 | gcc_unreachable (); | |
703 | } | |
704 | ||
726a989a RB |
705 | /* Replace call with a copy. */ |
706 | stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs); | |
707 | ||
708 | gsi = gsi_for_stmt (use_stmt); | |
709 | gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); | |
710 | gsi_remove (&gsi, true); | |
2f397a93 RG |
711 | } |
712 | ||
726a989a | 713 | VEC_free(gimple, heap, stmts); |
2f397a93 RG |
714 | } |
715 | ||
716 | /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 | |
717 | on the SSA_NAME argument of each of them. */ | |
718 | ||
719 | static unsigned int | |
720 | execute_cse_sincos (void) | |
721 | { | |
722 | basic_block bb; | |
723 | ||
724 | calculate_dominance_info (CDI_DOMINATORS); | |
725 | ||
726 | FOR_EACH_BB (bb) | |
727 | { | |
726a989a | 728 | gimple_stmt_iterator gsi; |
2f397a93 | 729 | |
726a989a | 730 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
2f397a93 | 731 | { |
726a989a | 732 | gimple stmt = gsi_stmt (gsi); |
2f397a93 RG |
733 | tree fndecl; |
734 | ||
726a989a RB |
735 | if (is_gimple_call (stmt) |
736 | && gimple_call_lhs (stmt) | |
737 | && (fndecl = gimple_call_fndecl (stmt)) | |
2f397a93 RG |
738 | && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) |
739 | { | |
740 | tree arg; | |
741 | ||
742 | switch (DECL_FUNCTION_CODE (fndecl)) | |
743 | { | |
744 | CASE_FLT_FN (BUILT_IN_COS): | |
745 | CASE_FLT_FN (BUILT_IN_SIN): | |
746 | CASE_FLT_FN (BUILT_IN_CEXPI): | |
726a989a | 747 | arg = gimple_call_arg (stmt, 0); |
2f397a93 RG |
748 | if (TREE_CODE (arg) == SSA_NAME) |
749 | execute_cse_sincos_1 (arg); | |
750 | break; | |
751 | ||
752 | default:; | |
753 | } | |
754 | } | |
755 | } | |
756 | } | |
757 | ||
758 | free_dominance_info (CDI_DOMINATORS); | |
759 | return 0; | |
760 | } | |
761 | ||
762 | static bool | |
763 | gate_cse_sincos (void) | |
764 | { | |
765 | /* Make sure we have either sincos or cexp. */ | |
766 | return (TARGET_HAS_SINCOS | |
767 | || TARGET_C99_FUNCTIONS) | |
768 | && optimize; | |
769 | } | |
770 | ||
8ddbbcae | 771 | struct gimple_opt_pass pass_cse_sincos = |
2f397a93 | 772 | { |
8ddbbcae JH |
773 | { |
774 | GIMPLE_PASS, | |
2f397a93 RG |
775 | "sincos", /* name */ |
776 | gate_cse_sincos, /* gate */ | |
777 | execute_cse_sincos, /* execute */ | |
778 | NULL, /* sub */ | |
779 | NULL, /* next */ | |
780 | 0, /* static_pass_number */ | |
781 | 0, /* tv_id */ | |
782 | PROP_ssa, /* properties_required */ | |
783 | 0, /* properties_provided */ | |
784 | 0, /* properties_destroyed */ | |
785 | 0, /* todo_flags_start */ | |
786 | TODO_dump_func | TODO_update_ssa | TODO_verify_ssa | |
8ddbbcae JH |
787 | | TODO_verify_stmts /* todo_flags_finish */ |
788 | } | |
2f397a93 | 789 | }; |
6b889d89 UB |
790 | |
791 | /* Find all expressions in the form of sqrt(a/b) and | |
792 | convert them to rsqrt(b/a). */ | |
793 | ||
794 | static unsigned int | |
795 | execute_convert_to_rsqrt (void) | |
796 | { | |
797 | basic_block bb; | |
798 | ||
799 | FOR_EACH_BB (bb) | |
800 | { | |
726a989a | 801 | gimple_stmt_iterator gsi; |
6b889d89 | 802 | |
726a989a | 803 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
6b889d89 | 804 | { |
726a989a | 805 | gimple stmt = gsi_stmt (gsi); |
6b889d89 UB |
806 | tree fndecl; |
807 | ||
726a989a RB |
808 | if (is_gimple_call (stmt) |
809 | && gimple_call_lhs (stmt) | |
810 | && (fndecl = gimple_call_fndecl (stmt)) | |
6b889d89 UB |
811 | && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL |
812 | || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)) | |
813 | { | |
814 | enum built_in_function code; | |
ac10986f | 815 | bool md_code; |
6b889d89 | 816 | tree arg1; |
726a989a | 817 | gimple stmt1; |
6b889d89 UB |
818 | |
819 | code = DECL_FUNCTION_CODE (fndecl); | |
ac10986f UB |
820 | md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD; |
821 | ||
822 | fndecl = targetm.builtin_reciprocal (code, md_code, true); | |
6b889d89 UB |
823 | if (!fndecl) |
824 | continue; | |
825 | ||
726a989a | 826 | arg1 = gimple_call_arg (stmt, 0); |
ac10986f UB |
827 | |
828 | if (TREE_CODE (arg1) != SSA_NAME) | |
829 | continue; | |
830 | ||
6b889d89 UB |
831 | stmt1 = SSA_NAME_DEF_STMT (arg1); |
832 | ||
726a989a RB |
833 | if (is_gimple_assign (stmt1) |
834 | && gimple_assign_rhs_code (stmt1) == RDIV_EXPR) | |
6b889d89 UB |
835 | { |
836 | tree arg10, arg11; | |
6b889d89 | 837 | |
726a989a RB |
838 | arg10 = gimple_assign_rhs1 (stmt1); |
839 | arg11 = gimple_assign_rhs2 (stmt1); | |
6b889d89 UB |
840 | |
841 | /* Swap operands of RDIV_EXPR. */ | |
726a989a RB |
842 | gimple_assign_set_rhs1 (stmt1, arg11); |
843 | gimple_assign_set_rhs2 (stmt1, arg10); | |
6b889d89 UB |
844 | fold_stmt_inplace (stmt1); |
845 | update_stmt (stmt1); | |
846 | ||
7c9577be | 847 | gimple_call_set_fndecl (stmt, fndecl); |
6b889d89 UB |
848 | update_stmt (stmt); |
849 | } | |
850 | } | |
851 | } | |
852 | } | |
853 | ||
854 | return 0; | |
855 | } | |
856 | ||
857 | static bool | |
858 | gate_convert_to_rsqrt (void) | |
859 | { | |
860 | return flag_unsafe_math_optimizations && optimize; | |
861 | } | |
862 | ||
8ddbbcae | 863 | struct gimple_opt_pass pass_convert_to_rsqrt = |
6b889d89 | 864 | { |
8ddbbcae JH |
865 | { |
866 | GIMPLE_PASS, | |
6b889d89 UB |
867 | "rsqrt", /* name */ |
868 | gate_convert_to_rsqrt, /* gate */ | |
869 | execute_convert_to_rsqrt, /* execute */ | |
870 | NULL, /* sub */ | |
871 | NULL, /* next */ | |
872 | 0, /* static_pass_number */ | |
873 | 0, /* tv_id */ | |
874 | PROP_ssa, /* properties_required */ | |
875 | 0, /* properties_provided */ | |
876 | 0, /* properties_destroyed */ | |
877 | 0, /* todo_flags_start */ | |
878 | TODO_dump_func | TODO_update_ssa | TODO_verify_ssa | |
8ddbbcae JH |
879 | | TODO_verify_stmts /* todo_flags_finish */ |
880 | } | |
6b889d89 | 881 | }; |