]>
Commit | Line | Data |
---|---|---|
4ee9c684 | 1 | /* Optimization of PHI nodes by converting them into straightline code. |
711789cc | 2 | Copyright (C) 2004-2013 Free Software Foundation, Inc. |
4ee9c684 | 3 | |
4 | This file is part of GCC. | |
20e5647c | 5 | |
4ee9c684 | 6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the | |
8c4c00c1 | 8 | Free Software Foundation; either version 3, or (at your option) any |
4ee9c684 | 9 | later version. |
20e5647c | 10 | |
4ee9c684 | 11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
20e5647c | 15 | |
4ee9c684 | 16 | You should have received a copy of the GNU General Public License |
8c4c00c1 | 17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
4ee9c684 | 19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "tm.h" | |
4ee9c684 | 24 | #include "ggc.h" |
25 | #include "tree.h" | |
0beac6fc | 26 | #include "flags.h" |
4ee9c684 | 27 | #include "tm_p.h" |
28 | #include "basic-block.h" | |
4ee9c684 | 29 | #include "tree-flow.h" |
30 | #include "tree-pass.h" | |
4ee9c684 | 31 | #include "langhooks.h" |
e6d0e152 | 32 | #include "pointer-set.h" |
33 | #include "domwalk.h" | |
ec611e12 | 34 | #include "cfgloop.h" |
35 | #include "tree-data-ref.h" | |
239e9670 | 36 | #include "gimple-pretty-print.h" |
37 | #include "insn-config.h" | |
38 | #include "expr.h" | |
39 | #include "optabs.h" | |
40 | ||
41 | #ifndef HAVE_conditional_move | |
42 | #define HAVE_conditional_move (0) | |
43 | #endif | |
4ee9c684 | 44 | |
75a70cf9 | 45 | static unsigned int tree_ssa_phiopt (void); |
239e9670 | 46 | static unsigned int tree_ssa_phiopt_worker (bool, bool); |
a4844041 | 47 | static bool conditional_replacement (basic_block, basic_block, |
75a70cf9 | 48 | edge, edge, gimple, tree, tree); |
fb9912ea | 49 | static int value_replacement (basic_block, basic_block, |
50 | edge, edge, gimple, tree, tree); | |
a4844041 | 51 | static bool minmax_replacement (basic_block, basic_block, |
75a70cf9 | 52 | edge, edge, gimple, tree, tree); |
a4844041 | 53 | static bool abs_replacement (basic_block, basic_block, |
75a70cf9 | 54 | edge, edge, gimple, tree, tree); |
e6d0e152 | 55 | static bool cond_store_replacement (basic_block, basic_block, edge, edge, |
56 | struct pointer_set_t *); | |
91cf53d5 | 57 | static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); |
e6d0e152 | 58 | static struct pointer_set_t * get_non_trapping (void); |
75a70cf9 | 59 | static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree); |
239e9670 | 60 | static void hoist_adjacent_loads (basic_block, basic_block, |
61 | basic_block, basic_block); | |
62 | static bool gate_hoist_loads (void); | |
902929aa | 63 | |
caac37c2 | 64 | /* This pass tries to replaces an if-then-else block with an |
65 | assignment. We have four kinds of transformations. Some of these | |
66 | transformations are also performed by the ifcvt RTL optimizer. | |
67 | ||
68 | Conditional Replacement | |
69 | ----------------------- | |
70 | ||
30c5ffd2 | 71 | This transformation, implemented in conditional_replacement, |
caac37c2 | 72 | replaces |
4ee9c684 | 73 | |
74 | bb0: | |
75 | if (cond) goto bb2; else goto bb1; | |
76 | bb1: | |
77 | bb2: | |
caac37c2 | 78 | x = PHI <0 (bb1), 1 (bb0), ...>; |
4ee9c684 | 79 | |
caac37c2 | 80 | with |
20e5647c | 81 | |
2ab0a163 | 82 | bb0: |
caac37c2 | 83 | x' = cond; |
84 | goto bb2; | |
2ab0a163 | 85 | bb2: |
caac37c2 | 86 | x = PHI <x' (bb0), ...>; |
4ee9c684 | 87 | |
caac37c2 | 88 | We remove bb1 as it becomes unreachable. This occurs often due to |
89 | gimplification of conditionals. | |
20e5647c | 90 | |
caac37c2 | 91 | Value Replacement |
92 | ----------------- | |
93 | ||
94 | This transformation, implemented in value_replacement, replaces | |
0beac6fc | 95 | |
96 | bb0: | |
caac37c2 | 97 | if (a != b) goto bb2; else goto bb1; |
0beac6fc | 98 | bb1: |
99 | bb2: | |
caac37c2 | 100 | x = PHI <a (bb1), b (bb0), ...>; |
0beac6fc | 101 | |
caac37c2 | 102 | with |
0beac6fc | 103 | |
104 | bb0: | |
0beac6fc | 105 | bb2: |
caac37c2 | 106 | x = PHI <b (bb0), ...>; |
107 | ||
108 | This opportunity can sometimes occur as a result of other | |
109 | optimizations. | |
0beac6fc | 110 | |
caac37c2 | 111 | ABS Replacement |
112 | --------------- | |
70512b93 | 113 | |
caac37c2 | 114 | This transformation, implemented in abs_replacement, replaces |
70512b93 | 115 | |
116 | bb0: | |
caac37c2 | 117 | if (a >= 0) goto bb2; else goto bb1; |
70512b93 | 118 | bb1: |
caac37c2 | 119 | x = -a; |
70512b93 | 120 | bb2: |
caac37c2 | 121 | x = PHI <x (bb1), a (bb0), ...>; |
70512b93 | 122 | |
caac37c2 | 123 | with |
70512b93 | 124 | |
125 | bb0: | |
caac37c2 | 126 | x' = ABS_EXPR< a >; |
70512b93 | 127 | bb2: |
caac37c2 | 128 | x = PHI <x' (bb0), ...>; |
129 | ||
130 | MIN/MAX Replacement | |
131 | ------------------- | |
70512b93 | 132 | |
caac37c2 | 133 | This transformation, minmax_replacement replaces |
194899bf | 134 | |
135 | bb0: | |
caac37c2 | 136 | if (a <= b) goto bb2; else goto bb1; |
194899bf | 137 | bb1: |
194899bf | 138 | bb2: |
caac37c2 | 139 | x = PHI <b (bb1), a (bb0), ...>; |
194899bf | 140 | |
caac37c2 | 141 | with |
194899bf | 142 | |
caac37c2 | 143 | bb0: |
144 | x' = MIN_EXPR (a, b) | |
145 | bb2: | |
146 | x = PHI <x' (bb0), ...>; | |
194899bf | 147 | |
239e9670 | 148 | A similar transformation is done for MAX_EXPR. |
149 | ||
150 | ||
151 | This pass also performs a fifth transformation of a slightly different | |
152 | flavor. | |
153 | ||
154 | Adjacent Load Hoisting | |
155 | ---------------------- | |
156 | ||
157 | This transformation replaces | |
158 | ||
159 | bb0: | |
160 | if (...) goto bb2; else goto bb1; | |
161 | bb1: | |
162 | x1 = (<expr>).field1; | |
163 | goto bb3; | |
164 | bb2: | |
165 | x2 = (<expr>).field2; | |
166 | bb3: | |
167 | # x = PHI <x1, x2>; | |
168 | ||
169 | with | |
170 | ||
171 | bb0: | |
172 | x1 = (<expr>).field1; | |
173 | x2 = (<expr>).field2; | |
174 | if (...) goto bb2; else goto bb1; | |
175 | bb1: | |
176 | goto bb3; | |
177 | bb2: | |
178 | bb3: | |
179 | # x = PHI <x1, x2>; | |
180 | ||
181 | The purpose of this transformation is to enable generation of conditional | |
182 | move instructions such as Intel CMOVE or PowerPC ISEL. Because one of | |
183 | the loads is speculative, the transformation is restricted to very | |
184 | specific cases to avoid introducing a page fault. We are looking for | |
185 | the common idiom: | |
186 | ||
187 | if (...) | |
188 | x = y->left; | |
189 | else | |
190 | x = y->right; | |
191 | ||
192 | where left and right are typically adjacent pointers in a tree structure. */ | |
70512b93 | 193 | |
2a1990e9 | 194 | static unsigned int |
4ee9c684 | 195 | tree_ssa_phiopt (void) |
e6d0e152 | 196 | { |
239e9670 | 197 | return tree_ssa_phiopt_worker (false, gate_hoist_loads ()); |
e6d0e152 | 198 | } |
199 | ||
200 | /* This pass tries to transform conditional stores into unconditional | |
201 | ones, enabling further simplifications with the simpler then and else | |
202 | blocks. In particular it replaces this: | |
203 | ||
204 | bb0: | |
205 | if (cond) goto bb2; else goto bb1; | |
206 | bb1: | |
91cf53d5 | 207 | *p = RHS; |
e6d0e152 | 208 | bb2: |
209 | ||
210 | with | |
211 | ||
212 | bb0: | |
213 | if (cond) goto bb1; else goto bb2; | |
214 | bb1: | |
215 | condtmp' = *p; | |
216 | bb2: | |
217 | condtmp = PHI <RHS, condtmp'> | |
91cf53d5 | 218 | *p = condtmp; |
e6d0e152 | 219 | |
220 | This transformation can only be done under several constraints, | |
91cf53d5 | 221 | documented below. It also replaces: |
222 | ||
223 | bb0: | |
224 | if (cond) goto bb2; else goto bb1; | |
225 | bb1: | |
226 | *p = RHS1; | |
227 | goto bb3; | |
228 | bb2: | |
229 | *p = RHS2; | |
230 | bb3: | |
231 | ||
232 | with | |
233 | ||
234 | bb0: | |
235 | if (cond) goto bb3; else goto bb1; | |
236 | bb1: | |
237 | bb3: | |
238 | condtmp = PHI <RHS1, RHS2> | |
239 | *p = condtmp; */ | |
e6d0e152 | 240 | |
241 | static unsigned int | |
242 | tree_ssa_cs_elim (void) | |
243 | { | |
239e9670 | 244 | return tree_ssa_phiopt_worker (true, false); |
e6d0e152 | 245 | } |
246 | ||
c3597b05 | 247 | /* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */ |
248 | ||
249 | static gimple | |
250 | single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1) | |
251 | { | |
252 | gimple_stmt_iterator i; | |
253 | gimple phi = NULL; | |
254 | if (gimple_seq_singleton_p (seq)) | |
255 | return gsi_stmt (gsi_start (seq)); | |
256 | for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i)) | |
257 | { | |
258 | gimple p = gsi_stmt (i); | |
259 | /* If the PHI arguments are equal then we can skip this PHI. */ | |
260 | if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx), | |
261 | gimple_phi_arg_def (p, e1->dest_idx))) | |
262 | continue; | |
263 | ||
264 | /* If we already have a PHI that has the two edge arguments are | |
265 | different, then return it is not a singleton for these PHIs. */ | |
266 | if (phi) | |
267 | return NULL; | |
268 | ||
269 | phi = p; | |
270 | } | |
271 | return phi; | |
272 | } | |
273 | ||
e6d0e152 | 274 | /* The core routine of conditional store replacement and normal |
275 | phi optimizations. Both share much of the infrastructure in how | |
276 | to match applicable basic block patterns. DO_STORE_ELIM is true | |
239e9670 | 277 | when we want to do conditional store replacement, false otherwise. |
278 | DO_HOIST_LOADS is true when we want to hoist adjacent loads out | |
279 | of diamond control flow patterns, false otherwise. */ | |
e6d0e152 | 280 | static unsigned int |
239e9670 | 281 | tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads) |
4ee9c684 | 282 | { |
283 | basic_block bb; | |
194899bf | 284 | basic_block *bb_order; |
285 | unsigned n, i; | |
1e4b21e3 | 286 | bool cfgchanged = false; |
e6d0e152 | 287 | struct pointer_set_t *nontrap = 0; |
288 | ||
289 | if (do_store_elim) | |
03d37e4e | 290 | /* Calculate the set of non-trapping memory accesses. */ |
291 | nontrap = get_non_trapping (); | |
194899bf | 292 | |
293 | /* Search every basic block for COND_EXPR we may be able to optimize. | |
294 | ||
295 | We walk the blocks in order that guarantees that a block with | |
296 | a single predecessor is processed before the predecessor. | |
297 | This ensures that we collapse inner ifs before visiting the | |
298 | outer ones, and also that we do not try to visit a removed | |
299 | block. */ | |
300 | bb_order = blocks_in_phiopt_order (); | |
4d2e5d52 | 301 | n = n_basic_blocks - NUM_FIXED_BLOCKS; |
4ee9c684 | 302 | |
48e1416a | 303 | for (i = 0; i < n; i++) |
4ee9c684 | 304 | { |
75a70cf9 | 305 | gimple cond_stmt, phi; |
33784d89 | 306 | basic_block bb1, bb2; |
307 | edge e1, e2; | |
194899bf | 308 | tree arg0, arg1; |
309 | ||
310 | bb = bb_order[i]; | |
20e5647c | 311 | |
75a70cf9 | 312 | cond_stmt = last_stmt (bb); |
313 | /* Check to see if the last statement is a GIMPLE_COND. */ | |
314 | if (!cond_stmt | |
315 | || gimple_code (cond_stmt) != GIMPLE_COND) | |
33784d89 | 316 | continue; |
20e5647c | 317 | |
33784d89 | 318 | e1 = EDGE_SUCC (bb, 0); |
319 | bb1 = e1->dest; | |
320 | e2 = EDGE_SUCC (bb, 1); | |
321 | bb2 = e2->dest; | |
20e5647c | 322 | |
33784d89 | 323 | /* We cannot do the optimization on abnormal edges. */ |
324 | if ((e1->flags & EDGE_ABNORMAL) != 0 | |
325 | || (e2->flags & EDGE_ABNORMAL) != 0) | |
326 | continue; | |
20e5647c | 327 | |
33784d89 | 328 | /* If either bb1's succ or bb2 or bb2's succ is non NULL. */ |
ea091dfd | 329 | if (EDGE_COUNT (bb1->succs) == 0 |
33784d89 | 330 | || bb2 == NULL |
ea091dfd | 331 | || EDGE_COUNT (bb2->succs) == 0) |
33784d89 | 332 | continue; |
20e5647c | 333 | |
33784d89 | 334 | /* Find the bb which is the fall through to the other. */ |
335 | if (EDGE_SUCC (bb1, 0)->dest == bb2) | |
336 | ; | |
337 | else if (EDGE_SUCC (bb2, 0)->dest == bb1) | |
338 | { | |
339 | basic_block bb_tmp = bb1; | |
340 | edge e_tmp = e1; | |
341 | bb1 = bb2; | |
342 | bb2 = bb_tmp; | |
343 | e1 = e2; | |
344 | e2 = e_tmp; | |
345 | } | |
91cf53d5 | 346 | else if (do_store_elim |
347 | && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest) | |
348 | { | |
349 | basic_block bb3 = EDGE_SUCC (bb1, 0)->dest; | |
350 | ||
351 | if (!single_succ_p (bb1) | |
352 | || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0 | |
353 | || !single_succ_p (bb2) | |
354 | || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0 | |
355 | || EDGE_COUNT (bb3->preds) != 2) | |
356 | continue; | |
357 | if (cond_if_else_store_replacement (bb1, bb2, bb3)) | |
358 | cfgchanged = true; | |
359 | continue; | |
360 | } | |
239e9670 | 361 | else if (do_hoist_loads |
362 | && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest) | |
363 | { | |
364 | basic_block bb3 = EDGE_SUCC (bb1, 0)->dest; | |
365 | ||
366 | if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt))) | |
367 | && single_succ_p (bb1) | |
368 | && single_succ_p (bb2) | |
369 | && single_pred_p (bb1) | |
370 | && single_pred_p (bb2) | |
371 | && EDGE_COUNT (bb->succs) == 2 | |
372 | && EDGE_COUNT (bb3->preds) == 2 | |
373 | /* If one edge or the other is dominant, a conditional move | |
374 | is likely to perform worse than the well-predicted branch. */ | |
375 | && !predictable_edge_p (EDGE_SUCC (bb, 0)) | |
376 | && !predictable_edge_p (EDGE_SUCC (bb, 1))) | |
377 | hoist_adjacent_loads (bb, bb1, bb2, bb3); | |
378 | continue; | |
379 | } | |
33784d89 | 380 | else |
91cf53d5 | 381 | continue; |
20e5647c | 382 | |
33784d89 | 383 | e1 = EDGE_SUCC (bb1, 0); |
20e5647c | 384 | |
33784d89 | 385 | /* Make sure that bb1 is just a fall through. */ |
db5ba14c | 386 | if (!single_succ_p (bb1) |
33784d89 | 387 | || (e1->flags & EDGE_FALLTHRU) == 0) |
388 | continue; | |
20e5647c | 389 | |
3472707f | 390 | /* Also make sure that bb1 only have one predecessor and that it |
391 | is bb. */ | |
ea091dfd | 392 | if (!single_pred_p (bb1) |
393 | || single_pred (bb1) != bb) | |
33784d89 | 394 | continue; |
20e5647c | 395 | |
e6d0e152 | 396 | if (do_store_elim) |
397 | { | |
398 | /* bb1 is the middle block, bb2 the join block, bb the split block, | |
399 | e1 the fallthrough edge from bb1 to bb2. We can't do the | |
400 | optimization if the join block has more than two predecessors. */ | |
401 | if (EDGE_COUNT (bb2->preds) > 2) | |
402 | continue; | |
403 | if (cond_store_replacement (bb1, bb2, e1, e2, nontrap)) | |
404 | cfgchanged = true; | |
405 | } | |
406 | else | |
407 | { | |
75a70cf9 | 408 | gimple_seq phis = phi_nodes (bb2); |
2109076a | 409 | gimple_stmt_iterator gsi; |
fb9912ea | 410 | bool candorest = true; |
c3597b05 | 411 | |
fb9912ea | 412 | /* Value replacement can work with more than one PHI |
413 | so try that first. */ | |
414 | for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) | |
415 | { | |
416 | phi = gsi_stmt (gsi); | |
417 | arg0 = gimple_phi_arg_def (phi, e1->dest_idx); | |
418 | arg1 = gimple_phi_arg_def (phi, e2->dest_idx); | |
419 | if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2) | |
420 | { | |
421 | candorest = false; | |
422 | cfgchanged = true; | |
423 | break; | |
424 | } | |
425 | } | |
e6d0e152 | 426 | |
fb9912ea | 427 | if (!candorest) |
428 | continue; | |
c3597b05 | 429 | |
430 | phi = single_non_singleton_phi_for_edges (phis, e1, e2); | |
2109076a | 431 | if (!phi) |
e6d0e152 | 432 | continue; |
433 | ||
75a70cf9 | 434 | arg0 = gimple_phi_arg_def (phi, e1->dest_idx); |
435 | arg1 = gimple_phi_arg_def (phi, e2->dest_idx); | |
e6d0e152 | 436 | |
437 | /* Something is wrong if we cannot find the arguments in the PHI | |
438 | node. */ | |
439 | gcc_assert (arg0 != NULL && arg1 != NULL); | |
440 | ||
441 | /* Do the replacement of conditional if it can be done. */ | |
442 | if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) | |
443 | cfgchanged = true; | |
e6d0e152 | 444 | else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) |
445 | cfgchanged = true; | |
446 | else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) | |
447 | cfgchanged = true; | |
448 | } | |
194899bf | 449 | } |
450 | ||
451 | free (bb_order); | |
48e1416a | 452 | |
e6d0e152 | 453 | if (do_store_elim) |
454 | pointer_set_destroy (nontrap); | |
455 | /* If the CFG has changed, we should cleanup the CFG. */ | |
456 | if (cfgchanged && do_store_elim) | |
457 | { | |
458 | /* In cond-store replacement we have added some loads on edges | |
459 | and new VOPS (as we moved the store, and created a load). */ | |
75a70cf9 | 460 | gsi_commit_edge_inserts (); |
e6d0e152 | 461 | return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals; |
462 | } | |
463 | else if (cfgchanged) | |
464 | return TODO_cleanup_cfg; | |
465 | return 0; | |
194899bf | 466 | } |
467 | ||
468 | /* Returns the list of basic blocks in the function in an order that guarantees | |
469 | that if a block X has just a single predecessor Y, then Y is after X in the | |
470 | ordering. */ | |
471 | ||
8530c7be | 472 | basic_block * |
194899bf | 473 | blocks_in_phiopt_order (void) |
474 | { | |
475 | basic_block x, y; | |
945865c5 | 476 | basic_block *order = XNEWVEC (basic_block, n_basic_blocks); |
48e1416a | 477 | unsigned n = n_basic_blocks - NUM_FIXED_BLOCKS; |
4d2e5d52 | 478 | unsigned np, i; |
48e1416a | 479 | sbitmap visited = sbitmap_alloc (last_basic_block); |
194899bf | 480 | |
08b7917c | 481 | #define MARK_VISITED(BB) (bitmap_set_bit (visited, (BB)->index)) |
482 | #define VISITED_P(BB) (bitmap_bit_p (visited, (BB)->index)) | |
194899bf | 483 | |
53c5d9d4 | 484 | bitmap_clear (visited); |
194899bf | 485 | |
486 | MARK_VISITED (ENTRY_BLOCK_PTR); | |
487 | FOR_EACH_BB (x) | |
488 | { | |
489 | if (VISITED_P (x)) | |
490 | continue; | |
491 | ||
492 | /* Walk the predecessors of x as long as they have precisely one | |
493 | predecessor and add them to the list, so that they get stored | |
494 | after x. */ | |
495 | for (y = x, np = 1; | |
496 | single_pred_p (y) && !VISITED_P (single_pred (y)); | |
497 | y = single_pred (y)) | |
498 | np++; | |
499 | for (y = x, i = n - np; | |
500 | single_pred_p (y) && !VISITED_P (single_pred (y)); | |
501 | y = single_pred (y), i++) | |
502 | { | |
503 | order[i] = y; | |
504 | MARK_VISITED (y); | |
2ab0a163 | 505 | } |
194899bf | 506 | order[i] = y; |
507 | MARK_VISITED (y); | |
508 | ||
509 | gcc_assert (i == n - 1); | |
510 | n -= np; | |
4ee9c684 | 511 | } |
194899bf | 512 | |
513 | sbitmap_free (visited); | |
514 | gcc_assert (n == 0); | |
515 | return order; | |
516 | ||
517 | #undef MARK_VISITED | |
518 | #undef VISITED_P | |
4ee9c684 | 519 | } |
520 | ||
fccee353 | 521 | /* Replace PHI node element whose edge is E in block BB with variable NEW. |
33784d89 | 522 | Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK |
902929aa | 523 | is known to have two edges, one of which must reach BB). */ |
524 | ||
525 | static void | |
a4844041 | 526 | replace_phi_edge_with_variable (basic_block cond_block, |
75a70cf9 | 527 | edge e, gimple phi, tree new_tree) |
902929aa | 528 | { |
75a70cf9 | 529 | basic_block bb = gimple_bb (phi); |
0e1a77e1 | 530 | basic_block block_to_remove; |
75a70cf9 | 531 | gimple_stmt_iterator gsi; |
33784d89 | 532 | |
20e5647c | 533 | /* Change the PHI argument to new. */ |
f0d6e81c | 534 | SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree); |
0e1a77e1 | 535 | |
0e1a77e1 | 536 | /* Remove the empty basic block. */ |
cd665a06 | 537 | if (EDGE_SUCC (cond_block, 0)->dest == bb) |
902929aa | 538 | { |
cd665a06 | 539 | EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU; |
540 | EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); | |
81c5be57 | 541 | EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE; |
542 | EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count; | |
0e1a77e1 | 543 | |
cd665a06 | 544 | block_to_remove = EDGE_SUCC (cond_block, 1)->dest; |
902929aa | 545 | } |
546 | else | |
547 | { | |
cd665a06 | 548 | EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU; |
549 | EDGE_SUCC (cond_block, 1)->flags | |
902929aa | 550 | &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); |
81c5be57 | 551 | EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE; |
552 | EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count; | |
0e1a77e1 | 553 | |
cd665a06 | 554 | block_to_remove = EDGE_SUCC (cond_block, 0)->dest; |
902929aa | 555 | } |
0e1a77e1 | 556 | delete_basic_block (block_to_remove); |
20e5647c | 557 | |
902929aa | 558 | /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ |
75a70cf9 | 559 | gsi = gsi_last_bb (cond_block); |
560 | gsi_remove (&gsi, true); | |
20e5647c | 561 | |
902929aa | 562 | if (dump_file && (dump_flags & TDF_DETAILS)) |
563 | fprintf (dump_file, | |
564 | "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n", | |
565 | cond_block->index, | |
566 | bb->index); | |
567 | } | |
568 | ||
569 | /* The function conditional_replacement does the main work of doing the | |
570 | conditional replacement. Return true if the replacement is done. | |
571 | Otherwise return false. | |
572 | BB is the basic block where the replacement is going to be done on. ARG0 | |
dac49aa5 | 573 | is argument 0 from PHI. Likewise for ARG1. */ |
902929aa | 574 | |
575 | static bool | |
33784d89 | 576 | conditional_replacement (basic_block cond_bb, basic_block middle_bb, |
75a70cf9 | 577 | edge e0, edge e1, gimple phi, |
33784d89 | 578 | tree arg0, tree arg1) |
902929aa | 579 | { |
580 | tree result; | |
75a70cf9 | 581 | gimple stmt, new_stmt; |
582 | tree cond; | |
583 | gimple_stmt_iterator gsi; | |
902929aa | 584 | edge true_edge, false_edge; |
75a70cf9 | 585 | tree new_var, new_var2; |
678919fd | 586 | bool neg; |
902929aa | 587 | |
435e1a75 | 588 | /* FIXME: Gimplification of complex type is too hard for now. */ |
47b88316 | 589 | /* We aren't prepared to handle vectors either (and it is a question |
590 | if it would be worthwhile anyway). */ | |
591 | if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0)) | |
592 | || POINTER_TYPE_P (TREE_TYPE (arg0))) | |
593 | || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1)) | |
594 | || POINTER_TYPE_P (TREE_TYPE (arg1)))) | |
435e1a75 | 595 | return false; |
596 | ||
678919fd | 597 | /* The PHI arguments have the constants 0 and 1, or 0 and -1, then |
598 | convert it to the conditional. */ | |
902929aa | 599 | if ((integer_zerop (arg0) && integer_onep (arg1)) |
600 | || (integer_zerop (arg1) && integer_onep (arg0))) | |
678919fd | 601 | neg = false; |
602 | else if ((integer_zerop (arg0) && integer_all_onesp (arg1)) | |
603 | || (integer_zerop (arg1) && integer_all_onesp (arg0))) | |
604 | neg = true; | |
902929aa | 605 | else |
606 | return false; | |
20e5647c | 607 | |
33784d89 | 608 | if (!empty_block_p (middle_bb)) |
902929aa | 609 | return false; |
20e5647c | 610 | |
75a70cf9 | 611 | /* At this point we know we have a GIMPLE_COND with two successors. |
2ab0a163 | 612 | One successor is BB, the other successor is an empty block which |
613 | falls through into BB. | |
20e5647c | 614 | |
2ab0a163 | 615 | There is a single PHI node at the join point (BB) and its arguments |
678919fd | 616 | are constants (0, 1) or (0, -1). |
20e5647c | 617 | |
2ab0a163 | 618 | So, given the condition COND, and the two PHI arguments, we can |
20e5647c | 619 | rewrite this PHI into non-branching code: |
620 | ||
2ab0a163 | 621 | dest = (COND) or dest = COND' |
20e5647c | 622 | |
2ab0a163 | 623 | We use the condition as-is if the argument associated with the |
624 | true edge has the value one or the argument associated with the | |
625 | false edge as the value zero. Note that those conditions are not | |
75a70cf9 | 626 | the same since only one of the outgoing edges from the GIMPLE_COND |
2ab0a163 | 627 | will directly reach BB and thus be associated with an argument. */ |
ae5a4794 | 628 | |
75a70cf9 | 629 | stmt = last_stmt (cond_bb); |
630 | result = PHI_RESULT (phi); | |
b2a02a0e | 631 | |
75a70cf9 | 632 | /* To handle special cases like floating point comparison, it is easier and |
633 | less error-prone to build a tree and gimplify it on the fly though it is | |
634 | less efficient. */ | |
6f9714b3 | 635 | cond = fold_build2_loc (gimple_location (stmt), |
636 | gimple_cond_code (stmt), boolean_type_node, | |
637 | gimple_cond_lhs (stmt), gimple_cond_rhs (stmt)); | |
4ee9c684 | 638 | |
75a70cf9 | 639 | /* We need to know which is the true edge and which is the false |
640 | edge so that we know when to invert the condition below. */ | |
641 | extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); | |
642 | if ((e0 == true_edge && integer_zerop (arg0)) | |
678919fd | 643 | || (e0 == false_edge && !integer_zerop (arg0)) |
75a70cf9 | 644 | || (e1 == true_edge && integer_zerop (arg1)) |
678919fd | 645 | || (e1 == false_edge && !integer_zerop (arg1))) |
6f9714b3 | 646 | cond = fold_build1_loc (gimple_location (stmt), |
678919fd | 647 | TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); |
648 | ||
649 | if (neg) | |
650 | { | |
651 | cond = fold_convert_loc (gimple_location (stmt), | |
652 | TREE_TYPE (result), cond); | |
653 | cond = fold_build1_loc (gimple_location (stmt), | |
654 | NEGATE_EXPR, TREE_TYPE (cond), cond); | |
655 | } | |
75a70cf9 | 656 | |
657 | /* Insert our new statements at the end of conditional block before the | |
658 | COND_STMT. */ | |
659 | gsi = gsi_for_stmt (stmt); | |
660 | new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true, | |
661 | GSI_SAME_STMT); | |
662 | ||
663 | if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var))) | |
664 | { | |
efbcb6de | 665 | source_location locus_0, locus_1; |
666 | ||
03d37e4e | 667 | new_var2 = make_ssa_name (TREE_TYPE (result), NULL); |
75a70cf9 | 668 | new_stmt = gimple_build_assign_with_ops (CONVERT_EXPR, new_var2, |
669 | new_var, NULL); | |
75a70cf9 | 670 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
671 | new_var = new_var2; | |
efbcb6de | 672 | |
673 | /* Set the locus to the first argument, unless is doesn't have one. */ | |
674 | locus_0 = gimple_phi_arg_location (phi, 0); | |
675 | locus_1 = gimple_phi_arg_location (phi, 1); | |
676 | if (locus_0 == UNKNOWN_LOCATION) | |
677 | locus_0 = locus_1; | |
678 | gimple_set_location (new_stmt, locus_0); | |
4ee9c684 | 679 | } |
20e5647c | 680 | |
75a70cf9 | 681 | replace_phi_edge_with_variable (cond_bb, e1, phi, new_var); |
902929aa | 682 | |
4ee9c684 | 683 | /* Note that we optimized this PHI. */ |
684 | return true; | |
685 | } | |
686 | ||
17b9476e | 687 | /* Update *ARG which is defined in STMT so that it contains the |
688 | computed value if that seems profitable. Return true if the | |
689 | statement is made dead by that rewriting. */ | |
690 | ||
691 | static bool | |
692 | jump_function_from_stmt (tree *arg, gimple stmt) | |
693 | { | |
694 | enum tree_code code = gimple_assign_rhs_code (stmt); | |
695 | if (code == ADDR_EXPR) | |
696 | { | |
697 | /* For arg = &p->i transform it to p, if possible. */ | |
698 | tree rhs1 = gimple_assign_rhs1 (stmt); | |
699 | HOST_WIDE_INT offset; | |
700 | tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0), | |
701 | &offset); | |
702 | if (tem | |
703 | && TREE_CODE (tem) == MEM_REF | |
cf8f0e63 | 704 | && (mem_ref_offset (tem) + double_int::from_shwi (offset)).is_zero ()) |
17b9476e | 705 | { |
706 | *arg = TREE_OPERAND (tem, 0); | |
707 | return true; | |
708 | } | |
709 | } | |
710 | /* TODO: Much like IPA-CP jump-functions we want to handle constant | |
711 | additions symbolically here, and we'd need to update the comparison | |
712 | code that compares the arg + cst tuples in our caller. For now the | |
713 | code above exactly handles the VEC_BASE pattern from vec.h. */ | |
714 | return false; | |
715 | } | |
716 | ||
0beac6fc | 717 | /* The function value_replacement does the main work of doing the value |
fb9912ea | 718 | replacement. Return non-zero if the replacement is done. Otherwise return |
719 | 0. If we remove the middle basic block, return 2. | |
0beac6fc | 720 | BB is the basic block where the replacement is going to be done on. ARG0 |
dac49aa5 | 721 | is argument 0 from the PHI. Likewise for ARG1. */ |
0beac6fc | 722 | |
fb9912ea | 723 | static int |
33784d89 | 724 | value_replacement (basic_block cond_bb, basic_block middle_bb, |
75a70cf9 | 725 | edge e0, edge e1, gimple phi, |
33784d89 | 726 | tree arg0, tree arg1) |
0beac6fc | 727 | { |
17b9476e | 728 | gimple_stmt_iterator gsi; |
75a70cf9 | 729 | gimple cond; |
0beac6fc | 730 | edge true_edge, false_edge; |
75a70cf9 | 731 | enum tree_code code; |
fb9912ea | 732 | bool emtpy_or_with_defined_p = true; |
0beac6fc | 733 | |
734 | /* If the type says honor signed zeros we cannot do this | |
dac49aa5 | 735 | optimization. */ |
0beac6fc | 736 | if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1)))) |
fb9912ea | 737 | return 0; |
0beac6fc | 738 | |
fb9912ea | 739 | /* If there is a statement in MIDDLE_BB that defines one of the PHI |
740 | arguments, then adjust arg0 or arg1. */ | |
17b9476e | 741 | gsi = gsi_after_labels (middle_bb); |
fb9912ea | 742 | if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi))) |
743 | gsi_next_nondebug (&gsi); | |
744 | while (!gsi_end_p (gsi)) | |
17b9476e | 745 | { |
fb9912ea | 746 | gimple stmt = gsi_stmt (gsi); |
747 | tree lhs; | |
748 | gsi_next_nondebug (&gsi); | |
749 | if (!is_gimple_assign (stmt)) | |
17b9476e | 750 | { |
fb9912ea | 751 | emtpy_or_with_defined_p = false; |
752 | continue; | |
17b9476e | 753 | } |
fb9912ea | 754 | /* Now try to adjust arg0 or arg1 according to the computation |
755 | in the statement. */ | |
756 | lhs = gimple_assign_lhs (stmt); | |
757 | if (!(lhs == arg0 | |
758 | && jump_function_from_stmt (&arg0, stmt)) | |
759 | || (lhs == arg1 | |
760 | && jump_function_from_stmt (&arg1, stmt))) | |
761 | emtpy_or_with_defined_p = false; | |
17b9476e | 762 | } |
0beac6fc | 763 | |
75a70cf9 | 764 | cond = last_stmt (cond_bb); |
765 | code = gimple_cond_code (cond); | |
0beac6fc | 766 | |
767 | /* This transformation is only valid for equality comparisons. */ | |
75a70cf9 | 768 | if (code != NE_EXPR && code != EQ_EXPR) |
fb9912ea | 769 | return 0; |
0beac6fc | 770 | |
771 | /* We need to know which is the true edge and which is the false | |
772 | edge so that we know if have abs or negative abs. */ | |
33784d89 | 773 | extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); |
0beac6fc | 774 | |
775 | /* At this point we know we have a COND_EXPR with two successors. | |
776 | One successor is BB, the other successor is an empty block which | |
777 | falls through into BB. | |
778 | ||
779 | The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR. | |
780 | ||
781 | There is a single PHI node at the join point (BB) with two arguments. | |
782 | ||
783 | We now need to verify that the two arguments in the PHI node match | |
784 | the two arguments to the equality comparison. */ | |
20e5647c | 785 | |
75a70cf9 | 786 | if ((operand_equal_for_phi_arg_p (arg0, gimple_cond_lhs (cond)) |
787 | && operand_equal_for_phi_arg_p (arg1, gimple_cond_rhs (cond))) | |
788 | || (operand_equal_for_phi_arg_p (arg1, gimple_cond_lhs (cond)) | |
789 | && operand_equal_for_phi_arg_p (arg0, gimple_cond_rhs (cond)))) | |
0beac6fc | 790 | { |
791 | edge e; | |
792 | tree arg; | |
793 | ||
50737d20 | 794 | /* For NE_EXPR, we want to build an assignment result = arg where |
795 | arg is the PHI argument associated with the true edge. For | |
796 | EQ_EXPR we want the PHI argument associated with the false edge. */ | |
75a70cf9 | 797 | e = (code == NE_EXPR ? true_edge : false_edge); |
50737d20 | 798 | |
799 | /* Unfortunately, E may not reach BB (it may instead have gone to | |
800 | OTHER_BLOCK). If that is the case, then we want the single outgoing | |
801 | edge from OTHER_BLOCK which reaches BB and represents the desired | |
802 | path from COND_BLOCK. */ | |
33784d89 | 803 | if (e->dest == middle_bb) |
ea091dfd | 804 | e = single_succ_edge (e->dest); |
50737d20 | 805 | |
806 | /* Now we know the incoming edge to BB that has the argument for the | |
807 | RHS of our new assignment statement. */ | |
33784d89 | 808 | if (e0 == e) |
0beac6fc | 809 | arg = arg0; |
810 | else | |
811 | arg = arg1; | |
812 | ||
fb9912ea | 813 | /* If the middle basic block was empty or is defining the |
c3597b05 | 814 | PHI arguments and this is a single phi where the args are different |
815 | for the edges e0 and e1 then we can remove the middle basic block. */ | |
fb9912ea | 816 | if (emtpy_or_with_defined_p |
c3597b05 | 817 | && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), |
818 | e0, e1)) | |
fb9912ea | 819 | { |
820 | replace_phi_edge_with_variable (cond_bb, e1, phi, arg); | |
821 | /* Note that we optimized this PHI. */ | |
822 | return 2; | |
823 | } | |
824 | else | |
825 | { | |
826 | /* Replace the PHI arguments with arg. */ | |
827 | SET_PHI_ARG_DEF (phi, e0->dest_idx, arg); | |
828 | SET_PHI_ARG_DEF (phi, e1->dest_idx, arg); | |
829 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
830 | { | |
831 | fprintf (dump_file, "PHI "); | |
832 | print_generic_expr (dump_file, gimple_phi_result (phi), 0); | |
c3597b05 | 833 | fprintf (dump_file, " reduced for COND_EXPR in block %d to ", |
834 | cond_bb->index); | |
fb9912ea | 835 | print_generic_expr (dump_file, arg, 0); |
836 | fprintf (dump_file, ".\n"); | |
837 | } | |
838 | return 1; | |
839 | } | |
0beac6fc | 840 | |
0beac6fc | 841 | } |
fb9912ea | 842 | return 0; |
0beac6fc | 843 | } |
844 | ||
194899bf | 845 | /* The function minmax_replacement does the main work of doing the minmax |
846 | replacement. Return true if the replacement is done. Otherwise return | |
847 | false. | |
848 | BB is the basic block where the replacement is going to be done on. ARG0 | |
849 | is argument 0 from the PHI. Likewise for ARG1. */ | |
850 | ||
851 | static bool | |
852 | minmax_replacement (basic_block cond_bb, basic_block middle_bb, | |
75a70cf9 | 853 | edge e0, edge e1, gimple phi, |
194899bf | 854 | tree arg0, tree arg1) |
855 | { | |
856 | tree result, type; | |
75a70cf9 | 857 | gimple cond, new_stmt; |
194899bf | 858 | edge true_edge, false_edge; |
859 | enum tree_code cmp, minmax, ass_code; | |
860 | tree smaller, larger, arg_true, arg_false; | |
75a70cf9 | 861 | gimple_stmt_iterator gsi, gsi_from; |
194899bf | 862 | |
863 | type = TREE_TYPE (PHI_RESULT (phi)); | |
864 | ||
865 | /* The optimization may be unsafe due to NaNs. */ | |
866 | if (HONOR_NANS (TYPE_MODE (type))) | |
867 | return false; | |
868 | ||
75a70cf9 | 869 | cond = last_stmt (cond_bb); |
870 | cmp = gimple_cond_code (cond); | |
194899bf | 871 | |
872 | /* This transformation is only valid for order comparisons. Record which | |
873 | operand is smaller/larger if the result of the comparison is true. */ | |
874 | if (cmp == LT_EXPR || cmp == LE_EXPR) | |
875 | { | |
75a70cf9 | 876 | smaller = gimple_cond_lhs (cond); |
877 | larger = gimple_cond_rhs (cond); | |
194899bf | 878 | } |
879 | else if (cmp == GT_EXPR || cmp == GE_EXPR) | |
880 | { | |
75a70cf9 | 881 | smaller = gimple_cond_rhs (cond); |
882 | larger = gimple_cond_lhs (cond); | |
194899bf | 883 | } |
884 | else | |
885 | return false; | |
886 | ||
887 | /* We need to know which is the true edge and which is the false | |
888 | edge so that we know if have abs or negative abs. */ | |
889 | extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); | |
890 | ||
891 | /* Forward the edges over the middle basic block. */ | |
892 | if (true_edge->dest == middle_bb) | |
893 | true_edge = EDGE_SUCC (true_edge->dest, 0); | |
894 | if (false_edge->dest == middle_bb) | |
895 | false_edge = EDGE_SUCC (false_edge->dest, 0); | |
896 | ||
897 | if (true_edge == e0) | |
898 | { | |
899 | gcc_assert (false_edge == e1); | |
900 | arg_true = arg0; | |
901 | arg_false = arg1; | |
902 | } | |
903 | else | |
904 | { | |
905 | gcc_assert (false_edge == e0); | |
906 | gcc_assert (true_edge == e1); | |
907 | arg_true = arg1; | |
908 | arg_false = arg0; | |
909 | } | |
910 | ||
911 | if (empty_block_p (middle_bb)) | |
912 | { | |
913 | if (operand_equal_for_phi_arg_p (arg_true, smaller) | |
914 | && operand_equal_for_phi_arg_p (arg_false, larger)) | |
915 | { | |
916 | /* Case | |
48e1416a | 917 | |
194899bf | 918 | if (smaller < larger) |
919 | rslt = smaller; | |
920 | else | |
921 | rslt = larger; */ | |
922 | minmax = MIN_EXPR; | |
923 | } | |
924 | else if (operand_equal_for_phi_arg_p (arg_false, smaller) | |
925 | && operand_equal_for_phi_arg_p (arg_true, larger)) | |
926 | minmax = MAX_EXPR; | |
927 | else | |
928 | return false; | |
929 | } | |
930 | else | |
931 | { | |
932 | /* Recognize the following case, assuming d <= u: | |
933 | ||
934 | if (a <= u) | |
935 | b = MAX (a, d); | |
936 | x = PHI <b, u> | |
937 | ||
938 | This is equivalent to | |
939 | ||
940 | b = MAX (a, d); | |
941 | x = MIN (b, u); */ | |
942 | ||
75a70cf9 | 943 | gimple assign = last_and_only_stmt (middle_bb); |
944 | tree lhs, op0, op1, bound; | |
194899bf | 945 | |
946 | if (!assign | |
75a70cf9 | 947 | || gimple_code (assign) != GIMPLE_ASSIGN) |
194899bf | 948 | return false; |
949 | ||
75a70cf9 | 950 | lhs = gimple_assign_lhs (assign); |
951 | ass_code = gimple_assign_rhs_code (assign); | |
194899bf | 952 | if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) |
953 | return false; | |
75a70cf9 | 954 | op0 = gimple_assign_rhs1 (assign); |
955 | op1 = gimple_assign_rhs2 (assign); | |
194899bf | 956 | |
957 | if (true_edge->src == middle_bb) | |
958 | { | |
959 | /* We got here if the condition is true, i.e., SMALLER < LARGER. */ | |
960 | if (!operand_equal_for_phi_arg_p (lhs, arg_true)) | |
961 | return false; | |
962 | ||
963 | if (operand_equal_for_phi_arg_p (arg_false, larger)) | |
964 | { | |
965 | /* Case | |
966 | ||
967 | if (smaller < larger) | |
968 | { | |
969 | r' = MAX_EXPR (smaller, bound) | |
970 | } | |
971 | r = PHI <r', larger> --> to be turned to MIN_EXPR. */ | |
972 | if (ass_code != MAX_EXPR) | |
973 | return false; | |
974 | ||
975 | minmax = MIN_EXPR; | |
976 | if (operand_equal_for_phi_arg_p (op0, smaller)) | |
977 | bound = op1; | |
978 | else if (operand_equal_for_phi_arg_p (op1, smaller)) | |
979 | bound = op0; | |
980 | else | |
981 | return false; | |
982 | ||
983 | /* We need BOUND <= LARGER. */ | |
49d00087 | 984 | if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, |
985 | bound, larger))) | |
194899bf | 986 | return false; |
987 | } | |
988 | else if (operand_equal_for_phi_arg_p (arg_false, smaller)) | |
989 | { | |
990 | /* Case | |
991 | ||
992 | if (smaller < larger) | |
993 | { | |
994 | r' = MIN_EXPR (larger, bound) | |
995 | } | |
996 | r = PHI <r', smaller> --> to be turned to MAX_EXPR. */ | |
997 | if (ass_code != MIN_EXPR) | |
998 | return false; | |
999 | ||
1000 | minmax = MAX_EXPR; | |
1001 | if (operand_equal_for_phi_arg_p (op0, larger)) | |
1002 | bound = op1; | |
1003 | else if (operand_equal_for_phi_arg_p (op1, larger)) | |
1004 | bound = op0; | |
1005 | else | |
1006 | return false; | |
1007 | ||
1008 | /* We need BOUND >= SMALLER. */ | |
49d00087 | 1009 | if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, |
1010 | bound, smaller))) | |
194899bf | 1011 | return false; |
1012 | } | |
1013 | else | |
1014 | return false; | |
1015 | } | |
1016 | else | |
1017 | { | |
1018 | /* We got here if the condition is false, i.e., SMALLER > LARGER. */ | |
1019 | if (!operand_equal_for_phi_arg_p (lhs, arg_false)) | |
1020 | return false; | |
1021 | ||
1022 | if (operand_equal_for_phi_arg_p (arg_true, larger)) | |
1023 | { | |
1024 | /* Case | |
1025 | ||
1026 | if (smaller > larger) | |
1027 | { | |
1028 | r' = MIN_EXPR (smaller, bound) | |
1029 | } | |
1030 | r = PHI <r', larger> --> to be turned to MAX_EXPR. */ | |
1031 | if (ass_code != MIN_EXPR) | |
1032 | return false; | |
1033 | ||
1034 | minmax = MAX_EXPR; | |
1035 | if (operand_equal_for_phi_arg_p (op0, smaller)) | |
1036 | bound = op1; | |
1037 | else if (operand_equal_for_phi_arg_p (op1, smaller)) | |
1038 | bound = op0; | |
1039 | else | |
1040 | return false; | |
1041 | ||
1042 | /* We need BOUND >= LARGER. */ | |
49d00087 | 1043 | if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, |
1044 | bound, larger))) | |
194899bf | 1045 | return false; |
1046 | } | |
1047 | else if (operand_equal_for_phi_arg_p (arg_true, smaller)) | |
1048 | { | |
1049 | /* Case | |
1050 | ||
1051 | if (smaller > larger) | |
1052 | { | |
1053 | r' = MAX_EXPR (larger, bound) | |
1054 | } | |
1055 | r = PHI <r', smaller> --> to be turned to MIN_EXPR. */ | |
1056 | if (ass_code != MAX_EXPR) | |
1057 | return false; | |
1058 | ||
1059 | minmax = MIN_EXPR; | |
1060 | if (operand_equal_for_phi_arg_p (op0, larger)) | |
1061 | bound = op1; | |
1062 | else if (operand_equal_for_phi_arg_p (op1, larger)) | |
1063 | bound = op0; | |
1064 | else | |
1065 | return false; | |
1066 | ||
1067 | /* We need BOUND <= SMALLER. */ | |
49d00087 | 1068 | if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, |
1069 | bound, smaller))) | |
194899bf | 1070 | return false; |
1071 | } | |
1072 | else | |
1073 | return false; | |
1074 | } | |
1075 | ||
1076 | /* Move the statement from the middle block. */ | |
75a70cf9 | 1077 | gsi = gsi_last_bb (cond_bb); |
445a6ba5 | 1078 | gsi_from = gsi_last_nondebug_bb (middle_bb); |
75a70cf9 | 1079 | gsi_move_before (&gsi_from, &gsi); |
194899bf | 1080 | } |
1081 | ||
1082 | /* Emit the statement to compute min/max. */ | |
1083 | result = duplicate_ssa_name (PHI_RESULT (phi), NULL); | |
75a70cf9 | 1084 | new_stmt = gimple_build_assign_with_ops (minmax, result, arg0, arg1); |
1085 | gsi = gsi_last_bb (cond_bb); | |
1086 | gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); | |
194899bf | 1087 | |
a4844041 | 1088 | replace_phi_edge_with_variable (cond_bb, e1, phi, result); |
194899bf | 1089 | return true; |
1090 | } | |
1091 | ||
70512b93 | 1092 | /* The function absolute_replacement does the main work of doing the absolute |
1093 | replacement. Return true if the replacement is done. Otherwise return | |
1094 | false. | |
1095 | bb is the basic block where the replacement is going to be done on. arg0 | |
f7f07c95 | 1096 | is argument 0 from the phi. Likewise for arg1. */ |
33784d89 | 1097 | |
70512b93 | 1098 | static bool |
33784d89 | 1099 | abs_replacement (basic_block cond_bb, basic_block middle_bb, |
a4844041 | 1100 | edge e0 ATTRIBUTE_UNUSED, edge e1, |
75a70cf9 | 1101 | gimple phi, tree arg0, tree arg1) |
70512b93 | 1102 | { |
1103 | tree result; | |
75a70cf9 | 1104 | gimple new_stmt, cond; |
1105 | gimple_stmt_iterator gsi; | |
70512b93 | 1106 | edge true_edge, false_edge; |
75a70cf9 | 1107 | gimple assign; |
70512b93 | 1108 | edge e; |
194899bf | 1109 | tree rhs, lhs; |
70512b93 | 1110 | bool negate; |
1111 | enum tree_code cond_code; | |
1112 | ||
1113 | /* If the type says honor signed zeros we cannot do this | |
dac49aa5 | 1114 | optimization. */ |
70512b93 | 1115 | if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1)))) |
1116 | return false; | |
1117 | ||
70512b93 | 1118 | /* OTHER_BLOCK must have only one executable statement which must have the |
1119 | form arg0 = -arg1 or arg1 = -arg0. */ | |
70512b93 | 1120 | |
194899bf | 1121 | assign = last_and_only_stmt (middle_bb); |
70512b93 | 1122 | /* If we did not find the proper negation assignment, then we can not |
1123 | optimize. */ | |
1124 | if (assign == NULL) | |
1125 | return false; | |
48e1416a | 1126 | |
194899bf | 1127 | /* If we got here, then we have found the only executable statement |
1128 | in OTHER_BLOCK. If it is anything other than arg = -arg1 or | |
1129 | arg1 = -arg0, then we can not optimize. */ | |
75a70cf9 | 1130 | if (gimple_code (assign) != GIMPLE_ASSIGN) |
194899bf | 1131 | return false; |
1132 | ||
75a70cf9 | 1133 | lhs = gimple_assign_lhs (assign); |
194899bf | 1134 | |
75a70cf9 | 1135 | if (gimple_assign_rhs_code (assign) != NEGATE_EXPR) |
194899bf | 1136 | return false; |
1137 | ||
75a70cf9 | 1138 | rhs = gimple_assign_rhs1 (assign); |
48e1416a | 1139 | |
194899bf | 1140 | /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */ |
1141 | if (!(lhs == arg0 && rhs == arg1) | |
1142 | && !(lhs == arg1 && rhs == arg0)) | |
1143 | return false; | |
70512b93 | 1144 | |
75a70cf9 | 1145 | cond = last_stmt (cond_bb); |
70512b93 | 1146 | result = PHI_RESULT (phi); |
1147 | ||
1148 | /* Only relationals comparing arg[01] against zero are interesting. */ | |
75a70cf9 | 1149 | cond_code = gimple_cond_code (cond); |
70512b93 | 1150 | if (cond_code != GT_EXPR && cond_code != GE_EXPR |
1151 | && cond_code != LT_EXPR && cond_code != LE_EXPR) | |
1152 | return false; | |
1153 | ||
dac49aa5 | 1154 | /* Make sure the conditional is arg[01] OP y. */ |
75a70cf9 | 1155 | if (gimple_cond_lhs (cond) != rhs) |
70512b93 | 1156 | return false; |
1157 | ||
75a70cf9 | 1158 | if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond))) |
1159 | ? real_zerop (gimple_cond_rhs (cond)) | |
1160 | : integer_zerop (gimple_cond_rhs (cond))) | |
70512b93 | 1161 | ; |
1162 | else | |
1163 | return false; | |
1164 | ||
1165 | /* We need to know which is the true edge and which is the false | |
1166 | edge so that we know if have abs or negative abs. */ | |
33784d89 | 1167 | extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); |
70512b93 | 1168 | |
1169 | /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we | |
1170 | will need to negate the result. Similarly for LT_EXPR/LE_EXPR if | |
1171 | the false edge goes to OTHER_BLOCK. */ | |
1172 | if (cond_code == GT_EXPR || cond_code == GE_EXPR) | |
1173 | e = true_edge; | |
1174 | else | |
1175 | e = false_edge; | |
20e5647c | 1176 | |
33784d89 | 1177 | if (e->dest == middle_bb) |
70512b93 | 1178 | negate = true; |
1179 | else | |
1180 | negate = false; | |
20e5647c | 1181 | |
33784d89 | 1182 | result = duplicate_ssa_name (result, NULL); |
20e5647c | 1183 | |
70512b93 | 1184 | if (negate) |
03d37e4e | 1185 | lhs = make_ssa_name (TREE_TYPE (result), NULL); |
70512b93 | 1186 | else |
1187 | lhs = result; | |
1188 | ||
dac49aa5 | 1189 | /* Build the modify expression with abs expression. */ |
75a70cf9 | 1190 | new_stmt = gimple_build_assign_with_ops (ABS_EXPR, lhs, rhs, NULL); |
70512b93 | 1191 | |
75a70cf9 | 1192 | gsi = gsi_last_bb (cond_bb); |
1193 | gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); | |
70512b93 | 1194 | |
1195 | if (negate) | |
1196 | { | |
75a70cf9 | 1197 | /* Get the right GSI. We want to insert after the recently |
70512b93 | 1198 | added ABS_EXPR statement (which we know is the first statement |
1199 | in the block. */ | |
75a70cf9 | 1200 | new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, result, lhs, NULL); |
70512b93 | 1201 | |
75a70cf9 | 1202 | gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); |
70512b93 | 1203 | } |
20e5647c | 1204 | |
a4844041 | 1205 | replace_phi_edge_with_variable (cond_bb, e1, phi, result); |
70512b93 | 1206 | |
1207 | /* Note that we optimized this PHI. */ | |
1208 | return true; | |
1209 | } | |
1210 | ||
e6d0e152 | 1211 | /* Auxiliary functions to determine the set of memory accesses which |
1212 | can't trap because they are preceded by accesses to the same memory | |
182cf5a9 | 1213 | portion. We do that for MEM_REFs, so we only need to track |
e6d0e152 | 1214 | the SSA_NAME of the pointer indirectly referenced. The algorithm |
1215 | simply is a walk over all instructions in dominator order. When | |
182cf5a9 | 1216 | we see an MEM_REF we determine if we've already seen a same |
e6d0e152 | 1217 | ref anywhere up to the root of the dominator tree. If we do the |
af4f74fa | 1218 | current access can't trap. If we don't see any dominating access |
e6d0e152 | 1219 | the current access might trap, but might also make later accesses |
af4f74fa | 1220 | non-trapping, so we remember it. We need to be careful with loads |
1221 | or stores, for instance a load might not trap, while a store would, | |
1222 | so if we see a dominating read access this doesn't mean that a later | |
1223 | write access would not trap. Hence we also need to differentiate the | |
1224 | type of access(es) seen. | |
1225 | ||
1226 | ??? We currently are very conservative and assume that a load might | |
1227 | trap even if a store doesn't (write-only memory). This probably is | |
1228 | overly conservative. */ | |
e6d0e152 | 1229 | |
182cf5a9 | 1230 | /* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF |
e6d0e152 | 1231 | through it was seen, which would constitute a no-trap region for |
1232 | same accesses. */ | |
1233 | struct name_to_bb | |
1234 | { | |
963aee26 | 1235 | unsigned int ssa_name_ver; |
42540642 | 1236 | unsigned int phase; |
963aee26 | 1237 | bool store; |
1238 | HOST_WIDE_INT offset, size; | |
e6d0e152 | 1239 | basic_block bb; |
1240 | }; | |
1241 | ||
1242 | /* The hash table for remembering what we've seen. */ | |
1243 | static htab_t seen_ssa_names; | |
1244 | ||
42540642 | 1245 | /* Used for quick clearing of the hash-table when we see calls. |
1246 | Hash entries with phase < nt_call_phase are invalid. */ | |
1247 | static unsigned int nt_call_phase; | |
1248 | ||
182cf5a9 | 1249 | /* The set of MEM_REFs which can't trap. */ |
e6d0e152 | 1250 | static struct pointer_set_t *nontrap_set; |
1251 | ||
963aee26 | 1252 | /* The hash function. */ |
e6d0e152 | 1253 | static hashval_t |
1254 | name_to_bb_hash (const void *p) | |
1255 | { | |
963aee26 | 1256 | const struct name_to_bb *n = (const struct name_to_bb *) p; |
1257 | return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) | |
1258 | ^ (n->offset << 6) ^ (n->size << 3); | |
e6d0e152 | 1259 | } |
1260 | ||
963aee26 | 1261 | /* The equality function of *P1 and *P2. */ |
e6d0e152 | 1262 | static int |
1263 | name_to_bb_eq (const void *p1, const void *p2) | |
1264 | { | |
af4f74fa | 1265 | const struct name_to_bb *n1 = (const struct name_to_bb *)p1; |
1266 | const struct name_to_bb *n2 = (const struct name_to_bb *)p2; | |
e6d0e152 | 1267 | |
963aee26 | 1268 | return n1->ssa_name_ver == n2->ssa_name_ver |
1269 | && n1->store == n2->store | |
1270 | && n1->offset == n2->offset | |
1271 | && n1->size == n2->size; | |
e6d0e152 | 1272 | } |
1273 | ||
f0b5f617 | 1274 | /* We see the expression EXP in basic block BB. If it's an interesting |
182cf5a9 | 1275 | expression (an MEM_REF through an SSA_NAME) possibly insert the |
af4f74fa | 1276 | expression into the set NONTRAP or the hash table of seen expressions. |
1277 | STORE is true if this expression is on the LHS, otherwise it's on | |
1278 | the RHS. */ | |
e6d0e152 | 1279 | static void |
af4f74fa | 1280 | add_or_mark_expr (basic_block bb, tree exp, |
1281 | struct pointer_set_t *nontrap, bool store) | |
e6d0e152 | 1282 | { |
963aee26 | 1283 | HOST_WIDE_INT size; |
1284 | ||
182cf5a9 | 1285 | if (TREE_CODE (exp) == MEM_REF |
963aee26 | 1286 | && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME |
1287 | && host_integerp (TREE_OPERAND (exp, 1), 0) | |
1288 | && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) | |
e6d0e152 | 1289 | { |
1290 | tree name = TREE_OPERAND (exp, 0); | |
1291 | struct name_to_bb map; | |
1292 | void **slot; | |
af4f74fa | 1293 | struct name_to_bb *n2bb; |
e6d0e152 | 1294 | basic_block found_bb = 0; |
1295 | ||
182cf5a9 | 1296 | /* Try to find the last seen MEM_REF through the same |
e6d0e152 | 1297 | SSA_NAME, which can trap. */ |
963aee26 | 1298 | map.ssa_name_ver = SSA_NAME_VERSION (name); |
42540642 | 1299 | map.phase = 0; |
e6d0e152 | 1300 | map.bb = 0; |
af4f74fa | 1301 | map.store = store; |
963aee26 | 1302 | map.offset = tree_low_cst (TREE_OPERAND (exp, 1), 0); |
1303 | map.size = size; | |
1304 | ||
e6d0e152 | 1305 | slot = htab_find_slot (seen_ssa_names, &map, INSERT); |
af4f74fa | 1306 | n2bb = (struct name_to_bb *) *slot; |
42540642 | 1307 | if (n2bb && n2bb->phase >= nt_call_phase) |
af4f74fa | 1308 | found_bb = n2bb->bb; |
e6d0e152 | 1309 | |
182cf5a9 | 1310 | /* If we've found a trapping MEM_REF, _and_ it dominates EXP |
e6d0e152 | 1311 | (it's in a basic block on the path from us to the dominator root) |
1312 | then we can't trap. */ | |
42540642 | 1313 | if (found_bb && (((size_t)found_bb->aux) & 1) == 1) |
e6d0e152 | 1314 | { |
1315 | pointer_set_insert (nontrap, exp); | |
1316 | } | |
1317 | else | |
1318 | { | |
1319 | /* EXP might trap, so insert it into the hash table. */ | |
af4f74fa | 1320 | if (n2bb) |
e6d0e152 | 1321 | { |
42540642 | 1322 | n2bb->phase = nt_call_phase; |
af4f74fa | 1323 | n2bb->bb = bb; |
e6d0e152 | 1324 | } |
1325 | else | |
1326 | { | |
af4f74fa | 1327 | n2bb = XNEW (struct name_to_bb); |
963aee26 | 1328 | n2bb->ssa_name_ver = SSA_NAME_VERSION (name); |
42540642 | 1329 | n2bb->phase = nt_call_phase; |
af4f74fa | 1330 | n2bb->bb = bb; |
1331 | n2bb->store = store; | |
963aee26 | 1332 | n2bb->offset = map.offset; |
1333 | n2bb->size = size; | |
af4f74fa | 1334 | *slot = n2bb; |
e6d0e152 | 1335 | } |
1336 | } | |
1337 | } | |
1338 | } | |
1339 | ||
42540642 | 1340 | /* Return true when CALL is a call stmt that definitely doesn't |
1341 | free any memory or makes it unavailable otherwise. */ | |
e8d4d8a9 | 1342 | bool |
42540642 | 1343 | nonfreeing_call_p (gimple call) |
1344 | { | |
1345 | if (gimple_call_builtin_p (call, BUILT_IN_NORMAL) | |
1346 | && gimple_call_flags (call) & ECF_LEAF) | |
1347 | switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call))) | |
1348 | { | |
1349 | /* Just in case these become ECF_LEAF in the future. */ | |
1350 | case BUILT_IN_FREE: | |
1351 | case BUILT_IN_TM_FREE: | |
1352 | case BUILT_IN_REALLOC: | |
1353 | case BUILT_IN_STACK_RESTORE: | |
1354 | return false; | |
1355 | default: | |
1356 | return true; | |
1357 | } | |
1358 | ||
1359 | return false; | |
1360 | } | |
1361 | ||
e6d0e152 | 1362 | /* Called by walk_dominator_tree, when entering the block BB. */ |
1363 | static void | |
1364 | nt_init_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb) | |
1365 | { | |
42540642 | 1366 | edge e; |
1367 | edge_iterator ei; | |
75a70cf9 | 1368 | gimple_stmt_iterator gsi; |
42540642 | 1369 | |
1370 | /* If we haven't seen all our predecessors, clear the hash-table. */ | |
1371 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1372 | if ((((size_t)e->src->aux) & 2) == 0) | |
1373 | { | |
1374 | nt_call_phase++; | |
1375 | break; | |
1376 | } | |
1377 | ||
1378 | /* Mark this BB as being on the path to dominator root and as visited. */ | |
1379 | bb->aux = (void*)(1 | 2); | |
e6d0e152 | 1380 | |
1381 | /* And walk the statements in order. */ | |
75a70cf9 | 1382 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
e6d0e152 | 1383 | { |
75a70cf9 | 1384 | gimple stmt = gsi_stmt (gsi); |
e6d0e152 | 1385 | |
42540642 | 1386 | if (is_gimple_call (stmt) && !nonfreeing_call_p (stmt)) |
1387 | nt_call_phase++; | |
1388 | else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt)) | |
e6d0e152 | 1389 | { |
75a70cf9 | 1390 | add_or_mark_expr (bb, gimple_assign_lhs (stmt), nontrap_set, true); |
1391 | add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), nontrap_set, false); | |
e6d0e152 | 1392 | } |
1393 | } | |
1394 | } | |
1395 | ||
1396 | /* Called by walk_dominator_tree, when basic block BB is exited. */ | |
1397 | static void | |
1398 | nt_fini_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb) | |
1399 | { | |
1400 | /* This BB isn't on the path to dominator root anymore. */ | |
42540642 | 1401 | bb->aux = (void*)2; |
e6d0e152 | 1402 | } |
1403 | ||
1404 | /* This is the entry point of gathering non trapping memory accesses. | |
1405 | It will do a dominator walk over the whole function, and it will | |
1406 | make use of the bb->aux pointers. It returns a set of trees | |
182cf5a9 | 1407 | (the MEM_REFs itself) which can't trap. */ |
e6d0e152 | 1408 | static struct pointer_set_t * |
1409 | get_non_trapping (void) | |
1410 | { | |
1411 | struct pointer_set_t *nontrap; | |
1412 | struct dom_walk_data walk_data; | |
1413 | ||
42540642 | 1414 | nt_call_phase = 0; |
e6d0e152 | 1415 | nontrap = pointer_set_create (); |
1416 | seen_ssa_names = htab_create (128, name_to_bb_hash, name_to_bb_eq, | |
1417 | free); | |
1418 | /* We're going to do a dominator walk, so ensure that we have | |
1419 | dominance information. */ | |
1420 | calculate_dominance_info (CDI_DOMINATORS); | |
1421 | ||
1422 | /* Setup callbacks for the generic dominator tree walker. */ | |
1423 | nontrap_set = nontrap; | |
e6d0e152 | 1424 | walk_data.dom_direction = CDI_DOMINATORS; |
1425 | walk_data.initialize_block_local_data = NULL; | |
6bf320fb | 1426 | walk_data.before_dom_children = nt_init_block; |
1427 | walk_data.after_dom_children = nt_fini_block; | |
e6d0e152 | 1428 | walk_data.global_data = NULL; |
1429 | walk_data.block_local_data_size = 0; | |
e6d0e152 | 1430 | |
1431 | init_walk_dominator_tree (&walk_data); | |
1432 | walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); | |
1433 | fini_walk_dominator_tree (&walk_data); | |
1434 | htab_delete (seen_ssa_names); | |
1435 | ||
42540642 | 1436 | clear_aux_for_blocks (); |
e6d0e152 | 1437 | return nontrap; |
1438 | } | |
1439 | ||
1440 | /* Do the main work of conditional store replacement. We already know | |
1441 | that the recognized pattern looks like so: | |
1442 | ||
1443 | split: | |
1444 | if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1) | |
1445 | MIDDLE_BB: | |
1446 | something | |
1447 | fallthrough (edge E0) | |
1448 | JOIN_BB: | |
1449 | some more | |
1450 | ||
1451 | We check that MIDDLE_BB contains only one store, that that store | |
1452 | doesn't trap (not via NOTRAP, but via checking if an access to the same | |
1453 | memory location dominates us) and that the store has a "simple" RHS. */ | |
1454 | ||
1455 | static bool | |
1456 | cond_store_replacement (basic_block middle_bb, basic_block join_bb, | |
1457 | edge e0, edge e1, struct pointer_set_t *nontrap) | |
1458 | { | |
75a70cf9 | 1459 | gimple assign = last_and_only_stmt (middle_bb); |
03d37e4e | 1460 | tree lhs, rhs, name, name2; |
75a70cf9 | 1461 | gimple newphi, new_stmt; |
1462 | gimple_stmt_iterator gsi; | |
efbcb6de | 1463 | source_location locus; |
e6d0e152 | 1464 | |
1465 | /* Check if middle_bb contains of only one store. */ | |
1466 | if (!assign | |
6cc085b6 | 1467 | || !gimple_assign_single_p (assign) |
1468 | || gimple_has_volatile_ops (assign)) | |
e6d0e152 | 1469 | return false; |
1470 | ||
efbcb6de | 1471 | locus = gimple_location (assign); |
75a70cf9 | 1472 | lhs = gimple_assign_lhs (assign); |
1473 | rhs = gimple_assign_rhs1 (assign); | |
182cf5a9 | 1474 | if (TREE_CODE (lhs) != MEM_REF |
91cf53d5 | 1475 | || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME |
3211fa0a | 1476 | || !is_gimple_reg_type (TREE_TYPE (lhs))) |
e6d0e152 | 1477 | return false; |
91cf53d5 | 1478 | |
e6d0e152 | 1479 | /* Prove that we can move the store down. We could also check |
1480 | TREE_THIS_NOTRAP here, but in that case we also could move stores, | |
1481 | whose value is not available readily, which we want to avoid. */ | |
1482 | if (!pointer_set_contains (nontrap, lhs)) | |
1483 | return false; | |
1484 | ||
1485 | /* Now we've checked the constraints, so do the transformation: | |
1486 | 1) Remove the single store. */ | |
75a70cf9 | 1487 | gsi = gsi_for_stmt (assign); |
3211fa0a | 1488 | unlink_stmt_vdef (assign); |
75a70cf9 | 1489 | gsi_remove (&gsi, true); |
91cf53d5 | 1490 | release_defs (assign); |
e6d0e152 | 1491 | |
03d37e4e | 1492 | /* 2) Insert a load from the memory of the store to the temporary |
e6d0e152 | 1493 | on the edge which did not contain the store. */ |
1494 | lhs = unshare_expr (lhs); | |
03d37e4e | 1495 | name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore"); |
1496 | new_stmt = gimple_build_assign (name, lhs); | |
efbcb6de | 1497 | gimple_set_location (new_stmt, locus); |
75a70cf9 | 1498 | gsi_insert_on_edge (e1, new_stmt); |
e6d0e152 | 1499 | |
03d37e4e | 1500 | /* 3) Create a PHI node at the join block, with one argument |
e6d0e152 | 1501 | holding the old RHS, and the other holding the temporary |
1502 | where we stored the old memory contents. */ | |
03d37e4e | 1503 | name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore"); |
1504 | newphi = create_phi_node (name2, join_bb); | |
60d535d2 | 1505 | add_phi_arg (newphi, rhs, e0, locus); |
1506 | add_phi_arg (newphi, name, e1, locus); | |
e6d0e152 | 1507 | |
1508 | lhs = unshare_expr (lhs); | |
75a70cf9 | 1509 | new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi)); |
e6d0e152 | 1510 | |
03d37e4e | 1511 | /* 4) Insert that PHI node. */ |
75a70cf9 | 1512 | gsi = gsi_after_labels (join_bb); |
1513 | if (gsi_end_p (gsi)) | |
e6d0e152 | 1514 | { |
75a70cf9 | 1515 | gsi = gsi_last_bb (join_bb); |
1516 | gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); | |
e6d0e152 | 1517 | } |
1518 | else | |
75a70cf9 | 1519 | gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); |
e6d0e152 | 1520 | |
1521 | return true; | |
1522 | } | |
4ee9c684 | 1523 | |
ec611e12 | 1524 | /* Do the main work of conditional store replacement. */ |
91cf53d5 | 1525 | |
1526 | static bool | |
ec611e12 | 1527 | cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb, |
1528 | basic_block join_bb, gimple then_assign, | |
1529 | gimple else_assign) | |
91cf53d5 | 1530 | { |
03d37e4e | 1531 | tree lhs_base, lhs, then_rhs, else_rhs, name; |
91cf53d5 | 1532 | source_location then_locus, else_locus; |
1533 | gimple_stmt_iterator gsi; | |
1534 | gimple newphi, new_stmt; | |
1535 | ||
91cf53d5 | 1536 | if (then_assign == NULL |
1537 | || !gimple_assign_single_p (then_assign) | |
3c25489e | 1538 | || gimple_clobber_p (then_assign) |
6cc085b6 | 1539 | || gimple_has_volatile_ops (then_assign) |
91cf53d5 | 1540 | || else_assign == NULL |
3c25489e | 1541 | || !gimple_assign_single_p (else_assign) |
6cc085b6 | 1542 | || gimple_clobber_p (else_assign) |
1543 | || gimple_has_volatile_ops (else_assign)) | |
91cf53d5 | 1544 | return false; |
1545 | ||
1546 | lhs = gimple_assign_lhs (then_assign); | |
1547 | if (!is_gimple_reg_type (TREE_TYPE (lhs)) | |
1548 | || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0)) | |
1549 | return false; | |
1550 | ||
1551 | lhs_base = get_base_address (lhs); | |
1552 | if (lhs_base == NULL_TREE | |
1553 | || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF)) | |
1554 | return false; | |
1555 | ||
1556 | then_rhs = gimple_assign_rhs1 (then_assign); | |
1557 | else_rhs = gimple_assign_rhs1 (else_assign); | |
1558 | then_locus = gimple_location (then_assign); | |
1559 | else_locus = gimple_location (else_assign); | |
1560 | ||
1561 | /* Now we've checked the constraints, so do the transformation: | |
1562 | 1) Remove the stores. */ | |
1563 | gsi = gsi_for_stmt (then_assign); | |
1564 | unlink_stmt_vdef (then_assign); | |
1565 | gsi_remove (&gsi, true); | |
1566 | release_defs (then_assign); | |
1567 | ||
1568 | gsi = gsi_for_stmt (else_assign); | |
1569 | unlink_stmt_vdef (else_assign); | |
1570 | gsi_remove (&gsi, true); | |
1571 | release_defs (else_assign); | |
1572 | ||
03d37e4e | 1573 | /* 2) Create a PHI node at the join block, with one argument |
91cf53d5 | 1574 | holding the old RHS, and the other holding the temporary |
1575 | where we stored the old memory contents. */ | |
03d37e4e | 1576 | name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore"); |
1577 | newphi = create_phi_node (name, join_bb); | |
60d535d2 | 1578 | add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus); |
1579 | add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus); | |
91cf53d5 | 1580 | |
1581 | new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi)); | |
1582 | ||
03d37e4e | 1583 | /* 3) Insert that PHI node. */ |
91cf53d5 | 1584 | gsi = gsi_after_labels (join_bb); |
1585 | if (gsi_end_p (gsi)) | |
1586 | { | |
1587 | gsi = gsi_last_bb (join_bb); | |
1588 | gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); | |
1589 | } | |
1590 | else | |
1591 | gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); | |
1592 | ||
1593 | return true; | |
1594 | } | |
1595 | ||
ec611e12 | 1596 | /* Conditional store replacement. We already know |
1597 | that the recognized pattern looks like so: | |
1598 | ||
1599 | split: | |
1600 | if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) | |
1601 | THEN_BB: | |
1602 | ... | |
1603 | X = Y; | |
1604 | ... | |
1605 | goto JOIN_BB; | |
1606 | ELSE_BB: | |
1607 | ... | |
1608 | X = Z; | |
1609 | ... | |
1610 | fallthrough (edge E0) | |
1611 | JOIN_BB: | |
1612 | some more | |
1613 | ||
1614 | We check that it is safe to sink the store to JOIN_BB by verifying that | |
1615 | there are no read-after-write or write-after-write dependencies in | |
1616 | THEN_BB and ELSE_BB. */ | |
1617 | ||
1618 | static bool | |
1619 | cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, | |
1620 | basic_block join_bb) | |
1621 | { | |
1622 | gimple then_assign = last_and_only_stmt (then_bb); | |
1623 | gimple else_assign = last_and_only_stmt (else_bb); | |
f1f41a6c | 1624 | vec<data_reference_p> then_datarefs, else_datarefs; |
1625 | vec<ddr_p> then_ddrs, else_ddrs; | |
ec611e12 | 1626 | gimple then_store, else_store; |
1627 | bool found, ok = false, res; | |
1628 | struct data_dependence_relation *ddr; | |
1629 | data_reference_p then_dr, else_dr; | |
1630 | int i, j; | |
1631 | tree then_lhs, else_lhs; | |
f1f41a6c | 1632 | vec<gimple> then_stores, else_stores; |
ec611e12 | 1633 | basic_block blocks[3]; |
1634 | ||
1635 | if (MAX_STORES_TO_SINK == 0) | |
1636 | return false; | |
1637 | ||
1638 | /* Handle the case with single statement in THEN_BB and ELSE_BB. */ | |
1639 | if (then_assign && else_assign) | |
1640 | return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, | |
1641 | then_assign, else_assign); | |
1642 | ||
1643 | /* Find data references. */ | |
f1f41a6c | 1644 | then_datarefs.create (1); |
1645 | else_datarefs.create (1); | |
ec611e12 | 1646 | if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs) |
1647 | == chrec_dont_know) | |
f1f41a6c | 1648 | || !then_datarefs.length () |
ec611e12 | 1649 | || (find_data_references_in_bb (NULL, else_bb, &else_datarefs) |
1650 | == chrec_dont_know) | |
f1f41a6c | 1651 | || !else_datarefs.length ()) |
ec611e12 | 1652 | { |
1653 | free_data_refs (then_datarefs); | |
1654 | free_data_refs (else_datarefs); | |
1655 | return false; | |
1656 | } | |
1657 | ||
1658 | /* Find pairs of stores with equal LHS. */ | |
f1f41a6c | 1659 | then_stores.create (1); |
1660 | else_stores.create (1); | |
1661 | FOR_EACH_VEC_ELT (then_datarefs, i, then_dr) | |
ec611e12 | 1662 | { |
1663 | if (DR_IS_READ (then_dr)) | |
1664 | continue; | |
1665 | ||
1666 | then_store = DR_STMT (then_dr); | |
728dcc71 | 1667 | then_lhs = gimple_get_lhs (then_store); |
ec611e12 | 1668 | found = false; |
1669 | ||
f1f41a6c | 1670 | FOR_EACH_VEC_ELT (else_datarefs, j, else_dr) |
ec611e12 | 1671 | { |
1672 | if (DR_IS_READ (else_dr)) | |
1673 | continue; | |
1674 | ||
1675 | else_store = DR_STMT (else_dr); | |
728dcc71 | 1676 | else_lhs = gimple_get_lhs (else_store); |
ec611e12 | 1677 | |
1678 | if (operand_equal_p (then_lhs, else_lhs, 0)) | |
1679 | { | |
1680 | found = true; | |
1681 | break; | |
1682 | } | |
1683 | } | |
1684 | ||
1685 | if (!found) | |
1686 | continue; | |
1687 | ||
f1f41a6c | 1688 | then_stores.safe_push (then_store); |
1689 | else_stores.safe_push (else_store); | |
ec611e12 | 1690 | } |
1691 | ||
1692 | /* No pairs of stores found. */ | |
f1f41a6c | 1693 | if (!then_stores.length () |
1694 | || then_stores.length () > (unsigned) MAX_STORES_TO_SINK) | |
ec611e12 | 1695 | { |
1696 | free_data_refs (then_datarefs); | |
1697 | free_data_refs (else_datarefs); | |
f1f41a6c | 1698 | then_stores.release (); |
1699 | else_stores.release (); | |
ec611e12 | 1700 | return false; |
1701 | } | |
1702 | ||
1703 | /* Compute and check data dependencies in both basic blocks. */ | |
f1f41a6c | 1704 | then_ddrs.create (1); |
1705 | else_ddrs.create (1); | |
1706 | if (!compute_all_dependences (then_datarefs, &then_ddrs, | |
1e094109 | 1707 | vNULL, false) |
f1f41a6c | 1708 | || !compute_all_dependences (else_datarefs, &else_ddrs, |
1e094109 | 1709 | vNULL, false)) |
8b3fb720 | 1710 | { |
1711 | free_dependence_relations (then_ddrs); | |
1712 | free_dependence_relations (else_ddrs); | |
1713 | free_data_refs (then_datarefs); | |
1714 | free_data_refs (else_datarefs); | |
f1f41a6c | 1715 | then_stores.release (); |
1716 | else_stores.release (); | |
8b3fb720 | 1717 | return false; |
1718 | } | |
ec611e12 | 1719 | blocks[0] = then_bb; |
1720 | blocks[1] = else_bb; | |
1721 | blocks[2] = join_bb; | |
1722 | renumber_gimple_stmt_uids_in_blocks (blocks, 3); | |
1723 | ||
1724 | /* Check that there are no read-after-write or write-after-write dependencies | |
1725 | in THEN_BB. */ | |
f1f41a6c | 1726 | FOR_EACH_VEC_ELT (then_ddrs, i, ddr) |
ec611e12 | 1727 | { |
1728 | struct data_reference *dra = DDR_A (ddr); | |
1729 | struct data_reference *drb = DDR_B (ddr); | |
1730 | ||
1731 | if (DDR_ARE_DEPENDENT (ddr) != chrec_known | |
1732 | && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) | |
1733 | && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) | |
1734 | || (DR_IS_READ (drb) && DR_IS_WRITE (dra) | |
1735 | && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) | |
1736 | || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) | |
1737 | { | |
1738 | free_dependence_relations (then_ddrs); | |
1739 | free_dependence_relations (else_ddrs); | |
2473bfb7 | 1740 | free_data_refs (then_datarefs); |
1741 | free_data_refs (else_datarefs); | |
f1f41a6c | 1742 | then_stores.release (); |
1743 | else_stores.release (); | |
ec611e12 | 1744 | return false; |
1745 | } | |
1746 | } | |
1747 | ||
1748 | /* Check that there are no read-after-write or write-after-write dependencies | |
1749 | in ELSE_BB. */ | |
f1f41a6c | 1750 | FOR_EACH_VEC_ELT (else_ddrs, i, ddr) |
ec611e12 | 1751 | { |
1752 | struct data_reference *dra = DDR_A (ddr); | |
1753 | struct data_reference *drb = DDR_B (ddr); | |
1754 | ||
1755 | if (DDR_ARE_DEPENDENT (ddr) != chrec_known | |
1756 | && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) | |
1757 | && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) | |
1758 | || (DR_IS_READ (drb) && DR_IS_WRITE (dra) | |
1759 | && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) | |
1760 | || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) | |
1761 | { | |
1762 | free_dependence_relations (then_ddrs); | |
1763 | free_dependence_relations (else_ddrs); | |
2473bfb7 | 1764 | free_data_refs (then_datarefs); |
1765 | free_data_refs (else_datarefs); | |
f1f41a6c | 1766 | then_stores.release (); |
1767 | else_stores.release (); | |
ec611e12 | 1768 | return false; |
1769 | } | |
1770 | } | |
1771 | ||
1772 | /* Sink stores with same LHS. */ | |
f1f41a6c | 1773 | FOR_EACH_VEC_ELT (then_stores, i, then_store) |
ec611e12 | 1774 | { |
f1f41a6c | 1775 | else_store = else_stores[i]; |
ec611e12 | 1776 | res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, |
1777 | then_store, else_store); | |
1778 | ok = ok || res; | |
1779 | } | |
1780 | ||
1781 | free_dependence_relations (then_ddrs); | |
1782 | free_dependence_relations (else_ddrs); | |
2473bfb7 | 1783 | free_data_refs (then_datarefs); |
1784 | free_data_refs (else_datarefs); | |
f1f41a6c | 1785 | then_stores.release (); |
1786 | else_stores.release (); | |
ec611e12 | 1787 | |
1788 | return ok; | |
1789 | } | |
1790 | ||
239e9670 | 1791 | /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */ |
1792 | ||
1793 | static bool | |
1794 | local_mem_dependence (gimple stmt, basic_block bb) | |
1795 | { | |
1796 | tree vuse = gimple_vuse (stmt); | |
1797 | gimple def; | |
1798 | ||
1799 | if (!vuse) | |
1800 | return false; | |
1801 | ||
1802 | def = SSA_NAME_DEF_STMT (vuse); | |
1803 | return (def && gimple_bb (def) == bb); | |
1804 | } | |
1805 | ||
1806 | /* Given a "diamond" control-flow pattern where BB0 tests a condition, | |
1807 | BB1 and BB2 are "then" and "else" blocks dependent on this test, | |
1808 | and BB3 rejoins control flow following BB1 and BB2, look for | |
1809 | opportunities to hoist loads as follows. If BB3 contains a PHI of | |
1810 | two loads, one each occurring in BB1 and BB2, and the loads are | |
1811 | provably of adjacent fields in the same structure, then move both | |
1812 | loads into BB0. Of course this can only be done if there are no | |
1813 | dependencies preventing such motion. | |
1814 | ||
1815 | One of the hoisted loads will always be speculative, so the | |
1816 | transformation is currently conservative: | |
1817 | ||
1818 | - The fields must be strictly adjacent. | |
1819 | - The two fields must occupy a single memory block that is | |
1820 | guaranteed to not cross a page boundary. | |
1821 | ||
1822 | The last is difficult to prove, as such memory blocks should be | |
1823 | aligned on the minimum of the stack alignment boundary and the | |
1824 | alignment guaranteed by heap allocation interfaces. Thus we rely | |
1825 | on a parameter for the alignment value. | |
1826 | ||
1827 | Provided a good value is used for the last case, the first | |
1828 | restriction could possibly be relaxed. */ | |
1829 | ||
1830 | static void | |
1831 | hoist_adjacent_loads (basic_block bb0, basic_block bb1, | |
1832 | basic_block bb2, basic_block bb3) | |
1833 | { | |
1834 | int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE); | |
1835 | unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT); | |
1836 | gimple_stmt_iterator gsi; | |
1837 | ||
1838 | /* Walk the phis in bb3 looking for an opportunity. We are looking | |
1839 | for phis of two SSA names, one each of which is defined in bb1 and | |
1840 | bb2. */ | |
1841 | for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1842 | { | |
1843 | gimple phi_stmt = gsi_stmt (gsi); | |
1844 | gimple def1, def2, defswap; | |
1845 | tree arg1, arg2, ref1, ref2, field1, field2, fieldswap; | |
1846 | tree tree_offset1, tree_offset2, tree_size2, next; | |
1847 | int offset1, offset2, size2; | |
1848 | unsigned align1; | |
1849 | gimple_stmt_iterator gsi2; | |
1850 | basic_block bb_for_def1, bb_for_def2; | |
1851 | ||
7c782c9b | 1852 | if (gimple_phi_num_args (phi_stmt) != 2 |
1853 | || virtual_operand_p (gimple_phi_result (phi_stmt))) | |
239e9670 | 1854 | continue; |
1855 | ||
1856 | arg1 = gimple_phi_arg_def (phi_stmt, 0); | |
1857 | arg2 = gimple_phi_arg_def (phi_stmt, 1); | |
1858 | ||
1859 | if (TREE_CODE (arg1) != SSA_NAME | |
1860 | || TREE_CODE (arg2) != SSA_NAME | |
1861 | || SSA_NAME_IS_DEFAULT_DEF (arg1) | |
7c782c9b | 1862 | || SSA_NAME_IS_DEFAULT_DEF (arg2)) |
239e9670 | 1863 | continue; |
1864 | ||
1865 | def1 = SSA_NAME_DEF_STMT (arg1); | |
1866 | def2 = SSA_NAME_DEF_STMT (arg2); | |
1867 | ||
1868 | if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2) | |
1869 | && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2)) | |
1870 | continue; | |
1871 | ||
1872 | /* Check the mode of the arguments to be sure a conditional move | |
1873 | can be generated for it. */ | |
935611bc | 1874 | if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1))) |
1875 | == CODE_FOR_nothing) | |
239e9670 | 1876 | continue; |
1877 | ||
1878 | /* Both statements must be assignments whose RHS is a COMPONENT_REF. */ | |
1879 | if (!gimple_assign_single_p (def1) | |
6cc085b6 | 1880 | || !gimple_assign_single_p (def2) |
1881 | || gimple_has_volatile_ops (def1) | |
1882 | || gimple_has_volatile_ops (def2)) | |
239e9670 | 1883 | continue; |
1884 | ||
1885 | ref1 = gimple_assign_rhs1 (def1); | |
1886 | ref2 = gimple_assign_rhs1 (def2); | |
1887 | ||
1888 | if (TREE_CODE (ref1) != COMPONENT_REF | |
1889 | || TREE_CODE (ref2) != COMPONENT_REF) | |
1890 | continue; | |
1891 | ||
1892 | /* The zeroth operand of the two component references must be | |
1893 | identical. It is not sufficient to compare get_base_address of | |
1894 | the two references, because this could allow for different | |
1895 | elements of the same array in the two trees. It is not safe to | |
1896 | assume that the existence of one array element implies the | |
1897 | existence of a different one. */ | |
1898 | if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0)) | |
1899 | continue; | |
1900 | ||
1901 | field1 = TREE_OPERAND (ref1, 1); | |
1902 | field2 = TREE_OPERAND (ref2, 1); | |
1903 | ||
1904 | /* Check for field adjacency, and ensure field1 comes first. */ | |
1905 | for (next = DECL_CHAIN (field1); | |
1906 | next && TREE_CODE (next) != FIELD_DECL; | |
1907 | next = DECL_CHAIN (next)) | |
1908 | ; | |
1909 | ||
1910 | if (next != field2) | |
1911 | { | |
1912 | for (next = DECL_CHAIN (field2); | |
1913 | next && TREE_CODE (next) != FIELD_DECL; | |
1914 | next = DECL_CHAIN (next)) | |
1915 | ; | |
1916 | ||
1917 | if (next != field1) | |
1918 | continue; | |
1919 | ||
1920 | fieldswap = field1; | |
1921 | field1 = field2; | |
1922 | field2 = fieldswap; | |
1923 | defswap = def1; | |
1924 | def1 = def2; | |
1925 | def2 = defswap; | |
239e9670 | 1926 | } |
1927 | ||
7c74ee50 | 1928 | bb_for_def1 = gimple_bb (def1); |
1929 | bb_for_def2 = gimple_bb (def2); | |
1930 | ||
239e9670 | 1931 | /* Check for proper alignment of the first field. */ |
1932 | tree_offset1 = bit_position (field1); | |
1933 | tree_offset2 = bit_position (field2); | |
1934 | tree_size2 = DECL_SIZE (field2); | |
1935 | ||
1936 | if (!host_integerp (tree_offset1, 1) | |
1937 | || !host_integerp (tree_offset2, 1) | |
1938 | || !host_integerp (tree_size2, 1)) | |
1939 | continue; | |
1940 | ||
1941 | offset1 = TREE_INT_CST_LOW (tree_offset1); | |
1942 | offset2 = TREE_INT_CST_LOW (tree_offset2); | |
1943 | size2 = TREE_INT_CST_LOW (tree_size2); | |
1944 | align1 = DECL_ALIGN (field1) % param_align_bits; | |
1945 | ||
1946 | if (offset1 % BITS_PER_UNIT != 0) | |
1947 | continue; | |
1948 | ||
1949 | /* For profitability, the two field references should fit within | |
1950 | a single cache line. */ | |
1951 | if (align1 + offset2 - offset1 + size2 > param_align_bits) | |
1952 | continue; | |
1953 | ||
1954 | /* The two expressions cannot be dependent upon vdefs defined | |
1955 | in bb1/bb2. */ | |
1956 | if (local_mem_dependence (def1, bb_for_def1) | |
1957 | || local_mem_dependence (def2, bb_for_def2)) | |
1958 | continue; | |
1959 | ||
1960 | /* The conditions are satisfied; hoist the loads from bb1 and bb2 into | |
1961 | bb0. We hoist the first one first so that a cache miss is handled | |
1962 | efficiently regardless of hardware cache-fill policy. */ | |
1963 | gsi2 = gsi_for_stmt (def1); | |
1964 | gsi_move_to_bb_end (&gsi2, bb0); | |
1965 | gsi2 = gsi_for_stmt (def2); | |
1966 | gsi_move_to_bb_end (&gsi2, bb0); | |
1967 | ||
1968 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1969 | { | |
1970 | fprintf (dump_file, | |
1971 | "\nHoisting adjacent loads from %d and %d into %d: \n", | |
1972 | bb_for_def1->index, bb_for_def2->index, bb0->index); | |
1973 | print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS); | |
1974 | print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS); | |
1975 | } | |
1976 | } | |
1977 | } | |
1978 | ||
1979 | /* Determine whether we should attempt to hoist adjacent loads out of | |
1980 | diamond patterns in pass_phiopt. Always hoist loads if | |
1981 | -fhoist-adjacent-loads is specified and the target machine has | |
6f0ddab1 | 1982 | both a conditional move instruction and a defined cache line size. */ |
239e9670 | 1983 | |
1984 | static bool | |
1985 | gate_hoist_loads (void) | |
1986 | { | |
6f0ddab1 | 1987 | return (flag_hoist_adjacent_loads == 1 |
1988 | && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE) | |
1989 | && HAVE_conditional_move); | |
239e9670 | 1990 | } |
1991 | ||
4ee9c684 | 1992 | /* Always do these optimizations if we have SSA |
20e5647c | 1993 | trees to work on. */ |
4ee9c684 | 1994 | static bool |
1995 | gate_phiopt (void) | |
1996 | { | |
1997 | return 1; | |
1998 | } | |
20e5647c | 1999 | |
20099e35 | 2000 | struct gimple_opt_pass pass_phiopt = |
4ee9c684 | 2001 | { |
20099e35 | 2002 | { |
2003 | GIMPLE_PASS, | |
4ee9c684 | 2004 | "phiopt", /* name */ |
c7875731 | 2005 | OPTGROUP_NONE, /* optinfo_flags */ |
4ee9c684 | 2006 | gate_phiopt, /* gate */ |
2007 | tree_ssa_phiopt, /* execute */ | |
2008 | NULL, /* sub */ | |
2009 | NULL, /* next */ | |
2010 | 0, /* static_pass_number */ | |
2011 | TV_TREE_PHIOPT, /* tv_id */ | |
2f8eb909 | 2012 | PROP_cfg | PROP_ssa, /* properties_required */ |
4ee9c684 | 2013 | 0, /* properties_provided */ |
2014 | 0, /* properties_destroyed */ | |
2015 | 0, /* todo_flags_start */ | |
771e2890 | 2016 | TODO_ggc_collect |
88dbf20f | 2017 | | TODO_verify_ssa |
88dbf20f | 2018 | | TODO_verify_flow |
20099e35 | 2019 | | TODO_verify_stmts /* todo_flags_finish */ |
2020 | } | |
4ee9c684 | 2021 | }; |
e6d0e152 | 2022 | |
2023 | static bool | |
2024 | gate_cselim (void) | |
2025 | { | |
2026 | return flag_tree_cselim; | |
2027 | } | |
2028 | ||
20099e35 | 2029 | struct gimple_opt_pass pass_cselim = |
e6d0e152 | 2030 | { |
20099e35 | 2031 | { |
2032 | GIMPLE_PASS, | |
e6d0e152 | 2033 | "cselim", /* name */ |
c7875731 | 2034 | OPTGROUP_NONE, /* optinfo_flags */ |
e6d0e152 | 2035 | gate_cselim, /* gate */ |
2036 | tree_ssa_cs_elim, /* execute */ | |
2037 | NULL, /* sub */ | |
2038 | NULL, /* next */ | |
2039 | 0, /* static_pass_number */ | |
2040 | TV_TREE_PHIOPT, /* tv_id */ | |
2f8eb909 | 2041 | PROP_cfg | PROP_ssa, /* properties_required */ |
e6d0e152 | 2042 | 0, /* properties_provided */ |
2043 | 0, /* properties_destroyed */ | |
2044 | 0, /* todo_flags_start */ | |
771e2890 | 2045 | TODO_ggc_collect |
e6d0e152 | 2046 | | TODO_verify_ssa |
2047 | | TODO_verify_flow | |
20099e35 | 2048 | | TODO_verify_stmts /* todo_flags_finish */ |
2049 | } | |
e6d0e152 | 2050 | }; |