]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-phiopt.c
2013-04-10 Richard Biener <rguenther@suse.de>
[thirdparty/gcc.git] / gcc / tree-ssa-phiopt.c
CommitLineData
4ee9c684 1/* Optimization of PHI nodes by converting them into straightline code.
711789cc 2 Copyright (C) 2004-2013 Free Software Foundation, Inc.
4ee9c684 3
4This file is part of GCC.
20e5647c 5
4ee9c684 6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8c4c00c1 8Free Software Foundation; either version 3, or (at your option) any
4ee9c684 9later version.
20e5647c 10
4ee9c684 11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
20e5647c 15
4ee9c684 16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
4ee9c684 19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tm.h"
4ee9c684 24#include "ggc.h"
25#include "tree.h"
0beac6fc 26#include "flags.h"
4ee9c684 27#include "tm_p.h"
28#include "basic-block.h"
4ee9c684 29#include "tree-flow.h"
30#include "tree-pass.h"
4ee9c684 31#include "langhooks.h"
e6d0e152 32#include "pointer-set.h"
33#include "domwalk.h"
ec611e12 34#include "cfgloop.h"
35#include "tree-data-ref.h"
239e9670 36#include "gimple-pretty-print.h"
37#include "insn-config.h"
38#include "expr.h"
39#include "optabs.h"
40
41#ifndef HAVE_conditional_move
42#define HAVE_conditional_move (0)
43#endif
4ee9c684 44
75a70cf9 45static unsigned int tree_ssa_phiopt (void);
239e9670 46static unsigned int tree_ssa_phiopt_worker (bool, bool);
a4844041 47static bool conditional_replacement (basic_block, basic_block,
75a70cf9 48 edge, edge, gimple, tree, tree);
fb9912ea 49static int value_replacement (basic_block, basic_block,
50 edge, edge, gimple, tree, tree);
a4844041 51static bool minmax_replacement (basic_block, basic_block,
75a70cf9 52 edge, edge, gimple, tree, tree);
a4844041 53static bool abs_replacement (basic_block, basic_block,
75a70cf9 54 edge, edge, gimple, tree, tree);
e6d0e152 55static bool cond_store_replacement (basic_block, basic_block, edge, edge,
56 struct pointer_set_t *);
91cf53d5 57static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
e6d0e152 58static struct pointer_set_t * get_non_trapping (void);
75a70cf9 59static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree);
239e9670 60static void hoist_adjacent_loads (basic_block, basic_block,
61 basic_block, basic_block);
62static bool gate_hoist_loads (void);
902929aa 63
caac37c2 64/* This pass tries to replaces an if-then-else block with an
65 assignment. We have four kinds of transformations. Some of these
66 transformations are also performed by the ifcvt RTL optimizer.
67
68 Conditional Replacement
69 -----------------------
70
30c5ffd2 71 This transformation, implemented in conditional_replacement,
caac37c2 72 replaces
4ee9c684 73
74 bb0:
75 if (cond) goto bb2; else goto bb1;
76 bb1:
77 bb2:
caac37c2 78 x = PHI <0 (bb1), 1 (bb0), ...>;
4ee9c684 79
caac37c2 80 with
20e5647c 81
2ab0a163 82 bb0:
caac37c2 83 x' = cond;
84 goto bb2;
2ab0a163 85 bb2:
caac37c2 86 x = PHI <x' (bb0), ...>;
4ee9c684 87
caac37c2 88 We remove bb1 as it becomes unreachable. This occurs often due to
89 gimplification of conditionals.
20e5647c 90
caac37c2 91 Value Replacement
92 -----------------
93
94 This transformation, implemented in value_replacement, replaces
0beac6fc 95
96 bb0:
caac37c2 97 if (a != b) goto bb2; else goto bb1;
0beac6fc 98 bb1:
99 bb2:
caac37c2 100 x = PHI <a (bb1), b (bb0), ...>;
0beac6fc 101
caac37c2 102 with
0beac6fc 103
104 bb0:
0beac6fc 105 bb2:
caac37c2 106 x = PHI <b (bb0), ...>;
107
108 This opportunity can sometimes occur as a result of other
109 optimizations.
0beac6fc 110
caac37c2 111 ABS Replacement
112 ---------------
70512b93 113
caac37c2 114 This transformation, implemented in abs_replacement, replaces
70512b93 115
116 bb0:
caac37c2 117 if (a >= 0) goto bb2; else goto bb1;
70512b93 118 bb1:
caac37c2 119 x = -a;
70512b93 120 bb2:
caac37c2 121 x = PHI <x (bb1), a (bb0), ...>;
70512b93 122
caac37c2 123 with
70512b93 124
125 bb0:
caac37c2 126 x' = ABS_EXPR< a >;
70512b93 127 bb2:
caac37c2 128 x = PHI <x' (bb0), ...>;
129
130 MIN/MAX Replacement
131 -------------------
70512b93 132
caac37c2 133 This transformation, minmax_replacement replaces
194899bf 134
135 bb0:
caac37c2 136 if (a <= b) goto bb2; else goto bb1;
194899bf 137 bb1:
194899bf 138 bb2:
caac37c2 139 x = PHI <b (bb1), a (bb0), ...>;
194899bf 140
caac37c2 141 with
194899bf 142
caac37c2 143 bb0:
144 x' = MIN_EXPR (a, b)
145 bb2:
146 x = PHI <x' (bb0), ...>;
194899bf 147
239e9670 148 A similar transformation is done for MAX_EXPR.
149
150
151 This pass also performs a fifth transformation of a slightly different
152 flavor.
153
154 Adjacent Load Hoisting
155 ----------------------
156
157 This transformation replaces
158
159 bb0:
160 if (...) goto bb2; else goto bb1;
161 bb1:
162 x1 = (<expr>).field1;
163 goto bb3;
164 bb2:
165 x2 = (<expr>).field2;
166 bb3:
167 # x = PHI <x1, x2>;
168
169 with
170
171 bb0:
172 x1 = (<expr>).field1;
173 x2 = (<expr>).field2;
174 if (...) goto bb2; else goto bb1;
175 bb1:
176 goto bb3;
177 bb2:
178 bb3:
179 # x = PHI <x1, x2>;
180
181 The purpose of this transformation is to enable generation of conditional
182 move instructions such as Intel CMOVE or PowerPC ISEL. Because one of
183 the loads is speculative, the transformation is restricted to very
184 specific cases to avoid introducing a page fault. We are looking for
185 the common idiom:
186
187 if (...)
188 x = y->left;
189 else
190 x = y->right;
191
192 where left and right are typically adjacent pointers in a tree structure. */
70512b93 193
2a1990e9 194static unsigned int
4ee9c684 195tree_ssa_phiopt (void)
e6d0e152 196{
239e9670 197 return tree_ssa_phiopt_worker (false, gate_hoist_loads ());
e6d0e152 198}
199
200/* This pass tries to transform conditional stores into unconditional
201 ones, enabling further simplifications with the simpler then and else
202 blocks. In particular it replaces this:
203
204 bb0:
205 if (cond) goto bb2; else goto bb1;
206 bb1:
91cf53d5 207 *p = RHS;
e6d0e152 208 bb2:
209
210 with
211
212 bb0:
213 if (cond) goto bb1; else goto bb2;
214 bb1:
215 condtmp' = *p;
216 bb2:
217 condtmp = PHI <RHS, condtmp'>
91cf53d5 218 *p = condtmp;
e6d0e152 219
220 This transformation can only be done under several constraints,
91cf53d5 221 documented below. It also replaces:
222
223 bb0:
224 if (cond) goto bb2; else goto bb1;
225 bb1:
226 *p = RHS1;
227 goto bb3;
228 bb2:
229 *p = RHS2;
230 bb3:
231
232 with
233
234 bb0:
235 if (cond) goto bb3; else goto bb1;
236 bb1:
237 bb3:
238 condtmp = PHI <RHS1, RHS2>
239 *p = condtmp; */
e6d0e152 240
241static unsigned int
242tree_ssa_cs_elim (void)
243{
239e9670 244 return tree_ssa_phiopt_worker (true, false);
e6d0e152 245}
246
c3597b05 247/* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */
248
249static gimple
250single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
251{
252 gimple_stmt_iterator i;
253 gimple phi = NULL;
254 if (gimple_seq_singleton_p (seq))
255 return gsi_stmt (gsi_start (seq));
256 for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
257 {
258 gimple p = gsi_stmt (i);
259 /* If the PHI arguments are equal then we can skip this PHI. */
260 if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
261 gimple_phi_arg_def (p, e1->dest_idx)))
262 continue;
263
264 /* If we already have a PHI that has the two edge arguments are
265 different, then return it is not a singleton for these PHIs. */
266 if (phi)
267 return NULL;
268
269 phi = p;
270 }
271 return phi;
272}
273
/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.
   Returns a TODO_* flag set telling the pass manager what cleanups
   are needed, or 0 when nothing changed.  */
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  struct pointer_set_t *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* Search every basic block for COND_EXPR we may be able to optimize.

     We walk the blocks in order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = blocks_in_phiopt_order ();
  n = n_basic_blocks - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple cond_stmt, phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
	  || gimple_code (cond_stmt) != GIMPLE_COND)
	continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
	  || (e2->flags & EDGE_ABNORMAL) != 0)
	continue;

      /* Punt if either successor block is missing or has no
	 successors of its own (e.g. ends the function).  */
      if (EDGE_COUNT (bb1->succs) == 0
	  || bb2 == NULL
	  || EDGE_COUNT (bb2->succs) == 0)
	continue;

      /* Find the bb which is the fall through to the other.  After this
	 block, bb1 is the middle block and bb2 the join block.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
	;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
	{
	  /* Swap the roles so that bb1 is always the middle block.  */
	  basic_block bb_tmp = bb1;
	  edge e_tmp = e1;
	  bb1 = bb2;
	  bb2 = bb_tmp;
	  e1 = e2;
	  e2 = e_tmp;
	}
      else if (do_store_elim
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  /* Diamond: both arms fall through into a common join bb3;
	     try if-else store replacement on the two arms.  */
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!single_succ_p (bb1)
	      || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
	      || !single_succ_p (bb2)
	      || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
	      || EDGE_COUNT (bb3->preds) != 2)
	    continue;
	  if (cond_if_else_store_replacement (bb1, bb2, bb3))
	    cfgchanged = true;
	  continue;
	}
      else if (do_hoist_loads
		 && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  /* Diamond with a non-FP condition: consider hoisting the
	     adjacent loads out of the two arms (see the file-top
	     comment on Adjacent Load Hoisting).  */
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
	      && single_succ_p (bb1)
	      && single_succ_p (bb2)
	      && single_pred_p (bb1)
	      && single_pred_p (bb2)
	      && EDGE_COUNT (bb->succs) == 2
	      && EDGE_COUNT (bb3->preds) == 2
	      /* If one edge or the other is dominant, a conditional move
		 is likely to perform worse than the well-predicted branch.  */
	      && !predictable_edge_p (EDGE_SUCC (bb, 0))
	      && !predictable_edge_p (EDGE_SUCC (bb, 1)))
	    hoist_adjacent_loads (bb, bb1, bb2, bb3);
	  continue;
	}
      else
	continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
	  || (e1->flags & EDGE_FALLTHRU) == 0)
	continue;

      /* Also make sure that bb1 only have one predecessor and that it
	 is bb.  */
      if (!single_pred_p (bb1)
	  || single_pred (bb1) != bb)
	continue;

      if (do_store_elim)
	{
	  /* bb1 is the middle block, bb2 the join block, bb the split block,
	     e1 the fallthrough edge from bb1 to bb2.  We can't do the
	     optimization if the join block has more than two predecessors.  */
	  if (EDGE_COUNT (bb2->preds) > 2)
	    continue;
	  if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
	    cfgchanged = true;
	}
      else
	{
	  gimple_seq phis = phi_nodes (bb2);
	  gimple_stmt_iterator gsi;
	  bool candorest = true;

	  /* Value replacement can work with more than one PHI
	     so try that first. */
	  for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      phi = gsi_stmt (gsi);
	      arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	      arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
	      /* Return value 2 means the middle block was removed, so
		 the remaining transformations no longer apply here.  */
	      if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
		{
		  candorest = false;
		  cfgchanged = true;
		  break;
		}
	    }

	  if (!candorest)
	    continue;

	  /* The remaining transformations need a unique PHI whose
	     arguments differ across the two edges.  */
	  phi = single_non_singleton_phi_for_edges (phis, e1, e2);
	  if (!phi)
	    continue;

	  arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	  arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

	  /* Something is wrong if we cannot find the arguments in the PHI
	     node.  */
	  gcc_assert (arg0 != NULL && arg1 != NULL);

	  /* Do the replacement of conditional if it can be done.  */
	  if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	}
    }

  free (bb_order);

  if (do_store_elim)
    pointer_set_destroy (nontrap);
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
	 and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}
467
/* Returns the list of basic blocks in the function in an order that guarantees
   that if a block X has just a single predecessor Y, then Y is after X in the
   ordering.  The caller owns the returned vector and must free () it.  */

basic_block *
blocks_in_phiopt_order (void)
{
  basic_block x, y;
  basic_block *order = XNEWVEC (basic_block, n_basic_blocks);
  /* N tracks how many order[] slots are still unassigned; blocks are
     filled in from the back of the array towards the front.  */
  unsigned n = n_basic_blocks - NUM_FIXED_BLOCKS;
  unsigned np, i;
  sbitmap visited = sbitmap_alloc (last_basic_block);

#define MARK_VISITED(BB) (bitmap_set_bit (visited, (BB)->index))
#define VISITED_P(BB) (bitmap_bit_p (visited, (BB)->index))

  bitmap_clear (visited);

  MARK_VISITED (ENTRY_BLOCK_PTR);
  FOR_EACH_BB (x)
    {
      if (VISITED_P (x))
	continue;

      /* Walk the predecessors of x as long as they have precisely one
	 predecessor and add them to the list, so that they get stored
	 after x.  First pass counts the chain length (NP), second pass
	 stores the chain into order[n - np .. n - 1].  */
      for (y = x, np = 1;
	   single_pred_p (y) && !VISITED_P (single_pred (y));
	   y = single_pred (y))
	np++;
      for (y = x, i = n - np;
	   single_pred_p (y) && !VISITED_P (single_pred (y));
	   y = single_pred (y), i++)
	{
	  order[i] = y;
	  MARK_VISITED (y);
	}
      /* Store the last block of the chain (the outermost predecessor).  */
      order[i] = y;
      MARK_VISITED (y);

      gcc_assert (i == n - 1);
      n -= np;
    }

  sbitmap_free (visited);
  /* Every slot must have been filled exactly once.  */
  gcc_assert (n == 0);
  return order;

#undef MARK_VISITED
#undef VISITED_P
}
520
/* Replace PHI node element whose edge is E in block BB with variable NEW.
   Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK
   is known to have two edges, one of which must reach BB).  Also deletes
   the now-dead alternative block and the GIMPLE_COND ending COND_BLOCK,
   turning the remaining edge into a plain fallthrough.  */

static void
replace_phi_edge_with_variable (basic_block cond_block,
				edge e, gimple phi, tree new_tree)
{
  basic_block bb = gimple_bb (phi);
  basic_block block_to_remove;
  gimple_stmt_iterator gsi;

  /* Change the PHI argument to new.  */
  SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);

  /* Remove the empty basic block.  The surviving edge absorbs the
     removed edge's execution count and becomes unconditional.  */
  if (EDGE_SUCC (cond_block, 0)->dest == bb)
    {
      EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count;

      block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
    }
  else
    {
      EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 1)->flags
	&= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count;

      block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
    }
  delete_basic_block (block_to_remove);

  /* Eliminate the COND_EXPR at the end of COND_BLOCK.  */
  gsi = gsi_last_bb (cond_block);
  gsi_remove (&gsi, true);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	      "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
	      cond_block->index,
	      bb->index);
}
568
/* The function conditional_replacement does the main work of doing the
   conditional replacement.  Return true if the replacement is done.
   Otherwise return false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from PHI.  Likewise for ARG1.  Handles PHIs of the
   constants (0, 1) — rewritten to the condition itself — and (0, -1),
   rewritten to the negated condition.  */

static bool
conditional_replacement (basic_block cond_bb, basic_block middle_bb,
			 edge e0, edge e1, gimple phi,
			 tree arg0, tree arg1)
{
  tree result;
  gimple stmt, new_stmt;
  tree cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  tree new_var, new_var2;
  bool neg;

  /* FIXME: Gimplification of complex type is too hard for now.  */
  /* We aren't prepared to handle vectors either (and it is a question
     if it would be worthwhile anyway).  */
  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	|| POINTER_TYPE_P (TREE_TYPE (arg0)))
      || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
	   || POINTER_TYPE_P (TREE_TYPE (arg1))))
    return false;

  /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
     convert it to the conditional.  */
  if ((integer_zerop (arg0) && integer_onep (arg1))
      || (integer_zerop (arg1) && integer_onep (arg0)))
    neg = false;
  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
	   || (integer_zerop (arg1) && integer_all_onesp (arg0)))
    neg = true;
  else
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  /* At this point we know we have a GIMPLE_COND with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     There is a single PHI node at the join point (BB) and its arguments
     are constants (0, 1) or (0, -1).

     So, given the condition COND, and the two PHI arguments, we can
     rewrite this PHI into non-branching code:

       dest = (COND) or dest = COND'

     We use the condition as-is if the argument associated with the
     true edge has the value one or the argument associated with the
     false edge as the value zero.  Note that those conditions are not
     the same since only one of the outgoing edges from the GIMPLE_COND
     will directly reach BB and thus be associated with an argument.  */

  stmt = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* To handle special cases like floating point comparison, it is easier and
     less error-prone to build a tree and gimplify it on the fly though it is
     less efficient.  */
  cond = fold_build2_loc (gimple_location (stmt),
			  gimple_cond_code (stmt), boolean_type_node,
			  gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((e0 == true_edge && integer_zerop (arg0))
      || (e0 == false_edge && !integer_zerop (arg0))
      || (e1 == true_edge && integer_zerop (arg1))
      || (e1 == false_edge && !integer_zerop (arg1)))
    cond = fold_build1_loc (gimple_location (stmt),
			    TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);

  /* For the (0, -1) case, negate the boolean condition after widening
     it to the PHI result type.  */
  if (neg)
    {
      cond = fold_convert_loc (gimple_location (stmt),
			       TREE_TYPE (result), cond);
      cond = fold_build1_loc (gimple_location (stmt),
			      NEGATE_EXPR, TREE_TYPE (cond), cond);
    }

  /* Insert our new statements at the end of conditional block before the
     COND_STMT.  */
  gsi = gsi_for_stmt (stmt);
  new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
				      GSI_SAME_STMT);

  if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
    {
      source_location locus_0, locus_1;

      /* Insert an explicit conversion to the PHI result type.  */
      new_var2 = make_ssa_name (TREE_TYPE (result), NULL);
      new_stmt = gimple_build_assign_with_ops (CONVERT_EXPR, new_var2,
					       new_var, NULL);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      new_var = new_var2;

      /* Set the locus to the first argument, unless is doesn't have one.  */
      locus_0 = gimple_phi_arg_location (phi, 0);
      locus_1 = gimple_phi_arg_location (phi, 1);
      if (locus_0 == UNKNOWN_LOCATION)
	locus_0 = locus_1;
      gimple_set_location (new_stmt, locus_0);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);

  /* Note that we optimized this PHI.  */
  return true;
}
686
/* Update *ARG which is defined in STMT so that it contains the
   computed value if that seems profitable.  Return true if the
   statement is made dead by that rewriting.  Currently only handles
   ARG = &p->i where the address computation folds to a zero-offset
   MEM_REF, in which case *ARG is replaced by the base pointer p.  */

static bool
jump_function_from_stmt (tree *arg, gimple stmt)
{
  enum tree_code code = gimple_assign_rhs_code (stmt);
  if (code == ADDR_EXPR)
    {
      /* For arg = &p->i transform it to p, if possible.  */
      tree rhs1 = gimple_assign_rhs1 (stmt);
      HOST_WIDE_INT offset;
      tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
						&offset);
      /* Only a MEM_REF whose total offset (its own plus the address
	 computation's) is zero reduces to the bare base pointer.  */
      if (tem
	  && TREE_CODE (tem) == MEM_REF
	  && (mem_ref_offset (tem) + double_int::from_shwi (offset)).is_zero ())
	{
	  *arg = TREE_OPERAND (tem, 0);
	  return true;
	}
    }
  /* TODO: Much like IPA-CP jump-functions we want to handle constant
     additions symbolically here, and we'd need to update the comparison
     code that compares the arg + cst tuples in our caller.  For now the
     code above exactly handles the VEC_BASE pattern from vec.h.  */
  return false;
}
716
0beac6fc 717/* The function value_replacement does the main work of doing the value
fb9912ea 718 replacement. Return non-zero if the replacement is done. Otherwise return
719 0. If we remove the middle basic block, return 2.
0beac6fc 720 BB is the basic block where the replacement is going to be done on. ARG0
dac49aa5 721 is argument 0 from the PHI. Likewise for ARG1. */
0beac6fc 722
fb9912ea 723static int
33784d89 724value_replacement (basic_block cond_bb, basic_block middle_bb,
75a70cf9 725 edge e0, edge e1, gimple phi,
33784d89 726 tree arg0, tree arg1)
0beac6fc 727{
17b9476e 728 gimple_stmt_iterator gsi;
75a70cf9 729 gimple cond;
0beac6fc 730 edge true_edge, false_edge;
75a70cf9 731 enum tree_code code;
fb9912ea 732 bool emtpy_or_with_defined_p = true;
0beac6fc 733
734 /* If the type says honor signed zeros we cannot do this
dac49aa5 735 optimization. */
0beac6fc 736 if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
fb9912ea 737 return 0;
0beac6fc 738
fb9912ea 739 /* If there is a statement in MIDDLE_BB that defines one of the PHI
740 arguments, then adjust arg0 or arg1. */
17b9476e 741 gsi = gsi_after_labels (middle_bb);
fb9912ea 742 if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
743 gsi_next_nondebug (&gsi);
744 while (!gsi_end_p (gsi))
17b9476e 745 {
fb9912ea 746 gimple stmt = gsi_stmt (gsi);
747 tree lhs;
748 gsi_next_nondebug (&gsi);
749 if (!is_gimple_assign (stmt))
17b9476e 750 {
fb9912ea 751 emtpy_or_with_defined_p = false;
752 continue;
17b9476e 753 }
fb9912ea 754 /* Now try to adjust arg0 or arg1 according to the computation
755 in the statement. */
756 lhs = gimple_assign_lhs (stmt);
757 if (!(lhs == arg0
758 && jump_function_from_stmt (&arg0, stmt))
759 || (lhs == arg1
760 && jump_function_from_stmt (&arg1, stmt)))
761 emtpy_or_with_defined_p = false;
17b9476e 762 }
0beac6fc 763
75a70cf9 764 cond = last_stmt (cond_bb);
765 code = gimple_cond_code (cond);
0beac6fc 766
767 /* This transformation is only valid for equality comparisons. */
75a70cf9 768 if (code != NE_EXPR && code != EQ_EXPR)
fb9912ea 769 return 0;
0beac6fc 770
771 /* We need to know which is the true edge and which is the false
772 edge so that we know if have abs or negative abs. */
33784d89 773 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
0beac6fc 774
775 /* At this point we know we have a COND_EXPR with two successors.
776 One successor is BB, the other successor is an empty block which
777 falls through into BB.
778
779 The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.
780
781 There is a single PHI node at the join point (BB) with two arguments.
782
783 We now need to verify that the two arguments in the PHI node match
784 the two arguments to the equality comparison. */
20e5647c 785
75a70cf9 786 if ((operand_equal_for_phi_arg_p (arg0, gimple_cond_lhs (cond))
787 && operand_equal_for_phi_arg_p (arg1, gimple_cond_rhs (cond)))
788 || (operand_equal_for_phi_arg_p (arg1, gimple_cond_lhs (cond))
789 && operand_equal_for_phi_arg_p (arg0, gimple_cond_rhs (cond))))
0beac6fc 790 {
791 edge e;
792 tree arg;
793
50737d20 794 /* For NE_EXPR, we want to build an assignment result = arg where
795 arg is the PHI argument associated with the true edge. For
796 EQ_EXPR we want the PHI argument associated with the false edge. */
75a70cf9 797 e = (code == NE_EXPR ? true_edge : false_edge);
50737d20 798
799 /* Unfortunately, E may not reach BB (it may instead have gone to
800 OTHER_BLOCK). If that is the case, then we want the single outgoing
801 edge from OTHER_BLOCK which reaches BB and represents the desired
802 path from COND_BLOCK. */
33784d89 803 if (e->dest == middle_bb)
ea091dfd 804 e = single_succ_edge (e->dest);
50737d20 805
806 /* Now we know the incoming edge to BB that has the argument for the
807 RHS of our new assignment statement. */
33784d89 808 if (e0 == e)
0beac6fc 809 arg = arg0;
810 else
811 arg = arg1;
812
fb9912ea 813 /* If the middle basic block was empty or is defining the
c3597b05 814 PHI arguments and this is a single phi where the args are different
815 for the edges e0 and e1 then we can remove the middle basic block. */
fb9912ea 816 if (emtpy_or_with_defined_p
c3597b05 817 && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
818 e0, e1))
fb9912ea 819 {
820 replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
821 /* Note that we optimized this PHI. */
822 return 2;
823 }
824 else
825 {
826 /* Replace the PHI arguments with arg. */
827 SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
828 SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
829 if (dump_file && (dump_flags & TDF_DETAILS))
830 {
831 fprintf (dump_file, "PHI ");
832 print_generic_expr (dump_file, gimple_phi_result (phi), 0);
c3597b05 833 fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
834 cond_bb->index);
fb9912ea 835 print_generic_expr (dump_file, arg, 0);
836 fprintf (dump_file, ".\n");
837 }
838 return 1;
839 }
0beac6fc 840
0beac6fc 841 }
fb9912ea 842 return 0;
0beac6fc 843}
844
194899bf 845/* The function minmax_replacement does the main work of doing the minmax
846 replacement. Return true if the replacement is done. Otherwise return
847 false.
848 BB is the basic block where the replacement is going to be done on. ARG0
849 is argument 0 from the PHI. Likewise for ARG1. */
850
851static bool
852minmax_replacement (basic_block cond_bb, basic_block middle_bb,
75a70cf9 853 edge e0, edge e1, gimple phi,
194899bf 854 tree arg0, tree arg1)
855{
856 tree result, type;
75a70cf9 857 gimple cond, new_stmt;
194899bf 858 edge true_edge, false_edge;
859 enum tree_code cmp, minmax, ass_code;
860 tree smaller, larger, arg_true, arg_false;
75a70cf9 861 gimple_stmt_iterator gsi, gsi_from;
194899bf 862
863 type = TREE_TYPE (PHI_RESULT (phi));
864
865 /* The optimization may be unsafe due to NaNs. */
866 if (HONOR_NANS (TYPE_MODE (type)))
867 return false;
868
75a70cf9 869 cond = last_stmt (cond_bb);
870 cmp = gimple_cond_code (cond);
194899bf 871
872 /* This transformation is only valid for order comparisons. Record which
873 operand is smaller/larger if the result of the comparison is true. */
874 if (cmp == LT_EXPR || cmp == LE_EXPR)
875 {
75a70cf9 876 smaller = gimple_cond_lhs (cond);
877 larger = gimple_cond_rhs (cond);
194899bf 878 }
879 else if (cmp == GT_EXPR || cmp == GE_EXPR)
880 {
75a70cf9 881 smaller = gimple_cond_rhs (cond);
882 larger = gimple_cond_lhs (cond);
194899bf 883 }
884 else
885 return false;
886
887 /* We need to know which is the true edge and which is the false
888 edge so that we know if have abs or negative abs. */
889 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
890
891 /* Forward the edges over the middle basic block. */
892 if (true_edge->dest == middle_bb)
893 true_edge = EDGE_SUCC (true_edge->dest, 0);
894 if (false_edge->dest == middle_bb)
895 false_edge = EDGE_SUCC (false_edge->dest, 0);
896
897 if (true_edge == e0)
898 {
899 gcc_assert (false_edge == e1);
900 arg_true = arg0;
901 arg_false = arg1;
902 }
903 else
904 {
905 gcc_assert (false_edge == e0);
906 gcc_assert (true_edge == e1);
907 arg_true = arg1;
908 arg_false = arg0;
909 }
910
911 if (empty_block_p (middle_bb))
912 {
913 if (operand_equal_for_phi_arg_p (arg_true, smaller)
914 && operand_equal_for_phi_arg_p (arg_false, larger))
915 {
916 /* Case
48e1416a 917
194899bf 918 if (smaller < larger)
919 rslt = smaller;
920 else
921 rslt = larger; */
922 minmax = MIN_EXPR;
923 }
924 else if (operand_equal_for_phi_arg_p (arg_false, smaller)
925 && operand_equal_for_phi_arg_p (arg_true, larger))
926 minmax = MAX_EXPR;
927 else
928 return false;
929 }
930 else
931 {
932 /* Recognize the following case, assuming d <= u:
933
934 if (a <= u)
935 b = MAX (a, d);
936 x = PHI <b, u>
937
938 This is equivalent to
939
940 b = MAX (a, d);
941 x = MIN (b, u); */
942
75a70cf9 943 gimple assign = last_and_only_stmt (middle_bb);
944 tree lhs, op0, op1, bound;
194899bf 945
946 if (!assign
75a70cf9 947 || gimple_code (assign) != GIMPLE_ASSIGN)
194899bf 948 return false;
949
75a70cf9 950 lhs = gimple_assign_lhs (assign);
951 ass_code = gimple_assign_rhs_code (assign);
194899bf 952 if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
953 return false;
75a70cf9 954 op0 = gimple_assign_rhs1 (assign);
955 op1 = gimple_assign_rhs2 (assign);
194899bf 956
957 if (true_edge->src == middle_bb)
958 {
959 /* We got here if the condition is true, i.e., SMALLER < LARGER. */
960 if (!operand_equal_for_phi_arg_p (lhs, arg_true))
961 return false;
962
963 if (operand_equal_for_phi_arg_p (arg_false, larger))
964 {
965 /* Case
966
967 if (smaller < larger)
968 {
969 r' = MAX_EXPR (smaller, bound)
970 }
971 r = PHI <r', larger> --> to be turned to MIN_EXPR. */
972 if (ass_code != MAX_EXPR)
973 return false;
974
975 minmax = MIN_EXPR;
976 if (operand_equal_for_phi_arg_p (op0, smaller))
977 bound = op1;
978 else if (operand_equal_for_phi_arg_p (op1, smaller))
979 bound = op0;
980 else
981 return false;
982
983 /* We need BOUND <= LARGER. */
49d00087 984 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
985 bound, larger)))
194899bf 986 return false;
987 }
988 else if (operand_equal_for_phi_arg_p (arg_false, smaller))
989 {
990 /* Case
991
992 if (smaller < larger)
993 {
994 r' = MIN_EXPR (larger, bound)
995 }
996 r = PHI <r', smaller> --> to be turned to MAX_EXPR. */
997 if (ass_code != MIN_EXPR)
998 return false;
999
1000 minmax = MAX_EXPR;
1001 if (operand_equal_for_phi_arg_p (op0, larger))
1002 bound = op1;
1003 else if (operand_equal_for_phi_arg_p (op1, larger))
1004 bound = op0;
1005 else
1006 return false;
1007
1008 /* We need BOUND >= SMALLER. */
49d00087 1009 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
1010 bound, smaller)))
194899bf 1011 return false;
1012 }
1013 else
1014 return false;
1015 }
1016 else
1017 {
1018 /* We got here if the condition is false, i.e., SMALLER > LARGER. */
1019 if (!operand_equal_for_phi_arg_p (lhs, arg_false))
1020 return false;
1021
1022 if (operand_equal_for_phi_arg_p (arg_true, larger))
1023 {
1024 /* Case
1025
1026 if (smaller > larger)
1027 {
1028 r' = MIN_EXPR (smaller, bound)
1029 }
1030 r = PHI <r', larger> --> to be turned to MAX_EXPR. */
1031 if (ass_code != MIN_EXPR)
1032 return false;
1033
1034 minmax = MAX_EXPR;
1035 if (operand_equal_for_phi_arg_p (op0, smaller))
1036 bound = op1;
1037 else if (operand_equal_for_phi_arg_p (op1, smaller))
1038 bound = op0;
1039 else
1040 return false;
1041
1042 /* We need BOUND >= LARGER. */
49d00087 1043 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
1044 bound, larger)))
194899bf 1045 return false;
1046 }
1047 else if (operand_equal_for_phi_arg_p (arg_true, smaller))
1048 {
1049 /* Case
1050
1051 if (smaller > larger)
1052 {
1053 r' = MAX_EXPR (larger, bound)
1054 }
1055 r = PHI <r', smaller> --> to be turned to MIN_EXPR. */
1056 if (ass_code != MAX_EXPR)
1057 return false;
1058
1059 minmax = MIN_EXPR;
1060 if (operand_equal_for_phi_arg_p (op0, larger))
1061 bound = op1;
1062 else if (operand_equal_for_phi_arg_p (op1, larger))
1063 bound = op0;
1064 else
1065 return false;
1066
1067 /* We need BOUND <= SMALLER. */
49d00087 1068 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
1069 bound, smaller)))
194899bf 1070 return false;
1071 }
1072 else
1073 return false;
1074 }
1075
1076 /* Move the statement from the middle block. */
75a70cf9 1077 gsi = gsi_last_bb (cond_bb);
445a6ba5 1078 gsi_from = gsi_last_nondebug_bb (middle_bb);
75a70cf9 1079 gsi_move_before (&gsi_from, &gsi);
194899bf 1080 }
1081
1082 /* Emit the statement to compute min/max. */
1083 result = duplicate_ssa_name (PHI_RESULT (phi), NULL);
75a70cf9 1084 new_stmt = gimple_build_assign_with_ops (minmax, result, arg0, arg1);
1085 gsi = gsi_last_bb (cond_bb);
1086 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
194899bf 1087
a4844041 1088 replace_phi_edge_with_variable (cond_bb, e1, phi, result);
194899bf 1089 return true;
1090}
1091
/* The function absolute_replacement does the main work of doing the absolute
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   bb is the basic block where the replacement is going to be done on.  arg0
   is argument 0 from the phi.  Likewise for arg1.

   Recognizes the pattern

     if (arg OP 0)        (OP one of <, <=, >, >=)
       other = -arg;
     x = PHI <arg, other>

   and replaces the PHI with x = ABS_EXPR <arg> (possibly followed by a
   negation when the PHI selects the negative absolute value).  */

static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
		 edge e0 ATTRIBUTE_UNUSED, edge e1,
		 gimple phi, tree arg0, tree arg1)
{
  tree result;
  gimple new_stmt, cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  gimple assign;
  edge e;
  tree rhs, lhs;
  bool negate;
  enum tree_code cond_code;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we can not
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg = -arg1 or
     arg1 = -arg0, then we can not optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  cond = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* Only relationals comparing arg[01] against zero are interesting.  */
  cond_code = gimple_cond_code (cond);
  if (cond_code != GT_EXPR && cond_code != GE_EXPR
      && cond_code != LT_EXPR && cond_code != LE_EXPR)
    return false;

  /* Make sure the conditional is arg[01] OP y.  */
  if (gimple_cond_lhs (cond) != rhs)
    return false;

  /* The comparison must be against (floating-point or integer) zero.  */
  if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
	       ? real_zerop (gimple_cond_rhs (cond))
	       : integer_zerop (gimple_cond_rhs (cond)))
    ;
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know if we have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
     will need to negate the result.  Similarly for LT_EXPR/LE_EXPR if
     the false edge goes to OTHER_BLOCK.  */
  if (cond_code == GT_EXPR || cond_code == GE_EXPR)
    e = true_edge;
  else
    e = false_edge;

  if (e->dest == middle_bb)
    negate = true;
  else
    negate = false;

  result = duplicate_ssa_name (result, NULL);

  /* When negating we need an extra temporary for the ABS_EXPR value.  */
  if (negate)
    lhs = make_ssa_name (TREE_TYPE (result), NULL);
  else
    lhs = result;

  /* Build the modify expression with abs expression.  */
  new_stmt = gimple_build_assign_with_ops (ABS_EXPR, lhs, rhs, NULL);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  if (negate)
    {
      /* Get the right GSI.  We want to insert after the recently
	 added ABS_EXPR statement (which we know is the first statement
	 in the block.  */
      new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, result, lhs, NULL);

      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);

  /* Note that we optimized this PHI.  */
  return true;
}
1210
e6d0e152 1211/* Auxiliary functions to determine the set of memory accesses which
1212 can't trap because they are preceded by accesses to the same memory
182cf5a9 1213 portion. We do that for MEM_REFs, so we only need to track
e6d0e152 1214 the SSA_NAME of the pointer indirectly referenced. The algorithm
1215 simply is a walk over all instructions in dominator order. When
   we see a MEM_REF we determine if we've already seen a same
e6d0e152 1217 ref anywhere up to the root of the dominator tree. If we do the
af4f74fa 1218 current access can't trap. If we don't see any dominating access
e6d0e152 1219 the current access might trap, but might also make later accesses
af4f74fa 1220 non-trapping, so we remember it. We need to be careful with loads
1221 or stores, for instance a load might not trap, while a store would,
1222 so if we see a dominating read access this doesn't mean that a later
1223 write access would not trap. Hence we also need to differentiate the
1224 type of access(es) seen.
1225
1226 ??? We currently are very conservative and assume that a load might
1227 trap even if a store doesn't (write-only memory). This probably is
1228 overly conservative. */
e6d0e152 1229
/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
   through it was seen, which would constitute a no-trap region for
   same accesses.  */
struct name_to_bb
{
  /* SSA_NAME_VERSION of the base pointer of the access.  */
  unsigned int ssa_name_ver;
  /* Entries with phase < nt_call_phase are considered invalid (quick
     table clearing after a possibly-freeing call).  */
  unsigned int phase;
  /* True if the recorded access was a store, false for a load.  */
  bool store;
  /* Byte offset and size of the access.  */
  HOST_WIDE_INT offset, size;
  /* Basic block in which the access was seen.  */
  basic_block bb;
};
1241
/* The hash table for remembering what we've seen.  Maps name_to_bb keys
   to the block where the access was last seen.  */
static htab_t seen_ssa_names;

/* Used for quick clearing of the hash-table when we see calls.
   Hash entries with phase < nt_call_phase are invalid.  */
static unsigned int nt_call_phase;

/* The set of MEM_REFs which can't trap.  */
static struct pointer_set_t *nontrap_set;
1251
963aee26 1252/* The hash function. */
e6d0e152 1253static hashval_t
1254name_to_bb_hash (const void *p)
1255{
963aee26 1256 const struct name_to_bb *n = (const struct name_to_bb *) p;
1257 return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
1258 ^ (n->offset << 6) ^ (n->size << 3);
e6d0e152 1259}
1260
963aee26 1261/* The equality function of *P1 and *P2. */
e6d0e152 1262static int
1263name_to_bb_eq (const void *p1, const void *p2)
1264{
af4f74fa 1265 const struct name_to_bb *n1 = (const struct name_to_bb *)p1;
1266 const struct name_to_bb *n2 = (const struct name_to_bb *)p2;
e6d0e152 1267
963aee26 1268 return n1->ssa_name_ver == n2->ssa_name_ver
1269 && n1->store == n2->store
1270 && n1->offset == n2->offset
1271 && n1->size == n2->size;
e6d0e152 1272}
1273
/* We see the expression EXP in basic block BB.  If it's an interesting
   expression (an MEM_REF through an SSA_NAME) possibly insert the
   expression into the set NONTRAP or the hash table of seen expressions.
   STORE is true if this expression is on the LHS, otherwise it's on
   the RHS.  */
static void
add_or_mark_expr (basic_block bb, tree exp,
		  struct pointer_set_t *nontrap, bool store)
{
  HOST_WIDE_INT size;

  /* Only MEM_REFs with an SSA_NAME base, a constant offset and a
     known positive size are tracked.  */
  if (TREE_CODE (exp) == MEM_REF
      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
      && host_integerp (TREE_OPERAND (exp, 1), 0)
      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
    {
      tree name = TREE_OPERAND (exp, 0);
      struct name_to_bb map;
      void **slot;
      struct name_to_bb *n2bb;
      basic_block found_bb = 0;

      /* Try to find the last seen MEM_REF through the same
         SSA_NAME, which can trap.  */
      map.ssa_name_ver = SSA_NAME_VERSION (name);
      map.phase = 0;
      map.bb = 0;
      map.store = store;
      map.offset = tree_low_cst (TREE_OPERAND (exp, 1), 0);
      map.size = size;

      slot = htab_find_slot (seen_ssa_names, &map, INSERT);
      n2bb = (struct name_to_bb *) *slot;
      /* Entries from before the last possibly-freeing call (phase <
	 nt_call_phase) are stale and ignored.  */
      if (n2bb && n2bb->phase >= nt_call_phase)
	found_bb = n2bb->bb;

      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
	 (it's in a basic block on the path from us to the dominator root)
	 then we can't trap.  The low aux bit marks blocks on the current
	 dominator path, see nt_init_block/nt_fini_block.  */
      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
	{
	  pointer_set_insert (nontrap, exp);
	}
      else
	{
	  /* EXP might trap, so insert it into the hash table.  */
	  if (n2bb)
	    {
	      /* Reuse the existing (possibly stale) entry.  */
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	    }
	  else
	    {
	      n2bb = XNEW (struct name_to_bb);
	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	      n2bb->store = store;
	      n2bb->offset = map.offset;
	      n2bb->size = size;
	      *slot = n2bb;
	    }
	}
    }
}
1339
42540642 1340/* Return true when CALL is a call stmt that definitely doesn't
1341 free any memory or makes it unavailable otherwise. */
e8d4d8a9 1342bool
42540642 1343nonfreeing_call_p (gimple call)
1344{
1345 if (gimple_call_builtin_p (call, BUILT_IN_NORMAL)
1346 && gimple_call_flags (call) & ECF_LEAF)
1347 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
1348 {
1349 /* Just in case these become ECF_LEAF in the future. */
1350 case BUILT_IN_FREE:
1351 case BUILT_IN_TM_FREE:
1352 case BUILT_IN_REALLOC:
1353 case BUILT_IN_STACK_RESTORE:
1354 return false;
1355 default:
1356 return true;
1357 }
1358
1359 return false;
1360}
1361
e6d0e152 1362/* Called by walk_dominator_tree, when entering the block BB. */
1363static void
1364nt_init_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
1365{
42540642 1366 edge e;
1367 edge_iterator ei;
75a70cf9 1368 gimple_stmt_iterator gsi;
42540642 1369
1370 /* If we haven't seen all our predecessors, clear the hash-table. */
1371 FOR_EACH_EDGE (e, ei, bb->preds)
1372 if ((((size_t)e->src->aux) & 2) == 0)
1373 {
1374 nt_call_phase++;
1375 break;
1376 }
1377
1378 /* Mark this BB as being on the path to dominator root and as visited. */
1379 bb->aux = (void*)(1 | 2);
e6d0e152 1380
1381 /* And walk the statements in order. */
75a70cf9 1382 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
e6d0e152 1383 {
75a70cf9 1384 gimple stmt = gsi_stmt (gsi);
e6d0e152 1385
42540642 1386 if (is_gimple_call (stmt) && !nonfreeing_call_p (stmt))
1387 nt_call_phase++;
1388 else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
e6d0e152 1389 {
75a70cf9 1390 add_or_mark_expr (bb, gimple_assign_lhs (stmt), nontrap_set, true);
1391 add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), nontrap_set, false);
e6d0e152 1392 }
1393 }
1394}
1395
1396/* Called by walk_dominator_tree, when basic block BB is exited. */
1397static void
1398nt_fini_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
1399{
1400 /* This BB isn't on the path to dominator root anymore. */
42540642 1401 bb->aux = (void*)2;
e6d0e152 1402}
1403
1404/* This is the entry point of gathering non trapping memory accesses.
1405 It will do a dominator walk over the whole function, and it will
1406 make use of the bb->aux pointers. It returns a set of trees
182cf5a9 1407 (the MEM_REFs itself) which can't trap. */
e6d0e152 1408static struct pointer_set_t *
1409get_non_trapping (void)
1410{
1411 struct pointer_set_t *nontrap;
1412 struct dom_walk_data walk_data;
1413
42540642 1414 nt_call_phase = 0;
e6d0e152 1415 nontrap = pointer_set_create ();
1416 seen_ssa_names = htab_create (128, name_to_bb_hash, name_to_bb_eq,
1417 free);
1418 /* We're going to do a dominator walk, so ensure that we have
1419 dominance information. */
1420 calculate_dominance_info (CDI_DOMINATORS);
1421
1422 /* Setup callbacks for the generic dominator tree walker. */
1423 nontrap_set = nontrap;
e6d0e152 1424 walk_data.dom_direction = CDI_DOMINATORS;
1425 walk_data.initialize_block_local_data = NULL;
6bf320fb 1426 walk_data.before_dom_children = nt_init_block;
1427 walk_data.after_dom_children = nt_fini_block;
e6d0e152 1428 walk_data.global_data = NULL;
1429 walk_data.block_local_data_size = 0;
e6d0e152 1430
1431 init_walk_dominator_tree (&walk_data);
1432 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
1433 fini_walk_dominator_tree (&walk_data);
1434 htab_delete (seen_ssa_names);
1435
42540642 1436 clear_aux_for_blocks ();
e6d0e152 1437 return nontrap;
1438}
1439
/* Do the main work of conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
   MIDDLE_BB:
     something
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that MIDDLE_BB contains only one store, that that store
   doesn't trap (not via NOTRAP, but via checking if an access to the same
   memory location dominates us) and that the store has a "simple" RHS.
   Returns true if the store was sunk into JOIN_BB.  */

static bool
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
			edge e0, edge e1, struct pointer_set_t *nontrap)
{
  gimple assign = last_and_only_stmt (middle_bb);
  tree lhs, rhs, name, name2;
  gimple newphi, new_stmt;
  gimple_stmt_iterator gsi;
  source_location locus;

  /* Check if middle_bb contains of only one store.  */
  if (!assign
      || !gimple_assign_single_p (assign)
      || gimple_has_volatile_ops (assign))
    return false;

  locus = gimple_location (assign);
  lhs = gimple_assign_lhs (assign);
  rhs = gimple_assign_rhs1 (assign);
  /* The destination must be a MEM_REF through an SSA_NAME of register
     type, so the value can be carried by a scalar PHI below.  */
  if (TREE_CODE (lhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
      || !is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Prove that we can move the store down.  We could also check
     TREE_THIS_NOTRAP here, but in that case we also could move stores,
     whose value is not available readily, which we want to avoid.  */
  if (!pointer_set_contains (nontrap, lhs))
    return false;

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the single store.  */
  gsi = gsi_for_stmt (assign);
  unlink_stmt_vdef (assign);
  gsi_remove (&gsi, true);
  release_defs (assign);

  /* 2) Insert a load from the memory of the store to the temporary
        on the edge which did not contain the store.  */
  lhs = unshare_expr (lhs);
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  new_stmt = gimple_build_assign (name, lhs);
  gimple_set_location (new_stmt, locus);
  gsi_insert_on_edge (e1, new_stmt);

  /* 3) Create a PHI node at the join block, with one argument
        holding the old RHS, and the other holding the temporary
        where we stored the old memory contents.  */
  name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name2, join_bb);
  add_phi_arg (newphi, rhs, e0, locus);
  add_phi_arg (newphi, name, e1, locus);

  lhs = unshare_expr (lhs);
  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 4) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      /* JOIN_BB has no real statements yet; append at the end.  */
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
4ee9c684 1523
/* Do the main work of conditional store replacement.  THEN_ASSIGN and
   ELSE_ASSIGN are two stores to the same lvalue, one in THEN_BB and one
   in ELSE_BB; sink a single store into JOIN_BB, selecting the stored
   value with a PHI.  Return true if the replacement is done.  */

static bool
cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
				  basic_block join_bb, gimple then_assign,
				  gimple else_assign)
{
  tree lhs_base, lhs, then_rhs, else_rhs, name;
  source_location then_locus, else_locus;
  gimple_stmt_iterator gsi;
  gimple newphi, new_stmt;

  /* Both statements must be plain (non-clobber, non-volatile) single
     assignments.  */
  if (then_assign == NULL
      || !gimple_assign_single_p (then_assign)
      || gimple_clobber_p (then_assign)
      || gimple_has_volatile_ops (then_assign)
      || else_assign == NULL
      || !gimple_assign_single_p (else_assign)
      || gimple_clobber_p (else_assign)
      || gimple_has_volatile_ops (else_assign))
    return false;

  /* The stores must write the same register-typed lvalue.  */
  lhs = gimple_assign_lhs (then_assign);
  if (!is_gimple_reg_type (TREE_TYPE (lhs))
      || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
    return false;

  lhs_base = get_base_address (lhs);
  if (lhs_base == NULL_TREE
      || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
    return false;

  then_rhs = gimple_assign_rhs1 (then_assign);
  else_rhs = gimple_assign_rhs1 (else_assign);
  then_locus = gimple_location (then_assign);
  else_locus = gimple_location (else_assign);

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the stores.  */
  gsi = gsi_for_stmt (then_assign);
  unlink_stmt_vdef (then_assign);
  gsi_remove (&gsi, true);
  release_defs (then_assign);

  gsi = gsi_for_stmt (else_assign);
  unlink_stmt_vdef (else_assign);
  gsi_remove (&gsi, true);
  release_defs (else_assign);

  /* 2) Create a PHI node at the join block, with one argument
	holding the old RHS, and the other holding the temporary
	where we stored the old memory contents.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name, join_bb);
  add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
  add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);

  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 3) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      /* JOIN_BB has no real statements yet; append at the end.  */
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
1595
/* Conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
   THEN_BB:
     ...
     X = Y;
     ...
     goto JOIN_BB;
   ELSE_BB:
     ...
     X = Z;
     ...
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that it is safe to sink the store to JOIN_BB by verifying that
   there are no read-after-write or write-after-write dependencies in
   THEN_BB and ELSE_BB.  Returns true if at least one pair of stores was
   sunk.  */

static bool
cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
                                basic_block join_bb)
{
  gimple then_assign = last_and_only_stmt (then_bb);
  gimple else_assign = last_and_only_stmt (else_bb);
  vec<data_reference_p> then_datarefs, else_datarefs;
  vec<ddr_p> then_ddrs, else_ddrs;
  gimple then_store, else_store;
  bool found, ok = false, res;
  struct data_dependence_relation *ddr;
  data_reference_p then_dr, else_dr;
  int i, j;
  tree then_lhs, else_lhs;
  vec<gimple> then_stores, else_stores;
  basic_block blocks[3];

  if (MAX_STORES_TO_SINK == 0)
    return false;

  /* Handle the case with single statement in THEN_BB and ELSE_BB.  */
  if (then_assign && else_assign)
    return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                             then_assign, else_assign);

  /* Find data references.  */
  then_datarefs.create (1);
  else_datarefs.create (1);
  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
      == chrec_dont_know)
      || !then_datarefs.length ()
      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
	  == chrec_dont_know)
      || !else_datarefs.length ())
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Find pairs of stores with equal LHS.  */
  then_stores.create (1);
  else_stores.create (1);
  FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
    {
      if (DR_IS_READ (then_dr))
        continue;

      then_store = DR_STMT (then_dr);
      then_lhs = gimple_get_lhs (then_store);
      found = false;

      /* Look for a store in ELSE_BB writing the same lvalue.  */
      FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
        {
          if (DR_IS_READ (else_dr))
            continue;

          else_store = DR_STMT (else_dr);
          else_lhs = gimple_get_lhs (else_store);

          if (operand_equal_p (then_lhs, else_lhs, 0))
            {
              found = true;
              break;
            }
        }

      if (!found)
        continue;

      then_stores.safe_push (then_store);
      else_stores.safe_push (else_store);
    }

  /* No pairs of stores found.  */
  if (!then_stores.length ()
      || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      then_stores.release ();
      else_stores.release ();
      return false;
    }

  /* Compute and check data dependencies in both basic blocks.  */
  then_ddrs.create (1);
  else_ddrs.create (1);
  if (!compute_all_dependences (then_datarefs, &then_ddrs,
				vNULL, false)
      || !compute_all_dependences (else_datarefs, &else_ddrs,
				   vNULL, false))
    {
      free_dependence_relations (then_ddrs);
      free_dependence_relations (else_ddrs);
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      then_stores.release ();
      else_stores.release ();
      return false;
    }
  /* Give the statements fresh uids so the uid comparisons below
     reflect the order within the blocks.  */
  blocks[0] = then_bb;
  blocks[1] = else_bb;
  blocks[2] = join_bb;
  renumber_gimple_stmt_uids_in_blocks (blocks, 3);

  /* Check that there are no read-after-write or write-after-write dependencies
     in THEN_BB.  */
  FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
          then_stores.release ();
          else_stores.release ();
          return false;
        }
    }

  /* Check that there are no read-after-write or write-after-write dependencies
     in ELSE_BB.  */
  FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
          then_stores.release ();
          else_stores.release ();
          return false;
        }
    }

  /* Sink stores with same LHS.  */
  FOR_EACH_VEC_ELT (then_stores, i, then_store)
    {
      else_store = else_stores[i];
      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                              then_store, else_store);
      ok = ok || res;
    }

  free_dependence_relations (then_ddrs);
  free_dependence_relations (else_ddrs);
  free_data_refs (then_datarefs);
  free_data_refs (else_datarefs);
  then_stores.release ();
  else_stores.release ();

  return ok;
}
1790
239e9670 1791/* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */
1792
1793static bool
1794local_mem_dependence (gimple stmt, basic_block bb)
1795{
1796 tree vuse = gimple_vuse (stmt);
1797 gimple def;
1798
1799 if (!vuse)
1800 return false;
1801
1802 def = SSA_NAME_DEF_STMT (vuse);
1803 return (def && gimple_bb (def) == bb);
1804}
1805
1806/* Given a "diamond" control-flow pattern where BB0 tests a condition,
1807 BB1 and BB2 are "then" and "else" blocks dependent on this test,
1808 and BB3 rejoins control flow following BB1 and BB2, look for
1809 opportunities to hoist loads as follows. If BB3 contains a PHI of
1810 two loads, one each occurring in BB1 and BB2, and the loads are
1811 provably of adjacent fields in the same structure, then move both
1812 loads into BB0. Of course this can only be done if there are no
1813 dependencies preventing such motion.
1814
1815 One of the hoisted loads will always be speculative, so the
1816 transformation is currently conservative:
1817
1818 - The fields must be strictly adjacent.
1819 - The two fields must occupy a single memory block that is
1820 guaranteed to not cross a page boundary.
1821
1822 The last is difficult to prove, as such memory blocks should be
1823 aligned on the minimum of the stack alignment boundary and the
1824 alignment guaranteed by heap allocation interfaces. Thus we rely
1825 on a parameter for the alignment value.
1826
1827 Provided a good value is used for the last case, the first
1828 restriction could possibly be relaxed. */
1829
static void
hoist_adjacent_loads (basic_block bb0, basic_block bb1,
		      basic_block bb2, basic_block bb3)
{
  /* Profitability window: both field references must land within one
     L1 cache line, whose size is supplied as a --param value.  */
  int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
  unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
  gimple_stmt_iterator gsi;

  /* Walk the phis in bb3 looking for an opportunity.  We are looking
     for phis of two SSA names, one each of which is defined in bb1 and
     bb2.  */
  for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi_stmt = gsi_stmt (gsi);
      gimple def1, def2, defswap;
      tree arg1, arg2, ref1, ref2, field1, field2, fieldswap;
      tree tree_offset1, tree_offset2, tree_size2, next;
      int offset1, offset2, size2;
      unsigned align1;
      gimple_stmt_iterator gsi2;
      basic_block bb_for_def1, bb_for_def2;

      /* Only a two-argument phi of a real (non-virtual) value can
	 describe the diamond merge we are looking for.  */
      if (gimple_phi_num_args (phi_stmt) != 2
	  || virtual_operand_p (gimple_phi_result (phi_stmt)))
	continue;

      arg1 = gimple_phi_arg_def (phi_stmt, 0);
      arg2 = gimple_phi_arg_def (phi_stmt, 1);

      /* Both arguments must be SSA names with a movable defining
	 statement; default defs have no statement to hoist.  */
      if (TREE_CODE (arg1) != SSA_NAME
	  || TREE_CODE (arg2) != SSA_NAME
	  || SSA_NAME_IS_DEFAULT_DEF (arg1)
	  || SSA_NAME_IS_DEFAULT_DEF (arg2))
	continue;

      def1 = SSA_NAME_DEF_STMT (arg1);
      def2 = SSA_NAME_DEF_STMT (arg2);

      /* The two definitions must come one from each arm of the
	 diamond, in either order.  */
      if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
	  && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
	continue;

      /* Check the mode of the arguments to be sure a conditional move
	 can be generated for it.  */
      if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
	  == CODE_FOR_nothing)
	continue;

      /* Both statements must be assignments whose RHS is a COMPONENT_REF.  */
      if (!gimple_assign_single_p (def1)
	  || !gimple_assign_single_p (def2)
	  || gimple_has_volatile_ops (def1)
	  || gimple_has_volatile_ops (def2))
	continue;

      ref1 = gimple_assign_rhs1 (def1);
      ref2 = gimple_assign_rhs1 (def2);

      if (TREE_CODE (ref1) != COMPONENT_REF
	  || TREE_CODE (ref2) != COMPONENT_REF)
	continue;

      /* The zeroth operand of the two component references must be
	 identical.  It is not sufficient to compare get_base_address of
	 the two references, because this could allow for different
	 elements of the same array in the two trees.  It is not safe to
	 assume that the existence of one array element implies the
	 existence of a different one.  */
      if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
	continue;

      field1 = TREE_OPERAND (ref1, 1);
      field2 = TREE_OPERAND (ref2, 1);

      /* Check for field adjacency, and ensure field1 comes first.
	 Skip non-FIELD_DECL chain entries when scanning forward.  */
      for (next = DECL_CHAIN (field1);
	   next && TREE_CODE (next) != FIELD_DECL;
	   next = DECL_CHAIN (next))
	;

      if (next != field2)
	{
	  /* field2 does not follow field1; check the opposite order
	     and, if it matches, swap the roles of the two loads.  */
	  for (next = DECL_CHAIN (field2);
	       next && TREE_CODE (next) != FIELD_DECL;
	       next = DECL_CHAIN (next))
	    ;

	  if (next != field1)
	    continue;

	  fieldswap = field1;
	  field1 = field2;
	  field2 = fieldswap;
	  defswap = def1;
	  def1 = def2;
	  def2 = defswap;
	}

      bb_for_def1 = gimple_bb (def1);
      bb_for_def2 = gimple_bb (def2);

      /* Check for proper alignment of the first field.  */
      tree_offset1 = bit_position (field1);
      tree_offset2 = bit_position (field2);
      tree_size2 = DECL_SIZE (field2);

      /* All three quantities must be host-representable unsigned
	 integers for the arithmetic below to be meaningful.  */
      if (!host_integerp (tree_offset1, 1)
	  || !host_integerp (tree_offset2, 1)
	  || !host_integerp (tree_size2, 1))
	continue;

      offset1 = TREE_INT_CST_LOW (tree_offset1);
      offset2 = TREE_INT_CST_LOW (tree_offset2);
      size2 = TREE_INT_CST_LOW (tree_size2);
      align1 = DECL_ALIGN (field1) % param_align_bits;

      /* The first field must start on a byte boundary.  */
      if (offset1 % BITS_PER_UNIT != 0)
	continue;

      /* For profitability, the two field references should fit within
	 a single cache line.  */
      if (align1 + offset2 - offset1 + size2 > param_align_bits)
	continue;

      /* The two expressions cannot be dependent upon vdefs defined
	 in bb1/bb2.  */
      if (local_mem_dependence (def1, bb_for_def1)
	  || local_mem_dependence (def2, bb_for_def2))
	continue;

      /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
	 bb0.  We hoist the first one first so that a cache miss is handled
	 efficiently regardless of hardware cache-fill policy.  */
      gsi2 = gsi_for_stmt (def1);
      gsi_move_to_bb_end (&gsi2, bb0);
      gsi2 = gsi_for_stmt (def2);
      gsi_move_to_bb_end (&gsi2, bb0);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "\nHoisting adjacent loads from %d and %d into %d: \n",
		   bb_for_def1->index, bb_for_def2->index, bb0->index);
	  print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
	  print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
	}
    }
}
1978
1979/* Determine whether we should attempt to hoist adjacent loads out of
1980 diamond patterns in pass_phiopt. Always hoist loads if
1981 -fhoist-adjacent-loads is specified and the target machine has
6f0ddab1 1982 both a conditional move instruction and a defined cache line size. */
239e9670 1983
1984static bool
1985gate_hoist_loads (void)
1986{
6f0ddab1 1987 return (flag_hoist_adjacent_loads == 1
1988 && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
1989 && HAVE_conditional_move);
239e9670 1990}
1991
/* Always do these optimizations if we have SSA
   trees to work on.  */
static bool
gate_phiopt (void)
{
  /* The phiopt pass is unconditionally enabled.  */
  return true;
}
20e5647c 1999
/* Pass descriptor registering the PHI-node optimization pass with the
   pass manager.  Requires a CFG and SSA form; always gated on (see
   gate_phiopt), executes tree_ssa_phiopt, and requests SSA/flow/
   statement verification plus a GC collection afterwards.  */
struct gimple_opt_pass pass_phiopt =
{
 {
  GIMPLE_PASS,
  "phiopt",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  gate_phiopt,				/* gate */
  tree_ssa_phiopt,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_PHIOPT,			/* tv_id */
  PROP_cfg | PROP_ssa,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect
    | TODO_verify_ssa
    | TODO_verify_flow
    | TODO_verify_stmts			/* todo_flags_finish */
 }
};
e6d0e152 2022
2023static bool
2024gate_cselim (void)
2025{
2026 return flag_tree_cselim;
2027}
2028
/* Pass descriptor registering the conditional store elimination pass
   with the pass manager.  Requires a CFG and SSA form; gated on
   -ftree-cselim (see gate_cselim), executes tree_ssa_cs_elim, and
   requests SSA/flow/statement verification plus a GC collection
   afterwards.  */
struct gimple_opt_pass pass_cselim =
{
 {
  GIMPLE_PASS,
  "cselim",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  gate_cselim,				/* gate */
  tree_ssa_cs_elim,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_PHIOPT,			/* tv_id */
  PROP_cfg | PROP_ssa,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect
    | TODO_verify_ssa
    | TODO_verify_flow
    | TODO_verify_stmts			/* todo_flags_finish */
 }
};