]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-ssa-phiopt.c
genattrtab.c (write_header): Include hash-set.h...
[thirdparty/gcc.git] / gcc / tree-ssa-phiopt.c
1 /* Optimization of PHI nodes by converting them into straightline code.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "hash-table.h"
24 #include "tm.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "flags.h"
38 #include "tm_p.h"
39 #include "predict.h"
40 #include "hard-reg-set.h"
41 #include "input.h"
42 #include "function.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "cfganal.h"
46 #include "basic-block.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
49 #include "gimple-expr.h"
50 #include "is-a.h"
51 #include "gimple.h"
52 #include "gimplify.h"
53 #include "gimple-iterator.h"
54 #include "gimplify-me.h"
55 #include "gimple-ssa.h"
56 #include "tree-cfg.h"
57 #include "tree-phinodes.h"
58 #include "ssa-iterators.h"
59 #include "stringpool.h"
60 #include "tree-ssanames.h"
61 #include "expr.h"
62 #include "tree-dfa.h"
63 #include "tree-pass.h"
64 #include "langhooks.h"
65 #include "domwalk.h"
66 #include "cfgloop.h"
67 #include "tree-data-ref.h"
68 #include "gimple-pretty-print.h"
69 #include "insn-config.h"
70 #include "expr.h"
71 #include "insn-codes.h"
72 #include "optabs.h"
73 #include "tree-scalar-evolution.h"
74 #include "tree-inline.h"
75
76 #ifndef HAVE_conditional_move
77 #define HAVE_conditional_move (0)
78 #endif
79
80 static unsigned int tree_ssa_phiopt_worker (bool, bool);
81 static bool conditional_replacement (basic_block, basic_block,
82 edge, edge, gphi *, tree, tree);
83 static int value_replacement (basic_block, basic_block,
84 edge, edge, gimple, tree, tree);
85 static bool minmax_replacement (basic_block, basic_block,
86 edge, edge, gimple, tree, tree);
87 static bool abs_replacement (basic_block, basic_block,
88 edge, edge, gimple, tree, tree);
89 static bool neg_replacement (basic_block, basic_block,
90 edge, edge, gimple, tree, tree);
91 static bool cond_store_replacement (basic_block, basic_block, edge, edge,
92 hash_set<tree> *);
93 static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
94 static hash_set<tree> * get_non_trapping ();
95 static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree);
96 static void hoist_adjacent_loads (basic_block, basic_block,
97 basic_block, basic_block);
98 static bool gate_hoist_loads (void);
99
100 /* This pass tries to transform conditional stores into unconditional
101 ones, enabling further simplifications with the simpler then and else
102 blocks. In particular it replaces this:
103
104 bb0:
105 if (cond) goto bb2; else goto bb1;
106 bb1:
107 *p = RHS;
108 bb2:
109
110 with
111
112 bb0:
113 if (cond) goto bb1; else goto bb2;
114 bb1:
115 condtmp' = *p;
116 bb2:
117 condtmp = PHI <RHS, condtmp'>
118 *p = condtmp;
119
120 This transformation can only be done under several constraints,
121 documented below. It also replaces:
122
123 bb0:
124 if (cond) goto bb2; else goto bb1;
125 bb1:
126 *p = RHS1;
127 goto bb3;
128 bb2:
129 *p = RHS2;
130 bb3:
131
132 with
133
134 bb0:
135 if (cond) goto bb3; else goto bb1;
136 bb1:
137 bb3:
138 condtmp = PHI <RHS1, RHS2>
139 *p = condtmp; */
140
141 static unsigned int
142 tree_ssa_cs_elim (void)
143 {
144 unsigned todo;
145 /* ??? We are not interested in loop related info, but the following
146 will create it, ICEing as we didn't init loops with pre-headers.
147 An interfacing issue of find_data_references_in_bb. */
148 loop_optimizer_init (LOOPS_NORMAL);
149 scev_initialize ();
150 todo = tree_ssa_phiopt_worker (true, false);
151 scev_finalize ();
152 loop_optimizer_finalize ();
153 return todo;
154 }
155
156 /* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */
157
158 static gphi *
159 single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
160 {
161 gimple_stmt_iterator i;
162 gphi *phi = NULL;
163 if (gimple_seq_singleton_p (seq))
164 return as_a <gphi *> (gsi_stmt (gsi_start (seq)));
165 for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
166 {
167 gphi *p = as_a <gphi *> (gsi_stmt (i));
168 /* If the PHI arguments are equal then we can skip this PHI. */
169 if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
170 gimple_phi_arg_def (p, e1->dest_idx)))
171 continue;
172
173 /* If we already have a PHI that has the two edge arguments are
174 different, then return it is not a singleton for these PHIs. */
175 if (phi)
176 return NULL;
177
178 phi = p;
179 }
180 return phi;
181 }
182
/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.

   Returns a set of TODO_* flags: TODO_cleanup_cfg (plus
   TODO_update_ssa_only_virtuals for store elimination) when the CFG
   was changed, 0 otherwise.  */
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  hash_set<tree> *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* The replacement of conditional negation with a non-branching
     sequence is really only a win when optimizing for speed and we
     can avoid transformations by gimple if-conversion that result
     in poor RTL generation.

     Ideally either gimple if-conversion or the RTL expanders will
     be improved and the code to emit branchless conditional negation
     can be removed.  */
  bool replace_conditional_negation = false;
  if (!do_store_elim)
    replace_conditional_negation
      = ((!optimize_size && optimize >= 2)
	 || (((flag_tree_loop_vectorize || cfun->has_force_vectorize_loops)
	      && flag_tree_loop_if_convert != 0)
	     || flag_tree_loop_if_convert == 1
	     || flag_tree_loop_if_convert_stores == 1));

  /* Search every basic block for COND_EXPR we may be able to optimize.

     We walk the blocks in order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = single_pred_before_succ_order ();
  n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple cond_stmt;
      gphi *phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
          || gimple_code (cond_stmt) != GIMPLE_COND)
        continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
          || (e2->flags & EDGE_ABNORMAL) != 0)
        continue;

      /* Bail out if either successor block is missing or has no
	 successors of its own (e.g. ends the function); the patterns
	 below all need a join block after BB1/BB2.  */
      if (EDGE_COUNT (bb1->succs) == 0
          || bb2 == NULL
	  || EDGE_COUNT (bb2->succs) == 0)
        continue;

      /* Find the bb which is the fall through to the other.  After this
	 BB1 is the middle block and BB2 the join block (triangle case),
	 swapping the two if needed.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
        ;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
        {
	  basic_block bb_tmp = bb1;
	  edge e_tmp = e1;
	  bb1 = bb2;
	  bb2 = bb_tmp;
	  e1 = e2;
	  e2 = e_tmp;
	}
      else if (do_store_elim
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  /* Diamond: both arms fall through to a common join BB3; try
	     if/else store replacement there.  */
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!single_succ_p (bb1)
	      || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
	      || !single_succ_p (bb2)
	      || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
	      || EDGE_COUNT (bb3->preds) != 2)
	    continue;
	  if (cond_if_else_store_replacement (bb1, bb2, bb3))
	    cfgchanged = true;
	  continue;
	}
      else if (do_hoist_loads
		 && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  /* Diamond: consider hoisting adjacent loads out of the arms.  */
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
	      && single_succ_p (bb1)
	      && single_succ_p (bb2)
	      && single_pred_p (bb1)
	      && single_pred_p (bb2)
	      && EDGE_COUNT (bb->succs) == 2
	      && EDGE_COUNT (bb3->preds) == 2
	      /* If one edge or the other is dominant, a conditional move
		 is likely to perform worse than the well-predicted branch.  */
	      && !predictable_edge_p (EDGE_SUCC (bb, 0))
	      && !predictable_edge_p (EDGE_SUCC (bb, 1)))
	    hoist_adjacent_loads (bb, bb1, bb2, bb3);
	  continue;
	}
      else
	continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
	  || (e1->flags & EDGE_FALLTHRU) == 0)
        continue;

      /* Also make sure that bb1 only have one predecessor and that it
	 is bb.  */
      if (!single_pred_p (bb1)
          || single_pred (bb1) != bb)
	continue;

      if (do_store_elim)
	{
	  /* bb1 is the middle block, bb2 the join block, bb the split block,
	     e1 the fallthrough edge from bb1 to bb2.  We can't do the
	     optimization if the join block has more than two predecessors.  */
	  if (EDGE_COUNT (bb2->preds) > 2)
	    continue;
	  if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
	    cfgchanged = true;
	}
      else
	{
	  gimple_seq phis = phi_nodes (bb2);
	  gimple_stmt_iterator gsi;
	  bool candorest = true;

	  /* Value replacement can work with more than one PHI
	     so try that first.  A return of 2 means the middle block
	     was removed, so nothing else can be tried on it.  */
	  for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      phi = as_a <gphi *> (gsi_stmt (gsi));
	      arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	      arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
	      if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
		{
		  candorest = false;
	          cfgchanged = true;
		  break;
		}
	    }

	  if (!candorest)
	    continue;

	  /* The remaining transforms need a unique PHI with differing
	     arguments on the two edges.  */
	  phi = single_non_singleton_phi_for_edges (phis, e1, e2);
	  if (!phi)
	    continue;

	  arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	  arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

	  /* Something is wrong if we cannot find the arguments in the PHI
	     node.  */
	  gcc_assert (arg0 != NULL && arg1 != NULL);

	  /* Do the replacement of conditional if it can be done.  */
	  if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (replace_conditional_negation
		   && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	}
    }

  free (bb_order);

  if (do_store_elim)
    delete nontrap;
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
         and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}
397
398 /* Replace PHI node element whose edge is E in block BB with variable NEW.
399 Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK
400 is known to have two edges, one of which must reach BB). */
401
402 static void
403 replace_phi_edge_with_variable (basic_block cond_block,
404 edge e, gimple phi, tree new_tree)
405 {
406 basic_block bb = gimple_bb (phi);
407 basic_block block_to_remove;
408 gimple_stmt_iterator gsi;
409
410 /* Change the PHI argument to new. */
411 SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
412
413 /* Remove the empty basic block. */
414 if (EDGE_SUCC (cond_block, 0)->dest == bb)
415 {
416 EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
417 EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
418 EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE;
419 EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count;
420
421 block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
422 }
423 else
424 {
425 EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
426 EDGE_SUCC (cond_block, 1)->flags
427 &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
428 EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE;
429 EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count;
430
431 block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
432 }
433 delete_basic_block (block_to_remove);
434
435 /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
436 gsi = gsi_last_bb (cond_block);
437 gsi_remove (&gsi, true);
438
439 if (dump_file && (dump_flags & TDF_DETAILS))
440 fprintf (dump_file,
441 "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
442 cond_block->index,
443 bb->index);
444 }
445
/* The function conditional_replacement does the main work of doing the
   conditional replacement.  Return true if the replacement is done.
   Otherwise return false.
   COND_BB is the block ending in the GIMPLE_COND, MIDDLE_BB the empty
   block between it and the join block containing PHI.  ARG0 is argument
   0 from PHI (reached via E0), ARG1 is argument 1 (reached via E1).

   Transforms a PHI of the constants (0, 1) or (0, -1) into the
   (possibly inverted, possibly negated) condition itself.  */

static bool
conditional_replacement (basic_block cond_bb, basic_block middle_bb,
			 edge e0, edge e1, gphi *phi,
			 tree arg0, tree arg1)
{
  tree result;
  gimple stmt;
  gassign *new_stmt;
  tree cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  tree new_var, new_var2;
  bool neg;

  /* FIXME: Gimplification of complex type is too hard for now.  */
  /* We aren't prepared to handle vectors either (and it is a question
     if it would be worthwhile anyway).  */
  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	|| POINTER_TYPE_P (TREE_TYPE (arg0)))
      || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
	   || POINTER_TYPE_P (TREE_TYPE (arg1))))
    return false;

  /* If the PHI arguments are the constants 0 and 1, or 0 and -1, then
     convert it to the conditional (NEG records the 0/-1 case, which
     needs an extra negation of the condition value below).  */
  if ((integer_zerop (arg0) && integer_onep (arg1))
      || (integer_zerop (arg1) && integer_onep (arg0)))
    neg = false;
  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
	   || (integer_zerop (arg1) && integer_all_onesp (arg0)))
    neg = true;
  else
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  /* At this point we know we have a GIMPLE_COND with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     There is a single PHI node at the join point (BB) and its arguments
     are constants (0, 1) or (0, -1).

     So, given the condition COND, and the two PHI arguments, we can
     rewrite this PHI into non-branching code:

       dest = (COND) or dest = COND'

     We use the condition as-is if the argument associated with the
     true edge has the value one or the argument associated with the
     false edge as the value zero.  Note that those conditions are not
     the same since only one of the outgoing edges from the GIMPLE_COND
     will directly reach BB and thus be associated with an argument.  */

  stmt = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* To handle special cases like floating point comparison, it is easier and
     less error-prone to build a tree and gimplify it on the fly though it is
     less efficient.  */
  cond = fold_build2_loc (gimple_location (stmt),
			  gimple_cond_code (stmt), boolean_type_node,
			  gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((e0 == true_edge && integer_zerop (arg0))
      || (e0 == false_edge && !integer_zerop (arg0))
      || (e1 == true_edge && integer_zerop (arg1))
      || (e1 == false_edge && !integer_zerop (arg1)))
    cond = fold_build1_loc (gimple_location (stmt),
			    TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);

  /* For the (0, -1) pair, convert the boolean to the PHI result type
     and negate it so true yields all-ones.  */
  if (neg)
    {
      cond = fold_convert_loc (gimple_location (stmt),
			       TREE_TYPE (result), cond);
      cond = fold_build1_loc (gimple_location (stmt),
			      NEGATE_EXPR, TREE_TYPE (cond), cond);
    }

  /* Insert our new statements at the end of conditional block before the
     COND_STMT.  */
  gsi = gsi_for_stmt (stmt);
  new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
				      GSI_SAME_STMT);

  if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
    {
      source_location locus_0, locus_1;

      /* The gimplified condition has a different type than the PHI
	 result; emit an explicit conversion.  */
      new_var2 = make_ssa_name (TREE_TYPE (result));
      new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      new_var = new_var2;

      /* Set the locus to the first argument, unless it doesn't have one.  */
      locus_0 = gimple_phi_arg_location (phi, 0);
      locus_1 = gimple_phi_arg_location (phi, 1);
      if (locus_0 == UNKNOWN_LOCATION)
        locus_0 = locus_1;
      gimple_set_location (new_stmt, locus_0);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);

  /* Note that we optimized this PHI.  */
  return true;
}
563
564 /* Update *ARG which is defined in STMT so that it contains the
565 computed value if that seems profitable. Return true if the
566 statement is made dead by that rewriting. */
567
568 static bool
569 jump_function_from_stmt (tree *arg, gimple stmt)
570 {
571 enum tree_code code = gimple_assign_rhs_code (stmt);
572 if (code == ADDR_EXPR)
573 {
574 /* For arg = &p->i transform it to p, if possible. */
575 tree rhs1 = gimple_assign_rhs1 (stmt);
576 HOST_WIDE_INT offset;
577 tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
578 &offset);
579 if (tem
580 && TREE_CODE (tem) == MEM_REF
581 && (mem_ref_offset (tem) + offset) == 0)
582 {
583 *arg = TREE_OPERAND (tem, 0);
584 return true;
585 }
586 }
587 /* TODO: Much like IPA-CP jump-functions we want to handle constant
588 additions symbolically here, and we'd need to update the comparison
589 code that compares the arg + cst tuples in our caller. For now the
590 code above exactly handles the VEC_BASE pattern from vec.h. */
591 return false;
592 }
593
594 /* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional
595 of the form SSA_NAME NE 0.
596
597 If RHS is fed by a simple EQ_EXPR comparison of two values, see if
598 the two input values of the EQ_EXPR match arg0 and arg1.
599
600 If so update *code and return TRUE. Otherwise return FALSE. */
601
602 static bool
603 rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1,
604 enum tree_code *code, const_tree rhs)
605 {
606 /* Obviously if RHS is not an SSA_NAME, we can't look at the defining
607 statement. */
608 if (TREE_CODE (rhs) == SSA_NAME)
609 {
610 gimple def1 = SSA_NAME_DEF_STMT (rhs);
611
612 /* Verify the defining statement has an EQ_EXPR on the RHS. */
613 if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR)
614 {
615 /* Finally verify the source operands of the EQ_EXPR are equal
616 to arg0 and arg1. */
617 tree op0 = gimple_assign_rhs1 (def1);
618 tree op1 = gimple_assign_rhs2 (def1);
619 if ((operand_equal_for_phi_arg_p (arg0, op0)
620 && operand_equal_for_phi_arg_p (arg1, op1))
621 || (operand_equal_for_phi_arg_p (arg0, op1)
622 && operand_equal_for_phi_arg_p (arg1, op0)))
623 {
624 /* We will perform the optimization. */
625 *code = gimple_assign_rhs_code (def1);
626 return true;
627 }
628 }
629 }
630 return false;
631 }
632
633 /* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND.
634
635 Also return TRUE if arg0/arg1 are equal to the source arguments of a
636 an EQ comparison feeding a BIT_AND_EXPR which feeds COND.
637
638 Return FALSE otherwise. */
639
640 static bool
641 operand_equal_for_value_replacement (const_tree arg0, const_tree arg1,
642 enum tree_code *code, gimple cond)
643 {
644 gimple def;
645 tree lhs = gimple_cond_lhs (cond);
646 tree rhs = gimple_cond_rhs (cond);
647
648 if ((operand_equal_for_phi_arg_p (arg0, lhs)
649 && operand_equal_for_phi_arg_p (arg1, rhs))
650 || (operand_equal_for_phi_arg_p (arg1, lhs)
651 && operand_equal_for_phi_arg_p (arg0, rhs)))
652 return true;
653
654 /* Now handle more complex case where we have an EQ comparison
655 which feeds a BIT_AND_EXPR which feeds COND.
656
657 First verify that COND is of the form SSA_NAME NE 0. */
658 if (*code != NE_EXPR || !integer_zerop (rhs)
659 || TREE_CODE (lhs) != SSA_NAME)
660 return false;
661
662 /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR. */
663 def = SSA_NAME_DEF_STMT (lhs);
664 if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
665 return false;
666
667 /* Now verify arg0/arg1 correspond to the source arguments of an
668 EQ comparison feeding the BIT_AND_EXPR. */
669
670 tree tmp = gimple_assign_rhs1 (def);
671 if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
672 return true;
673
674 tmp = gimple_assign_rhs2 (def);
675 if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
676 return true;
677
678 return false;
679 }
680
681 /* Returns true if ARG is a neutral element for operation CODE
682 on the RIGHT side. */
683
684 static bool
685 neutral_element_p (tree_code code, tree arg, bool right)
686 {
687 switch (code)
688 {
689 case PLUS_EXPR:
690 case BIT_IOR_EXPR:
691 case BIT_XOR_EXPR:
692 return integer_zerop (arg);
693
694 case LROTATE_EXPR:
695 case RROTATE_EXPR:
696 case LSHIFT_EXPR:
697 case RSHIFT_EXPR:
698 case MINUS_EXPR:
699 case POINTER_PLUS_EXPR:
700 return right && integer_zerop (arg);
701
702 case MULT_EXPR:
703 return integer_onep (arg);
704
705 case TRUNC_DIV_EXPR:
706 case CEIL_DIV_EXPR:
707 case FLOOR_DIV_EXPR:
708 case ROUND_DIV_EXPR:
709 case EXACT_DIV_EXPR:
710 return right && integer_onep (arg);
711
712 case BIT_AND_EXPR:
713 return integer_all_onesp (arg);
714
715 default:
716 return false;
717 }
718 }
719
720 /* Returns true if ARG is an absorbing element for operation CODE. */
721
722 static bool
723 absorbing_element_p (tree_code code, tree arg)
724 {
725 switch (code)
726 {
727 case BIT_IOR_EXPR:
728 return integer_all_onesp (arg);
729
730 case MULT_EXPR:
731 case BIT_AND_EXPR:
732 return integer_zerop (arg);
733
734 default:
735 return false;
736 }
737 }
738
739 /* The function value_replacement does the main work of doing the value
740 replacement. Return non-zero if the replacement is done. Otherwise return
741 0. If we remove the middle basic block, return 2.
742 BB is the basic block where the replacement is going to be done on. ARG0
743 is argument 0 from the PHI. Likewise for ARG1. */
744
745 static int
746 value_replacement (basic_block cond_bb, basic_block middle_bb,
747 edge e0, edge e1, gimple phi,
748 tree arg0, tree arg1)
749 {
750 gimple_stmt_iterator gsi;
751 gimple cond;
752 edge true_edge, false_edge;
753 enum tree_code code;
754 bool emtpy_or_with_defined_p = true;
755
756 /* If the type says honor signed zeros we cannot do this
757 optimization. */
758 if (HONOR_SIGNED_ZEROS (arg1))
759 return 0;
760
761 /* If there is a statement in MIDDLE_BB that defines one of the PHI
762 arguments, then adjust arg0 or arg1. */
763 gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
764 while (!gsi_end_p (gsi))
765 {
766 gimple stmt = gsi_stmt (gsi);
767 tree lhs;
768 gsi_next_nondebug (&gsi);
769 if (!is_gimple_assign (stmt))
770 {
771 emtpy_or_with_defined_p = false;
772 continue;
773 }
774 /* Now try to adjust arg0 or arg1 according to the computation
775 in the statement. */
776 lhs = gimple_assign_lhs (stmt);
777 if (!(lhs == arg0
778 && jump_function_from_stmt (&arg0, stmt))
779 || (lhs == arg1
780 && jump_function_from_stmt (&arg1, stmt)))
781 emtpy_or_with_defined_p = false;
782 }
783
784 cond = last_stmt (cond_bb);
785 code = gimple_cond_code (cond);
786
787 /* This transformation is only valid for equality comparisons. */
788 if (code != NE_EXPR && code != EQ_EXPR)
789 return 0;
790
791 /* We need to know which is the true edge and which is the false
792 edge so that we know if have abs or negative abs. */
793 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
794
795 /* At this point we know we have a COND_EXPR with two successors.
796 One successor is BB, the other successor is an empty block which
797 falls through into BB.
798
799 The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.
800
801 There is a single PHI node at the join point (BB) with two arguments.
802
803 We now need to verify that the two arguments in the PHI node match
804 the two arguments to the equality comparison. */
805
806 if (operand_equal_for_value_replacement (arg0, arg1, &code, cond))
807 {
808 edge e;
809 tree arg;
810
811 /* For NE_EXPR, we want to build an assignment result = arg where
812 arg is the PHI argument associated with the true edge. For
813 EQ_EXPR we want the PHI argument associated with the false edge. */
814 e = (code == NE_EXPR ? true_edge : false_edge);
815
816 /* Unfortunately, E may not reach BB (it may instead have gone to
817 OTHER_BLOCK). If that is the case, then we want the single outgoing
818 edge from OTHER_BLOCK which reaches BB and represents the desired
819 path from COND_BLOCK. */
820 if (e->dest == middle_bb)
821 e = single_succ_edge (e->dest);
822
823 /* Now we know the incoming edge to BB that has the argument for the
824 RHS of our new assignment statement. */
825 if (e0 == e)
826 arg = arg0;
827 else
828 arg = arg1;
829
830 /* If the middle basic block was empty or is defining the
831 PHI arguments and this is a single phi where the args are different
832 for the edges e0 and e1 then we can remove the middle basic block. */
833 if (emtpy_or_with_defined_p
834 && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
835 e0, e1) == phi)
836 {
837 replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
838 /* Note that we optimized this PHI. */
839 return 2;
840 }
841 else
842 {
843 /* Replace the PHI arguments with arg. */
844 SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
845 SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 {
848 fprintf (dump_file, "PHI ");
849 print_generic_expr (dump_file, gimple_phi_result (phi), 0);
850 fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
851 cond_bb->index);
852 print_generic_expr (dump_file, arg, 0);
853 fprintf (dump_file, ".\n");
854 }
855 return 1;
856 }
857
858 }
859
860 /* Now optimize (x != 0) ? x + y : y to just y.
861 The following condition is too restrictive, there can easily be another
862 stmt in middle_bb, for instance a CONVERT_EXPR for the second argument. */
863 gimple assign = last_and_only_stmt (middle_bb);
864 if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
865 || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
866 || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
867 && !POINTER_TYPE_P (TREE_TYPE (arg0))))
868 return 0;
869
870 /* Punt if there are (degenerate) PHIs in middle_bb, there should not be. */
871 if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
872 return 0;
873
874 /* Only transform if it removes the condition. */
875 if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1))
876 return 0;
877
878 /* Size-wise, this is always profitable. */
879 if (optimize_bb_for_speed_p (cond_bb)
880 /* The special case is useless if it has a low probability. */
881 && profile_status_for_fn (cfun) != PROFILE_ABSENT
882 && EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN
883 /* If assign is cheap, there is no point avoiding it. */
884 && estimate_num_insns (assign, &eni_time_weights)
885 >= 3 * estimate_num_insns (cond, &eni_time_weights))
886 return 0;
887
888 tree lhs = gimple_assign_lhs (assign);
889 tree rhs1 = gimple_assign_rhs1 (assign);
890 tree rhs2 = gimple_assign_rhs2 (assign);
891 enum tree_code code_def = gimple_assign_rhs_code (assign);
892 tree cond_lhs = gimple_cond_lhs (cond);
893 tree cond_rhs = gimple_cond_rhs (cond);
894
895 if (((code == NE_EXPR && e1 == false_edge)
896 || (code == EQ_EXPR && e1 == true_edge))
897 && arg0 == lhs
898 && ((arg1 == rhs1
899 && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
900 && neutral_element_p (code_def, cond_rhs, true))
901 || (arg1 == rhs2
902 && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
903 && neutral_element_p (code_def, cond_rhs, false))
904 || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
905 && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
906 || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
907 && absorbing_element_p (code_def, cond_rhs))))
908 {
909 gsi = gsi_for_stmt (cond);
910 gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
911 gsi_move_before (&gsi_from, &gsi);
912 replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
913 return 2;
914 }
915
916 return 0;
917 }
918
/* The function minmax_replacement does the main work of doing the minmax
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   COND_BB is the basic block containing the condition; MIDDLE_BB is the
   block conditionally executed between COND_BB and the PHI's block.
   E0 and E1 are the edges carrying the PHI arguments ARG0 and ARG1.
   ARG0 is argument 0 from the PHI.  Likewise for ARG1.  */

static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
		    edge e0, edge e1, gimple phi,
		    tree arg0, tree arg1)
{
  tree result, type;
  gcond *cond;
  gassign *new_stmt;
  edge true_edge, false_edge;
  enum tree_code cmp, minmax, ass_code;
  tree smaller, larger, arg_true, arg_false;
  gimple_stmt_iterator gsi, gsi_from;

  type = TREE_TYPE (PHI_RESULT (phi));

  /* The optimization may be unsafe due to NaNs.  */
  if (HONOR_NANS (type))
    return false;

  cond = as_a <gcond *> (last_stmt (cond_bb));
  cmp = gimple_cond_code (cond);

  /* This transformation is only valid for order comparisons.  Record which
     operand is smaller/larger if the result of the comparison is true.  */
  if (cmp == LT_EXPR || cmp == LE_EXPR)
    {
      smaller = gimple_cond_lhs (cond);
      larger = gimple_cond_rhs (cond);
    }
  else if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      smaller = gimple_cond_rhs (cond);
      larger = gimple_cond_lhs (cond);
    }
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know if have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* Forward the edges over the middle basic block.  After this, TRUE_EDGE
     and FALSE_EDGE both target the PHI's block and can be compared with
     E0/E1 directly.  */
  if (true_edge->dest == middle_bb)
    true_edge = EDGE_SUCC (true_edge->dest, 0);
  if (false_edge->dest == middle_bb)
    false_edge = EDGE_SUCC (false_edge->dest, 0);

  /* Map ARG0/ARG1 to the PHI argument selected on the true resp. false
     outcome of the condition.  */
  if (true_edge == e0)
    {
      gcc_assert (false_edge == e1);
      arg_true = arg0;
      arg_false = arg1;
    }
  else
    {
      gcc_assert (false_edge == e0);
      gcc_assert (true_edge == e1);
      arg_true = arg1;
      arg_false = arg0;
    }

  if (empty_block_p (middle_bb))
    {
      /* Simple diamond with no statement in the middle block: the PHI
	 selects directly between SMALLER and LARGER.  */
      if (operand_equal_for_phi_arg_p (arg_true, smaller)
	  && operand_equal_for_phi_arg_p (arg_false, larger))
	{
	  /* Case

	     if (smaller < larger)
	     rslt = smaller;
	     else
	     rslt = larger;  */
	  minmax = MIN_EXPR;
	}
      else if (operand_equal_for_phi_arg_p (arg_false, smaller)
	       && operand_equal_for_phi_arg_p (arg_true, larger))
	minmax = MAX_EXPR;
      else
	return false;
    }
  else
    {
      /* Recognize the following case, assuming d <= u:

	 if (a <= u)
	   b = MAX (a, d);
	 x = PHI <b, u>

	 This is equivalent to

	 b = MAX (a, d);
	 x = MIN (b, u);  */

      gimple assign = last_and_only_stmt (middle_bb);
      tree lhs, op0, op1, bound;

      /* The middle block must contain exactly one MIN_EXPR or MAX_EXPR
	 assignment.  */
      if (!assign
	  || gimple_code (assign) != GIMPLE_ASSIGN)
	return false;

      lhs = gimple_assign_lhs (assign);
      ass_code = gimple_assign_rhs_code (assign);
      if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
	return false;
      op0 = gimple_assign_rhs1 (assign);
      op1 = gimple_assign_rhs2 (assign);

      if (true_edge->src == middle_bb)
	{
	  /* We got here if the condition is true, i.e., SMALLER < LARGER.  */
	  if (!operand_equal_for_phi_arg_p (lhs, arg_true))
	    return false;

	  if (operand_equal_for_phi_arg_p (arg_false, larger))
	    {
	      /* Case

		 if (smaller < larger)
		   {
		     r' = MAX_EXPR (smaller, bound)
		   }
		 r = PHI <r', larger>  --> to be turned to MIN_EXPR.  */
	      if (ass_code != MAX_EXPR)
		return false;

	      minmax = MIN_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, smaller))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, smaller))
		bound = op0;
	      else
		return false;

	      /* We need BOUND <= LARGER.  */
	      if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
						  bound, larger)))
		return false;
	    }
	  else if (operand_equal_for_phi_arg_p (arg_false, smaller))
	    {
	      /* Case

		 if (smaller < larger)
		   {
		     r' = MIN_EXPR (larger, bound)
		   }
		 r = PHI <r', smaller>  --> to be turned to MAX_EXPR.  */
	      if (ass_code != MIN_EXPR)
		return false;

	      minmax = MAX_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, larger))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, larger))
		bound = op0;
	      else
		return false;

	      /* We need BOUND >= SMALLER.  */
	      if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
						  bound, smaller)))
		return false;
	    }
	  else
	    return false;
	}
      else
	{
	  /* We got here if the condition is false, i.e., SMALLER > LARGER.  */
	  if (!operand_equal_for_phi_arg_p (lhs, arg_false))
	    return false;

	  if (operand_equal_for_phi_arg_p (arg_true, larger))
	    {
	      /* Case

		 if (smaller > larger)
		   {
		     r' = MIN_EXPR (smaller, bound)
		   }
		 r = PHI <r', larger>  --> to be turned to MAX_EXPR.  */
	      if (ass_code != MIN_EXPR)
		return false;

	      minmax = MAX_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, smaller))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, smaller))
		bound = op0;
	      else
		return false;

	      /* We need BOUND >= LARGER.  */
	      if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
						  bound, larger)))
		return false;
	    }
	  else if (operand_equal_for_phi_arg_p (arg_true, smaller))
	    {
	      /* Case

		 if (smaller > larger)
		   {
		     r' = MAX_EXPR (larger, bound)
		   }
		 r = PHI <r', smaller>  --> to be turned to MIN_EXPR.  */
	      if (ass_code != MAX_EXPR)
		return false;

	      minmax = MIN_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, larger))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, larger))
		bound = op0;
	      else
		return false;

	      /* We need BOUND <= SMALLER.  */
	      if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
						  bound, smaller)))
		return false;
	    }
	  else
	    return false;
	}

      /* Move the statement from the middle block so it is executed
	 unconditionally before the new min/max computation.  */
      gsi = gsi_last_bb (cond_bb);
      gsi_from = gsi_last_nondebug_bb (middle_bb);
      gsi_move_before (&gsi_from, &gsi);
    }

  /* Emit the statement to compute min/max.  */
  result = duplicate_ssa_name (PHI_RESULT (phi), NULL);
  new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);
  return true;
}
1166
/* The function absolute_replacement does the main work of doing the absolute
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   COND_BB is the basic block where the replacement is going to be done on;
   MIDDLE_BB holds the conditional negation.  ARG0 is argument 0 from the
   PHI.  Likewise for ARG1.  */

static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
		 edge e0 ATTRIBUTE_UNUSED, edge e1,
		 gimple phi, tree arg0, tree arg1)
{
  tree result;
  gassign *new_stmt;
  gimple cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  gimple assign;
  edge e;
  tree rhs, lhs;
  bool negate;
  enum tree_code cond_code;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we can not
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg = -arg1 or
     arg1 = -arg0, then we can not optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  cond = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* Only relationals comparing arg[01] against zero are interesting.  */
  cond_code = gimple_cond_code (cond);
  if (cond_code != GT_EXPR && cond_code != GE_EXPR
      && cond_code != LT_EXPR && cond_code != LE_EXPR)
    return false;

  /* Make sure the conditional is arg[01] OP y.  */
  if (gimple_cond_lhs (cond) != rhs)
    return false;

  /* The comparison's second operand must be zero (real or integer,
     depending on the type).  */
  if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
      ? real_zerop (gimple_cond_rhs (cond))
      : integer_zerop (gimple_cond_rhs (cond)))
    ;
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know if have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
     will need to negate the result.  Similarly for LT_EXPR/LE_EXPR if
     the false edge goes to OTHER_BLOCK.  */
  if (cond_code == GT_EXPR || cond_code == GE_EXPR)
    e = true_edge;
  else
    e = false_edge;

  if (e->dest == middle_bb)
    negate = true;
  else
    negate = false;

  result = duplicate_ssa_name (result, NULL);

  /* When negating, the ABS_EXPR result goes to a temporary which the
     NEGATE_EXPR below then copies into RESULT.  */
  if (negate)
    lhs = make_ssa_name (TREE_TYPE (result));
  else
    lhs = result;

  /* Build the modify expression with abs expression.  */
  new_stmt = gimple_build_assign (lhs, ABS_EXPR, rhs);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  if (negate)
    {
      /* Get the right GSI.  We want to insert after the recently
	 added ABS_EXPR statement (which we know is the first statement
	 in the block).  */
      new_stmt = gimple_build_assign (result, NEGATE_EXPR, lhs);

      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);

  /* Note that we optimized this PHI.  */
  return true;
}
1286
/* The function neg_replacement replaces conditional negation with
   equivalent straight line code.  Returns TRUE if replacement is done,
   otherwise returns FALSE.

   COND_BB branches around negation occurring in MIDDLE_BB.

   E0 and E1 are edges out of COND_BB.  E0 reaches MIDDLE_BB and
   E1 reaches the other successor which should contain PHI with
   arguments ARG0 and ARG1.

   Assuming negation is to occur when the condition is true,
   then the non-branching sequence is:

     result = (rhs ^ -cond) + cond

   Inverting the condition or its result gives us negation
   when the original condition is false.  */

static bool
neg_replacement (basic_block cond_bb, basic_block middle_bb,
		 edge e0 ATTRIBUTE_UNUSED, edge e1,
		 gimple phi, tree arg0, tree arg1)
{
  gimple new_stmt, cond;
  gimple_stmt_iterator gsi;
  gimple assign;
  edge true_edge, false_edge;
  tree rhs, lhs;
  enum tree_code cond_code;
  bool invert = false;

  /* This transformation performs logical operations on the
     incoming arguments.  So force them to be integral types.   */
  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we can not
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
     arg1 = -arg0, then we can not optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  /* The basic sequence assumes we negate when the condition is true.
     If we need the opposite, then we will either need to invert the
     condition or its result.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  invert = false_edge->dest == middle_bb;

  /* Unlike abs_replacement, we can handle arbitrary conditionals here.  */
  cond = last_stmt (cond_bb);
  cond_code = gimple_cond_code (cond);

  /* If inversion is needed, first try to invert the test since
     that's cheapest.  */
  if (invert)
    {
      bool honor_nans = HONOR_NANS (gimple_cond_lhs (cond));
      enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);

      /* If invert_tree_comparison was successful, then use its return
	 value as the new code and note that inversion is no longer
	 needed.  */
      if (new_code != ERROR_MARK)
	{
	  cond_code = new_code;
	  invert = false;
	}
    }

  /* Materialize the condition's value as a boolean SSA name.  */
  tree cond_val = make_ssa_name (boolean_type_node);
  new_stmt = gimple_build_assign (cond_val, cond_code,
				  gimple_cond_lhs (cond),
				  gimple_cond_rhs (cond));
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  /* If we still need inversion, then invert the result of the
     condition.  */
  if (invert)
    {
      tree tmp = make_ssa_name (boolean_type_node);
      new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, cond_val,
				     boolean_true_node);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
      cond_val = tmp;
    }

  /* Get the condition in the right type so that we can perform
     logical and arithmetic operations on it.  */
  tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
  new_stmt = gimple_build_assign (cond_val_converted, NOP_EXPR, cond_val);
  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);

  /* -cond, i.e. an all-ones mask when the condition holds.  */
  tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
  new_stmt = gimple_build_assign (neg_cond_val_converted, NEGATE_EXPR,
				  cond_val_converted);
  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);

  /* rhs ^ -cond, i.e. a conditional one's complement.  */
  tree tmp = make_ssa_name (TREE_TYPE (rhs));
  new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, rhs,
				  neg_cond_val_converted);
  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);

  /* (rhs ^ -cond) + cond, completing the two's-complement negation
     when the condition holds.  */
  tree new_lhs = make_ssa_name (TREE_TYPE (rhs));
  new_stmt = gimple_build_assign (new_lhs, PLUS_EXPR, tmp, cond_val_converted);
  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);

  /* Note that we optimized this PHI.  */
  return true;
}
1420
1421 /* Auxiliary functions to determine the set of memory accesses which
1422 can't trap because they are preceded by accesses to the same memory
1423 portion. We do that for MEM_REFs, so we only need to track
1424 the SSA_NAME of the pointer indirectly referenced. The algorithm
1425 simply is a walk over all instructions in dominator order. When
1426 we see an MEM_REF we determine if we've already seen a same
1427 ref anywhere up to the root of the dominator tree. If we do the
1428 current access can't trap. If we don't see any dominating access
1429 the current access might trap, but might also make later accesses
1430 non-trapping, so we remember it. We need to be careful with loads
1431 or stores, for instance a load might not trap, while a store would,
1432 so if we see a dominating read access this doesn't mean that a later
1433 write access would not trap. Hence we also need to differentiate the
1434 type of access(es) seen.
1435
1436 ??? We currently are very conservative and assume that a load might
1437 trap even if a store doesn't (write-only memory). This probably is
1438 overly conservative. */
1439
/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
   through it was seen, which would constitute a no-trap region for
   same accesses.  */
struct name_to_bb
{
  unsigned int ssa_name_ver;	/* SSA_NAME_VERSION of the base pointer.  */
  unsigned int phase;		/* nt_call_phase at the time of insertion;
				   entries with an older phase are stale.  */
  bool store;			/* True for a store, false for a load.  */
  HOST_WIDE_INT offset, size;	/* Byte offset and access size.  */
  basic_block bb;		/* Block where the access was seen.  */
};
1451
/* Hashtable helpers.  Entries are keyed on (ssa_name_ver, store,
   offset, size); PHASE and BB are payload only.  */

struct ssa_names_hasher : typed_free_remove <name_to_bb>
{
  typedef name_to_bb value_type;
  typedef name_to_bb compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
1461
/* Used for quick clearing of the hash-table when we see calls.
   Hash entries with phase < nt_call_phase are invalid, so bumping this
   counter logically empties the table without walking it.  */
static unsigned int nt_call_phase;
1465
1466 /* The hash function. */
1467
1468 inline hashval_t
1469 ssa_names_hasher::hash (const value_type *n)
1470 {
1471 return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
1472 ^ (n->offset << 6) ^ (n->size << 3);
1473 }
1474
1475 /* The equality function of *P1 and *P2. */
1476
1477 inline bool
1478 ssa_names_hasher::equal (const value_type *n1, const compare_type *n2)
1479 {
1480 return n1->ssa_name_ver == n2->ssa_name_ver
1481 && n1->store == n2->store
1482 && n1->offset == n2->offset
1483 && n1->size == n2->size;
1484 }
1485
/* Dominator walker that collects the set of MEM_REFs which provably
   cannot trap because an equivalent access dominates them.  */
class nontrapping_dom_walker : public dom_walker
{
public:
  /* DIRECTION selects the dominator tree to walk; PS receives the
     expressions proven non-trapping (owned by the caller).  */
  nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

  virtual void before_dom_children (basic_block);
  virtual void after_dom_children (basic_block);

private:

  /* We see the expression EXP in basic block BB.  If it's an interesting
     expression (an MEM_REF through an SSA_NAME) possibly insert the
     expression into the set NONTRAP or the hash table of seen expressions.
     STORE is true if this expression is on the LHS, otherwise it's on
     the RHS.  */
  void add_or_mark_expr (basic_block, tree, bool);

  /* Result set of non-trapping expressions, filled during the walk.  */
  hash_set<tree> *m_nontrapping;

  /* The hash table for remembering what we've seen.  */
  hash_table<ssa_names_hasher> m_seen_ssa_names;
};
1509
/* Called by walk_dominator_tree, when entering the block BB.  Scans the
   block's statements for memory accesses and calls that may free
   memory.  */
void
nontrapping_dom_walker::before_dom_children (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator gsi;

  /* If we haven't seen all our predecessors, clear the hash-table.
     Bit 2 of e->src->aux marks already-visited blocks; bumping
     nt_call_phase invalidates all existing hash entries at once.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if ((((size_t)e->src->aux) & 2) == 0)
      {
	nt_call_phase++;
	break;
      }

  /* Mark this BB as being on the path to dominator root (bit 1) and as
     visited (bit 2).  */
  bb->aux = (void*)(1 | 2);

  /* And walk the statements in order.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);

      /* A call that might free memory invalidates everything seen so
	 far.  */
      if (is_gimple_call (stmt) && !nonfreeing_call_p (stmt))
	nt_call_phase++;
      else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
	{
	  add_or_mark_expr (bb, gimple_assign_lhs (stmt), true);
	  add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false);
	}
    }
}
1543
/* Called by walk_dominator_tree, when basic block BB is exited.  */
void
nontrapping_dom_walker::after_dom_children (basic_block bb)
{
  /* This BB isn't on the path to dominator root anymore: drop bit 1 but
     keep bit 2 (visited).  */
  bb->aux = (void*)2;
}
1551
/* We see the expression EXP in basic block BB.  If it's an interesting
   expression (an MEM_REF through an SSA_NAME) possibly insert the
   expression into the set NONTRAP or the hash table of seen expressions.
   STORE is true if this expression is on the LHS, otherwise it's on
   the RHS.  */
void
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
{
  HOST_WIDE_INT size;

  /* Only MEM_REFs through an SSA_NAME with a constant offset and a
     known positive size are interesting.  */
  if (TREE_CODE (exp) == MEM_REF
      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
    {
      tree name = TREE_OPERAND (exp, 0);
      struct name_to_bb map;
      name_to_bb **slot;
      struct name_to_bb *n2bb;
      basic_block found_bb = 0;

      /* Try to find the last seen MEM_REF through the same
         SSA_NAME, which can trap.  */
      map.ssa_name_ver = SSA_NAME_VERSION (name);
      map.phase = 0;
      map.bb = 0;
      map.store = store;
      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
      map.size = size;

      slot = m_seen_ssa_names.find_slot (&map, INSERT);
      n2bb = *slot;
      /* An entry from before the last table-clearing event (call that
	 may free memory, or an unvisited predecessor) is stale.  */
      if (n2bb && n2bb->phase >= nt_call_phase)
	found_bb = n2bb->bb;

      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
	 (it's in a basic block on the path from us to the dominator root)
	 then we can't trap.  Bit 1 of aux marks blocks currently on that
	 path.  */
      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
	{
	  m_nontrapping->add (exp);
	}
      else
	{
	  /* EXP might trap, so insert it into the hash table.  */
	  if (n2bb)
	    {
	      /* Refresh the stale or non-dominating entry in place.  */
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	    }
	  else
	    {
	      n2bb = XNEW (struct name_to_bb);
	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	      n2bb->store = store;
	      n2bb->offset = map.offset;
	      n2bb->size = size;
	      *slot = n2bb;
	    }
	}
    }
}
1616
/* This is the entry point of gathering non trapping memory accesses.
   It will do a dominator walk over the whole function, and it will
   make use of the bb->aux pointers.  It returns a set of trees
   (the MEM_REFs itself) which can't trap.  The returned set is
   heap-allocated and owned by the caller.  */
static hash_set<tree> *
get_non_trapping (void)
{
  nt_call_phase = 0;
  hash_set<tree> *nontrap = new hash_set<tree>;
  /* We're going to do a dominator walk, so ensure that we have
     dominance information.  */
  calculate_dominance_info (CDI_DOMINATORS);

  nontrapping_dom_walker (CDI_DOMINATORS, nontrap)
    .walk (cfun->cfg->x_entry_block_ptr);

  /* The walk left marks in bb->aux; reset them for later passes.  */
  clear_aux_for_blocks ();
  return nontrap;
}
1636
/* Do the main work of conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
   MIDDLE_BB:
     something
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that MIDDLE_BB contains only one store, that that store
   doesn't trap (not via NOTRAP, but via checking if an access to the same
   memory location dominates us) and that the store has a "simple" RHS.
   Return true if the store was sunk into JOIN_BB.  */

static bool
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
			edge e0, edge e1, hash_set<tree> *nontrap)
{
  gimple assign = last_and_only_stmt (middle_bb);
  tree lhs, rhs, name, name2;
  gphi *newphi;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  source_location locus;

  /* Check if middle_bb contains of only one store.  */
  if (!assign
      || !gimple_assign_single_p (assign)
      || gimple_has_volatile_ops (assign))
    return false;

  locus = gimple_location (assign);
  lhs = gimple_assign_lhs (assign);
  rhs = gimple_assign_rhs1 (assign);
  /* The destination must be an SSA-based MEM_REF of register type so
     that the conditional store can be expressed as a register PHI.  */
  if (TREE_CODE (lhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
      || !is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Prove that we can move the store down.  We could also check
     TREE_THIS_NOTRAP here, but in that case we also could move stores,
     whose value is not available readily, which we want to avoid.  */
  if (!nontrap->contains (lhs))
    return false;

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the single store.  */
  gsi = gsi_for_stmt (assign);
  unlink_stmt_vdef (assign);
  gsi_remove (&gsi, true);
  release_defs (assign);

  /* 2) Insert a load from the memory of the store to the temporary
        on the edge which did not contain the store.  */
  lhs = unshare_expr (lhs);
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  new_stmt = gimple_build_assign (name, lhs);
  gimple_set_location (new_stmt, locus);
  gsi_insert_on_edge (e1, new_stmt);

  /* 3) Create a PHI node at the join block, with one argument
        holding the old RHS, and the other holding the temporary
        where we stored the old memory contents.  */
  name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name2, join_bb);
  add_phi_arg (newphi, rhs, e0, locus);
  add_phi_arg (newphi, name, e1, locus);

  lhs = unshare_expr (lhs);
  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 4) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      /* JOIN_BB has no non-label statements; append at the end.  */
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
1721
/* Do the main work of conditional store replacement.  Sink the pair of
   stores THEN_ASSIGN (in THEN_BB) and ELSE_ASSIGN (in ELSE_BB), which
   must write the same location, into JOIN_BB as a PHI-fed single store.
   Return true if the transformation was performed.  */

static bool
cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
				  basic_block join_bb, gimple then_assign,
				  gimple else_assign)
{
  tree lhs_base, lhs, then_rhs, else_rhs, name;
  source_location then_locus, else_locus;
  gimple_stmt_iterator gsi;
  gphi *newphi;
  gassign *new_stmt;

  /* Both statements must be plain, non-volatile, non-clobber single
     assignments.  */
  if (then_assign == NULL
      || !gimple_assign_single_p (then_assign)
      || gimple_clobber_p (then_assign)
      || gimple_has_volatile_ops (then_assign)
      || else_assign == NULL
      || !gimple_assign_single_p (else_assign)
      || gimple_clobber_p (else_assign)
      || gimple_has_volatile_ops (else_assign))
    return false;

  /* Both stores must target the same register-type location.  */
  lhs = gimple_assign_lhs (then_assign);
  if (!is_gimple_reg_type (TREE_TYPE (lhs))
      || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
    return false;

  lhs_base = get_base_address (lhs);
  if (lhs_base == NULL_TREE
      || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
    return false;

  then_rhs = gimple_assign_rhs1 (then_assign);
  else_rhs = gimple_assign_rhs1 (else_assign);
  then_locus = gimple_location (then_assign);
  else_locus = gimple_location (else_assign);

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the stores.  */
  gsi = gsi_for_stmt (then_assign);
  unlink_stmt_vdef (then_assign);
  gsi_remove (&gsi, true);
  release_defs (then_assign);

  gsi = gsi_for_stmt (else_assign);
  unlink_stmt_vdef (else_assign);
  gsi_remove (&gsi, true);
  release_defs (else_assign);

  /* 2) Create a PHI node at the join block, with one argument
        holding the old RHS, and the other holding the temporary
        where we stored the old memory contents.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name, join_bb);
  add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
  add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);

  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 3) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      /* JOIN_BB has no non-label statements; append at the end.  */
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
1794
/* Conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
   THEN_BB:
     ...
     X = Y;
     ...
     goto JOIN_BB;
   ELSE_BB:
     ...
     X = Z;
     ...
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that it is safe to sink the store to JOIN_BB by verifying that
   there are no read-after-write or write-after-write dependencies in
   THEN_BB and ELSE_BB.  Return true if at least one pair of stores was
   sunk.  */

static bool
cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
                                basic_block join_bb)
{
  gimple then_assign = last_and_only_stmt (then_bb);
  gimple else_assign = last_and_only_stmt (else_bb);
  vec<data_reference_p> then_datarefs, else_datarefs;
  vec<ddr_p> then_ddrs, else_ddrs;
  gimple then_store, else_store;
  bool found, ok = false, res;
  struct data_dependence_relation *ddr;
  data_reference_p then_dr, else_dr;
  int i, j;
  tree then_lhs, else_lhs;
  basic_block blocks[3];

  if (MAX_STORES_TO_SINK == 0)
    return false;

  /* Handle the case with single statement in THEN_BB and ELSE_BB.  */
  if (then_assign && else_assign)
    return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                             then_assign, else_assign);

  /* Find data references.  */
  then_datarefs.create (1);
  else_datarefs.create (1);
  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
        == chrec_dont_know)
      || !then_datarefs.length ()
      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
	  == chrec_dont_know)
      || !else_datarefs.length ())
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Find pairs of stores with equal LHS.  */
  auto_vec<gimple, 1> then_stores, else_stores;
  FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
    {
      if (DR_IS_READ (then_dr))
        continue;

      then_store = DR_STMT (then_dr);
      then_lhs = gimple_get_lhs (then_store);
      if (then_lhs == NULL_TREE)
	continue;
      found = false;

      /* Look for an ELSE_BB store writing the same location.  */
      FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
        {
          if (DR_IS_READ (else_dr))
            continue;

          else_store = DR_STMT (else_dr);
          else_lhs = gimple_get_lhs (else_store);
	  if (else_lhs == NULL_TREE)
	    continue;

          if (operand_equal_p (then_lhs, else_lhs, 0))
            {
              found = true;
              break;
            }
        }

      if (!found)
        continue;

      then_stores.safe_push (then_store);
      else_stores.safe_push (else_store);
    }

  /* No pairs of stores found, or too many to sink profitably.  */
  if (!then_stores.length ()
      || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Compute and check data dependencies in both basic blocks.  */
  then_ddrs.create (1);
  else_ddrs.create (1);
  if (!compute_all_dependences (then_datarefs, &then_ddrs,
				vNULL, false)
      || !compute_all_dependences (else_datarefs, &else_ddrs,
				   vNULL, false))
    {
      free_dependence_relations (then_ddrs);
      free_dependence_relations (else_ddrs);
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }
  /* Give the statements fresh UIDs so that statement order within a
     block can be compared via gimple_uid below.  */
  blocks[0] = then_bb;
  blocks[1] = else_bb;
  blocks[2] = join_bb;
  renumber_gimple_stmt_uids_in_blocks (blocks, 3);

  /* Check that there are no read-after-write or write-after-write dependencies
     in THEN_BB.  */
  FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
          return false;
        }
    }

  /* Check that there are no read-after-write or write-after-write dependencies
     in ELSE_BB.  */
  FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
          return false;
        }
    }

  /* Sink stores with same LHS.  */
  FOR_EACH_VEC_ELT (then_stores, i, then_store)
    {
      else_store = else_stores[i];
      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                              then_store, else_store);
      ok = ok || res;
    }

  free_dependence_relations (then_ddrs);
  free_dependence_relations (else_ddrs);
  free_data_refs (then_datarefs);
  free_data_refs (else_datarefs);

  return ok;
}
1981
1982 /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */
1983
1984 static bool
1985 local_mem_dependence (gimple stmt, basic_block bb)
1986 {
1987 tree vuse = gimple_vuse (stmt);
1988 gimple def;
1989
1990 if (!vuse)
1991 return false;
1992
1993 def = SSA_NAME_DEF_STMT (vuse);
1994 return (def && gimple_bb (def) == bb);
1995 }
1996
1997 /* Given a "diamond" control-flow pattern where BB0 tests a condition,
1998 BB1 and BB2 are "then" and "else" blocks dependent on this test,
1999 and BB3 rejoins control flow following BB1 and BB2, look for
2000 opportunities to hoist loads as follows. If BB3 contains a PHI of
2001 two loads, one each occurring in BB1 and BB2, and the loads are
2002 provably of adjacent fields in the same structure, then move both
2003 loads into BB0. Of course this can only be done if there are no
2004 dependencies preventing such motion.
2005
2006 One of the hoisted loads will always be speculative, so the
2007 transformation is currently conservative:
2008
2009 - The fields must be strictly adjacent.
2010 - The two fields must occupy a single memory block that is
2011 guaranteed to not cross a page boundary.
2012
2013 The last is difficult to prove, as such memory blocks should be
2014 aligned on the minimum of the stack alignment boundary and the
2015 alignment guaranteed by heap allocation interfaces. Thus we rely
2016 on a parameter for the alignment value.
2017
2018 Provided a good value is used for the last case, the first
2019 restriction could possibly be relaxed. */
2020
static void
hoist_adjacent_loads (basic_block bb0, basic_block bb1,
		      basic_block bb2, basic_block bb3)
{
  /* Profitability/safety bound: both fields must fit within one L1
     cache line, expressed in bits.  */
  int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
  unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
  gphi_iterator gsi;

  /* Walk the phis in bb3 looking for an opportunity.  We are looking
     for phis of two SSA names, one each of which is defined in bb1 and
     bb2.  */
  for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi_stmt = gsi.phi ();
      gimple def1, def2, defswap;
      tree arg1, arg2, ref1, ref2, field1, field2, fieldswap;
      tree tree_offset1, tree_offset2, tree_size2, next;
      int offset1, offset2, size2;
      unsigned align1;
      gimple_stmt_iterator gsi2;
      basic_block bb_for_def1, bb_for_def2;

      /* Only consider two-argument phis of real (non-virtual) values.  */
      if (gimple_phi_num_args (phi_stmt) != 2
	  || virtual_operand_p (gimple_phi_result (phi_stmt)))
	continue;

      arg1 = gimple_phi_arg_def (phi_stmt, 0);
      arg2 = gimple_phi_arg_def (phi_stmt, 1);

      /* Both arguments must be SSA names with real defining statements.  */
      if (TREE_CODE (arg1) != SSA_NAME
	  || TREE_CODE (arg2) != SSA_NAME
	  || SSA_NAME_IS_DEFAULT_DEF (arg1)
	  || SSA_NAME_IS_DEFAULT_DEF (arg2))
	continue;

      def1 = SSA_NAME_DEF_STMT (arg1);
      def2 = SSA_NAME_DEF_STMT (arg2);

      /* One definition must come from each arm of the diamond
	 (in either order).  */
      if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
	  && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
	continue;

      /* Check the mode of the arguments to be sure a conditional move
	 can be generated for it.  */
      if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
	  == CODE_FOR_nothing)
	continue;

      /* Both statements must be assignments whose RHS is a COMPONENT_REF.  */
      if (!gimple_assign_single_p (def1)
	  || !gimple_assign_single_p (def2)
	  || gimple_has_volatile_ops (def1)
	  || gimple_has_volatile_ops (def2))
	continue;

      ref1 = gimple_assign_rhs1 (def1);
      ref2 = gimple_assign_rhs1 (def2);

      if (TREE_CODE (ref1) != COMPONENT_REF
	  || TREE_CODE (ref2) != COMPONENT_REF)
	continue;

      /* The zeroth operand of the two component references must be
	 identical.  It is not sufficient to compare get_base_address of
	 the two references, because this could allow for different
	 elements of the same array in the two trees.  It is not safe to
	 assume that the existence of one array element implies the
	 existence of a different one.  */
      if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
	continue;

      field1 = TREE_OPERAND (ref1, 1);
      field2 = TREE_OPERAND (ref2, 1);

      /* Check for field adjacency, and ensure field1 comes first.
	 Skip over non-FIELD_DECL entries in the declaration chain.  */
      for (next = DECL_CHAIN (field1);
	   next && TREE_CODE (next) != FIELD_DECL;
	   next = DECL_CHAIN (next))
	;

      if (next != field2)
	{
	  /* field2 does not immediately follow field1; try the opposite
	     order, and swap defs so def1/field1 stay "first".  */
	  for (next = DECL_CHAIN (field2);
	       next && TREE_CODE (next) != FIELD_DECL;
	       next = DECL_CHAIN (next))
	    ;

	  if (next != field1)
	    continue;

	  fieldswap = field1;
	  field1 = field2;
	  field2 = fieldswap;
	  defswap = def1;
	  def1 = def2;
	  def2 = defswap;
	}

      bb_for_def1 = gimple_bb (def1);
      bb_for_def2 = gimple_bb (def2);

      /* Check for proper alignment of the first field.  */
      tree_offset1 = bit_position (field1);
      tree_offset2 = bit_position (field2);
      tree_size2 = DECL_SIZE (field2);

      /* Bail out on offsets/sizes that do not fit a host-wide integer
	 (e.g. variable-sized fields).  */
      if (!tree_fits_uhwi_p (tree_offset1)
	  || !tree_fits_uhwi_p (tree_offset2)
	  || !tree_fits_uhwi_p (tree_size2))
	continue;

      offset1 = tree_to_uhwi (tree_offset1);
      offset2 = tree_to_uhwi (tree_offset2);
      size2 = tree_to_uhwi (tree_size2);
      align1 = DECL_ALIGN (field1) % param_align_bits;

      /* The first field must start on a byte boundary.  */
      if (offset1 % BITS_PER_UNIT != 0)
	continue;

      /* For profitability, the two field references should fit within
	 a single cache line.  */
      if (align1 + offset2 - offset1 + size2 > param_align_bits)
	continue;

      /* The two expressions cannot be dependent upon vdefs defined
	 in bb1/bb2.  */
      if (local_mem_dependence (def1, bb_for_def1)
	  || local_mem_dependence (def2, bb_for_def2))
	continue;

      /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
	 bb0.  We hoist the first one first so that a cache miss is handled
	 efficiently regardless of hardware cache-fill policy.  */
      gsi2 = gsi_for_stmt (def1);
      gsi_move_to_bb_end (&gsi2, bb0);
      gsi2 = gsi_for_stmt (def2);
      gsi_move_to_bb_end (&gsi2, bb0);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "\nHoisting adjacent loads from %d and %d into %d: \n",
		   bb_for_def1->index, bb_for_def2->index, bb0->index);
	  print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
	  print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
	}
    }
}
2169
2170 /* Determine whether we should attempt to hoist adjacent loads out of
2171 diamond patterns in pass_phiopt. Always hoist loads if
2172 -fhoist-adjacent-loads is specified and the target machine has
2173 both a conditional move instruction and a defined cache line size. */
2174
2175 static bool
2176 gate_hoist_loads (void)
2177 {
2178 return (flag_hoist_adjacent_loads == 1
2179 && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
2180 && HAVE_conditional_move);
2181 }
2182
/* This pass tries to replace an if-then-else block with an
2184 assignment. We have four kinds of transformations. Some of these
2185 transformations are also performed by the ifcvt RTL optimizer.
2186
2187 Conditional Replacement
2188 -----------------------
2189
2190 This transformation, implemented in conditional_replacement,
2191 replaces
2192
2193 bb0:
2194 if (cond) goto bb2; else goto bb1;
2195 bb1:
2196 bb2:
2197 x = PHI <0 (bb1), 1 (bb0), ...>;
2198
2199 with
2200
2201 bb0:
2202 x' = cond;
2203 goto bb2;
2204 bb2:
2205 x = PHI <x' (bb0), ...>;
2206
2207 We remove bb1 as it becomes unreachable. This occurs often due to
2208 gimplification of conditionals.
2209
2210 Value Replacement
2211 -----------------
2212
2213 This transformation, implemented in value_replacement, replaces
2214
2215 bb0:
2216 if (a != b) goto bb2; else goto bb1;
2217 bb1:
2218 bb2:
2219 x = PHI <a (bb1), b (bb0), ...>;
2220
2221 with
2222
2223 bb0:
2224 bb2:
2225 x = PHI <b (bb0), ...>;
2226
2227 This opportunity can sometimes occur as a result of other
2228 optimizations.
2229
2230
2231 Another case caught by value replacement looks like this:
2232
2233 bb0:
2234 t1 = a == CONST;
2235 t2 = b > c;
2236 t3 = t1 & t2;
2237 if (t3 != 0) goto bb1; else goto bb2;
2238 bb1:
2239 bb2:
2240 x = PHI (CONST, a)
2241
2242 Gets replaced with:
2243 bb0:
2244 bb2:
2245 t1 = a == CONST;
2246 t2 = b > c;
2247 t3 = t1 & t2;
2248 x = a;
2249
2250 ABS Replacement
2251 ---------------
2252
2253 This transformation, implemented in abs_replacement, replaces
2254
2255 bb0:
2256 if (a >= 0) goto bb2; else goto bb1;
2257 bb1:
2258 x = -a;
2259 bb2:
2260 x = PHI <x (bb1), a (bb0), ...>;
2261
2262 with
2263
2264 bb0:
2265 x' = ABS_EXPR< a >;
2266 bb2:
2267 x = PHI <x' (bb0), ...>;
2268
2269 MIN/MAX Replacement
2270 -------------------
2271
   This transformation, implemented in minmax_replacement, replaces
2273
2274 bb0:
2275 if (a <= b) goto bb2; else goto bb1;
2276 bb1:
2277 bb2:
2278 x = PHI <b (bb1), a (bb0), ...>;
2279
2280 with
2281
2282 bb0:
2283 x' = MIN_EXPR (a, b)
2284 bb2:
2285 x = PHI <x' (bb0), ...>;
2286
2287 A similar transformation is done for MAX_EXPR.
2288
2289
2290 This pass also performs a fifth transformation of a slightly different
2291 flavor.
2292
2293 Adjacent Load Hoisting
2294 ----------------------
2295
2296 This transformation replaces
2297
2298 bb0:
2299 if (...) goto bb2; else goto bb1;
2300 bb1:
2301 x1 = (<expr>).field1;
2302 goto bb3;
2303 bb2:
2304 x2 = (<expr>).field2;
2305 bb3:
2306 # x = PHI <x1, x2>;
2307
2308 with
2309
2310 bb0:
2311 x1 = (<expr>).field1;
2312 x2 = (<expr>).field2;
2313 if (...) goto bb2; else goto bb1;
2314 bb1:
2315 goto bb3;
2316 bb2:
2317 bb3:
2318 # x = PHI <x1, x2>;
2319
2320 The purpose of this transformation is to enable generation of conditional
2321 move instructions such as Intel CMOVE or PowerPC ISEL. Because one of
2322 the loads is speculative, the transformation is restricted to very
2323 specific cases to avoid introducing a page fault. We are looking for
2324 the common idiom:
2325
2326 if (...)
2327 x = y->left;
2328 else
2329 x = y->right;
2330
2331 where left and right are typically adjacent pointers in a tree structure. */
2332
namespace {

/* Pass registration data for the main PHI-optimization pass ("phiopt").
   Requires a CFG and SSA form; provides and destroys no properties.  */

const pass_data pass_data_phiopt =
{
  GIMPLE_PASS, /* type */
  "phiopt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_phiopt : public gimple_opt_pass
{
public:
  pass_phiopt (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_phiopt, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_phiopt (m_ctxt); }
  /* Gated on -fssa-phiopt.  */
  virtual bool gate (function *) { return flag_ssa_phiopt; }
  virtual unsigned int execute (function *)
  {
    /* Run the worker in phiopt mode (do_store_elim = false), enabling
       adjacent-load hoisting only when gate_hoist_loads allows it.  */
    return tree_ssa_phiopt_worker (false, gate_hoist_loads ());
  }

}; // class pass_phiopt

} // anon namespace
2366
/* Factory called by the pass manager to create the phiopt pass.  */

gimple_opt_pass *
make_pass_phiopt (gcc::context *ctxt)
{
  return new pass_phiopt (ctxt);
}
2372
namespace {

/* Pass registration data for conditional store elimination ("cselim").
   Shares the phiopt timevar; requires a CFG and SSA form.  */

const pass_data pass_data_cselim =
{
  GIMPLE_PASS, /* type */
  "cselim", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_cselim : public gimple_opt_pass
{
public:
  pass_cselim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cselim, ctxt)
  {}

  /* opt_pass methods: */
  /* Gated on -ftree-cselim.  */
  virtual bool gate (function *) { return flag_tree_cselim; }
  virtual unsigned int execute (function *) { return tree_ssa_cs_elim (); }

}; // class pass_cselim

} // anon namespace
2402
/* Factory called by the pass manager to create the cselim pass.  */

gimple_opt_pass *
make_pass_cselim (gcc::context *ctxt)
{
  return new pass_cselim (ctxt);
}