gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "rtl.h"
28 #include "ssa.h"
29 #include "tm_p.h"
30 #include "alias.h"
31 #include "fold-const.h"
32 #include "stor-layout.h"
33 #include "cfganal.h"
34 #include "gimple-pretty-print.h"
35 #include "tree-inline.h"
36 #include "internal-fn.h"
37 #include "gimple-fold.h"
38 #include "tree-eh.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-cfg.h"
42 #include "tree-ssa-loop-niter.h"
43 #include "tree-ssa-loop.h"
44 #include "flags.h"
45 #include "insn-config.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "calls.h"
50 #include "emit-rtl.h"
51 #include "varasm.h"
52 #include "stmt.h"
53 #include "expr.h"
54 #include "tree-dfa.h"
55 #include "tree-ssa.h"
56 #include "tree-iterator.h"
57 #include "tree-pass.h"
58 #include "alloc-pool.h"
59 #include "langhooks.h"
60 #include "cfgloop.h"
61 #include "target.h"
62 #include "params.h"
63 #include "diagnostic-core.h"
64 #include "builtins.h"
65 #include "gimplify.h"
66 #include "insn-codes.h"
67 #include "optabs.h"
68
69 /* This is a simple global reassociation pass. It is, in part, based
70 on the LLVM pass of the same name (They do some things more/less
71 than we do, in different orders, etc).
72
73 It consists of five steps:
74
75 1. Breaking up subtract operations into addition + negate, where
76 it would promote the reassociation of adds.
77
78 2. Left linearization of the expression trees, so that (A+B)+(C+D)
79 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
80 During linearization, we place the operands of the binary
81 expressions into a vector of operand_entry_t
82
83 3. Optimization of the operand lists, eliminating things like a +
84 -a, a & a, etc.
85
86 3a. Combine repeated factors with the same occurrence counts
87 into a __builtin_powi call that will later be optimized into
88 an optimal number of multiplies.
89
90 4. Rewrite the expression trees we linearized and optimized so
91 they are in proper rank order.
92
93 5. Repropagate negates, as nothing else will clean it up ATM.
94
95 A bit of theory on #4, since nobody seems to write anything down
96 about why it makes sense to do it the way they do it:
97
98 We could do this much nicer theoretically, but don't (for reasons
99 explained after how to do it theoretically nice :P).
100
101 In order to promote the most redundancy elimination, you want
102 binary expressions whose operands are the same rank (or
103 preferably, the same value) exposed to the redundancy eliminator,
104 for possible elimination.
105
106 So the way to do this if we really cared, is to build the new op
107 tree from the leaves to the roots, merging as you go, and putting the
108 new op on the end of the worklist, until you are left with one
109 thing on the worklist.
110
111 IE if you have to rewrite the following set of operands (listed with
112 rank in parentheses), with opcode PLUS_EXPR:
113
114 a (1), b (1), c (1), d (2), e (2)
115
116
117 We start with our merge worklist empty, and the ops list with all of
118 those on it.
119
120 You want to first merge all leaves of the same rank, as much as
121 possible.
122
123 So first build a binary op of
124
125 mergetmp = a + b, and put "mergetmp" on the merge worklist.
126
127 Because there is no three operand form of PLUS_EXPR, c is not going to
128 be exposed to redundancy elimination as a rank 1 operand.
129
130 So you might as well throw it on the merge worklist (you could also
131 consider it to now be a rank two operand, and merge it with d and e,
132 but in this case, you then have evicted e from a binary op. So at
133 least in this situation, you can't win.)
134
135 Then build a binary op of d + e
136 mergetmp2 = d + e
137
138 and put mergetmp2 on the merge worklist.
139
140 so merge worklist = {mergetmp, c, mergetmp2}
141
142 Continue building binary ops of these operations until you have only
143 one operation left on the worklist.
144
145 So we have
146
147 build binary op
148 mergetmp3 = mergetmp + c
149
150 worklist = {mergetmp2, mergetmp3}
151
152 mergetmp4 = mergetmp2 + mergetmp3
153
154 worklist = {mergetmp4}
155
156 because we have one operation left, we can now just set the original
157 statement equal to the result of that operation.
158
159 This will at least expose a + b and d + e to redundancy elimination
160 as binary operations.
161
162 For extra points, you can reuse the old statements to build the
163 mergetmps, since you shouldn't run out.
164
165 So why don't we do this?
166
167 Because it's expensive, and it rarely helps. Most trees we are
168 reassociating have 3 or fewer ops. If they have 2 ops, they already
169 will be written into a nice single binary op. If you have 3 ops, a
170 single simple check suffices to tell you whether the first two are of the
171 same rank. If so, you know to order it
172
173 mergetmp = op1 + op2
174 newstmt = mergetmp + op3
175
176 instead of
177 mergetmp = op2 + op3
178 newstmt = mergetmp + op1
179
180 If all three are of the same rank, you can't expose them all in a
181 single binary operator anyway, so the above is *still* the best you
182 can do.
183
184 Thus, this is what we do. When we have three ops left, we check to see
185 what order to put them in, and call it a day. As a nod to vector sum
186 reduction, we check if any of the ops are really a phi node that is a
187 destructive update for the associating op, and keep the destructive
188 update together for vector sum reduction recognition. */
189
190
191 /* Statistics */
192 static struct
193 {
194 int linearized;
195 int constants_eliminated;
196 int ops_eliminated;
197 int rewritten;
198 int pows_encountered;
199 int pows_created;
200 } reassociate_stats;
201
202 /* Operator, rank pair. */
203 typedef struct operand_entry
204 {
205 unsigned int rank;
206 int id;
207 tree op;
208 unsigned int count;
209 } *operand_entry_t;
210
211 static pool_allocator<operand_entry> operand_entry_pool ("operand entry pool",
212 30);
213
214 /* This is used to assign a unique ID to each struct operand_entry
215 so that qsort results are identical on different hosts. */
216 static int next_operand_entry_id;
217
218 /* Starting rank number for a given basic block, so that we can rank
219 operations using unmovable instructions in that BB based on the bb
220 depth. */
221 static long *bb_rank;
222
223 /* Operand->rank hashtable. */
224 static hash_map<tree, long> *operand_rank;
225
226 /* Vector of SSA_NAMEs for which, after reassociate_bb has processed
227 all basic blocks, the CFG should be adjusted - the basic block is
228 split right after that SSA_NAME's definition statement and before
229 its only use, which must be a bit ior. */
230 static vec<tree> reassoc_branch_fixups;
231
232 /* Forward decls. */
233 static long get_rank (tree);
234 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
235
236 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
237 possibly added by gsi_remove. */
238
239 bool
240 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
241 {
242 gimple stmt = gsi_stmt (*gsi);
243
244 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
245 return gsi_remove (gsi, true);
246
247 gimple_stmt_iterator prev = *gsi;
248 gsi_prev (&prev);
249 unsigned uid = gimple_uid (stmt);
250 basic_block bb = gimple_bb (stmt);
251 bool ret = gsi_remove (gsi, true);
252 if (!gsi_end_p (prev))
253 gsi_next (&prev);
254 else
255 prev = gsi_start_bb (bb);
256 gimple end_stmt = gsi_stmt (*gsi);
257 while ((stmt = gsi_stmt (prev)) != end_stmt)
258 {
259 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
260 gimple_set_uid (stmt, uid);
261 gsi_next (&prev);
262 }
263 return ret;
264 }
265
266 /* Bias amount for loop-carried phis. We want this to be larger than
267 the depth of any reassociation tree we can see, but not larger than
268 the rank difference between two blocks. */
269 #define PHI_LOOP_BIAS (1 << 15)
270
271 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
272 an innermost loop, and the phi has only a single use which is inside
273 the loop, then the rank is the block rank of the loop latch plus an
274 extra bias for the loop-carried dependence. This causes expressions
275 calculated into an accumulator variable to be independent for each
276 iteration of the loop. If STMT is some other phi, the rank is the
277 block rank of its containing block. */
278 static long
279 phi_rank (gimple stmt)
280 {
281 basic_block bb = gimple_bb (stmt);
282 struct loop *father = bb->loop_father;
283 tree res;
284 unsigned i;
285 use_operand_p use;
286 gimple use_stmt;
287
288 /* We only care about real loops (those with a latch). */
289 if (!father->latch)
290 return bb_rank[bb->index];
291
292 /* Interesting phis must be in headers of innermost loops. */
293 if (bb != father->header
294 || father->inner)
295 return bb_rank[bb->index];
296
297 /* Ignore virtual SSA_NAMEs. */
298 res = gimple_phi_result (stmt);
299 if (virtual_operand_p (res))
300 return bb_rank[bb->index];
301
302 /* The phi definition must have a single use, and that use must be
303 within the loop. Otherwise this isn't an accumulator pattern. */
304 if (!single_imm_use (res, &use, &use_stmt)
305 || gimple_bb (use_stmt)->loop_father != father)
306 return bb_rank[bb->index];
307
308 /* Look for phi arguments from within the loop. If found, bias this phi. */
309 for (i = 0; i < gimple_phi_num_args (stmt); i++)
310 {
311 tree arg = gimple_phi_arg_def (stmt, i);
312 if (TREE_CODE (arg) == SSA_NAME
313 && !SSA_NAME_IS_DEFAULT_DEF (arg))
314 {
315 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
316 if (gimple_bb (def_stmt)->loop_father == father)
317 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
318 }
319 }
320
321 /* Must be an uninteresting phi. */
322 return bb_rank[bb->index];
323 }
324
325 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
326 loop-carried dependence of an innermost loop, return TRUE; else
327 return FALSE. */
328 static bool
329 loop_carried_phi (tree exp)
330 {
331 gimple phi_stmt;
332 long block_rank;
333
334 if (TREE_CODE (exp) != SSA_NAME
335 || SSA_NAME_IS_DEFAULT_DEF (exp))
336 return false;
337
338 phi_stmt = SSA_NAME_DEF_STMT (exp);
339
340 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
341 return false;
342
343 /* Non-loop-carried phis have block rank. Loop-carried phis have
344 an additional bias added in. If this phi doesn't have block rank,
345 it's biased and should not be propagated. */
346 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
347
348 if (phi_rank (phi_stmt) != block_rank)
349 return true;
350
351 return false;
352 }
353
354 /* Return the maximum of RANK and the rank that should be propagated
355 from expression OP. For most operands, this is just the rank of OP.
356 For loop-carried phis, the value is zero to avoid undoing the bias
357 in favor of the phi. */
358 static long
359 propagate_rank (long rank, tree op)
360 {
361 long op_rank;
362
363 if (loop_carried_phi (op))
364 return rank;
365
366 op_rank = get_rank (op);
367
368 return MAX (rank, op_rank);
369 }
370
371 /* Look up the operand rank structure for expression E. */
372
373 static inline long
374 find_operand_rank (tree e)
375 {
376 long *slot = operand_rank->get (e);
377 return slot ? *slot : -1;
378 }
379
380 /* Insert {E,RANK} into the operand rank hashtable. */
381
382 static inline void
383 insert_operand_rank (tree e, long rank)
384 {
385 gcc_assert (rank > 0);
386 gcc_assert (!operand_rank->put (e, rank));
387 }
388
389 /* Given an expression E, return the rank of the expression. */
390
391 static long
392 get_rank (tree e)
393 {
394 /* SSA_NAMEs have the rank of the expression they are the result
395 of.
396 For globals and uninitialized values, the rank is 0.
397 For function arguments, use the pre-setup rank.
398 For PHI nodes, stores, asm statements, etc, we use the rank of
399 the BB.
400 For simple operations, the rank is the maximum rank of any of
401 its operands, or the bb_rank, whichever is less.
402 I make no claims that this is optimal; however, it gives good
403 results. */
404
405 /* We make an exception to the normal ranking system to break
406 dependences of accumulator variables in loops. Suppose we
407 have a simple one-block loop containing:
408
409 x_1 = phi(x_0, x_2)
410 b = a + x_1
411 c = b + d
412 x_2 = c + e
413
414 As shown, each iteration of the calculation into x is fully
415 dependent upon the iteration before it. We would prefer to
416 see this in the form:
417
418 x_1 = phi(x_0, x_2)
419 b = a + d
420 c = b + e
421 x_2 = c + x_1
422
423 If the loop is unrolled, the calculations of b and c from
424 different iterations can be interleaved.
425
426 To obtain this result during reassociation, we bias the rank
427 of the phi definition x_1 upward, when it is recognized as an
428 accumulator pattern. The artificial rank causes it to be
429 added last, providing the desired independence. */
430
431 if (TREE_CODE (e) == SSA_NAME)
432 {
433 ssa_op_iter iter;
434 gimple stmt;
435 long rank;
436 tree op;
437
438 if (SSA_NAME_IS_DEFAULT_DEF (e))
439 return find_operand_rank (e);
440
441 stmt = SSA_NAME_DEF_STMT (e);
442 if (gimple_code (stmt) == GIMPLE_PHI)
443 return phi_rank (stmt);
444
445 if (!is_gimple_assign (stmt))
446 return bb_rank[gimple_bb (stmt)->index];
447
448 /* If we already have a rank for this expression, use that. */
449 rank = find_operand_rank (e);
450 if (rank != -1)
451 return rank;
452
453 /* Otherwise, find the maximum rank for the operands. As an
454 exception, remove the bias from loop-carried phis when propagating
455 the rank so that dependent operations are not also biased. */
456 /* Simply walk over all SSA uses - this takes advantage of the
457 fact that non-SSA operands satisfy is_gimple_min_invariant and
458 thus have rank 0. */
459 rank = 0;
460 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
461 rank = propagate_rank (rank, op);
462
463 if (dump_file && (dump_flags & TDF_DETAILS))
464 {
465 fprintf (dump_file, "Rank for ");
466 print_generic_expr (dump_file, e, 0);
467 fprintf (dump_file, " is %ld\n", (rank + 1));
468 }
469
470 /* Note the rank in the hashtable so we don't recompute it. */
471 insert_operand_rank (e, (rank + 1));
472 return (rank + 1);
473 }
474
475 /* Constants, globals, etc., are rank 0 */
476 return 0;
477 }
478
479
480 /* We want integer ones to end up last no matter what, since they are
481 the ones we can do the most with. */
482 #define INTEGER_CONST_TYPE 1 << 3
483 #define FLOAT_CONST_TYPE 1 << 2
484 #define OTHER_CONST_TYPE 1 << 1
485
486 /* Classify an invariant tree into integer, float, or other, so that
487 we can sort them to be near other constants of the same type. */
488 static inline int
489 constant_type (tree t)
490 {
491 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
492 return INTEGER_CONST_TYPE;
493 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
494 return FLOAT_CONST_TYPE;
495 else
496 return OTHER_CONST_TYPE;
497 }
498
499 /* qsort comparison function to sort operand entries PA and PB by rank
500 so that the sorted array is ordered by rank in decreasing order. */
501 static int
502 sort_by_operand_rank (const void *pa, const void *pb)
503 {
504 const operand_entry_t oea = *(const operand_entry_t *)pa;
505 const operand_entry_t oeb = *(const operand_entry_t *)pb;
506
507 /* It's nicer for optimize_expression if constants that are likely
508 to fold when added/multiplied/whatever are put next to each
509 other. Since all constants have rank 0, order them by type. */
510 if (oeb->rank == 0 && oea->rank == 0)
511 {
512 if (constant_type (oeb->op) != constant_type (oea->op))
513 return constant_type (oeb->op) - constant_type (oea->op);
514 else
515 /* To make sorting result stable, we use unique IDs to determine
516 order. */
517 return oeb->id - oea->id;
518 }
519
520 /* Lastly, make sure the versions that are the same go next to each
521 other. */
522 if ((oeb->rank - oea->rank == 0)
523 && TREE_CODE (oea->op) == SSA_NAME
524 && TREE_CODE (oeb->op) == SSA_NAME)
525 {
526 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
527 versions of removed SSA_NAMEs, so if possible, prefer to sort
528 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
529 See PR60418. */
530 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
531 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
532 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
533 {
534 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
535 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
536 basic_block bba = gimple_bb (stmta);
537 basic_block bbb = gimple_bb (stmtb);
538 if (bbb != bba)
539 {
540 if (bb_rank[bbb->index] != bb_rank[bba->index])
541 return bb_rank[bbb->index] - bb_rank[bba->index];
542 }
543 else
544 {
545 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
546 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
547 if (da != db)
548 return da ? 1 : -1;
549 }
550 }
551
552 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
553 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
554 else
555 return oeb->id - oea->id;
556 }
557
558 if (oeb->rank != oea->rank)
559 return oeb->rank - oea->rank;
560 else
561 return oeb->id - oea->id;
562 }
563
564 /* Add an operand entry to *OPS for the tree operand OP. */
565
566 static void
567 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
568 {
569 operand_entry_t oe = operand_entry_pool.allocate ();
570
571 oe->op = op;
572 oe->rank = get_rank (op);
573 oe->id = next_operand_entry_id++;
574 oe->count = 1;
575 ops->safe_push (oe);
576 }
577
578 /* Add an operand entry to *OPS for the tree operand OP with repeat
579 count REPEAT. */
580
581 static void
582 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
583 HOST_WIDE_INT repeat)
584 {
585 operand_entry_t oe = operand_entry_pool.allocate ();
586
587 oe->op = op;
588 oe->rank = get_rank (op);
589 oe->id = next_operand_entry_id++;
590 oe->count = repeat;
591 ops->safe_push (oe);
592
593 reassociate_stats.pows_encountered++;
594 }
595
596 /* Return true if STMT is a reassociable operation containing a binary
597 operation with tree code CODE, and is inside LOOP. */
598
599 static bool
600 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
601 {
602 basic_block bb = gimple_bb (stmt);
603
604 if (gimple_bb (stmt) == NULL)
605 return false;
606
607 if (!flow_bb_inside_loop_p (loop, bb))
608 return false;
609
610 if (is_gimple_assign (stmt)
611 && gimple_assign_rhs_code (stmt) == code
612 && has_single_use (gimple_assign_lhs (stmt)))
613 return true;
614
615 return false;
616 }
617
618
619 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
620 operand of that operation. Otherwise, return NULL_TREE. */
621
622 static tree
623 get_unary_op (tree name, enum tree_code opcode)
624 {
625 gimple stmt = SSA_NAME_DEF_STMT (name);
626
627 if (!is_gimple_assign (stmt))
628 return NULL_TREE;
629
630 if (gimple_assign_rhs_code (stmt) == opcode)
631 return gimple_assign_rhs1 (stmt);
632 return NULL_TREE;
633 }
634
635 /* If CURR and LAST are a pair of ops that OPCODE allows us to
636 eliminate through equivalences, do so, remove them from OPS, and
637 return true. Otherwise, return false. */
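/* For example, if the sorted operand list contains the same SSA name in two
   adjacent positions, { a, a, b } under BIT_AND_EXPR keeps a single copy and
   becomes { a, b }, while under BIT_XOR_EXPR both copies cancel and the list
   becomes { b } (or a single zero constant if nothing else remains).  */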
638
639 static bool
640 eliminate_duplicate_pair (enum tree_code opcode,
641 vec<operand_entry_t> *ops,
642 bool *all_done,
643 unsigned int i,
644 operand_entry_t curr,
645 operand_entry_t last)
646 {
647
648 /* If we have two of the same op, and the opcode is & |, min, or max,
649 we can eliminate one of them.
650 If we have two of the same op, and the opcode is ^, we can
651 eliminate both of them. */
652
653 if (last && last->op == curr->op)
654 {
655 switch (opcode)
656 {
657 case MAX_EXPR:
658 case MIN_EXPR:
659 case BIT_IOR_EXPR:
660 case BIT_AND_EXPR:
661 if (dump_file && (dump_flags & TDF_DETAILS))
662 {
663 fprintf (dump_file, "Equivalence: ");
664 print_generic_expr (dump_file, curr->op, 0);
665 fprintf (dump_file, " [&|minmax] ");
666 print_generic_expr (dump_file, last->op, 0);
667 fprintf (dump_file, " -> ");
668 print_generic_stmt (dump_file, last->op, 0);
669 }
670
671 ops->ordered_remove (i);
672 reassociate_stats.ops_eliminated ++;
673
674 return true;
675
676 case BIT_XOR_EXPR:
677 if (dump_file && (dump_flags & TDF_DETAILS))
678 {
679 fprintf (dump_file, "Equivalence: ");
680 print_generic_expr (dump_file, curr->op, 0);
681 fprintf (dump_file, " ^ ");
682 print_generic_expr (dump_file, last->op, 0);
683 fprintf (dump_file, " -> nothing\n");
684 }
685
686 reassociate_stats.ops_eliminated += 2;
687
688 if (ops->length () == 2)
689 {
690 ops->create (0);
691 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
692 *all_done = true;
693 }
694 else
695 {
696 ops->ordered_remove (i-1);
697 ops->ordered_remove (i-1);
698 }
699
700 return true;
701
702 default:
703 break;
704 }
705 }
706 return false;
707 }
708
709 static vec<tree> plus_negates;
710
711 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
712 not expression, look in OPS for a corresponding positive operation to cancel
713 it out. If we find one, remove the other from OPS, replace
714 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
715 return false. */
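/* For instance, with OPCODE == PLUS_EXPR and OPS == { t, b, a } where
   t = -a, the pair cancels and OPS becomes { b, 0 }; if instead t = ~a,
   OPS becomes { b, -1 }, since a + ~a is all ones.  */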
716
717 static bool
718 eliminate_plus_minus_pair (enum tree_code opcode,
719 vec<operand_entry_t> *ops,
720 unsigned int currindex,
721 operand_entry_t curr)
722 {
723 tree negateop;
724 tree notop;
725 unsigned int i;
726 operand_entry_t oe;
727
728 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
729 return false;
730
731 negateop = get_unary_op (curr->op, NEGATE_EXPR);
732 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
733 if (negateop == NULL_TREE && notop == NULL_TREE)
734 return false;
735
736 /* Any non-negated version will have a rank that is one less than
737 the current rank. So once we hit those ranks, if we don't find
738 one, we can stop. */
739
740 for (i = currindex + 1;
741 ops->iterate (i, &oe)
742 && oe->rank >= curr->rank - 1 ;
743 i++)
744 {
745 if (oe->op == negateop)
746 {
747
748 if (dump_file && (dump_flags & TDF_DETAILS))
749 {
750 fprintf (dump_file, "Equivalence: ");
751 print_generic_expr (dump_file, negateop, 0);
752 fprintf (dump_file, " + -");
753 print_generic_expr (dump_file, oe->op, 0);
754 fprintf (dump_file, " -> 0\n");
755 }
756
757 ops->ordered_remove (i);
758 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
759 ops->ordered_remove (currindex);
760 reassociate_stats.ops_eliminated ++;
761
762 return true;
763 }
764 else if (oe->op == notop)
765 {
766 tree op_type = TREE_TYPE (oe->op);
767
768 if (dump_file && (dump_flags & TDF_DETAILS))
769 {
770 fprintf (dump_file, "Equivalence: ");
771 print_generic_expr (dump_file, notop, 0);
772 fprintf (dump_file, " + ~");
773 print_generic_expr (dump_file, oe->op, 0);
774 fprintf (dump_file, " -> -1\n");
775 }
776
777 ops->ordered_remove (i);
778 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
779 ops->ordered_remove (currindex);
780 reassociate_stats.ops_eliminated ++;
781
782 return true;
783 }
784 }
785
786 /* CURR->OP is a negate expr in a plus expr: save it for later
787 inspection in repropagate_negates(). */
788 if (negateop != NULL_TREE)
789 plus_negates.safe_push (curr->op);
790
791 return false;
792 }
793
794 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
795 bitwise not expression, look in OPS for a corresponding operand to
796 cancel it out. If we find one, remove the other from OPS, replace
797 OPS[CURRINDEX] with 0, and return true. Otherwise, return
798 false. */
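/* Note that the whole operand list collapses: e.g. with t = ~a,
   OPS == { t, b, a } under BIT_AND_EXPR becomes just { 0 } and under
   BIT_IOR_EXPR just { -1 }, because the result is known regardless of b.  */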
799
800 static bool
801 eliminate_not_pairs (enum tree_code opcode,
802 vec<operand_entry_t> *ops,
803 unsigned int currindex,
804 operand_entry_t curr)
805 {
806 tree notop;
807 unsigned int i;
808 operand_entry_t oe;
809
810 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
811 || TREE_CODE (curr->op) != SSA_NAME)
812 return false;
813
814 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
815 if (notop == NULL_TREE)
816 return false;
817
818 /* Any non-not version will have a rank that is one less than
819 the current rank. So once we hit those ranks, if we don't find
820 one, we can stop. */
821
822 for (i = currindex + 1;
823 ops->iterate (i, &oe)
824 && oe->rank >= curr->rank - 1;
825 i++)
826 {
827 if (oe->op == notop)
828 {
829 if (dump_file && (dump_flags & TDF_DETAILS))
830 {
831 fprintf (dump_file, "Equivalence: ");
832 print_generic_expr (dump_file, notop, 0);
833 if (opcode == BIT_AND_EXPR)
834 fprintf (dump_file, " & ~");
835 else if (opcode == BIT_IOR_EXPR)
836 fprintf (dump_file, " | ~");
837 print_generic_expr (dump_file, oe->op, 0);
838 if (opcode == BIT_AND_EXPR)
839 fprintf (dump_file, " -> 0\n");
840 else if (opcode == BIT_IOR_EXPR)
841 fprintf (dump_file, " -> -1\n");
842 }
843
844 if (opcode == BIT_AND_EXPR)
845 oe->op = build_zero_cst (TREE_TYPE (oe->op));
846 else if (opcode == BIT_IOR_EXPR)
847 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
848
849 reassociate_stats.ops_eliminated += ops->length () - 1;
850 ops->truncate (0);
851 ops->quick_push (oe);
852 return true;
853 }
854 }
855
856 return false;
857 }
858
859 /* Use constant value that may be present in OPS to try to eliminate
860 operands. Note that this function is only really used when we've
861 eliminated ops for other reasons, or merged constants. Across
862 single statements, fold already does all of this, plus more. There
863 is little point in duplicating logic, so I've only included the
864 identities that I could ever construct testcases to trigger. */
865
866 static void
867 eliminate_using_constants (enum tree_code opcode,
868 vec<operand_entry_t> *ops)
869 {
870 operand_entry_t oelast = ops->last ();
871 tree type = TREE_TYPE (oelast->op);
872
873 if (oelast->rank == 0
874 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
875 {
876 switch (opcode)
877 {
878 case BIT_AND_EXPR:
879 if (integer_zerop (oelast->op))
880 {
881 if (ops->length () != 1)
882 {
883 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "Found & 0, removing all other ops\n");
885
886 reassociate_stats.ops_eliminated += ops->length () - 1;
887
888 ops->truncate (0);
889 ops->quick_push (oelast);
890 return;
891 }
892 }
893 else if (integer_all_onesp (oelast->op))
894 {
895 if (ops->length () != 1)
896 {
897 if (dump_file && (dump_flags & TDF_DETAILS))
898 fprintf (dump_file, "Found & -1, removing\n");
899 ops->pop ();
900 reassociate_stats.ops_eliminated++;
901 }
902 }
903 break;
904 case BIT_IOR_EXPR:
905 if (integer_all_onesp (oelast->op))
906 {
907 if (ops->length () != 1)
908 {
909 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Found | -1, removing all other ops\n");
911
912 reassociate_stats.ops_eliminated += ops->length () - 1;
913
914 ops->truncate (0);
915 ops->quick_push (oelast);
916 return;
917 }
918 }
919 else if (integer_zerop (oelast->op))
920 {
921 if (ops->length () != 1)
922 {
923 if (dump_file && (dump_flags & TDF_DETAILS))
924 fprintf (dump_file, "Found | 0, removing\n");
925 ops->pop ();
926 reassociate_stats.ops_eliminated++;
927 }
928 }
929 break;
930 case MULT_EXPR:
931 if (integer_zerop (oelast->op)
932 || (FLOAT_TYPE_P (type)
933 && !HONOR_NANS (type)
934 && !HONOR_SIGNED_ZEROS (type)
935 && real_zerop (oelast->op)))
936 {
937 if (ops->length () != 1)
938 {
939 if (dump_file && (dump_flags & TDF_DETAILS))
940 fprintf (dump_file, "Found * 0, removing all other ops\n");
941
942 reassociate_stats.ops_eliminated += ops->length () - 1;
943 ops->truncate (1);
944 ops->quick_push (oelast);
945 return;
946 }
947 }
948 else if (integer_onep (oelast->op)
949 || (FLOAT_TYPE_P (type)
950 && !HONOR_SNANS (type)
951 && real_onep (oelast->op)))
952 {
953 if (ops->length () != 1)
954 {
955 if (dump_file && (dump_flags & TDF_DETAILS))
956 fprintf (dump_file, "Found * 1, removing\n");
957 ops->pop ();
958 reassociate_stats.ops_eliminated++;
959 return;
960 }
961 }
962 break;
963 case BIT_XOR_EXPR:
964 case PLUS_EXPR:
965 case MINUS_EXPR:
966 if (integer_zerop (oelast->op)
967 || (FLOAT_TYPE_P (type)
968 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
969 && fold_real_zero_addition_p (type, oelast->op,
970 opcode == MINUS_EXPR)))
971 {
972 if (ops->length () != 1)
973 {
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 fprintf (dump_file, "Found [|^+] 0, removing\n");
976 ops->pop ();
977 reassociate_stats.ops_eliminated++;
978 return;
979 }
980 }
981 break;
982 default:
983 break;
984 }
985 }
986 }
987
988
989 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
990 bool, bool);
991
992 /* Structure for tracking and counting operands. */
993 typedef struct oecount_s {
994 int cnt;
995 int id;
996 enum tree_code oecode;
997 tree op;
998 } oecount;
999
1000
1001 /* The heap for the oecount hashtable and the sorted list of operands. */
1002 static vec<oecount> cvec;
1003
1004
1005 /* Oecount hashtable helpers. */
1006
1007 struct oecount_hasher : int_hash <int, 0, 1>
1008 {
1009 static inline hashval_t hash (int);
1010 static inline bool equal (int, int);
1011 };
1012
1013 /* Hash function for oecount. */
1014
1015 inline hashval_t
1016 oecount_hasher::hash (int p)
1017 {
1018 const oecount *c = &cvec[p - 42];
1019 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1020 }
1021
1022 /* Comparison function for oecount. */
1023
1024 inline bool
1025 oecount_hasher::equal (int p1, int p2)
1026 {
1027 const oecount *c1 = &cvec[p1 - 42];
1028 const oecount *c2 = &cvec[p2 - 42];
1029 return (c1->oecode == c2->oecode
1030 && c1->op == c2->op);
1031 }
1032
1033 /* Comparison function for qsort sorting oecount elements by count. */
1034
1035 static int
1036 oecount_cmp (const void *p1, const void *p2)
1037 {
1038 const oecount *c1 = (const oecount *)p1;
1039 const oecount *c2 = (const oecount *)p2;
1040 if (c1->cnt != c2->cnt)
1041 return c1->cnt - c2->cnt;
1042 else
1043 /* If counts are identical, use unique IDs to stabilize qsort. */
1044 return c1->id - c2->id;
1045 }
1046
1047 /* Return TRUE iff STMT represents a builtin call that raises OP
1048 to some exponent. */
1049
1050 static bool
1051 stmt_is_power_of_op (gimple stmt, tree op)
1052 {
1053 tree fndecl;
1054
1055 if (!is_gimple_call (stmt))
1056 return false;
1057
1058 fndecl = gimple_call_fndecl (stmt);
1059
1060 if (!fndecl
1061 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1062 return false;
1063
1064 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1065 {
1066 CASE_FLT_FN (BUILT_IN_POW):
1067 CASE_FLT_FN (BUILT_IN_POWI):
1068 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1069
1070 default:
1071 return false;
1072 }
1073 }
1074
1075 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1076 in place and return the result. Assumes that stmt_is_power_of_op
1077 was previously called for STMT and returned TRUE. */
1078
1079 static HOST_WIDE_INT
1080 decrement_power (gimple stmt)
1081 {
1082 REAL_VALUE_TYPE c, cint;
1083 HOST_WIDE_INT power;
1084 tree arg1;
1085
1086 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1087 {
1088 CASE_FLT_FN (BUILT_IN_POW):
1089 arg1 = gimple_call_arg (stmt, 1);
1090 c = TREE_REAL_CST (arg1);
1091 power = real_to_integer (&c) - 1;
1092 real_from_integer (&cint, VOIDmode, power, SIGNED);
1093 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1094 return power;
1095
1096 CASE_FLT_FN (BUILT_IN_POWI):
1097 arg1 = gimple_call_arg (stmt, 1);
1098 power = TREE_INT_CST_LOW (arg1) - 1;
1099 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1100 return power;
1101
1102 default:
1103 gcc_unreachable ();
1104 }
1105 }
1106
1107 /* Find the single immediate use of STMT's LHS, and replace it
1108 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1109 replace *DEF with OP as well. */
1110
1111 static void
1112 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1113 {
1114 tree lhs;
1115 gimple use_stmt;
1116 use_operand_p use;
1117 gimple_stmt_iterator gsi;
1118
1119 if (is_gimple_call (stmt))
1120 lhs = gimple_call_lhs (stmt);
1121 else
1122 lhs = gimple_assign_lhs (stmt);
1123
1124 gcc_assert (has_single_use (lhs));
1125 single_imm_use (lhs, &use, &use_stmt);
1126 if (lhs == *def)
1127 *def = op;
1128 SET_USE (use, op);
1129 if (TREE_CODE (op) != SSA_NAME)
1130 update_stmt (use_stmt);
1131 gsi = gsi_for_stmt (stmt);
1132 unlink_stmt_vdef (stmt);
1133 reassoc_remove_stmt (&gsi);
1134 release_defs (stmt);
1135 }
1136
1137 /* Walk the linear chain with result *DEF searching for an operation
1138 with operand OP and code OPCODE, and remove it from the chain. *DEF
1139 is updated if there is only one operand but no operation left. */
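/* For example, if *DEF is the result of the chain t1 = a * b; t2 = t1 * c
   and OP is b with OPCODE == MULT_EXPR, the multiplication by b is removed
   and *DEF ends up computing a * c.  */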
1140
1141 static void
1142 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1143 {
1144 gimple stmt = SSA_NAME_DEF_STMT (*def);
1145
1146 do
1147 {
1148 tree name;
1149
1150 if (opcode == MULT_EXPR
1151 && stmt_is_power_of_op (stmt, op))
1152 {
1153 if (decrement_power (stmt) == 1)
1154 propagate_op_to_single_use (op, stmt, def);
1155 return;
1156 }
1157
1158 name = gimple_assign_rhs1 (stmt);
1159
1160 /* If this is the operation we look for and one of the operands
1161 is ours, simply propagate the other operand into the stmt's
1162 single use. */
1163 if (gimple_assign_rhs_code (stmt) == opcode
1164 && (name == op
1165 || gimple_assign_rhs2 (stmt) == op))
1166 {
1167 if (name == op)
1168 name = gimple_assign_rhs2 (stmt);
1169 propagate_op_to_single_use (name, stmt, def);
1170 return;
1171 }
1172
1173 /* We might have a multiply of two __builtin_pow* calls, and
1174 the operand might be hiding in the rightmost one. */
1175 if (opcode == MULT_EXPR
1176 && gimple_assign_rhs_code (stmt) == opcode
1177 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1178 && has_single_use (gimple_assign_rhs2 (stmt)))
1179 {
1180 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1181 if (stmt_is_power_of_op (stmt2, op))
1182 {
1183 if (decrement_power (stmt2) == 1)
1184 propagate_op_to_single_use (op, stmt2, def);
1185 return;
1186 }
1187 }
1188
1189 /* Continue walking the chain. */
1190 gcc_assert (name != op
1191 && TREE_CODE (name) == SSA_NAME);
1192 stmt = SSA_NAME_DEF_STMT (name);
1193 }
1194 while (1);
1195 }
1196
1197 /* Returns true if statement S1 dominates statement S2. Like
1198 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1199
1200 static bool
1201 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1202 {
1203 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1204
1205 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1206 SSA_NAME. Assume it lives at the beginning of function and
1207 thus dominates everything. */
1208 if (!bb1 || s1 == s2)
1209 return true;
1210
1211 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1212 if (!bb2)
1213 return false;
1214
1215 if (bb1 == bb2)
1216 {
1217 /* PHIs in the same basic block are assumed to be
1218 executed all in parallel; if only one stmt is a PHI,
1219 it dominates the other stmt in the same basic block. */
1220 if (gimple_code (s1) == GIMPLE_PHI)
1221 return true;
1222
1223 if (gimple_code (s2) == GIMPLE_PHI)
1224 return false;
1225
1226 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1227
1228 if (gimple_uid (s1) < gimple_uid (s2))
1229 return true;
1230
1231 if (gimple_uid (s1) > gimple_uid (s2))
1232 return false;
1233
1234 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1235 unsigned int uid = gimple_uid (s1);
1236 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1237 {
1238 gimple s = gsi_stmt (gsi);
1239 if (gimple_uid (s) != uid)
1240 break;
1241 if (s == s2)
1242 return true;
1243 }
1244
1245 return false;
1246 }
1247
1248 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1249 }
1250
1251 /* Insert STMT after INSERT_POINT. */
1252
1253 static void
1254 insert_stmt_after (gimple stmt, gimple insert_point)
1255 {
1256 gimple_stmt_iterator gsi;
1257 basic_block bb;
1258
1259 if (gimple_code (insert_point) == GIMPLE_PHI)
1260 bb = gimple_bb (insert_point);
1261 else if (!stmt_ends_bb_p (insert_point))
1262 {
1263 gsi = gsi_for_stmt (insert_point);
1264 gimple_set_uid (stmt, gimple_uid (insert_point));
1265 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1266 return;
1267 }
1268 else
1269 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1270 thus if it must end a basic block, it should be a call that can
1271 throw, or some assignment that can throw. If it throws, the LHS
1272 of it will not be initialized though, so only valid places using
1273 the SSA_NAME should be dominated by the fallthru edge. */
1274 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1275 gsi = gsi_after_labels (bb);
1276 if (gsi_end_p (gsi))
1277 {
1278 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1279 gimple_set_uid (stmt,
1280 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1281 }
1282 else
1283 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1284 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1285 }
1286
1287 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1288 the result. Places the statement after the definition of either
1289 OP1 or OP2. Returns the new statement. */
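/* For example, build_and_add_sum (type, a, b, PLUS_EXPR) creates tmp = a + b,
   inserts it after the later of the definitions of a and b (or at the start
   of the function if neither has a real defining statement), and returns the
   new assignment.  */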
1290
1291 static gimple
1292 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1293 {
1294 gimple op1def = NULL, op2def = NULL;
1295 gimple_stmt_iterator gsi;
1296 tree op;
1297 gassign *sum;
1298
1299 /* Create the addition statement. */
1300 op = make_ssa_name (type);
1301 sum = gimple_build_assign (op, opcode, op1, op2);
1302
1303 /* Find an insertion place and insert. */
1304 if (TREE_CODE (op1) == SSA_NAME)
1305 op1def = SSA_NAME_DEF_STMT (op1);
1306 if (TREE_CODE (op2) == SSA_NAME)
1307 op2def = SSA_NAME_DEF_STMT (op2);
1308 if ((!op1def || gimple_nop_p (op1def))
1309 && (!op2def || gimple_nop_p (op2def)))
1310 {
1311 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1312 if (gsi_end_p (gsi))
1313 {
1314 gimple_stmt_iterator gsi2
1315 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1316 gimple_set_uid (sum,
1317 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1318 }
1319 else
1320 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1321 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1322 }
1323 else
1324 {
1325 gimple insert_point;
1326 if ((!op1def || gimple_nop_p (op1def))
1327 || (op2def && !gimple_nop_p (op2def)
1328 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1329 insert_point = op2def;
1330 else
1331 insert_point = op1def;
1332 insert_stmt_after (sum, insert_point);
1333 }
1334 update_stmt (sum);
1335
1336 return sum;
1337 }
1338
1339 /* Perform un-distribution of divisions and multiplications.
1340 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1341 to (A + B) / X for real X.
1342
1343 The algorithm is organized as follows.
1344
1345 - First we walk the addition chain *OPS looking for summands that
1346 are defined by a multiplication or a real division. This results
1347 in the candidates bitmap with relevant indices into *OPS.
1348
1349 - Second we build the chains of multiplications or divisions for
1350 these candidates, counting the number of occurrences of (operand, code)
1351 pairs in all of the candidates chains.
1352
1353 - Third we sort the (operand, code) pairs by number of occurrence and
1354 process them starting with the pair with the most uses.
1355
1356 * For each such pair we walk the candidates again to build a
1357 second candidate bitmap noting all multiplication/division chains
1358 that have at least one occurrence of (operand, code).
1359
1360 * We build an alternate addition chain only covering these
1361 candidates with one (operand, code) operation removed from their
1362 multiplication/division chain.
1363
1364 * The first candidate gets replaced by the alternate addition chain
1365 multiplied/divided by the operand.
1366
1367 * All candidate chains get disabled for further processing and
1368 processing of (operand, code) pairs continues.
1369
1370 The alternate addition chains built are re-processed by the main
1371 reassociation algorithm which allows optimizing a * x * y + b * y * x
1372 to (a + b) * x * y in one invocation of the reassociation pass. */
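/* As a concrete trace, for OPS == { a*x, b*x, c } under PLUS_EXPR the pair
   (x, MULT_EXPR) occurs twice, so both candidate chains are stripped of x,
   their sum a + b is built, and OPS becomes { (a + b) * x, 0, c }, which the
   main reassociation pass then processes again.  */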
1373
1374 static bool
1375 undistribute_ops_list (enum tree_code opcode,
1376 vec<operand_entry_t> *ops, struct loop *loop)
1377 {
1378 unsigned int length = ops->length ();
1379 operand_entry_t oe1;
1380 unsigned i, j;
1381 sbitmap candidates, candidates2;
1382 unsigned nr_candidates, nr_candidates2;
1383 sbitmap_iterator sbi0;
1384 vec<operand_entry_t> *subops;
1385 bool changed = false;
1386 int next_oecount_id = 0;
1387
1388 if (length <= 1
1389 || opcode != PLUS_EXPR)
1390 return false;
1391
1392 /* Build a list of candidates to process. */
1393 candidates = sbitmap_alloc (length);
1394 bitmap_clear (candidates);
1395 nr_candidates = 0;
1396 FOR_EACH_VEC_ELT (*ops, i, oe1)
1397 {
1398 enum tree_code dcode;
1399 gimple oe1def;
1400
1401 if (TREE_CODE (oe1->op) != SSA_NAME)
1402 continue;
1403 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1404 if (!is_gimple_assign (oe1def))
1405 continue;
1406 dcode = gimple_assign_rhs_code (oe1def);
1407 if ((dcode != MULT_EXPR
1408 && dcode != RDIV_EXPR)
1409 || !is_reassociable_op (oe1def, dcode, loop))
1410 continue;
1411
1412 bitmap_set_bit (candidates, i);
1413 nr_candidates++;
1414 }
1415
1416 if (nr_candidates < 2)
1417 {
1418 sbitmap_free (candidates);
1419 return false;
1420 }
1421
1422 if (dump_file && (dump_flags & TDF_DETAILS))
1423 {
1424 fprintf (dump_file, "searching for un-distribute opportunities ");
1425 print_generic_expr (dump_file,
1426 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1427 fprintf (dump_file, " %d\n", nr_candidates);
1428 }
1429
1430 /* Build linearized sub-operand lists and the counting table. */
1431 cvec.create (0);
1432
1433 hash_table<oecount_hasher> ctable (15);
1434
1435 /* ??? Macro arguments cannot have multi-argument template types in
1436 them. This typedef is needed to workaround that limitation. */
1437 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1438 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1439 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1440 {
1441 gimple oedef;
1442 enum tree_code oecode;
1443 unsigned j;
1444
1445 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1446 oecode = gimple_assign_rhs_code (oedef);
1447 linearize_expr_tree (&subops[i], oedef,
1448 associative_tree_code (oecode), false);
1449
1450 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1451 {
1452 oecount c;
1453 int *slot;
1454 int idx;
1455 c.oecode = oecode;
1456 c.cnt = 1;
1457 c.id = next_oecount_id++;
1458 c.op = oe1->op;
1459 cvec.safe_push (c);
1460 idx = cvec.length () + 41;
1461 slot = ctable.find_slot (idx, INSERT);
1462 if (!*slot)
1463 {
1464 *slot = idx;
1465 }
1466 else
1467 {
1468 cvec.pop ();
1469 cvec[*slot - 42].cnt++;
1470 }
1471 }
1472 }
1473
1474 /* Sort the counting table. */
1475 cvec.qsort (oecount_cmp);
1476
1477 if (dump_file && (dump_flags & TDF_DETAILS))
1478 {
1479 oecount *c;
1480 fprintf (dump_file, "Candidates:\n");
1481 FOR_EACH_VEC_ELT (cvec, j, c)
1482 {
1483 fprintf (dump_file, " %u %s: ", c->cnt,
1484 c->oecode == MULT_EXPR
1485 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1486 print_generic_expr (dump_file, c->op, 0);
1487 fprintf (dump_file, "\n");
1488 }
1489 }
1490
1491 /* Process the (operand, code) pairs in order of most occurrence. */
1492 candidates2 = sbitmap_alloc (length);
1493 while (!cvec.is_empty ())
1494 {
1495 oecount *c = &cvec.last ();
1496 if (c->cnt < 2)
1497 break;
1498
1499 /* Now collect the operands in the outer chain that contain
1500 the common operand in their inner chain. */
1501 bitmap_clear (candidates2);
1502 nr_candidates2 = 0;
1503 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1504 {
1505 gimple oedef;
1506 enum tree_code oecode;
1507 unsigned j;
1508 tree op = (*ops)[i]->op;
1509
1510 /* If we undistributed in this chain already this may be
1511 a constant. */
1512 if (TREE_CODE (op) != SSA_NAME)
1513 continue;
1514
1515 oedef = SSA_NAME_DEF_STMT (op);
1516 oecode = gimple_assign_rhs_code (oedef);
1517 if (oecode != c->oecode)
1518 continue;
1519
1520 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1521 {
1522 if (oe1->op == c->op)
1523 {
1524 bitmap_set_bit (candidates2, i);
1525 ++nr_candidates2;
1526 break;
1527 }
1528 }
1529 }
1530
1531 if (nr_candidates2 >= 2)
1532 {
1533 operand_entry_t oe1, oe2;
1534 gimple prod;
1535 int first = bitmap_first_set_bit (candidates2);
1536
1537 /* Build the new addition chain. */
1538 oe1 = (*ops)[first];
1539 if (dump_file && (dump_flags & TDF_DETAILS))
1540 {
1541 fprintf (dump_file, "Building (");
1542 print_generic_expr (dump_file, oe1->op, 0);
1543 }
1544 zero_one_operation (&oe1->op, c->oecode, c->op);
1545 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1546 {
1547 gimple sum;
1548 oe2 = (*ops)[i];
1549 if (dump_file && (dump_flags & TDF_DETAILS))
1550 {
1551 fprintf (dump_file, " + ");
1552 print_generic_expr (dump_file, oe2->op, 0);
1553 }
1554 zero_one_operation (&oe2->op, c->oecode, c->op);
1555 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1556 oe1->op, oe2->op, opcode);
1557 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1558 oe2->rank = 0;
1559 oe1->op = gimple_get_lhs (sum);
1560 }
1561
1562 /* Apply the multiplication/division. */
1563 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1564 oe1->op, c->op, c->oecode);
1565 if (dump_file && (dump_flags & TDF_DETAILS))
1566 {
1567 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1568 print_generic_expr (dump_file, c->op, 0);
1569 fprintf (dump_file, "\n");
1570 }
1571
1572 /* Record it in the addition chain and disable further
1573 undistribution with this op. */
1574 oe1->op = gimple_assign_lhs (prod);
1575 oe1->rank = get_rank (oe1->op);
1576 subops[first].release ();
1577
1578 changed = true;
1579 }
1580
1581 cvec.pop ();
1582 }
1583
1584 for (i = 0; i < ops->length (); ++i)
1585 subops[i].release ();
1586 free (subops);
1587 cvec.release ();
1588 sbitmap_free (candidates);
1589 sbitmap_free (candidates2);
1590
1591 return changed;
1592 }
1593
1594 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1595 expression, examine the other OPS to see if any of them are comparisons
1596 of the same values, which we may be able to combine or eliminate.
1597 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1598
1599 static bool
1600 eliminate_redundant_comparison (enum tree_code opcode,
1601 vec<operand_entry_t> *ops,
1602 unsigned int currindex,
1603 operand_entry_t curr)
1604 {
1605 tree op1, op2;
1606 enum tree_code lcode, rcode;
1607 gimple def1, def2;
1608 int i;
1609 operand_entry_t oe;
1610
1611 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1612 return false;
1613
1614 /* Check that CURR is a comparison. */
1615 if (TREE_CODE (curr->op) != SSA_NAME)
1616 return false;
1617 def1 = SSA_NAME_DEF_STMT (curr->op);
1618 if (!is_gimple_assign (def1))
1619 return false;
1620 lcode = gimple_assign_rhs_code (def1);
1621 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1622 return false;
1623 op1 = gimple_assign_rhs1 (def1);
1624 op2 = gimple_assign_rhs2 (def1);
1625
1626 /* Now look for a similar comparison in the remaining OPS. */
1627 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1628 {
1629 tree t;
1630
1631 if (TREE_CODE (oe->op) != SSA_NAME)
1632 continue;
1633 def2 = SSA_NAME_DEF_STMT (oe->op);
1634 if (!is_gimple_assign (def2))
1635 continue;
1636 rcode = gimple_assign_rhs_code (def2);
1637 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1638 continue;
1639
1640 /* If we got here, we have a match. See if we can combine the
1641 two comparisons. */
1642 if (opcode == BIT_IOR_EXPR)
1643 t = maybe_fold_or_comparisons (lcode, op1, op2,
1644 rcode, gimple_assign_rhs1 (def2),
1645 gimple_assign_rhs2 (def2));
1646 else
1647 t = maybe_fold_and_comparisons (lcode, op1, op2,
1648 rcode, gimple_assign_rhs1 (def2),
1649 gimple_assign_rhs2 (def2));
1650 if (!t)
1651 continue;
1652
1653 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1654 always give us a boolean_type_node value back. If the original
1655 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1656 we need to convert. */
1657 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1658 t = fold_convert (TREE_TYPE (curr->op), t);
1659
1660 if (TREE_CODE (t) != INTEGER_CST
1661 && !operand_equal_p (t, curr->op, 0))
1662 {
1663 enum tree_code subcode;
1664 tree newop1, newop2;
1665 if (!COMPARISON_CLASS_P (t))
1666 continue;
1667 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1668 STRIP_USELESS_TYPE_CONVERSION (newop1);
1669 STRIP_USELESS_TYPE_CONVERSION (newop2);
1670 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1671 continue;
1672 }
1673
1674 if (dump_file && (dump_flags & TDF_DETAILS))
1675 {
1676 fprintf (dump_file, "Equivalence: ");
1677 print_generic_expr (dump_file, curr->op, 0);
1678 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1679 print_generic_expr (dump_file, oe->op, 0);
1680 fprintf (dump_file, " -> ");
1681 print_generic_expr (dump_file, t, 0);
1682 fprintf (dump_file, "\n");
1683 }
1684
1685 /* Now we can delete oe, as it has been subsumed by the new combined
1686 expression t. */
1687 ops->ordered_remove (i);
1688 reassociate_stats.ops_eliminated ++;
1689
1690 /* If t is the same as curr->op, we're done. Otherwise we must
1691 replace curr->op with t. Special case is if we got a constant
1692 back, in which case we add it to the end instead of in place of
1693 the current entry. */
1694 if (TREE_CODE (t) == INTEGER_CST)
1695 {
1696 ops->ordered_remove (currindex);
1697 add_to_ops_vec (ops, t);
1698 }
1699 else if (!operand_equal_p (t, curr->op, 0))
1700 {
1701 gimple sum;
1702 enum tree_code subcode;
1703 tree newop1;
1704 tree newop2;
1705 gcc_assert (COMPARISON_CLASS_P (t));
1706 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1707 STRIP_USELESS_TYPE_CONVERSION (newop1);
1708 STRIP_USELESS_TYPE_CONVERSION (newop2);
1709 gcc_checking_assert (is_gimple_val (newop1)
1710 && is_gimple_val (newop2));
1711 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1712 curr->op = gimple_get_lhs (sum);
1713 }
1714 return true;
1715 }
1716
1717 return false;
1718 }
1719
1720 /* Perform various identities and other optimizations on the list of
1721 operand entries, stored in OPS. The tree code for the binary
1722 operation between all the operands is OPCODE. */
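/* For example, with OPCODE == PLUS_EXPR and OPS == { a, b, 3, 5 } the two
   trailing constants are first folded into 8, giving { a, b, 8 }, before the
   pairwise elimination routines above are applied to the remaining entries.  */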
1723
1724 static void
1725 optimize_ops_list (enum tree_code opcode,
1726 vec<operand_entry_t> *ops)
1727 {
1728 unsigned int length = ops->length ();
1729 unsigned int i;
1730 operand_entry_t oe;
1731 operand_entry_t oelast = NULL;
1732 bool iterate = false;
1733
1734 if (length == 1)
1735 return;
1736
1737 oelast = ops->last ();
1738
1739 /* If the last two are constants, pop the constants off, merge them
1740 and try the next two. */
1741 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1742 {
1743 operand_entry_t oelm1 = (*ops)[length - 2];
1744
1745 if (oelm1->rank == 0
1746 && is_gimple_min_invariant (oelm1->op)
1747 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1748 TREE_TYPE (oelast->op)))
1749 {
1750 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1751 oelm1->op, oelast->op);
1752
1753 if (folded && is_gimple_min_invariant (folded))
1754 {
1755 if (dump_file && (dump_flags & TDF_DETAILS))
1756 fprintf (dump_file, "Merging constants\n");
1757
1758 ops->pop ();
1759 ops->pop ();
1760
1761 add_to_ops_vec (ops, folded);
1762 reassociate_stats.constants_eliminated++;
1763
1764 optimize_ops_list (opcode, ops);
1765 return;
1766 }
1767 }
1768 }
1769
1770 eliminate_using_constants (opcode, ops);
1771 oelast = NULL;
1772
1773 for (i = 0; ops->iterate (i, &oe);)
1774 {
1775 bool done = false;
1776
1777 if (eliminate_not_pairs (opcode, ops, i, oe))
1778 return;
1779 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1780 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1781 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1782 {
1783 if (done)
1784 return;
1785 iterate = true;
1786 oelast = NULL;
1787 continue;
1788 }
1789 oelast = oe;
1790 i++;
1791 }
1792
1793 length = ops->length ();
1794 oelast = ops->last ();
1795
1796 if (iterate)
1797 optimize_ops_list (opcode, ops);
1798 }
1799
1800 /* The following functions are subroutines to optimize_range_tests and allow
1801 it to try to change a logical combination of comparisons into a range
1802 test.
1803
1804 For example, both
1805 X == 2 || X == 5 || X == 3 || X == 4
1806 and
1807 X >= 2 && X <= 5
1808 are converted to
1809 (unsigned) (X - 2) <= 3
1810
1811 For more information see the comments above fold_range_test in fold-const.c;
1812 this implementation is for GIMPLE. */
1813
1814 struct range_entry
1815 {
1816 tree exp;
1817 tree low;
1818 tree high;
1819 bool in_p;
1820 bool strict_overflow_p;
1821 unsigned int idx, next;
1822 };
1823
1824 /* This is similar to make_range in fold-const.c, but on top of
1825 GIMPLE instead of trees. If EXP is non-NULL, it should be
1826 an SSA_NAME and the STMT argument is ignored; otherwise the STMT
1827 argument should be a GIMPLE_COND. */
1828
1829 static void
1830 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1831 {
1832 int in_p;
1833 tree low, high;
1834 bool is_bool, strict_overflow_p;
1835
1836 r->exp = NULL_TREE;
1837 r->in_p = false;
1838 r->strict_overflow_p = false;
1839 r->low = NULL_TREE;
1840 r->high = NULL_TREE;
1841 if (exp != NULL_TREE
1842 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1843 return;
1844
1845 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1846 and see if we can refine the range. Some of the cases below may not
1847 happen, but it doesn't seem worth worrying about this. We "continue"
1848 the outer loop when we've changed something; otherwise we "break"
1849 the switch, which will "break" the while. */
1850 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1851 high = low;
1852 in_p = 0;
1853 strict_overflow_p = false;
1854 is_bool = false;
1855 if (exp == NULL_TREE)
1856 is_bool = true;
1857 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1858 {
1859 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1860 is_bool = true;
1861 else
1862 return;
1863 }
1864 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1865 is_bool = true;
1866
1867 while (1)
1868 {
1869 enum tree_code code;
1870 tree arg0, arg1, exp_type;
1871 tree nexp;
1872 location_t loc;
1873
1874 if (exp != NULL_TREE)
1875 {
1876 if (TREE_CODE (exp) != SSA_NAME
1877 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1878 break;
1879
1880 stmt = SSA_NAME_DEF_STMT (exp);
1881 if (!is_gimple_assign (stmt))
1882 break;
1883
1884 code = gimple_assign_rhs_code (stmt);
1885 arg0 = gimple_assign_rhs1 (stmt);
1886 arg1 = gimple_assign_rhs2 (stmt);
1887 exp_type = TREE_TYPE (exp);
1888 }
1889 else
1890 {
1891 code = gimple_cond_code (stmt);
1892 arg0 = gimple_cond_lhs (stmt);
1893 arg1 = gimple_cond_rhs (stmt);
1894 exp_type = boolean_type_node;
1895 }
1896
1897 if (TREE_CODE (arg0) != SSA_NAME)
1898 break;
1899 loc = gimple_location (stmt);
1900 switch (code)
1901 {
1902 case BIT_NOT_EXPR:
1903 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1904 /* Ensure the range is either +[-,0], +[0,0],
1905 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1906 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1907 or similar expression of unconditional true or
1908 false, it should not be negated. */
1909 && ((high && integer_zerop (high))
1910 || (low && integer_onep (low))))
1911 {
1912 in_p = !in_p;
1913 exp = arg0;
1914 continue;
1915 }
1916 break;
1917 case SSA_NAME:
1918 exp = arg0;
1919 continue;
1920 CASE_CONVERT:
1921 if (is_bool)
1922 goto do_default;
1923 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1924 {
1925 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1926 is_bool = true;
1927 else
1928 return;
1929 }
1930 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1931 is_bool = true;
1932 goto do_default;
1933 case EQ_EXPR:
1934 case NE_EXPR:
1935 case LT_EXPR:
1936 case LE_EXPR:
1937 case GE_EXPR:
1938 case GT_EXPR:
1939 is_bool = true;
1940 /* FALLTHRU */
1941 default:
1942 if (!is_bool)
1943 return;
1944 do_default:
1945 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1946 &low, &high, &in_p,
1947 &strict_overflow_p);
1948 if (nexp != NULL_TREE)
1949 {
1950 exp = nexp;
1951 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1952 continue;
1953 }
1954 break;
1955 }
1956 break;
1957 }
1958 if (is_bool)
1959 {
1960 r->exp = exp;
1961 r->in_p = in_p;
1962 r->low = low;
1963 r->high = high;
1964 r->strict_overflow_p = strict_overflow_p;
1965 }
1966 }
1967
1968 /* Comparison function for qsort. Sort entries
1969 without SSA_NAME exp first, then with SSA_NAMEs sorted
1970 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1971 by increasing ->low and if ->low is the same, by increasing
1972 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1973 maximum. */
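/* For illustration (hypothetical entries): an entry whose exp is not an
   SSA_NAME sorts before every SSA_NAME entry; among entries for the same
   SSA_NAME x_5, +[2, 5] sorts before +[7, 9], and [2, -] (NULL high,
   i.e. maximum) sorts after [2, 9].  */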
1974
1975 static int
1976 range_entry_cmp (const void *a, const void *b)
1977 {
1978 const struct range_entry *p = (const struct range_entry *) a;
1979 const struct range_entry *q = (const struct range_entry *) b;
1980
1981 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1982 {
1983 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1984 {
1985 /* Group range_entries for the same SSA_NAME together. */
1986 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1987 return -1;
1988 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1989 return 1;
1990 /* If ->low is different, NULL low goes first, then by
1991 ascending low. */
1992 if (p->low != NULL_TREE)
1993 {
1994 if (q->low != NULL_TREE)
1995 {
1996 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1997 p->low, q->low);
1998 if (tem && integer_onep (tem))
1999 return -1;
2000 tem = fold_binary (GT_EXPR, boolean_type_node,
2001 p->low, q->low);
2002 if (tem && integer_onep (tem))
2003 return 1;
2004 }
2005 else
2006 return 1;
2007 }
2008 else if (q->low != NULL_TREE)
2009 return -1;
2010 /* If ->high is different, NULL high goes last, before that by
2011 ascending high. */
2012 if (p->high != NULL_TREE)
2013 {
2014 if (q->high != NULL_TREE)
2015 {
2016 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2017 p->high, q->high);
2018 if (tem && integer_onep (tem))
2019 return -1;
2020 tem = fold_binary (GT_EXPR, boolean_type_node,
2021 p->high, q->high);
2022 if (tem && integer_onep (tem))
2023 return 1;
2024 }
2025 else
2026 return -1;
2027 }
2028 else if (q->high != NULL_TREE)
2029 return 1;
2030 /* If both ranges are the same, sort below by ascending idx. */
2031 }
2032 else
2033 return 1;
2034 }
2035 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2036 return -1;
2037
2038 if (p->idx < q->idx)
2039 return -1;
2040 else
2041 {
2042 gcc_checking_assert (p->idx > q->idx);
2043 return 1;
2044 }
2045 }
2046
2047 /* Helper routine of optimize_range_tests.
2048 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2049 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2050 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2051 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2052 an array of COUNT pointers to other ranges. Return
2053 true if the range merge has been successful.
2054 If OPCODE is ERROR_MARK, this is called from within
2055 maybe_optimize_range_tests and is performing inter-bb range optimization.
2056 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2057 oe->rank. */
2058
2059 static bool
2060 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2061 struct range_entry **otherrangep,
2062 unsigned int count, enum tree_code opcode,
2063 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2064 bool in_p, tree low, tree high, bool strict_overflow_p)
2065 {
2066 operand_entry_t oe = (*ops)[range->idx];
2067 tree op = oe->op;
2068 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2069 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2070 location_t loc = gimple_location (stmt);
2071 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2072 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2073 in_p, low, high);
2074 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2075 gimple_stmt_iterator gsi;
2076 unsigned int i;
2077
2078 if (tem == NULL_TREE)
2079 return false;
2080
2081 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2082 warning_at (loc, OPT_Wstrict_overflow,
2083 "assuming signed overflow does not occur "
2084 "when simplifying range test");
2085
2086 if (dump_file && (dump_flags & TDF_DETAILS))
2087 {
2088 struct range_entry *r;
2089 fprintf (dump_file, "Optimizing range tests ");
2090 print_generic_expr (dump_file, range->exp, 0);
2091 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2092 print_generic_expr (dump_file, range->low, 0);
2093 fprintf (dump_file, ", ");
2094 print_generic_expr (dump_file, range->high, 0);
2095 fprintf (dump_file, "]");
2096 for (i = 0; i < count; i++)
2097 {
2098 if (otherrange)
2099 r = otherrange + i;
2100 else
2101 r = otherrangep[i];
2102 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2103 print_generic_expr (dump_file, r->low, 0);
2104 fprintf (dump_file, ", ");
2105 print_generic_expr (dump_file, r->high, 0);
2106 fprintf (dump_file, "]");
2107 }
2108 fprintf (dump_file, "\n into ");
2109 print_generic_expr (dump_file, tem, 0);
2110 fprintf (dump_file, "\n");
2111 }
2112
2113 if (opcode == BIT_IOR_EXPR
2114 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2115 tem = invert_truthvalue_loc (loc, tem);
2116
2117 tem = fold_convert_loc (loc, optype, tem);
2118 gsi = gsi_for_stmt (stmt);
2119 unsigned int uid = gimple_uid (stmt);
2120 /* In rare cases range->exp can be equal to lhs of stmt.
2121 In that case we have to insert after the stmt rather than before
2122 it. If stmt is a PHI, insert it at the start of the basic block. */
2123 if (op != range->exp)
2124 {
2125 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2126 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2127 GSI_SAME_STMT);
2128 gsi_prev (&gsi);
2129 }
2130 else if (gimple_code (stmt) != GIMPLE_PHI)
2131 {
2132 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2133 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2134 GSI_CONTINUE_LINKING);
2135 }
2136 else
2137 {
2138 gsi = gsi_after_labels (gimple_bb (stmt));
2139 if (!gsi_end_p (gsi))
2140 uid = gimple_uid (gsi_stmt (gsi));
2141 else
2142 {
2143 gsi = gsi_start_bb (gimple_bb (stmt));
2144 uid = 1;
2145 while (!gsi_end_p (gsi))
2146 {
2147 uid = gimple_uid (gsi_stmt (gsi));
2148 gsi_next (&gsi);
2149 }
2150 }
2151 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2152 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2153 GSI_SAME_STMT);
2154 if (gsi_end_p (gsi))
2155 gsi = gsi_last_bb (gimple_bb (stmt));
2156 else
2157 gsi_prev (&gsi);
2158 }
2159 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2160 if (gimple_uid (gsi_stmt (gsi)))
2161 break;
2162 else
2163 gimple_set_uid (gsi_stmt (gsi), uid);
2164
2165 oe->op = tem;
2166 range->exp = exp;
2167 range->low = low;
2168 range->high = high;
2169 range->in_p = in_p;
2170 range->strict_overflow_p = false;
2171
2172 for (i = 0; i < count; i++)
2173 {
2174 if (otherrange)
2175 range = otherrange + i;
2176 else
2177 range = otherrangep[i];
2178 oe = (*ops)[range->idx];
2179 /* Now change all the other range test immediate uses, so that
2180 those tests will be optimized away. */
2181 if (opcode == ERROR_MARK)
2182 {
2183 if (oe->op)
2184 oe->op = build_int_cst (TREE_TYPE (oe->op),
2185 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2186 else
2187 oe->op = (oe->rank == BIT_IOR_EXPR
2188 ? boolean_false_node : boolean_true_node);
2189 }
2190 else
2191 oe->op = error_mark_node;
2192 range->exp = NULL_TREE;
2193 }
2194 return true;
2195 }
2196
2197 /* Optimize X == CST1 || X == CST2
2198 if popcount (CST1 ^ CST2) == 1 into
2199 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2200 Similarly for ranges. E.g.
2201 X != 2 && X != 3 && X != 10 && X != 11
2202 will be transformed by the previous optimization into
2203 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2204 and this loop can transform that into
2205 !(((X & ~8) - 2U) <= 1U). */
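/* A concrete instance with made-up constants: for X == 2 || X == 6 we
   have lowi = highi = 2 and lowj = highj = 6, lowxor = 4 is a power of
   two, so the merged test becomes (X & ~4) == 2.  */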
2206
2207 static bool
2208 optimize_range_tests_xor (enum tree_code opcode, tree type,
2209 tree lowi, tree lowj, tree highi, tree highj,
2210 vec<operand_entry_t> *ops,
2211 struct range_entry *rangei,
2212 struct range_entry *rangej)
2213 {
2214 tree lowxor, highxor, tem, exp;
2215 /* Check lowi ^ lowj == highi ^ highj and
2216 popcount (lowi ^ lowj) == 1. */
2217 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2218 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2219 return false;
2220 if (!integer_pow2p (lowxor))
2221 return false;
2222 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2223 if (!tree_int_cst_equal (lowxor, highxor))
2224 return false;
2225
2226 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2227 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2228 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2229 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2230 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2231 NULL, rangei->in_p, lowj, highj,
2232 rangei->strict_overflow_p
2233 || rangej->strict_overflow_p))
2234 return true;
2235 return false;
2236 }
2237
2238 /* Optimize X == CST1 || X == CST2
2239 if popcount (CST2 - CST1) == 1 into
2240 ((X - CST1) & ~(CST2 - CST1)) == 0.
2241 Similarly for ranges. E.g.
2242 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2243 || X == 75 || X == 45
2244 will be transformed by the previous optimization into
2245 (X - 43U) <= 3U || (X - 75U) <= 3U
2246 and this loop can transform that into
2247 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2248 static bool
2249 optimize_range_tests_diff (enum tree_code opcode, tree type,
2250 tree lowi, tree lowj, tree highi, tree highj,
2251 vec<operand_entry_t> *ops,
2252 struct range_entry *rangei,
2253 struct range_entry *rangej)
2254 {
2255 tree tem1, tem2, mask;
2256 /* Check highi - lowi == highj - lowj. */
2257 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2258 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2259 return false;
2260 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2261 if (!tree_int_cst_equal (tem1, tem2))
2262 return false;
2263 /* Check popcount (lowj - lowi) == 1. */
2264 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2265 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2266 return false;
2267 if (!integer_pow2p (tem1))
2268 return false;
2269
2270 type = unsigned_type_for (type);
2271 tem1 = fold_convert (type, tem1);
2272 tem2 = fold_convert (type, tem2);
2273 lowi = fold_convert (type, lowi);
2274 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2275 tem1 = fold_binary (MINUS_EXPR, type,
2276 fold_convert (type, rangei->exp), lowi);
2277 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2278 lowj = build_int_cst (type, 0);
2279 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2280 NULL, rangei->in_p, lowj, tem2,
2281 rangei->strict_overflow_p
2282 || rangej->strict_overflow_p))
2283 return true;
2284 return false;
2285 }
2286
2287 /* Perform the checks common to optimize_range_tests_xor and
2288 optimize_range_tests_diff.
2289 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2290 otherwise call optimize_range_tests_diff. */
2291
2292 static bool
2293 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2294 bool optimize_xor, vec<operand_entry_t> *ops,
2295 struct range_entry *ranges)
2296 {
2297 int i, j;
2298 bool any_changes = false;
2299 for (i = first; i < length; i++)
2300 {
2301 tree lowi, highi, lowj, highj, type, tem;
2302
2303 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2304 continue;
2305 type = TREE_TYPE (ranges[i].exp);
2306 if (!INTEGRAL_TYPE_P (type))
2307 continue;
2308 lowi = ranges[i].low;
2309 if (lowi == NULL_TREE)
2310 lowi = TYPE_MIN_VALUE (type);
2311 highi = ranges[i].high;
2312 if (highi == NULL_TREE)
2313 continue;
2314 for (j = i + 1; j < length && j < i + 64; j++)
2315 {
2316 bool changes;
2317 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2318 continue;
2319 lowj = ranges[j].low;
2320 if (lowj == NULL_TREE)
2321 continue;
2322 highj = ranges[j].high;
2323 if (highj == NULL_TREE)
2324 highj = TYPE_MAX_VALUE (type);
2325 /* Check lowj > highi. */
2326 tem = fold_binary (GT_EXPR, boolean_type_node,
2327 lowj, highi);
2328 if (tem == NULL_TREE || !integer_onep (tem))
2329 continue;
2330 if (optimize_xor)
2331 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2332 highi, highj, ops,
2333 ranges + i, ranges + j);
2334 else
2335 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2336 highi, highj, ops,
2337 ranges + i, ranges + j);
2338 if (changes)
2339 {
2340 any_changes = true;
2341 break;
2342 }
2343 }
2344 }
2345 return any_changes;
2346 }
2347
2348 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2349 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2350 EXP on success, NULL otherwise. */
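/* A hypothetical instance: with PREC 64, TOTALLOW 43, LOW 75 and
   HIGH 78, the initial mask covers HIGH - LOW + 1 = 4 bits (0xf) and is
   then shifted left by LOW - TOTALLOW = 32, yielding a *MASK of
   0xf00000000 (values purely illustrative).  */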
2351
2352 static tree
2353 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2354 wide_int *mask, tree *totallowp)
2355 {
2356 tree tem = int_const_binop (MINUS_EXPR, high, low);
2357 if (tem == NULL_TREE
2358 || TREE_CODE (tem) != INTEGER_CST
2359 || TREE_OVERFLOW (tem)
2360 || tree_int_cst_sgn (tem) == -1
2361 || compare_tree_int (tem, prec) != -1)
2362 return NULL_TREE;
2363
2364 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2365 *mask = wi::shifted_mask (0, max, false, prec);
2366 if (TREE_CODE (exp) == BIT_AND_EXPR
2367 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2368 {
2369 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2370 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2371 if (wi::popcount (msk) == 1
2372 && wi::ltu_p (msk, prec - max))
2373 {
2374 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2375 max += msk.to_uhwi ();
2376 exp = TREE_OPERAND (exp, 0);
2377 if (integer_zerop (low)
2378 && TREE_CODE (exp) == PLUS_EXPR
2379 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2380 {
2381 tree ret = TREE_OPERAND (exp, 0);
2382 STRIP_NOPS (ret);
2383 widest_int bias
2384 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2385 TYPE_PRECISION (TREE_TYPE (low))));
2386 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2387 if (totallowp)
2388 {
2389 *totallowp = tbias;
2390 return ret;
2391 }
2392 else if (!tree_int_cst_lt (totallow, tbias))
2393 return NULL_TREE;
2394 bias = wi::to_widest (tbias);
2395 bias -= wi::to_widest (totallow);
2396 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2397 {
2398 *mask = wi::lshift (*mask, bias);
2399 return ret;
2400 }
2401 }
2402 }
2403 }
2404 if (totallowp)
2405 return exp;
2406 if (!tree_int_cst_lt (totallow, low))
2407 return exp;
2408 tem = int_const_binop (MINUS_EXPR, low, totallow);
2409 if (tem == NULL_TREE
2410 || TREE_CODE (tem) != INTEGER_CST
2411 || TREE_OVERFLOW (tem)
2412 || compare_tree_int (tem, prec - max) == 1)
2413 return NULL_TREE;
2414
2415 *mask = wi::lshift (*mask, wi::to_widest (tem));
2416 return exp;
2417 }
2418
2419 /* Attempt to optimize small range tests using bit test.
2420 E.g.
2421 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2422 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2423 has been optimized by earlier optimizations into:
2424 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2425 As the whole 43 through 82 range spans fewer than 64 values,
2426 for 64-bit word targets optimize that into:
2427 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2428
2429 static bool
2430 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2431 vec<operand_entry_t> *ops,
2432 struct range_entry *ranges)
2433 {
2434 int i, j;
2435 bool any_changes = false;
2436 int prec = GET_MODE_BITSIZE (word_mode);
2437 auto_vec<struct range_entry *, 64> candidates;
2438
2439 for (i = first; i < length - 2; i++)
2440 {
2441 tree lowi, highi, lowj, highj, type;
2442
2443 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2444 continue;
2445 type = TREE_TYPE (ranges[i].exp);
2446 if (!INTEGRAL_TYPE_P (type))
2447 continue;
2448 lowi = ranges[i].low;
2449 if (lowi == NULL_TREE)
2450 lowi = TYPE_MIN_VALUE (type);
2451 highi = ranges[i].high;
2452 if (highi == NULL_TREE)
2453 continue;
2454 wide_int mask;
2455 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2456 highi, &mask, &lowi);
2457 if (exp == NULL_TREE)
2458 continue;
2459 bool strict_overflow_p = ranges[i].strict_overflow_p;
2460 candidates.truncate (0);
2461 int end = MIN (i + 64, length);
2462 for (j = i + 1; j < end; j++)
2463 {
2464 tree exp2;
2465 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2466 continue;
2467 if (ranges[j].exp == exp)
2468 ;
2469 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2470 {
2471 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2472 if (exp2 == exp)
2473 ;
2474 else if (TREE_CODE (exp2) == PLUS_EXPR)
2475 {
2476 exp2 = TREE_OPERAND (exp2, 0);
2477 STRIP_NOPS (exp2);
2478 if (exp2 != exp)
2479 continue;
2480 }
2481 else
2482 continue;
2483 }
2484 else
2485 continue;
2486 lowj = ranges[j].low;
2487 if (lowj == NULL_TREE)
2488 continue;
2489 highj = ranges[j].high;
2490 if (highj == NULL_TREE)
2491 highj = TYPE_MAX_VALUE (type);
2492 wide_int mask2;
2493 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2494 highj, &mask2, NULL);
2495 if (exp2 != exp)
2496 continue;
2497 mask |= mask2;
2498 strict_overflow_p |= ranges[j].strict_overflow_p;
2499 candidates.safe_push (&ranges[j]);
2500 }
2501
2502 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2503 if (candidates.length () >= 2)
2504 {
2505 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2506 wi::to_widest (lowi)
2507 + prec - 1 - wi::clz (mask));
2508 operand_entry_t oe = (*ops)[ranges[i].idx];
2509 tree op = oe->op;
2510 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2511 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2512 location_t loc = gimple_location (stmt);
2513 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2514
2515 /* See if it is cheaper to pretend the minimum value of the
2516 range is 0, if the maximum value is small enough.
2517 We can then avoid the subtraction of the minimum value, but the
2518 mask constant could perhaps be more expensive. */
2519 if (compare_tree_int (lowi, 0) > 0
2520 && compare_tree_int (high, prec) < 0)
2521 {
2522 int cost_diff;
2523 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2524 rtx reg = gen_raw_REG (word_mode, 10000);
2525 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2526 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2527 GEN_INT (-m)), speed_p);
2528 rtx r = immed_wide_int_const (mask, word_mode);
2529 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2530 word_mode, speed_p);
2531 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2532 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2533 word_mode, speed_p);
2534 if (cost_diff > 0)
2535 {
2536 mask = wi::lshift (mask, m);
2537 lowi = build_zero_cst (TREE_TYPE (lowi));
2538 }
2539 }
2540
2541 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2542 false, lowi, high);
2543 if (tem == NULL_TREE || is_gimple_val (tem))
2544 continue;
2545 tree etype = unsigned_type_for (TREE_TYPE (exp));
2546 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2547 fold_convert_loc (loc, etype, exp),
2548 fold_convert_loc (loc, etype, lowi));
2549 exp = fold_convert_loc (loc, integer_type_node, exp);
2550 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2551 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2552 build_int_cst (word_type, 1), exp);
2553 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2554 wide_int_to_tree (word_type, mask));
2555 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2556 build_zero_cst (word_type));
2557 if (is_gimple_val (exp))
2558 continue;
2559
2560 /* The shift might have undefined behavior if TEM is true,
2561 but reassociate_bb isn't prepared to have basic blocks
2562 split when it is running. So, temporarily emit code
2563 with BIT_IOR_EXPR instead of &&, and fix it up in
2564 branch_fixup. */
2565 gimple_seq seq;
2566 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2567 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2568 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2569 gimple_seq seq2;
2570 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2571 gimple_seq_add_seq_without_update (&seq, seq2);
2572 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2573 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2574 gimple g = gimple_build_assign (make_ssa_name (optype),
2575 BIT_IOR_EXPR, tem, exp);
2576 gimple_set_location (g, loc);
2577 gimple_seq_add_stmt_without_update (&seq, g);
2578 exp = gimple_assign_lhs (g);
2579 tree val = build_zero_cst (optype);
2580 if (update_range_test (&ranges[i], NULL, candidates.address (),
2581 candidates.length (), opcode, ops, exp,
2582 seq, false, val, val, strict_overflow_p))
2583 {
2584 any_changes = true;
2585 reassoc_branch_fixups.safe_push (tem);
2586 }
2587 else
2588 gimple_seq_discard (seq);
2589 }
2590 }
2591 return any_changes;
2592 }
2593
2594 /* Optimize range tests, similarly to how fold_range_test optimizes
2595 them on trees. The tree code for the binary
2596 operation between all the operands is OPCODE.
2597 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2598 maybe_optimize_range_tests for inter-bb range optimization.
2599 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2600 GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
2601 the actual opcode. */
2602
2603 static bool
2604 optimize_range_tests (enum tree_code opcode,
2605 vec<operand_entry_t> *ops)
2606 {
2607 unsigned int length = ops->length (), i, j, first;
2608 operand_entry_t oe;
2609 struct range_entry *ranges;
2610 bool any_changes = false;
2611
2612 if (length == 1)
2613 return false;
2614
2615 ranges = XNEWVEC (struct range_entry, length);
2616 for (i = 0; i < length; i++)
2617 {
2618 oe = (*ops)[i];
2619 ranges[i].idx = i;
2620 init_range_entry (ranges + i, oe->op,
2621 oe->op ? NULL :
2622 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2623 /* For | invert it now, we will invert it again before emitting
2624 the optimized expression. */
2625 if (opcode == BIT_IOR_EXPR
2626 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2627 ranges[i].in_p = !ranges[i].in_p;
2628 }
2629
2630 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2631 for (i = 0; i < length; i++)
2632 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2633 break;
2634
2635 /* Try to merge ranges. */
2636 for (first = i; i < length; i++)
2637 {
2638 tree low = ranges[i].low;
2639 tree high = ranges[i].high;
2640 int in_p = ranges[i].in_p;
2641 bool strict_overflow_p = ranges[i].strict_overflow_p;
2642 int update_fail_count = 0;
2643
2644 for (j = i + 1; j < length; j++)
2645 {
2646 if (ranges[i].exp != ranges[j].exp)
2647 break;
2648 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2649 ranges[j].in_p, ranges[j].low, ranges[j].high))
2650 break;
2651 strict_overflow_p |= ranges[j].strict_overflow_p;
2652 }
2653
2654 if (j == i + 1)
2655 continue;
2656
2657 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2658 opcode, ops, ranges[i].exp, NULL, in_p,
2659 low, high, strict_overflow_p))
2660 {
2661 i = j - 1;
2662 any_changes = true;
2663 }
2664 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2665 while update_range_test would fail. */
2666 else if (update_fail_count == 64)
2667 i = j - 1;
2668 else
2669 ++update_fail_count;
2670 }
2671
2672 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2673 ops, ranges);
2674
2675 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2676 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2677 ops, ranges);
2678 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2679 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2680 ops, ranges);
2681
2682 if (any_changes && opcode != ERROR_MARK)
2683 {
2684 j = 0;
2685 FOR_EACH_VEC_ELT (*ops, i, oe)
2686 {
2687 if (oe->op == error_mark_node)
2688 continue;
2689 else if (i != j)
2690 (*ops)[j] = oe;
2691 j++;
2692 }
2693 ops->truncate (j);
2694 }
2695
2696 XDELETEVEC (ranges);
2697 return any_changes;
2698 }
2699
2700 /* Return true if STMT is a cast like:
2701 <bb N>:
2702 ...
2703 _123 = (int) _234;
2704
2705 <bb M>:
2706 # _345 = PHI <_123(N), 1(...), 1(...)>
2707 where _234 has bool type, _123 has single use and
2708 bb N has a single successor M. This is commonly used in
2709 the last block of a range test. */
2710
2711 static bool
2712 final_range_test_p (gimple stmt)
2713 {
2714 basic_block bb, rhs_bb;
2715 edge e;
2716 tree lhs, rhs;
2717 use_operand_p use_p;
2718 gimple use_stmt;
2719
2720 if (!gimple_assign_cast_p (stmt))
2721 return false;
2722 bb = gimple_bb (stmt);
2723 if (!single_succ_p (bb))
2724 return false;
2725 e = single_succ_edge (bb);
2726 if (e->flags & EDGE_COMPLEX)
2727 return false;
2728
2729 lhs = gimple_assign_lhs (stmt);
2730 rhs = gimple_assign_rhs1 (stmt);
2731 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2732 || TREE_CODE (rhs) != SSA_NAME
2733 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2734 return false;
2735
2736 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2737 if (!single_imm_use (lhs, &use_p, &use_stmt))
2738 return false;
2739
2740 if (gimple_code (use_stmt) != GIMPLE_PHI
2741 || gimple_bb (use_stmt) != e->dest)
2742 return false;
2743
2744 /* And that the rhs is defined in the same loop. */
2745 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2746 if (rhs_bb == NULL
2747 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2748 return false;
2749
2750 return true;
2751 }
2752
2753 /* Return true if BB is a suitable basic block for inter-bb range test
2754 optimization. If BACKWARD is true, BB should be the only predecessor
2755 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2756 or is compared against to find a common basic block to which all
2757 conditions branch when true resp. false. If BACKWARD is false, TEST_BB
2758 should be the only predecessor of BB. */
2759
2760 static bool
2761 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2762 bool backward)
2763 {
2764 edge_iterator ei, ei2;
2765 edge e, e2;
2766 gimple stmt;
2767 gphi_iterator gsi;
2768 bool other_edge_seen = false;
2769 bool is_cond;
2770
2771 if (test_bb == bb)
2772 return false;
2773 /* Check last stmt first. */
2774 stmt = last_stmt (bb);
2775 if (stmt == NULL
2776 || (gimple_code (stmt) != GIMPLE_COND
2777 && (backward || !final_range_test_p (stmt)))
2778 || gimple_visited_p (stmt)
2779 || stmt_could_throw_p (stmt)
2780 || *other_bb == bb)
2781 return false;
2782 is_cond = gimple_code (stmt) == GIMPLE_COND;
2783 if (is_cond)
2784 {
2785 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2786 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2787 to *OTHER_BB (if not set yet, try to find it out). */
2788 if (EDGE_COUNT (bb->succs) != 2)
2789 return false;
2790 FOR_EACH_EDGE (e, ei, bb->succs)
2791 {
2792 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2793 return false;
2794 if (e->dest == test_bb)
2795 {
2796 if (backward)
2797 continue;
2798 else
2799 return false;
2800 }
2801 if (e->dest == bb)
2802 return false;
2803 if (*other_bb == NULL)
2804 {
2805 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2806 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2807 return false;
2808 else if (e->dest == e2->dest)
2809 *other_bb = e->dest;
2810 if (*other_bb == NULL)
2811 return false;
2812 }
2813 if (e->dest == *other_bb)
2814 other_edge_seen = true;
2815 else if (backward)
2816 return false;
2817 }
2818 if (*other_bb == NULL || !other_edge_seen)
2819 return false;
2820 }
2821 else if (single_succ (bb) != *other_bb)
2822 return false;
2823
2824 /* Now check all PHIs of *OTHER_BB. */
2825 e = find_edge (bb, *other_bb);
2826 e2 = find_edge (test_bb, *other_bb);
2827 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2828 {
2829 gphi *phi = gsi.phi ();
2830 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2831 corresponding to BB and TEST_BB predecessor must be the same. */
2832 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2833 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2834 {
2835 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2836 one of the PHIs should have the lhs of the last stmt in
2837 that block as PHI arg and that PHI should have 0 or 1
2838 corresponding to it in all other range test basic blocks
2839 considered. */
2840 if (!is_cond)
2841 {
2842 if (gimple_phi_arg_def (phi, e->dest_idx)
2843 == gimple_assign_lhs (stmt)
2844 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2845 || integer_onep (gimple_phi_arg_def (phi,
2846 e2->dest_idx))))
2847 continue;
2848 }
2849 else
2850 {
2851 gimple test_last = last_stmt (test_bb);
2852 if (gimple_code (test_last) != GIMPLE_COND
2853 && gimple_phi_arg_def (phi, e2->dest_idx)
2854 == gimple_assign_lhs (test_last)
2855 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2856 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2857 continue;
2858 }
2859
2860 return false;
2861 }
2862 }
2863 return true;
2864 }
2865
2866 /* Return true if BB doesn't have side-effects that would disallow
2867 range test optimization, all SSA_NAMEs set in the bb are consumed
2868 in the bb and there are no PHIs. */
2869
2870 static bool
2871 no_side_effect_bb (basic_block bb)
2872 {
2873 gimple_stmt_iterator gsi;
2874 gimple last;
2875
2876 if (!gimple_seq_empty_p (phi_nodes (bb)))
2877 return false;
2878 last = last_stmt (bb);
2879 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2880 {
2881 gimple stmt = gsi_stmt (gsi);
2882 tree lhs;
2883 imm_use_iterator imm_iter;
2884 use_operand_p use_p;
2885
2886 if (is_gimple_debug (stmt))
2887 continue;
2888 if (gimple_has_side_effects (stmt))
2889 return false;
2890 if (stmt == last)
2891 return true;
2892 if (!is_gimple_assign (stmt))
2893 return false;
2894 lhs = gimple_assign_lhs (stmt);
2895 if (TREE_CODE (lhs) != SSA_NAME)
2896 return false;
2897 if (gimple_assign_rhs_could_trap_p (stmt))
2898 return false;
2899 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2900 {
2901 gimple use_stmt = USE_STMT (use_p);
2902 if (is_gimple_debug (use_stmt))
2903 continue;
2904 if (gimple_bb (use_stmt) != bb)
2905 return false;
2906 }
2907 }
2908 return false;
2909 }
2910
2911 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2912 return true and fill in *OPS recursively. */
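/* E.g. (hypothetical names, each assumed to have a single use and no
   further reassociable definition): for
     _5 = _1 | _2;
     _6 = _5 | _3;
   get_ops (_6, BIT_IOR_EXPR, ...) pushes operand entries for _1, _2
   and _3.  */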
2913
2914 static bool
2915 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2916 struct loop *loop)
2917 {
2918 gimple stmt = SSA_NAME_DEF_STMT (var);
2919 tree rhs[2];
2920 int i;
2921
2922 if (!is_reassociable_op (stmt, code, loop))
2923 return false;
2924
2925 rhs[0] = gimple_assign_rhs1 (stmt);
2926 rhs[1] = gimple_assign_rhs2 (stmt);
2927 gimple_set_visited (stmt, true);
2928 for (i = 0; i < 2; i++)
2929 if (TREE_CODE (rhs[i]) == SSA_NAME
2930 && !get_ops (rhs[i], code, ops, loop)
2931 && has_single_use (rhs[i]))
2932 {
2933 operand_entry_t oe = operand_entry_pool.allocate ();
2934
2935 oe->op = rhs[i];
2936 oe->rank = code;
2937 oe->id = 0;
2938 oe->count = 1;
2939 ops->safe_push (oe);
2940 }
2941 return true;
2942 }
2943
2944 /* Find the ops that were added by get_ops starting from VAR, see if
2945 they were changed during update_range_test and if so, create new
2946 stmts. */
2947
2948 static tree
2949 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2950 unsigned int *pidx, struct loop *loop)
2951 {
2952 gimple stmt = SSA_NAME_DEF_STMT (var);
2953 tree rhs[4];
2954 int i;
2955
2956 if (!is_reassociable_op (stmt, code, loop))
2957 return NULL;
2958
2959 rhs[0] = gimple_assign_rhs1 (stmt);
2960 rhs[1] = gimple_assign_rhs2 (stmt);
2961 rhs[2] = rhs[0];
2962 rhs[3] = rhs[1];
2963 for (i = 0; i < 2; i++)
2964 if (TREE_CODE (rhs[i]) == SSA_NAME)
2965 {
2966 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2967 if (rhs[2 + i] == NULL_TREE)
2968 {
2969 if (has_single_use (rhs[i]))
2970 rhs[2 + i] = ops[(*pidx)++]->op;
2971 else
2972 rhs[2 + i] = rhs[i];
2973 }
2974 }
2975 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2976 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2977 {
2978 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2979 var = make_ssa_name (TREE_TYPE (var));
2980 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
2981 rhs[2], rhs[3]);
2982 gimple_set_uid (g, gimple_uid (stmt));
2983 gimple_set_visited (g, true);
2984 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2985 }
2986 return var;
2987 }
2988
2989 /* Structure to track the initial value passed to get_ops and
2990 the range in the ops vector for each basic block. */
2991
2992 struct inter_bb_range_test_entry
2993 {
2994 tree op;
2995 unsigned int first_idx, last_idx;
2996 };
2997
2998 /* Inter-bb range test optimization. */
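/* Conceptually (a hypothetical CFG, names and constants illustrative):
   with
     <bb 2>: if (a_1 == 2) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (a_1 == 3) goto <bb 4>; else goto <bb 5>;
   where bb 4 is the common destination, the two GIMPLE_CONDs spread
   over the two blocks can be combined into a single range test such as
   (unsigned) (a_1 - 2) <= 1, with the now-redundant condition folded
   to a constant.  */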
2999
3000 static void
3001 maybe_optimize_range_tests (gimple stmt)
3002 {
3003 basic_block first_bb = gimple_bb (stmt);
3004 basic_block last_bb = first_bb;
3005 basic_block other_bb = NULL;
3006 basic_block bb;
3007 edge_iterator ei;
3008 edge e;
3009 auto_vec<operand_entry_t> ops;
3010 auto_vec<inter_bb_range_test_entry> bbinfo;
3011 bool any_changes = false;
3012
3013 /* Consider only basic blocks that end with GIMPLE_COND or
3014 a cast statement satisfying final_range_test_p. All
3015 but the last bb in the first_bb .. last_bb range
3016 should end with GIMPLE_COND. */
3017 if (gimple_code (stmt) == GIMPLE_COND)
3018 {
3019 if (EDGE_COUNT (first_bb->succs) != 2)
3020 return;
3021 }
3022 else if (final_range_test_p (stmt))
3023 other_bb = single_succ (first_bb);
3024 else
3025 return;
3026
3027 if (stmt_could_throw_p (stmt))
3028 return;
3029
3030 /* As relative ordering of post-dominator sons isn't fixed,
3031 maybe_optimize_range_tests can be called first on any
3032 bb in the range we want to optimize. So, start searching
3033 backwards, if first_bb can be set to a predecessor. */
3034 while (single_pred_p (first_bb))
3035 {
3036 basic_block pred_bb = single_pred (first_bb);
3037 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3038 break;
3039 if (!no_side_effect_bb (first_bb))
3040 break;
3041 first_bb = pred_bb;
3042 }
3043 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3044 Before starting forward search in last_bb successors, find
3045 out the other_bb. */
3046 if (first_bb == last_bb)
3047 {
3048 other_bb = NULL;
3049 /* As non-GIMPLE_COND last stmt always terminates the range,
3050 if forward search didn't discover anything, just give up. */
3051 if (gimple_code (stmt) != GIMPLE_COND)
3052 return;
3053 /* Look at both successors. Either it ends with a GIMPLE_COND
3054 and satisfies suitable_cond_bb, or ends with a cast and
3055 other_bb is that cast's successor. */
3056 FOR_EACH_EDGE (e, ei, first_bb->succs)
3057 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3058 || e->dest == first_bb)
3059 return;
3060 else if (single_pred_p (e->dest))
3061 {
3062 stmt = last_stmt (e->dest);
3063 if (stmt
3064 && gimple_code (stmt) == GIMPLE_COND
3065 && EDGE_COUNT (e->dest->succs) == 2)
3066 {
3067 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3068 break;
3069 else
3070 other_bb = NULL;
3071 }
3072 else if (stmt
3073 && final_range_test_p (stmt)
3074 && find_edge (first_bb, single_succ (e->dest)))
3075 {
3076 other_bb = single_succ (e->dest);
3077 if (other_bb == first_bb)
3078 other_bb = NULL;
3079 }
3080 }
3081 if (other_bb == NULL)
3082 return;
3083 }
3084 /* Now do the forward search, moving last_bb to successor bbs
3085 that aren't other_bb. */
3086 while (EDGE_COUNT (last_bb->succs) == 2)
3087 {
3088 FOR_EACH_EDGE (e, ei, last_bb->succs)
3089 if (e->dest != other_bb)
3090 break;
3091 if (e == NULL)
3092 break;
3093 if (!single_pred_p (e->dest))
3094 break;
3095 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3096 break;
3097 if (!no_side_effect_bb (e->dest))
3098 break;
3099 last_bb = e->dest;
3100 }
3101 if (first_bb == last_bb)
3102 return;
3103 /* Here basic blocks first_bb through last_bb's predecessor
3104 end with GIMPLE_COND, all of them have one of the edges to
3105 other_bb and another to another block in the range,
3106 all blocks except first_bb don't have side-effects and
3107 last_bb ends with either GIMPLE_COND, or cast satisfying
3108 final_range_test_p. */
3109 for (bb = last_bb; ; bb = single_pred (bb))
3110 {
3111 enum tree_code code;
3112 tree lhs, rhs;
3113 inter_bb_range_test_entry bb_ent;
3114
3115 bb_ent.op = NULL_TREE;
3116 bb_ent.first_idx = ops.length ();
3117 bb_ent.last_idx = bb_ent.first_idx;
3118 e = find_edge (bb, other_bb);
3119 stmt = last_stmt (bb);
3120 gimple_set_visited (stmt, true);
3121 if (gimple_code (stmt) != GIMPLE_COND)
3122 {
3123 use_operand_p use_p;
3124 gimple phi;
3125 edge e2;
3126 unsigned int d;
3127
3128 lhs = gimple_assign_lhs (stmt);
3129 rhs = gimple_assign_rhs1 (stmt);
3130 gcc_assert (bb == last_bb);
3131
3132 /* stmt is
3133 _123 = (int) _234;
3134
3135 followed by:
3136 <bb M>:
3137 # _345 = PHI <_123(N), 1(...), 1(...)>
3138
3139 or 0 instead of 1. If it is 0, the _234
3140 range test is anded together with all the
3141 other range tests, if it is 1, it is ored with
3142 them. */
3143 single_imm_use (lhs, &use_p, &phi);
3144 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3145 e2 = find_edge (first_bb, other_bb);
3146 d = e2->dest_idx;
3147 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3148 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3149 code = BIT_AND_EXPR;
3150 else
3151 {
3152 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3153 code = BIT_IOR_EXPR;
3154 }
3155
3156 /* If _234 SSA_NAME_DEF_STMT is
3157 _234 = _567 | _789;
3158 (or &, corresponding to 1/0 in the phi arguments),
3159 push into ops the individual range test arguments
3160 of the bitwise or resp. and, recursively. */
3161 if (!get_ops (rhs, code, &ops,
3162 loop_containing_stmt (stmt))
3163 && has_single_use (rhs))
3164 {
3165 /* Otherwise, push the _234 range test itself. */
3166 operand_entry_t oe = operand_entry_pool.allocate ();
3167
3168 oe->op = rhs;
3169 oe->rank = code;
3170 oe->id = 0;
3171 oe->count = 1;
3172 ops.safe_push (oe);
3173 bb_ent.last_idx++;
3174 }
3175 else
3176 bb_ent.last_idx = ops.length ();
3177 bb_ent.op = rhs;
3178 bbinfo.safe_push (bb_ent);
3179 continue;
3180 }
3181 /* Otherwise stmt is GIMPLE_COND. */
3182 code = gimple_cond_code (stmt);
3183 lhs = gimple_cond_lhs (stmt);
3184 rhs = gimple_cond_rhs (stmt);
3185 if (TREE_CODE (lhs) == SSA_NAME
3186 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3187 && ((code != EQ_EXPR && code != NE_EXPR)
3188 || rhs != boolean_false_node
3189 /* Either push into ops the individual bitwise
3190 or resp. and operands, depending on which
3191 edge is other_bb. */
3192 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3193 ^ (code == EQ_EXPR))
3194 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3195 loop_containing_stmt (stmt))))
3196 {
3197 /* Or push the GIMPLE_COND stmt itself. */
3198 operand_entry_t oe = operand_entry_pool.allocate ();
3199
3200 oe->op = NULL;
3201 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3202 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3203 /* oe->op = NULL signals that there is no SSA_NAME
3204 for the range test, and oe->id instead is the
3205 number of the basic block at whose end the GIMPLE_COND
3206 is found. */
3207 oe->id = bb->index;
3208 oe->count = 1;
3209 ops.safe_push (oe);
3210 bb_ent.op = NULL;
3211 bb_ent.last_idx++;
3212 }
3213 else if (ops.length () > bb_ent.first_idx)
3214 {
3215 bb_ent.op = lhs;
3216 bb_ent.last_idx = ops.length ();
3217 }
3218 bbinfo.safe_push (bb_ent);
3219 if (bb == first_bb)
3220 break;
3221 }
3222 if (ops.length () > 1)
3223 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3224 if (any_changes)
3225 {
3226 unsigned int idx;
3227 /* update_ops relies on has_single_use predicates returning the
3228 same values as they did during get_ops earlier. Additionally it
3229 never removes statements, only adds new ones and it should walk
3230 from the single imm use and check the predicate already before
3231 making those changes.
3232 On the other side, the handling of GIMPLE_COND directly can turn
3233 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3234 it needs to be done in a separate loop afterwards. */
3235 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3236 {
3237 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3238 && bbinfo[idx].op != NULL_TREE)
3239 {
3240 tree new_op;
3241
3242 stmt = last_stmt (bb);
3243 new_op = update_ops (bbinfo[idx].op,
3244 (enum tree_code)
3245 ops[bbinfo[idx].first_idx]->rank,
3246 ops, &bbinfo[idx].first_idx,
3247 loop_containing_stmt (stmt));
3248 if (new_op == NULL_TREE)
3249 {
3250 gcc_assert (bb == last_bb);
3251 new_op = ops[bbinfo[idx].first_idx++]->op;
3252 }
3253 if (bbinfo[idx].op != new_op)
3254 {
3255 imm_use_iterator iter;
3256 use_operand_p use_p;
3257 gimple use_stmt, cast_stmt = NULL;
3258
3259 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3260 if (is_gimple_debug (use_stmt))
3261 continue;
3262 else if (gimple_code (use_stmt) == GIMPLE_COND
3263 || gimple_code (use_stmt) == GIMPLE_PHI)
3264 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3265 SET_USE (use_p, new_op);
3266 else if (gimple_assign_cast_p (use_stmt))
3267 cast_stmt = use_stmt;
3268 else
3269 gcc_unreachable ();
3270 if (cast_stmt)
3271 {
3272 gcc_assert (bb == last_bb);
3273 tree lhs = gimple_assign_lhs (cast_stmt);
3274 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3275 enum tree_code rhs_code
3276 = gimple_assign_rhs_code (cast_stmt);
3277 gassign *g;
3278 if (is_gimple_min_invariant (new_op))
3279 {
3280 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3281 g = gimple_build_assign (new_lhs, new_op);
3282 }
3283 else
3284 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3285 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3286 gimple_set_uid (g, gimple_uid (cast_stmt));
3287 gimple_set_visited (g, true);
3288 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3289 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3290 if (is_gimple_debug (use_stmt))
3291 continue;
3292 else if (gimple_code (use_stmt) == GIMPLE_COND
3293 || gimple_code (use_stmt) == GIMPLE_PHI)
3294 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3295 SET_USE (use_p, new_lhs);
3296 else
3297 gcc_unreachable ();
3298 }
3299 }
3300 }
3301 if (bb == first_bb)
3302 break;
3303 }
3304 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3305 {
3306 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3307 && bbinfo[idx].op == NULL_TREE
3308 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3309 {
3310 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3311 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3312 gimple_cond_make_false (cond_stmt);
3313 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3314 gimple_cond_make_true (cond_stmt);
3315 else
3316 {
3317 gimple_cond_set_code (cond_stmt, NE_EXPR);
3318 gimple_cond_set_lhs (cond_stmt,
3319 ops[bbinfo[idx].first_idx]->op);
3320 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3321 }
3322 update_stmt (cond_stmt);
3323 }
3324 if (bb == first_bb)
3325 break;
3326 }
3327 }
3328 }
3329
3330 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3331 of STMT in its operands. This is also known as a "destructive
3332 update" operation. */
3333
3334 static bool
3335 is_phi_for_stmt (gimple stmt, tree operand)
3336 {
3337 gimple def_stmt;
3338 gphi *def_phi;
3339 tree lhs;
3340 use_operand_p arg_p;
3341 ssa_op_iter i;
3342
3343 if (TREE_CODE (operand) != SSA_NAME)
3344 return false;
3345
3346 lhs = gimple_assign_lhs (stmt);
3347
3348 def_stmt = SSA_NAME_DEF_STMT (operand);
3349 def_phi = dyn_cast <gphi *> (def_stmt);
3350 if (!def_phi)
3351 return false;
3352
3353 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3354 if (lhs == USE_FROM_PTR (arg_p))
3355 return true;
3356 return false;
3357 }
3358
3359 /* Remove def stmt of VAR if VAR has zero uses and recurse
3360 on rhs1 operand if so. */
3361
3362 static void
3363 remove_visited_stmt_chain (tree var)
3364 {
3365 gimple stmt;
3366 gimple_stmt_iterator gsi;
3367
3368 while (1)
3369 {
3370 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3371 return;
3372 stmt = SSA_NAME_DEF_STMT (var);
3373 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3374 {
3375 var = gimple_assign_rhs1 (stmt);
3376 gsi = gsi_for_stmt (stmt);
3377 reassoc_remove_stmt (&gsi);
3378 release_defs (stmt);
3379 }
3380 else
3381 return;
3382 }
3383 }
3384
3385 /* This function checks three consecutive operands in
3386 the passed operands vector OPS starting from OPINDEX and
3387 swaps two operands if it is profitable for the binary operation
3388 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3389
3390 We pair ops with the same rank if possible.
3391
3392 The alternative we try is to see if STMT is a destructive
3393 update style statement, which is like:
3394 b = phi (a, ...)
3395 a = c + b;
3396 In that case, we want to use the destructive update form to
3397 expose the possible vectorizer sum reduction opportunity.
3398 In that case, the third operand will be the phi node. This
3399 check is not performed if STMT is null.
3400
3401 We could, of course, try to be better as noted above, and do a
3402 lot of work to try to find these opportunities in >3 operand
3403 cases, but it is unlikely to be worth it. */
3404
3405 static void
3406 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3407 unsigned int opindex, gimple stmt)
3408 {
3409 operand_entry_t oe1, oe2, oe3;
3410
3411 oe1 = ops[opindex];
3412 oe2 = ops[opindex + 1];
3413 oe3 = ops[opindex + 2];
3414
3415 if ((oe1->rank == oe2->rank
3416 && oe2->rank != oe3->rank)
3417 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3418 && !is_phi_for_stmt (stmt, oe1->op)
3419 && !is_phi_for_stmt (stmt, oe2->op)))
3420 {
3421 struct operand_entry temp = *oe3;
3422 oe3->op = oe1->op;
3423 oe3->rank = oe1->rank;
3424 oe1->op = temp.op;
3425 oe1->rank= temp.rank;
3426 }
3427 else if ((oe1->rank == oe3->rank
3428 && oe2->rank != oe3->rank)
3429 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3430 && !is_phi_for_stmt (stmt, oe1->op)
3431 && !is_phi_for_stmt (stmt, oe3->op)))
3432 {
3433 struct operand_entry temp = *oe2;
3434 oe2->op = oe1->op;
3435 oe2->rank = oe1->rank;
3436 oe1->op = temp.op;
3437 oe1->rank = temp.rank;
3438 }
3439 }
3440
3441 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3442 two definitions, otherwise return STMT. */
3443
3444 static inline gimple
3445 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3446 {
3447 if (TREE_CODE (rhs1) == SSA_NAME
3448 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3449 stmt = SSA_NAME_DEF_STMT (rhs1);
3450 if (TREE_CODE (rhs2) == SSA_NAME
3451 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3452 stmt = SSA_NAME_DEF_STMT (rhs2);
3453 return stmt;
3454 }
3455
3456 /* Recursively rewrite our linearized statements so that the operators
3457 match those in OPS[OPINDEX], putting the computation in rank
3458 order. Return new lhs. */
3459
3460 static tree
3461 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3462 vec<operand_entry_t> ops, bool changed)
3463 {
3464 tree rhs1 = gimple_assign_rhs1 (stmt);
3465 tree rhs2 = gimple_assign_rhs2 (stmt);
3466 tree lhs = gimple_assign_lhs (stmt);
3467 operand_entry_t oe;
3468
3469 /* The final recursion case for this function is that you have
3470 exactly two operations left.
3471 If we had exactly one op in the entire list to start with, we
3472 would have never called this function, and the tail recursion
3473 rewrites them one at a time. */
3474 if (opindex + 2 == ops.length ())
3475 {
3476 operand_entry_t oe1, oe2;
3477
3478 oe1 = ops[opindex];
3479 oe2 = ops[opindex + 1];
3480
3481 if (rhs1 != oe1->op || rhs2 != oe2->op)
3482 {
3483 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3484 unsigned int uid = gimple_uid (stmt);
3485
3486 if (dump_file && (dump_flags & TDF_DETAILS))
3487 {
3488 fprintf (dump_file, "Transforming ");
3489 print_gimple_stmt (dump_file, stmt, 0, 0);
3490 }
3491
3492 /* Even when changed is false, reassociation could have e.g. removed
3493 some redundant operations, so unless we are just swapping the
3494 arguments or unless there is no change at all (then we just
3495 return lhs), force creation of a new SSA_NAME. */
3496 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3497 {
3498 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3499 lhs = make_ssa_name (TREE_TYPE (lhs));
3500 stmt
3501 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3502 oe1->op, oe2->op);
3503 gimple_set_uid (stmt, uid);
3504 gimple_set_visited (stmt, true);
3505 if (insert_point == gsi_stmt (gsi))
3506 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3507 else
3508 insert_stmt_after (stmt, insert_point);
3509 }
3510 else
3511 {
3512 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3513 == stmt);
3514 gimple_assign_set_rhs1 (stmt, oe1->op);
3515 gimple_assign_set_rhs2 (stmt, oe2->op);
3516 update_stmt (stmt);
3517 }
3518
3519 if (rhs1 != oe1->op && rhs1 != oe2->op)
3520 remove_visited_stmt_chain (rhs1);
3521
3522 if (dump_file && (dump_flags & TDF_DETAILS))
3523 {
3524 fprintf (dump_file, " into ");
3525 print_gimple_stmt (dump_file, stmt, 0, 0);
3526 }
3527 }
3528 return lhs;
3529 }
3530
3531 /* If we hit here, we should have 3 or more ops left. */
3532 gcc_assert (opindex + 2 < ops.length ());
3533
3534 /* Rewrite the next operator. */
3535 oe = ops[opindex];
3536
3537 /* Recurse on the LHS of the binary operator, which is guaranteed to
3538 be the non-leaf side. */
3539 tree new_rhs1
3540 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3541 changed || oe->op != rhs2);
3542
3543 if (oe->op != rhs2 || new_rhs1 != rhs1)
3544 {
3545 if (dump_file && (dump_flags & TDF_DETAILS))
3546 {
3547 fprintf (dump_file, "Transforming ");
3548 print_gimple_stmt (dump_file, stmt, 0, 0);
3549 }
3550
3551 /* If changed is false, this is either opindex == 0
3552 or all outer rhs2's were equal to corresponding oe->op,
3553 and powi_result is NULL.
3554 That means lhs is equivalent before and after reassociation.
3555 Otherwise ensure the old lhs SSA_NAME is not reused and
3556 create a new stmt as well, so that any debug stmts will be
3557 properly adjusted. */
3558 if (changed)
3559 {
3560 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3561 unsigned int uid = gimple_uid (stmt);
3562 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3563
3564 lhs = make_ssa_name (TREE_TYPE (lhs));
3565 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3566 new_rhs1, oe->op);
3567 gimple_set_uid (stmt, uid);
3568 gimple_set_visited (stmt, true);
3569 if (insert_point == gsi_stmt (gsi))
3570 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3571 else
3572 insert_stmt_after (stmt, insert_point);
3573 }
3574 else
3575 {
3576 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3577 == stmt);
3578 gimple_assign_set_rhs1 (stmt, new_rhs1);
3579 gimple_assign_set_rhs2 (stmt, oe->op);
3580 update_stmt (stmt);
3581 }
3582
3583 if (dump_file && (dump_flags & TDF_DETAILS))
3584 {
3585 fprintf (dump_file, " into ");
3586 print_gimple_stmt (dump_file, stmt, 0, 0);
3587 }
3588 }
3589 return lhs;
3590 }
3591
3592 /* Find out how many cycles we need to compute the statement chain.
3593 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3594 maximum number of independent statements we may execute per cycle. */
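/* Worked example (numbers illustrative): for OPS_NUM = 16 and
   CPU_WIDTH = 4 this computes 16 / 8 = 2 cycles reducing by CPU_WIDTH,
   leaving 16 - 2 * 4 = 8 operands, plus log2 (8) = 3 halving cycles,
   i.e. 5 cycles in total.  */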
3595
3596 static int
3597 get_required_cycles (int ops_num, int cpu_width)
3598 {
3599 int res;
3600 int elog;
3601 unsigned int rest;
3602
3603 /* While we have more than 2 * cpu_width operands
3604 we may reduce the number of operands by cpu_width
3605 per cycle. */
3606 res = ops_num / (2 * cpu_width);
3607
3608 /* The count of remaining operands may be halved each cycle
3609 until we have only one operand left. */
3610 rest = (unsigned)(ops_num - res * cpu_width);
3611 elog = exact_log2 (rest);
3612 if (elog >= 0)
3613 res += elog;
3614 else
3615 res += floor_log2 (rest) + 1;
3616
3617 return res;
3618 }
3619
3620 /* Returns an optimal number of registers to use for computation of
3621 given statements. */
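/* For illustration (numbers hypothetical): with 6 operands and a target
   reassociation width of 4, get_required_cycles gives 3 cycles for both
   width 4 and width 2, so the narrower width 2 is returned to reduce
   register pressure.  */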
3622
3623 static int
3624 get_reassociation_width (int ops_num, enum tree_code opc,
3625 machine_mode mode)
3626 {
3627 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3628 int width;
3629 int width_min;
3630 int cycles_best;
3631
3632 if (param_width > 0)
3633 width = param_width;
3634 else
3635 width = targetm.sched.reassociation_width (opc, mode);
3636
3637 if (width == 1)
3638 return width;
3639
3640 /* Get the minimal time required for sequence computation. */
3641 cycles_best = get_required_cycles (ops_num, width);
3642
3643 /* Check if we may use less width and still compute sequence for
3644 the same time. It will allow us to reduce registers usage.
3645 get_required_cycles is monotonically increasing with lower width
3646 so we can perform a binary search for the minimal width that still
3647 results in the optimal cycle count. */
3648 width_min = 1;
3649 while (width > width_min)
3650 {
3651 int width_mid = (width + width_min) / 2;
3652
3653 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3654 width = width_mid;
3655 else if (width_min < width_mid)
3656 width_min = width_mid;
3657 else
3658 break;
3659 }
3660
3661 return width;
3662 }
3663
3664 /* Recursively rewrite our linearized statements so that the operators
3665 match those in OPS[OPINDEX], putting the computation in rank
3666 order and trying to allow operations to be executed in
3667 parallel. */
3668
3669 static void
3670 rewrite_expr_tree_parallel (gassign *stmt, int width,
3671 vec<operand_entry_t> ops)
3672 {
3673 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3674 int op_num = ops.length ();
3675 int stmt_num = op_num - 1;
3676 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3677 int op_index = op_num - 1;
3678 int stmt_index = 0;
3679 int ready_stmts_end = 0;
3680 int i = 0;
3681 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3682
3683 /* We start expression rewriting from the top statements.
3684 So, in this loop we create a full list of statements
3685 we will work with. */
3686 stmts[stmt_num - 1] = stmt;
3687 for (i = stmt_num - 2; i >= 0; i--)
3688 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3689
3690 for (i = 0; i < stmt_num; i++)
3691 {
3692 tree op1, op2;
3693
3694 /* Determine whether we should use results of
3695 already handled statements or not. */
3696 if (ready_stmts_end == 0
3697 && (i - stmt_index >= width || op_index < 1))
3698 ready_stmts_end = i;
3699
3700 /* Now we choose operands for the next statement. A non-zero
3701 value in ready_stmts_end means that we should use
3702 the results of already generated statements as new operands. */
3703 if (ready_stmts_end > 0)
3704 {
3705 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3706 if (ready_stmts_end > stmt_index)
3707 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3708 else if (op_index >= 0)
3709 op2 = ops[op_index--]->op;
3710 else
3711 {
3712 gcc_assert (stmt_index < i);
3713 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3714 }
3715
3716 if (stmt_index >= ready_stmts_end)
3717 ready_stmts_end = 0;
3718 }
3719 else
3720 {
3721 if (op_index > 1)
3722 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3723 op2 = ops[op_index--]->op;
3724 op1 = ops[op_index--]->op;
3725 }
3726
3727 /* If we emit the last statement then we should put
3728 operands into the last statement. It will also
3729 break the loop. */
3730 if (op_index < 0 && stmt_index == i)
3731 i = stmt_num - 1;
3732
3733 if (dump_file && (dump_flags & TDF_DETAILS))
3734 {
3735 fprintf (dump_file, "Transforming ");
3736 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3737 }
3738
3739 /* We keep the original statement only for the last one. All
3740 others are recreated. */
3741 if (i == stmt_num - 1)
3742 {
3743 gimple_assign_set_rhs1 (stmts[i], op1);
3744 gimple_assign_set_rhs2 (stmts[i], op2);
3745 update_stmt (stmts[i]);
3746 }
3747 else
3748 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3749
3750 if (dump_file && (dump_flags & TDF_DETAILS))
3751 {
3752 fprintf (dump_file, " into ");
3753 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3754 }
3755 }
3756
3757 remove_visited_stmt_chain (last_rhs1);
3758 }
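
/* A rough sketch of the parallel rewrite (SSA names are illustrative, and
   swap_ops_for_binary_stmt may reorder the operands): with width 2 and
   OPS = {a, b, c, d} for x = a + b + c + d, the loop above first emits two
   independent sums and only then combines their results in the original
   statement:

     _1 = c + d;
     _2 = a + b;
     x = _1 + _2;

   so the two leading additions can execute in parallel instead of forming
   the serial chain ((a + b) + c) + d.  */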
3759
3760 /* Transform STMT, which is really (A + B) + (C + D), into the left
3761 linear form, ((A+B)+C)+D.
3762 Recurse on D if necessary. */
3763
3764 static void
3765 linearize_expr (gimple stmt)
3766 {
3767 gimple_stmt_iterator gsi;
3768 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3769 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3770 gimple oldbinrhs = binrhs;
3771 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3772 gimple newbinrhs = NULL;
3773 struct loop *loop = loop_containing_stmt (stmt);
3774 tree lhs = gimple_assign_lhs (stmt);
3775
3776 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3777 && is_reassociable_op (binrhs, rhscode, loop));
3778
3779 gsi = gsi_for_stmt (stmt);
3780
3781 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3782 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3783 gimple_assign_rhs_code (binrhs),
3784 gimple_assign_lhs (binlhs),
3785 gimple_assign_rhs2 (binrhs));
3786 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3787 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3788 gimple_set_uid (binrhs, gimple_uid (stmt));
3789
3790 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3791 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3792
3793 if (dump_file && (dump_flags & TDF_DETAILS))
3794 {
3795 fprintf (dump_file, "Linearized: ");
3796 print_gimple_stmt (dump_file, stmt, 0, 0);
3797 }
3798
3799 reassociate_stats.linearized++;
3800 update_stmt (stmt);
3801
3802 gsi = gsi_for_stmt (oldbinrhs);
3803 reassoc_remove_stmt (&gsi);
3804 release_defs (oldbinrhs);
3805
3806 gimple_set_visited (stmt, true);
3807 gimple_set_visited (binlhs, true);
3808 gimple_set_visited (binrhs, true);
3809
3810 /* Tail recurse on the new rhs if it still needs reassociation. */
3811 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3812 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3813 want to change the algorithm while converting to tuples. */
3814 linearize_expr (stmt);
3815 }
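
/* For illustration (SSA names are made up), given

     t1 = a + b;
     t2 = c + d;
     x = t1 + t2;

   the code above rewrites the right-hand addition into

     t1 = a + b;
     t3 = t1 + d;
     x = t3 + c;

   which is left-linear; the exact operand order is immaterial for the
   commutative codes handled here, and the now-dead t2 definition is
   removed.  */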
3816
3817 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3818 it. Otherwise, return NULL. */
3819
3820 static gimple
3821 get_single_immediate_use (tree lhs)
3822 {
3823 use_operand_p immuse;
3824 gimple immusestmt;
3825
3826 if (TREE_CODE (lhs) == SSA_NAME
3827 && single_imm_use (lhs, &immuse, &immusestmt)
3828 && is_gimple_assign (immusestmt))
3829 return immusestmt;
3830
3831 return NULL;
3832 }
3833
3834 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3835 representing the negated value. Insertions of any necessary
3836 instructions go before GSI.
3837 This function is recursive in that, if you hand it "a_5" as the
3838 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3839 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3840
3841 static tree
3842 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3843 {
3844 gimple negatedefstmt = NULL;
3845 tree resultofnegate;
3846 gimple_stmt_iterator gsi;
3847 unsigned int uid;
3848
3849 /* If we are trying to negate a name, defined by an add, negate the
3850 add operands instead. */
3851 if (TREE_CODE (tonegate) == SSA_NAME)
3852 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3853 if (TREE_CODE (tonegate) == SSA_NAME
3854 && is_gimple_assign (negatedefstmt)
3855 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3856 && has_single_use (gimple_assign_lhs (negatedefstmt))
3857 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3858 {
3859 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3860 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3861 tree lhs = gimple_assign_lhs (negatedefstmt);
3862 gimple g;
3863
3864 gsi = gsi_for_stmt (negatedefstmt);
3865 rhs1 = negate_value (rhs1, &gsi);
3866
3867 gsi = gsi_for_stmt (negatedefstmt);
3868 rhs2 = negate_value (rhs2, &gsi);
3869
3870 gsi = gsi_for_stmt (negatedefstmt);
3871 lhs = make_ssa_name (TREE_TYPE (lhs));
3872 gimple_set_visited (negatedefstmt, true);
3873 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3874 gimple_set_uid (g, gimple_uid (negatedefstmt));
3875 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3876 return lhs;
3877 }
3878
3879 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3880 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3881 NULL_TREE, true, GSI_SAME_STMT);
3882 gsi = *gsip;
3883 uid = gimple_uid (gsi_stmt (gsi));
3884 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3885 {
3886 gimple stmt = gsi_stmt (gsi);
3887 if (gimple_uid (stmt) != 0)
3888 break;
3889 gimple_set_uid (stmt, uid);
3890 }
3891 return resultofnegate;
3892 }
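
/* A worked example of the recursion above (names illustrative): negating
   a_5 where a_5 = b_3 + b_4 and a_5 has a single use emits, roughly,

     t_6 = -b_3;
     t_7 = -b_4;
     a_8 = t_6 + t_7;

   and returns a_8; the old a_5 definition is merely marked visited and is
   left for later dead code elimination once its use is rewritten.  */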
3893
3894 /* Return true if we should break up the subtract in STMT into an add
3895 with negate. This is true when the subtract operands are really
3896 adds, or the subtract itself is used in an add expression. In
3897 either case, breaking up the subtract into an add with negate
3898 exposes the adds to reassociation. */
3899
3900 static bool
3901 should_break_up_subtract (gimple stmt)
3902 {
3903 tree lhs = gimple_assign_lhs (stmt);
3904 tree binlhs = gimple_assign_rhs1 (stmt);
3905 tree binrhs = gimple_assign_rhs2 (stmt);
3906 gimple immusestmt;
3907 struct loop *loop = loop_containing_stmt (stmt);
3908
3909 if (TREE_CODE (binlhs) == SSA_NAME
3910 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3911 return true;
3912
3913 if (TREE_CODE (binrhs) == SSA_NAME
3914 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3915 return true;
3916
3917 if (TREE_CODE (lhs) == SSA_NAME
3918 && (immusestmt = get_single_immediate_use (lhs))
3919 && is_gimple_assign (immusestmt)
3920 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3921 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3922 return true;
3923 return false;
3924 }
3925
3926 /* Transform STMT from A - B into A + -B. */
3927
3928 static void
3929 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3930 {
3931 tree rhs1 = gimple_assign_rhs1 (stmt);
3932 tree rhs2 = gimple_assign_rhs2 (stmt);
3933
3934 if (dump_file && (dump_flags & TDF_DETAILS))
3935 {
3936 fprintf (dump_file, "Breaking up subtract ");
3937 print_gimple_stmt (dump_file, stmt, 0, 0);
3938 }
3939
3940 rhs2 = negate_value (rhs2, gsip);
3941 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3942 update_stmt (stmt);
3943 }
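
/* For example (SSA names illustrative), x = a - b becomes

     t_1 = -b;
     x = a + t_1;

   where negate_value may instead push the negation into b's defining
   addition if b is a single-use PLUS_EXPR.  */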
3944
3945 /* Determine whether STMT is a builtin call that raises an SSA name
3946 to an integer power and has only one use. If so, and this is early
3947 reassociation and unsafe math optimizations are permitted, place
3948 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3949 If any of these conditions does not hold, return FALSE. */
3950
3951 static bool
3952 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3953 {
3954 tree fndecl, arg1;
3955 REAL_VALUE_TYPE c, cint;
3956
3957 if (!first_pass_instance
3958 || !flag_unsafe_math_optimizations
3959 || !is_gimple_call (stmt)
3960 || !has_single_use (gimple_call_lhs (stmt)))
3961 return false;
3962
3963 fndecl = gimple_call_fndecl (stmt);
3964
3965 if (!fndecl
3966 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3967 return false;
3968
3969 switch (DECL_FUNCTION_CODE (fndecl))
3970 {
3971 CASE_FLT_FN (BUILT_IN_POW):
3972 if (flag_errno_math)
3973 return false;
3974
3975 *base = gimple_call_arg (stmt, 0);
3976 arg1 = gimple_call_arg (stmt, 1);
3977
3978 if (TREE_CODE (arg1) != REAL_CST)
3979 return false;
3980
3981 c = TREE_REAL_CST (arg1);
3982
3983 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3984 return false;
3985
3986 *exponent = real_to_integer (&c);
3987 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3988 if (!real_identical (&c, &cint))
3989 return false;
3990
3991 break;
3992
3993 CASE_FLT_FN (BUILT_IN_POWI):
3994 *base = gimple_call_arg (stmt, 0);
3995 arg1 = gimple_call_arg (stmt, 1);
3996
3997 if (!tree_fits_shwi_p (arg1))
3998 return false;
3999
4000 *exponent = tree_to_shwi (arg1);
4001 break;
4002
4003 default:
4004 return false;
4005 }
4006
4007 /* Expanding negative exponents is generally unproductive, so we don't
4008 complicate matters with those. Exponents of zero and one should
4009 have been handled by expression folding. */
4010 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4011 return false;
4012
4013 return true;
4014 }
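
/* A few illustrative cases for the predicate above, assuming the early
   pass instance, -funsafe-math-optimizations, and a single use of each
   call result:

     t = __builtin_pow (x, 3.0);   accepted, *BASE = x, *EXPONENT = 3
                                   (rejected when -fmath-errno is active)
     t = __builtin_powi (x, 4);    accepted, *BASE = x, *EXPONENT = 4
     t = __builtin_pow (x, 2.5);   rejected, exponent is not integral
     t = __builtin_pow (x, n);     rejected, exponent is not a constant  */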
4015
4016 /* Recursively linearize a binary expression that is the RHS of STMT.
4017 Place the operands of the expression tree in the vector named OPS. */
4018
4019 static void
4020 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4021 bool is_associative, bool set_visited)
4022 {
4023 tree binlhs = gimple_assign_rhs1 (stmt);
4024 tree binrhs = gimple_assign_rhs2 (stmt);
4025 gimple binlhsdef = NULL, binrhsdef = NULL;
4026 bool binlhsisreassoc = false;
4027 bool binrhsisreassoc = false;
4028 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4029 struct loop *loop = loop_containing_stmt (stmt);
4030 tree base = NULL_TREE;
4031 HOST_WIDE_INT exponent = 0;
4032
4033 if (set_visited)
4034 gimple_set_visited (stmt, true);
4035
4036 if (TREE_CODE (binlhs) == SSA_NAME)
4037 {
4038 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4039 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4040 && !stmt_could_throw_p (binlhsdef));
4041 }
4042
4043 if (TREE_CODE (binrhs) == SSA_NAME)
4044 {
4045 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4046 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4047 && !stmt_could_throw_p (binrhsdef));
4048 }
4049
4050 /* If the LHS is not reassociable, but the RHS is, we need to swap
4051 them. If neither is reassociable, there is nothing we can do, so
4052 just put them in the ops vector. If the LHS is reassociable,
4053 linearize it. If both are reassociable, then linearize the RHS
4054 and the LHS. */
4055
4056 if (!binlhsisreassoc)
4057 {
4058 /* If this is not an associative operation, like division, give up. */
4059 if (!is_associative)
4060 {
4061 add_to_ops_vec (ops, binrhs);
4062 return;
4063 }
4064
4065 if (!binrhsisreassoc)
4066 {
4067 if (rhscode == MULT_EXPR
4068 && TREE_CODE (binrhs) == SSA_NAME
4069 && acceptable_pow_call (binrhsdef, &base, &exponent))
4070 {
4071 add_repeat_to_ops_vec (ops, base, exponent);
4072 gimple_set_visited (binrhsdef, true);
4073 }
4074 else
4075 add_to_ops_vec (ops, binrhs);
4076
4077 if (rhscode == MULT_EXPR
4078 && TREE_CODE (binlhs) == SSA_NAME
4079 && acceptable_pow_call (binlhsdef, &base, &exponent))
4080 {
4081 add_repeat_to_ops_vec (ops, base, exponent);
4082 gimple_set_visited (binlhsdef, true);
4083 }
4084 else
4085 add_to_ops_vec (ops, binlhs);
4086
4087 return;
4088 }
4089
4090 if (dump_file && (dump_flags & TDF_DETAILS))
4091 {
4092 fprintf (dump_file, "swapping operands of ");
4093 print_gimple_stmt (dump_file, stmt, 0, 0);
4094 }
4095
4096 swap_ssa_operands (stmt,
4097 gimple_assign_rhs1_ptr (stmt),
4098 gimple_assign_rhs2_ptr (stmt));
4099 update_stmt (stmt);
4100
4101 if (dump_file && (dump_flags & TDF_DETAILS))
4102 {
4103 fprintf (dump_file, " is now ");
4104 print_gimple_stmt (dump_file, stmt, 0, 0);
4105 }
4106
4107 /* We want to make it so the lhs is always the reassociative op,
4108 so swap. */
4109 std::swap (binlhs, binrhs);
4110 }
4111 else if (binrhsisreassoc)
4112 {
4113 linearize_expr (stmt);
4114 binlhs = gimple_assign_rhs1 (stmt);
4115 binrhs = gimple_assign_rhs2 (stmt);
4116 }
4117
4118 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4119 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4120 rhscode, loop));
4121 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4122 is_associative, set_visited);
4123
4124 if (rhscode == MULT_EXPR
4125 && TREE_CODE (binrhs) == SSA_NAME
4126 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4127 {
4128 add_repeat_to_ops_vec (ops, base, exponent);
4129 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4130 }
4131 else
4132 add_to_ops_vec (ops, binrhs);
4133 }
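
/* Sketch of the result (names illustrative): for the multiply chain

     t1 = a * b;
     t2 = t1 * c;
     x = t2 * d;

   linearizing the tree rooted at x collects OPS = {a, b, c, d}; the push
   order is an implementation detail, since callers re-sort by rank.  An
   operand defined by an acceptable pow/powi call is instead entered via
   add_repeat_to_ops_vec, so a factor __builtin_powi (y, 3) contributes y
   with a repeat count of 3.  */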
4134
4135 /* Repropagate the negates back into subtracts, since no other pass
4136 currently does it. */
4137
4138 static void
4139 repropagate_negates (void)
4140 {
4141 unsigned int i = 0;
4142 tree negate;
4143
4144 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4145 {
4146 gimple user = get_single_immediate_use (negate);
4147
4148 if (!user || !is_gimple_assign (user))
4149 continue;
4150
4151 /* The negate operand can be either operand of a PLUS_EXPR
4152 (it can be the LHS if the RHS is a constant for example).
4153
4154 Force the negate operand to the RHS of the PLUS_EXPR, then
4155 transform the PLUS_EXPR into a MINUS_EXPR. */
4156 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4157 {
4158 /* If the negated operand appears on the LHS of the
4159 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4160 to force the negated operand to the RHS of the PLUS_EXPR. */
4161 if (gimple_assign_rhs1 (user) == negate)
4162 {
4163 swap_ssa_operands (user,
4164 gimple_assign_rhs1_ptr (user),
4165 gimple_assign_rhs2_ptr (user));
4166 }
4167
4168 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4169 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4170 if (gimple_assign_rhs2 (user) == negate)
4171 {
4172 tree rhs1 = gimple_assign_rhs1 (user);
4173 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4174 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4175 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4176 update_stmt (user);
4177 }
4178 }
4179 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4180 {
4181 if (gimple_assign_rhs1 (user) == negate)
4182 {
4183 /* We have
4184 x = -a
4185 y = x - b
4186 which we transform into
4187 x = a + b
4188 y = -x .
4189 This pushes down the negate which we possibly can merge
4190 into some other operation, hence insert it into the
4191 plus_negates vector. */
4192 gimple feed = SSA_NAME_DEF_STMT (negate);
4193 tree a = gimple_assign_rhs1 (feed);
4194 tree b = gimple_assign_rhs2 (user);
4195 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4196 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4197 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4198 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4199 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4200 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4201 user = gsi_stmt (gsi2);
4202 update_stmt (user);
4203 reassoc_remove_stmt (&gsi);
4204 release_defs (feed);
4205 plus_negates.safe_push (gimple_assign_lhs (user));
4206 }
4207 else
4208 {
4209 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4210 rid of one operation. */
4211 gimple feed = SSA_NAME_DEF_STMT (negate);
4212 tree a = gimple_assign_rhs1 (feed);
4213 tree rhs1 = gimple_assign_rhs1 (user);
4214 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4215 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4216 update_stmt (gsi_stmt (gsi));
4217 }
4218 }
4219 }
4220 }
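
/* The PLUS_EXPR case above is, for example (names illustrative):

     t = -a;
     y = t + b;

   which, after forcing the negate to the RHS, becomes

     y = b - a;

   the negate statement itself is left for dead code elimination if t has
   no other uses.  */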
4221
4222 /* Returns true if OP is of a type for which we can do reassociation.
4223 That is for integral or non-saturating fixed-point types, and for
4224 floating-point types when -fassociative-math is enabled. */
4225
4226 static bool
4227 can_reassociate_p (tree op)
4228 {
4229 tree type = TREE_TYPE (op);
4230 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4231 || NON_SAT_FIXED_POINT_TYPE_P (type)
4232 || (flag_associative_math && FLOAT_TYPE_P (type)))
4233 return true;
4234 return false;
4235 }
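
/* Examples: an unsigned int operand qualifies (its overflow wraps), a
   plain signed int does not unless -fwrapv makes its overflow wrap, a
   saturating fixed-point type never qualifies, and float or double
   operands qualify only under -fassociative-math.  */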
4236
4237 /* Break up subtract operations in block BB.
4238
4239 We do this top down because we don't know whether the subtract is
4240 part of a possible chain of reassociation except at the top.
4241
4242 IE given
4243 d = f + g
4244 c = a + e
4245 b = c - d
4246 q = b - r
4247 k = t - q
4248
4249 we want to break up k = t - q, but we won't until we've transformed q
4250 = b - r, which won't be broken up until we transform b = c - d.
4251
4252 En passant, clear the GIMPLE visited flag on every statement
4253 and set UIDs within each basic block. */
4254
4255 static void
4256 break_up_subtract_bb (basic_block bb)
4257 {
4258 gimple_stmt_iterator gsi;
4259 basic_block son;
4260 unsigned int uid = 1;
4261
4262 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4263 {
4264 gimple stmt = gsi_stmt (gsi);
4265 gimple_set_visited (stmt, false);
4266 gimple_set_uid (stmt, uid++);
4267
4268 if (!is_gimple_assign (stmt)
4269 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4270 continue;
4271
4272 /* Look for simple gimple subtract operations. */
4273 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4274 {
4275 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4276 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4277 continue;
4278
4279 /* Check for a subtract used only in an addition. If this
4280 is the case, transform it into add of a negate for better
4281 reassociation. IE transform C = A-B into C = A + -B if C
4282 is only used in an addition. */
4283 if (should_break_up_subtract (stmt))
4284 break_up_subtract (stmt, &gsi);
4285 }
4286 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4287 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4288 plus_negates.safe_push (gimple_assign_lhs (stmt));
4289 }
4290 for (son = first_dom_son (CDI_DOMINATORS, bb);
4291 son;
4292 son = next_dom_son (CDI_DOMINATORS, son))
4293 break_up_subtract_bb (son);
4294 }
4295
4296 /* Used for repeated factor analysis. */
4297 struct repeat_factor_d
4298 {
4299 /* An SSA name that occurs in a multiply chain. */
4300 tree factor;
4301
4302 /* Cached rank of the factor. */
4303 unsigned rank;
4304
4305 /* Number of occurrences of the factor in the chain. */
4306 HOST_WIDE_INT count;
4307
4308 /* An SSA name representing the product of this factor and
4309 all factors appearing later in the repeated factor vector. */
4310 tree repr;
4311 };
4312
4313 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4314 typedef const struct repeat_factor_d *const_repeat_factor_t;
4315
4316
4317 static vec<repeat_factor> repeat_factor_vec;
4318
4319 /* Used for sorting the repeat factor vector. Sort primarily by
4320 ascending occurrence count, secondarily by descending rank. */
4321
4322 static int
4323 compare_repeat_factors (const void *x1, const void *x2)
4324 {
4325 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4326 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4327
4328 if (rf1->count != rf2->count)
4329 return rf1->count - rf2->count;
4330
4331 return rf2->rank - rf1->rank;
4332 }
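
/* For instance, factors with (count, rank) of (4, 7), (2, 5) and (2, 9)
   sort as (2, 9), (2, 5), (4, 7): lowest count first, and among equal
   counts the higher rank first.  */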
4333
4334 /* Look for repeated operands in OPS in the multiply tree rooted at
4335 STMT. Replace them with an optimal sequence of multiplies and powi
4336 builtin calls, and remove the used operands from OPS. Return an
4337 SSA name representing the value of the replacement sequence. */
4338
4339 static tree
4340 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4341 {
4342 unsigned i, j, vec_len;
4343 int ii;
4344 operand_entry_t oe;
4345 repeat_factor_t rf1, rf2;
4346 repeat_factor rfnew;
4347 tree result = NULL_TREE;
4348 tree target_ssa, iter_result;
4349 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4350 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4351 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4352 gimple mul_stmt, pow_stmt;
4353
4354 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4355 target. */
4356 if (!powi_fndecl)
4357 return NULL_TREE;
4358
4359 /* Allocate the repeated factor vector. */
4360 repeat_factor_vec.create (10);
4361
4362 /* Scan the OPS vector for all SSA names in the product and build
4363 up a vector of occurrence counts for each factor. */
4364 FOR_EACH_VEC_ELT (*ops, i, oe)
4365 {
4366 if (TREE_CODE (oe->op) == SSA_NAME)
4367 {
4368 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4369 {
4370 if (rf1->factor == oe->op)
4371 {
4372 rf1->count += oe->count;
4373 break;
4374 }
4375 }
4376
4377 if (j >= repeat_factor_vec.length ())
4378 {
4379 rfnew.factor = oe->op;
4380 rfnew.rank = oe->rank;
4381 rfnew.count = oe->count;
4382 rfnew.repr = NULL_TREE;
4383 repeat_factor_vec.safe_push (rfnew);
4384 }
4385 }
4386 }
4387
4388 /* Sort the repeated factor vector by (a) increasing occurrence count,
4389 and (b) decreasing rank. */
4390 repeat_factor_vec.qsort (compare_repeat_factors);
4391
4392 /* It is generally best to combine as many base factors as possible
4393 into a product before applying __builtin_powi to the result.
4394 However, the sort order chosen for the repeated factor vector
4395 allows us to cache partial results for the product of the base
4396 factors for subsequent use. When we already have a cached partial
4397 result from a previous iteration, it is best to make use of it
4398 before looking for another __builtin_powi opportunity.
4399
4400 As an example, consider x * x * y * y * y * z * z * z * z.
4401 We want to first compose the product x * y * z, raise it to the
4402 second power, then multiply this by y * z, and finally multiply
4403 by z. This can be done in 5 multiplies provided we cache y * z
4404 for use in both expressions:
4405
4406 t1 = y * z
4407 t2 = t1 * x
4408 t3 = t2 * t2
4409 t4 = t1 * t3
4410 result = t4 * z
4411
4412 If we instead ignored the cached y * z and first multiplied by
4413 the __builtin_powi opportunity z * z, we would get the inferior:
4414
4415 t1 = y * z
4416 t2 = t1 * x
4417 t3 = t2 * t2
4418 t4 = z * z
4419 t5 = t3 * t4
4420 result = t5 * y */
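
  /* A rough sketch of how the loop below realizes the 5-multiply sequence
     above (SSA names are illustrative): with the factor vector sorted to
     x (count 2), y (count 3), z (count 4), the first iteration emits

       t1 = y * z;
       t2 = x * t1;
       p1 = __builtin_powi (t2, 2);

     caching t1 and t2 as representatives.  The following iterations then
     multiply in the cached t1 once and finally the remaining z:

       r1 = p1 * t1;
       r2 = r1 * z;

     and r2 is the value returned for this example.  */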
4421
4422 vec_len = repeat_factor_vec.length ();
4423
4424 /* Repeatedly look for opportunities to create a builtin_powi call. */
4425 while (true)
4426 {
4427 HOST_WIDE_INT power;
4428
4429 /* First look for the largest cached product of factors from
4430 preceding iterations. If found, create a builtin_powi for
4431 it if the minimum occurrence count for its factors is at
4432 least 2, or just use this cached product as our next
4433 multiplicand if the minimum occurrence count is 1. */
4434 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4435 {
4436 if (rf1->repr && rf1->count > 0)
4437 break;
4438 }
4439
4440 if (j < vec_len)
4441 {
4442 power = rf1->count;
4443
4444 if (power == 1)
4445 {
4446 iter_result = rf1->repr;
4447
4448 if (dump_file && (dump_flags & TDF_DETAILS))
4449 {
4450 unsigned elt;
4451 repeat_factor_t rf;
4452 fputs ("Multiplying by cached product ", dump_file);
4453 for (elt = j; elt < vec_len; elt++)
4454 {
4455 rf = &repeat_factor_vec[elt];
4456 print_generic_expr (dump_file, rf->factor, 0);
4457 if (elt < vec_len - 1)
4458 fputs (" * ", dump_file);
4459 }
4460 fputs ("\n", dump_file);
4461 }
4462 }
4463 else
4464 {
4465 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4466 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4467 build_int_cst (integer_type_node,
4468 power));
4469 gimple_call_set_lhs (pow_stmt, iter_result);
4470 gimple_set_location (pow_stmt, gimple_location (stmt));
4471 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4472
4473 if (dump_file && (dump_flags & TDF_DETAILS))
4474 {
4475 unsigned elt;
4476 repeat_factor_t rf;
4477 fputs ("Building __builtin_pow call for cached product (",
4478 dump_file);
4479 for (elt = j; elt < vec_len; elt++)
4480 {
4481 rf = &repeat_factor_vec[elt];
4482 print_generic_expr (dump_file, rf->factor, 0);
4483 if (elt < vec_len - 1)
4484 fputs (" * ", dump_file);
4485 }
4486 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4487 power);
4488 }
4489 }
4490 }
4491 else
4492 {
4493 /* Otherwise, find the first factor in the repeated factor
4494 vector whose occurrence count is at least 2. If no such
4495 factor exists, there are no builtin_powi opportunities
4496 remaining. */
4497 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4498 {
4499 if (rf1->count >= 2)
4500 break;
4501 }
4502
4503 if (j >= vec_len)
4504 break;
4505
4506 power = rf1->count;
4507
4508 if (dump_file && (dump_flags & TDF_DETAILS))
4509 {
4510 unsigned elt;
4511 repeat_factor_t rf;
4512 fputs ("Building __builtin_pow call for (", dump_file);
4513 for (elt = j; elt < vec_len; elt++)
4514 {
4515 rf = &repeat_factor_vec[elt];
4516 print_generic_expr (dump_file, rf->factor, 0);
4517 if (elt < vec_len - 1)
4518 fputs (" * ", dump_file);
4519 }
4520 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4521 }
4522
4523 reassociate_stats.pows_created++;
4524
4525 /* Visit each element of the vector in reverse order (so that
4526 high-occurrence elements are visited first, and within the
4527 same occurrence count, lower-ranked elements are visited
4528 first). Form a linear product of all elements in this order
4529 whose occurrence count is at least that of element J.
4530 Record the SSA name representing the product of each element
4531 with all subsequent elements in the vector. */
4532 if (j == vec_len - 1)
4533 rf1->repr = rf1->factor;
4534 else
4535 {
4536 for (ii = vec_len - 2; ii >= (int)j; ii--)
4537 {
4538 tree op1, op2;
4539
4540 rf1 = &repeat_factor_vec[ii];
4541 rf2 = &repeat_factor_vec[ii + 1];
4542
4543 /* Init the last factor's representative to be itself. */
4544 if (!rf2->repr)
4545 rf2->repr = rf2->factor;
4546
4547 op1 = rf1->factor;
4548 op2 = rf2->repr;
4549
4550 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4551 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4552 op1, op2);
4553 gimple_set_location (mul_stmt, gimple_location (stmt));
4554 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4555 rf1->repr = target_ssa;
4556
4557 /* Don't reprocess the multiply we just introduced. */
4558 gimple_set_visited (mul_stmt, true);
4559 }
4560 }
4561
4562 /* Form a call to __builtin_powi for the maximum product
4563 just formed, raised to the power obtained earlier. */
4564 rf1 = &repeat_factor_vec[j];
4565 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4566 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4567 build_int_cst (integer_type_node,
4568 power));
4569 gimple_call_set_lhs (pow_stmt, iter_result);
4570 gimple_set_location (pow_stmt, gimple_location (stmt));
4571 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4572 }
4573
4574 /* If we previously formed at least one other builtin_powi call,
4575 form the product of this one and those others. */
4576 if (result)
4577 {
4578 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4579 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4580 result, iter_result);
4581 gimple_set_location (mul_stmt, gimple_location (stmt));
4582 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4583 gimple_set_visited (mul_stmt, true);
4584 result = new_result;
4585 }
4586 else
4587 result = iter_result;
4588
4589 /* Decrement the occurrence count of each element in the product
4590 by the count found above, and remove this many copies of each
4591 factor from OPS. */
4592 for (i = j; i < vec_len; i++)
4593 {
4594 unsigned k = power;
4595 unsigned n;
4596
4597 rf1 = &repeat_factor_vec[i];
4598 rf1->count -= power;
4599
4600 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4601 {
4602 if (oe->op == rf1->factor)
4603 {
4604 if (oe->count <= k)
4605 {
4606 ops->ordered_remove (n);
4607 k -= oe->count;
4608
4609 if (k == 0)
4610 break;
4611 }
4612 else
4613 {
4614 oe->count -= k;
4615 break;
4616 }
4617 }
4618 }
4619 }
4620 }
4621
4622 /* At this point all elements in the repeated factor vector have a
4623 remaining occurrence count of 0 or 1, and those with a count of 1
4624 don't have cached representatives. Re-sort the ops vector and
4625 clean up. */
4626 ops->qsort (sort_by_operand_rank);
4627 repeat_factor_vec.release ();
4628
4629 /* Return the final product computed herein. Note that there may
4630 still be some elements with single occurrence count left in OPS;
4631 those will be handled by the normal reassociation logic. */
4632 return result;
4633 }
4634
4635 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4636
4637 static void
4638 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4639 {
4640 tree rhs1;
4641
4642 if (dump_file && (dump_flags & TDF_DETAILS))
4643 {
4644 fprintf (dump_file, "Transforming ");
4645 print_gimple_stmt (dump_file, stmt, 0, 0);
4646 }
4647
4648 rhs1 = gimple_assign_rhs1 (stmt);
4649 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4650 update_stmt (stmt);
4651 remove_visited_stmt_chain (rhs1);
4652
4653 if (dump_file && (dump_flags & TDF_DETAILS))
4654 {
4655 fprintf (dump_file, " into ");
4656 print_gimple_stmt (dump_file, stmt, 0, 0);
4657 }
4658 }
4659
4660 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4661
4662 static void
4663 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4664 tree rhs1, tree rhs2)
4665 {
4666 if (dump_file && (dump_flags & TDF_DETAILS))
4667 {
4668 fprintf (dump_file, "Transforming ");
4669 print_gimple_stmt (dump_file, stmt, 0, 0);
4670 }
4671
4672 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4673 update_stmt (gsi_stmt (*gsi));
4674 remove_visited_stmt_chain (rhs1);
4675
4676 if (dump_file && (dump_flags & TDF_DETAILS))
4677 {
4678 fprintf (dump_file, " into ");
4679 print_gimple_stmt (dump_file, stmt, 0, 0);
4680 }
4681 }
4682
4683 /* Reassociate expressions in basic block BB and its children in the
4684 post-dominator tree. */
4685
4686 static void
4687 reassociate_bb (basic_block bb)
4688 {
4689 gimple_stmt_iterator gsi;
4690 basic_block son;
4691 gimple stmt = last_stmt (bb);
4692
4693 if (stmt && !gimple_visited_p (stmt))
4694 maybe_optimize_range_tests (stmt);
4695
4696 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4697 {
4698 stmt = gsi_stmt (gsi);
4699
4700 if (is_gimple_assign (stmt)
4701 && !stmt_could_throw_p (stmt))
4702 {
4703 tree lhs, rhs1, rhs2;
4704 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4705
4706 /* If this is not a gimple binary expression, there is
4707 nothing for us to do with it. */
4708 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4709 continue;
4710
4711 /* If this was part of an already processed statement,
4712 we don't need to touch it again. */
4713 if (gimple_visited_p (stmt))
4714 {
4715 /* This statement might have become dead because of previous
4716 reassociations. */
4717 if (has_zero_uses (gimple_get_lhs (stmt)))
4718 {
4719 reassoc_remove_stmt (&gsi);
4720 release_defs (stmt);
4721 /* We might end up removing the last stmt above, which
4722 leaves the iterator at the end of the sequence.
4723 Reset it to the last stmt in this case, which might
4724 be the end of the sequence as well if we removed
4725 the last statement of the sequence; in that case
4726 we need to bail out. */
4727 if (gsi_end_p (gsi))
4728 {
4729 gsi = gsi_last_bb (bb);
4730 if (gsi_end_p (gsi))
4731 break;
4732 }
4733 }
4734 continue;
4735 }
4736
4737 lhs = gimple_assign_lhs (stmt);
4738 rhs1 = gimple_assign_rhs1 (stmt);
4739 rhs2 = gimple_assign_rhs2 (stmt);
4740
4741 /* For operations other than bitwise or min/max ones we cannot
4742 associate all types. Verify that here. */
4743 if (rhs_code != BIT_IOR_EXPR
4744 && rhs_code != BIT_AND_EXPR
4745 && rhs_code != BIT_XOR_EXPR
4746 && rhs_code != MIN_EXPR
4747 && rhs_code != MAX_EXPR
4748 && (!can_reassociate_p (lhs)
4749 || !can_reassociate_p (rhs1)
4750 || !can_reassociate_p (rhs2)))
4751 continue;
4752
4753 if (associative_tree_code (rhs_code))
4754 {
4755 auto_vec<operand_entry_t> ops;
4756 tree powi_result = NULL_TREE;
4757
4758 /* There may be no immediate uses left by the time we
4759 get here because we may have eliminated them all. */
4760 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4761 continue;
4762
4763 gimple_set_visited (stmt, true);
4764 linearize_expr_tree (&ops, stmt, true, true);
4765 ops.qsort (sort_by_operand_rank);
4766 optimize_ops_list (rhs_code, &ops);
4767 if (undistribute_ops_list (rhs_code, &ops,
4768 loop_containing_stmt (stmt)))
4769 {
4770 ops.qsort (sort_by_operand_rank);
4771 optimize_ops_list (rhs_code, &ops);
4772 }
4773
4774 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4775 optimize_range_tests (rhs_code, &ops);
4776
4777 if (first_pass_instance
4778 && rhs_code == MULT_EXPR
4779 && flag_unsafe_math_optimizations)
4780 powi_result = attempt_builtin_powi (stmt, &ops);
4781
4782 /* If the operand vector is now empty, all operands were
4783 consumed by the __builtin_powi optimization. */
4784 if (ops.length () == 0)
4785 transform_stmt_to_copy (&gsi, stmt, powi_result);
4786 else if (ops.length () == 1)
4787 {
4788 tree last_op = ops.last ()->op;
4789
4790 if (powi_result)
4791 transform_stmt_to_multiply (&gsi, stmt, last_op,
4792 powi_result);
4793 else
4794 transform_stmt_to_copy (&gsi, stmt, last_op);
4795 }
4796 else
4797 {
4798 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4799 int ops_num = ops.length ();
4800 int width = get_reassociation_width (ops_num, rhs_code, mode);
4801 tree new_lhs = lhs;
4802
4803 if (dump_file && (dump_flags & TDF_DETAILS))
4804 fprintf (dump_file,
4805 "Width = %d was chosen for reassociation\n", width);
4806
4807 if (width > 1
4808 && ops.length () > 3)
4809 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4810 width, ops);
4811 else
4812 {
4813 /* When there are three operands left, we want
4814 to make sure the ones that get the double
4815 binary op are chosen wisely. */
4816 int len = ops.length ();
4817 if (len >= 3)
4818 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4819
4820 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4821 powi_result != NULL);
4822 }
4823
4824 /* If we combined some repeated factors into a
4825 __builtin_powi call, multiply that result by the
4826 reassociated operands. */
4827 if (powi_result)
4828 {
4829 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4830 tree type = TREE_TYPE (lhs);
4831 tree target_ssa = make_temp_ssa_name (type, NULL,
4832 "reassocpow");
4833 gimple_set_lhs (lhs_stmt, target_ssa);
4834 update_stmt (lhs_stmt);
4835 if (lhs != new_lhs)
4836 target_ssa = new_lhs;
4837 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4838 powi_result, target_ssa);
4839 gimple_set_location (mul_stmt, gimple_location (stmt));
4840 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4841 }
4842 }
4843 }
4844 }
4845 }
4846 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4847 son;
4848 son = next_dom_son (CDI_POST_DOMINATORS, son))
4849 reassociate_bb (son);
4850 }
4851
4852 /* Add jumps around shifts for range tests turned into bit tests.
4853 For each SSA_NAME VAR we have code like:
4854 VAR = ...; // final stmt of range comparison
4855 // bit test here...;
4856 OTHERVAR = ...; // final stmt of the bit test sequence
4857 RES = VAR | OTHERVAR;
4858 Turn the above into:
4859 VAR = ...;
4860 if (VAR != 0)
4861 goto <l3>;
4862 else
4863 goto <l2>;
4864 <l2>:
4865 // bit test here...;
4866 OTHERVAR = ...;
4867 <l3>:
4868 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4869
4870 static void
4871 branch_fixup (void)
4872 {
4873 tree var;
4874 unsigned int i;
4875
4876 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4877 {
4878 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4879 gimple use_stmt;
4880 use_operand_p use;
4881 bool ok = single_imm_use (var, &use, &use_stmt);
4882 gcc_assert (ok
4883 && is_gimple_assign (use_stmt)
4884 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4885 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4886
4887 basic_block cond_bb = gimple_bb (def_stmt);
4888 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4889 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4890
4891 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4892 gimple g = gimple_build_cond (NE_EXPR, var,
4893 build_zero_cst (TREE_TYPE (var)),
4894 NULL_TREE, NULL_TREE);
4895 location_t loc = gimple_location (use_stmt);
4896 gimple_set_location (g, loc);
4897 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4898
4899 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4900 etrue->probability = REG_BR_PROB_BASE / 2;
4901 etrue->count = cond_bb->count / 2;
4902 edge efalse = find_edge (cond_bb, then_bb);
4903 efalse->flags = EDGE_FALSE_VALUE;
4904 efalse->probability -= etrue->probability;
4905 efalse->count -= etrue->count;
4906 then_bb->count -= etrue->count;
4907
4908 tree othervar = NULL_TREE;
4909 if (gimple_assign_rhs1 (use_stmt) == var)
4910 othervar = gimple_assign_rhs2 (use_stmt);
4911 else if (gimple_assign_rhs2 (use_stmt) == var)
4912 othervar = gimple_assign_rhs1 (use_stmt);
4913 else
4914 gcc_unreachable ();
4915 tree lhs = gimple_assign_lhs (use_stmt);
4916 gphi *phi = create_phi_node (lhs, merge_bb);
4917 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4918 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4919 gsi = gsi_for_stmt (use_stmt);
4920 gsi_remove (&gsi, true);
4921
4922 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4923 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4924 }
4925 reassoc_branch_fixups.release ();
4926 }
4927
4928 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4929 void debug_ops_vector (vec<operand_entry_t> ops);
4930
4931 /* Dump the operand entry vector OPS to FILE. */
4932
4933 void
4934 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4935 {
4936 operand_entry_t oe;
4937 unsigned int i;
4938
4939 FOR_EACH_VEC_ELT (ops, i, oe)
4940 {
4941 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4942 print_generic_expr (file, oe->op, 0);
4943 }
4944 }
4945
4946 /* Dump the operand entry vector OPS to STDERR. */
4947
4948 DEBUG_FUNCTION void
4949 debug_ops_vector (vec<operand_entry_t> ops)
4950 {
4951 dump_ops_vector (stderr, ops);
4952 }
4953
4954 static void
4955 do_reassoc (void)
4956 {
4957 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4958 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4959 }
4960
4961 /* Initialize the reassociation pass. */
4962
4963 static void
4964 init_reassoc (void)
4965 {
4966 int i;
4967 long rank = 2;
4968 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4969
4970 /* Find the loops, so that we can prevent moving calculations in
4971 them. */
4972 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4973
4974 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4975
4976 next_operand_entry_id = 0;
4977
4978 /* Reverse RPO (Reverse Post Order) will give us something where
4979 deeper loops come later. */
4980 pre_and_rev_post_order_compute (NULL, bbs, false);
4981 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4982 operand_rank = new hash_map<tree, long>;
4983
4984 /* Give each default definition a distinct rank. This includes
4985 parameters and the static chain. Walk backwards over all
4986 SSA names so that we get proper rank ordering according
4987 to tree_swap_operands_p. */
4988 for (i = num_ssa_names - 1; i > 0; --i)
4989 {
4990 tree name = ssa_name (i);
4991 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4992 insert_operand_rank (name, ++rank);
4993 }
4994
4995 /* Set up rank for each BB */
4996 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4997 bb_rank[bbs[i]] = ++rank << 16;
4998
4999 free (bbs);
5000 calculate_dominance_info (CDI_POST_DOMINATORS);
5001 plus_negates = vNULL;
5002 }
5003
5004 /* Cleanup after the reassociation pass, and print stats if
5005 requested. */
5006
5007 static void
5008 fini_reassoc (void)
5009 {
5010 statistics_counter_event (cfun, "Linearized",
5011 reassociate_stats.linearized);
5012 statistics_counter_event (cfun, "Constants eliminated",
5013 reassociate_stats.constants_eliminated);
5014 statistics_counter_event (cfun, "Ops eliminated",
5015 reassociate_stats.ops_eliminated);
5016 statistics_counter_event (cfun, "Statements rewritten",
5017 reassociate_stats.rewritten);
5018 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5019 reassociate_stats.pows_encountered);
5020 statistics_counter_event (cfun, "Built-in powi calls created",
5021 reassociate_stats.pows_created);
5022
5023 delete operand_rank;
5024 operand_entry_pool.release ();
5025 free (bb_rank);
5026 plus_negates.release ();
5027 free_dominance_info (CDI_POST_DOMINATORS);
5028 loop_optimizer_finalize ();
5029 }
5030
5031 /* Gate and execute functions for Reassociation. */
5032
5033 static unsigned int
5034 execute_reassoc (void)
5035 {
5036 init_reassoc ();
5037
5038 do_reassoc ();
5039 repropagate_negates ();
5040 branch_fixup ();
5041
5042 fini_reassoc ();
5043 return 0;
5044 }
5045
5046 namespace {
5047
5048 const pass_data pass_data_reassoc =
5049 {
5050 GIMPLE_PASS, /* type */
5051 "reassoc", /* name */
5052 OPTGROUP_NONE, /* optinfo_flags */
5053 TV_TREE_REASSOC, /* tv_id */
5054 ( PROP_cfg | PROP_ssa ), /* properties_required */
5055 0, /* properties_provided */
5056 0, /* properties_destroyed */
5057 0, /* todo_flags_start */
5058 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5059 };
5060
5061 class pass_reassoc : public gimple_opt_pass
5062 {
5063 public:
5064 pass_reassoc (gcc::context *ctxt)
5065 : gimple_opt_pass (pass_data_reassoc, ctxt)
5066 {}
5067
5068 /* opt_pass methods: */
5069 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5070 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5071 virtual unsigned int execute (function *) { return execute_reassoc (); }
5072
5073 }; // class pass_reassoc
5074
5075 } // anon namespace
5076
5077 gimple_opt_pass *
5078 make_pass_reassoc (gcc::context *ctxt)
5079 {
5080 return new pass_reassoc (ctxt);
5081 }