1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-inline.h"
32 #include "gimple.h"
33 #include "gimple-ssa.h"
34 #include "tree-cfg.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "tree-ssanames.h"
38 #include "tree-ssa-loop.h"
39 #include "tree-dfa.h"
40 #include "tree-ssa.h"
41 #include "tree-iterator.h"
42 #include "tree-pass.h"
43 #include "alloc-pool.h"
44 #include "vec.h"
45 #include "langhooks.h"
46 #include "pointer-set.h"
47 #include "cfgloop.h"
48 #include "flags.h"
49 #include "target.h"
50 #include "params.h"
51 #include "diagnostic-core.h"
52
53 /* This is a simple global reassociation pass. It is, in part, based
54 on the LLVM pass of the same name (They do some things more/less
55 than we do, in different orders, etc).
56
57 It consists of five steps:
58
59 1. Breaking up subtract operations into addition + negate, where
60 it would promote the reassociation of adds.
61
62 2. Left linearization of the expression trees, so that (A+B)+(C+D)
63 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
64 During linearization, we place the operands of the binary
65 expressions into a vector of operand_entry_t
66
67 3. Optimization of the operand lists, eliminating things like a +
68 -a, a & a, etc.
69
70 3a. Combine repeated factors with the same occurrence counts
71 into a __builtin_powi call that will later be optimized into
72 an optimal number of multiplies.
73
74 4. Rewrite the expression trees we linearized and optimized so
75 they are in proper rank order.
76
77 5. Repropagate negates, as nothing else will clean them up ATM.
78
79 A bit of theory on #4, since nobody seems to write anything down
80 about why it makes sense to do it the way they do it:
81
82 We could do this much nicer theoretically, but don't (for reasons
83 explained after how to do it theoretically nice :P).
84
85 In order to promote the most redundancy elimination, you want
86 binary expressions whose operands are the same rank (or
87 preferably, the same value) exposed to the redundancy eliminator,
88 for possible elimination.
89
90 So the way to do this if we really cared, is to build the new op
91 tree from the leaves to the roots, merging as you go, and putting the
92 new op on the end of the worklist, until you are left with one
93 thing on the worklist.
94
95 IE if you have to rewrite the following set of operands (listed with
96 rank in parentheses), with opcode PLUS_EXPR:
97
98 a (1), b (1), c (1), d (2), e (2)
99
100
101 We start with our merge worklist empty, and the ops list with all of
102 those on it.
103
104 You want to first merge all leaves of the same rank, as much as
105 possible.
106
107 So first build a binary op of
108
109 mergetmp = a + b, and put "mergetmp" on the merge worklist.
110
111 Because there is no three operand form of PLUS_EXPR, c is not going to
112 be exposed to redundancy elimination as a rank 1 operand.
113
114 So you might as well throw it on the merge worklist (you could also
115 consider it to now be a rank two operand, and merge it with d and e,
116 but in this case, you then have evicted e from a binary op. So at
117 least in this situation, you can't win.)
118
119 Then build a binary op of d + e
120 mergetmp2 = d + e
121
122 and put mergetmp2 on the merge worklist.
123
124 so merge worklist = {mergetmp, c, mergetmp2}
125
126 Continue building binary ops of these operations until you have only
127 one operation left on the worklist.
128
129 So we have
130
131 build binary op
132 mergetmp3 = mergetmp + c
133
134 worklist = {mergetmp2, mergetmp3}
135
136 mergetmp4 = mergetmp2 + mergetmp3
137
138 worklist = {mergetmp4}
139
140 because we have one operation left, we can now just set the original
141 statement equal to the result of that operation.
142
143 This will at least expose a + b and d + e to redundancy elimination
144 as binary operations.
145
146 For extra points, you can reuse the old statements to build the
147 mergetmps, since you shouldn't run out.
148
149 So why don't we do this?
150
151 Because it's expensive, and rarely will help. Most trees we are
152 reassociating have 3 or fewer ops. If they have 2 ops, they already
153 will be written into a nice single binary op. If you have 3 ops, a
154 single simple check suffices to tell you whether the first two are of the
155 same rank. If so, you know to order it
156
157 mergetmp = op1 + op2
158 newstmt = mergetmp + op3
159
160 instead of
161 mergetmp = op2 + op3
162 newstmt = mergetmp + op1
163
164 If all three are of the same rank, you can't expose them all in a
165 single binary operator anyway, so the above is *still* the best you
166 can do.
167
168 Thus, this is what we do. When we have three ops left, we check to see
169 what order to put them in, and call it a day. As a nod to vector sum
170 reduction, we check if any of the ops are really a phi node that is a
171 destructive update for the associating op, and keep the destructive
172 update together for vector sum reduction recognition. */
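/* As a small worked illustration of the five steps (a hypothetical
   input, not taken from any testcase), consider

     t1 = a - b
     t2 = c + d
     e  = t1 + t2

   Step 1 rewrites the subtract as t3 = -b; t1 = a + t3, step 2
   linearizes everything into the single PLUS_EXPR operand list
   {a, -b, c, d}, step 3 would cancel pairs such as b and -b if both
   appeared, step 4 re-emits the list as a left-linear chain in rank
   order, and step 5 turns any surviving negates back into subtracts.  */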
173
174
175 /* Statistics */
176 static struct
177 {
178 int linearized;
179 int constants_eliminated;
180 int ops_eliminated;
181 int rewritten;
182 int pows_encountered;
183 int pows_created;
184 } reassociate_stats;
185
186 /* Operator, rank pair. */
187 typedef struct operand_entry
188 {
189 unsigned int rank;
190 int id;
191 tree op;
192 unsigned int count;
193 } *operand_entry_t;
194
195 static alloc_pool operand_entry_pool;
196
197 /* This is used to assign a unique ID to each struct operand_entry
198 so that qsort results are identical on different hosts. */
199 static int next_operand_entry_id;
200
201 /* Starting rank number for a given basic block, so that we can rank
202 operations using unmovable instructions in that BB based on the bb
203 depth. */
204 static long *bb_rank;
205
206 /* Operand->rank hashtable. */
207 static struct pointer_map_t *operand_rank;
208
209 /* Forward decls. */
210 static long get_rank (tree);
211
212
213 /* Bias amount for loop-carried phis. We want this to be larger than
214 the depth of any reassociation tree we can see, but not larger than
215 the rank difference between two blocks. */
216 #define PHI_LOOP_BIAS (1 << 15)
217
218 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
219 an innermost loop, and the phi has only a single use which is inside
220 the loop, then the rank is the block rank of the loop latch plus an
221 extra bias for the loop-carried dependence. This causes expressions
222 calculated into an accumulator variable to be independent for each
223 iteration of the loop. If STMT is some other phi, the rank is the
224 block rank of its containing block. */
225 static long
226 phi_rank (gimple stmt)
227 {
228 basic_block bb = gimple_bb (stmt);
229 struct loop *father = bb->loop_father;
230 tree res;
231 unsigned i;
232 use_operand_p use;
233 gimple use_stmt;
234
235 /* We only care about real loops (those with a latch). */
236 if (!father->latch)
237 return bb_rank[bb->index];
238
239 /* Interesting phis must be in headers of innermost loops. */
240 if (bb != father->header
241 || father->inner)
242 return bb_rank[bb->index];
243
244 /* Ignore virtual SSA_NAMEs. */
245 res = gimple_phi_result (stmt);
246 if (virtual_operand_p (res))
247 return bb_rank[bb->index];
248
249 /* The phi definition must have a single use, and that use must be
250 within the loop. Otherwise this isn't an accumulator pattern. */
251 if (!single_imm_use (res, &use, &use_stmt)
252 || gimple_bb (use_stmt)->loop_father != father)
253 return bb_rank[bb->index];
254
255 /* Look for phi arguments from within the loop. If found, bias this phi. */
256 for (i = 0; i < gimple_phi_num_args (stmt); i++)
257 {
258 tree arg = gimple_phi_arg_def (stmt, i);
259 if (TREE_CODE (arg) == SSA_NAME
260 && !SSA_NAME_IS_DEFAULT_DEF (arg))
261 {
262 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
263 if (gimple_bb (def_stmt)->loop_father == father)
264 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
265 }
266 }
267
268 /* Must be an uninteresting phi. */
269 return bb_rank[bb->index];
270 }
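/* Illustration (block ranks are made up): for an accumulator phi

     x_1 = PHI <x_0 (preheader), x_2 (latch)>

   in the header of an innermost loop whose latch block has rank R,
   the function above returns R + PHI_LOOP_BIAS instead of the header's
   own block rank.  The large bias keeps x_1 ranked above everything
   computed inside the loop body, so it is added last when the chain is
   rewritten (see the accumulator example before get_rank below).  */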
271
272 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
273 loop-carried dependence of an innermost loop, return TRUE; else
274 return FALSE. */
275 static bool
276 loop_carried_phi (tree exp)
277 {
278 gimple phi_stmt;
279 long block_rank;
280
281 if (TREE_CODE (exp) != SSA_NAME
282 || SSA_NAME_IS_DEFAULT_DEF (exp))
283 return false;
284
285 phi_stmt = SSA_NAME_DEF_STMT (exp);
286
287 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
288 return false;
289
290 /* Non-loop-carried phis have block rank. Loop-carried phis have
291 an additional bias added in. If this phi doesn't have block rank,
292 it's biased and should not be propagated. */
293 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
294
295 if (phi_rank (phi_stmt) != block_rank)
296 return true;
297
298 return false;
299 }
300
301 /* Return the maximum of RANK and the rank that should be propagated
302 from expression OP. For most operands, this is just the rank of OP.
303 For loop-carried phis, the value is zero to avoid undoing the bias
304 in favor of the phi. */
305 static long
306 propagate_rank (long rank, tree op)
307 {
308 long op_rank;
309
310 if (loop_carried_phi (op))
311 return rank;
312
313 op_rank = get_rank (op);
314
315 return MAX (rank, op_rank);
316 }
317
318 /* Look up the operand rank structure for expression E. */
319
320 static inline long
321 find_operand_rank (tree e)
322 {
323 void **slot = pointer_map_contains (operand_rank, e);
324 return slot ? (long) (intptr_t) *slot : -1;
325 }
326
327 /* Insert {E,RANK} into the operand rank hashtable. */
328
329 static inline void
330 insert_operand_rank (tree e, long rank)
331 {
332 void **slot;
333 gcc_assert (rank > 0);
334 slot = pointer_map_insert (operand_rank, e);
335 gcc_assert (!*slot);
336 *slot = (void *) (intptr_t) rank;
337 }
338
339 /* Given an expression E, return the rank of the expression. */
340
341 static long
342 get_rank (tree e)
343 {
344 /* Constants have rank 0. */
345 if (is_gimple_min_invariant (e))
346 return 0;
347
348 /* SSA_NAME's have the rank of the expression they are the result
349 of.
350 For globals and uninitialized values, the rank is 0.
351 For function arguments, use the pre-setup rank.
352 For PHI nodes, stores, asm statements, etc, we use the rank of
353 the BB.
354 For simple operations, the rank is the maximum rank of any of
355 its operands, or the bb_rank, whichever is less.
356 I make no claims that this is optimal; however, it gives good
357 results. */
358
359 /* We make an exception to the normal ranking system to break
360 dependences of accumulator variables in loops. Suppose we
361 have a simple one-block loop containing:
362
363 x_1 = phi(x_0, x_2)
364 b = a + x_1
365 c = b + d
366 x_2 = c + e
367
368 As shown, each iteration of the calculation into x is fully
369 dependent upon the iteration before it. We would prefer to
370 see this in the form:
371
372 x_1 = phi(x_0, x_2)
373 b = a + d
374 c = b + e
375 x_2 = c + x_1
376
377 If the loop is unrolled, the calculations of b and c from
378 different iterations can be interleaved.
379
380 To obtain this result during reassociation, we bias the rank
381 of the phi definition x_1 upward, when it is recognized as an
382 accumulator pattern. The artificial rank causes it to be
383 added last, providing the desired independence. */
384
385 if (TREE_CODE (e) == SSA_NAME)
386 {
387 gimple stmt;
388 long rank;
389 int i, n;
390 tree op;
391
392 if (SSA_NAME_IS_DEFAULT_DEF (e))
393 return find_operand_rank (e);
394
395 stmt = SSA_NAME_DEF_STMT (e);
396 if (gimple_code (stmt) == GIMPLE_PHI)
397 return phi_rank (stmt);
398
399 if (!is_gimple_assign (stmt)
400 || gimple_vdef (stmt))
401 return bb_rank[gimple_bb (stmt)->index];
402
403 /* If we already have a rank for this expression, use that. */
404 rank = find_operand_rank (e);
405 if (rank != -1)
406 return rank;
407
408 /* Otherwise, find the maximum rank for the operands. As an
409 exception, remove the bias from loop-carried phis when propagating
410 the rank so that dependent operations are not also biased. */
411 rank = 0;
412 if (gimple_assign_single_p (stmt))
413 {
414 tree rhs = gimple_assign_rhs1 (stmt);
415 n = TREE_OPERAND_LENGTH (rhs);
416 if (n == 0)
417 rank = propagate_rank (rank, rhs);
418 else
419 {
420 for (i = 0; i < n; i++)
421 {
422 op = TREE_OPERAND (rhs, i);
423
424 if (op != NULL_TREE)
425 rank = propagate_rank (rank, op);
426 }
427 }
428 }
429 else
430 {
431 n = gimple_num_ops (stmt);
432 for (i = 1; i < n; i++)
433 {
434 op = gimple_op (stmt, i);
435 gcc_assert (op);
436 rank = propagate_rank (rank, op);
437 }
438 }
439
440 if (dump_file && (dump_flags & TDF_DETAILS))
441 {
442 fprintf (dump_file, "Rank for ");
443 print_generic_expr (dump_file, e, 0);
444 fprintf (dump_file, " is %ld\n", (rank + 1));
445 }
446
447 /* Note the rank in the hashtable so we don't recompute it. */
448 insert_operand_rank (e, (rank + 1));
449 return (rank + 1);
450 }
451
452 /* Globals, etc, are rank 0 */
453 return 0;
454 }
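/* A tiny example with made-up ranks: if a_0 has rank 1 and b_0 has
   rank 2, then for

     c_1 = a_0 + b_0;
     d_2 = c_1 + a_0;

   c_1 gets rank MAX (1, 2) + 1 = 3 and d_2 gets rank MAX (3, 1) + 1 = 4,
   while constants keep rank 0 throughout.  */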
455
456
457 /* We want integer ones to end up last no matter what, since they are
458 the ones we can do the most with. */
459 #define INTEGER_CONST_TYPE 1 << 3
460 #define FLOAT_CONST_TYPE 1 << 2
461 #define OTHER_CONST_TYPE 1 << 1
462
463 /* Classify an invariant tree into integer, float, or other, so that
464 we can sort them to be near other constants of the same type. */
465 static inline int
466 constant_type (tree t)
467 {
468 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
469 return INTEGER_CONST_TYPE;
470 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
471 return FLOAT_CONST_TYPE;
472 else
473 return OTHER_CONST_TYPE;
474 }
475
476 /* qsort comparison function to sort operand entries PA and PB by rank
477 so that the sorted array is ordered by rank in decreasing order. */
478 static int
479 sort_by_operand_rank (const void *pa, const void *pb)
480 {
481 const operand_entry_t oea = *(const operand_entry_t *)pa;
482 const operand_entry_t oeb = *(const operand_entry_t *)pb;
483
484 /* It's nicer for optimize_expression if constants that are likely
485 to fold when added/multiplied/whatever are put next to each
486 other. Since all constants have rank 0, order them by type. */
487 if (oeb->rank == 0 && oea->rank == 0)
488 {
489 if (constant_type (oeb->op) != constant_type (oea->op))
490 return constant_type (oeb->op) - constant_type (oea->op);
491 else
492 /* To make sorting result stable, we use unique IDs to determine
493 order. */
494 return oeb->id - oea->id;
495 }
496
497 /* Lastly, make sure the versions that are the same go next to each
498 other. We use SSA_NAME_VERSION because it's stable. */
499 if ((oeb->rank - oea->rank == 0)
500 && TREE_CODE (oea->op) == SSA_NAME
501 && TREE_CODE (oeb->op) == SSA_NAME)
502 {
503 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
504 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
505 else
506 return oeb->id - oea->id;
507 }
508
509 if (oeb->rank != oea->rank)
510 return oeb->rank - oea->rank;
511 else
512 return oeb->id - oea->id;
513 }
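/* For example (ranks in parentheses, purely illustrative), the operand
   list {7, x_3 (2), y_7 (4), x_3 (2)} sorts to
   {y_7 (4), x_3 (2), x_3 (2), 7}: decreasing rank, identical SSA names
   adjacent, and constants (rank 0) at the end grouped by constant type.  */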
514
515 /* Add an operand entry to *OPS for the tree operand OP. */
516
517 static void
518 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
519 {
520 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
521
522 oe->op = op;
523 oe->rank = get_rank (op);
524 oe->id = next_operand_entry_id++;
525 oe->count = 1;
526 ops->safe_push (oe);
527 }
528
529 /* Add an operand entry to *OPS for the tree operand OP with repeat
530 count REPEAT. */
531
532 static void
533 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
534 HOST_WIDE_INT repeat)
535 {
536 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
537
538 oe->op = op;
539 oe->rank = get_rank (op);
540 oe->id = next_operand_entry_id++;
541 oe->count = repeat;
542 ops->safe_push (oe);
543
544 reassociate_stats.pows_encountered++;
545 }
546
547 /* Return true if STMT is a reassociable operation containing a binary
548 operation with tree code CODE, and is inside LOOP. */
549
550 static bool
551 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
552 {
553 basic_block bb = gimple_bb (stmt);
554
555 if (gimple_bb (stmt) == NULL)
556 return false;
557
558 if (!flow_bb_inside_loop_p (loop, bb))
559 return false;
560
561 if (is_gimple_assign (stmt)
562 && gimple_assign_rhs_code (stmt) == code
563 && has_single_use (gimple_assign_lhs (stmt)))
564 return true;
565
566 return false;
567 }
568
569
570 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
571 operand of that operation. Otherwise, return NULL. */
572
573 static tree
574 get_unary_op (tree name, enum tree_code opcode)
575 {
576 gimple stmt = SSA_NAME_DEF_STMT (name);
577
578 if (!is_gimple_assign (stmt))
579 return NULL_TREE;
580
581 if (gimple_assign_rhs_code (stmt) == opcode)
582 return gimple_assign_rhs1 (stmt);
583 return NULL_TREE;
584 }
585
586 /* If CURR and LAST are a pair of ops that OPCODE allows us to
587 eliminate through equivalences, do so, remove them from OPS, and
588 return true. Otherwise, return false. */
589
590 static bool
591 eliminate_duplicate_pair (enum tree_code opcode,
592 vec<operand_entry_t> *ops,
593 bool *all_done,
594 unsigned int i,
595 operand_entry_t curr,
596 operand_entry_t last)
597 {
598
599 /* If we have two of the same op, and the opcode is & |, min, or max,
600 we can eliminate one of them.
601 If we have two of the same op, and the opcode is ^, we can
602 eliminate both of them. */
603
604 if (last && last->op == curr->op)
605 {
606 switch (opcode)
607 {
608 case MAX_EXPR:
609 case MIN_EXPR:
610 case BIT_IOR_EXPR:
611 case BIT_AND_EXPR:
612 if (dump_file && (dump_flags & TDF_DETAILS))
613 {
614 fprintf (dump_file, "Equivalence: ");
615 print_generic_expr (dump_file, curr->op, 0);
616 fprintf (dump_file, " [&|minmax] ");
617 print_generic_expr (dump_file, last->op, 0);
618 fprintf (dump_file, " -> ");
619 print_generic_stmt (dump_file, last->op, 0);
620 }
621
622 ops->ordered_remove (i);
623 reassociate_stats.ops_eliminated ++;
624
625 return true;
626
627 case BIT_XOR_EXPR:
628 if (dump_file && (dump_flags & TDF_DETAILS))
629 {
630 fprintf (dump_file, "Equivalence: ");
631 print_generic_expr (dump_file, curr->op, 0);
632 fprintf (dump_file, " ^ ");
633 print_generic_expr (dump_file, last->op, 0);
634 fprintf (dump_file, " -> nothing\n");
635 }
636
637 reassociate_stats.ops_eliminated += 2;
638
639 if (ops->length () == 2)
640 {
641 ops->create (0);
642 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
643 *all_done = true;
644 }
645 else
646 {
647 ops->ordered_remove (i-1);
648 ops->ordered_remove (i-1);
649 }
650
651 return true;
652
653 default:
654 break;
655 }
656 }
657 return false;
658 }
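/* Examples of what the above does (operand lists shown informally;
   duplicates are adjacent because the list has already been sorted):

     {a, a, b} with BIT_AND_EXPR -> {a, b}   since a & a == a
     {a, a, b} with BIT_XOR_EXPR -> {b}      since a ^ a == 0
     {a, a}    with BIT_XOR_EXPR -> {0}      and *ALL_DONE is set.  */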
659
660 static vec<tree> plus_negates;
661
662 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
663 expression, look in OPS for a corresponding positive operation to cancel
664 it out. If we find one, remove the other from OPS, replace
665 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
666 return false. */
667
668 static bool
669 eliminate_plus_minus_pair (enum tree_code opcode,
670 vec<operand_entry_t> *ops,
671 unsigned int currindex,
672 operand_entry_t curr)
673 {
674 tree negateop;
675 tree notop;
676 unsigned int i;
677 operand_entry_t oe;
678
679 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
680 return false;
681
682 negateop = get_unary_op (curr->op, NEGATE_EXPR);
683 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
684 if (negateop == NULL_TREE && notop == NULL_TREE)
685 return false;
686
687 /* Any non-negated version will have a rank that is one less than
688 the current rank. So once we hit those ranks, if we don't find
689 one, we can stop. */
690
691 for (i = currindex + 1;
692 ops->iterate (i, &oe)
693 && oe->rank >= curr->rank - 1 ;
694 i++)
695 {
696 if (oe->op == negateop)
697 {
698
699 if (dump_file && (dump_flags & TDF_DETAILS))
700 {
701 fprintf (dump_file, "Equivalence: ");
702 print_generic_expr (dump_file, negateop, 0);
703 fprintf (dump_file, " + -");
704 print_generic_expr (dump_file, oe->op, 0);
705 fprintf (dump_file, " -> 0\n");
706 }
707
708 ops->ordered_remove (i);
709 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
710 ops->ordered_remove (currindex);
711 reassociate_stats.ops_eliminated ++;
712
713 return true;
714 }
715 else if (oe->op == notop)
716 {
717 tree op_type = TREE_TYPE (oe->op);
718
719 if (dump_file && (dump_flags & TDF_DETAILS))
720 {
721 fprintf (dump_file, "Equivalence: ");
722 print_generic_expr (dump_file, notop, 0);
723 fprintf (dump_file, " + ~");
724 print_generic_expr (dump_file, oe->op, 0);
725 fprintf (dump_file, " -> -1\n");
726 }
727
728 ops->ordered_remove (i);
729 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
730 ops->ordered_remove (currindex);
731 reassociate_stats.ops_eliminated ++;
732
733 return true;
734 }
735 }
736
737 /* CURR->OP is a negate expr in a plus expr: save it for later
738 inspection in repropagate_negates(). */
739 if (negateop != NULL_TREE)
740 plus_negates.safe_push (curr->op);
741
742 return false;
743 }
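/* Examples of the eliminations above (operand lists shown informally):

     {t_1, b, a} where t_1 = -a  ->  {b, 0}   since a + -a == 0
     {t_2, b, a} where t_2 = ~a  ->  {b, -1}  since a + ~a == -1

   The 0 or -1 constant is pushed back onto the list so that a later
   pass over the operands can fold it with any other constants.  */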
744
745 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
746 bitwise not expression, look in OPS for a corresponding operand to
747 cancel it out. If we find one, the whole operand list collapses to a
748 single constant (0 for AND, -1 for IOR); do so and return true.
749 Otherwise, return false. */
750
751 static bool
752 eliminate_not_pairs (enum tree_code opcode,
753 vec<operand_entry_t> *ops,
754 unsigned int currindex,
755 operand_entry_t curr)
756 {
757 tree notop;
758 unsigned int i;
759 operand_entry_t oe;
760
761 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
762 || TREE_CODE (curr->op) != SSA_NAME)
763 return false;
764
765 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
766 if (notop == NULL_TREE)
767 return false;
768
769 /* Any non-not version will have a rank that is one less than
770 the current rank. So once we hit those ranks, if we don't find
771 one, we can stop. */
772
773 for (i = currindex + 1;
774 ops->iterate (i, &oe)
775 && oe->rank >= curr->rank - 1;
776 i++)
777 {
778 if (oe->op == notop)
779 {
780 if (dump_file && (dump_flags & TDF_DETAILS))
781 {
782 fprintf (dump_file, "Equivalence: ");
783 print_generic_expr (dump_file, notop, 0);
784 if (opcode == BIT_AND_EXPR)
785 fprintf (dump_file, " & ~");
786 else if (opcode == BIT_IOR_EXPR)
787 fprintf (dump_file, " | ~");
788 print_generic_expr (dump_file, oe->op, 0);
789 if (opcode == BIT_AND_EXPR)
790 fprintf (dump_file, " -> 0\n");
791 else if (opcode == BIT_IOR_EXPR)
792 fprintf (dump_file, " -> -1\n");
793 }
794
795 if (opcode == BIT_AND_EXPR)
796 oe->op = build_zero_cst (TREE_TYPE (oe->op));
797 else if (opcode == BIT_IOR_EXPR)
798 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
799 TYPE_PRECISION (TREE_TYPE (oe->op)));
800
801 reassociate_stats.ops_eliminated += ops->length () - 1;
802 ops->truncate (0);
803 ops->quick_push (oe);
804 return true;
805 }
806 }
807
808 return false;
809 }
810
811 /* Use constant value that may be present in OPS to try to eliminate
812 operands. Note that this function is only really used when we've
813 eliminated ops for other reasons, or merged constants. Across
814 single statements, fold already does all of this, plus more. There
815 is little point in duplicating logic, so I've only included the
816 identities that I could ever construct testcases to trigger. */
817
818 static void
819 eliminate_using_constants (enum tree_code opcode,
820 vec<operand_entry_t> *ops)
821 {
822 operand_entry_t oelast = ops->last ();
823 tree type = TREE_TYPE (oelast->op);
824
825 if (oelast->rank == 0
826 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
827 {
828 switch (opcode)
829 {
830 case BIT_AND_EXPR:
831 if (integer_zerop (oelast->op))
832 {
833 if (ops->length () != 1)
834 {
835 if (dump_file && (dump_flags & TDF_DETAILS))
836 fprintf (dump_file, "Found & 0, removing all other ops\n");
837
838 reassociate_stats.ops_eliminated += ops->length () - 1;
839
840 ops->truncate (0);
841 ops->quick_push (oelast);
842 return;
843 }
844 }
845 else if (integer_all_onesp (oelast->op))
846 {
847 if (ops->length () != 1)
848 {
849 if (dump_file && (dump_flags & TDF_DETAILS))
850 fprintf (dump_file, "Found & -1, removing\n");
851 ops->pop ();
852 reassociate_stats.ops_eliminated++;
853 }
854 }
855 break;
856 case BIT_IOR_EXPR:
857 if (integer_all_onesp (oelast->op))
858 {
859 if (ops->length () != 1)
860 {
861 if (dump_file && (dump_flags & TDF_DETAILS))
862 fprintf (dump_file, "Found | -1, removing all other ops\n");
863
864 reassociate_stats.ops_eliminated += ops->length () - 1;
865
866 ops->truncate (0);
867 ops->quick_push (oelast);
868 return;
869 }
870 }
871 else if (integer_zerop (oelast->op))
872 {
873 if (ops->length () != 1)
874 {
875 if (dump_file && (dump_flags & TDF_DETAILS))
876 fprintf (dump_file, "Found | 0, removing\n");
877 ops->pop ();
878 reassociate_stats.ops_eliminated++;
879 }
880 }
881 break;
882 case MULT_EXPR:
883 if (integer_zerop (oelast->op)
884 || (FLOAT_TYPE_P (type)
885 && !HONOR_NANS (TYPE_MODE (type))
886 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
887 && real_zerop (oelast->op)))
888 {
889 if (ops->length () != 1)
890 {
891 if (dump_file && (dump_flags & TDF_DETAILS))
892 fprintf (dump_file, "Found * 0, removing all other ops\n");
893
894 reassociate_stats.ops_eliminated += ops->length () - 1;
895 ops->truncate (1);
896 ops->quick_push (oelast);
897 return;
898 }
899 }
900 else if (integer_onep (oelast->op)
901 || (FLOAT_TYPE_P (type)
902 && !HONOR_SNANS (TYPE_MODE (type))
903 && real_onep (oelast->op)))
904 {
905 if (ops->length () != 1)
906 {
907 if (dump_file && (dump_flags & TDF_DETAILS))
908 fprintf (dump_file, "Found * 1, removing\n");
909 ops->pop ();
910 reassociate_stats.ops_eliminated++;
911 return;
912 }
913 }
914 break;
915 case BIT_XOR_EXPR:
916 case PLUS_EXPR:
917 case MINUS_EXPR:
918 if (integer_zerop (oelast->op)
919 || (FLOAT_TYPE_P (type)
920 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
921 && fold_real_zero_addition_p (type, oelast->op,
922 opcode == MINUS_EXPR)))
923 {
924 if (ops->length () != 1)
925 {
926 if (dump_file && (dump_flags & TDF_DETAILS))
927 fprintf (dump_file, "Found [|^+] 0, removing\n");
928 ops->pop ();
929 reassociate_stats.ops_eliminated++;
930 return;
931 }
932 }
933 break;
934 default:
935 break;
936 }
937 }
938 }
939
940
941 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
942 bool, bool);
943
944 /* Structure for tracking and counting operands. */
945 typedef struct oecount_s {
946 int cnt;
947 int id;
948 enum tree_code oecode;
949 tree op;
950 } oecount;
951
952
953 /* The heap for the oecount hashtable and the sorted list of operands. */
954 static vec<oecount> cvec;
955
956
957 /* Oecount hashtable helpers. */
958
959 struct oecount_hasher : typed_noop_remove <void>
960 {
961 /* Note that this hash table stores integers, not pointers.
962 So, observe the casting in the member functions. */
963 typedef void value_type;
964 typedef void compare_type;
965 static inline hashval_t hash (const value_type *);
966 static inline bool equal (const value_type *, const compare_type *);
967 };
968
969 /* Hash function for oecount. */
970
971 inline hashval_t
972 oecount_hasher::hash (const value_type *p)
973 {
974 const oecount *c = &cvec[(size_t)p - 42];
975 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
976 }
977
978 /* Comparison function for oecount. */
979
980 inline bool
981 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
982 {
983 const oecount *c1 = &cvec[(size_t)p1 - 42];
984 const oecount *c2 = &cvec[(size_t)p2 - 42];
985 return (c1->oecode == c2->oecode
986 && c1->op == c2->op);
987 }
988
989 /* Comparison function for qsort sorting oecount elements by count. */
990
991 static int
992 oecount_cmp (const void *p1, const void *p2)
993 {
994 const oecount *c1 = (const oecount *)p1;
995 const oecount *c2 = (const oecount *)p2;
996 if (c1->cnt != c2->cnt)
997 return c1->cnt - c2->cnt;
998 else
999 /* If counts are identical, use unique IDs to stabilize qsort. */
1000 return c1->id - c2->id;
1001 }
1002
1003 /* Return TRUE iff STMT represents a builtin call that raises OP
1004 to some exponent. */
1005
1006 static bool
1007 stmt_is_power_of_op (gimple stmt, tree op)
1008 {
1009 tree fndecl;
1010
1011 if (!is_gimple_call (stmt))
1012 return false;
1013
1014 fndecl = gimple_call_fndecl (stmt);
1015
1016 if (!fndecl
1017 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1018 return false;
1019
1020 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1021 {
1022 CASE_FLT_FN (BUILT_IN_POW):
1023 CASE_FLT_FN (BUILT_IN_POWI):
1024 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1025
1026 default:
1027 return false;
1028 }
1029 }
1030
1031 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1032 in place and return the result. Assumes that stmt_is_power_of_op
1033 was previously called for STMT and returned TRUE. */
1034
1035 static HOST_WIDE_INT
1036 decrement_power (gimple stmt)
1037 {
1038 REAL_VALUE_TYPE c, cint;
1039 HOST_WIDE_INT power;
1040 tree arg1;
1041
1042 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1043 {
1044 CASE_FLT_FN (BUILT_IN_POW):
1045 arg1 = gimple_call_arg (stmt, 1);
1046 c = TREE_REAL_CST (arg1);
1047 power = real_to_integer (&c) - 1;
1048 real_from_integer (&cint, VOIDmode, power, 0, 0);
1049 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1050 return power;
1051
1052 CASE_FLT_FN (BUILT_IN_POWI):
1053 arg1 = gimple_call_arg (stmt, 1);
1054 power = TREE_INT_CST_LOW (arg1) - 1;
1055 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1056 return power;
1057
1058 default:
1059 gcc_unreachable ();
1060 }
1061 }
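/* For instance (illustrative), given t = __builtin_powi (x, 5) the
   function above rewrites the call to __builtin_powi (x, 4) and returns
   4.  When the returned exponent is 1 (i.e. the original was 2), the
   caller in zero_one_operation instead forwards X itself to the call's
   single use and deletes the call.  */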
1062
1063 /* Find the single immediate use of STMT's LHS, and replace it
1064 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1065 replace *DEF with OP as well. */
1066
1067 static void
1068 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1069 {
1070 tree lhs;
1071 gimple use_stmt;
1072 use_operand_p use;
1073 gimple_stmt_iterator gsi;
1074
1075 if (is_gimple_call (stmt))
1076 lhs = gimple_call_lhs (stmt);
1077 else
1078 lhs = gimple_assign_lhs (stmt);
1079
1080 gcc_assert (has_single_use (lhs));
1081 single_imm_use (lhs, &use, &use_stmt);
1082 if (lhs == *def)
1083 *def = op;
1084 SET_USE (use, op);
1085 if (TREE_CODE (op) != SSA_NAME)
1086 update_stmt (use_stmt);
1087 gsi = gsi_for_stmt (stmt);
1088 unlink_stmt_vdef (stmt);
1089 gsi_remove (&gsi, true);
1090 release_defs (stmt);
1091 }
1092
1093 /* Walks the linear chain with result *DEF searching for an operation
1094 with operand OP and code OPCODE, removing that from the chain. *DEF
1095 is updated if there is only one operand but no operation left. */
1096
1097 static void
1098 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1099 {
1100 gimple stmt = SSA_NAME_DEF_STMT (*def);
1101
1102 do
1103 {
1104 tree name;
1105
1106 if (opcode == MULT_EXPR
1107 && stmt_is_power_of_op (stmt, op))
1108 {
1109 if (decrement_power (stmt) == 1)
1110 propagate_op_to_single_use (op, stmt, def);
1111 return;
1112 }
1113
1114 name = gimple_assign_rhs1 (stmt);
1115
1116 /* If this is the operation we look for and one of the operands
1117 is ours simply propagate the other operand into the stmts
1118 single use. */
1119 if (gimple_assign_rhs_code (stmt) == opcode
1120 && (name == op
1121 || gimple_assign_rhs2 (stmt) == op))
1122 {
1123 if (name == op)
1124 name = gimple_assign_rhs2 (stmt);
1125 propagate_op_to_single_use (name, stmt, def);
1126 return;
1127 }
1128
1129 /* We might have a multiply of two __builtin_pow* calls, and
1130 the operand might be hiding in the rightmost one. */
1131 if (opcode == MULT_EXPR
1132 && gimple_assign_rhs_code (stmt) == opcode
1133 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1134 && has_single_use (gimple_assign_rhs2 (stmt)))
1135 {
1136 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1137 if (stmt_is_power_of_op (stmt2, op))
1138 {
1139 if (decrement_power (stmt2) == 1)
1140 propagate_op_to_single_use (op, stmt2, def);
1141 return;
1142 }
1143 }
1144
1145 /* Continue walking the chain. */
1146 gcc_assert (name != op
1147 && TREE_CODE (name) == SSA_NAME);
1148 stmt = SSA_NAME_DEF_STMT (name);
1149 }
1150 while (1);
1151 }
1152
1153 /* Returns the UID of STMT if it is non-NULL. Otherwise return 1. */
1154
1155 static inline unsigned
1156 get_stmt_uid_with_default (gimple stmt)
1157 {
1158 return stmt ? gimple_uid (stmt) : 1;
1159 }
1160
1161 /* Builds one statement performing OP1 OPCODE OP2, making a new SSA name
1162 of type TYPE for the result. Places the statement after the definition
1163 of either OP1 or OP2. Returns the new statement. */
1164
1165 static gimple
1166 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1167 {
1168 gimple op1def = NULL, op2def = NULL;
1169 gimple_stmt_iterator gsi;
1170 tree op;
1171 gimple sum;
1172
1173 /* Create the addition statement. */
1174 op = make_ssa_name (type, NULL);
1175 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1176
1177 /* Find an insertion place and insert. */
1178 if (TREE_CODE (op1) == SSA_NAME)
1179 op1def = SSA_NAME_DEF_STMT (op1);
1180 if (TREE_CODE (op2) == SSA_NAME)
1181 op2def = SSA_NAME_DEF_STMT (op2);
1182 if ((!op1def || gimple_nop_p (op1def))
1183 && (!op2def || gimple_nop_p (op2def)))
1184 {
1185 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1186 gimple_set_uid (sum, get_stmt_uid_with_default (gsi_stmt (gsi)));
1187 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1188 }
1189 else if ((!op1def || gimple_nop_p (op1def))
1190 || (op2def && !gimple_nop_p (op2def)
1191 && stmt_dominates_stmt_p (op1def, op2def)))
1192 {
1193 if (gimple_code (op2def) == GIMPLE_PHI)
1194 {
1195 gsi = gsi_after_labels (gimple_bb (op2def));
1196 gimple_set_uid (sum, get_stmt_uid_with_default (gsi_stmt (gsi)));
1197 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1198 }
1199 else
1200 {
1201 if (!stmt_ends_bb_p (op2def))
1202 {
1203 gsi = gsi_for_stmt (op2def);
1204 gimple_set_uid (sum, gimple_uid (op2def));
1205 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1206 }
1207 else
1208 {
1209 edge e;
1210 edge_iterator ei;
1211
1212 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1213 if (e->flags & EDGE_FALLTHRU)
1214 gsi_insert_on_edge_immediate (e, sum);
1215 }
1216 }
1217 }
1218 else
1219 {
1220 if (gimple_code (op1def) == GIMPLE_PHI)
1221 {
1222 gsi = gsi_after_labels (gimple_bb (op1def));
1223 gimple_set_uid (sum, get_stmt_uid_with_default (gsi_stmt (gsi)));
1224 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1225 }
1226 else
1227 {
1228 if (!stmt_ends_bb_p (op1def))
1229 {
1230 gsi = gsi_for_stmt (op1def);
1231 gimple_set_uid (sum, gimple_uid (op1def));
1232 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1233 }
1234 else
1235 {
1236 edge e;
1237 edge_iterator ei;
1238
1239 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1240 if (e->flags & EDGE_FALLTHRU)
1241 gsi_insert_on_edge_immediate (e, sum);
1242 }
1243 }
1244 }
1245 update_stmt (sum);
1246
1247 return sum;
1248 }
1249
1250 /* Perform un-distribution of divisions and multiplications.
1251 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1252 to (A + B) / X for real X.
1253
1254 The algorithm is organized as follows.
1255
1256 - First we walk the addition chain *OPS looking for summands that
1257 are defined by a multiplication or a real division. This results
1258 in the candidates bitmap with relevant indices into *OPS.
1259
1260 - Second we build the chains of multiplications or divisions for
1261 these candidates, counting the number of occurrences of (operand, code)
1262 pairs in all of the candidates chains.
1263
1264 - Third we sort the (operand, code) pairs by number of occurrence and
1265 process them starting with the pair with the most uses.
1266
1267 * For each such pair we walk the candidates again to build a
1268 second candidate bitmap noting all multiplication/division chains
1269 that have at least one occurrence of (operand, code).
1270
1271 * We build an alternate addition chain only covering these
1272 candidates with one (operand, code) operation removed from their
1273 multiplication/division chain.
1274
1275 * The first candidate gets replaced by the alternate addition chain
1276 multiplied/divided by the operand.
1277
1278 * All candidate chains get disabled for further processing and
1279 processing of (operand, code) pairs continues.
1280
1281 The alternate addition chains built are re-processed by the main
1282 reassociation algorithm which allows optimizing a * x * y + b * y * x
1283 to (a + b ) * x * y in one invocation of the reassociation pass. */
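/* A compact illustration (SSA names made up): for the addition chain
   {t1, t2, c} with t1 = a * x and t2 = b * x, both t1 and t2 become
   candidates for the pair (x, MULT_EXPR).  zero_one_operation strips
   one x from each chain, build_and_add_sum emits s = a + b and then
   p = s * x, and the addition chain becomes {p, 0, c}, which the
   caller then re-optimizes into p + c.  */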
1284
1285 static bool
1286 undistribute_ops_list (enum tree_code opcode,
1287 vec<operand_entry_t> *ops, struct loop *loop)
1288 {
1289 unsigned int length = ops->length ();
1290 operand_entry_t oe1;
1291 unsigned i, j;
1292 sbitmap candidates, candidates2;
1293 unsigned nr_candidates, nr_candidates2;
1294 sbitmap_iterator sbi0;
1295 vec<operand_entry_t> *subops;
1296 hash_table <oecount_hasher> ctable;
1297 bool changed = false;
1298 int next_oecount_id = 0;
1299
1300 if (length <= 1
1301 || opcode != PLUS_EXPR)
1302 return false;
1303
1304 /* Build a list of candidates to process. */
1305 candidates = sbitmap_alloc (length);
1306 bitmap_clear (candidates);
1307 nr_candidates = 0;
1308 FOR_EACH_VEC_ELT (*ops, i, oe1)
1309 {
1310 enum tree_code dcode;
1311 gimple oe1def;
1312
1313 if (TREE_CODE (oe1->op) != SSA_NAME)
1314 continue;
1315 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1316 if (!is_gimple_assign (oe1def))
1317 continue;
1318 dcode = gimple_assign_rhs_code (oe1def);
1319 if ((dcode != MULT_EXPR
1320 && dcode != RDIV_EXPR)
1321 || !is_reassociable_op (oe1def, dcode, loop))
1322 continue;
1323
1324 bitmap_set_bit (candidates, i);
1325 nr_candidates++;
1326 }
1327
1328 if (nr_candidates < 2)
1329 {
1330 sbitmap_free (candidates);
1331 return false;
1332 }
1333
1334 if (dump_file && (dump_flags & TDF_DETAILS))
1335 {
1336 fprintf (dump_file, "searching for un-distribute opportunities ");
1337 print_generic_expr (dump_file,
1338 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1339 fprintf (dump_file, " %d\n", nr_candidates);
1340 }
1341
1342 /* Build linearized sub-operand lists and the counting table. */
1343 cvec.create (0);
1344 ctable.create (15);
1345 /* ??? Macro arguments cannot have multi-argument template types in
1346 them. This typedef is needed to work around that limitation. */
1347 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1348 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1349 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1350 {
1351 gimple oedef;
1352 enum tree_code oecode;
1353 unsigned j;
1354
1355 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1356 oecode = gimple_assign_rhs_code (oedef);
1357 linearize_expr_tree (&subops[i], oedef,
1358 associative_tree_code (oecode), false);
1359
1360 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1361 {
1362 oecount c;
1363 void **slot;
1364 size_t idx;
1365 c.oecode = oecode;
1366 c.cnt = 1;
1367 c.id = next_oecount_id++;
1368 c.op = oe1->op;
1369 cvec.safe_push (c);
1370 idx = cvec.length () + 41;
1371 slot = ctable.find_slot ((void *)idx, INSERT);
1372 if (!*slot)
1373 {
1374 *slot = (void *)idx;
1375 }
1376 else
1377 {
1378 cvec.pop ();
1379 cvec[(size_t)*slot - 42].cnt++;
1380 }
1381 }
1382 }
1383 ctable.dispose ();
1384
1385 /* Sort the counting table. */
1386 cvec.qsort (oecount_cmp);
1387
1388 if (dump_file && (dump_flags & TDF_DETAILS))
1389 {
1390 oecount *c;
1391 fprintf (dump_file, "Candidates:\n");
1392 FOR_EACH_VEC_ELT (cvec, j, c)
1393 {
1394 fprintf (dump_file, " %u %s: ", c->cnt,
1395 c->oecode == MULT_EXPR
1396 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1397 print_generic_expr (dump_file, c->op, 0);
1398 fprintf (dump_file, "\n");
1399 }
1400 }
1401
1402 /* Process the (operand, code) pairs in order of most occurrence. */
1403 candidates2 = sbitmap_alloc (length);
1404 while (!cvec.is_empty ())
1405 {
1406 oecount *c = &cvec.last ();
1407 if (c->cnt < 2)
1408 break;
1409
1410 /* Now collect the operands in the outer chain that contain
1411 the common operand in their inner chain. */
1412 bitmap_clear (candidates2);
1413 nr_candidates2 = 0;
1414 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1415 {
1416 gimple oedef;
1417 enum tree_code oecode;
1418 unsigned j;
1419 tree op = (*ops)[i]->op;
1420
1421 /* If we undistributed in this chain already this may be
1422 a constant. */
1423 if (TREE_CODE (op) != SSA_NAME)
1424 continue;
1425
1426 oedef = SSA_NAME_DEF_STMT (op);
1427 oecode = gimple_assign_rhs_code (oedef);
1428 if (oecode != c->oecode)
1429 continue;
1430
1431 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1432 {
1433 if (oe1->op == c->op)
1434 {
1435 bitmap_set_bit (candidates2, i);
1436 ++nr_candidates2;
1437 break;
1438 }
1439 }
1440 }
1441
1442 if (nr_candidates2 >= 2)
1443 {
1444 operand_entry_t oe1, oe2;
1445 gimple prod;
1446 int first = bitmap_first_set_bit (candidates2);
1447
1448 /* Build the new addition chain. */
1449 oe1 = (*ops)[first];
1450 if (dump_file && (dump_flags & TDF_DETAILS))
1451 {
1452 fprintf (dump_file, "Building (");
1453 print_generic_expr (dump_file, oe1->op, 0);
1454 }
1455 zero_one_operation (&oe1->op, c->oecode, c->op);
1456 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1457 {
1458 gimple sum;
1459 oe2 = (*ops)[i];
1460 if (dump_file && (dump_flags & TDF_DETAILS))
1461 {
1462 fprintf (dump_file, " + ");
1463 print_generic_expr (dump_file, oe2->op, 0);
1464 }
1465 zero_one_operation (&oe2->op, c->oecode, c->op);
1466 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1467 oe1->op, oe2->op, opcode);
1468 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1469 oe2->rank = 0;
1470 oe1->op = gimple_get_lhs (sum);
1471 }
1472
1473 /* Apply the multiplication/division. */
1474 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1475 oe1->op, c->op, c->oecode);
1476 if (dump_file && (dump_flags & TDF_DETAILS))
1477 {
1478 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1479 print_generic_expr (dump_file, c->op, 0);
1480 fprintf (dump_file, "\n");
1481 }
1482
1483 /* Record it in the addition chain and disable further
1484 undistribution with this op. */
1485 oe1->op = gimple_assign_lhs (prod);
1486 oe1->rank = get_rank (oe1->op);
1487 subops[first].release ();
1488
1489 changed = true;
1490 }
1491
1492 cvec.pop ();
1493 }
1494
1495 for (i = 0; i < ops->length (); ++i)
1496 subops[i].release ();
1497 free (subops);
1498 cvec.release ();
1499 sbitmap_free (candidates);
1500 sbitmap_free (candidates2);
1501
1502 return changed;
1503 }
1504
1505 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1506 expression, examine the other OPS to see if any of them are comparisons
1507 of the same values, which we may be able to combine or eliminate.
1508 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1509
1510 static bool
1511 eliminate_redundant_comparison (enum tree_code opcode,
1512 vec<operand_entry_t> *ops,
1513 unsigned int currindex,
1514 operand_entry_t curr)
1515 {
1516 tree op1, op2;
1517 enum tree_code lcode, rcode;
1518 gimple def1, def2;
1519 int i;
1520 operand_entry_t oe;
1521
1522 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1523 return false;
1524
1525 /* Check that CURR is a comparison. */
1526 if (TREE_CODE (curr->op) != SSA_NAME)
1527 return false;
1528 def1 = SSA_NAME_DEF_STMT (curr->op);
1529 if (!is_gimple_assign (def1))
1530 return false;
1531 lcode = gimple_assign_rhs_code (def1);
1532 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1533 return false;
1534 op1 = gimple_assign_rhs1 (def1);
1535 op2 = gimple_assign_rhs2 (def1);
1536
1537 /* Now look for a similar comparison in the remaining OPS. */
1538 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1539 {
1540 tree t;
1541
1542 if (TREE_CODE (oe->op) != SSA_NAME)
1543 continue;
1544 def2 = SSA_NAME_DEF_STMT (oe->op);
1545 if (!is_gimple_assign (def2))
1546 continue;
1547 rcode = gimple_assign_rhs_code (def2);
1548 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1549 continue;
1550
1551 /* If we got here, we have a match. See if we can combine the
1552 two comparisons. */
1553 if (opcode == BIT_IOR_EXPR)
1554 t = maybe_fold_or_comparisons (lcode, op1, op2,
1555 rcode, gimple_assign_rhs1 (def2),
1556 gimple_assign_rhs2 (def2));
1557 else
1558 t = maybe_fold_and_comparisons (lcode, op1, op2,
1559 rcode, gimple_assign_rhs1 (def2),
1560 gimple_assign_rhs2 (def2));
1561 if (!t)
1562 continue;
1563
1564 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1565 always give us a boolean_type_node value back. If the original
1566 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1567 we need to convert. */
1568 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1569 t = fold_convert (TREE_TYPE (curr->op), t);
1570
1571 if (TREE_CODE (t) != INTEGER_CST
1572 && !operand_equal_p (t, curr->op, 0))
1573 {
1574 enum tree_code subcode;
1575 tree newop1, newop2;
1576 if (!COMPARISON_CLASS_P (t))
1577 continue;
1578 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1579 STRIP_USELESS_TYPE_CONVERSION (newop1);
1580 STRIP_USELESS_TYPE_CONVERSION (newop2);
1581 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1582 continue;
1583 }
1584
1585 if (dump_file && (dump_flags & TDF_DETAILS))
1586 {
1587 fprintf (dump_file, "Equivalence: ");
1588 print_generic_expr (dump_file, curr->op, 0);
1589 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1590 print_generic_expr (dump_file, oe->op, 0);
1591 fprintf (dump_file, " -> ");
1592 print_generic_expr (dump_file, t, 0);
1593 fprintf (dump_file, "\n");
1594 }
1595
1596 /* Now we can delete oe, as it has been subsumed by the new combined
1597 expression t. */
1598 ops->ordered_remove (i);
1599 reassociate_stats.ops_eliminated ++;
1600
1601 /* If t is the same as curr->op, we're done. Otherwise we must
1602 replace curr->op with t. Special case is if we got a constant
1603 back, in which case we add it to the end instead of in place of
1604 the current entry. */
1605 if (TREE_CODE (t) == INTEGER_CST)
1606 {
1607 ops->ordered_remove (currindex);
1608 add_to_ops_vec (ops, t);
1609 }
1610 else if (!operand_equal_p (t, curr->op, 0))
1611 {
1612 gimple sum;
1613 enum tree_code subcode;
1614 tree newop1;
1615 tree newop2;
1616 gcc_assert (COMPARISON_CLASS_P (t));
1617 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1618 STRIP_USELESS_TYPE_CONVERSION (newop1);
1619 STRIP_USELESS_TYPE_CONVERSION (newop2);
1620 gcc_checking_assert (is_gimple_val (newop1)
1621 && is_gimple_val (newop2));
1622 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1623 curr->op = gimple_get_lhs (sum);
1624 }
1625 return true;
1626 }
1627
1628 return false;
1629 }
1630
1631 /* Perform various identities and other optimizations on the list of
1632 operand entries, stored in OPS. The tree code for the binary
1633 operation between all the operands is OPCODE. */
1634
1635 static void
1636 optimize_ops_list (enum tree_code opcode,
1637 vec<operand_entry_t> *ops)
1638 {
1639 unsigned int length = ops->length ();
1640 unsigned int i;
1641 operand_entry_t oe;
1642 operand_entry_t oelast = NULL;
1643 bool iterate = false;
1644
1645 if (length == 1)
1646 return;
1647
1648 oelast = ops->last ();
1649
1650 /* If the last two are constants, pop the constants off, merge them
1651 and try the next two. */
1652 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1653 {
1654 operand_entry_t oelm1 = (*ops)[length - 2];
1655
1656 if (oelm1->rank == 0
1657 && is_gimple_min_invariant (oelm1->op)
1658 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1659 TREE_TYPE (oelast->op)))
1660 {
1661 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1662 oelm1->op, oelast->op);
1663
1664 if (folded && is_gimple_min_invariant (folded))
1665 {
1666 if (dump_file && (dump_flags & TDF_DETAILS))
1667 fprintf (dump_file, "Merging constants\n");
1668
1669 ops->pop ();
1670 ops->pop ();
1671
1672 add_to_ops_vec (ops, folded);
1673 reassociate_stats.constants_eliminated++;
1674
1675 optimize_ops_list (opcode, ops);
1676 return;
1677 }
1678 }
1679 }
1680
1681 eliminate_using_constants (opcode, ops);
1682 oelast = NULL;
1683
1684 for (i = 0; ops->iterate (i, &oe);)
1685 {
1686 bool done = false;
1687
1688 if (eliminate_not_pairs (opcode, ops, i, oe))
1689 return;
1690 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1691 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1692 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1693 {
1694 if (done)
1695 return;
1696 iterate = true;
1697 oelast = NULL;
1698 continue;
1699 }
1700 oelast = oe;
1701 i++;
1702 }
1703
1704 length = ops->length ();
1705 oelast = ops->last ();
1706
1707 if (iterate)
1708 optimize_ops_list (opcode, ops);
1709 }
1710
1711 /* The following functions are subroutines to optimize_range_tests and allow
1712 it to try to change a logical combination of comparisons into a range
1713 test.
1714
1715 For example, both
1716 X == 2 || X == 5 || X == 3 || X == 4
1717 and
1718 X >= 2 && X <= 5
1719 are converted to
1720 (unsigned) (X - 2) <= 3
1721
1722 For more information see comments above fold_test_range in fold-const.c,
1723 this implementation is for GIMPLE. */
1724
1725 struct range_entry
1726 {
1727 tree exp;
1728 tree low;
1729 tree high;
1730 bool in_p;
1731 bool strict_overflow_p;
1732 unsigned int idx, next;
1733 };
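/* For instance, for X >= 2 && X <= 5 from the comment above, the two
   comparisons would (roughly) yield the entries

     { exp = X, low = 2,    high = NULL, in_p = true }   for X >= 2
     { exp = X, low = NULL, high = 5,    in_p = true }   for X <= 5

   which the range-test machinery can merge into a single entry
   { exp = X, low = 2, high = 5, in_p = true }, i.e. the check
   (unsigned) (X - 2) <= 3.  */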
1734
1735 /* This is similar to make_range in fold-const.c, but on top of
1736 GIMPLE instead of trees. If EXP is non-NULL, it should be
1737 an SSA_NAME and STMT argument is ignored, otherwise STMT
1738 argument should be a GIMPLE_COND. */
1739
1740 static void
1741 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1742 {
1743 int in_p;
1744 tree low, high;
1745 bool is_bool, strict_overflow_p;
1746
1747 r->exp = NULL_TREE;
1748 r->in_p = false;
1749 r->strict_overflow_p = false;
1750 r->low = NULL_TREE;
1751 r->high = NULL_TREE;
1752 if (exp != NULL_TREE
1753 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1754 return;
1755
1756 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1757 and see if we can refine the range. Some of the cases below may not
1758 happen, but it doesn't seem worth worrying about this. We "continue"
1759 the outer loop when we've changed something; otherwise we "break"
1760 the switch, which will "break" the while. */
1761 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1762 high = low;
1763 in_p = 0;
1764 strict_overflow_p = false;
1765 is_bool = false;
1766 if (exp == NULL_TREE)
1767 is_bool = true;
1768 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1769 {
1770 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1771 is_bool = true;
1772 else
1773 return;
1774 }
1775 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1776 is_bool = true;
1777
1778 while (1)
1779 {
1780 enum tree_code code;
1781 tree arg0, arg1, exp_type;
1782 tree nexp;
1783 location_t loc;
1784
1785 if (exp != NULL_TREE)
1786 {
1787 if (TREE_CODE (exp) != SSA_NAME)
1788 break;
1789
1790 stmt = SSA_NAME_DEF_STMT (exp);
1791 if (!is_gimple_assign (stmt))
1792 break;
1793
1794 code = gimple_assign_rhs_code (stmt);
1795 arg0 = gimple_assign_rhs1 (stmt);
1796 arg1 = gimple_assign_rhs2 (stmt);
1797 exp_type = TREE_TYPE (exp);
1798 }
1799 else
1800 {
1801 code = gimple_cond_code (stmt);
1802 arg0 = gimple_cond_lhs (stmt);
1803 arg1 = gimple_cond_rhs (stmt);
1804 exp_type = boolean_type_node;
1805 }
1806
1807 if (TREE_CODE (arg0) != SSA_NAME)
1808 break;
1809 loc = gimple_location (stmt);
1810 switch (code)
1811 {
1812 case BIT_NOT_EXPR:
1813 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1814 /* Ensure the range is either +[-,0], +[0,0],
1815 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1816 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1817 or similar expression of unconditional true or
1818 false, it should not be negated. */
1819 && ((high && integer_zerop (high))
1820 || (low && integer_onep (low))))
1821 {
1822 in_p = !in_p;
1823 exp = arg0;
1824 continue;
1825 }
1826 break;
1827 case SSA_NAME:
1828 exp = arg0;
1829 continue;
1830 CASE_CONVERT:
1831 if (is_bool)
1832 goto do_default;
1833 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1834 {
1835 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1836 is_bool = true;
1837 else
1838 return;
1839 }
1840 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1841 is_bool = true;
1842 goto do_default;
1843 case EQ_EXPR:
1844 case NE_EXPR:
1845 case LT_EXPR:
1846 case LE_EXPR:
1847 case GE_EXPR:
1848 case GT_EXPR:
1849 is_bool = true;
1850 /* FALLTHRU */
1851 default:
1852 if (!is_bool)
1853 return;
1854 do_default:
1855 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1856 &low, &high, &in_p,
1857 &strict_overflow_p);
1858 if (nexp != NULL_TREE)
1859 {
1860 exp = nexp;
1861 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1862 continue;
1863 }
1864 break;
1865 }
1866 break;
1867 }
1868 if (is_bool)
1869 {
1870 r->exp = exp;
1871 r->in_p = in_p;
1872 r->low = low;
1873 r->high = high;
1874 r->strict_overflow_p = strict_overflow_p;
1875 }
1876 }
1877
1878 /* Comparison function for qsort. Sort entries
1879 without SSA_NAME exp first, then with SSA_NAMEs sorted
1880 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1881 by increasing ->low and if ->low is the same, by increasing
1882 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1883 maximum. */
1884
1885 static int
1886 range_entry_cmp (const void *a, const void *b)
1887 {
1888 const struct range_entry *p = (const struct range_entry *) a;
1889 const struct range_entry *q = (const struct range_entry *) b;
1890
1891 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1892 {
1893 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1894 {
1895 /* Group range_entries for the same SSA_NAME together. */
1896 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1897 return -1;
1898 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1899 return 1;
1900 /* If ->low is different, NULL low goes first, then by
1901 ascending low. */
1902 if (p->low != NULL_TREE)
1903 {
1904 if (q->low != NULL_TREE)
1905 {
1906 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1907 p->low, q->low);
1908 if (tem && integer_onep (tem))
1909 return -1;
1910 tem = fold_binary (GT_EXPR, boolean_type_node,
1911 p->low, q->low);
1912 if (tem && integer_onep (tem))
1913 return 1;
1914 }
1915 else
1916 return 1;
1917 }
1918 else if (q->low != NULL_TREE)
1919 return -1;
1920 /* If ->high is different, NULL high goes last, before that by
1921 ascending high. */
1922 if (p->high != NULL_TREE)
1923 {
1924 if (q->high != NULL_TREE)
1925 {
1926 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1927 p->high, q->high);
1928 if (tem && integer_onep (tem))
1929 return -1;
1930 tem = fold_binary (GT_EXPR, boolean_type_node,
1931 p->high, q->high);
1932 if (tem && integer_onep (tem))
1933 return 1;
1934 }
1935 else
1936 return -1;
1937 }
1938 else if (p->high != NULL_TREE)
1939 return 1;
1940 /* If both ranges are the same, sort below by ascending idx. */
1941 }
1942 else
1943 return 1;
1944 }
1945 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1946 return -1;
1947
1948 if (p->idx < q->idx)
1949 return -1;
1950 else
1951 {
1952 gcc_checking_assert (p->idx > q->idx);
1953 return 1;
1954 }
1955 }
1956
1957 /* Helper routine of optimize_range_tests.
1958 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1959 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1960 OPCODE and OPS are arguments of optimize_range_tests. Return
1961 true if the range merge has been successful.
1962 If OPCODE is ERROR_MARK, this is called from within
1963 maybe_optimize_range_tests and is performing inter-bb range optimization.
1964 Changes should then be performed right away, and whether an op is
1965 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1966
1967 static bool
1968 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1969 unsigned int count, enum tree_code opcode,
1970 vec<operand_entry_t> *ops, tree exp, bool in_p,
1971 tree low, tree high, bool strict_overflow_p)
1972 {
1973 operand_entry_t oe = (*ops)[range->idx];
1974 tree op = oe->op;
1975 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1976 location_t loc = gimple_location (stmt);
1977 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1978 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1979 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1980 gimple_stmt_iterator gsi;
1981
1982 if (tem == NULL_TREE)
1983 return false;
1984
1985 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1986 warning_at (loc, OPT_Wstrict_overflow,
1987 "assuming signed overflow does not occur "
1988 "when simplifying range test");
1989
1990 if (dump_file && (dump_flags & TDF_DETAILS))
1991 {
1992 struct range_entry *r;
1993 fprintf (dump_file, "Optimizing range tests ");
1994 print_generic_expr (dump_file, range->exp, 0);
1995 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1996 print_generic_expr (dump_file, range->low, 0);
1997 fprintf (dump_file, ", ");
1998 print_generic_expr (dump_file, range->high, 0);
1999 fprintf (dump_file, "]");
2000 for (r = otherrange; r < otherrange + count; r++)
2001 {
2002 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2003 print_generic_expr (dump_file, r->low, 0);
2004 fprintf (dump_file, ", ");
2005 print_generic_expr (dump_file, r->high, 0);
2006 fprintf (dump_file, "]");
2007 }
2008 fprintf (dump_file, "\n into ");
2009 print_generic_expr (dump_file, tem, 0);
2010 fprintf (dump_file, "\n");
2011 }
2012
2013 if (opcode == BIT_IOR_EXPR
2014 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2015 tem = invert_truthvalue_loc (loc, tem);
2016
2017 tem = fold_convert_loc (loc, optype, tem);
2018 gsi = gsi_for_stmt (stmt);
2019 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2020 GSI_SAME_STMT);
2021
2022 /* If doing inter-bb range test optimization, update the
2023 stmts immediately. Start with changing the first range test
2024 immediate use to the new value (TEM), or, if the first range
2025 test is a GIMPLE_COND stmt, change that condition. */
2026 if (opcode == ERROR_MARK)
2027 {
2028 if (op)
2029 {
2030 imm_use_iterator iter;
2031 use_operand_p use_p;
2032 gimple use_stmt;
2033
2034 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
2035 {
2036 if (is_gimple_debug (use_stmt))
2037 continue;
2038 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2039 SET_USE (use_p, tem);
2040 update_stmt (use_stmt);
2041 }
2042 }
2043 else
2044 {
2045 gimple_cond_set_code (stmt, NE_EXPR);
2046 gimple_cond_set_lhs (stmt, tem);
2047 gimple_cond_set_rhs (stmt, boolean_false_node);
2048 update_stmt (stmt);
2049 }
2050 }
2051 oe->op = tem;
2052 range->exp = exp;
2053 range->low = low;
2054 range->high = high;
2055 range->in_p = in_p;
2056 range->strict_overflow_p = false;
2057
2058 for (range = otherrange; range < otherrange + count; range++)
2059 {
2060 oe = (*ops)[range->idx];
2061 /* Now change all the other range test immediate uses, so that
2062 those tests will be optimized away. */
2063 if (opcode == ERROR_MARK)
2064 {
2065 if (oe->op)
2066 {
2067 imm_use_iterator iter;
2068 use_operand_p use_p;
2069 gimple use_stmt;
2070
2071 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2072 {
2073 if (is_gimple_debug (use_stmt))
2074 continue;
2075 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2076 adjust it into _7 = _9;. */
2077 if (is_gimple_assign (use_stmt)
2078 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2079 {
2080 tree expr = NULL_TREE;
2081 if (oe->op == gimple_assign_rhs1 (use_stmt))
2082 expr = gimple_assign_rhs2 (use_stmt);
2083 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2084 expr = gimple_assign_rhs1 (use_stmt);
2085 if (expr
2086 && expr != oe->op
2087 && TREE_CODE (expr) == SSA_NAME)
2088 {
2089 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2090 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2091 expr, NULL_TREE);
2092 update_stmt (use_stmt);
2093 continue;
2094 }
2095 }
2096 /* If imm use of _8 is a statement like _7 = (int) _8;,
2097 adjust it into _7 = 0; or _7 = 1;. */
2098 if (gimple_assign_cast_p (use_stmt)
2099 && oe->op == gimple_assign_rhs1 (use_stmt))
2100 {
2101 tree lhs = gimple_assign_lhs (use_stmt);
2102 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2103 {
2104 gimple_stmt_iterator gsi2
2105 = gsi_for_stmt (use_stmt);
2106 tree expr = build_int_cst (TREE_TYPE (lhs),
2107 oe->rank == BIT_IOR_EXPR
2108 ? 0 : 1);
2109 gimple_assign_set_rhs_with_ops (&gsi2,
2110 INTEGER_CST,
2111 expr, NULL_TREE);
2112 update_stmt (use_stmt);
2113 continue;
2114 }
2115 }
2116 /* Otherwise replace the use with 0 or 1. */
2117 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2118 SET_USE (use_p,
2119 build_int_cst (TREE_TYPE (oe->op),
2120 oe->rank == BIT_IOR_EXPR
2121 ? 0 : 1));
2122 update_stmt (use_stmt);
2123 }
2124 }
2125 else
2126 {
2127 /* If range test was a GIMPLE_COND, simply change it
2128 into an always false or always true condition. */
2129 stmt = last_stmt (BASIC_BLOCK (oe->id));
2130 if (oe->rank == BIT_IOR_EXPR)
2131 gimple_cond_make_false (stmt);
2132 else
2133 gimple_cond_make_true (stmt);
2134 update_stmt (stmt);
2135 }
2136 }
2137 oe->op = error_mark_node;
2138 range->exp = NULL_TREE;
2139 }
2140 return true;
2141 }
2142
2143 /* Optimize X == CST1 || X == CST2
2144 if popcount (CST1 ^ CST2) == 1 into
2145 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2146 Similarly for ranges. E.g.
2147 X != 2 && X != 3 && X != 10 && X != 11
2148 will be transformed by the previous optimization into
2149 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2150 and this loop can transform that into
2151 !(((X & ~8) - 2U) <= 1U). */
2152
2153 static bool
2154 optimize_range_tests_xor (enum tree_code opcode, tree type,
2155 tree lowi, tree lowj, tree highi, tree highj,
2156 vec<operand_entry_t> *ops,
2157 struct range_entry *rangei,
2158 struct range_entry *rangej)
2159 {
2160 tree lowxor, highxor, tem, exp;
2161 /* Check highi ^ lowi == highj ^ lowj and
2162 popcount (highi ^ lowi) == 1. */
2163 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2164 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2165 return false;
2166 if (tree_log2 (lowxor) < 0)
2167 return false;
2168 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2169 if (!tree_int_cst_equal (lowxor, highxor))
2170 return false;
2171
2172 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2173 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2174 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2175 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2176 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2177 rangei->in_p, lowj, highj,
2178 rangei->strict_overflow_p
2179 || rangej->strict_overflow_p))
2180 return true;
2181 return false;
2182 }
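
/* For illustration only (a sketch, not used by the pass): with CST1 = 2
   and CST2 = 3 we have CST1 ^ CST2 = 1, a single set bit, so the two
   equality tests collapse into one masked comparison.  The names x and
   example_range_xor below are hypothetical.  */
#if 0
static int
example_range_xor (unsigned int x)
{
  /* x == 2 || x == 3 becomes (x & ~1u) == 2u: clearing the only
     differing bit maps both 2 and 3 to 2 and no other value to 2.  */
  return (x & ~1u) == 2u;
}
#endif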
2183
2184 /* Optimize X == CST1 || X == CST2
2185 if popcount (CST2 - CST1) == 1 into
2186 ((X - CST1) & ~(CST2 - CST1)) == 0.
2187 Similarly for ranges. E.g.
2188 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2189 || X == 75 || X == 45
2190 will be transformed by the previous optimization into
2191 (X - 43U) <= 3U || (X - 75U) <= 3U
2192 and this loop can transform that into
2193 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2194 static bool
2195 optimize_range_tests_diff (enum tree_code opcode, tree type,
2196 tree lowi, tree lowj, tree highi, tree highj,
2197 vec<operand_entry_t> *ops,
2198 struct range_entry *rangei,
2199 struct range_entry *rangej)
2200 {
2201 tree tem1, tem2, mask;
2202 /* Check highi - lowi == highj - lowj. */
2203 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2204 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2205 return false;
2206 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2207 if (!tree_int_cst_equal (tem1, tem2))
2208 return false;
2209 /* Check popcount (lowj - lowi) == 1. */
2210 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2211 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2212 return false;
2213 if (tree_log2 (tem1) < 0)
2214 return false;
2215
2216 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2217 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2218 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2219 lowj = build_int_cst (type, 0);
2220 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2221 rangei->in_p, lowj, tem2,
2222 rangei->strict_overflow_p
2223 || rangej->strict_overflow_p))
2224 return true;
2225 return false;
2226 }
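
/* For illustration only (a sketch, not used by the pass): with CST1 = 43
   and CST2 = 75 we have CST2 - CST1 = 32, a single set bit, so the two
   equality tests collapse into a subtract and a masked compare against
   zero.  The names x and example_range_diff below are hypothetical.  */
#if 0
static int
example_range_diff (unsigned int x)
{
  /* x == 43 || x == 75 becomes ((x - 43u) & ~32u) == 0: the subtraction
     maps {43, 75} to {0, 32}, and masking off the single differing bit
     maps exactly those two values to zero.  */
  return ((x - 43u) & ~32u) == 0;
}
#endif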
2227
2228 /* Helper function that performs the checks common to
2229 optimize_range_tests_xor and optimize_range_tests_diff.
2230 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor,
2231 otherwise it calls optimize_range_tests_diff. */
2232
2233 static bool
2234 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2235 bool optimize_xor, vec<operand_entry_t> *ops,
2236 struct range_entry *ranges)
2237 {
2238 int i, j;
2239 bool any_changes = false;
2240 for (i = first; i < length; i++)
2241 {
2242 tree lowi, highi, lowj, highj, type, tem;
2243
2244 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2245 continue;
2246 type = TREE_TYPE (ranges[i].exp);
2247 if (!INTEGRAL_TYPE_P (type))
2248 continue;
2249 lowi = ranges[i].low;
2250 if (lowi == NULL_TREE)
2251 lowi = TYPE_MIN_VALUE (type);
2252 highi = ranges[i].high;
2253 if (highi == NULL_TREE)
2254 continue;
2255 for (j = i + 1; j < length && j < i + 64; j++)
2256 {
2257 bool changes;
2258 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2259 continue;
2260 lowj = ranges[j].low;
2261 if (lowj == NULL_TREE)
2262 continue;
2263 highj = ranges[j].high;
2264 if (highj == NULL_TREE)
2265 highj = TYPE_MAX_VALUE (type);
2266 /* Check lowj > highi. */
2267 tem = fold_binary (GT_EXPR, boolean_type_node,
2268 lowj, highi);
2269 if (tem == NULL_TREE || !integer_onep (tem))
2270 continue;
2271 if (optimize_xor)
2272 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2273 highi, highj, ops,
2274 ranges + i, ranges + j);
2275 else
2276 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2277 highi, highj, ops,
2278 ranges + i, ranges + j);
2279 if (changes)
2280 {
2281 any_changes = true;
2282 break;
2283 }
2284 }
2285 }
2286 return any_changes;
2287 }
2288
2289 /* Optimize range tests, similarly to how fold_range_test optimizes
2290 them on trees. The tree code for the binary
2291 operation between all the operands is OPCODE.
2292 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2293 maybe_optimize_range_tests for inter-bb range optimization.
2294 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2295 GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
2296 the actual opcode. */
2297
2298 static void
2299 optimize_range_tests (enum tree_code opcode,
2300 vec<operand_entry_t> *ops)
2301 {
2302 unsigned int length = ops->length (), i, j, first;
2303 operand_entry_t oe;
2304 struct range_entry *ranges;
2305 bool any_changes = false;
2306
2307 if (length == 1)
2308 return;
2309
2310 ranges = XNEWVEC (struct range_entry, length);
2311 for (i = 0; i < length; i++)
2312 {
2313 oe = (*ops)[i];
2314 ranges[i].idx = i;
2315 init_range_entry (ranges + i, oe->op,
2316 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2317 /* For |, invert it now; we will invert it again before emitting
2318 the optimized expression. */
2319 if (opcode == BIT_IOR_EXPR
2320 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2321 ranges[i].in_p = !ranges[i].in_p;
2322 }
2323
2324 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2325 for (i = 0; i < length; i++)
2326 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2327 break;
2328
2329 /* Try to merge ranges. */
2330 for (first = i; i < length; i++)
2331 {
2332 tree low = ranges[i].low;
2333 tree high = ranges[i].high;
2334 int in_p = ranges[i].in_p;
2335 bool strict_overflow_p = ranges[i].strict_overflow_p;
2336 int update_fail_count = 0;
2337
2338 for (j = i + 1; j < length; j++)
2339 {
2340 if (ranges[i].exp != ranges[j].exp)
2341 break;
2342 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2343 ranges[j].in_p, ranges[j].low, ranges[j].high))
2344 break;
2345 strict_overflow_p |= ranges[j].strict_overflow_p;
2346 }
2347
2348 if (j == i + 1)
2349 continue;
2350
2351 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2352 ops, ranges[i].exp, in_p, low, high,
2353 strict_overflow_p))
2354 {
2355 i = j - 1;
2356 any_changes = true;
2357 }
2358 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2359 while update_range_test would fail. */
2360 else if (update_fail_count == 64)
2361 i = j - 1;
2362 else
2363 ++update_fail_count;
2364 }
2365
2366 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2367 ops, ranges);
2368
2369 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2370 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2371 ops, ranges);
2372
2373 if (any_changes && opcode != ERROR_MARK)
2374 {
2375 j = 0;
2376 FOR_EACH_VEC_ELT (*ops, i, oe)
2377 {
2378 if (oe->op == error_mark_node)
2379 continue;
2380 else if (i != j)
2381 (*ops)[j] = oe;
2382 j++;
2383 }
2384 ops->truncate (j);
2385 }
2386
2387 XDELETEVEC (ranges);
2388 }
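
/* A hedged example of what the merging loop above can achieve (the
   values are hypothetical): for a test like x == 10 || x == 11 || x == 12
   whose comparisons were already combined with BIT_IOR_EXPR, the three
   operands yield the ranges +[10,10], +[11,11] and +[12,12].  They are
   inverted for |, sorted, combined by merge_ranges into a single range
   covering [10,12], and re-emitted by update_range_test (inverting
   back) as one check roughly equivalent to (unsigned) (x - 10) <= 2.  */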
2389
2390 /* Return true if STMT is a cast like:
2391 <bb N>:
2392 ...
2393 _123 = (int) _234;
2394
2395 <bb M>:
2396 # _345 = PHI <_123(N), 1(...), 1(...)>
2397 where _234 has bool type, _123 has single use and
2398 bb N has a single successor M. This is commonly used in
2399 the last block of a range test. */
2400
2401 static bool
2402 final_range_test_p (gimple stmt)
2403 {
2404 basic_block bb, rhs_bb;
2405 edge e;
2406 tree lhs, rhs;
2407 use_operand_p use_p;
2408 gimple use_stmt;
2409
2410 if (!gimple_assign_cast_p (stmt))
2411 return false;
2412 bb = gimple_bb (stmt);
2413 if (!single_succ_p (bb))
2414 return false;
2415 e = single_succ_edge (bb);
2416 if (e->flags & EDGE_COMPLEX)
2417 return false;
2418
2419 lhs = gimple_assign_lhs (stmt);
2420 rhs = gimple_assign_rhs1 (stmt);
2421 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2422 || TREE_CODE (rhs) != SSA_NAME
2423 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2424 return false;
2425
2426 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2427 if (!single_imm_use (lhs, &use_p, &use_stmt))
2428 return false;
2429
2430 if (gimple_code (use_stmt) != GIMPLE_PHI
2431 || gimple_bb (use_stmt) != e->dest)
2432 return false;
2433
2434 /* And that the rhs is defined in the same loop. */
2435 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2436 if (rhs_bb == NULL
2437 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2438 return false;
2439
2440 return true;
2441 }
2442
2443 /* Return true if BB is a suitable basic block for inter-bb range test
2444 optimization. If BACKWARD is true, BB should be the only predecessor
2445 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2446 or compared against to find a common basic block to which all the
2447 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
2448 should be the only predecessor of BB. */
2449
2450 static bool
2451 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2452 bool backward)
2453 {
2454 edge_iterator ei, ei2;
2455 edge e, e2;
2456 gimple stmt;
2457 gimple_stmt_iterator gsi;
2458 bool other_edge_seen = false;
2459 bool is_cond;
2460
2461 if (test_bb == bb)
2462 return false;
2463 /* Check last stmt first. */
2464 stmt = last_stmt (bb);
2465 if (stmt == NULL
2466 || (gimple_code (stmt) != GIMPLE_COND
2467 && (backward || !final_range_test_p (stmt)))
2468 || gimple_visited_p (stmt)
2469 || stmt_could_throw_p (stmt)
2470 || *other_bb == bb)
2471 return false;
2472 is_cond = gimple_code (stmt) == GIMPLE_COND;
2473 if (is_cond)
2474 {
2475 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2476 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2477 to *OTHER_BB (if not set yet, try to find it out). */
2478 if (EDGE_COUNT (bb->succs) != 2)
2479 return false;
2480 FOR_EACH_EDGE (e, ei, bb->succs)
2481 {
2482 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2483 return false;
2484 if (e->dest == test_bb)
2485 {
2486 if (backward)
2487 continue;
2488 else
2489 return false;
2490 }
2491 if (e->dest == bb)
2492 return false;
2493 if (*other_bb == NULL)
2494 {
2495 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2496 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2497 return false;
2498 else if (e->dest == e2->dest)
2499 *other_bb = e->dest;
2500 if (*other_bb == NULL)
2501 return false;
2502 }
2503 if (e->dest == *other_bb)
2504 other_edge_seen = true;
2505 else if (backward)
2506 return false;
2507 }
2508 if (*other_bb == NULL || !other_edge_seen)
2509 return false;
2510 }
2511 else if (single_succ (bb) != *other_bb)
2512 return false;
2513
2514 /* Now check all PHIs of *OTHER_BB. */
2515 e = find_edge (bb, *other_bb);
2516 e2 = find_edge (test_bb, *other_bb);
2517 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2518 {
2519 gimple phi = gsi_stmt (gsi);
2520 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2521 corresponding to BB and TEST_BB predecessor must be the same. */
2522 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2523 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2524 {
2525 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2526 one of the PHIs should have the lhs of the last stmt in
2527 that block as PHI arg and that PHI should have 0 or 1
2528 corresponding to it in all other range test basic blocks
2529 considered. */
2530 if (!is_cond)
2531 {
2532 if (gimple_phi_arg_def (phi, e->dest_idx)
2533 == gimple_assign_lhs (stmt)
2534 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2535 || integer_onep (gimple_phi_arg_def (phi,
2536 e2->dest_idx))))
2537 continue;
2538 }
2539 else
2540 {
2541 gimple test_last = last_stmt (test_bb);
2542 if (gimple_code (test_last) != GIMPLE_COND
2543 && gimple_phi_arg_def (phi, e2->dest_idx)
2544 == gimple_assign_lhs (test_last)
2545 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2546 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2547 continue;
2548 }
2549
2550 return false;
2551 }
2552 }
2553 return true;
2554 }
2555
2556 /* Return true if BB doesn't have side-effects that would disallow
2557 range test optimization, all SSA_NAMEs set in the bb are consumed
2558 in the bb and there are no PHIs. */
2559
2560 static bool
2561 no_side_effect_bb (basic_block bb)
2562 {
2563 gimple_stmt_iterator gsi;
2564 gimple last;
2565
2566 if (!gimple_seq_empty_p (phi_nodes (bb)))
2567 return false;
2568 last = last_stmt (bb);
2569 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2570 {
2571 gimple stmt = gsi_stmt (gsi);
2572 tree lhs;
2573 imm_use_iterator imm_iter;
2574 use_operand_p use_p;
2575
2576 if (is_gimple_debug (stmt))
2577 continue;
2578 if (gimple_has_side_effects (stmt))
2579 return false;
2580 if (stmt == last)
2581 return true;
2582 if (!is_gimple_assign (stmt))
2583 return false;
2584 lhs = gimple_assign_lhs (stmt);
2585 if (TREE_CODE (lhs) != SSA_NAME)
2586 return false;
2587 if (gimple_assign_rhs_could_trap_p (stmt))
2588 return false;
2589 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2590 {
2591 gimple use_stmt = USE_STMT (use_p);
2592 if (is_gimple_debug (use_stmt))
2593 continue;
2594 if (gimple_bb (use_stmt) != bb)
2595 return false;
2596 }
2597 }
2598 return false;
2599 }
2600
2601 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2602 return true and fill in *OPS recursively. */
2603
2604 static bool
2605 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2606 struct loop *loop)
2607 {
2608 gimple stmt = SSA_NAME_DEF_STMT (var);
2609 tree rhs[2];
2610 int i;
2611
2612 if (!is_reassociable_op (stmt, code, loop))
2613 return false;
2614
2615 rhs[0] = gimple_assign_rhs1 (stmt);
2616 rhs[1] = gimple_assign_rhs2 (stmt);
2617 gimple_set_visited (stmt, true);
2618 for (i = 0; i < 2; i++)
2619 if (TREE_CODE (rhs[i]) == SSA_NAME
2620 && !get_ops (rhs[i], code, ops, loop)
2621 && has_single_use (rhs[i]))
2622 {
2623 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2624
2625 oe->op = rhs[i];
2626 oe->rank = code;
2627 oe->id = 0;
2628 oe->count = 1;
2629 ops->safe_push (oe);
2630 }
2631 return true;
2632 }
2633
2634 /* Inter-bb range test optimization. */
2635
2636 static void
2637 maybe_optimize_range_tests (gimple stmt)
2638 {
2639 basic_block first_bb = gimple_bb (stmt);
2640 basic_block last_bb = first_bb;
2641 basic_block other_bb = NULL;
2642 basic_block bb;
2643 edge_iterator ei;
2644 edge e;
2645 vec<operand_entry_t> ops = vNULL;
2646
2647 /* Consider only basic blocks that end with GIMPLE_COND or
2648 a cast statement satisfying final_range_test_p. All
2649 but the last bb in the first_bb .. last_bb range
2650 should end with GIMPLE_COND. */
2651 if (gimple_code (stmt) == GIMPLE_COND)
2652 {
2653 if (EDGE_COUNT (first_bb->succs) != 2)
2654 return;
2655 }
2656 else if (final_range_test_p (stmt))
2657 other_bb = single_succ (first_bb);
2658 else
2659 return;
2660
2661 if (stmt_could_throw_p (stmt))
2662 return;
2663
2664 /* As relative ordering of post-dominator sons isn't fixed,
2665 maybe_optimize_range_tests can be called first on any
2666 bb in the range we want to optimize. So, start searching
2667 backwards, if first_bb can be set to a predecessor. */
2668 while (single_pred_p (first_bb))
2669 {
2670 basic_block pred_bb = single_pred (first_bb);
2671 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2672 break;
2673 if (!no_side_effect_bb (first_bb))
2674 break;
2675 first_bb = pred_bb;
2676 }
2677 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2678 Before starting forward search in last_bb successors, find
2679 out the other_bb. */
2680 if (first_bb == last_bb)
2681 {
2682 other_bb = NULL;
2683 /* As non-GIMPLE_COND last stmt always terminates the range,
2684 if forward search didn't discover anything, just give up. */
2685 if (gimple_code (stmt) != GIMPLE_COND)
2686 return;
2687 /* Look at both successors. Either a successor ends with a GIMPLE_COND
2688 and satisfies suitable_cond_bb, or it ends with a cast and
2689 other_bb is that cast's successor. */
2690 FOR_EACH_EDGE (e, ei, first_bb->succs)
2691 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2692 || e->dest == first_bb)
2693 return;
2694 else if (single_pred_p (e->dest))
2695 {
2696 stmt = last_stmt (e->dest);
2697 if (stmt
2698 && gimple_code (stmt) == GIMPLE_COND
2699 && EDGE_COUNT (e->dest->succs) == 2)
2700 {
2701 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2702 break;
2703 else
2704 other_bb = NULL;
2705 }
2706 else if (stmt
2707 && final_range_test_p (stmt)
2708 && find_edge (first_bb, single_succ (e->dest)))
2709 {
2710 other_bb = single_succ (e->dest);
2711 if (other_bb == first_bb)
2712 other_bb = NULL;
2713 }
2714 }
2715 if (other_bb == NULL)
2716 return;
2717 }
2718 /* Now do the forward search, moving last_bb to successor bbs
2719 that aren't other_bb. */
2720 while (EDGE_COUNT (last_bb->succs) == 2)
2721 {
2722 FOR_EACH_EDGE (e, ei, last_bb->succs)
2723 if (e->dest != other_bb)
2724 break;
2725 if (e == NULL)
2726 break;
2727 if (!single_pred_p (e->dest))
2728 break;
2729 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2730 break;
2731 if (!no_side_effect_bb (e->dest))
2732 break;
2733 last_bb = e->dest;
2734 }
2735 if (first_bb == last_bb)
2736 return;
2737 /* Here basic blocks first_bb through last_bb's predecessor
2738 end with GIMPLE_COND, all of them have one of the edges to
2739 other_bb and another to another block in the range,
2740 all blocks except first_bb don't have side-effects and
2741 last_bb ends with either a GIMPLE_COND or a cast satisfying
2742 final_range_test_p. */
2743 for (bb = last_bb; ; bb = single_pred (bb))
2744 {
2745 enum tree_code code;
2746 tree lhs, rhs;
2747
2748 e = find_edge (bb, other_bb);
2749 stmt = last_stmt (bb);
2750 gimple_set_visited (stmt, true);
2751 if (gimple_code (stmt) != GIMPLE_COND)
2752 {
2753 use_operand_p use_p;
2754 gimple phi;
2755 edge e2;
2756 unsigned int d;
2757
2758 lhs = gimple_assign_lhs (stmt);
2759 rhs = gimple_assign_rhs1 (stmt);
2760 gcc_assert (bb == last_bb);
2761
2762 /* stmt is
2763 _123 = (int) _234;
2764
2765 followed by:
2766 <bb M>:
2767 # _345 = PHI <_123(N), 1(...), 1(...)>
2768
2769 or 0 instead of 1. If it is 0, the _234
2770 range test is anded together with all the
2771 other range tests, if it is 1, it is ored with
2772 them. */
2773 single_imm_use (lhs, &use_p, &phi);
2774 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2775 e2 = find_edge (first_bb, other_bb);
2776 d = e2->dest_idx;
2777 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2778 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2779 code = BIT_AND_EXPR;
2780 else
2781 {
2782 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2783 code = BIT_IOR_EXPR;
2784 }
2785
2786 /* If _234 SSA_NAME_DEF_STMT is
2787 _234 = _567 | _789;
2788 (or &, corresponding to 1/0 in the phi arguments),
2789 push into ops the individual range test arguments
2790 of the bitwise or resp. and, recursively. */
2791 if (!get_ops (rhs, code, &ops,
2792 loop_containing_stmt (stmt))
2793 && has_single_use (rhs))
2794 {
2795 /* Otherwise, push the _234 range test itself. */
2796 operand_entry_t oe
2797 = (operand_entry_t) pool_alloc (operand_entry_pool);
2798
2799 oe->op = rhs;
2800 oe->rank = code;
2801 oe->id = 0;
2802 oe->count = 1;
2803 ops.safe_push (oe);
2804 }
2805 continue;
2806 }
2807 /* Otherwise stmt is GIMPLE_COND. */
2808 code = gimple_cond_code (stmt);
2809 lhs = gimple_cond_lhs (stmt);
2810 rhs = gimple_cond_rhs (stmt);
2811 if (TREE_CODE (lhs) == SSA_NAME
2812 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2813 && ((code != EQ_EXPR && code != NE_EXPR)
2814 || rhs != boolean_false_node
2815 /* Either push into ops the individual bitwise
2816 or resp. and operands, depending on which
2817 edge is other_bb. */
2818 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2819 ^ (code == EQ_EXPR))
2820 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2821 loop_containing_stmt (stmt))))
2822 {
2823 /* Or push the GIMPLE_COND stmt itself. */
2824 operand_entry_t oe
2825 = (operand_entry_t) pool_alloc (operand_entry_pool);
2826
2827 oe->op = NULL;
2828 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2829 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2830 /* oe->op = NULL signals that there is no SSA_NAME
2831 for the range test, and oe->id instead is the
2832 number of the basic block at whose end the
2833 GIMPLE_COND is. */
2834 oe->id = bb->index;
2835 oe->count = 1;
2836 ops.safe_push (oe);
2837 }
2838 if (bb == first_bb)
2839 break;
2840 }
2841 if (ops.length () > 1)
2842 optimize_range_tests (ERROR_MARK, &ops);
2843 ops.release ();
2844 }
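
/* A hedged sketch of the control-flow shape handled above (the source
   and names are hypothetical).  For code like

     if (x == 1 || x == 3 || y != 0 || x == 5)
       foo ();

   that has not been folded into a single block, each comparison ends up
   as a GIMPLE_COND in its own basic block, with all the edges of one
   polarity meeting in the same destination (other_bb).  The backward and
   forward walks above collect first_bb .. last_bb, push one
   operand_entry per test (with oe->op == NULL and oe->id set to the bb
   index for the GIMPLE_CONDs), and hand the vector to
   optimize_range_tests with ERROR_MARK so that the statements are
   updated in place.  */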
2845
2846 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2847 of STMT in its operands. This is also known as a "destructive
2848 update" operation. */
2849
2850 static bool
2851 is_phi_for_stmt (gimple stmt, tree operand)
2852 {
2853 gimple def_stmt;
2854 tree lhs;
2855 use_operand_p arg_p;
2856 ssa_op_iter i;
2857
2858 if (TREE_CODE (operand) != SSA_NAME)
2859 return false;
2860
2861 lhs = gimple_assign_lhs (stmt);
2862
2863 def_stmt = SSA_NAME_DEF_STMT (operand);
2864 if (gimple_code (def_stmt) != GIMPLE_PHI)
2865 return false;
2866
2867 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2868 if (lhs == USE_FROM_PTR (arg_p))
2869 return true;
2870 return false;
2871 }
2872
2873 /* Remove def stmt of VAR if VAR has zero uses and recurse
2874 on rhs1 operand if so. */
2875
2876 static void
2877 remove_visited_stmt_chain (tree var)
2878 {
2879 gimple stmt;
2880 gimple_stmt_iterator gsi;
2881
2882 while (1)
2883 {
2884 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2885 return;
2886 stmt = SSA_NAME_DEF_STMT (var);
2887 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2888 {
2889 var = gimple_assign_rhs1 (stmt);
2890 gsi = gsi_for_stmt (stmt);
2891 gsi_remove (&gsi, true);
2892 release_defs (stmt);
2893 }
2894 else
2895 return;
2896 }
2897 }
2898
2899 /* This function checks three consecutive operands in
2900 the passed operands vector OPS starting from OPINDEX and
2901 swaps two operands if it is profitable for the binary operation
2902 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2903
2904 We pair ops with the same rank if possible.
2905
2906 The alternative we try is to see if STMT is a destructive
2907 update style statement, which is like:
2908 b = phi (a, ...)
2909 a = c + b;
2910 In that case, we want to use the destructive update form to
2911 expose the possible vectorizer sum reduction opportunity.
2912 In that case, the third operand will be the phi node. This
2913 check is not performed if STMT is null.
2914
2915 We could, of course, try to be better as noted above, and do a
2916 lot of work to try to find these opportunities in >3 operand
2917 cases, but it is unlikely to be worth it. */
2918
2919 static void
2920 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2921 unsigned int opindex, gimple stmt)
2922 {
2923 operand_entry_t oe1, oe2, oe3;
2924
2925 oe1 = ops[opindex];
2926 oe2 = ops[opindex + 1];
2927 oe3 = ops[opindex + 2];
2928
2929 if ((oe1->rank == oe2->rank
2930 && oe2->rank != oe3->rank)
2931 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2932 && !is_phi_for_stmt (stmt, oe1->op)
2933 && !is_phi_for_stmt (stmt, oe2->op)))
2934 {
2935 struct operand_entry temp = *oe3;
2936 oe3->op = oe1->op;
2937 oe3->rank = oe1->rank;
2938 oe1->op = temp.op;
2939 oe1->rank = temp.rank;
2940 }
2941 else if ((oe1->rank == oe3->rank
2942 && oe2->rank != oe3->rank)
2943 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2944 && !is_phi_for_stmt (stmt, oe1->op)
2945 && !is_phi_for_stmt (stmt, oe3->op)))
2946 {
2947 struct operand_entry temp = *oe2;
2948 oe2->op = oe1->op;
2949 oe2->rank = oe1->rank;
2950 oe1->op = temp.op;
2951 oe1->rank = temp.rank;
2952 }
2953 }
2954
2955 /* Determine if stmt A is not dominated by stmt B. If A and B are in
2956 the same basic block, then A's UID has to be less than B's. If they
2957 are in different BBs, then A's BB must not be dominated by B's BB. */
2958
2959 static inline bool
2960 not_dominated_by (gimple a, gimple b)
2961 {
2962 basic_block bb_a, bb_b;
2963 bb_a = gimple_bb (a);
2964 bb_b = gimple_bb (b);
2965 return ((bb_a == bb_b && gimple_uid (a) < gimple_uid (b))
2966 || (bb_a != bb_b
2967 && !dominated_by_p (CDI_DOMINATORS, bb_a, bb_b)));
2968
2969 }
2970
2971 /* Among STMT1 and STMT2, return the statement that appears later. Both
2972 statements are in the same BB and have the same UID. */
2973
2974 static gimple
2975 appears_later_in_bb (gimple stmt1, gimple stmt2)
2976 {
2977 unsigned uid = gimple_uid (stmt1);
2978 gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
2979 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
2980 {
2981 gimple stmt = gsi_stmt (gsi);
2982
2983 /* If STMT has a different UID than STMT1 and we haven't seen
2984 STMT2 during traversal, we know STMT1 appears later. */
2985 if (gimple_uid (stmt) != uid)
2986 return stmt1;
2987 else if (stmt == stmt2)
2988 return stmt2;
2989 }
2990 return stmt1;
2991 }
2992
2993 /* Find the statement after which STMT must be moved so that the
2994 dependency from DEP_STMT to STMT is maintained. */
2995
2996 static gimple
2997 find_insert_point (gimple stmt, gimple dep_stmt)
2998 {
2999 gimple insert_stmt = stmt;
3000 if (dep_stmt == NULL)
3001 return stmt;
3002 if (gimple_uid (insert_stmt) == gimple_uid (dep_stmt)
3003 && gimple_bb (insert_stmt) == gimple_bb (dep_stmt)
3004 && insert_stmt != dep_stmt)
3005 insert_stmt = appears_later_in_bb (insert_stmt, dep_stmt);
3006 else if (not_dominated_by (insert_stmt, dep_stmt))
3007 insert_stmt = dep_stmt;
3008 return insert_stmt;
3009 }
3010
3011 /* Insert STMT after INSERT_POINT. */
3012
3013 static void
3014 insert_stmt_after (gimple stmt, gimple insert_point)
3015 {
3016 imm_use_iterator iter;
3017 tree lhs;
3018 gimple use_stmt;
3019 gimple_stmt_iterator gsistmt = gsi_for_stmt (stmt), gsi_insert;
3020 basic_block insert_bb = gimple_bb (insert_point);
3021 bool insert_bb_different = (insert_bb != gimple_bb (stmt));
3022 lhs = gimple_assign_lhs (stmt);
3023 /* If there are any debug uses of LHS, reset them. */
3024 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3025 {
3026 if (is_gimple_debug (use_stmt)
3027 && not_dominated_by (use_stmt, insert_point))
3028 {
3029 gimple_debug_bind_reset_value (use_stmt);
3030 update_stmt (use_stmt);
3031 }
3032 }
3033 /* If INSERT_POINT is a phi node, then do not insert just after it.
3034 Instead, find the first non-label gimple statement in its BB and
3035 insert before that. */
3036 if (gimple_code (insert_point) == GIMPLE_PHI)
3037 {
3038 gsi_insert = gsi_after_labels (insert_bb);
3039 gsi_move_before (&gsistmt, &gsi_insert);
3040 }
3041 /* Statements marked for throw cannot be in the middle of a basic block. So
3042 we cannot insert a statement (not marked for throw) immediately after it. */
3043 else if (stmt_ends_bb_p (insert_point))
3044 {
3045 edge succ_edge = find_fallthru_edge (insert_bb->succs);
3046 insert_bb = succ_edge->dest;
3047 insert_bb_different = (insert_bb != gimple_bb (stmt));
3048 /* Insert STMT at the beginning of the successor basic block. */
3049 gsi_insert = gsi_after_labels (insert_bb);
3050 gsi_move_before (&gsistmt, &gsi_insert);
3051 }
3052 else
3053 {
3054 gsi_insert = gsi_for_stmt (insert_point);
3055 gsi_move_after (&gsistmt, &gsi_insert);
3056 }
3057 /* Set the UID of STMT to that of INSERT_POINT so that subsequent comparisons
3058 of UIDs to determine dominance within a basic block work. */
3059 gimple_set_uid (stmt, gimple_uid (insert_point));
3060 if (dump_file && (dump_flags & TDF_DETAILS))
3061 {
3062 fprintf (dump_file, "Moved stmt ");
3063 print_gimple_stmt (dump_file, stmt, 0, 0);
3064 fprintf (dump_file, " %s to satisfy dependences\n",
3065 insert_bb_different ? "to a different BB" : "within same BB");
3066 }
3067
3068 }
3069
3070 /* If OP is an SSA variable and is not the default definition, return the
3071 gimple statement that defines OP. Else return NULL. */
3072
3073 static inline gimple
3074 get_def_stmt (tree op)
3075 {
3076 if (TREE_CODE (op) == SSA_NAME
3077 && !SSA_NAME_IS_DEFAULT_DEF (op))
3078 return SSA_NAME_DEF_STMT (op);
3079 else
3080 return NULL;
3081 }
3082
3083 /* Ensure that operands in the OPS vector are available for STMT and all
3084 gimple statements on which STMT depends. */
3085
3086 static void
3087 ensure_ops_are_available (gimple stmt, vec<operand_entry_t> ops, int opindex)
3088 {
3089 unsigned int len = ops.length ();
3090 gimple insert_stmt = stmt;
3091 gimple dep_stmts[2];
3092 dep_stmts[0] = get_def_stmt (ops[opindex]->op);
3093 if (len - opindex == 2)
3094 {
3095 dep_stmts[1] = get_def_stmt (ops[opindex + 1]->op);
3096 }
3097 else
3098 {
3099 gimple stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3100 ensure_ops_are_available (stmt1, ops, opindex + 1);
3101 dep_stmts[1] = stmt1;
3102 }
3103 for (int i = 0; i < 2; i++)
3104 insert_stmt = find_insert_point (insert_stmt, dep_stmts[i]);
3105
3106 if (insert_stmt != stmt)
3107 insert_stmt_after (stmt, insert_stmt);
3108 }
3109
3110 /* Recursively rewrite our linearized statements so that the operators
3111 match those in OPS[OPINDEX], putting the computation in rank
3112 order. */
3113
3114 static void
3115 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3116 vec<operand_entry_t> ops, bool moved)
3117 {
3118 tree rhs1 = gimple_assign_rhs1 (stmt);
3119 tree rhs2 = gimple_assign_rhs2 (stmt);
3120 operand_entry_t oe;
3121
3122 /* The final recursion case for this function is that you have
3123 exactly two operations left.
3124 If we had exactly one op in the entire list to start with, we
3125 would have never called this function, and the tail recursion
3126 rewrites them one at a time. */
3127 if (opindex + 2 == ops.length ())
3128 {
3129 operand_entry_t oe1, oe2;
3130
3131 oe1 = ops[opindex];
3132 oe2 = ops[opindex + 1];
3133
3134 if (rhs1 != oe1->op || rhs2 != oe2->op)
3135 {
3136 if (dump_file && (dump_flags & TDF_DETAILS))
3137 {
3138 fprintf (dump_file, "Transforming ");
3139 print_gimple_stmt (dump_file, stmt, 0, 0);
3140 }
3141
3142 gimple_assign_set_rhs1 (stmt, oe1->op);
3143 gimple_assign_set_rhs2 (stmt, oe2->op);
3144 update_stmt (stmt);
3145 if (rhs1 != oe1->op && rhs1 != oe2->op)
3146 remove_visited_stmt_chain (rhs1);
3147
3148 if (dump_file && (dump_flags & TDF_DETAILS))
3149 {
3150 fprintf (dump_file, " into ");
3151 print_gimple_stmt (dump_file, stmt, 0, 0);
3152 }
3153 }
3154 return;
3155 }
3156
3157 /* If we hit here, we should have 3 or more ops left. */
3158 gcc_assert (opindex + 2 < ops.length ());
3159
3160 /* Rewrite the next operator. */
3161 oe = ops[opindex];
3162
3163 if (oe->op != rhs2)
3164 {
3165 if (!moved)
3166 {
3167 ensure_ops_are_available (stmt, ops, opindex);
3168 moved = true;
3169 }
3170
3171 if (dump_file && (dump_flags & TDF_DETAILS))
3172 {
3173 fprintf (dump_file, "Transforming ");
3174 print_gimple_stmt (dump_file, stmt, 0, 0);
3175 }
3176
3177 gimple_assign_set_rhs2 (stmt, oe->op);
3178 update_stmt (stmt);
3179
3180 if (dump_file && (dump_flags & TDF_DETAILS))
3181 {
3182 fprintf (dump_file, " into ");
3183 print_gimple_stmt (dump_file, stmt, 0, 0);
3184 }
3185 }
3186 /* Recurse on the LHS of the binary operator, which is guaranteed to
3187 be the non-leaf side. */
3188 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
3189 }
3190
3191 /* Find out how many cycles we need to compute a statement chain.
3192 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3193 maximum number of independent statements we may execute per cycle. */
3194
3195 static int
3196 get_required_cycles (int ops_num, int cpu_width)
3197 {
3198 int res;
3199 int elog;
3200 unsigned int rest;
3201
3202 /* While we have more than 2 * cpu_width operands
3203 we may reduce the number of operands by cpu_width
3204 per cycle. */
3205 res = ops_num / (2 * cpu_width);
3206
3207 /* The remaining operand count may be halved each cycle
3208 until we have only one operand. */
3209 rest = (unsigned)(ops_num - res * cpu_width);
3210 elog = exact_log2 (rest);
3211 if (elog >= 0)
3212 res += elog;
3213 else
3214 res += floor_log2 (rest) + 1;
3215
3216 return res;
3217 }
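
/* A worked example of the estimate above (numbers only, not used by the
   pass): for ops_num = 10 and cpu_width = 2, res = 10 / 4 = 2,
   rest = 10 - 2 * 2 = 6, and since 6 is not a power of two
   res += floor_log2 (6) + 1 = 3, giving 5 cycles.  For ops_num = 4 and
   cpu_width = 2, res = 1, rest = 4 - 1 * 2 = 2, res += exact_log2 (2),
   giving 2 cycles.  */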
3218
3219 /* Returns an optimal number of registers to use for computation of
3220 the given statements. */
3221
3222 static int
3223 get_reassociation_width (int ops_num, enum tree_code opc,
3224 enum machine_mode mode)
3225 {
3226 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3227 int width;
3228 int width_min;
3229 int cycles_best;
3230
3231 if (param_width > 0)
3232 width = param_width;
3233 else
3234 width = targetm.sched.reassociation_width (opc, mode);
3235
3236 if (width == 1)
3237 return width;
3238
3239 /* Get the minimal time required for sequence computation. */
3240 cycles_best = get_required_cycles (ops_num, width);
3241
3242 /* Check if we may use less width and still compute the sequence in
3243 the same time. That allows us to reduce register usage.
3244 get_required_cycles is monotonically increasing with lower width
3245 so we can perform a binary search for the minimal width that still
3246 results in the optimal cycle count. */
3247 width_min = 1;
3248 while (width > width_min)
3249 {
3250 int width_mid = (width + width_min) / 2;
3251
3252 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3253 width = width_mid;
3254 else if (width_min < width_mid)
3255 width_min = width_mid;
3256 else
3257 break;
3258 }
3259
3260 return width;
3261 }
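
/* A worked example of the width search above (numbers only, not used by
   the pass): with ops_num = 4 and a target-reported width of 4,
   cycles_best = get_required_cycles (4, 4) = 2.  The binary search then
   tries width_mid = 2, finds get_required_cycles (4, 2) == 2 as well and
   shrinks width to 2; width 1 would need 3 cycles, so 2 is returned,
   keeping the optimal cycle count while using fewer registers.  */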
3262
3263 /* Recursively rewrite our linearized statements so that the operators
3264 match those in OPS[OPINDEX], putting the computation in rank
3265 order and trying to allow operations to be executed in
3266 parallel. */
3267
3268 static void
3269 rewrite_expr_tree_parallel (gimple stmt, int width,
3270 vec<operand_entry_t> ops)
3271 {
3272 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3273 int op_num = ops.length ();
3274 int stmt_num = op_num - 1;
3275 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3276 int op_index = op_num - 1;
3277 int stmt_index = 0;
3278 int ready_stmts_end = 0;
3279 int i = 0;
3280 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3281
3282 /* We start expression rewriting from the top statements.
3283 So, in this loop we create a full list of statements
3284 we will work with. */
3285 stmts[stmt_num - 1] = stmt;
3286 for (i = stmt_num - 2; i >= 0; i--)
3287 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3288
3289 for (i = 0; i < stmt_num; i++)
3290 {
3291 tree op1, op2;
3292
3293 /* Determine whether we should use results of
3294 already handled statements or not. */
3295 if (ready_stmts_end == 0
3296 && (i - stmt_index >= width || op_index < 1))
3297 ready_stmts_end = i;
3298
3299 /* Now we choose operands for the next statement. A nonzero
3300 value in ready_stmts_end means that we should use
3301 the results of already generated statements as new operands. */
3302 if (ready_stmts_end > 0)
3303 {
3304 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3305 if (ready_stmts_end > stmt_index)
3306 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3307 else if (op_index >= 0)
3308 op2 = ops[op_index--]->op;
3309 else
3310 {
3311 gcc_assert (stmt_index < i);
3312 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3313 }
3314
3315 if (stmt_index >= ready_stmts_end)
3316 ready_stmts_end = 0;
3317 }
3318 else
3319 {
3320 if (op_index > 1)
3321 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3322 op2 = ops[op_index--]->op;
3323 op1 = ops[op_index--]->op;
3324 }
3325
3326 /* If we are emitting the last statement then we should put the
3327 operands into the last statement. This will also
3328 break the loop. */
3329 if (op_index < 0 && stmt_index == i)
3330 i = stmt_num - 1;
3331
3332 if (dump_file && (dump_flags & TDF_DETAILS))
3333 {
3334 fprintf (dump_file, "Transforming ");
3335 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3336 }
3337
3338 /* We keep the original statement only for the last one. All
3339 others are recreated. */
3340 if (i == stmt_num - 1)
3341 {
3342 gimple_assign_set_rhs1 (stmts[i], op1);
3343 gimple_assign_set_rhs2 (stmts[i], op2);
3344 update_stmt (stmts[i]);
3345 }
3346 else
3347 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3348
3349 if (dump_file && (dump_flags & TDF_DETAILS))
3350 {
3351 fprintf (dump_file, " into ");
3352 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3353 }
3354 }
3355
3356 remove_visited_stmt_chain (last_rhs1);
3357 }
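
/* A hedged illustration of the parallel rewrite above (the SSA names are
   hypothetical).  For a linearized chain computing a + b + c + d with
   width 2, instead of the serial form

     _1 = a + b;  _2 = _1 + c;  _3 = _2 + d;

   the loop above emits roughly

     _1 = c + d;  _2 = a + b;  _3 = _1 + _2;

   so the first two additions do not depend on each other and can issue
   in the same cycle.  */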
3358
3359 /* Transform STMT, which is really (A + B) + (C + D), into the left
3360 linear form, ((A+B)+C)+D.
3361 Recurse on D if necessary. */
3362
3363 static void
3364 linearize_expr (gimple stmt)
3365 {
3366 gimple_stmt_iterator gsinow, gsirhs;
3367 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3368 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3369 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3370 gimple newbinrhs = NULL;
3371 struct loop *loop = loop_containing_stmt (stmt);
3372
3373 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3374 && is_reassociable_op (binrhs, rhscode, loop));
3375
3376 gsinow = gsi_for_stmt (stmt);
3377 gsirhs = gsi_for_stmt (binrhs);
3378 gsi_move_before (&gsirhs, &gsinow);
3379 gimple_set_uid (binrhs, gimple_uid (stmt));
3380
3381 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3382 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3383 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3384
3385 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3386 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3387
3388 if (dump_file && (dump_flags & TDF_DETAILS))
3389 {
3390 fprintf (dump_file, "Linearized: ");
3391 print_gimple_stmt (dump_file, stmt, 0, 0);
3392 }
3393
3394 reassociate_stats.linearized++;
3395 update_stmt (binrhs);
3396 update_stmt (binlhs);
3397 update_stmt (stmt);
3398
3399 gimple_set_visited (stmt, true);
3400 gimple_set_visited (binlhs, true);
3401 gimple_set_visited (binrhs, true);
3402
3403 /* Tail recurse on the new rhs if it still needs reassociation. */
3404 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3405 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3406 want to change the algorithm while converting to tuples. */
3407 linearize_expr (stmt);
3408 }
3409
3410 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3411 it. Otherwise, return NULL. */
3412
3413 static gimple
3414 get_single_immediate_use (tree lhs)
3415 {
3416 use_operand_p immuse;
3417 gimple immusestmt;
3418
3419 if (TREE_CODE (lhs) == SSA_NAME
3420 && single_imm_use (lhs, &immuse, &immusestmt)
3421 && is_gimple_assign (immusestmt))
3422 return immusestmt;
3423
3424 return NULL;
3425 }
3426
3427 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3428 representing the negated value. Insertions of any necessary
3429 instructions go before GSI.
3430 This function is recursive in that, if you hand it "a_5" as the
3431 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3432 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3433
3434 static tree
3435 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3436 {
3437 gimple negatedefstmt = NULL;
3438 tree resultofnegate;
3439
3440 /* If we are trying to negate a name, defined by an add, negate the
3441 add operands instead. */
3442 if (TREE_CODE (tonegate) == SSA_NAME)
3443 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3444 if (TREE_CODE (tonegate) == SSA_NAME
3445 && is_gimple_assign (negatedefstmt)
3446 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3447 && has_single_use (gimple_assign_lhs (negatedefstmt))
3448 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3449 {
3450 gimple_stmt_iterator gsi;
3451 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3452 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3453
3454 gsi = gsi_for_stmt (negatedefstmt);
3455 rhs1 = negate_value (rhs1, &gsi);
3456 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3457
3458 gsi = gsi_for_stmt (negatedefstmt);
3459 rhs2 = negate_value (rhs2, &gsi);
3460 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3461
3462 update_stmt (negatedefstmt);
3463 return gimple_assign_lhs (negatedefstmt);
3464 }
3465
3466 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3467 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3468 NULL_TREE, true, GSI_SAME_STMT);
3469 return resultofnegate;
3470 }
3471
3472 /* Return true if we should break up the subtract in STMT into an add
3473 with negate. This is true when the subtract operands are really
3474 adds, or the subtract itself is used in an add expression. In
3475 either case, breaking up the subtract into an add with negate
3476 exposes the adds to reassociation. */
3477
3478 static bool
3479 should_break_up_subtract (gimple stmt)
3480 {
3481 tree lhs = gimple_assign_lhs (stmt);
3482 tree binlhs = gimple_assign_rhs1 (stmt);
3483 tree binrhs = gimple_assign_rhs2 (stmt);
3484 gimple immusestmt;
3485 struct loop *loop = loop_containing_stmt (stmt);
3486
3487 if (TREE_CODE (binlhs) == SSA_NAME
3488 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3489 return true;
3490
3491 if (TREE_CODE (binrhs) == SSA_NAME
3492 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3493 return true;
3494
3495 if (TREE_CODE (lhs) == SSA_NAME
3496 && (immusestmt = get_single_immediate_use (lhs))
3497 && is_gimple_assign (immusestmt)
3498 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3499 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3500 return true;
3501 return false;
3502 }
3503
3504 /* Transform STMT from A - B into A + -B. */
3505
3506 static void
3507 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3508 {
3509 tree rhs1 = gimple_assign_rhs1 (stmt);
3510 tree rhs2 = gimple_assign_rhs2 (stmt);
3511
3512 if (dump_file && (dump_flags & TDF_DETAILS))
3513 {
3514 fprintf (dump_file, "Breaking up subtract ");
3515 print_gimple_stmt (dump_file, stmt, 0, 0);
3516 }
3517
3518 rhs2 = negate_value (rhs2, gsip);
3519 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3520 update_stmt (stmt);
3521 }
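
/* A hedged example of the transform above (the SSA names are
   hypothetical).  Given

     c_3 = a_1 - b_2;
     e_5 = c_3 + d_4;

   should_break_up_subtract accepts the subtract because its only use is
   an addition, and break_up_subtract rewrites it as

     t_6 = -b_2;
     c_3 = a_1 + t_6;

   so that a_1, -b_2 and d_4 all become operands of one reassociable
   addition chain.  */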
3522
3523 /* Determine whether STMT is a builtin call that raises an SSA name
3524 to an integer power and has only one use. If so, and this is early
3525 reassociation and unsafe math optimizations are permitted, place
3526 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3527 If any of these conditions does not hold, return FALSE. */
3528
3529 static bool
3530 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3531 {
3532 tree fndecl, arg1;
3533 REAL_VALUE_TYPE c, cint;
3534
3535 if (!first_pass_instance
3536 || !flag_unsafe_math_optimizations
3537 || !is_gimple_call (stmt)
3538 || !has_single_use (gimple_call_lhs (stmt)))
3539 return false;
3540
3541 fndecl = gimple_call_fndecl (stmt);
3542
3543 if (!fndecl
3544 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3545 return false;
3546
3547 switch (DECL_FUNCTION_CODE (fndecl))
3548 {
3549 CASE_FLT_FN (BUILT_IN_POW):
3550 *base = gimple_call_arg (stmt, 0);
3551 arg1 = gimple_call_arg (stmt, 1);
3552
3553 if (TREE_CODE (arg1) != REAL_CST)
3554 return false;
3555
3556 c = TREE_REAL_CST (arg1);
3557
3558 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3559 return false;
3560
3561 *exponent = real_to_integer (&c);
3562 real_from_integer (&cint, VOIDmode, *exponent,
3563 *exponent < 0 ? -1 : 0, 0);
3564 if (!real_identical (&c, &cint))
3565 return false;
3566
3567 break;
3568
3569 CASE_FLT_FN (BUILT_IN_POWI):
3570 *base = gimple_call_arg (stmt, 0);
3571 arg1 = gimple_call_arg (stmt, 1);
3572
3573 if (!host_integerp (arg1, 0))
3574 return false;
3575
3576 *exponent = TREE_INT_CST_LOW (arg1);
3577 break;
3578
3579 default:
3580 return false;
3581 }
3582
3583 /* Expanding negative exponents is generally unproductive, so we don't
3584 complicate matters with those. Exponents of zero and one should
3585 have been handled by expression folding. */
3586 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3587 return false;
3588
3589 return true;
3590 }
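
/* Hedged examples of what the predicate above accepts (the names are
   hypothetical): a single-use call __builtin_pow (x_1, 4.0) yields
   *BASE = x_1 and *EXPONENT = 4, and __builtin_powi (x_1, 5) yields
   *EXPONENT = 5.  Calls like pow (x_1, 0.5) (non-integer exponent) or
   pow (x_1, -2.0) (negative exponent) are rejected, as are exponents 0
   and 1, which folding should already have handled.  */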
3591
3592 /* Recursively linearize a binary expression that is the RHS of STMT.
3593 Place the operands of the expression tree in the vector named OPS. */
3594
3595 static void
3596 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3597 bool is_associative, bool set_visited)
3598 {
3599 tree binlhs = gimple_assign_rhs1 (stmt);
3600 tree binrhs = gimple_assign_rhs2 (stmt);
3601 gimple binlhsdef = NULL, binrhsdef = NULL;
3602 bool binlhsisreassoc = false;
3603 bool binrhsisreassoc = false;
3604 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3605 struct loop *loop = loop_containing_stmt (stmt);
3606 tree base = NULL_TREE;
3607 HOST_WIDE_INT exponent = 0;
3608
3609 if (set_visited)
3610 gimple_set_visited (stmt, true);
3611
3612 if (TREE_CODE (binlhs) == SSA_NAME)
3613 {
3614 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3615 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3616 && !stmt_could_throw_p (binlhsdef));
3617 }
3618
3619 if (TREE_CODE (binrhs) == SSA_NAME)
3620 {
3621 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3622 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3623 && !stmt_could_throw_p (binrhsdef));
3624 }
3625
3626 /* If the LHS is not reassociable, but the RHS is, we need to swap
3627 them. If neither is reassociable, there is nothing we can do, so
3628 just put them in the ops vector. If the LHS is reassociable,
3629 linearize it. If both are reassociable, then linearize the RHS
3630 and the LHS. */
3631
3632 if (!binlhsisreassoc)
3633 {
3634 tree temp;
3635
3636 /* If this is not an associative operation, e.g. division, give up. */
3637 if (!is_associative)
3638 {
3639 add_to_ops_vec (ops, binrhs);
3640 return;
3641 }
3642
3643 if (!binrhsisreassoc)
3644 {
3645 if (rhscode == MULT_EXPR
3646 && TREE_CODE (binrhs) == SSA_NAME
3647 && acceptable_pow_call (binrhsdef, &base, &exponent))
3648 {
3649 add_repeat_to_ops_vec (ops, base, exponent);
3650 gimple_set_visited (binrhsdef, true);
3651 }
3652 else
3653 add_to_ops_vec (ops, binrhs);
3654
3655 if (rhscode == MULT_EXPR
3656 && TREE_CODE (binlhs) == SSA_NAME
3657 && acceptable_pow_call (binlhsdef, &base, &exponent))
3658 {
3659 add_repeat_to_ops_vec (ops, base, exponent);
3660 gimple_set_visited (binlhsdef, true);
3661 }
3662 else
3663 add_to_ops_vec (ops, binlhs);
3664
3665 return;
3666 }
3667
3668 if (dump_file && (dump_flags & TDF_DETAILS))
3669 {
3670 fprintf (dump_file, "swapping operands of ");
3671 print_gimple_stmt (dump_file, stmt, 0, 0);
3672 }
3673
3674 swap_ssa_operands (stmt,
3675 gimple_assign_rhs1_ptr (stmt),
3676 gimple_assign_rhs2_ptr (stmt));
3677 update_stmt (stmt);
3678
3679 if (dump_file && (dump_flags & TDF_DETAILS))
3680 {
3681 fprintf (dump_file, " is now ");
3682 print_gimple_stmt (dump_file, stmt, 0, 0);
3683 }
3684
3685 /* We want to make it so the lhs is always the reassociative op,
3686 so swap. */
3687 temp = binlhs;
3688 binlhs = binrhs;
3689 binrhs = temp;
3690 }
3691 else if (binrhsisreassoc)
3692 {
3693 linearize_expr (stmt);
3694 binlhs = gimple_assign_rhs1 (stmt);
3695 binrhs = gimple_assign_rhs2 (stmt);
3696 }
3697
3698 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3699 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3700 rhscode, loop));
3701 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3702 is_associative, set_visited);
3703
3704 if (rhscode == MULT_EXPR
3705 && TREE_CODE (binrhs) == SSA_NAME
3706 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3707 {
3708 add_repeat_to_ops_vec (ops, base, exponent);
3709 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3710 }
3711 else
3712 add_to_ops_vec (ops, binrhs);
3713 }
3714
3715 /* Repropagate the negates back into subtracts, since no other pass
3716 currently does it. */
3717
3718 static void
3719 repropagate_negates (void)
3720 {
3721 unsigned int i = 0;
3722 tree negate;
3723
3724 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3725 {
3726 gimple user = get_single_immediate_use (negate);
3727
3728 if (!user || !is_gimple_assign (user))
3729 continue;
3730
3731 /* The negate operand can be either operand of a PLUS_EXPR
3732 (it can be the LHS if the RHS is a constant for example).
3733
3734 Force the negate operand to the RHS of the PLUS_EXPR, then
3735 transform the PLUS_EXPR into a MINUS_EXPR. */
3736 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3737 {
3738 /* If the negated operand appears on the LHS of the
3739 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3740 to force the negated operand to the RHS of the PLUS_EXPR. */
3741 if (gimple_assign_rhs1 (user) == negate)
3742 {
3743 swap_ssa_operands (user,
3744 gimple_assign_rhs1_ptr (user),
3745 gimple_assign_rhs2_ptr (user));
3746 }
3747
3748 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3749 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3750 if (gimple_assign_rhs2 (user) == negate)
3751 {
3752 tree rhs1 = gimple_assign_rhs1 (user);
3753 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3754 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3755 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3756 update_stmt (user);
3757 }
3758 }
3759 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3760 {
3761 if (gimple_assign_rhs1 (user) == negate)
3762 {
3763 /* We have
3764 x = -a
3765 y = x - b
3766 which we transform into
3767 x = a + b
3768 y = -x .
3769 This pushes down the negate which we possibly can merge
3770 into some other operation, hence insert it into the
3771 plus_negates vector. */
3772 gimple feed = SSA_NAME_DEF_STMT (negate);
3773 tree a = gimple_assign_rhs1 (feed);
3774 tree rhs2 = gimple_assign_rhs2 (user);
3775 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3776 gimple_replace_ssa_lhs (feed, negate);
3777 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3778 update_stmt (gsi_stmt (gsi));
3779 gsi2 = gsi_for_stmt (user);
3780 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3781 update_stmt (gsi_stmt (gsi2));
3782 gsi_move_before (&gsi, &gsi2);
3783 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3784 }
3785 else
3786 {
3787 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3788 rid of one operation. */
3789 gimple feed = SSA_NAME_DEF_STMT (negate);
3790 tree a = gimple_assign_rhs1 (feed);
3791 tree rhs1 = gimple_assign_rhs1 (user);
3792 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3793 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3794 update_stmt (gsi_stmt (gsi));
3795 }
3796 }
3797 }
3798 }
3799
3800 /* Returns true if OP is of a type for which we can do reassociation.
3801 That is for integral or non-saturating fixed-point types, and for
3802 floating-point types when -fassociative-math is enabled. */
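/* Note that for integral types the predicate requires TYPE_OVERFLOW_WRAPS,
   so a plain signed int (whose overflow is undefined unless -fwrapv is in
   effect) is rejected, while unsigned types, which wrap by definition,
   are accepted.  */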
3803
3804 static bool
3805 can_reassociate_p (tree op)
3806 {
3807 tree type = TREE_TYPE (op);
3808 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3809 || NON_SAT_FIXED_POINT_TYPE_P (type)
3810 || (flag_associative_math && FLOAT_TYPE_P (type)))
3811 return true;
3812 return false;
3813 }
3814
3815 /* Break up subtract operations in block BB.
3816
3817 We do this top down because we don't know whether the subtract is
3818 part of a possible chain of reassociation except at the top.
3819
3820 IE given
3821 d = f + g
3822 c = a + e
3823 b = c - d
3824 q = b - r
3825 k = t - q
3826
3827 we want to break up k = t - q, but we won't until we've transformed q
3828 = b - r, which won't be broken up until we transform b = c - d.
3829
3830 En passant, clear the GIMPLE visited flag on every statement. */
3831
3832 static void
3833 break_up_subtract_bb (basic_block bb)
3834 {
3835 gimple_stmt_iterator gsi;
3836 basic_block son;
3837
3838 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3839 {
3840 gimple stmt = gsi_stmt (gsi);
3841 gimple_set_visited (stmt, false);
3842
3843 if (!is_gimple_assign (stmt)
3844 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3845 continue;
3846
3847 /* Look for simple gimple subtract operations. */
3848 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3849 {
3850 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3851 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3852 continue;
3853
3854 /* Check for a subtract used only in an addition. If this
3855 is the case, transform it into add of a negate for better
3856 reassociation. IE transform C = A-B into C = A + -B if C
3857 is only used in an addition. */
3858 if (should_break_up_subtract (stmt))
3859 break_up_subtract (stmt, &gsi);
3860 }
3861 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3862 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3863 plus_negates.safe_push (gimple_assign_lhs (stmt));
3864 }
3865 for (son = first_dom_son (CDI_DOMINATORS, bb);
3866 son;
3867 son = next_dom_son (CDI_DOMINATORS, son))
3868 break_up_subtract_bb (son);
3869 }
3870
3871 /* Used for repeated factor analysis. */
3872 struct repeat_factor_d
3873 {
3874 /* An SSA name that occurs in a multiply chain. */
3875 tree factor;
3876
3877 /* Cached rank of the factor. */
3878 unsigned rank;
3879
3880 /* Number of occurrences of the factor in the chain. */
3881 HOST_WIDE_INT count;
3882
3883 /* An SSA name representing the product of this factor and
3884 all factors appearing later in the repeated factor vector. */
3885 tree repr;
3886 };
3887
3888 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3889 typedef const struct repeat_factor_d *const_repeat_factor_t;
3890
3891
3892 static vec<repeat_factor> repeat_factor_vec;
3893
3894 /* Used for sorting the repeat factor vector. Sort primarily by
3895 ascending occurrence count, secondarily by descending rank. */
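/* As an illustration with made-up values: factors x, y and z occurring
   2, 3 and 4 times sort as x, y, z; two factors that occur equally
   often are ordered with the higher-ranked one first.  */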
3896
3897 static int
3898 compare_repeat_factors (const void *x1, const void *x2)
3899 {
3900 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3901 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3902
3903 if (rf1->count != rf2->count)
3904 return rf1->count < rf2->count ? -1 : 1;
3905
3906 return rf2->rank - rf1->rank;
3907 }
3908
3909 /* Look for repeated operands in OPS in the multiply tree rooted at
3910 STMT. Replace them with an optimal sequence of multiplies and powi
3911 builtin calls, and remove the used operands from OPS. Return an
3912 SSA name representing the value of the replacement sequence. */
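/* As a small illustrative sketch (hypothetical operands, not from the
   sources): for a linearized chain a * a * a * b the loop below would
   emit t = __builtin_powi (a, 3), remove the three copies of 'a' from
   OPS, and return t; the remaining 'b' is left in OPS for the caller
   to multiply in.  */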
3913
3914 static tree
3915 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3916 {
3917 unsigned i, j, vec_len;
3918 int ii;
3919 operand_entry_t oe;
3920 repeat_factor_t rf1, rf2;
3921 repeat_factor rfnew;
3922 tree result = NULL_TREE;
3923 tree target_ssa, iter_result;
3924 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3925 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3926 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3927 gimple mul_stmt, pow_stmt;
3928
3929 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3930 target. */
3931 if (!powi_fndecl)
3932 return NULL_TREE;
3933
3934 /* Allocate the repeated factor vector. */
3935 repeat_factor_vec.create (10);
3936
3937 /* Scan the OPS vector for all SSA names in the product and build
3938 up a vector of occurrence counts for each factor. */
3939 FOR_EACH_VEC_ELT (*ops, i, oe)
3940 {
3941 if (TREE_CODE (oe->op) == SSA_NAME)
3942 {
3943 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3944 {
3945 if (rf1->factor == oe->op)
3946 {
3947 rf1->count += oe->count;
3948 break;
3949 }
3950 }
3951
3952 if (j >= repeat_factor_vec.length ())
3953 {
3954 rfnew.factor = oe->op;
3955 rfnew.rank = oe->rank;
3956 rfnew.count = oe->count;
3957 rfnew.repr = NULL_TREE;
3958 repeat_factor_vec.safe_push (rfnew);
3959 }
3960 }
3961 }
3962
3963 /* Sort the repeated factor vector by (a) increasing occurrence count,
3964 and (b) decreasing rank. */
3965 repeat_factor_vec.qsort (compare_repeat_factors);
3966
3967 /* It is generally best to combine as many base factors as possible
3968 into a product before applying __builtin_powi to the result.
3969 However, the sort order chosen for the repeated factor vector
3970 allows us to cache partial results for the product of the base
3971 factors for subsequent use. When we already have a cached partial
3972 result from a previous iteration, it is best to make use of it
3973 before looking for another __builtin_powi opportunity.
3974
3975 As an example, consider x * x * y * y * y * z * z * z * z.
3976 We want to first compose the product x * y * z, raise it to the
3977 second power, then multiply this by y * z, and finally multiply
3978 by z. This can be done in 5 multiplies provided we cache y * z
3979 for use in both expressions:
3980
3981 t1 = y * z
3982 t2 = t1 * x
3983 t3 = t2 * t2
3984 t4 = t1 * t3
3985 result = t4 * z
3986
3987 If we instead ignored the cached y * z and first multiplied by
3988 the __builtin_powi opportunity z * z, we would get the inferior:
3989
3990 t1 = y * z
3991 t2 = t1 * x
3992 t3 = t2 * t2
3993 t4 = z * z
3994 t5 = t3 * t4
3995 result = t5 * y */
3996
3997 vec_len = repeat_factor_vec.length ();
3998
3999 /* Repeatedly look for opportunities to create a builtin_powi call. */
4000 while (true)
4001 {
4002 HOST_WIDE_INT power;
4003
4004 /* First look for the largest cached product of factors from
4005 preceding iterations. If found, create a builtin_powi for
4006 it if the minimum occurrence count for its factors is at
4007 least 2, or just use this cached product as our next
4008 multiplicand if the minimum occurrence count is 1. */
4009 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4010 {
4011 if (rf1->repr && rf1->count > 0)
4012 break;
4013 }
4014
4015 if (j < vec_len)
4016 {
4017 power = rf1->count;
4018
4019 if (power == 1)
4020 {
4021 iter_result = rf1->repr;
4022
4023 if (dump_file && (dump_flags & TDF_DETAILS))
4024 {
4025 unsigned elt;
4026 repeat_factor_t rf;
4027 fputs ("Multiplying by cached product ", dump_file);
4028 for (elt = j; elt < vec_len; elt++)
4029 {
4030 rf = &repeat_factor_vec[elt];
4031 print_generic_expr (dump_file, rf->factor, 0);
4032 if (elt < vec_len - 1)
4033 fputs (" * ", dump_file);
4034 }
4035 fputs ("\n", dump_file);
4036 }
4037 }
4038 else
4039 {
4040 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4041 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4042 build_int_cst (integer_type_node,
4043 power));
4044 gimple_call_set_lhs (pow_stmt, iter_result);
4045 gimple_set_location (pow_stmt, gimple_location (stmt));
4046 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4047
4048 if (dump_file && (dump_flags & TDF_DETAILS))
4049 {
4050 unsigned elt;
4051 repeat_factor_t rf;
4052 fputs ("Building __builtin_pow call for cached product (",
4053 dump_file);
4054 for (elt = j; elt < vec_len; elt++)
4055 {
4056 rf = &repeat_factor_vec[elt];
4057 print_generic_expr (dump_file, rf->factor, 0);
4058 if (elt < vec_len - 1)
4059 fputs (" * ", dump_file);
4060 }
4061 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4062 power);
4063 }
4064 }
4065 }
4066 else
4067 {
4068 /* Otherwise, find the first factor in the repeated factor
4069 vector whose occurrence count is at least 2. If no such
4070 factor exists, there are no builtin_powi opportunities
4071 remaining. */
4072 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4073 {
4074 if (rf1->count >= 2)
4075 break;
4076 }
4077
4078 if (j >= vec_len)
4079 break;
4080
4081 power = rf1->count;
4082
4083 if (dump_file && (dump_flags & TDF_DETAILS))
4084 {
4085 unsigned elt;
4086 repeat_factor_t rf;
4087 fputs ("Building __builtin_pow call for (", dump_file);
4088 for (elt = j; elt < vec_len; elt++)
4089 {
4090 rf = &repeat_factor_vec[elt];
4091 print_generic_expr (dump_file, rf->factor, 0);
4092 if (elt < vec_len - 1)
4093 fputs (" * ", dump_file);
4094 }
4095 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4096 }
4097
4098 reassociate_stats.pows_created++;
4099
4100 /* Visit each element of the vector in reverse order (so that
4101 high-occurrence elements are visited first, and within the
4102 same occurrence count, lower-ranked elements are visited
4103 first). Form a linear product of all elements in this order
4104 whose occurrence count is at least that of element J.
4105 Record the SSA name representing the product of each element
4106 with all subsequent elements in the vector. */
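/* Continuing the x*x*y*y*y*z*z*z*z example above, with the vector
   sorted as x, y, z by ascending count, the backwards walk caches
   z->repr = z, y->repr = y * z and x->repr = x * y * z; the
   __builtin_powi call built next then squares x->repr.  */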
4107 if (j == vec_len - 1)
4108 rf1->repr = rf1->factor;
4109 else
4110 {
4111 for (ii = vec_len - 2; ii >= (int)j; ii--)
4112 {
4113 tree op1, op2;
4114
4115 rf1 = &repeat_factor_vec[ii];
4116 rf2 = &repeat_factor_vec[ii + 1];
4117
4118 /* Init the last factor's representative to be itself. */
4119 if (!rf2->repr)
4120 rf2->repr = rf2->factor;
4121
4122 op1 = rf1->factor;
4123 op2 = rf2->repr;
4124
4125 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4126 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4127 target_ssa,
4128 op1, op2);
4129 gimple_set_location (mul_stmt, gimple_location (stmt));
4130 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4131 rf1->repr = target_ssa;
4132
4133 /* Don't reprocess the multiply we just introduced. */
4134 gimple_set_visited (mul_stmt, true);
4135 }
4136 }
4137
4138 /* Form a call to __builtin_powi for the maximum product
4139 just formed, raised to the power obtained earlier. */
4140 rf1 = &repeat_factor_vec[j];
4141 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4142 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4143 build_int_cst (integer_type_node,
4144 power));
4145 gimple_call_set_lhs (pow_stmt, iter_result);
4146 gimple_set_location (pow_stmt, gimple_location (stmt));
4147 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4148 }
4149
4150 /* If we previously formed at least one other builtin_powi call,
4151 form the product of this one and those others. */
4152 if (result)
4153 {
4154 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4155 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4156 result, iter_result);
4157 gimple_set_location (mul_stmt, gimple_location (stmt));
4158 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4159 gimple_set_visited (mul_stmt, true);
4160 result = new_result;
4161 }
4162 else
4163 result = iter_result;
4164
4165 /* Decrement the occurrence count of each element in the product
4166 by the count found above, and remove this many copies of each
4167 factor from OPS. */
4168 for (i = j; i < vec_len; i++)
4169 {
4170 unsigned k = power;
4171 unsigned n;
4172
4173 rf1 = &repeat_factor_vec[i];
4174 rf1->count -= power;
4175
4176 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4177 {
4178 if (oe->op == rf1->factor)
4179 {
4180 if (oe->count <= k)
4181 {
4182 ops->ordered_remove (n);
4183 k -= oe->count;
4184
4185 if (k == 0)
4186 break;
4187 }
4188 else
4189 {
4190 oe->count -= k;
4191 break;
4192 }
4193 }
4194 }
4195 }
4196 }
4197
4198 /* At this point all elements in the repeated factor vector have a
4199 remaining occurrence count of 0 or 1, and those with a count of 1
4200 don't have cached representatives. Re-sort the ops vector and
4201 clean up. */
4202 ops->qsort (sort_by_operand_rank);
4203 repeat_factor_vec.release ();
4204
4205 /* Return the final product computed herein. Note that there may
4206 still be some elements with single occurrence count left in OPS;
4207 those will be handled by the normal reassociation logic. */
4208 return result;
4209 }
4210
4211 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4212
4213 static void
4214 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4215 {
4216 tree rhs1;
4217
4218 if (dump_file && (dump_flags & TDF_DETAILS))
4219 {
4220 fprintf (dump_file, "Transforming ");
4221 print_gimple_stmt (dump_file, stmt, 0, 0);
4222 }
4223
4224 rhs1 = gimple_assign_rhs1 (stmt);
4225 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4226 update_stmt (stmt);
4227 remove_visited_stmt_chain (rhs1);
4228
4229 if (dump_file && (dump_flags & TDF_DETAILS))
4230 {
4231 fprintf (dump_file, " into ");
4232 print_gimple_stmt (dump_file, stmt, 0, 0);
4233 }
4234 }
4235
4236 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4237
4238 static void
4239 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4240 tree rhs1, tree rhs2)
4241 {
4242 if (dump_file && (dump_flags & TDF_DETAILS))
4243 {
4244 fprintf (dump_file, "Transforming ");
4245 print_gimple_stmt (dump_file, stmt, 0, 0);
4246 }
4247
4248 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4249 update_stmt (gsi_stmt (*gsi));
4250 remove_visited_stmt_chain (rhs1);
4251
4252 if (dump_file && (dump_flags & TDF_DETAILS))
4253 {
4254 fprintf (dump_file, " into ");
4255 print_gimple_stmt (dump_file, stmt, 0, 0);
4256 }
4257 }
4258
4259 /* Reassociate expressions in basic block BB and its children in the
4260 post-dominator tree. */
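/* In outline, for each eligible binary statement the loop below
   linearizes the expression tree into an operand vector, sorts it by
   rank, runs the operand-list optimizations (plus, for multiplies,
   the __builtin_powi detection), and finally rewrites the statement
   chain from the optimized vector.  */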
4261
4262 static void
4263 reassociate_bb (basic_block bb)
4264 {
4265 gimple_stmt_iterator gsi;
4266 basic_block son;
4267 gimple stmt = last_stmt (bb);
4268
4269 if (stmt && !gimple_visited_p (stmt))
4270 maybe_optimize_range_tests (stmt);
4271
4272 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4273 {
4274 stmt = gsi_stmt (gsi);
4275
4276 if (is_gimple_assign (stmt)
4277 && !stmt_could_throw_p (stmt))
4278 {
4279 tree lhs, rhs1, rhs2;
4280 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4281
4282 /* If this is not a gimple binary expression, there is
4283 nothing for us to do with it. */
4284 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4285 continue;
4286
4287 /* If this was part of an already processed statement,
4288 we don't need to touch it again. */
4289 if (gimple_visited_p (stmt))
4290 {
4291 /* This statement might have become dead because of previous
4292 reassociations. */
4293 if (has_zero_uses (gimple_get_lhs (stmt)))
4294 {
4295 gsi_remove (&gsi, true);
4296 release_defs (stmt);
4297 /* Removing the statement above may have left the
4298 iterator pointing past the end of the sequence.
4299 In that case reset it to the last statement of
4300 the block; if the block has become empty as
4301 well, there is nothing left to process and we
4302 need to bail out. */
4303 if (gsi_end_p (gsi))
4304 {
4305 gsi = gsi_last_bb (bb);
4306 if (gsi_end_p (gsi))
4307 break;
4308 }
4309 }
4310 continue;
4311 }
4312
4313 lhs = gimple_assign_lhs (stmt);
4314 rhs1 = gimple_assign_rhs1 (stmt);
4315 rhs2 = gimple_assign_rhs2 (stmt);
4316
4317 /* Bitwise and min/max operations can be reassociated for any type;
4318 for other operations verify that the types involved allow it. */
4319 if (rhs_code != BIT_IOR_EXPR
4320 && rhs_code != BIT_AND_EXPR
4321 && rhs_code != BIT_XOR_EXPR
4322 && rhs_code != MIN_EXPR
4323 && rhs_code != MAX_EXPR
4324 && (!can_reassociate_p (lhs)
4325 || !can_reassociate_p (rhs1)
4326 || !can_reassociate_p (rhs2)))
4327 continue;
4328
4329 if (associative_tree_code (rhs_code))
4330 {
4331 vec<operand_entry_t> ops = vNULL;
4332 tree powi_result = NULL_TREE;
4333
4334 /* There may be no immediate uses left by the time we
4335 get here because we may have eliminated them all. */
4336 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4337 continue;
4338
4339 gimple_set_visited (stmt, true);
4340 linearize_expr_tree (&ops, stmt, true, true);
4341 ops.qsort (sort_by_operand_rank);
4342 optimize_ops_list (rhs_code, &ops);
4343 if (undistribute_ops_list (rhs_code, &ops,
4344 loop_containing_stmt (stmt)))
4345 {
4346 ops.qsort (sort_by_operand_rank);
4347 optimize_ops_list (rhs_code, &ops);
4348 }
4349
4350 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4351 optimize_range_tests (rhs_code, &ops);
4352
4353 if (first_pass_instance
4354 && rhs_code == MULT_EXPR
4355 && flag_unsafe_math_optimizations)
4356 powi_result = attempt_builtin_powi (stmt, &ops);
4357
4358 /* If the operand vector is now empty, all operands were
4359 consumed by the __builtin_powi optimization. */
4360 if (ops.length () == 0)
4361 transform_stmt_to_copy (&gsi, stmt, powi_result);
4362 else if (ops.length () == 1)
4363 {
4364 tree last_op = ops.last ()->op;
4365
4366 if (powi_result)
4367 transform_stmt_to_multiply (&gsi, stmt, last_op,
4368 powi_result);
4369 else
4370 transform_stmt_to_copy (&gsi, stmt, last_op);
4371 }
4372 else
4373 {
4374 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4375 int ops_num = ops.length ();
4376 int width = get_reassociation_width (ops_num, rhs_code, mode);
4377
4378 if (dump_file && (dump_flags & TDF_DETAILS))
4379 fprintf (dump_file,
4380 "Width = %d was chosen for reassociation\n", width);
4381
4382 if (width > 1
4383 && ops.length () > 3)
4384 rewrite_expr_tree_parallel (stmt, width, ops);
4385 else
4386 {
4387 /* When there are three operands left, we want
4388 to make sure the ones that get the double
4389 binary op are chosen wisely. */
4390 int len = ops.length ();
4391 if (len >= 3)
4392 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4393
4394 rewrite_expr_tree (stmt, 0, ops, false);
4395 }
4396
4397 /* If we combined some repeated factors into a
4398 __builtin_powi call, multiply that result by the
4399 reassociated operands. */
4400 if (powi_result)
4401 {
4402 gimple mul_stmt;
4403 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4404 tree target_ssa = make_temp_ssa_name (type, NULL,
4405 "reassocpow");
4406 gimple_set_lhs (stmt, target_ssa);
4407 update_stmt (stmt);
4408 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4409 powi_result,
4410 target_ssa);
4411 gimple_set_location (mul_stmt, gimple_location (stmt));
4412 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4413 }
4414 }
4415
4416 ops.release ();
4417 }
4418 }
4419 }
4420 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4421 son;
4422 son = next_dom_son (CDI_POST_DOMINATORS, son))
4423 reassociate_bb (son);
4424 }
4425
4426 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4427 void debug_ops_vector (vec<operand_entry_t> ops);
4428
4429 /* Dump the operand entry vector OPS to FILE. */
4430
4431 void
4432 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4433 {
4434 operand_entry_t oe;
4435 unsigned int i;
4436
4437 FOR_EACH_VEC_ELT (ops, i, oe)
4438 {
4439 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4440 print_generic_expr (file, oe->op, 0);
4441 }
4442 }
4443
4444 /* Dump the operand entry vector OPS to STDERR. */
4445
4446 DEBUG_FUNCTION void
4447 debug_ops_vector (vec<operand_entry_t> ops)
4448 {
4449 dump_ops_vector (stderr, ops);
4450 }
4451
4452 static void
4453 do_reassoc (void)
4454 {
4455 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4456 renumber_gimple_stmt_uids ();
4457 reassociate_bb (EXIT_BLOCK_PTR);
4458 }
4459
4460 /* Initialize the reassociation pass. */
4461
4462 static void
4463 init_reassoc (void)
4464 {
4465 int i;
4466 long rank = 2;
4467 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4468
4469 /* Find the loops, so that we can prevent moving calculations in
4470 them. */
4471 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4472
4473 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4474
4475 operand_entry_pool = create_alloc_pool ("operand entry pool",
4476 sizeof (struct operand_entry), 30);
4477 next_operand_entry_id = 0;
4478
4479 /* A reverse post-order (RPO) walk gives us an ordering where
4480 deeper loops come later. */
4481 pre_and_rev_post_order_compute (NULL, bbs, false);
4482 bb_rank = XCNEWVEC (long, last_basic_block);
4483 operand_rank = pointer_map_create ();
4484
4485 /* Give each default definition a distinct rank. This includes
4486 parameters and the static chain. Walk backwards over all
4487 SSA names so that we get proper rank ordering according
4488 to tree_swap_operands_p. */
4489 for (i = num_ssa_names - 1; i > 0; --i)
4490 {
4491 tree name = ssa_name (i);
4492 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4493 insert_operand_rank (name, ++rank);
4494 }
4495
4496 /* Set up rank for each BB */
4497 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4498 bb_rank[bbs[i]] = ++rank << 16;
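/* Shifting the per-BB rank presumably leaves the low bits free for the
   within-block rank increments computed from operands, so that every
   rank assigned in one block stays below the base rank of any later
   block.  */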
4499
4500 free (bbs);
4501 calculate_dominance_info (CDI_POST_DOMINATORS);
4502 plus_negates = vNULL;
4503 }
4504
4505 /* Cleanup after the reassociation pass, and print stats if
4506 requested. */
4507
4508 static void
4509 fini_reassoc (void)
4510 {
4511 statistics_counter_event (cfun, "Linearized",
4512 reassociate_stats.linearized);
4513 statistics_counter_event (cfun, "Constants eliminated",
4514 reassociate_stats.constants_eliminated);
4515 statistics_counter_event (cfun, "Ops eliminated",
4516 reassociate_stats.ops_eliminated);
4517 statistics_counter_event (cfun, "Statements rewritten",
4518 reassociate_stats.rewritten);
4519 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4520 reassociate_stats.pows_encountered);
4521 statistics_counter_event (cfun, "Built-in powi calls created",
4522 reassociate_stats.pows_created);
4523
4524 pointer_map_destroy (operand_rank);
4525 free_alloc_pool (operand_entry_pool);
4526 free (bb_rank);
4527 plus_negates.release ();
4528 free_dominance_info (CDI_POST_DOMINATORS);
4529 loop_optimizer_finalize ();
4530 }
4531
4532 /* Gate and execute functions for Reassociation. */
4533
4534 static unsigned int
4535 execute_reassoc (void)
4536 {
4537 init_reassoc ();
4538
4539 do_reassoc ();
4540 repropagate_negates ();
4541
4542 fini_reassoc ();
4543 return 0;
4544 }
4545
4546 static bool
4547 gate_tree_ssa_reassoc (void)
4548 {
4549 return flag_tree_reassoc != 0;
4550 }
4551
4552 namespace {
4553
4554 const pass_data pass_data_reassoc =
4555 {
4556 GIMPLE_PASS, /* type */
4557 "reassoc", /* name */
4558 OPTGROUP_NONE, /* optinfo_flags */
4559 true, /* has_gate */
4560 true, /* has_execute */
4561 TV_TREE_REASSOC, /* tv_id */
4562 ( PROP_cfg | PROP_ssa ), /* properties_required */
4563 0, /* properties_provided */
4564 0, /* properties_destroyed */
4565 0, /* todo_flags_start */
4566 ( TODO_verify_ssa
4567 | TODO_update_ssa_only_virtuals
4568 | TODO_verify_flow ), /* todo_flags_finish */
4569 };
4570
4571 class pass_reassoc : public gimple_opt_pass
4572 {
4573 public:
4574 pass_reassoc (gcc::context *ctxt)
4575 : gimple_opt_pass (pass_data_reassoc, ctxt)
4576 {}
4577
4578 /* opt_pass methods: */
4579 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4580 bool gate () { return gate_tree_ssa_reassoc (); }
4581 unsigned int execute () { return execute_reassoc (); }
4582
4583 }; // class pass_reassoc
4584
4585 } // anon namespace
4586
4587 gimple_opt_pass *
4588 make_pass_reassoc (gcc::context *ctxt)
4589 {
4590 return new pass_reassoc (ctxt);
4591 }