/* Reassociation for trees.
   Copyright (C) 2005-2013 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "tree.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "vec.h"
#include "langhooks.h"
#include "pointer-set.h"
#include "cfgloop.h"
#include "flags.h"
#include "target.h"
#include "params.h"
#include "diagnostic-core.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (they do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_t.

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this, if we really cared, would be to build the new
   op tree from the leaves to the roots, merging as you go, and putting
   the new op on the end of the worklist, until you are left with one
   thing on the worklist.

   I.e., if you have to rewrite the following set of operands (listed
   with rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of
   the same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
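
/* An illustrative, self-contained sketch of the merge-worklist scheme
   described above, using the a/b/c/d/e example.  It is not part of the
   pass: the struct, the names and the tie-breaking rule are invented
   for the demonstration and simplified relative to the prose, and the
   snippet is kept under #if 0 so it stays out of the build.  Compiled
   standalone, it prints the merge order.  */
#if 0
#include <stdio.h>
#include <string.h>

struct op { char name[16]; int rank; };

int
main (void)
{
  struct op work[16] = { {"a", 1}, {"b", 1}, {"c", 1}, {"d", 2}, {"e", 2} };
  int n = 5, tmpno = 0;

  while (n > 1)
    {
      int i = 0, j = 1, x, y, found = 0;

      /* Prefer merging two entries of equal rank, so that their sum is
         exposed to the redundancy eliminator; otherwise just merge the
         first two entries.  */
      for (x = 0; x < n && !found; x++)
        for (y = x + 1; y < n; y++)
          if (work[x].rank == work[y].rank)
            {
              i = x;
              j = y;
              found = 1;
              break;
            }

      printf ("mergetmp%d = %s + %s\n", ++tmpno, work[i].name, work[j].name);
      if (work[i].rank == work[j].rank)
        work[i].rank++;
      snprintf (work[i].name, sizeof work[i].name, "mergetmp%d", tmpno);
      memmove (&work[j], &work[j + 1], (n - j - 1) * sizeof (struct op));
      n--;
    }

  return 0;
}
#endif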


/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static struct pointer_map_t *operand_rank;

/* Forward decls.  */
static long get_rank (tree);


/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
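
/* A worked example of the bias (all numbers invented for the
   illustration): suppose the latch block of an innermost loop has
   rank 64 and the loop body computes x_2 = ((a + d) + e) + x_1 with
   x_1 a loop-carried phi.  phi_rank gives x_1 the rank
   64 + PHI_LOOP_BIAS = 32832, so when the operands are sorted by rank
   x_1 sorts last and is added last, while propagate_rank declines to
   propagate the biased rank into the partial sums a + d and
   (a + d) + e, keeping them free of the loop-carried dependence.  */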

/* Look up the operand rank structure for expression E.  */

static inline long
find_operand_rank (tree e)
{
  void **slot = pointer_map_contains (operand_rank, e);
  return slot ? (long) (intptr_t) *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  void **slot;
  gcc_assert (rank > 0);
  slot = pointer_map_insert (operand_rank, e);
  gcc_assert (!*slot);
  *slot = (void *) (intptr_t) rank;
}

/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      int i, n;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt)
          || gimple_vdef (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      rank = 0;
      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          n = TREE_OPERAND_LENGTH (rhs);
          if (n == 0)
            rank = propagate_rank (rank, rhs);
          else
            {
              for (i = 0; i < n; i++)
                {
                  op = TREE_OPERAND (rhs, i);

                  if (op != NULL_TREE)
                    rank = propagate_rank (rank, op);
                }
            }
        }
      else
        {
          n = gimple_num_ops (stmt);
          for (i = 1; i < n; i++)
            {
              op = gimple_op (stmt, i);
              gcc_assert (op);
              rank = propagate_rank (rank, op);
            }
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0.  */
  return 0;
}
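
/* A self-contained sketch (invented for illustration, not part of
   GCC, kept out of the build) of the core of the ranking scheme
   above: constants rank 0, other leaves use a preset rank, and an
   expression ranks one more than the maximum rank of its operands.  */
#if 0
#include <stdio.h>

struct expr { int leaf_rank; struct expr *op0, *op1; };

static long
rank_of (struct expr *e)
{
  long r0, r1;
  if (!e->op0 && !e->op1)
    return e->leaf_rank;        /* Leaf: constant (0) or preset rank.  */
  r0 = e->op0 ? rank_of (e->op0) : 0;
  r1 = e->op1 ? rank_of (e->op1) : 0;
  return (r0 > r1 ? r0 : r1) + 1;
}

int
main (void)
{
  struct expr a = { 1, NULL, NULL }, b = { 1, NULL, NULL };
  struct expr c = { 0, NULL, NULL };    /* A constant: rank 0.  */
  struct expr ab = { 0, &a, &b };       /* a + b: rank 2.  */
  struct expr abc = { 0, &ab, &c };     /* (a + b) + c: rank 3.  */
  printf ("rank ((a + b) + c) = %ld\n", rank_of (&abc));
  return 0;
}
#endif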


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oeb->op) - constant_type (oea->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  We use SSA_NAME_VERSION because it's stable.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
        return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}


/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry_t> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry_t curr,
                          operand_entry_t last)
{

  /* If we have two of the same op, and the opcode is & or |, min, or
     max, we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op, 0);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              ops->create (0);
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}
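
/* A quick standalone self-check (illustrative only, kept out of the
   build) of the identities relied on above: for &, |, min and max,
   x OP x == x, while x ^ x == 0.  */
#if 0
#include <assert.h>

static int min_int (int a, int b) { return a < b ? a : b; }
static int max_int (int a, int b) { return a > b ? a : b; }

int
main (void)
{
  int x = 0x5a;
  assert ((x & x) == x);
  assert ((x | x) == x);
  assert ((x ^ x) == 0);
  assert (min_int (x, x) == x);
  assert (max_int (x, x) == x);
  return 0;
}
#endif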

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a
   bitwise not expression, look in OPS for a corresponding positive
   operation to cancel it out.  If we find one, remove the other from
   OPS, replace OPS[CURRINDEX] with 0 or -1, respectively, and return
   true.  Otherwise, return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           vec<operand_entry_t> *ops,
                           unsigned int currindex,
                           operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == negateop)
        {

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop, 0);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> 0\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated++;

          return true;
        }
      else if (oe->op == notop)
        {
          tree op_type = TREE_TYPE (oe->op);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              fprintf (dump_file, " + ~");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> -1\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated++;

          return true;
        }
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates ().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 (for AND) or -1 (for IOR), and return true.
   Otherwise, return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     vec<operand_entry_t> *ops,
                     unsigned int currindex,
                     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = build_zero_cst (TREE_TYPE (oe->op));
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
                                          TYPE_PRECISION (TREE_TYPE (oe->op)));

          reassociate_stats.ops_eliminated += ops->length () - 1;
          ops->truncate (0);
          ops->quick_push (oe);
          return true;
        }
    }

  return false;
}
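
/* A quick standalone self-check (illustrative only, kept out of the
   build) of the cancellations used by eliminate_plus_minus_pair and
   eliminate_not_pairs on two's complement integers: x + -x == 0,
   x + ~x == -1, x & ~x == 0 and x | ~x == -1.  */
#if 0
#include <assert.h>

int
main (void)
{
  int x = 12345;
  assert (x + -x == 0);
  assert (x + ~x == -1);        /* ~x is -x - 1.  */
  assert ((x & ~x) == 0);
  assert ((x | ~x) == -1);
  return 0;
}
#endif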

/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
                           vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (TYPE_MODE (type))
                  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
                  && real_zerop (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;
                  ops->truncate (1);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (TYPE_MODE (type))
                       && real_onep (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}
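
/* A quick standalone self-check (illustrative only, kept out of the
   build) of the constant identities applied above: x & 0 == 0,
   x & -1 == x, x | -1 == -1, x | 0 == x, x * 0 == 0, x * 1 == x and
   x + 0 == x.  */
#if 0
#include <assert.h>

int
main (void)
{
  int x = 42;
  assert ((x & 0) == 0 && (x & -1) == x);
  assert ((x | -1) == -1 && (x | 0) == x);
  assert (x * 0 == 0 && x * 1 == x && x + 0 == x);
  return 0;
}
#endif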


static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : typed_noop_remove <void>
{
  /* Note that this hash table stores integers, not pointers.
     So, observe the casting in the member functions.  */
  typedef void value_type;
  typedef void compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (const value_type *p)
{
  const oecount *c = &cvec[(size_t)p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (const value_type *p1, const compare_type *p2)
{
  const oecount *c1 = &cvec[(size_t)p1 - 42];
  const oecount *c2 = &cvec[(size_t)p2 - 42];
  return (c1->oecode == c2->oecode
          && c1->op == c2->op);
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple stmt, tree op)
{
  tree fndecl;

  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
    CASE_FLT_FN (BUILT_IN_POWI):
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, 0, 0);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
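
/* What decrementing the exponent means numerically (an illustrative
   standalone snippet, not part of the pass; link with -lm): after one
   factor of X is extracted, pow (x, n) becomes x * pow (x, n - 1), so
   the overall product is unchanged.  */
#if 0
#include <assert.h>
#include <math.h>

int
main (void)
{
  double x = 1.5;
  assert (fabs (pow (x, 3.0) - x * pow (x, 2.0)) < 1e-12);
  return 0;
}
#endif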

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  gsi_remove (&gsi, true);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      if (opcode == MULT_EXPR
          && stmt_is_power_of_op (stmt, op))
        {
          if (decrement_power (stmt) == 1)
            propagate_op_to_single_use (op, stmt, def);
          return;
        }

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          propagate_op_to_single_use (name, stmt, def);
          return;
        }

      /* We might have a multiply of two __builtin_pow* calls, and
         the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
          && gimple_assign_rhs_code (stmt) == opcode
          && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
          && has_single_use (gimple_assign_rhs2 (stmt)))
        {
          gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (stmt_is_power_of_op (stmt2, op))
            {
              if (decrement_power (stmt2) == 1)
                propagate_op_to_single_use (op, stmt2, def);
              return;
            }
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
         executed all in parallel, if only one stmt is a PHI,
         it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
        return true;

      if (gimple_code (s2) == GIMPLE_PHI)
        return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
        return true;

      if (gimple_uid (s1) > gimple_uid (s2))
        return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple s = gsi_stmt (gsi);
          if (gimple_uid (s) != uid)
            break;
          if (s == s2)
            return true;
        }

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple stmt, gimple insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
                      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Builds one statement performing OP1 OPCODE OP2 using a new SSA name
   of type TYPE for the result.  Places the statement after the
   definition of either OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gimple sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type, NULL);
  sum = gimple_build_assign_with_ops (opcode, op, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
      if (gsi_end_p (gsi))
        {
          gimple_stmt_iterator gsi2
            = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR));
          gimple_set_uid (sum,
                          gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
        }
      else
        gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple insert_point;
      if ((!op1def || gimple_nop_p (op1def))
          || (op2def && !gimple_nop_p (op2def)
              && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
        insert_point = op2def;
      else
        insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of
      (operand, code) pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrences
      and process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */

static bool
undistribute_ops_list (enum tree_code opcode,
                       vec<operand_entry_t> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry_t> *subops;
  hash_table <oecount_hasher> ctable;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
           && dcode != RDIV_EXPR)
          || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
                          (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);
  ctable.create (15);
  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to work around that limitation.  */
  typedef vec<operand_entry_t> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
                           associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
        {
          oecount c;
          void **slot;
          size_t idx;
          c.oecode = oecode;
          c.cnt = 1;
          c.id = next_oecount_id++;
          c.op = oe1->op;
          cvec.safe_push (c);
          idx = cvec.length () + 41;
          slot = ctable.find_slot ((void *)idx, INSERT);
          if (!*slot)
            {
              *slot = (void *)idx;
            }
          else
            {
              cvec.pop ();
              cvec[(size_t)*slot - 42].cnt++;
            }
        }
    }
  ctable.dispose ();

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
        {
          fprintf (dump_file, "  %u %s: ", c->cnt,
                   c->oecode == MULT_EXPR
                   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
          print_generic_expr (dump_file, c->op, 0);
          fprintf (dump_file, "\n");
        }
    }

  /* Process the (operand, code) pairs in order of most occurrences.  */
  candidates2 = sbitmap_alloc (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
        break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
        {
          gimple oedef;
          enum tree_code oecode;
          unsigned j;
          tree op = (*ops)[i]->op;

          /* If we undistributed in this chain already this may be
             a constant.  */
          if (TREE_CODE (op) != SSA_NAME)
            continue;

          oedef = SSA_NAME_DEF_STMT (op);
          oecode = gimple_assign_rhs_code (oedef);
          if (oecode != c->oecode)
            continue;

          FOR_EACH_VEC_ELT (subops[i], j, oe1)
            {
              if (oe1->op == c->op)
                {
                  bitmap_set_bit (candidates2, i);
                  ++nr_candidates2;
                  break;
                }
            }
        }

      if (nr_candidates2 >= 2)
        {
          operand_entry_t oe1, oe2;
          gimple prod;
          int first = bitmap_first_set_bit (candidates2);

          /* Build the new addition chain.  */
          oe1 = (*ops)[first];
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Building (");
              print_generic_expr (dump_file, oe1->op, 0);
            }
          zero_one_operation (&oe1->op, c->oecode, c->op);
          EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
            {
              gimple sum;
              oe2 = (*ops)[i];
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " + ");
                  print_generic_expr (dump_file, oe2->op, 0);
                }
              zero_one_operation (&oe2->op, c->oecode, c->op);
              sum = build_and_add_sum (TREE_TYPE (oe1->op),
                                       oe1->op, oe2->op, opcode);
              oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
              oe2->rank = 0;
              oe1->op = gimple_get_lhs (sum);
            }

          /* Apply the multiplication/division.  */
          prod = build_and_add_sum (TREE_TYPE (oe1->op),
                                    oe1->op, c->op, c->oecode);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
              print_generic_expr (dump_file, c->op, 0);
              fprintf (dump_file, "\n");
            }

          /* Record it in the addition chain and disable further
             undistribution with this op.  */
          oe1->op = gimple_assign_lhs (prod);
          oe1->rank = get_rank (oe1->op);
          subops[first].release ();

          changed = true;
        }

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
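
/* Numeric sanity check (illustrative only, kept out of the build) of
   the transforms this function performs: a*x + b*x == (a + b)*x, and
   for real division a/x + b/x == (a + b)/x; the values below are
   exactly representable, so the comparisons are exact.  */
#if 0
#include <assert.h>

int
main (void)
{
  double a = 3.0, b = 5.0, x = 2.0;
  assert (a * x + b * x == (a + b) * x);
  assert (a / x + b / x == (a + b) / x);
  return 0;
}
#endif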

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
                                vec<operand_entry_t> *ops,
                                unsigned int currindex,
                                operand_entry_t curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple def1, def2;
  int i;
  operand_entry_t oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
        continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
        continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
        continue;

      /* If we got here, we have a match.  See if we can combine the
         two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
        t = maybe_fold_or_comparisons (lcode, op1, op2,
                                       rcode, gimple_assign_rhs1 (def2),
                                       gimple_assign_rhs2 (def2));
      else
        t = maybe_fold_and_comparisons (lcode, op1, op2,
                                        rcode, gimple_assign_rhs1 (def2),
                                        gimple_assign_rhs2 (def2));
      if (!t)
        continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
         always give us a boolean_type_node value back.  If the original
         BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
         we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
        t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
          && !operand_equal_p (t, curr->op, 0))
        {
          enum tree_code subcode;
          tree newop1, newop2;
          if (!COMPARISON_CLASS_P (t))
            continue;
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
            continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Equivalence: ");
          print_generic_expr (dump_file, curr->op, 0);
          fprintf (dump_file, " %s ", op_symbol_code (opcode));
          print_generic_expr (dump_file, oe->op, 0);
          fprintf (dump_file, " -> ");
          print_generic_expr (dump_file, t, 0);
          fprintf (dump_file, "\n");
        }

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
         replace curr->op with t.  Special case is if we got a constant
         back, in which case we add it to the end instead of in place of
         the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
        {
          ops->ordered_remove (currindex);
          add_to_ops_vec (ops, t);
        }
      else if (!operand_equal_p (t, curr->op, 0))
        {
          gimple sum;
          enum tree_code subcode;
          tree newop1;
          tree newop2;
          gcc_assert (COMPARISON_CLASS_P (t));
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          gcc_checking_assert (is_gimple_val (newop1)
                               && is_gimple_val (newop2));
          sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
          curr->op = gimple_get_lhs (sum);
        }
      return true;
    }

  return false;
}
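
/* Brute-force check (illustrative only, kept out of the build) of the
   example rewrite named in the comment above: (a < b) | (a == b) is
   equivalent to (a <= b).  */
#if 0
#include <assert.h>

int
main (void)
{
  int a, b;
  for (a = -2; a <= 2; a++)
    for (b = -2; b <= 2; b++)
      assert (((a < b) | (a == b)) == (a <= b));
  return 0;
}
#endif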

/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
                   vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
          && is_gimple_min_invariant (oelm1->op)
          && useless_type_conversion_p (TREE_TYPE (oelm1->op),
                                        TREE_TYPE (oelast->op)))
        {
          tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
                                     oelm1->op, oelast->op);

          if (folded && is_gimple_min_invariant (folded))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Merging constants\n");

              ops->pop ();
              ops->pop ();

              add_to_ops_vec (ops, folded);
              reassociate_stats.constants_eliminated++;

              optimize_ops_list (opcode, ops);
              return;
            }
        }
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
        return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
          || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
          || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
        {
          if (done)
            return;
          iterate = true;
          oelast = NULL;
          continue;
        }
      oelast = oe;
      i++;
    }

  length = ops->length ();
  oelast = ops->last ();

  if (iterate)
    optimize_ops_list (opcode, ops);
}
1761
946e9eb4 1762/* The following functions are subroutines to optimize_range_tests and allow
1763 it to try to change a logical combination of comparisons into a range
1764 test.
1765
1766 For example, both
1767 X == 2 || X == 5 || X == 3 || X == 4
1768 and
1769 X >= 2 && X <= 5
1770 are converted to
1771 (unsigned) (X - 2) <= 3
1772
 1773   For more information see the comments above fold_range_test in
 1774   fold-const.c; this implementation is for GIMPLE.  */
1775
1776struct range_entry
1777{
1778 tree exp;
1779 tree low;
1780 tree high;
1781 bool in_p;
1782 bool strict_overflow_p;
1783 unsigned int idx, next;
1784};
1785
1786/* This is similar to make_range in fold-const.c, but on top of
8a2c7744 1787 GIMPLE instead of trees. If EXP is non-NULL, it should be
 1788   an SSA_NAME and the STMT argument is ignored; otherwise the STMT
 1789   argument should be a GIMPLE_COND.  */
946e9eb4 1790
1791static void
8a2c7744 1792init_range_entry (struct range_entry *r, tree exp, gimple stmt)
946e9eb4 1793{
1794 int in_p;
1795 tree low, high;
1796 bool is_bool, strict_overflow_p;
1797
1798 r->exp = NULL_TREE;
1799 r->in_p = false;
1800 r->strict_overflow_p = false;
1801 r->low = NULL_TREE;
1802 r->high = NULL_TREE;
8a2c7744 1803 if (exp != NULL_TREE
1804 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
946e9eb4 1805 return;
1806
1807 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1808 and see if we can refine the range. Some of the cases below may not
1809 happen, but it doesn't seem worth worrying about this. We "continue"
1810 the outer loop when we've changed something; otherwise we "break"
1811 the switch, which will "break" the while. */
8a2c7744 1812 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
946e9eb4 1813 high = low;
1814 in_p = 0;
1815 strict_overflow_p = false;
1816 is_bool = false;
8a2c7744 1817 if (exp == NULL_TREE)
1818 is_bool = true;
1819 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
946e9eb4 1820 {
1821 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1822 is_bool = true;
1823 else
1824 return;
1825 }
1826 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1827 is_bool = true;
1828
1829 while (1)
1830 {
946e9eb4 1831 enum tree_code code;
1832 tree arg0, arg1, exp_type;
1833 tree nexp;
1834 location_t loc;
1835
8a2c7744 1836 if (exp != NULL_TREE)
1837 {
1838 if (TREE_CODE (exp) != SSA_NAME)
1839 break;
946e9eb4 1840
8a2c7744 1841 stmt = SSA_NAME_DEF_STMT (exp);
1842 if (!is_gimple_assign (stmt))
1843 break;
1844
1845 code = gimple_assign_rhs_code (stmt);
1846 arg0 = gimple_assign_rhs1 (stmt);
1847 arg1 = gimple_assign_rhs2 (stmt);
1848 exp_type = TREE_TYPE (exp);
1849 }
1850 else
1851 {
1852 code = gimple_cond_code (stmt);
1853 arg0 = gimple_cond_lhs (stmt);
1854 arg1 = gimple_cond_rhs (stmt);
1855 exp_type = boolean_type_node;
1856 }
946e9eb4 1857
9adacac7 1858 if (TREE_CODE (arg0) != SSA_NAME)
1859 break;
946e9eb4 1860 loc = gimple_location (stmt);
1861 switch (code)
1862 {
1863 case BIT_NOT_EXPR:
e423480a 1864 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1865 /* Ensure the range is either +[-,0], +[0,0],
1866 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1867 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1868 or similar expression of unconditional true or
1869 false, it should not be negated. */
1870 && ((high && integer_zerop (high))
1871 || (low && integer_onep (low))))
946e9eb4 1872 {
1873 in_p = !in_p;
1874 exp = arg0;
1875 continue;
1876 }
1877 break;
1878 case SSA_NAME:
1879 exp = arg0;
1880 continue;
1881 CASE_CONVERT:
1882 if (is_bool)
1883 goto do_default;
1884 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1885 {
1886 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1887 is_bool = true;
1888 else
1889 return;
1890 }
1891 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1892 is_bool = true;
1893 goto do_default;
1894 case EQ_EXPR:
1895 case NE_EXPR:
1896 case LT_EXPR:
1897 case LE_EXPR:
1898 case GE_EXPR:
1899 case GT_EXPR:
1900 is_bool = true;
1901 /* FALLTHRU */
1902 default:
1903 if (!is_bool)
1904 return;
1905 do_default:
1906 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1907 &low, &high, &in_p,
1908 &strict_overflow_p);
1909 if (nexp != NULL_TREE)
1910 {
1911 exp = nexp;
1912 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1913 continue;
1914 }
1915 break;
1916 }
1917 break;
1918 }
1919 if (is_bool)
1920 {
1921 r->exp = exp;
1922 r->in_p = in_p;
1923 r->low = low;
1924 r->high = high;
1925 r->strict_overflow_p = strict_overflow_p;
1926 }
1927}
1928
1929/* Comparison function for qsort. Sort entries
1930 without SSA_NAME exp first, then with SSA_NAMEs sorted
1931 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1932 by increasing ->low and if ->low is the same, by increasing
1933 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1934 maximum. */
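/* For illustration (hypothetical SSA names): all entries for x_1 sort
   before entries for x_7, and among the x_1 entries one with range
   [1, 5] sorts before one with range [8, 10], since its ->low is
   smaller.  */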
1935
1936static int
1937range_entry_cmp (const void *a, const void *b)
1938{
1939 const struct range_entry *p = (const struct range_entry *) a;
1940 const struct range_entry *q = (const struct range_entry *) b;
1941
1942 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1943 {
1944 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1945 {
1946 /* Group range_entries for the same SSA_NAME together. */
1947 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1948 return -1;
1949 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1950 return 1;
1951 /* If ->low is different, NULL low goes first, then by
1952 ascending low. */
1953 if (p->low != NULL_TREE)
1954 {
1955 if (q->low != NULL_TREE)
1956 {
1957 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1958 p->low, q->low);
1959 if (tem && integer_onep (tem))
1960 return -1;
1961 tem = fold_binary (GT_EXPR, boolean_type_node,
1962 p->low, q->low);
1963 if (tem && integer_onep (tem))
1964 return 1;
1965 }
1966 else
1967 return 1;
1968 }
1969 else if (q->low != NULL_TREE)
1970 return -1;
1971 /* If ->high is different, NULL high goes last, before that by
1972 ascending high. */
1973 if (p->high != NULL_TREE)
1974 {
1975 if (q->high != NULL_TREE)
1976 {
1977 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1978 p->high, q->high);
1979 if (tem && integer_onep (tem))
1980 return -1;
1981 tem = fold_binary (GT_EXPR, boolean_type_node,
1982 p->high, q->high);
1983 if (tem && integer_onep (tem))
1984 return 1;
1985 }
1986 else
1987 return -1;
1988 }
1989 else if (p->high != NULL_TREE)
1990 return 1;
1991 /* If both ranges are the same, sort below by ascending idx. */
1992 }
1993 else
1994 return 1;
1995 }
1996 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1997 return -1;
1998
1999 if (p->idx < q->idx)
2000 return -1;
2001 else
2002 {
2003 gcc_checking_assert (p->idx > q->idx);
2004 return 1;
2005 }
2006}
2007
 2008/* Helper routine of optimize_range_tests.
2009 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2010 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2011 OPCODE and OPS are arguments of optimize_range_tests. Return
8a2c7744 2012 true if the range merge has been successful.
2013 If OPCODE is ERROR_MARK, this is called from within
2014 maybe_optimize_range_tests and is performing inter-bb range optimization.
82cb4e1c 2015 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2016 oe->rank. */
946e9eb4 2017
2018static bool
2019update_range_test (struct range_entry *range, struct range_entry *otherrange,
2020 unsigned int count, enum tree_code opcode,
f1f41a6c 2021 vec<operand_entry_t> *ops, tree exp, bool in_p,
946e9eb4 2022 tree low, tree high, bool strict_overflow_p)
2023{
f1f41a6c 2024 operand_entry_t oe = (*ops)[range->idx];
8a2c7744 2025 tree op = oe->op;
2026 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
2027 location_t loc = gimple_location (stmt);
2028 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2029 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
946e9eb4 2030 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2031 gimple_stmt_iterator gsi;
2032
2033 if (tem == NULL_TREE)
2034 return false;
2035
2036 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2037 warning_at (loc, OPT_Wstrict_overflow,
2038 "assuming signed overflow does not occur "
2039 "when simplifying range test");
2040
2041 if (dump_file && (dump_flags & TDF_DETAILS))
2042 {
2043 struct range_entry *r;
2044 fprintf (dump_file, "Optimizing range tests ");
2045 print_generic_expr (dump_file, range->exp, 0);
2046 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2047 print_generic_expr (dump_file, range->low, 0);
2048 fprintf (dump_file, ", ");
2049 print_generic_expr (dump_file, range->high, 0);
2050 fprintf (dump_file, "]");
2051 for (r = otherrange; r < otherrange + count; r++)
2052 {
2053 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2054 print_generic_expr (dump_file, r->low, 0);
2055 fprintf (dump_file, ", ");
2056 print_generic_expr (dump_file, r->high, 0);
2057 fprintf (dump_file, "]");
2058 }
2059 fprintf (dump_file, "\n into ");
2060 print_generic_expr (dump_file, tem, 0);
2061 fprintf (dump_file, "\n");
2062 }
2063
8a2c7744 2064 if (opcode == BIT_IOR_EXPR
2065 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
946e9eb4 2066 tem = invert_truthvalue_loc (loc, tem);
2067
8a2c7744 2068 tem = fold_convert_loc (loc, optype, tem);
2069 gsi = gsi_for_stmt (stmt);
946e9eb4 2070 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2071 GSI_SAME_STMT);
82cb4e1c 2072 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
2073 if (gimple_uid (gsi_stmt (gsi)))
2074 break;
2075 else
2076 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
946e9eb4 2077
8a2c7744 2078 oe->op = tem;
946e9eb4 2079 range->exp = exp;
2080 range->low = low;
2081 range->high = high;
2082 range->in_p = in_p;
2083 range->strict_overflow_p = false;
2084
2085 for (range = otherrange; range < otherrange + count; range++)
2086 {
f1f41a6c 2087 oe = (*ops)[range->idx];
8a2c7744 2088 /* Now change all the other range test immediate uses, so that
2089 those tests will be optimized away. */
2090 if (opcode == ERROR_MARK)
2091 {
2092 if (oe->op)
82cb4e1c 2093 oe->op = build_int_cst (TREE_TYPE (oe->op),
2094 oe->rank == BIT_IOR_EXPR ? 0 : 1);
8a2c7744 2095 else
82cb4e1c 2096 oe->op = (oe->rank == BIT_IOR_EXPR
2097 ? boolean_false_node : boolean_true_node);
8a2c7744 2098 }
82cb4e1c 2099 else
2100 oe->op = error_mark_node;
946e9eb4 2101 range->exp = NULL_TREE;
2102 }
2103 return true;
2104}
2105
b0c0c879 2106/* Optimize X == CST1 || X == CST2
2107 if popcount (CST1 ^ CST2) == 1 into
2108 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2109 Similarly for ranges. E.g.
2110 X != 2 && X != 3 && X != 10 && X != 11
2111 will be transformed by the previous optimization into
2112 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2113 and this loop can transform that into
2114 !(((X & ~8) - 2U) <= 1U). */
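/* A worked instance of the above, for illustration only: for the
   ranges [2, 3] and [10, 11] we have lowi ^ lowj == 2 ^ 10 == 8 and
   highi ^ highj == 3 ^ 11 == 8, a single set bit, so the tests
   (X - 2U) <= 1U and (X - 10U) <= 1U merge into
   ((X & ~8) - 2U) <= 1U.  */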
2115
2116static bool
2117optimize_range_tests_xor (enum tree_code opcode, tree type,
2118 tree lowi, tree lowj, tree highi, tree highj,
2119 vec<operand_entry_t> *ops,
2120 struct range_entry *rangei,
2121 struct range_entry *rangej)
2122{
2123 tree lowxor, highxor, tem, exp;
2124 /* Check highi ^ lowi == highj ^ lowj and
2125 popcount (highi ^ lowi) == 1. */
2126 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2127 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2128 return false;
2129 if (tree_log2 (lowxor) < 0)
2130 return false;
2131 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2132 if (!tree_int_cst_equal (lowxor, highxor))
2133 return false;
2134
2135 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2136 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2137 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2138 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2139 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2140 rangei->in_p, lowj, highj,
2141 rangei->strict_overflow_p
2142 || rangej->strict_overflow_p))
2143 return true;
2144 return false;
2145}
2146
2147/* Optimize X == CST1 || X == CST2
2148 if popcount (CST2 - CST1) == 1 into
2149 ((X - CST1) & ~(CST2 - CST1)) == 0.
2150 Similarly for ranges. E.g.
2151 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2152 || X == 75 || X == 45
2153 will be transformed by the previous optimization into
2154 (X - 43U) <= 3U || (X - 75U) <= 3U
2155 and this loop can transform that into
2156 ((X - 43U) & ~(75U - 43U)) <= 3U. */
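/* Concretely, in the example above 75U - 43U == 32 has a single bit
   set, so the mask is ~32U and the two four-value ranges collapse into
   the single test shown.  */
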
2157static bool
2158optimize_range_tests_diff (enum tree_code opcode, tree type,
2159 tree lowi, tree lowj, tree highi, tree highj,
2160 vec<operand_entry_t> *ops,
2161 struct range_entry *rangei,
2162 struct range_entry *rangej)
2163{
2164 tree tem1, tem2, mask;
2165 /* Check highi - lowi == highj - lowj. */
2166 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2167 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2168 return false;
2169 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2170 if (!tree_int_cst_equal (tem1, tem2))
2171 return false;
2172 /* Check popcount (lowj - lowi) == 1. */
2173 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2174 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2175 return false;
2176 if (tree_log2 (tem1) < 0)
2177 return false;
2178
2179 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2180 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2181 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2182 lowj = build_int_cst (type, 0);
2183 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2184 rangei->in_p, lowj, tem2,
2185 rangei->strict_overflow_p
2186 || rangej->strict_overflow_p))
2187 return true;
2188 return false;
2189}
2190
 2191/* Helper of optimize_range_tests.  It performs the checks common to
 2192   optimize_range_tests_xor and optimize_range_tests_diff, then calls
 2193   optimize_range_tests_xor on each candidate pair if OPTIMIZE_XOR is
 2194   true, and optimize_range_tests_diff otherwise.  */
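/* Note that the candidate scan below caps the inner loop at 64 entries
   past I (j < i + 64), trading possibly missed matches for bounded
   compile time.  */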
2195
2196static bool
2197optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2198 bool optimize_xor, vec<operand_entry_t> *ops,
2199 struct range_entry *ranges)
2200{
2201 int i, j;
2202 bool any_changes = false;
2203 for (i = first; i < length; i++)
2204 {
2205 tree lowi, highi, lowj, highj, type, tem;
2206
2207 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2208 continue;
2209 type = TREE_TYPE (ranges[i].exp);
2210 if (!INTEGRAL_TYPE_P (type))
2211 continue;
2212 lowi = ranges[i].low;
2213 if (lowi == NULL_TREE)
2214 lowi = TYPE_MIN_VALUE (type);
2215 highi = ranges[i].high;
2216 if (highi == NULL_TREE)
2217 continue;
2218 for (j = i + 1; j < length && j < i + 64; j++)
2219 {
2220 bool changes;
2221 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2222 continue;
2223 lowj = ranges[j].low;
2224 if (lowj == NULL_TREE)
2225 continue;
2226 highj = ranges[j].high;
2227 if (highj == NULL_TREE)
2228 highj = TYPE_MAX_VALUE (type);
2229 /* Check lowj > highi. */
2230 tem = fold_binary (GT_EXPR, boolean_type_node,
2231 lowj, highi);
2232 if (tem == NULL_TREE || !integer_onep (tem))
2233 continue;
2234 if (optimize_xor)
2235 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2236 highi, highj, ops,
2237 ranges + i, ranges + j);
2238 else
2239 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2240 highi, highj, ops,
2241 ranges + i, ranges + j);
2242 if (changes)
2243 {
2244 any_changes = true;
2245 break;
2246 }
2247 }
2248 }
2249 return any_changes;
2250}
2251
946e9eb4 2252/* Optimize range tests, similarly to how fold_range_test optimizes
 2253   them on trees.  The tree code for the binary
8a2c7744 2254 operation between all the operands is OPCODE.
2255 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2256 maybe_optimize_range_tests for inter-bb range optimization.
2257 In that case if oe->op is NULL, oe->id is bb->index whose
2258 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2259 the actual opcode. */
946e9eb4 2260
82cb4e1c 2261static bool
946e9eb4 2262optimize_range_tests (enum tree_code opcode,
f1f41a6c 2263 vec<operand_entry_t> *ops)
946e9eb4 2264{
f1f41a6c 2265 unsigned int length = ops->length (), i, j, first;
946e9eb4 2266 operand_entry_t oe;
2267 struct range_entry *ranges;
2268 bool any_changes = false;
2269
2270 if (length == 1)
82cb4e1c 2271 return false;
946e9eb4 2272
2273 ranges = XNEWVEC (struct range_entry, length);
2274 for (i = 0; i < length; i++)
2275 {
f1f41a6c 2276 oe = (*ops)[i];
946e9eb4 2277 ranges[i].idx = i;
8a2c7744 2278 init_range_entry (ranges + i, oe->op,
2279 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
946e9eb4 2280 /* For | invert it now, we will invert it again before emitting
2281 the optimized expression. */
8a2c7744 2282 if (opcode == BIT_IOR_EXPR
2283 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
946e9eb4 2284 ranges[i].in_p = !ranges[i].in_p;
2285 }
2286
2287 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2288 for (i = 0; i < length; i++)
2289 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2290 break;
2291
2292 /* Try to merge ranges. */
2293 for (first = i; i < length; i++)
2294 {
2295 tree low = ranges[i].low;
2296 tree high = ranges[i].high;
2297 int in_p = ranges[i].in_p;
2298 bool strict_overflow_p = ranges[i].strict_overflow_p;
2299 int update_fail_count = 0;
2300
2301 for (j = i + 1; j < length; j++)
2302 {
2303 if (ranges[i].exp != ranges[j].exp)
2304 break;
2305 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2306 ranges[j].in_p, ranges[j].low, ranges[j].high))
2307 break;
2308 strict_overflow_p |= ranges[j].strict_overflow_p;
2309 }
2310
2311 if (j == i + 1)
2312 continue;
2313
2314 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2315 ops, ranges[i].exp, in_p, low, high,
2316 strict_overflow_p))
2317 {
2318 i = j - 1;
2319 any_changes = true;
2320 }
2321 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2322 while update_range_test would fail. */
2323 else if (update_fail_count == 64)
2324 i = j - 1;
2325 else
2326 ++update_fail_count;
2327 }
2328
b0c0c879 2329 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2330 ops, ranges);
946e9eb4 2331
b0c0c879 2332 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2333 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2334 ops, ranges);
946e9eb4 2335
8a2c7744 2336 if (any_changes && opcode != ERROR_MARK)
946e9eb4 2337 {
2338 j = 0;
f1f41a6c 2339 FOR_EACH_VEC_ELT (*ops, i, oe)
946e9eb4 2340 {
2341 if (oe->op == error_mark_node)
2342 continue;
2343 else if (i != j)
f1f41a6c 2344 (*ops)[j] = oe;
946e9eb4 2345 j++;
2346 }
f1f41a6c 2347 ops->truncate (j);
946e9eb4 2348 }
2349
2350 XDELETEVEC (ranges);
82cb4e1c 2351 return any_changes;
946e9eb4 2352}
2353
8a2c7744 2354/* Return true if STMT is a cast like:
2355 <bb N>:
2356 ...
2357 _123 = (int) _234;
2358
2359 <bb M>:
2360 # _345 = PHI <_123(N), 1(...), 1(...)>
 2361   where _234 has bool type, _123 has a single use and
 2362   bb N has a single successor M.  This pattern commonly appears in
2363 the last block of a range test. */
2364
2365static bool
2366final_range_test_p (gimple stmt)
2367{
2368 basic_block bb, rhs_bb;
2369 edge e;
2370 tree lhs, rhs;
2371 use_operand_p use_p;
2372 gimple use_stmt;
2373
2374 if (!gimple_assign_cast_p (stmt))
2375 return false;
2376 bb = gimple_bb (stmt);
2377 if (!single_succ_p (bb))
2378 return false;
2379 e = single_succ_edge (bb);
2380 if (e->flags & EDGE_COMPLEX)
2381 return false;
2382
2383 lhs = gimple_assign_lhs (stmt);
2384 rhs = gimple_assign_rhs1 (stmt);
2385 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2386 || TREE_CODE (rhs) != SSA_NAME
2387 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2388 return false;
2389
2390 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2391 if (!single_imm_use (lhs, &use_p, &use_stmt))
2392 return false;
2393
2394 if (gimple_code (use_stmt) != GIMPLE_PHI
2395 || gimple_bb (use_stmt) != e->dest)
2396 return false;
2397
2398 /* And that the rhs is defined in the same loop. */
2399 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2400 if (rhs_bb == NULL
2401 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2402 return false;
2403
2404 return true;
2405}
2406
 2407/* Return true if BB is a suitable basic block for inter-bb range test
 2408   optimization.  If BACKWARD is true, BB should be the only predecessor
 2409   of TEST_BB; *OTHER_BB is either NULL, in which case it is filled in
 2410   by the routine, or it is compared against to find the common basic
 2411   block to which all the conditions branch if true resp. false.  If
 2412   BACKWARD is false, TEST_BB should be the only predecessor of BB.  */
2413
2414static bool
2415suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2416 bool backward)
2417{
2418 edge_iterator ei, ei2;
2419 edge e, e2;
2420 gimple stmt;
2421 gimple_stmt_iterator gsi;
2422 bool other_edge_seen = false;
2423 bool is_cond;
2424
2425 if (test_bb == bb)
2426 return false;
2427 /* Check last stmt first. */
2428 stmt = last_stmt (bb);
2429 if (stmt == NULL
2430 || (gimple_code (stmt) != GIMPLE_COND
2431 && (backward || !final_range_test_p (stmt)))
2432 || gimple_visited_p (stmt)
2433 || stmt_could_throw_p (stmt)
2434 || *other_bb == bb)
2435 return false;
2436 is_cond = gimple_code (stmt) == GIMPLE_COND;
2437 if (is_cond)
2438 {
2439 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2440 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2441 to *OTHER_BB (if not set yet, try to find it out). */
2442 if (EDGE_COUNT (bb->succs) != 2)
2443 return false;
2444 FOR_EACH_EDGE (e, ei, bb->succs)
2445 {
2446 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2447 return false;
2448 if (e->dest == test_bb)
2449 {
2450 if (backward)
2451 continue;
2452 else
2453 return false;
2454 }
2455 if (e->dest == bb)
2456 return false;
2457 if (*other_bb == NULL)
2458 {
2459 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2460 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2461 return false;
2462 else if (e->dest == e2->dest)
2463 *other_bb = e->dest;
2464 if (*other_bb == NULL)
2465 return false;
2466 }
2467 if (e->dest == *other_bb)
2468 other_edge_seen = true;
2469 else if (backward)
2470 return false;
2471 }
2472 if (*other_bb == NULL || !other_edge_seen)
2473 return false;
2474 }
2475 else if (single_succ (bb) != *other_bb)
2476 return false;
2477
2478 /* Now check all PHIs of *OTHER_BB. */
2479 e = find_edge (bb, *other_bb);
2480 e2 = find_edge (test_bb, *other_bb);
2481 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2482 {
2483 gimple phi = gsi_stmt (gsi);
2484 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2485 corresponding to BB and TEST_BB predecessor must be the same. */
2486 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2487 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2488 {
2489 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2490 one of the PHIs should have the lhs of the last stmt in
2491 that block as PHI arg and that PHI should have 0 or 1
2492 corresponding to it in all other range test basic blocks
2493 considered. */
2494 if (!is_cond)
2495 {
2496 if (gimple_phi_arg_def (phi, e->dest_idx)
2497 == gimple_assign_lhs (stmt)
2498 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2499 || integer_onep (gimple_phi_arg_def (phi,
2500 e2->dest_idx))))
2501 continue;
2502 }
2503 else
2504 {
2505 gimple test_last = last_stmt (test_bb);
2506 if (gimple_code (test_last) != GIMPLE_COND
2507 && gimple_phi_arg_def (phi, e2->dest_idx)
2508 == gimple_assign_lhs (test_last)
2509 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2510 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2511 continue;
2512 }
2513
2514 return false;
2515 }
2516 }
2517 return true;
2518}
2519
 2520/* Return true if BB has no side-effects that would disallow range
 2521   test optimization, all SSA_NAMEs set in the bb are consumed
 2522   within the bb, and there are no PHIs.  */
2523
2524static bool
2525no_side_effect_bb (basic_block bb)
2526{
2527 gimple_stmt_iterator gsi;
2528 gimple last;
2529
2530 if (!gimple_seq_empty_p (phi_nodes (bb)))
2531 return false;
2532 last = last_stmt (bb);
2533 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2534 {
2535 gimple stmt = gsi_stmt (gsi);
2536 tree lhs;
2537 imm_use_iterator imm_iter;
2538 use_operand_p use_p;
2539
2540 if (is_gimple_debug (stmt))
2541 continue;
2542 if (gimple_has_side_effects (stmt))
2543 return false;
2544 if (stmt == last)
2545 return true;
2546 if (!is_gimple_assign (stmt))
2547 return false;
2548 lhs = gimple_assign_lhs (stmt);
2549 if (TREE_CODE (lhs) != SSA_NAME)
2550 return false;
2551 if (gimple_assign_rhs_could_trap_p (stmt))
2552 return false;
2553 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2554 {
2555 gimple use_stmt = USE_STMT (use_p);
2556 if (is_gimple_debug (use_stmt))
2557 continue;
2558 if (gimple_bb (use_stmt) != bb)
2559 return false;
2560 }
2561 }
2562 return false;
2563}
2564
2565/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2566 return true and fill in *OPS recursively. */
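/* For instance (hypothetical names, each with a single use), given
     a_1 = b_2 | c_3;  c_3 = d_4 | e_5;
   get_ops (a_1, BIT_IOR_EXPR, ...) fills *OPS with b_2, d_4 and e_5.  */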
2567
2568static bool
f1f41a6c 2569get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
8a2c7744 2570 struct loop *loop)
2571{
2572 gimple stmt = SSA_NAME_DEF_STMT (var);
2573 tree rhs[2];
2574 int i;
2575
2576 if (!is_reassociable_op (stmt, code, loop))
2577 return false;
2578
2579 rhs[0] = gimple_assign_rhs1 (stmt);
2580 rhs[1] = gimple_assign_rhs2 (stmt);
2581 gimple_set_visited (stmt, true);
2582 for (i = 0; i < 2; i++)
2583 if (TREE_CODE (rhs[i]) == SSA_NAME
2584 && !get_ops (rhs[i], code, ops, loop)
2585 && has_single_use (rhs[i]))
2586 {
2587 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2588
2589 oe->op = rhs[i];
2590 oe->rank = code;
2591 oe->id = 0;
2592 oe->count = 1;
f1f41a6c 2593 ops->safe_push (oe);
8a2c7744 2594 }
2595 return true;
2596}
2597
82cb4e1c 2598/* Find the ops that were added by get_ops starting from VAR, check
 2599   whether they were changed during update_range_test and, if so,
 2600   create new stmts.  */
2601
2602static tree
2603update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2604 unsigned int *pidx, struct loop *loop)
2605{
2606 gimple stmt = SSA_NAME_DEF_STMT (var);
2607 tree rhs[4];
2608 int i;
2609
2610 if (!is_reassociable_op (stmt, code, loop))
2611 return NULL;
2612
2613 rhs[0] = gimple_assign_rhs1 (stmt);
2614 rhs[1] = gimple_assign_rhs2 (stmt);
2615 rhs[2] = rhs[0];
2616 rhs[3] = rhs[1];
2617 for (i = 0; i < 2; i++)
2618 if (TREE_CODE (rhs[i]) == SSA_NAME)
2619 {
2620 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2621 if (rhs[2 + i] == NULL_TREE)
2622 {
2623 if (has_single_use (rhs[i]))
2624 rhs[2 + i] = ops[(*pidx)++]->op;
2625 else
2626 rhs[2 + i] = rhs[i];
2627 }
2628 }
2629 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2630 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2631 {
2632 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2633 var = make_ssa_name (TREE_TYPE (var), NULL);
2634 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2635 var, rhs[2], rhs[3]);
2636 gimple_set_uid (g, gimple_uid (stmt));
2637 gimple_set_visited (g, true);
2638 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2639 }
2640 return var;
2641}
2642
2643/* Structure to track the initial value passed to get_ops and
2644 the range in the ops vector for each basic block. */
2645
2646struct inter_bb_range_test_entry
2647{
2648 tree op;
2649 unsigned int first_idx, last_idx;
2650};
2651
8a2c7744 2652/* Inter-bb range test optimization. */
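/* E.g. a chain of blocks implementing
     if (a_1 == 2) goto common; else if (a_1 == 5) goto common; ...
   can be handled as the single range test a_1 == 2 || a_1 == 5 || ...
   even though the comparisons live in different basic blocks
   (an illustrative sketch, not the only shape handled).  */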
2653
2654static void
2655maybe_optimize_range_tests (gimple stmt)
2656{
2657 basic_block first_bb = gimple_bb (stmt);
2658 basic_block last_bb = first_bb;
2659 basic_block other_bb = NULL;
2660 basic_block bb;
2661 edge_iterator ei;
2662 edge e;
1e094109 2663 vec<operand_entry_t> ops = vNULL;
82cb4e1c 2664 vec<inter_bb_range_test_entry> bbinfo = vNULL;
3c8e4ef8 2665 bool any_changes = false;
8a2c7744 2666
2667 /* Consider only basic blocks that end with GIMPLE_COND or
2668 a cast statement satisfying final_range_test_p. All
2669 but the last bb in the first_bb .. last_bb range
2670 should end with GIMPLE_COND. */
2671 if (gimple_code (stmt) == GIMPLE_COND)
2672 {
2673 if (EDGE_COUNT (first_bb->succs) != 2)
2674 return;
2675 }
2676 else if (final_range_test_p (stmt))
2677 other_bb = single_succ (first_bb);
2678 else
2679 return;
2680
2681 if (stmt_could_throw_p (stmt))
2682 return;
2683
2684 /* As relative ordering of post-dominator sons isn't fixed,
2685 maybe_optimize_range_tests can be called first on any
 2686     bb in the range we want to optimize.  So, start searching
 2687     backwards to see whether first_bb can be moved to a predecessor.  */
2688 while (single_pred_p (first_bb))
2689 {
2690 basic_block pred_bb = single_pred (first_bb);
2691 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2692 break;
2693 if (!no_side_effect_bb (first_bb))
2694 break;
2695 first_bb = pred_bb;
2696 }
2697 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2698 Before starting forward search in last_bb successors, find
2699 out the other_bb. */
2700 if (first_bb == last_bb)
2701 {
2702 other_bb = NULL;
 2703	 /* As a non-GIMPLE_COND last stmt always terminates the range,
 2704	    the forward search below cannot discover anything; just give up.  */
2705 if (gimple_code (stmt) != GIMPLE_COND)
2706 return;
2707 /* Look at both successors. Either it ends with a GIMPLE_COND
2708 and satisfies suitable_cond_bb, or ends with a cast and
2709 other_bb is that cast's successor. */
2710 FOR_EACH_EDGE (e, ei, first_bb->succs)
2711 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2712 || e->dest == first_bb)
2713 return;
2714 else if (single_pred_p (e->dest))
2715 {
2716 stmt = last_stmt (e->dest);
2717 if (stmt
2718 && gimple_code (stmt) == GIMPLE_COND
2719 && EDGE_COUNT (e->dest->succs) == 2)
2720 {
2721 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2722 break;
2723 else
2724 other_bb = NULL;
2725 }
2726 else if (stmt
2727 && final_range_test_p (stmt)
2728 && find_edge (first_bb, single_succ (e->dest)))
2729 {
2730 other_bb = single_succ (e->dest);
2731 if (other_bb == first_bb)
2732 other_bb = NULL;
2733 }
2734 }
2735 if (other_bb == NULL)
2736 return;
2737 }
2738 /* Now do the forward search, moving last_bb to successor bbs
2739 that aren't other_bb. */
2740 while (EDGE_COUNT (last_bb->succs) == 2)
2741 {
2742 FOR_EACH_EDGE (e, ei, last_bb->succs)
2743 if (e->dest != other_bb)
2744 break;
2745 if (e == NULL)
2746 break;
2747 if (!single_pred_p (e->dest))
2748 break;
2749 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2750 break;
2751 if (!no_side_effect_bb (e->dest))
2752 break;
2753 last_bb = e->dest;
2754 }
2755 if (first_bb == last_bb)
2756 return;
2757 /* Here basic blocks first_bb through last_bb's predecessor
2758 end with GIMPLE_COND, all of them have one of the edges to
2759 other_bb and another to another block in the range,
2760 all blocks except first_bb don't have side-effects and
2761 last_bb ends with either GIMPLE_COND, or cast satisfying
2762 final_range_test_p. */
2763 for (bb = last_bb; ; bb = single_pred (bb))
2764 {
2765 enum tree_code code;
2766 tree lhs, rhs;
82cb4e1c 2767 inter_bb_range_test_entry bb_ent;
8a2c7744 2768
82cb4e1c 2769 bb_ent.op = NULL_TREE;
2770 bb_ent.first_idx = ops.length ();
2771 bb_ent.last_idx = bb_ent.first_idx;
8a2c7744 2772 e = find_edge (bb, other_bb);
2773 stmt = last_stmt (bb);
2774 gimple_set_visited (stmt, true);
2775 if (gimple_code (stmt) != GIMPLE_COND)
2776 {
2777 use_operand_p use_p;
2778 gimple phi;
2779 edge e2;
2780 unsigned int d;
2781
2782 lhs = gimple_assign_lhs (stmt);
2783 rhs = gimple_assign_rhs1 (stmt);
2784 gcc_assert (bb == last_bb);
2785
2786 /* stmt is
2787 _123 = (int) _234;
2788
2789 followed by:
2790 <bb M>:
2791 # _345 = PHI <_123(N), 1(...), 1(...)>
2792
2793 or 0 instead of 1. If it is 0, the _234
2794 range test is anded together with all the
2795 other range tests, if it is 1, it is ored with
2796 them. */
2797 single_imm_use (lhs, &use_p, &phi);
2798 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2799 e2 = find_edge (first_bb, other_bb);
2800 d = e2->dest_idx;
2801 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2802 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2803 code = BIT_AND_EXPR;
2804 else
2805 {
2806 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2807 code = BIT_IOR_EXPR;
2808 }
2809
2810 /* If _234 SSA_NAME_DEF_STMT is
2811 _234 = _567 | _789;
 2812	     (or &, corresponding to 1/0 in the phi arguments),
2813 push into ops the individual range test arguments
2814 of the bitwise or resp. and, recursively. */
2815 if (!get_ops (rhs, code, &ops,
2816 loop_containing_stmt (stmt))
2817 && has_single_use (rhs))
2818 {
2819 /* Otherwise, push the _234 range test itself. */
2820 operand_entry_t oe
2821 = (operand_entry_t) pool_alloc (operand_entry_pool);
2822
2823 oe->op = rhs;
2824 oe->rank = code;
2825 oe->id = 0;
2826 oe->count = 1;
f1f41a6c 2827 ops.safe_push (oe);
82cb4e1c 2828 bb_ent.last_idx++;
8a2c7744 2829 }
82cb4e1c 2830 else
2831 bb_ent.last_idx = ops.length ();
2832 bb_ent.op = rhs;
2833 bbinfo.safe_push (bb_ent);
8a2c7744 2834 continue;
2835 }
2836 /* Otherwise stmt is GIMPLE_COND. */
2837 code = gimple_cond_code (stmt);
2838 lhs = gimple_cond_lhs (stmt);
2839 rhs = gimple_cond_rhs (stmt);
2840 if (TREE_CODE (lhs) == SSA_NAME
2841 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2842 && ((code != EQ_EXPR && code != NE_EXPR)
2843 || rhs != boolean_false_node
2844 /* Either push into ops the individual bitwise
2845 or resp. and operands, depending on which
2846 edge is other_bb. */
2847 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2848 ^ (code == EQ_EXPR))
2849 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2850 loop_containing_stmt (stmt))))
2851 {
2852 /* Or push the GIMPLE_COND stmt itself. */
2853 operand_entry_t oe
2854 = (operand_entry_t) pool_alloc (operand_entry_pool);
2855
2856 oe->op = NULL;
2857 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2858 ? BIT_IOR_EXPR : BIT_AND_EXPR;
 2859	  /* oe->op = NULL signals that there is no SSA_NAME
 2860	     for the range test, and oe->id instead is the
 2861	     number of the basic block at whose end the
 2862	     GIMPLE_COND is found.  */
2863 oe->id = bb->index;
2864 oe->count = 1;
f1f41a6c 2865 ops.safe_push (oe);
82cb4e1c 2866 bb_ent.op = NULL;
2867 bb_ent.last_idx++;
8a2c7744 2868 }
82cb4e1c 2869 else if (ops.length () > bb_ent.first_idx)
2870 {
2871 bb_ent.op = lhs;
2872 bb_ent.last_idx = ops.length ();
2873 }
2874 bbinfo.safe_push (bb_ent);
8a2c7744 2875 if (bb == first_bb)
2876 break;
2877 }
f1f41a6c 2878 if (ops.length () > 1)
3c8e4ef8 2879 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2880 if (any_changes)
82cb4e1c 2881 {
2882 unsigned int idx;
3c8e4ef8 2883      /* update_ops relies on the has_single_use predicates returning
 2884	 the same values as they did during get_ops earlier.  Additionally
 2885	 it never removes statements, only adds new ones, and it should walk
 2886	 from the single imm use and check the predicate already before
 2887	 making those changes.
 2888	 On the other hand, handling GIMPLE_COND directly can turn
 2889	 previously multiply-used SSA_NAMEs into single-use SSA_NAMEs, so
 2890	 it needs to be done in a separate loop afterwards.  */
2891 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
82cb4e1c 2892 {
3c8e4ef8 2893 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2894 && bbinfo[idx].op != NULL_TREE)
82cb4e1c 2895 {
82cb4e1c 2896 tree new_op;
2897
3c8e4ef8 2898 stmt = last_stmt (bb);
2899 new_op = update_ops (bbinfo[idx].op,
2900 (enum tree_code)
2901 ops[bbinfo[idx].first_idx]->rank,
2902 ops, &bbinfo[idx].first_idx,
2903 loop_containing_stmt (stmt));
82cb4e1c 2904 if (new_op == NULL_TREE)
2905 {
2906 gcc_assert (bb == last_bb);
2907 new_op = ops[bbinfo[idx].first_idx++]->op;
2908 }
2909 if (bbinfo[idx].op != new_op)
2910 {
2911 imm_use_iterator iter;
2912 use_operand_p use_p;
2913 gimple use_stmt, cast_stmt = NULL;
2914
2915 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2916 if (is_gimple_debug (use_stmt))
2917 continue;
2918 else if (gimple_code (use_stmt) == GIMPLE_COND
2919 || gimple_code (use_stmt) == GIMPLE_PHI)
2920 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2921 SET_USE (use_p, new_op);
2922 else if (gimple_assign_cast_p (use_stmt))
2923 cast_stmt = use_stmt;
2924 else
2925 gcc_unreachable ();
2926 if (cast_stmt)
2927 {
2928 gcc_assert (bb == last_bb);
2929 tree lhs = gimple_assign_lhs (cast_stmt);
2930 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2931 enum tree_code rhs_code
2932 = gimple_assign_rhs_code (cast_stmt);
2933 gimple g
2934 = gimple_build_assign_with_ops (rhs_code, new_lhs,
2935 new_op, NULL_TREE);
2936 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2937 gimple_set_uid (g, gimple_uid (cast_stmt));
2938 gimple_set_visited (g, true);
2939 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2940 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2941 if (is_gimple_debug (use_stmt))
2942 continue;
2943 else if (gimple_code (use_stmt) == GIMPLE_COND
2944 || gimple_code (use_stmt) == GIMPLE_PHI)
2945 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2946 SET_USE (use_p, new_lhs);
2947 else
2948 gcc_unreachable ();
2949 }
2950 }
2951 }
2952 if (bb == first_bb)
2953 break;
2954 }
3c8e4ef8 2955 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2956 {
2957 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2958 && bbinfo[idx].op == NULL_TREE
2959 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
2960 {
2961 stmt = last_stmt (bb);
2962 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
2963 gimple_cond_make_false (stmt);
2964 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
2965 gimple_cond_make_true (stmt);
2966 else
2967 {
2968 gimple_cond_set_code (stmt, NE_EXPR);
2969 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
2970 gimple_cond_set_rhs (stmt, boolean_false_node);
2971 }
2972 update_stmt (stmt);
2973 }
2974 if (bb == first_bb)
2975 break;
2976 }
82cb4e1c 2977 }
2978 bbinfo.release ();
f1f41a6c 2979 ops.release ();
8a2c7744 2980}
2981
54aceb26 2982/* Return true if OPERAND is defined by a PHI node which uses the LHS
 2983   of STMT in its operands.  This is also known as a "destructive
2984 update" operation. */
2985
2986static bool
75a70cf9 2987is_phi_for_stmt (gimple stmt, tree operand)
54aceb26 2988{
75a70cf9 2989 gimple def_stmt;
2990 tree lhs;
54aceb26 2991 use_operand_p arg_p;
2992 ssa_op_iter i;
2993
2994 if (TREE_CODE (operand) != SSA_NAME)
2995 return false;
2996
75a70cf9 2997 lhs = gimple_assign_lhs (stmt);
2998
54aceb26 2999 def_stmt = SSA_NAME_DEF_STMT (operand);
75a70cf9 3000 if (gimple_code (def_stmt) != GIMPLE_PHI)
54aceb26 3001 return false;
3002
3003 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3004 if (lhs == USE_FROM_PTR (arg_p))
3005 return true;
3006 return false;
3007}
3008
9ce93694 3009/* Remove def stmt of VAR if VAR has zero uses and recurse
3010 on rhs1 operand if so. */
3011
3012static void
3013remove_visited_stmt_chain (tree var)
3014{
3015 gimple stmt;
3016 gimple_stmt_iterator gsi;
3017
3018 while (1)
3019 {
3020 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3021 return;
3022 stmt = SSA_NAME_DEF_STMT (var);
8c5ac7f6 3023 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3024 {
3025 var = gimple_assign_rhs1 (stmt);
8c5ac7f6 3026 gsi = gsi_for_stmt (stmt);
3027 gsi_remove (&gsi, true);
3028 release_defs (stmt);
8c5ac7f6 3029 }
3030 else
9ce93694 3031 return;
9ce93694 3032 }
3033}
3034
5b1c765d 3035/* This function checks three consecutive operands in
 3036   the passed operand vector OPS starting from OPINDEX and
 3037   swaps two of them if that is profitable for the binary operation
 3038   consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3039
3040 We pair ops with the same rank if possible.
3041
3042 The alternative we try is to see if STMT is a destructive
3043 update style statement, which is like:
3044 b = phi (a, ...)
3045 a = c + b;
3046 In that case, we want to use the destructive update form to
3047 expose the possible vectorizer sum reduction opportunity.
 3048   If so, the third operand will be the phi node.  This
3049 check is not performed if STMT is null.
3050
3051 We could, of course, try to be better as noted above, and do a
3052 lot of work to try to find these opportunities in >3 operand
3053 cases, but it is unlikely to be worth it. */
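/* E.g. if the three operands at OPINDEX have ranks {3, 3, 2}, the
   first and third are swapped so that the two rank-3 operands sit in
   the second and third slots and get consumed by the same binary
   statement (hypothetical ranks, for illustration).  */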
3054
3055static void
f1f41a6c 3056swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
5b1c765d 3057 unsigned int opindex, gimple stmt)
3058{
3059 operand_entry_t oe1, oe2, oe3;
3060
f1f41a6c 3061 oe1 = ops[opindex];
3062 oe2 = ops[opindex + 1];
3063 oe3 = ops[opindex + 2];
5b1c765d 3064
3065 if ((oe1->rank == oe2->rank
3066 && oe2->rank != oe3->rank)
3067 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3068 && !is_phi_for_stmt (stmt, oe1->op)
3069 && !is_phi_for_stmt (stmt, oe2->op)))
3070 {
3071 struct operand_entry temp = *oe3;
3072 oe3->op = oe1->op;
3073 oe3->rank = oe1->rank;
3074 oe1->op = temp.op;
3075 oe1->rank= temp.rank;
3076 }
3077 else if ((oe1->rank == oe3->rank
3078 && oe2->rank != oe3->rank)
3079 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3080 && !is_phi_for_stmt (stmt, oe1->op)
3081 && !is_phi_for_stmt (stmt, oe3->op)))
3082 {
3083 struct operand_entry temp = *oe2;
3084 oe2->op = oe1->op;
3085 oe2->rank = oe1->rank;
3086 oe1->op = temp.op;
82cb4e1c 3087 oe1->rank = temp.rank;
a2bd0c99 3088 }
a2bd0c99 3089}
3090
82cb4e1c 3091/* If STMT dominates the definition of RHS1 or RHS2, i.e. that definition
 3092   appears after STMT, return the later of those two definitions; otherwise return STMT.  */
a2bd0c99 3093
3094static inline gimple
82cb4e1c 3095find_insert_point (gimple stmt, tree rhs1, tree rhs2)
a2bd0c99 3096{
82cb4e1c 3097 if (TREE_CODE (rhs1) == SSA_NAME
3098 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3099 stmt = SSA_NAME_DEF_STMT (rhs1);
3100 if (TREE_CODE (rhs2) == SSA_NAME
3101 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3102 stmt = SSA_NAME_DEF_STMT (rhs2);
3103 return stmt;
a2bd0c99 3104}
3105
54aceb26 3106/* Recursively rewrite our linearized statements so that the operators
3107 match those in OPS[OPINDEX], putting the computation in rank
82cb4e1c 3108 order. Return new lhs. */
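/* Illustration (hypothetical names): with OPS {c_4, b_3, 5}, sorted by
   descending rank, the deepest statement of a two-statement chain is
   rewritten to compute b_3 + 5 and the top statement then adds c_4,
   i.e. the chain becomes (b_3 + 5) + c_4.  */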
54aceb26 3109
82cb4e1c 3110static tree
75a70cf9 3111rewrite_expr_tree (gimple stmt, unsigned int opindex,
82cb4e1c 3112 vec<operand_entry_t> ops, bool changed)
54aceb26 3113{
75a70cf9 3114 tree rhs1 = gimple_assign_rhs1 (stmt);
3115 tree rhs2 = gimple_assign_rhs2 (stmt);
82cb4e1c 3116 tree lhs = gimple_assign_lhs (stmt);
54aceb26 3117 operand_entry_t oe;
3118
54aceb26 3119 /* The final recursion case for this function is that you have
3120 exactly two operations left.
 3121     If we had exactly one op in the entire list to start with, we
3122 would have never called this function, and the tail recursion
3123 rewrites them one at a time. */
f1f41a6c 3124 if (opindex + 2 == ops.length ())
54aceb26 3125 {
3126 operand_entry_t oe1, oe2;
3127
f1f41a6c 3128 oe1 = ops[opindex];
3129 oe2 = ops[opindex + 1];
54aceb26 3130
75a70cf9 3131 if (rhs1 != oe1->op || rhs2 != oe2->op)
54aceb26 3132 {
82cb4e1c 3133 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3134 unsigned int uid = gimple_uid (stmt);
3135
54aceb26 3136 if (dump_file && (dump_flags & TDF_DETAILS))
3137 {
3138 fprintf (dump_file, "Transforming ");
75a70cf9 3139 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3140 }
3141
82cb4e1c 3142 if (changed)
3143 {
3144 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3145 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3146 stmt
3147 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3148 lhs, oe1->op, oe2->op);
3149 gimple_set_uid (stmt, uid);
3150 gimple_set_visited (stmt, true);
3151 if (insert_point == gsi_stmt (gsi))
3152 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3153 else
3154 insert_stmt_after (stmt, insert_point);
3155 }
3156 else
3157 {
3158 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3159 == stmt);
3160 gimple_assign_set_rhs1 (stmt, oe1->op);
3161 gimple_assign_set_rhs2 (stmt, oe2->op);
3162 update_stmt (stmt);
3163 }
3164
9ce93694 3165 if (rhs1 != oe1->op && rhs1 != oe2->op)
3166 remove_visited_stmt_chain (rhs1);
54aceb26 3167
3168 if (dump_file && (dump_flags & TDF_DETAILS))
3169 {
3170 fprintf (dump_file, " into ");
75a70cf9 3171 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3172 }
54aceb26 3173 }
82cb4e1c 3174 return lhs;
54aceb26 3175 }
3176
3177 /* If we hit here, we should have 3 or more ops left. */
f1f41a6c 3178 gcc_assert (opindex + 2 < ops.length ());
54aceb26 3179
3180 /* Rewrite the next operator. */
f1f41a6c 3181 oe = ops[opindex];
54aceb26 3182
82cb4e1c 3183 /* Recurse on the LHS of the binary operator, which is guaranteed to
3184 be the non-leaf side. */
3185 tree new_rhs1
3186 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3187 changed || oe->op != rhs2);
54aceb26 3188
82cb4e1c 3189 if (oe->op != rhs2 || new_rhs1 != rhs1)
3190 {
54aceb26 3191 if (dump_file && (dump_flags & TDF_DETAILS))
3192 {
3193 fprintf (dump_file, "Transforming ");
75a70cf9 3194 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3195 }
3196
82cb4e1c 3197 /* If changed is false, this is either opindex == 0
3198 or all outer rhs2's were equal to corresponding oe->op,
3199 and powi_result is NULL.
3200 That means lhs is equivalent before and after reassociation.
3201 Otherwise ensure the old lhs SSA_NAME is not reused and
3202 create a new stmt as well, so that any debug stmts will be
3203 properly adjusted. */
3204 if (changed)
3205 {
3206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3207 unsigned int uid = gimple_uid (stmt);
3208 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3209
3210 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3211 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3212 lhs, new_rhs1, oe->op);
3213 gimple_set_uid (stmt, uid);
3214 gimple_set_visited (stmt, true);
3215 if (insert_point == gsi_stmt (gsi))
3216 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3217 else
3218 insert_stmt_after (stmt, insert_point);
3219 }
3220 else
3221 {
3222 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3223 == stmt);
3224 gimple_assign_set_rhs1 (stmt, new_rhs1);
3225 gimple_assign_set_rhs2 (stmt, oe->op);
3226 update_stmt (stmt);
3227 }
54aceb26 3228
3229 if (dump_file && (dump_flags & TDF_DETAILS))
3230 {
3231 fprintf (dump_file, " into ");
75a70cf9 3232 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3233 }
3234 }
82cb4e1c 3235 return lhs;
54aceb26 3236}
3237
5b1c765d 3238/* Find out how many cycles we need to compute a chain of statements.
 3239   OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
 3240   maximum number of independent statements we may execute per cycle.  */
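/* A rough worked example: for ops_num == 10 and cpu_width == 2,
   the code below yields 10 / 4 == 2 full-width cycles, leaving
   rest == 10 - 2 * 2 == 6 operands; 6 is not a power of two, so
   floor_log2 (6) + 1 == 3 more cycles are needed, 5 in total.  */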
3241
3242static int
3243get_required_cycles (int ops_num, int cpu_width)
3244{
3245 int res;
3246 int elog;
3247 unsigned int rest;
3248
 3249   /* While we have more than 2 * cpu_width operands
 3250      we may reduce the number of operands by cpu_width
 3251      per cycle.  */
3252 res = ops_num / (2 * cpu_width);
3253
 3254   /* The remaining operand count can be halved each cycle
 3255      until we have only one operand.  */
3256 rest = (unsigned)(ops_num - res * cpu_width);
3257 elog = exact_log2 (rest);
3258 if (elog >= 0)
3259 res += elog;
3260 else
3261 res += floor_log2 (rest) + 1;
3262
3263 return res;
3264}
3265
3266/* Returns an optimal number of registers to use for computation of
3267 given statements. */
3268
3269static int
3270get_reassociation_width (int ops_num, enum tree_code opc,
3271 enum machine_mode mode)
3272{
3273 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3274 int width;
3275 int width_min;
3276 int cycles_best;
3277
3278 if (param_width > 0)
3279 width = param_width;
3280 else
3281 width = targetm.sched.reassociation_width (opc, mode);
3282
3283 if (width == 1)
3284 return width;
3285
3286 /* Get the minimal time required for sequence computation. */
3287 cycles_best = get_required_cycles (ops_num, width);
3288
 3289   /* Check if we may use a smaller width and still compute the sequence
 3290      in the same time.  This allows us to reduce register usage.
3291 get_required_cycles is monotonically increasing with lower width
3292 so we can perform a binary search for the minimal width that still
3293 results in the optimal cycle count. */
3294 width_min = 1;
3295 while (width > width_min)
3296 {
3297 int width_mid = (width + width_min) / 2;
3298
3299 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3300 width = width_mid;
3301 else if (width_min < width_mid)
3302 width_min = width_mid;
3303 else
3304 break;
3305 }
3306
3307 return width;
3308}
3309
3310/* Recursively rewrite our linearized statements so that the operators
3311 match those in OPS[OPINDEX], putting the computation in rank
3312 order and trying to allow operations to be executed in
3313 parallel. */
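/* E.g. with width 2 and four operands {a, b, c, d}, instead of the
   serial chain ((a + b) + c) + d this emits two independent statements
   t_1 = c + d and t_2 = a + b, which can issue in the same cycle,
   followed by t_1 + t_2 (hypothetical names, for illustration).  */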
3314
3315static void
3316rewrite_expr_tree_parallel (gimple stmt, int width,
82cb4e1c 3317 vec<operand_entry_t> ops)
5b1c765d 3318{
3319 enum tree_code opcode = gimple_assign_rhs_code (stmt);
f1f41a6c 3320 int op_num = ops.length ();
5b1c765d 3321 int stmt_num = op_num - 1;
3322 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3323 int op_index = op_num - 1;
3324 int stmt_index = 0;
3325 int ready_stmts_end = 0;
3326 int i = 0;
3327 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5b1c765d 3328
3329 /* We start expression rewriting from the top statements.
3330 So, in this loop we create a full list of statements
3331 we will work with. */
3332 stmts[stmt_num - 1] = stmt;
3333 for (i = stmt_num - 2; i >= 0; i--)
3334 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3335
5b1c765d 3336 for (i = 0; i < stmt_num; i++)
3337 {
3338 tree op1, op2;
3339
3340 /* Determine whether we should use results of
3341 already handled statements or not. */
3342 if (ready_stmts_end == 0
3343 && (i - stmt_index >= width || op_index < 1))
3344 ready_stmts_end = i;
3345
 3346      /* Now we choose operands for the next statement.  A nonzero
 3347	 value of ready_stmts_end here means that we should use
 3348	 the results of already generated statements as new operands.  */
3349 if (ready_stmts_end > 0)
3350 {
3351 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3352 if (ready_stmts_end > stmt_index)
3353 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3354 else if (op_index >= 0)
f1f41a6c 3355 op2 = ops[op_index--]->op;
5b1c765d 3356 else
3357 {
3358 gcc_assert (stmt_index < i);
3359 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3360 }
3361
3362 if (stmt_index >= ready_stmts_end)
3363 ready_stmts_end = 0;
3364 }
3365 else
3366 {
3367 if (op_index > 1)
3368 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
f1f41a6c 3369 op2 = ops[op_index--]->op;
3370 op1 = ops[op_index--]->op;
5b1c765d 3371 }
3372
3373 /* If we emit the last statement then we should put
3374 operands into the last statement. It will also
3375 break the loop. */
3376 if (op_index < 0 && stmt_index == i)
3377 i = stmt_num - 1;
3378
3379 if (dump_file && (dump_flags & TDF_DETAILS))
3380 {
3381 fprintf (dump_file, "Transforming ");
3382 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3383 }
3384
 3385      /* We keep the original statement only for the last one.  All
3386 others are recreated. */
3387 if (i == stmt_num - 1)
3388 {
3389 gimple_assign_set_rhs1 (stmts[i], op1);
3390 gimple_assign_set_rhs2 (stmts[i], op2);
3391 update_stmt (stmts[i]);
3392 }
3393 else
03d37e4e 3394 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5b1c765d 3395
3396 if (dump_file && (dump_flags & TDF_DETAILS))
3397 {
3398 fprintf (dump_file, " into ");
3399 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3400 }
3401 }
3402
3403 remove_visited_stmt_chain (last_rhs1);
3404}
3405
54aceb26 3406/* Transform STMT, which is really (A + B) + (C + D), into the left
 3407   linear form, ((A + B) + C) + D.
3408 Recurse on D if necessary. */
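/* In GIMPLE form this looks roughly like (hypothetical names):
     x_1 = a_2 + b_3;  y_4 = c_5 + d_6;  z_7 = x_1 + y_4;
   being rewritten into the left-linear chain
     x_1 = a_2 + b_3;  t_8 = x_1 + d_6;  z_7 = t_8 + c_5;  */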
3409
3410static void
75a70cf9 3411linearize_expr (gimple stmt)
54aceb26 3412{
82cb4e1c 3413 gimple_stmt_iterator gsi;
75a70cf9 3414 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3415 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
82cb4e1c 3416 gimple oldbinrhs = binrhs;
75a70cf9 3417 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3418 gimple newbinrhs = NULL;
a4c3fb95 3419 struct loop *loop = loop_containing_stmt (stmt);
82cb4e1c 3420 tree lhs = gimple_assign_lhs (stmt);
54aceb26 3421
75a70cf9 3422 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3423 && is_reassociable_op (binrhs, rhscode, loop));
3424
82cb4e1c 3425 gsi = gsi_for_stmt (stmt);
54aceb26 3426
75a70cf9 3427 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
82cb4e1c 3428 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3429 make_ssa_name (TREE_TYPE (lhs), NULL),
3430 gimple_assign_lhs (binlhs),
3431 gimple_assign_rhs2 (binrhs));
75a70cf9 3432 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
82cb4e1c 3433 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3434 gimple_set_uid (binrhs, gimple_uid (stmt));
54aceb26 3435
75a70cf9 3436 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3437 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
54aceb26 3438
3439 if (dump_file && (dump_flags & TDF_DETAILS))
3440 {
3441 fprintf (dump_file, "Linearized: ");
75a70cf9 3442 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3443 }
3444
3445 reassociate_stats.linearized++;
54aceb26 3446 update_stmt (stmt);
75a70cf9 3447
82cb4e1c 3448 gsi = gsi_for_stmt (oldbinrhs);
3449 gsi_remove (&gsi, true);
3450 release_defs (oldbinrhs);
3451
75a70cf9 3452 gimple_set_visited (stmt, true);
3453 gimple_set_visited (binlhs, true);
3454 gimple_set_visited (binrhs, true);
54aceb26 3455
3456 /* Tail recurse on the new rhs if it still needs reassociation. */
a4c3fb95 3457 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
75a70cf9 3458 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3459 want to change the algorithm while converting to tuples. */
54aceb26 3460 linearize_expr (stmt);
54aceb26 3461}
3462
75a70cf9 3463/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
54aceb26 3464 it. Otherwise, return NULL. */
3465
75a70cf9 3466static gimple
54aceb26 3467get_single_immediate_use (tree lhs)
3468{
3469 use_operand_p immuse;
75a70cf9 3470 gimple immusestmt;
54aceb26 3471
3472 if (TREE_CODE (lhs) == SSA_NAME
75a70cf9 3473 && single_imm_use (lhs, &immuse, &immusestmt)
3474 && is_gimple_assign (immusestmt))
3475 return immusestmt;
3476
3477 return NULL;
54aceb26 3478}
54aceb26 3479
54aceb26 3480/* Recursively negate the value of TONEGATE, and return the SSA_NAME
3481 representing the negated value. Insertions of any necessary
75a70cf9 3482 instructions go before GSI.
54aceb26 3483 This function is recursive in that, if you hand it "a_5" as the
3484 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3485 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3486
3487static tree
82cb4e1c 3488negate_value (tree tonegate, gimple_stmt_iterator *gsip)
54aceb26 3489{
82cb4e1c 3490 gimple negatedefstmt = NULL;
54aceb26 3491 tree resultofnegate;
82cb4e1c 3492 gimple_stmt_iterator gsi;
3493 unsigned int uid;
54aceb26 3494
54aceb26 3495 /* If we are trying to negate a name, defined by an add, negate the
3496 add operands instead. */
75a70cf9 3497 if (TREE_CODE (tonegate) == SSA_NAME)
3498 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
54aceb26 3499 if (TREE_CODE (tonegate) == SSA_NAME
75a70cf9 3500 && is_gimple_assign (negatedefstmt)
3501 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3502 && has_single_use (gimple_assign_lhs (negatedefstmt))
3503 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
54aceb26 3504 {
75a70cf9 3505 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3506 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
82cb4e1c 3507 tree lhs = gimple_assign_lhs (negatedefstmt);
3508 gimple g;
75a70cf9 3509
3510 gsi = gsi_for_stmt (negatedefstmt);
3511 rhs1 = negate_value (rhs1, &gsi);
75a70cf9 3512
3513 gsi = gsi_for_stmt (negatedefstmt);
3514 rhs2 = negate_value (rhs2, &gsi);
75a70cf9 3515
82cb4e1c 3516 gsi = gsi_for_stmt (negatedefstmt);
3517 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3518 gimple_set_visited (negatedefstmt, true);
3519 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3520 gimple_set_uid (g, gimple_uid (negatedefstmt));
3521 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3522 return lhs;
54aceb26 3523 }
3524
3525 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
82cb4e1c 3526 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
75a70cf9 3527 NULL_TREE, true, GSI_SAME_STMT);
82cb4e1c 3528 gsi = *gsip;
3529 uid = gimple_uid (gsi_stmt (gsi));
3530 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3531 {
3532 gimple stmt = gsi_stmt (gsi);
3533 if (gimple_uid (stmt) != 0)
3534 break;
3535 gimple_set_uid (stmt, uid);
3536 }
54aceb26 3537 return resultofnegate;
54aceb26 3538}
3539
3540/* Return true if we should break up the subtract in STMT into an add
3541 with negate. This is true when the subtract operands are really
3542 adds, or the subtract itself is used in an add or multiply
3543 expression. In these cases, breaking up the subtract into an add
3544 with negate exposes the adds to reassociation. */
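
/* For illustration (SSA names here are invented):

     a_1 = b_2 + c_3;
     d_4 = a_1 - e_5;   <-- broken up: the first operand is an add

     a_1 = b_2 - c_3;
     d_4 = a_1 + e_5;   <-- broken up: the result feeds an add.  */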
3545
3546static bool
75a70cf9 3547should_break_up_subtract (gimple stmt)
54aceb26 3548{
75a70cf9 3549 tree lhs = gimple_assign_lhs (stmt);
3550 tree binlhs = gimple_assign_rhs1 (stmt);
3551 tree binrhs = gimple_assign_rhs2 (stmt);
3552 gimple immusestmt;
a4c3fb95 3553 struct loop *loop = loop_containing_stmt (stmt);
54aceb26 3554
3555 if (TREE_CODE (binlhs) == SSA_NAME
a4c3fb95 3556 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
54aceb26 3557 return true;
3558
3559 if (TREE_CODE (binrhs) == SSA_NAME
a4c3fb95 3560 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
54aceb26 3561 return true;
3562
3563 if (TREE_CODE (lhs) == SSA_NAME
3564 && (immusestmt = get_single_immediate_use (lhs))
75a70cf9 3565 && is_gimple_assign (immusestmt)
dddf5036 3566 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3567 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
54aceb26 3568 return true;
3569 return false;
54aceb26 3570}
3571
3572/* Transform STMT from A - B into A + -B. */
3573
3574static void
75a70cf9 3575break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
54aceb26 3576{
75a70cf9 3577 tree rhs1 = gimple_assign_rhs1 (stmt);
3578 tree rhs2 = gimple_assign_rhs2 (stmt);
54aceb26 3579
3580 if (dump_file && (dump_flags & TDF_DETAILS))
3581 {
3582 fprintf (dump_file, "Breaking up subtract ");
75a70cf9 3583 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3584 }
3585
75a70cf9 3586 rhs2 = negate_value (rhs2, gsip);
3587 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
54aceb26 3588 update_stmt (stmt);
3589}
3590
8c5ac7f6 3591/* Determine whether STMT is a builtin call that raises an SSA name
3592 to an integer power and has only one use. If so, and this is early
3593 reassociation and unsafe math optimizations are permitted, place
3594 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3595 If any of these conditions does not hold, return FALSE. */
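
/* For example (hypothetical statement), given

     t_1 = __builtin_pow (x_2, 3.0);

   we return TRUE with *BASE = x_2 and *EXPONENT = 3.  */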
3596
3597static bool
3598acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3599{
3600 tree fndecl, arg1;
3601 REAL_VALUE_TYPE c, cint;
3602
3603 if (!first_pass_instance
3604 || !flag_unsafe_math_optimizations
3605 || !is_gimple_call (stmt)
3606 || !has_single_use (gimple_call_lhs (stmt)))
3607 return false;
3608
3609 fndecl = gimple_call_fndecl (stmt);
3610
3611 if (!fndecl
3612 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3613 return false;
3614
3615 switch (DECL_FUNCTION_CODE (fndecl))
3616 {
3617 CASE_FLT_FN (BUILT_IN_POW):
3618 *base = gimple_call_arg (stmt, 0);
3619 arg1 = gimple_call_arg (stmt, 1);
3620
3621 if (TREE_CODE (arg1) != REAL_CST)
3622 return false;
3623
3624 c = TREE_REAL_CST (arg1);
3625
3626 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3627 return false;
3628
3629 *exponent = real_to_integer (&c);
3630 real_from_integer (&cint, VOIDmode, *exponent,
3631 *exponent < 0 ? -1 : 0, 0);
3632 if (!real_identical (&c, &cint))
3633 return false;
3634
3635 break;
3636
3637 CASE_FLT_FN (BUILT_IN_POWI):
3638 *base = gimple_call_arg (stmt, 0);
3639 arg1 = gimple_call_arg (stmt, 1);
3640
35ec552a 3641 if (!tree_fits_shwi_p (arg1))
8c5ac7f6 3642 return false;
3643
3644 *exponent = TREE_INT_CST_LOW (arg1);
3645 break;
3646
3647 default:
3648 return false;
3649 }
3650
3651 /* Expanding negative exponents is generally unproductive, so we don't
3652 complicate matters with those. Exponents of zero and one should
3653 have been handled by expression folding. */
3654 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3655 return false;
3656
3657 return true;
3658}
3659
54aceb26 3660/* Recursively linearize a binary expression that is the RHS of STMT.
3661 Place the operands of the expression tree in the vector named OPS. */
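
/* As an illustration (names invented), if STMT is

     z_7 = x_1 + y_4;

   where x_1 = a_2 + b_3 and y_4 = c_5 + d_6 are single-use adds in
   the same loop, OPS ends up holding the four leaf operands a_2,
   b_3, c_5 and d_6.  */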
3662
3663static void
f1f41a6c 3664linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
dddf5036 3665 bool is_associative, bool set_visited)
54aceb26 3666{
75a70cf9 3667 tree binlhs = gimple_assign_rhs1 (stmt);
3668 tree binrhs = gimple_assign_rhs2 (stmt);
8c5ac7f6 3669 gimple binlhsdef = NULL, binrhsdef = NULL;
54aceb26 3670 bool binlhsisreassoc = false;
3671 bool binrhsisreassoc = false;
75a70cf9 3672 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
a4c3fb95 3673 struct loop *loop = loop_containing_stmt (stmt);
8c5ac7f6 3674 tree base = NULL_TREE;
3675 HOST_WIDE_INT exponent = 0;
54aceb26 3676
dddf5036 3677 if (set_visited)
3678 gimple_set_visited (stmt, true);
54aceb26 3679
3680 if (TREE_CODE (binlhs) == SSA_NAME)
3681 {
3682 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
df0675b8 3683 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3684 && !stmt_could_throw_p (binlhsdef));
54aceb26 3685 }
3686
3687 if (TREE_CODE (binrhs) == SSA_NAME)
3688 {
3689 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
df0675b8 3690 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3691 && !stmt_could_throw_p (binrhsdef));
54aceb26 3692 }
3693
3694 /* If the LHS is not reassociable, but the RHS is, we need to swap
3695 them. If neither is reassociable, there is nothing we can do, so
3696 just put them in the ops vector. If the LHS is reassociable,
3697 linearize it. If both are reassociable, then linearize the RHS
3698 and the LHS. */
3699
3700 if (!binlhsisreassoc)
3701 {
3702 tree temp;
3703
dddf5036 3704 /* If this is not an associative operation like division, give up. */
3705 if (!is_associative)
3706 {
3707 add_to_ops_vec (ops, binrhs);
3708 return;
3709 }
3710
54aceb26 3711 if (!binrhsisreassoc)
3712 {
8c5ac7f6 3713 if (rhscode == MULT_EXPR
3714 && TREE_CODE (binrhs) == SSA_NAME
3715 && acceptable_pow_call (binrhsdef, &base, &exponent))
3716 {
3717 add_repeat_to_ops_vec (ops, base, exponent);
3718 gimple_set_visited (binrhsdef, true);
3719 }
3720 else
3721 add_to_ops_vec (ops, binrhs);
3722
3723 if (rhscode == MULT_EXPR
3724 && TREE_CODE (binlhs) == SSA_NAME
3725 && acceptable_pow_call (binlhsdef, &base, &exponent))
3726 {
3727 add_repeat_to_ops_vec (ops, base, exponent);
3728 gimple_set_visited (binlhsdef, true);
3729 }
3730 else
3731 add_to_ops_vec (ops, binlhs);
3732
54aceb26 3733 return;
3734 }
3735
3736 if (dump_file && (dump_flags & TDF_DETAILS))
3737 {
3738 fprintf (dump_file, "swapping operands of ");
75a70cf9 3739 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3740 }
3741
8f6fa493 3742 swap_ssa_operands (stmt,
3743 gimple_assign_rhs1_ptr (stmt),
3744 gimple_assign_rhs2_ptr (stmt));
54aceb26 3745 update_stmt (stmt);
3746
3747 if (dump_file && (dump_flags & TDF_DETAILS))
3748 {
3749 fprintf (dump_file, " is now ");
75a70cf9 3750 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3751 }
3752
3753 /* We want the lhs to always be the reassociable op,
3754 so swap. */
3755 temp = binlhs;
3756 binlhs = binrhs;
3757 binrhs = temp;
3758 }
3759 else if (binrhsisreassoc)
3760 {
3761 linearize_expr (stmt);
75a70cf9 3762 binlhs = gimple_assign_rhs1 (stmt);
3763 binrhs = gimple_assign_rhs2 (stmt);
54aceb26 3764 }
3765
3766 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
a4c3fb95 3767 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3768 rhscode, loop));
dddf5036 3769 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3770 is_associative, set_visited);
8c5ac7f6 3771
3772 if (rhscode == MULT_EXPR
3773 && TREE_CODE (binrhs) == SSA_NAME
3774 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3775 {
3776 add_repeat_to_ops_vec (ops, base, exponent);
3777 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3778 }
3779 else
3780 add_to_ops_vec (ops, binrhs);
54aceb26 3781}
3782
3783/* Repropagate the negates back into subtracts, since no other pass
3784 currently does it. */
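
/* For instance (names invented), "x_1 = -a_2; y_3 = b_4 + x_1"
   becomes "y_3 = b_4 - a_2"; the MINUS_EXPR cases below are
   handled analogously.  */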
3785
3786static void
3787repropagate_negates (void)
3788{
3789 unsigned int i = 0;
3790 tree negate;
3791
f1f41a6c 3792 FOR_EACH_VEC_ELT (plus_negates, i, negate)
54aceb26 3793 {
75a70cf9 3794 gimple user = get_single_immediate_use (negate);
54aceb26 3795
c07e5b8b 3796 if (!user || !is_gimple_assign (user))
3797 continue;
3798
54aceb26 3799 /* The negate operand can be either operand of a PLUS_EXPR
3800 (it can be the LHS if the RHS is a constant for example).
3801
3802 Force the negate operand to the RHS of the PLUS_EXPR, then
3803 transform the PLUS_EXPR into a MINUS_EXPR. */
c07e5b8b 3804 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
54aceb26 3805 {
54aceb26 3806 /* If the negated operand appears on the LHS of the
3807 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3808 to force the negated operand to the RHS of the PLUS_EXPR. */
75a70cf9 3809 if (gimple_assign_rhs1 (user) == negate)
54aceb26 3810 {
8f6fa493 3811 swap_ssa_operands (user,
3812 gimple_assign_rhs1_ptr (user),
3813 gimple_assign_rhs2_ptr (user));
54aceb26 3814 }
3815
3816 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3817 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
75a70cf9 3818 if (gimple_assign_rhs2 (user) == negate)
54aceb26 3819 {
75a70cf9 3820 tree rhs1 = gimple_assign_rhs1 (user);
3821 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3822 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3823 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
54aceb26 3824 update_stmt (user);
3825 }
3826 }
c07e5b8b 3827 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3828 {
3829 if (gimple_assign_rhs1 (user) == negate)
3830 {
3831 /* We have
3832 x = -a
3833 y = x - b
3834 which we transform into
3835 x = a + b
3836 y = -x .
3837 This pushes down the negate which we possibly can merge
3838 into some other operation, hence insert it into the
3839 plus_negates vector. */
3840 gimple feed = SSA_NAME_DEF_STMT (negate);
3841 tree a = gimple_assign_rhs1 (feed);
82cb4e1c 3842 tree b = gimple_assign_rhs2 (user);
3843 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3844 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3845 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3846 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3847 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3848 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3849 user = gsi_stmt (gsi2);
3850 update_stmt (user);
3851 gsi_remove (&gsi, true);
3852 release_defs (feed);
3853 plus_negates.safe_push (gimple_assign_lhs (user));
c07e5b8b 3854 }
3855 else
3856 {
3857 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3858 rid of one operation. */
3859 gimple feed = SSA_NAME_DEF_STMT (negate);
3860 tree a = gimple_assign_rhs1 (feed);
3861 tree rhs1 = gimple_assign_rhs1 (user);
3862 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3863 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3864 update_stmt (gsi_stmt (gsi));
3865 }
3866 }
54aceb26 3867 }
3868}
3869
c07e5b8b 3870/* Returns true if OP is of a type for which we can do reassociation.
3871 That is for integral or non-saturating fixed-point types, and for
3872 floating point types when associative-math is enabled. */
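
/* E.g. an 'unsigned int' operand always qualifies, since its
   overflow wraps, while a 'float' operand qualifies only under
   -fassociative-math.  */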
3873
3874static bool
3875can_reassociate_p (tree op)
3876{
3877 tree type = TREE_TYPE (op);
ca3c9092 3878 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
c07e5b8b 3879 || NON_SAT_FIXED_POINT_TYPE_P (type)
f4a50267 3880 || (flag_associative_math && FLOAT_TYPE_P (type)))
c07e5b8b 3881 return true;
3882 return false;
3883}
3884
54aceb26 3885/* Break up subtract operations in block BB.
3886
3887 We do this top down because we don't know whether the subtract is
3888 part of a possible chain of reassociation except at the top.
48e1416a 3889
54aceb26 3890 IE given
3891 d = f + g
3892 c = a + e
3893 b = c - d
3894 q = b - r
3895 k = t - q
48e1416a 3896
54aceb26 3897 we want to break up k = t - q, but we won't until we've transformed q
75a70cf9 3898 = b - r, which won't be broken up until we transform b = c - d.
3899
82cb4e1c 3900 En passant, clear the GIMPLE visited flag on every statement
3901 and set UIDs within each basic block. */
54aceb26 3902
3903static void
3904break_up_subtract_bb (basic_block bb)
3905{
75a70cf9 3906 gimple_stmt_iterator gsi;
54aceb26 3907 basic_block son;
82cb4e1c 3908 unsigned int uid = 1;
54aceb26 3909
75a70cf9 3910 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
54aceb26 3911 {
75a70cf9 3912 gimple stmt = gsi_stmt (gsi);
3913 gimple_set_visited (stmt, false);
82cb4e1c 3914 gimple_set_uid (stmt, uid++);
54aceb26 3915
c07e5b8b 3916 if (!is_gimple_assign (stmt)
3917 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3918 continue;
3919
75a70cf9 3920 /* Look for simple gimple subtract operations. */
c07e5b8b 3921 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
54aceb26 3922 {
c07e5b8b 3923 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3924 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
54aceb26 3925 continue;
3926
3927 /* Check for a subtract used only in an addition. If this
3928 is the case, transform it into add of a negate for better
3929 reassociation. IE transform C = A-B into C = A + -B if C
3930 is only used in an addition. */
75a70cf9 3931 if (should_break_up_subtract (stmt))
3932 break_up_subtract (stmt, &gsi);
54aceb26 3933 }
c07e5b8b 3934 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3935 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
f1f41a6c 3936 plus_negates.safe_push (gimple_assign_lhs (stmt));
54aceb26 3937 }
3938 for (son = first_dom_son (CDI_DOMINATORS, bb);
3939 son;
3940 son = next_dom_son (CDI_DOMINATORS, son))
3941 break_up_subtract_bb (son);
3942}
3943
8c5ac7f6 3944/* Used for repeated factor analysis. */
3945struct repeat_factor_d
3946{
3947 /* An SSA name that occurs in a multiply chain. */
3948 tree factor;
3949
3950 /* Cached rank of the factor. */
3951 unsigned rank;
3952
3953 /* Number of occurrences of the factor in the chain. */
3954 HOST_WIDE_INT count;
3955
3956 /* An SSA name representing the product of this factor and
3957 all factors appearing later in the repeated factor vector. */
3958 tree repr;
3959};
3960
3961typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3962typedef const struct repeat_factor_d *const_repeat_factor_t;
3963
8c5ac7f6 3964
f1f41a6c 3965static vec<repeat_factor> repeat_factor_vec;
8c5ac7f6 3966
3967/* Used for sorting the repeat factor vector. Sort primarily by
3968 ascending occurrence count, secondarily by descending rank. */
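
/* E.g. factors with (count, rank) pairs (3, 2), (1, 4) and (3, 5)
   sort as (1, 4), (3, 5), (3, 2).  */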
3969
3970static int
3971compare_repeat_factors (const void *x1, const void *x2)
3972{
3973 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3974 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3975
3976 if (rf1->count != rf2->count)
3977 return rf1->count - rf2->count;
3978
3979 return rf2->rank - rf1->rank;
3980}
3981
8c5ac7f6 3982/* Look for repeated operands in OPS in the multiply tree rooted at
3983 STMT. Replace them with an optimal sequence of multiplies and powi
97269507 3984 builtin calls, and remove the used operands from OPS. Return an
3985 SSA name representing the value of the replacement sequence. */
8c5ac7f6 3986
97269507 3987static tree
f1f41a6c 3988attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
8c5ac7f6 3989{
3990 unsigned i, j, vec_len;
3991 int ii;
3992 operand_entry_t oe;
3993 repeat_factor_t rf1, rf2;
3994 repeat_factor rfnew;
97269507 3995 tree result = NULL_TREE;
8c5ac7f6 3996 tree target_ssa, iter_result;
3997 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3998 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3999 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4000 gimple mul_stmt, pow_stmt;
4001
4002 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4003 target. */
4004 if (!powi_fndecl)
97269507 4005 return NULL_TREE;
8c5ac7f6 4006
4007 /* Allocate the repeated factor vector. */
f1f41a6c 4008 repeat_factor_vec.create (10);
8c5ac7f6 4009
4010 /* Scan the OPS vector for all SSA names in the product and build
4011 up a vector of occurrence counts for each factor. */
f1f41a6c 4012 FOR_EACH_VEC_ELT (*ops, i, oe)
8c5ac7f6 4013 {
4014 if (TREE_CODE (oe->op) == SSA_NAME)
4015 {
f1f41a6c 4016 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
8c5ac7f6 4017 {
4018 if (rf1->factor == oe->op)
4019 {
4020 rf1->count += oe->count;
4021 break;
4022 }
4023 }
4024
f1f41a6c 4025 if (j >= repeat_factor_vec.length ())
8c5ac7f6 4026 {
4027 rfnew.factor = oe->op;
4028 rfnew.rank = oe->rank;
4029 rfnew.count = oe->count;
4030 rfnew.repr = NULL_TREE;
f1f41a6c 4031 repeat_factor_vec.safe_push (rfnew);
8c5ac7f6 4032 }
4033 }
4034 }
4035
4036 /* Sort the repeated factor vector by (a) increasing occurrence count,
4037 and (b) decreasing rank. */
f1f41a6c 4038 repeat_factor_vec.qsort (compare_repeat_factors);
8c5ac7f6 4039
4040 /* It is generally best to combine as many base factors as possible
4041 into a product before applying __builtin_powi to the result.
4042 However, the sort order chosen for the repeated factor vector
4043 allows us to cache partial results for the product of the base
4044 factors for subsequent use. When we already have a cached partial
4045 result from a previous iteration, it is best to make use of it
4046 before looking for another __builtin_powi opportunity.
4047
4048 As an example, consider x * x * y * y * y * z * z * z * z.
4049 We want to first compose the product x * y * z, raise it to the
4050 second power, then multiply this by y * z, and finally multiply
4051 by z. This can be done in 5 multiplies provided we cache y * z
4052 for use in both expressions:
4053
4054 t1 = y * z
4055 t2 = t1 * x
4056 t3 = t2 * t2
4057 t4 = t1 * t3
4058 result = t4 * z
4059
4060 If we instead ignored the cached y * z and first multiplied by
4061 the __builtin_powi opportunity z * z, we would get the inferior:
4062
4063 t1 = y * z
4064 t2 = t1 * x
4065 t3 = t2 * t2
4066 t4 = z * z
4067 t5 = t3 * t4
4068 result = t5 * y */
4069
f1f41a6c 4070 vec_len = repeat_factor_vec.length ();
8c5ac7f6 4071
4072 /* Repeatedly look for opportunities to create a builtin_powi call. */
4073 while (true)
4074 {
4075 HOST_WIDE_INT power;
4076
4077 /* First look for the largest cached product of factors from
4078 preceding iterations. If found, create a builtin_powi for
4079 it if the minimum occurrence count for its factors is at
4080 least 2, or just use this cached product as our next
4081 multiplicand if the minimum occurrence count is 1. */
f1f41a6c 4082 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
8c5ac7f6 4083 {
4084 if (rf1->repr && rf1->count > 0)
4085 break;
4086 }
4087
4088 if (j < vec_len)
4089 {
4090 power = rf1->count;
4091
4092 if (power == 1)
4093 {
4094 iter_result = rf1->repr;
4095
4096 if (dump_file && (dump_flags & TDF_DETAILS))
4097 {
4098 unsigned elt;
4099 repeat_factor_t rf;
4100 fputs ("Multiplying by cached product ", dump_file);
4101 for (elt = j; elt < vec_len; elt++)
4102 {
f1f41a6c 4103 rf = &repeat_factor_vec[elt];
8c5ac7f6 4104 print_generic_expr (dump_file, rf->factor, 0);
4105 if (elt < vec_len - 1)
4106 fputs (" * ", dump_file);
4107 }
4108 fputs ("\n", dump_file);
4109 }
4110 }
4111 else
4112 {
03d37e4e 4113 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
8c5ac7f6 4114 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4115 build_int_cst (integer_type_node,
4116 power));
4117 gimple_call_set_lhs (pow_stmt, iter_result);
4118 gimple_set_location (pow_stmt, gimple_location (stmt));
4119 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4120
4121 if (dump_file && (dump_flags & TDF_DETAILS))
4122 {
4123 unsigned elt;
4124 repeat_factor_t rf;
4125 fputs ("Building __builtin_pow call for cached product (",
4126 dump_file);
4127 for (elt = j; elt < vec_len; elt++)
4128 {
f1f41a6c 4129 rf = &repeat_factor_vec[elt];
8c5ac7f6 4130 print_generic_expr (dump_file, rf->factor, 0);
4131 if (elt < vec_len - 1)
4132 fputs (" * ", dump_file);
4133 }
8ef3b7cb 4134 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4135 power);
8c5ac7f6 4136 }
4137 }
4138 }
4139 else
4140 {
4141 /* Otherwise, find the first factor in the repeated factor
4142 vector whose occurrence count is at least 2. If no such
4143 factor exists, there are no builtin_powi opportunities
4144 remaining. */
f1f41a6c 4145 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
8c5ac7f6 4146 {
4147 if (rf1->count >= 2)
4148 break;
4149 }
4150
4151 if (j >= vec_len)
4152 break;
4153
4154 power = rf1->count;
4155
4156 if (dump_file && (dump_flags & TDF_DETAILS))
4157 {
4158 unsigned elt;
4159 repeat_factor_t rf;
4160 fputs ("Building __builtin_pow call for (", dump_file);
4161 for (elt = j; elt < vec_len; elt++)
4162 {
f1f41a6c 4163 rf = &repeat_factor_vec[elt];
8c5ac7f6 4164 print_generic_expr (dump_file, rf->factor, 0);
4165 if (elt < vec_len - 1)
4166 fputs (" * ", dump_file);
4167 }
8ef3b7cb 4168 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
8c5ac7f6 4169 }
4170
4171 reassociate_stats.pows_created++;
4172
4173 /* Visit each element of the vector in reverse order (so that
4174 high-occurrence elements are visited first, and within the
4175 same occurrence count, lower-ranked elements are visited
4176 first). Form a linear product of all elements in this order
4177 whose occurrence count is at least that of element J.
4178 Record the SSA name representing the product of each element
4179 with all subsequent elements in the vector. */
4180 if (j == vec_len - 1)
4181 rf1->repr = rf1->factor;
4182 else
4183 {
4184 for (ii = vec_len - 2; ii >= (int)j; ii--)
4185 {
4186 tree op1, op2;
4187
f1f41a6c 4188 rf1 = &repeat_factor_vec[ii];
4189 rf2 = &repeat_factor_vec[ii + 1];
8c5ac7f6 4190
4191 /* Init the last factor's representative to be itself. */
4192 if (!rf2->repr)
4193 rf2->repr = rf2->factor;
4194
4195 op1 = rf1->factor;
4196 op2 = rf2->repr;
4197
03d37e4e 4198 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
8c5ac7f6 4199 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4200 target_ssa,
4201 op1, op2);
4202 gimple_set_location (mul_stmt, gimple_location (stmt));
4203 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4204 rf1->repr = target_ssa;
4205
4206 /* Don't reprocess the multiply we just introduced. */
4207 gimple_set_visited (mul_stmt, true);
4208 }
4209 }
4210
4211 /* Form a call to __builtin_powi for the maximum product
4212 just formed, raised to the power obtained earlier. */
f1f41a6c 4213 rf1 = &repeat_factor_vec[j];
03d37e4e 4214 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
8c5ac7f6 4215 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4216 build_int_cst (integer_type_node,
4217 power));
4218 gimple_call_set_lhs (pow_stmt, iter_result);
4219 gimple_set_location (pow_stmt, gimple_location (stmt));
4220 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4221 }
4222
97269507 4223 /* If we previously formed at least one other builtin_powi call,
4224 form the product of this one and those others. */
4225 if (result)
4226 {
03d37e4e 4227 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
97269507 4228 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4229 result, iter_result);
4230 gimple_set_location (mul_stmt, gimple_location (stmt));
4231 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4232 gimple_set_visited (mul_stmt, true);
4233 result = new_result;
4234 }
4235 else
4236 result = iter_result;
8c5ac7f6 4237
4238 /* Decrement the occurrence count of each element in the product
4239 by the count found above, and remove this many copies of each
4240 factor from OPS. */
4241 for (i = j; i < vec_len; i++)
4242 {
4243 unsigned k = power;
4244 unsigned n;
4245
f1f41a6c 4246 rf1 = &repeat_factor_vec[i];
8c5ac7f6 4247 rf1->count -= power;
4248
f1f41a6c 4249 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
8c5ac7f6 4250 {
4251 if (oe->op == rf1->factor)
4252 {
4253 if (oe->count <= k)
4254 {
f1f41a6c 4255 ops->ordered_remove (n);
8c5ac7f6 4256 k -= oe->count;
4257
4258 if (k == 0)
4259 break;
4260 }
4261 else
4262 {
4263 oe->count -= k;
4264 break;
4265 }
4266 }
4267 }
4268 }
4269 }
4270
4271 /* At this point all elements in the repeated factor vector have a
4272 remaining occurrence count of 0 or 1, and those with a count of 1
4273 don't have cached representatives. Re-sort the ops vector and
4274 clean up. */
f1f41a6c 4275 ops->qsort (sort_by_operand_rank);
4276 repeat_factor_vec.release ();
97269507 4277
4278 /* Return the final product computed herein. Note that there may
4279 still be some elements with single occurrence count left in OPS;
4280 those will be handled by the normal reassociation logic. */
4281 return result;
4282}
4283
4284/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4285
4286static void
4287transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4288{
4289 tree rhs1;
4290
4291 if (dump_file && (dump_flags & TDF_DETAILS))
4292 {
4293 fprintf (dump_file, "Transforming ");
4294 print_gimple_stmt (dump_file, stmt, 0, 0);
4295 }
4296
4297 rhs1 = gimple_assign_rhs1 (stmt);
4298 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4299 update_stmt (stmt);
4300 remove_visited_stmt_chain (rhs1);
4301
4302 if (dump_file && (dump_flags & TDF_DETAILS))
4303 {
4304 fprintf (dump_file, " into ");
4305 print_gimple_stmt (dump_file, stmt, 0, 0);
4306 }
4307}
4308
4309/* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4310
4311static void
4312transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4313 tree rhs1, tree rhs2)
4314{
4315 if (dump_file && (dump_flags & TDF_DETAILS))
4316 {
4317 fprintf (dump_file, "Transforming ");
4318 print_gimple_stmt (dump_file, stmt, 0, 0);
4319 }
4320
61e371b0 4321 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
97269507 4322 update_stmt (gsi_stmt (*gsi));
4323 remove_visited_stmt_chain (rhs1);
4324
4325 if (dump_file && (dump_flags & TDF_DETAILS))
4326 {
4327 fprintf (dump_file, " into ");
4328 print_gimple_stmt (dump_file, stmt, 0, 0);
4329 }
8c5ac7f6 4330}
4331
54aceb26 4332/* Reassociate expressions in basic block BB and its post-dominator as
4333 children. */
4334
4335static void
4336reassociate_bb (basic_block bb)
4337{
75a70cf9 4338 gimple_stmt_iterator gsi;
54aceb26 4339 basic_block son;
8a2c7744 4340 gimple stmt = last_stmt (bb);
4341
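  /* If the last statement of the block has not been visited yet, see
     whether it ends a chain of comparisons that can be merged as
     range tests (see maybe_optimize_range_tests).  */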
4342 if (stmt && !gimple_visited_p (stmt))
4343 maybe_optimize_range_tests (stmt);
54aceb26 4344
75a70cf9 4345 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
54aceb26 4346 {
8a2c7744 4347 stmt = gsi_stmt (gsi);
54aceb26 4348
df0675b8 4349 if (is_gimple_assign (stmt)
4350 && !stmt_could_throw_p (stmt))
54aceb26 4351 {
75a70cf9 4352 tree lhs, rhs1, rhs2;
4353 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
54aceb26 4354
75a70cf9 4355 /* If this is not a gimple binary expression, there is
4356 nothing for us to do with it. */
4357 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
54aceb26 4358 continue;
4359
75a70cf9 4360 /* If this was part of an already processed statement,
4361 we don't need to touch it again. */
4362 if (gimple_visited_p (stmt))
dddf5036 4363 {
4364 /* This statement might have become dead because of previous
4365 reassociations. */
4366 if (has_zero_uses (gimple_get_lhs (stmt)))
4367 {
4368 gsi_remove (&gsi, true);
4369 release_defs (stmt);
fb715874 4370 /* Removing the statement above may have left the
4371 iterator pointing to the end of the sequence.
4372 Reset it to the last statement of the block in
4373 that case; if the block has become empty, the
4374 reset iterator is still the end iterator and we
4375 need to bail out. */
4376 if (gsi_end_p (gsi))
4377 {
4378 gsi = gsi_last_bb (bb);
4379 if (gsi_end_p (gsi))
4380 break;
4381 }
dddf5036 4382 }
4383 continue;
4384 }
75a70cf9 4385
4386 lhs = gimple_assign_lhs (stmt);
4387 rhs1 = gimple_assign_rhs1 (stmt);
4388 rhs2 = gimple_assign_rhs2 (stmt);
4389
ca3c9092 4390 /* For non-bit or min/max operations we can't associate
4391 all types. Verify that here. */
4392 if (rhs_code != BIT_IOR_EXPR
4393 && rhs_code != BIT_AND_EXPR
4394 && rhs_code != BIT_XOR_EXPR
4395 && rhs_code != MIN_EXPR
4396 && rhs_code != MAX_EXPR
4397 && (!can_reassociate_p (lhs)
4398 || !can_reassociate_p (rhs1)
4399 || !can_reassociate_p (rhs2)))
54aceb26 4400 continue;
4401
75a70cf9 4402 if (associative_tree_code (rhs_code))
54aceb26 4403 {
1e094109 4404 vec<operand_entry_t> ops = vNULL;
97269507 4405 tree powi_result = NULL_TREE;
54aceb26 4406
4407 /* There may be no immediate uses left by the time we
4408 get here because we may have eliminated them all. */
72e3ec84 4409 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
54aceb26 4410 continue;
4411
75a70cf9 4412 gimple_set_visited (stmt, true);
dddf5036 4413 linearize_expr_tree (&ops, stmt, true, true);
f1f41a6c 4414 ops.qsort (sort_by_operand_rank);
75a70cf9 4415 optimize_ops_list (rhs_code, &ops);
dddf5036 4416 if (undistribute_ops_list (rhs_code, &ops,
4417 loop_containing_stmt (stmt)))
4418 {
f1f41a6c 4419 ops.qsort (sort_by_operand_rank);
dddf5036 4420 optimize_ops_list (rhs_code, &ops);
4421 }
54aceb26 4422
946e9eb4 4423 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4424 optimize_range_tests (rhs_code, &ops);
4425
8c5ac7f6 4426 if (first_pass_instance
4427 && rhs_code == MULT_EXPR
4428 && flag_unsafe_math_optimizations)
03d37e4e 4429 powi_result = attempt_builtin_powi (stmt, &ops);
8c5ac7f6 4430
97269507 4431 /* If the operand vector is now empty, all operands were
4432 consumed by the __builtin_powi optimization. */
f1f41a6c 4433 if (ops.length () == 0)
97269507 4434 transform_stmt_to_copy (&gsi, stmt, powi_result);
f1f41a6c 4435 else if (ops.length () == 1)
54aceb26 4436 {
f1f41a6c 4437 tree last_op = ops.last ()->op;
97269507 4438
4439 if (powi_result)
4440 transform_stmt_to_multiply (&gsi, stmt, last_op,
4441 powi_result);
4442 else
4443 transform_stmt_to_copy (&gsi, stmt, last_op);
54aceb26 4444 }
4445 else
5b1c765d 4446 {
4447 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
f1f41a6c 4448 int ops_num = ops.length ();
5b1c765d 4449 int width = get_reassociation_width (ops_num, rhs_code, mode);
82cb4e1c 4450 tree new_lhs = lhs;
5b1c765d 4451
4452 if (dump_file && (dump_flags & TDF_DETAILS))
4453 fprintf (dump_file,
4454 "Width = %d was chosen for reassociation\n", width);
4455
4456 if (width > 1
f1f41a6c 4457 && ops.length () > 3)
5b1c765d 4458 rewrite_expr_tree_parallel (stmt, width, ops);
4459 else
a2bd0c99 4460 {
4461 /* When there are three operands left, we want
4462 to make sure the ones that get the double
4463 binary op are chosen wisely. */
4464 int len = ops.length ();
4465 if (len >= 3)
4466 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4467
82cb4e1c 4468 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4469 powi_result != NULL);
a2bd0c99 4470 }
97269507 4471
4472 /* If we combined some repeated factors into a
4473 __builtin_powi call, multiply that result by the
4474 reassociated operands. */
4475 if (powi_result)
4476 {
82cb4e1c 4477 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4478 tree type = TREE_TYPE (lhs);
03d37e4e 4479 tree target_ssa = make_temp_ssa_name (type, NULL,
4480 "reassocpow");
82cb4e1c 4481 gimple_set_lhs (lhs_stmt, target_ssa);
4482 update_stmt (lhs_stmt);
4483 if (lhs != new_lhs)
4484 target_ssa = new_lhs;
97269507 4485 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4486 powi_result,
4487 target_ssa);
4488 gimple_set_location (mul_stmt, gimple_location (stmt));
4489 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4490 }
5b1c765d 4491 }
54aceb26 4492
f1f41a6c 4493 ops.release ();
54aceb26 4494 }
4495 }
4496 }
4497 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4498 son;
4499 son = next_dom_son (CDI_POST_DOMINATORS, son))
4500 reassociate_bb (son);
4501}
4502
f1f41a6c 4503void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4504void debug_ops_vector (vec<operand_entry_t> ops);
54aceb26 4505
4506/* Dump the operand entry vector OPS to FILE. */
4507
4508void
f1f41a6c 4509dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
54aceb26 4510{
4511 operand_entry_t oe;
4512 unsigned int i;
4513
f1f41a6c 4514 FOR_EACH_VEC_ELT (ops, i, oe)
54aceb26 4515 {
4516 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
75a70cf9 4517 print_generic_expr (file, oe->op, 0);
54aceb26 4518 }
4519}
4520
4521/* Dump the operand entry vector OPS to STDERR. */
4522
4b987fac 4523DEBUG_FUNCTION void
f1f41a6c 4524debug_ops_vector (vec<operand_entry_t> ops)
54aceb26 4525{
4526 dump_ops_vector (stderr, ops);
4527}
4528
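/* Driver: break up subtracts top-down over the dominator tree, then
   reassociate expressions over the post-dominator tree rooted at the
   exit block.  */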
4529static void
4530do_reassoc (void)
4531{
4532 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4533 reassociate_bb (EXIT_BLOCK_PTR);
4534}
4535
4536/* Initialize the reassociation pass. */
4537
4538static void
4539init_reassoc (void)
4540{
4541 int i;
b30a8715 4542 long rank = 2;
a28770e1 4543 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
54aceb26 4544
a4c3fb95 4545 /* Find the loops, so that we can prevent moving calculations in
4546 them. */
4547 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4548
54aceb26 4549 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4550
4551 operand_entry_pool = create_alloc_pool ("operand entry pool",
4552 sizeof (struct operand_entry), 30);
17b5ea6f 4553 next_operand_entry_id = 0;
54aceb26 4554
4555 /* A reverse post-order (RPO) walk gives us an ordering where
4556 deeper loops come later. */
6180f28d 4557 pre_and_rev_post_order_compute (NULL, bbs, false);
ed7e2206 4558 bb_rank = XCNEWVEC (long, last_basic_block);
b30a8715 4559 operand_rank = pointer_map_create ();
54aceb26 4560
61e371b0 4561 /* Give each default definition a distinct rank. This includes
4562 parameters and the static chain. Walk backwards over all
4563 SSA names so that we get proper rank ordering according
4564 to tree_swap_operands_p. */
4565 for (i = num_ssa_names - 1; i > 0; --i)
54aceb26 4566 {
61e371b0 4567 tree name = ssa_name (i);
4568 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4569 insert_operand_rank (name, ++rank);
54aceb26 4570 }
4571
4572 /* Set up rank for each BB. */
a28770e1 4573 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
54aceb26 4574 bb_rank[bbs[i]] = ++rank << 16;
4575
4576 free (bbs);
54aceb26 4577 calculate_dominance_info (CDI_POST_DOMINATORS);
1e094109 4578 plus_negates = vNULL;
54aceb26 4579}
4580
4581/* Cleanup after the reassociation pass, and print stats if
4582 requested. */
4583
4584static void
4585fini_reassoc (void)
4586{
581f8050 4587 statistics_counter_event (cfun, "Linearized",
4588 reassociate_stats.linearized);
4589 statistics_counter_event (cfun, "Constants eliminated",
4590 reassociate_stats.constants_eliminated);
4591 statistics_counter_event (cfun, "Ops eliminated",
4592 reassociate_stats.ops_eliminated);
4593 statistics_counter_event (cfun, "Statements rewritten",
4594 reassociate_stats.rewritten);
8c5ac7f6 4595 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4596 reassociate_stats.pows_encountered);
4597 statistics_counter_event (cfun, "Built-in powi calls created",
4598 reassociate_stats.pows_created);
54aceb26 4599
b30a8715 4600 pointer_map_destroy (operand_rank);
54aceb26 4601 free_alloc_pool (operand_entry_pool);
4602 free (bb_rank);
f1f41a6c 4603 plus_negates.release ();
54aceb26 4604 free_dominance_info (CDI_POST_DOMINATORS);
a4c3fb95 4605 loop_optimizer_finalize ();
54aceb26 4606}
4607
4608/* Gate and execute functions for Reassociation. */
4609
2a1990e9 4610static unsigned int
54aceb26 4611execute_reassoc (void)
4612{
3dec5460 4613 init_reassoc ();
54aceb26 4614
3dec5460 4615 do_reassoc ();
54aceb26 4616 repropagate_negates ();
4617
3dec5460 4618 fini_reassoc ();
2a1990e9 4619 return 0;
3dec5460 4620}
4621
621a93b1 4622static bool
4623gate_tree_ssa_reassoc (void)
4624{
4625 return flag_tree_reassoc != 0;
4626}
4627
cbe8bda8 4628namespace {
4629
4630const pass_data pass_data_reassoc =
3dec5460 4631{
cbe8bda8 4632 GIMPLE_PASS, /* type */
4633 "reassoc", /* name */
4634 OPTGROUP_NONE, /* optinfo_flags */
4635 true, /* has_gate */
4636 true, /* has_execute */
4637 TV_TREE_REASSOC, /* tv_id */
4638 ( PROP_cfg | PROP_ssa ), /* properties_required */
4639 0, /* properties_provided */
4640 0, /* properties_destroyed */
4641 0, /* todo_flags_start */
4642 ( TODO_verify_ssa
4643 | TODO_update_ssa_only_virtuals
4644 | TODO_verify_flow ), /* todo_flags_finish */
3dec5460 4645};
cbe8bda8 4646
4647class pass_reassoc : public gimple_opt_pass
4648{
4649public:
9af5ce0c 4650 pass_reassoc (gcc::context *ctxt)
4651 : gimple_opt_pass (pass_data_reassoc, ctxt)
cbe8bda8 4652 {}
4653
4654 /* opt_pass methods: */
ae84f584 4655 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
cbe8bda8 4656 bool gate () { return gate_tree_ssa_reassoc (); }
4657 unsigned int execute () { return execute_reassoc (); }
4658
4659}; // class pass_reassoc
4660
4661} // anon namespace
4662
4663gimple_opt_pass *
4664make_pass_reassoc (gcc::context *ctxt)
4665{
4666 return new pass_reassoc (ctxt);
4667}