gcc/tree-ssa-reassoc.c
3dec5460 1/* Reassociation for trees.
d353bf18 2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3dec5460 3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
8c4c00c1 9the Free Software Foundation; either version 3, or (at your option)
3dec5460 10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
8c4c00c1 18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
3dec5460 20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
9ef16211 24#include "backend.h"
d040a5b0 25#include "cfghooks.h"
9ef16211 26#include "tree.h"
27#include "gimple.h"
b0c0c879 28#include "rtl.h"
9ef16211 29#include "ssa.h"
b0c0c879 30#include "tm_p.h"
b20a8bb4 31#include "alias.h"
b20a8bb4 32#include "fold-const.h"
9ed99284 33#include "stor-layout.h"
94ea8568 34#include "cfganal.h"
ce084dfc 35#include "gimple-pretty-print.h"
3dec5460 36#include "tree-inline.h"
bc61cadb 37#include "internal-fn.h"
38#include "gimple-fold.h"
39#include "tree-eh.h"
dcf1a1ec 40#include "gimple-iterator.h"
e795d6e1 41#include "gimplify-me.h"
073c1fd5 42#include "tree-cfg.h"
05d9c18a 43#include "tree-ssa-loop-niter.h"
073c1fd5 44#include "tree-ssa-loop.h"
d53441c8 45#include "flags.h"
d53441c8 46#include "insn-config.h"
47#include "expmed.h"
48#include "dojump.h"
49#include "explow.h"
50#include "calls.h"
51#include "emit-rtl.h"
52#include "varasm.h"
53#include "stmt.h"
9ed99284 54#include "expr.h"
073c1fd5 55#include "tree-dfa.h"
56#include "tree-ssa.h"
3dec5460 57#include "tree-iterator.h"
58#include "tree-pass.h"
54aceb26 59#include "alloc-pool.h"
54aceb26 60#include "langhooks.h"
a4c3fb95 61#include "cfgloop.h"
5b1c765d 62#include "target.h"
63#include "params.h"
946e9eb4 64#include "diagnostic-core.h"
f7715905 65#include "builtins.h"
e3668db5 66#include "gimplify.h"
34517c64 67#include "insn-codes.h"
e3668db5 68#include "optabs.h"
3dec5460 69
54aceb26 70/* This is a simple global reassociation pass. It is, in part, based
71 on the LLVM pass of the same name (They do some things more/less
72 than we do, in different orders, etc).
3dec5460 73
54aceb26 74 It consists of five steps:
3dec5460 75
54aceb26 76 1. Breaking up subtract operations into addition + negate, where
77 it would promote the reassociation of adds.
3dec5460 78
54aceb26 79 2. Left linearization of the expression trees, so that (A+B)+(C+D)
80 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
81 During linearization, we place the operands of the binary
82 expressions into a vector of operand_entry_t
3dec5460 83
54aceb26 84 3. Optimization of the operand lists, eliminating things like a +
85 -a, a & a, etc.
3dec5460 86
8c5ac7f6 87 3a. Combine repeated factors with the same occurrence counts
88 into a __builtin_powi call that will later be optimized into
89 an optimal number of multiplies.
90
54aceb26 91 4. Rewrite the expression trees we linearized and optimized so
92 they are in proper rank order.
3dec5460 93
54aceb26 94 5. Repropagate negates, as nothing else will clean it up ATM.
3dec5460 95
54aceb26 96 A bit of theory on #4, since nobody seems to write anything down
97 about why it makes sense to do it the way they do it:
3dec5460 98
54aceb26 99 We could do this much nicer theoretically, but don't (for reasons
100 explained after how to do it theoretically nice :P).
3dec5460 101
54aceb26 102 In order to promote the most redundancy elimination, you want
103 binary expressions whose operands are the same rank (or
191ec5a2 104 preferably, the same value) exposed to the redundancy eliminator,
54aceb26 105 for possible elimination.
3dec5460 106
54aceb26 107 So the way to do this, if we really cared, is to build the new op
108 tree from the leaves to the roots, merging as you go, and putting the
109 new op on the end of the worklist, until you are left with one
110 thing on the worklist.
3dec5460 111
54aceb26 112 I.e., if you have to rewrite the following set of operands (listed with
113 rank in parentheses), with opcode PLUS_EXPR:
3dec5460 114
54aceb26 115 a (1), b (1), c (1), d (2), e (2)
3dec5460 116
3dec5460 117
54aceb26 118 We start with our merge worklist empty, and the ops list with all of
119 those on it.
3dec5460 120
54aceb26 121 You want to first merge all leaves of the same rank, as much as
122 possible.
123
124 So first build a binary op of
125
126 mergetmp = a + b, and put "mergetmp" on the merge worklist.
127
128 Because there is no three operand form of PLUS_EXPR, c is not going to
129 be exposed to redundancy elimination as a rank 1 operand.
130
131 So you might as well throw it on the merge worklist (you could also
132 consider it to now be a rank two operand, and merge it with d and e,
133 but in this case, you then have evicted e from a binary op. So at
134 least in this situation, you can't win.)
135
136 Then build a binary op of d + e
137 mergetmp2 = d + e
138
139 and put mergetmp2 on the merge worklist.
48e1416a 140
54aceb26 141 so merge worklist = {mergetmp, c, mergetmp2}
48e1416a 142
54aceb26 143 Continue building binary ops of these operations until you have only
144 one operation left on the worklist.
48e1416a 145
54aceb26 146 So we have
48e1416a 147
54aceb26 148 build binary op
149 mergetmp3 = mergetmp + c
48e1416a 150
54aceb26 151 worklist = {mergetmp2, mergetmp3}
48e1416a 152
54aceb26 153 mergetmp4 = mergetmp2 + mergetmp3
48e1416a 154
54aceb26 155 worklist = {mergetmp4}
48e1416a 156
54aceb26 157 because we have one operation left, we can now just set the original
158 statement equal to the result of that operation.
48e1416a 159
54aceb26 160 This will at least expose a + b and d + e to redundancy elimination
161 as binary operations.
48e1416a 162
54aceb26 163 For extra points, you can reuse the old statements to build the
164 mergetmps, since you shouldn't run out.
165
166 So why don't we do this?
48e1416a 167
54aceb26 168 Because it's expensive, and rarely will help. Most trees we are
169 reassociating have 3 or fewer ops. If they have 2 ops, they will
170 already be written into a nice single binary op. If you have 3 ops, a
171 single simple check suffices to tell you whether the first two are of the
172 same rank. If so, you know to order it
173
174 mergetmp = op1 + op2
175 newstmt = mergetmp + op3
48e1416a 176
54aceb26 177 instead of
178 mergetmp = op2 + op3
179 newstmt = mergetmp + op1
48e1416a 180
54aceb26 181 If all three are of the same rank, you can't expose them all in a
182 single binary operator anyway, so the above is *still* the best you
183 can do.
48e1416a 184
54aceb26 185 Thus, this is what we do. When we have three ops left, we check to see
186 what order to put them in, and call it a day. As a nod to vector sum
2d97a789 187 reduction, we check if any of the ops are really a phi node that is a
54aceb26 188 destructive update for the associating op, and keep the destructive
189 update together for vector sum reduction recognition. */
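
/* A small worked illustration of the five steps (an added sketch,
   not part of the original comment): given

     t1 = a + b;
     t2 = t1 + c;
     x  = t2 - b;

   step 1 turns the subtract into t2 + (-b), step 2 linearizes the
   PLUS chain into the operand list {a, b, c, -b}, step 3 cancels
   b + -b, and step 4 rewrites what remains in rank order, so the
   chain ends up computing a + c. */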
3dec5460 190
3dec5460 191
54aceb26 192/* Statistics */
193static struct
194{
195 int linearized;
196 int constants_eliminated;
197 int ops_eliminated;
198 int rewritten;
8c5ac7f6 199 int pows_encountered;
200 int pows_created;
54aceb26 201} reassociate_stats;
3dec5460 202
54aceb26 203/* Operator, rank pair. */
204typedef struct operand_entry
3dec5460 205{
54aceb26 206 unsigned int rank;
17b5ea6f 207 int id;
54aceb26 208 tree op;
8c5ac7f6 209 unsigned int count;
54aceb26 210} *operand_entry_t;
211
e16712b1 212static object_allocator<operand_entry> operand_entry_pool ("operand entry pool",
672758dc 213 30);
54aceb26 214
17b5ea6f 215/* This is used to assign a unique ID to each struct operand_entry
216 so that qsort results are identical on different hosts. */
217static int next_operand_entry_id;
3dec5460 218
219/* Starting rank number for a given basic block, so that we can rank
220 operations using unmovable instructions in that BB based on the bb
221 depth. */
b30a8715 222static long *bb_rank;
3dec5460 223
54aceb26 224/* Operand->rank hashtable. */
06ecf488 225static hash_map<tree, long> *operand_rank;
3dec5460 226
e3668db5 227/* Vector of SSA_NAMEs on which after reassociate_bb is done with
228 all basic blocks the CFG should be adjusted - basic blocks
229 split right after that SSA_NAME's definition statement and before
230 the only use, which must be a bit ior. */
231static vec<tree> reassoc_branch_fixups;
232
b248bf30 233/* Forward decls. */
234static long get_rank (tree);
c89eca7b 235static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
b248bf30 236
54675e05 237/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
238 possibly added by gsi_remove. */
239
240bool
241reassoc_remove_stmt (gimple_stmt_iterator *gsi)
242{
243 gimple stmt = gsi_stmt (*gsi);
244
245 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
246 return gsi_remove (gsi, true);
247
248 gimple_stmt_iterator prev = *gsi;
249 gsi_prev (&prev);
250 unsigned uid = gimple_uid (stmt);
251 basic_block bb = gimple_bb (stmt);
252 bool ret = gsi_remove (gsi, true);
253 if (!gsi_end_p (prev))
254 gsi_next (&prev);
255 else
256 prev = gsi_start_bb (bb);
257 gimple end_stmt = gsi_stmt (*gsi);
258 while ((stmt = gsi_stmt (prev)) != end_stmt)
259 {
260 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
261 gimple_set_uid (stmt, uid);
262 gsi_next (&prev);
263 }
264 return ret;
265}
b248bf30 266
267/* Bias amount for loop-carried phis. We want this to be larger than
268 the depth of any reassociation tree we can see, but not larger than
269 the rank difference between two blocks. */
270#define PHI_LOOP_BIAS (1 << 15)
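
/* An added note on the numbers: with PHI_LOOP_BIAS == 32768, an
   accumulator phi in a loop whose latch block has rank R receives
   rank R + 32768. Assuming block ranks are spaced 1 << 16 apart
   (as init_reassoc, later in this file, sets them up), the biased
   rank still stays below the next block's rank while exceeding the
   rank of anything computed by a reassociation tree inside the
   loop body. */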
271
272/* Rank assigned to a phi statement. If STMT is a loop-carried phi of
273 an innermost loop, and the phi has only a single use which is inside
274 the loop, then the rank is the block rank of the loop latch plus an
275 extra bias for the loop-carried dependence. This causes expressions
276 calculated into an accumulator variable to be independent for each
277 iteration of the loop. If STMT is some other phi, the rank is the
278 block rank of its containing block. */
279static long
280phi_rank (gimple stmt)
281{
282 basic_block bb = gimple_bb (stmt);
283 struct loop *father = bb->loop_father;
284 tree res;
285 unsigned i;
286 use_operand_p use;
287 gimple use_stmt;
288
289 /* We only care about real loops (those with a latch). */
290 if (!father->latch)
291 return bb_rank[bb->index];
292
293 /* Interesting phis must be in headers of innermost loops. */
294 if (bb != father->header
295 || father->inner)
296 return bb_rank[bb->index];
297
298 /* Ignore virtual SSA_NAMEs. */
299 res = gimple_phi_result (stmt);
7c782c9b 300 if (virtual_operand_p (res))
b248bf30 301 return bb_rank[bb->index];
302
303 /* The phi definition must have a single use, and that use must be
304 within the loop. Otherwise this isn't an accumulator pattern. */
305 if (!single_imm_use (res, &use, &use_stmt)
306 || gimple_bb (use_stmt)->loop_father != father)
307 return bb_rank[bb->index];
308
309 /* Look for phi arguments from within the loop. If found, bias this phi. */
310 for (i = 0; i < gimple_phi_num_args (stmt); i++)
311 {
312 tree arg = gimple_phi_arg_def (stmt, i);
313 if (TREE_CODE (arg) == SSA_NAME
314 && !SSA_NAME_IS_DEFAULT_DEF (arg))
315 {
316 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
317 if (gimple_bb (def_stmt)->loop_father == father)
318 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
319 }
320 }
321
322 /* Must be an uninteresting phi. */
323 return bb_rank[bb->index];
324}
325
326/* If EXP is an SSA_NAME defined by a PHI statement that represents a
327 loop-carried dependence of an innermost loop, return TRUE; else
328 return FALSE. */
329static bool
330loop_carried_phi (tree exp)
331{
332 gimple phi_stmt;
333 long block_rank;
334
335 if (TREE_CODE (exp) != SSA_NAME
336 || SSA_NAME_IS_DEFAULT_DEF (exp))
337 return false;
338
339 phi_stmt = SSA_NAME_DEF_STMT (exp);
340
341 if (gimple_code (phi_stmt) != GIMPLE_PHI)
342 return false;
343
344 /* Non-loop-carried phis have block rank. Loop-carried phis have
345 an additional bias added in. If this phi doesn't have block rank,
346 it's biased and should not be propagated. */
347 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
348
349 if (phi_rank (phi_stmt) != block_rank)
350 return true;
351
352 return false;
353}
354
355/* Return the maximum of RANK and the rank that should be propagated
356 from expression OP. For most operands, this is just the rank of OP.
357 For loop-carried phis, the value is zero to avoid undoing the bias
358 in favor of the phi. */
359static long
360propagate_rank (long rank, tree op)
361{
362 long op_rank;
363
364 if (loop_carried_phi (op))
365 return rank;
366
367 op_rank = get_rank (op);
368
369 return MAX (rank, op_rank);
370}
3dec5460 371
54aceb26 372/* Look up the operand rank structure for expression E. */
3dec5460 373
b30a8715 374static inline long
54aceb26 375find_operand_rank (tree e)
3dec5460 376{
06ecf488 377 long *slot = operand_rank->get (e);
378 return slot ? *slot : -1;
3dec5460 379}
380
54aceb26 381/* Insert {E,RANK} into the operand rank hashtable. */
3dec5460 382
b30a8715 383static inline void
384insert_operand_rank (tree e, long rank)
3dec5460 385{
b30a8715 386 gcc_assert (rank > 0);
06ecf488 387 gcc_assert (!operand_rank->put (e, rank));
3dec5460 388}
389
3dec5460 390/* Given an expression E, return the rank of the expression. */
391
b30a8715 392static long
3dec5460 393get_rank (tree e)
394{
3dec5460 395 /* SSA_NAME's have the rank of the expression they are the result
396 of.
397 For globals and uninitialized values, the rank is 0.
398 For function arguments, use the pre-setup rank.
399 For PHI nodes, stores, asm statements, etc, we use the rank of
400 the BB.
401 For simple operations, the rank is the maximum rank of any of
402 its operands, or the bb_rank, whichever is less.
403 I make no claims that this is optimal; however, it gives good
404 results. */
405
b248bf30 406 /* We make an exception to the normal ranking system to break
407 dependences of accumulator variables in loops. Suppose we
408 have a simple one-block loop containing:
409
410 x_1 = phi(x_0, x_2)
411 b = a + x_1
412 c = b + d
413 x_2 = c + e
414
415 As shown, each iteration of the calculation into x is fully
416 dependent upon the iteration before it. We would prefer to
417 see this in the form:
418
419 x_1 = phi(x_0, x_2)
420 b = a + d
421 c = b + e
422 x_2 = c + x_1
423
424 If the loop is unrolled, the calculations of b and c from
425 different iterations can be interleaved.
426
427 To obtain this result during reassociation, we bias the rank
428 of the phi definition x_1 upward, when it is recognized as an
429 accumulator pattern. The artificial rank causes it to be
430 added last, providing the desired independence. */
431
3dec5460 432 if (TREE_CODE (e) == SSA_NAME)
433 {
e86e65ef 434 ssa_op_iter iter;
75a70cf9 435 gimple stmt;
aa52f48f 436 long rank;
b248bf30 437 tree op;
54aceb26 438
61e371b0 439 if (SSA_NAME_IS_DEFAULT_DEF (e))
b30a8715 440 return find_operand_rank (e);
54aceb26 441
3dec5460 442 stmt = SSA_NAME_DEF_STMT (e);
b248bf30 443 if (gimple_code (stmt) == GIMPLE_PHI)
444 return phi_rank (stmt);
445
e86e65ef 446 if (!is_gimple_assign (stmt))
75a70cf9 447 return bb_rank[gimple_bb (stmt)->index];
3dec5460 448
449 /* If we already have a rank for this expression, use that. */
b30a8715 450 rank = find_operand_rank (e);
451 if (rank != -1)
452 return rank;
3dec5460 453
b248bf30 454 /* Otherwise, find the maximum rank for the operands. As an
455 exception, remove the bias from loop-carried phis when propagating
456 the rank so that dependent operations are not also biased. */
e86e65ef 457 /* Simply walk over all SSA uses - this takes advantage of the
458 fact that non-SSA operands satisfy is_gimple_min_invariant and
459 thus have rank 0. */
3dec5460 460 rank = 0;
e86e65ef 461 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
462 rank = propagate_rank (rank, op);
54aceb26 463
3dec5460 464 if (dump_file && (dump_flags & TDF_DETAILS))
465 {
466 fprintf (dump_file, "Rank for ");
467 print_generic_expr (dump_file, e, 0);
b30a8715 468 fprintf (dump_file, " is %ld\n", (rank + 1));
3dec5460 469 }
54aceb26 470
3dec5460 471 /* Note the rank in the hashtable so we don't recompute it. */
54aceb26 472 insert_operand_rank (e, (rank + 1));
3dec5460 473 return (rank + 1);
474 }
475
e86e65ef 476 /* Constants, globals, etc., are rank 0. */
3dec5460 477 return 0;
478}
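
/* An added illustration of the ranking: if x_3 has rank 1 and y_4
   has rank 2, then for a_5 = x_3 + y_4 the operand walk above
   computes MAX (1, 2) + 1 == 3 as the rank of a_5 and caches it in
   operand_rank, so later queries need no recomputation. */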
479
54aceb26 480
481/* We want integer ones to end up last no matter what, since they are
482 the ones we can do the most with. */
483#define INTEGER_CONST_TYPE 1 << 3
484#define FLOAT_CONST_TYPE 1 << 2
485#define OTHER_CONST_TYPE 1 << 1
486
487/* Classify an invariant tree into integer, float, or other, so that
488 we can sort them to be near other constants of the same type. */
489static inline int
490constant_type (tree t)
491{
492 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
493 return INTEGER_CONST_TYPE;
494 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
495 return FLOAT_CONST_TYPE;
496 else
497 return OTHER_CONST_TYPE;
498}
499
500/* qsort comparison function to sort operand entries PA and PB by rank
501 so that the sorted array is ordered by rank in decreasing order. */
502static int
503sort_by_operand_rank (const void *pa, const void *pb)
504{
505 const operand_entry_t oea = *(const operand_entry_t *)pa;
506 const operand_entry_t oeb = *(const operand_entry_t *)pb;
507
508 /* It's nicer for optimize_expression if constants that are likely
509 to fold when added/multiplied/whatever are put next to each
510 other. Since all constants have rank 0, order them by type. */
61e371b0 511 if (oeb->rank == 0 && oea->rank == 0)
17b5ea6f 512 {
513 if (constant_type (oeb->op) != constant_type (oea->op))
514 return constant_type (oeb->op) - constant_type (oea->op);
515 else
516 /* To make sorting result stable, we use unique IDs to determine
517 order. */
518 return oeb->id - oea->id;
519 }
54aceb26 520
521 /* Lastly, make sure the versions that are the same go next to each
c89eca7b 522 other. */
54aceb26 523 if ((oeb->rank - oea->rank == 0)
524 && TREE_CODE (oea->op) == SSA_NAME
525 && TREE_CODE (oeb->op) == SSA_NAME)
17b5ea6f 526 {
c89eca7b 527 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
528 versions of removed SSA_NAMEs, so if possible, prefer to sort
529 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
530 See PR60418. */
531 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
532 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
533 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
534 {
535 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
536 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
537 basic_block bba = gimple_bb (stmta);
538 basic_block bbb = gimple_bb (stmtb);
539 if (bbb != bba)
540 {
541 if (bb_rank[bbb->index] != bb_rank[bba->index])
542 return bb_rank[bbb->index] - bb_rank[bba->index];
543 }
544 else
545 {
546 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
547 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
548 if (da != db)
549 return da ? 1 : -1;
550 }
551 }
552
17b5ea6f 553 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
554 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
555 else
556 return oeb->id - oea->id;
557 }
54aceb26 558
17b5ea6f 559 if (oeb->rank != oea->rank)
560 return oeb->rank - oea->rank;
561 else
562 return oeb->id - oea->id;
54aceb26 563}
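
/* An added illustration of the comparator: the operand list
   {5, x_2 (rank 1), y_3 (rank 2)} qsorts to {y_3, x_2, 5}, i.e.
   decreasing rank with the rank-0 constant last; ties between equal
   ranks fall back to SSA name versions and finally to the unique
   ids, which keeps the sort stable across hosts. */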
564
565/* Add an operand entry to *OPS for the tree operand OP. */
566
567static void
f1f41a6c 568add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
54aceb26 569{
672758dc 570 operand_entry_t oe = operand_entry_pool.allocate ();
54aceb26 571
572 oe->op = op;
573 oe->rank = get_rank (op);
17b5ea6f 574 oe->id = next_operand_entry_id++;
8c5ac7f6 575 oe->count = 1;
f1f41a6c 576 ops->safe_push (oe);
54aceb26 577}
3dec5460 578
8c5ac7f6 579/* Add an operand entry to *OPS for the tree operand OP with repeat
580 count REPEAT. */
581
582static void
f1f41a6c 583add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
8c5ac7f6 584 HOST_WIDE_INT repeat)
585{
672758dc 586 operand_entry_t oe = operand_entry_pool.allocate ();
8c5ac7f6 587
588 oe->op = op;
589 oe->rank = get_rank (op);
590 oe->id = next_operand_entry_id++;
591 oe->count = repeat;
f1f41a6c 592 ops->safe_push (oe);
8c5ac7f6 593
594 reassociate_stats.pows_encountered++;
595}
596
54aceb26 597/* Return true if STMT is a reassociable operation containing a binary
a4c3fb95 598 operation with tree code CODE, and is inside LOOP. */
3dec5460 599
600static bool
75a70cf9 601is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
54aceb26 602{
75a70cf9 603 basic_block bb = gimple_bb (stmt);
a4c3fb95 604
75a70cf9 605 if (gimple_bb (stmt) == NULL)
a4c3fb95 606 return false;
607
a4c3fb95 608 if (!flow_bb_inside_loop_p (loop, bb))
609 return false;
610
75a70cf9 611 if (is_gimple_assign (stmt)
612 && gimple_assign_rhs_code (stmt) == code
613 && has_single_use (gimple_assign_lhs (stmt)))
3dec5460 614 return true;
75a70cf9 615
54aceb26 616 return false;
617}
618
619
620/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
621 operand of that operation. Otherwise, return NULL_TREE. */
622
623static tree
624get_unary_op (tree name, enum tree_code opcode)
625{
75a70cf9 626 gimple stmt = SSA_NAME_DEF_STMT (name);
54aceb26 627
75a70cf9 628 if (!is_gimple_assign (stmt))
54aceb26 629 return NULL_TREE;
630
75a70cf9 631 if (gimple_assign_rhs_code (stmt) == opcode)
632 return gimple_assign_rhs1 (stmt);
54aceb26 633 return NULL_TREE;
634}
635
636/* If CURR and LAST are a pair of ops that OPCODE allows us to
637 eliminate through equivalences, do so, remove them from OPS, and
638 return true. Otherwise, return false. */
639
640static bool
641eliminate_duplicate_pair (enum tree_code opcode,
f1f41a6c 642 vec<operand_entry_t> *ops,
54aceb26 643 bool *all_done,
644 unsigned int i,
645 operand_entry_t curr,
646 operand_entry_t last)
647{
648
91543a50 649 /* If we have two of the same op, and the opcode is &, |, min, or max,
650 we can eliminate one of them.
54aceb26 651 If we have two of the same op, and the opcode is ^, we can
652 eliminate both of them. */
3dec5460 653
54aceb26 654 if (last && last->op == curr->op)
3dec5460 655 {
54aceb26 656 switch (opcode)
657 {
91543a50 658 case MAX_EXPR:
659 case MIN_EXPR:
54aceb26 660 case BIT_IOR_EXPR:
661 case BIT_AND_EXPR:
662 if (dump_file && (dump_flags & TDF_DETAILS))
663 {
664 fprintf (dump_file, "Equivalence: ");
665 print_generic_expr (dump_file, curr->op, 0);
91543a50 666 fprintf (dump_file, " [&|minmax] ");
54aceb26 667 print_generic_expr (dump_file, last->op, 0);
668 fprintf (dump_file, " -> ");
669 print_generic_stmt (dump_file, last->op, 0);
670 }
671
f1f41a6c 672 ops->ordered_remove (i);
54aceb26 673 reassociate_stats.ops_eliminated ++;
674
675 return true;
676
677 case BIT_XOR_EXPR:
678 if (dump_file && (dump_flags & TDF_DETAILS))
679 {
680 fprintf (dump_file, "Equivalence: ");
681 print_generic_expr (dump_file, curr->op, 0);
682 fprintf (dump_file, " ^ ");
683 print_generic_expr (dump_file, last->op, 0);
684 fprintf (dump_file, " -> nothing\n");
685 }
686
687 reassociate_stats.ops_eliminated += 2;
688
f1f41a6c 689 if (ops->length () == 2)
54aceb26 690 {
f1f41a6c 691 ops->truncate (0);
385f3f36 692 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
54aceb26 693 *all_done = true;
694 }
695 else
696 {
f1f41a6c 697 ops->ordered_remove (i-1);
698 ops->ordered_remove (i-1);
54aceb26 699 }
700
701 return true;
702
703 default:
704 break;
705 }
706 }
3dec5460 707 return false;
708}
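
/* An added illustration: given ops = {x, x}, the code above rewrites
   x & x to {x} and x ^ x to {0}; in a longer list such as {x, x, y},
   the BIT_XOR_EXPR case simply drops both copies of x, leaving {y}. */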
709
f1f41a6c 710static vec<tree> plus_negates;
c2f29260 711
50fa62c6 712/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
713 expression, look in OPS for a corresponding positive operation to cancel
714 it out. If we find one, remove the other from OPS, replace
715 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
716 return false. */
3dec5460 717
718static bool
54aceb26 719eliminate_plus_minus_pair (enum tree_code opcode,
f1f41a6c 720 vec<operand_entry_t> *ops,
54aceb26 721 unsigned int currindex,
722 operand_entry_t curr)
723{
724 tree negateop;
50fa62c6 725 tree notop;
54aceb26 726 unsigned int i;
727 operand_entry_t oe;
728
729 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
3dec5460 730 return false;
54aceb26 731
732 negateop = get_unary_op (curr->op, NEGATE_EXPR);
50fa62c6 733 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
734 if (negateop == NULL_TREE && notop == NULL_TREE)
54aceb26 735 return false;
736
737 /* Any non-negated version will have a rank that is one less than
738 the current rank. So once we hit those ranks, if we don't find
739 one, we can stop. */
740
741 for (i = currindex + 1;
f1f41a6c 742 ops->iterate (i, &oe)
54aceb26 743 && oe->rank >= curr->rank - 1 ;
744 i++)
3dec5460 745 {
54aceb26 746 if (oe->op == negateop)
747 {
748
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 {
751 fprintf (dump_file, "Equivalence: ");
752 print_generic_expr (dump_file, negateop, 0);
753 fprintf (dump_file, " + -");
754 print_generic_expr (dump_file, oe->op, 0);
755 fprintf (dump_file, " -> 0\n");
756 }
757
f1f41a6c 758 ops->ordered_remove (i);
385f3f36 759 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
f1f41a6c 760 ops->ordered_remove (currindex);
54aceb26 761 reassociate_stats.ops_eliminated ++;
762
50fa62c6 763 return true;
764 }
765 else if (oe->op == notop)
766 {
767 tree op_type = TREE_TYPE (oe->op);
768
769 if (dump_file && (dump_flags & TDF_DETAILS))
770 {
771 fprintf (dump_file, "Equivalence: ");
772 print_generic_expr (dump_file, notop, 0);
773 fprintf (dump_file, " + ~");
774 print_generic_expr (dump_file, oe->op, 0);
775 fprintf (dump_file, " -> -1\n");
776 }
777
f1f41a6c 778 ops->ordered_remove (i);
50fa62c6 779 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
f1f41a6c 780 ops->ordered_remove (currindex);
50fa62c6 781 reassociate_stats.ops_eliminated ++;
782
54aceb26 783 return true;
784 }
3dec5460 785 }
786
c2f29260 787 /* CURR->OP is a negate expr in a plus expr: save it for later
788 inspection in repropagate_negates(). */
50fa62c6 789 if (negateop != NULL_TREE)
f1f41a6c 790 plus_negates.safe_push (curr->op);
c2f29260 791
54aceb26 792 return false;
793}
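
/* An added note on the -1 above: in two's complement ~x == -x - 1,
   so x + ~x == -1 for any integral x, which is why a matched
   (x, ~x) pair in a PLUS chain is replaced by the constant -1. */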
794
795/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
796 bitwise not expression, look in OPS for a corresponding operand to
797 cancel it out. If we find one, reduce the whole operand list to a
798 single constant: 0 for BIT_AND_EXPR or -1 for BIT_IOR_EXPR, and
799 return true. Otherwise, return false. */
800
801static bool
802eliminate_not_pairs (enum tree_code opcode,
f1f41a6c 803 vec<operand_entry_t> *ops,
54aceb26 804 unsigned int currindex,
805 operand_entry_t curr)
806{
807 tree notop;
808 unsigned int i;
809 operand_entry_t oe;
810
811 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
812 || TREE_CODE (curr->op) != SSA_NAME)
813 return false;
814
815 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
816 if (notop == NULL_TREE)
817 return false;
818
819 /* Any non-not version will have a rank that is one less than
820 the current rank. So once we hit those ranks, if we don't find
821 one, we can stop. */
822
823 for (i = currindex + 1;
f1f41a6c 824 ops->iterate (i, &oe)
54aceb26 825 && oe->rank >= curr->rank - 1;
826 i++)
3dec5460 827 {
54aceb26 828 if (oe->op == notop)
3dec5460 829 {
54aceb26 830 if (dump_file && (dump_flags & TDF_DETAILS))
3dec5460 831 {
54aceb26 832 fprintf (dump_file, "Equivalence: ");
833 print_generic_expr (dump_file, notop, 0);
834 if (opcode == BIT_AND_EXPR)
835 fprintf (dump_file, " & ~");
836 else if (opcode == BIT_IOR_EXPR)
837 fprintf (dump_file, " | ~");
838 print_generic_expr (dump_file, oe->op, 0);
839 if (opcode == BIT_AND_EXPR)
840 fprintf (dump_file, " -> 0\n");
841 else if (opcode == BIT_IOR_EXPR)
842 fprintf (dump_file, " -> -1\n");
3dec5460 843 }
54aceb26 844
845 if (opcode == BIT_AND_EXPR)
385f3f36 846 oe->op = build_zero_cst (TREE_TYPE (oe->op));
54aceb26 847 else if (opcode == BIT_IOR_EXPR)
c2c07119 848 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
54aceb26 849
f1f41a6c 850 reassociate_stats.ops_eliminated += ops->length () - 1;
851 ops->truncate (0);
852 ops->quick_push (oe);
54aceb26 853 return true;
3dec5460 854 }
855 }
54aceb26 856
857 return false;
3dec5460 858}
859
54aceb26 860/* Use constant value that may be present in OPS to try to eliminate
861 operands. Note that this function is only really used when we've
862 eliminated ops for other reasons, or merged constants. Across
863 single statements, fold already does all of this, plus more. There
864 is little point in duplicating logic, so I've only included the
865 identities that I could ever construct testcases to trigger. */
3dec5460 866
54aceb26 867static void
868eliminate_using_constants (enum tree_code opcode,
f1f41a6c 869 vec<operand_entry_t> *ops)
3dec5460 870{
f1f41a6c 871 operand_entry_t oelast = ops->last ();
46ef5347 872 tree type = TREE_TYPE (oelast->op);
3dec5460 873
46ef5347 874 if (oelast->rank == 0
875 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
3dec5460 876 {
54aceb26 877 switch (opcode)
3dec5460 878 {
54aceb26 879 case BIT_AND_EXPR:
880 if (integer_zerop (oelast->op))
881 {
f1f41a6c 882 if (ops->length () != 1)
54aceb26 883 {
884 if (dump_file && (dump_flags & TDF_DETAILS))
885 fprintf (dump_file, "Found & 0, removing all other ops\n");
886
f1f41a6c 887 reassociate_stats.ops_eliminated += ops->length () - 1;
48e1416a 888
f1f41a6c 889 ops->truncate (0);
890 ops->quick_push (oelast);
54aceb26 891 return;
892 }
893 }
894 else if (integer_all_onesp (oelast->op))
895 {
f1f41a6c 896 if (ops->length () != 1)
54aceb26 897 {
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found & -1, removing\n");
f1f41a6c 900 ops->pop ();
54aceb26 901 reassociate_stats.ops_eliminated++;
902 }
903 }
904 break;
905 case BIT_IOR_EXPR:
906 if (integer_all_onesp (oelast->op))
907 {
f1f41a6c 908 if (ops->length () != 1)
54aceb26 909 {
910 if (dump_file && (dump_flags & TDF_DETAILS))
911 fprintf (dump_file, "Found | -1, removing all other ops\n");
912
f1f41a6c 913 reassociate_stats.ops_eliminated += ops->length () - 1;
48e1416a 914
f1f41a6c 915 ops->truncate (0);
916 ops->quick_push (oelast);
54aceb26 917 return;
918 }
48e1416a 919 }
54aceb26 920 else if (integer_zerop (oelast->op))
921 {
f1f41a6c 922 if (ops->length () != 1)
54aceb26 923 {
924 if (dump_file && (dump_flags & TDF_DETAILS))
925 fprintf (dump_file, "Found | 0, removing\n");
f1f41a6c 926 ops->pop ();
54aceb26 927 reassociate_stats.ops_eliminated++;
928 }
929 }
930 break;
931 case MULT_EXPR:
46ef5347 932 if (integer_zerop (oelast->op)
933 || (FLOAT_TYPE_P (type)
93633022 934 && !HONOR_NANS (type)
fe994837 935 && !HONOR_SIGNED_ZEROS (type)
46ef5347 936 && real_zerop (oelast->op)))
54aceb26 937 {
f1f41a6c 938 if (ops->length () != 1)
54aceb26 939 {
940 if (dump_file && (dump_flags & TDF_DETAILS))
941 fprintf (dump_file, "Found * 0, removing all other ops\n");
48e1416a 942
f1f41a6c 943 reassociate_stats.ops_eliminated += ops->length () - 1;
944 ops->truncate (1);
945 ops->quick_push (oelast);
54aceb26 946 return;
947 }
948 }
46ef5347 949 else if (integer_onep (oelast->op)
950 || (FLOAT_TYPE_P (type)
fe994837 951 && !HONOR_SNANS (type)
46ef5347 952 && real_onep (oelast->op)))
54aceb26 953 {
f1f41a6c 954 if (ops->length () != 1)
54aceb26 955 {
956 if (dump_file && (dump_flags & TDF_DETAILS))
957 fprintf (dump_file, "Found * 1, removing\n");
f1f41a6c 958 ops->pop ();
54aceb26 959 reassociate_stats.ops_eliminated++;
960 return;
961 }
962 }
963 break;
964 case BIT_XOR_EXPR:
965 case PLUS_EXPR:
966 case MINUS_EXPR:
46ef5347 967 if (integer_zerop (oelast->op)
968 || (FLOAT_TYPE_P (type)
969 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
970 && fold_real_zero_addition_p (type, oelast->op,
971 opcode == MINUS_EXPR)))
3dec5460 972 {
f1f41a6c 973 if (ops->length () != 1)
3dec5460 974 {
54aceb26 975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Found [|^+] 0, removing\n");
f1f41a6c 977 ops->pop ();
54aceb26 978 reassociate_stats.ops_eliminated++;
979 return;
3dec5460 980 }
3dec5460 981 }
54aceb26 982 break;
983 default:
984 break;
3dec5460 985 }
986 }
3dec5460 987}
988
dddf5036 989
f1f41a6c 990static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
dddf5036 991 bool, bool);
992
993/* Structure for tracking and counting operands. */
994typedef struct oecount_s {
995 int cnt;
17b5ea6f 996 int id;
dddf5036 997 enum tree_code oecode;
998 tree op;
999} oecount;
1000
dddf5036 1001
1002/* The heap for the oecount hashtable and the sorted list of operands. */
f1f41a6c 1003static vec<oecount> cvec;
dddf5036 1004
3e871d4d 1005
1006/* Oecount hashtable helpers. */
1007
613732c1 1008struct oecount_hasher : int_hash <int, 0, 1>
3e871d4d 1009{
613732c1 1010 static inline hashval_t hash (int);
1011 static inline bool equal (int, int);
3e871d4d 1012};
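
/* An added note: the values stored in the oecount hashtable below
   are indices into cvec biased by 42. int_hash <int, 0, 1> reserves
   0 for empty and 1 for deleted slots, so the bias keeps every real
   entry clear of those two markers. */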
1013
dddf5036 1014/* Hash function for oecount. */
1015
3e871d4d 1016inline hashval_t
613732c1 1017oecount_hasher::hash (int p)
dddf5036 1018{
2933f7af 1019 const oecount *c = &cvec[p - 42];
dddf5036 1020 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1021}
1022
1023/* Comparison function for oecount. */
1024
3e871d4d 1025inline bool
613732c1 1026oecount_hasher::equal (int p1, int p2)
dddf5036 1027{
2933f7af 1028 const oecount *c1 = &cvec[p1 - 42];
1029 const oecount *c2 = &cvec[p2 - 42];
dddf5036 1030 return (c1->oecode == c2->oecode
1031 && c1->op == c2->op);
1032}
1033
1034/* Comparison function for qsort sorting oecount elements by count. */
1035
1036static int
1037oecount_cmp (const void *p1, const void *p2)
1038{
1039 const oecount *c1 = (const oecount *)p1;
1040 const oecount *c2 = (const oecount *)p2;
17b5ea6f 1041 if (c1->cnt != c2->cnt)
1042 return c1->cnt - c2->cnt;
1043 else
1044 /* If counts are identical, use unique IDs to stabilize qsort. */
1045 return c1->id - c2->id;
dddf5036 1046}
1047
56e650d6 1048/* Return TRUE iff STMT represents a builtin call that raises OP
1049 to some exponent. */
1050
1051static bool
1052stmt_is_power_of_op (gimple stmt, tree op)
1053{
1054 tree fndecl;
1055
1056 if (!is_gimple_call (stmt))
1057 return false;
1058
1059 fndecl = gimple_call_fndecl (stmt);
1060
1061 if (!fndecl
1062 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1063 return false;
1064
1065 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1066 {
1067 CASE_FLT_FN (BUILT_IN_POW):
1068 CASE_FLT_FN (BUILT_IN_POWI):
1069 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1070
1071 default:
1072 return false;
1073 }
1074}
1075
1076/* Given STMT which is a __builtin_pow* call, decrement its exponent
1077 in place and return the result. Assumes that stmt_is_power_of_op
1078 was previously called for STMT and returned TRUE. */
1079
1080static HOST_WIDE_INT
1081decrement_power (gimple stmt)
1082{
1083 REAL_VALUE_TYPE c, cint;
1084 HOST_WIDE_INT power;
1085 tree arg1;
1086
1087 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1088 {
1089 CASE_FLT_FN (BUILT_IN_POW):
1090 arg1 = gimple_call_arg (stmt, 1);
1091 c = TREE_REAL_CST (arg1);
1092 power = real_to_integer (&c) - 1;
e913b5cd 1093 real_from_integer (&cint, VOIDmode, power, SIGNED);
56e650d6 1094 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1095 return power;
1096
1097 CASE_FLT_FN (BUILT_IN_POWI):
1098 arg1 = gimple_call_arg (stmt, 1);
f9ae6f95 1099 power = TREE_INT_CST_LOW (arg1) - 1;
56e650d6 1100 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1101 return power;
1102
1103 default:
1104 gcc_unreachable ();
1105 }
1106}
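
/* An added illustration: for a call t = __builtin_pow (x, 3.0),
   decrement_power rewrites the statement in place to
   t = __builtin_pow (x, 2.0) and returns 2, the decremented
   exponent. */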
1107
1108/* Find the single immediate use of STMT's LHS, and replace it
1109 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1110 replace *DEF with OP as well. */
1111
1112static void
1113propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1114{
1115 tree lhs;
1116 gimple use_stmt;
1117 use_operand_p use;
1118 gimple_stmt_iterator gsi;
1119
1120 if (is_gimple_call (stmt))
1121 lhs = gimple_call_lhs (stmt);
1122 else
1123 lhs = gimple_assign_lhs (stmt);
1124
1125 gcc_assert (has_single_use (lhs));
1126 single_imm_use (lhs, &use, &use_stmt);
1127 if (lhs == *def)
1128 *def = op;
1129 SET_USE (use, op);
1130 if (TREE_CODE (op) != SSA_NAME)
1131 update_stmt (use_stmt);
1132 gsi = gsi_for_stmt (stmt);
e6b37e57 1133 unlink_stmt_vdef (stmt);
54675e05 1134 reassoc_remove_stmt (&gsi);
56e650d6 1135 release_defs (stmt);
56e650d6 1136}
1137
dddf5036 1138/* Walks the linear chain with result *DEF searching for an operation
1139 with operand OP and code OPCODE, removing that from the chain. *DEF
1140 is updated if there is only one operand but no operation left. */
1141
1142static void
1143zero_one_operation (tree *def, enum tree_code opcode, tree op)
1144{
1145 gimple stmt = SSA_NAME_DEF_STMT (*def);
1146
1147 do
1148 {
56e650d6 1149 tree name;
1150
1151 if (opcode == MULT_EXPR
1152 && stmt_is_power_of_op (stmt, op))
1153 {
1154 if (decrement_power (stmt) == 1)
1155 propagate_op_to_single_use (op, stmt, def);
1156 return;
1157 }
1158
1159 name = gimple_assign_rhs1 (stmt);
dddf5036 1160
1161 /* If this is the operation we look for and one of the operands
1162 is ours, simply propagate the other operand into the stmt's
1163 single use. */
1164 if (gimple_assign_rhs_code (stmt) == opcode
1165 && (name == op
1166 || gimple_assign_rhs2 (stmt) == op))
1167 {
dddf5036 1168 if (name == op)
1169 name = gimple_assign_rhs2 (stmt);
56e650d6 1170 propagate_op_to_single_use (name, stmt, def);
dddf5036 1171 return;
1172 }
1173
56e650d6 1174 /* We might have a multiply of two __builtin_pow* calls, and
1175 the operand might be hiding in the rightmost one. */
1176 if (opcode == MULT_EXPR
1177 && gimple_assign_rhs_code (stmt) == opcode
bdb5a35a 1178 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1179 && has_single_use (gimple_assign_rhs2 (stmt)))
56e650d6 1180 {
1181 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1182 if (stmt_is_power_of_op (stmt2, op))
1183 {
1184 if (decrement_power (stmt2) == 1)
1185 propagate_op_to_single_use (op, stmt2, def);
1186 return;
1187 }
1188 }
1189
dddf5036 1190 /* Continue walking the chain. */
1191 gcc_assert (name != op
1192 && TREE_CODE (name) == SSA_NAME);
1193 stmt = SSA_NAME_DEF_STMT (name);
1194 }
1195 while (1);
1196}
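
/* An added illustration: given the linear chain

     t_2 = a * b;
     t_3 = t_2 * b;
     x_4 = t_3 * c;

   zero_one_operation (&x_4, MULT_EXPR, b) strips one factor of b by
   propagating t_2 into the single use of t_3 and deleting t_3's
   statement, after which the chain computes a * b * c. */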
1197
82cb4e1c 1198/* Returns true if statement S1 dominates statement S2. Like
1199 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
c9dbb922 1200
82cb4e1c 1201static bool
1202reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
c9dbb922 1203{
82cb4e1c 1204 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1205
1206 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1207 SSA_NAME. Assume it lives at the beginning of function and
1208 thus dominates everything. */
1209 if (!bb1 || s1 == s2)
1210 return true;
1211
1212 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1213 if (!bb2)
1214 return false;
1215
1216 if (bb1 == bb2)
1217 {
1218 /* PHIs in the same basic block are assumed to be
1219 executed all in parallel; if only one stmt is a PHI,
1220 it dominates the other stmt in the same basic block. */
1221 if (gimple_code (s1) == GIMPLE_PHI)
1222 return true;
1223
1224 if (gimple_code (s2) == GIMPLE_PHI)
1225 return false;
1226
1227 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1228
1229 if (gimple_uid (s1) < gimple_uid (s2))
1230 return true;
1231
1232 if (gimple_uid (s1) > gimple_uid (s2))
1233 return false;
1234
1235 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1236 unsigned int uid = gimple_uid (s1);
1237 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1238 {
1239 gimple s = gsi_stmt (gsi);
1240 if (gimple_uid (s) != uid)
1241 break;
1242 if (s == s2)
1243 return true;
1244 }
1245
1246 return false;
1247 }
1248
1249 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1250}
1251
1252/* Insert STMT after INSERT_POINT. */
1253
1254static void
1255insert_stmt_after (gimple stmt, gimple insert_point)
1256{
1257 gimple_stmt_iterator gsi;
1258 basic_block bb;
1259
1260 if (gimple_code (insert_point) == GIMPLE_PHI)
1261 bb = gimple_bb (insert_point);
1262 else if (!stmt_ends_bb_p (insert_point))
1263 {
1264 gsi = gsi_for_stmt (insert_point);
1265 gimple_set_uid (stmt, gimple_uid (insert_point));
1266 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1267 return;
1268 }
1269 else
1270 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1271 thus if it must end a basic block, it should be a call that can
1272 throw, or some assignment that can throw. If it throws, the LHS
1273 of it will not be initialized though, so only valid places using
1274 the SSA_NAME should be dominated by the fallthru edge. */
1275 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1276 gsi = gsi_after_labels (bb);
1277 if (gsi_end_p (gsi))
1278 {
1279 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1280 gimple_set_uid (stmt,
1281 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1282 }
1283 else
1284 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1285 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
c9dbb922 1286}
1287
dddf5036 1288/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1289 the result. Places the statement after the definition of either
1290 OP1 or OP2. Returns the new statement. */
1291
1292static gimple
03d37e4e 1293build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
dddf5036 1294{
1295 gimple op1def = NULL, op2def = NULL;
1296 gimple_stmt_iterator gsi;
1297 tree op;
1a91d914 1298 gassign *sum;
dddf5036 1299
1300 /* Create the addition statement. */
f9e245b2 1301 op = make_ssa_name (type);
e9cf809e 1302 sum = gimple_build_assign (op, opcode, op1, op2);
dddf5036 1303
1304 /* Find an insertion place and insert. */
1305 if (TREE_CODE (op1) == SSA_NAME)
1306 op1def = SSA_NAME_DEF_STMT (op1);
1307 if (TREE_CODE (op2) == SSA_NAME)
1308 op2def = SSA_NAME_DEF_STMT (op2);
1309 if ((!op1def || gimple_nop_p (op1def))
1310 && (!op2def || gimple_nop_p (op2def)))
1311 {
34154e27 1312 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
82cb4e1c 1313 if (gsi_end_p (gsi))
dddf5036 1314 {
82cb4e1c 1315 gimple_stmt_iterator gsi2
34154e27 1316 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
82cb4e1c 1317 gimple_set_uid (sum,
1318 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
dddf5036 1319 }
1320 else
82cb4e1c 1321 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1322 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
dddf5036 1323 }
1324 else
1325 {
82cb4e1c 1326 gimple insert_point;
1327 if ((!op1def || gimple_nop_p (op1def))
1328 || (op2def && !gimple_nop_p (op2def)
1329 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1330 insert_point = op2def;
dddf5036 1331 else
82cb4e1c 1332 insert_point = op1def;
1333 insert_stmt_after (sum, insert_point);
dddf5036 1334 }
1335 update_stmt (sum);
1336
1337 return sum;
1338}
1339
1340/* Perform un-distribution of divisions and multiplications.
1341 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1342 to (A + B) / X for real X.
1343
1344 The algorithm is organized as follows.
1345
1346 - First we walk the addition chain *OPS looking for summands that
1347 are defined by a multiplication or a real division. This results
1348 in the candidates bitmap with relevant indices into *OPS.
1349
1350 - Second we build the chains of multiplications or divisions for
9d75589a 1351 these candidates, counting the number of occurrences of (operand, code)
dddf5036 1352 pairs in all of the candidates chains.
1353
9d75589a 1354 - Third we sort the (operand, code) pairs by number of occurrence and
dddf5036 1355 process them starting with the pair with the most uses.
1356
1357 * For each such pair we walk the candidates again to build a
1358 second candidate bitmap noting all multiplication/division chains
9d75589a 1359 that have at least one occurrence of (operand, code).
dddf5036 1360
1361 * We build an alternate addition chain only covering these
1362 candidates with one (operand, code) operation removed from their
1363 multiplication/division chain.
1364
1365 * The first candidate gets replaced by the alternate addition chain
1366 multiplied/divided by the operand.
1367
1368 * All candidate chains get disabled for further processing and
1369 processing of (operand, code) pairs continues.
1370
1371 The alternate addition chains built are re-processed by the main
1372 reassociation algorithm which allows optimizing a * x * y + b * y * x
1373 to (a + b) * x * y in one invocation of the reassociation pass. */
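
/* An added sketch of the algorithm above on ops = {a*x, b*x, c} with
   opcode PLUS_EXPR: both a*x and b*x become candidates, the pair
   (x, MULT_EXPR) is counted twice, one factor of x is removed from
   each chain with zero_one_operation, build_and_add_sum emits
   t = a + b and then p = t * x, and the ops list becomes {p, 0, c},
   which the caller then reassociates as p + c. */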
1374
1375static bool
1376undistribute_ops_list (enum tree_code opcode,
f1f41a6c 1377 vec<operand_entry_t> *ops, struct loop *loop)
dddf5036 1378{
f1f41a6c 1379 unsigned int length = ops->length ();
dddf5036 1380 operand_entry_t oe1;
1381 unsigned i, j;
1382 sbitmap candidates, candidates2;
1383 unsigned nr_candidates, nr_candidates2;
1384 sbitmap_iterator sbi0;
f1f41a6c 1385 vec<operand_entry_t> *subops;
dddf5036 1386 bool changed = false;
17b5ea6f 1387 int next_oecount_id = 0;
dddf5036 1388
1389 if (length <= 1
1390 || opcode != PLUS_EXPR)
1391 return false;
1392
1393 /* Build a list of candidates to process. */
1394 candidates = sbitmap_alloc (length);
53c5d9d4 1395 bitmap_clear (candidates);
dddf5036 1396 nr_candidates = 0;
f1f41a6c 1397 FOR_EACH_VEC_ELT (*ops, i, oe1)
dddf5036 1398 {
1399 enum tree_code dcode;
1400 gimple oe1def;
1401
1402 if (TREE_CODE (oe1->op) != SSA_NAME)
1403 continue;
1404 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1405 if (!is_gimple_assign (oe1def))
1406 continue;
1407 dcode = gimple_assign_rhs_code (oe1def);
1408 if ((dcode != MULT_EXPR
1409 && dcode != RDIV_EXPR)
1410 || !is_reassociable_op (oe1def, dcode, loop))
1411 continue;
1412
08b7917c 1413 bitmap_set_bit (candidates, i);
dddf5036 1414 nr_candidates++;
1415 }
1416
1417 if (nr_candidates < 2)
1418 {
1419 sbitmap_free (candidates);
1420 return false;
1421 }
1422
1423 if (dump_file && (dump_flags & TDF_DETAILS))
1424 {
1425 fprintf (dump_file, "searching for un-distribute opportunities ");
1426 print_generic_expr (dump_file,
f1f41a6c 1427 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
dddf5036 1428 fprintf (dump_file, " %d\n", nr_candidates);
1429 }
1430
1431 /* Build linearized sub-operand lists and the counting table. */
f1f41a6c 1432 cvec.create (0);
c1f445d2 1433
1434 hash_table<oecount_hasher> ctable (15);
1435
f1f41a6c 1436 /* ??? Macro arguments cannot have multi-argument template types in
1437 them. This typedef is needed to workaround that limitation. */
1438 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1439 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
0d211963 1440 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
dddf5036 1441 {
1442 gimple oedef;
1443 enum tree_code oecode;
1444 unsigned j;
1445
f1f41a6c 1446 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
dddf5036 1447 oecode = gimple_assign_rhs_code (oedef);
1448 linearize_expr_tree (&subops[i], oedef,
1449 associative_tree_code (oecode), false);
1450
f1f41a6c 1451 FOR_EACH_VEC_ELT (subops[i], j, oe1)
dddf5036 1452 {
1453 oecount c;
2933f7af 1454 int *slot;
1455 int idx;
dddf5036 1456 c.oecode = oecode;
1457 c.cnt = 1;
17b5ea6f 1458 c.id = next_oecount_id++;
dddf5036 1459 c.op = oe1->op;
f1f41a6c 1460 cvec.safe_push (c);
1461 idx = cvec.length () + 41;
2933f7af 1462 slot = ctable.find_slot (idx, INSERT);
dddf5036 1463 if (!*slot)
1464 {
2933f7af 1465 *slot = idx;
dddf5036 1466 }
1467 else
1468 {
f1f41a6c 1469 cvec.pop ();
2933f7af 1470 cvec[*slot - 42].cnt++;
dddf5036 1471 }
1472 }
1473 }
dddf5036 1474
1475 /* Sort the counting table. */
f1f41a6c 1476 cvec.qsort (oecount_cmp);
dddf5036 1477
1478 if (dump_file && (dump_flags & TDF_DETAILS))
1479 {
1480 oecount *c;
1481 fprintf (dump_file, "Candidates:\n");
f1f41a6c 1482 FOR_EACH_VEC_ELT (cvec, j, c)
dddf5036 1483 {
1484 fprintf (dump_file, " %u %s: ", c->cnt,
1485 c->oecode == MULT_EXPR
1486 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1487 print_generic_expr (dump_file, c->op, 0);
1488 fprintf (dump_file, "\n");
1489 }
1490 }
1491
c31fb425 1492 /* Process the (operand, code) pairs in order of decreasing occurrence count. */
dddf5036 1493 candidates2 = sbitmap_alloc (length);
f1f41a6c 1494 while (!cvec.is_empty ())
dddf5036 1495 {
f1f41a6c 1496 oecount *c = &cvec.last ();
dddf5036 1497 if (c->cnt < 2)
1498 break;
1499
1500 /* Now collect the operands in the outer chain that contain
1501 the common operand in their inner chain. */
53c5d9d4 1502 bitmap_clear (candidates2);
dddf5036 1503 nr_candidates2 = 0;
0d211963 1504 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
dddf5036 1505 {
1506 gimple oedef;
1507 enum tree_code oecode;
1508 unsigned j;
f1f41a6c 1509 tree op = (*ops)[i]->op;
dddf5036 1510
1511 /* If we undistributed in this chain already, this may be
1512 a constant. */
1513 if (TREE_CODE (op) != SSA_NAME)
1514 continue;
1515
1516 oedef = SSA_NAME_DEF_STMT (op);
1517 oecode = gimple_assign_rhs_code (oedef);
1518 if (oecode != c->oecode)
1519 continue;
1520
f1f41a6c 1521 FOR_EACH_VEC_ELT (subops[i], j, oe1)
dddf5036 1522 {
56e650d6 1523 if (oe1->op == c->op)
dddf5036 1524 {
08b7917c 1525 bitmap_set_bit (candidates2, i);
dddf5036 1526 ++nr_candidates2;
1527 break;
1528 }
1529 }
1530 }
1531
1532 if (nr_candidates2 >= 2)
1533 {
1534 operand_entry_t oe1, oe2;
dddf5036 1535 gimple prod;
53c5d9d4 1536 int first = bitmap_first_set_bit (candidates2);
dddf5036 1537
1538 /* Build the new addition chain. */
f1f41a6c 1539 oe1 = (*ops)[first];
dddf5036 1540 if (dump_file && (dump_flags & TDF_DETAILS))
1541 {
1542 fprintf (dump_file, "Building (");
1543 print_generic_expr (dump_file, oe1->op, 0);
1544 }
dddf5036 1545 zero_one_operation (&oe1->op, c->oecode, c->op);
0d211963 1546 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
dddf5036 1547 {
1548 gimple sum;
f1f41a6c 1549 oe2 = (*ops)[i];
dddf5036 1550 if (dump_file && (dump_flags & TDF_DETAILS))
1551 {
1552 fprintf (dump_file, " + ");
1553 print_generic_expr (dump_file, oe2->op, 0);
1554 }
1555 zero_one_operation (&oe2->op, c->oecode, c->op);
03d37e4e 1556 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1557 oe1->op, oe2->op, opcode);
385f3f36 1558 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
dddf5036 1559 oe2->rank = 0;
1560 oe1->op = gimple_get_lhs (sum);
1561 }
1562
1563 /* Apply the multiplication/division. */
03d37e4e 1564 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1565 oe1->op, c->op, c->oecode);
dddf5036 1566 if (dump_file && (dump_flags & TDF_DETAILS))
1567 {
1568 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1569 print_generic_expr (dump_file, c->op, 0);
1570 fprintf (dump_file, "\n");
1571 }
1572
1573 /* Record it in the addition chain and disable further
1574 undistribution with this op. */
1575 oe1->op = gimple_assign_lhs (prod);
1576 oe1->rank = get_rank (oe1->op);
f1f41a6c 1577 subops[first].release ();
dddf5036 1578
1579 changed = true;
1580 }
1581
f1f41a6c 1582 cvec.pop ();
dddf5036 1583 }
1584
f1f41a6c 1585 for (i = 0; i < ops->length (); ++i)
1586 subops[i].release ();
dddf5036 1587 free (subops);
f1f41a6c 1588 cvec.release ();
dddf5036 1589 sbitmap_free (candidates);
1590 sbitmap_free (candidates2);
1591
1592 return changed;
1593}
1594
5d6da7da 1595/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1596 expression, examine the other OPS to see if any of them are comparisons
1597 of the same values, which we may be able to combine or eliminate.
1598 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1599
1600static bool
1601eliminate_redundant_comparison (enum tree_code opcode,
f1f41a6c 1602 vec<operand_entry_t> *ops,
5d6da7da 1603 unsigned int currindex,
1604 operand_entry_t curr)
1605{
1606 tree op1, op2;
1607 enum tree_code lcode, rcode;
1608 gimple def1, def2;
1609 int i;
1610 operand_entry_t oe;
1611
1612 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1613 return false;
1614
1615 /* Check that CURR is a comparison. */
1616 if (TREE_CODE (curr->op) != SSA_NAME)
1617 return false;
1618 def1 = SSA_NAME_DEF_STMT (curr->op);
1619 if (!is_gimple_assign (def1))
1620 return false;
1621 lcode = gimple_assign_rhs_code (def1);
1622 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1623 return false;
1624 op1 = gimple_assign_rhs1 (def1);
1625 op2 = gimple_assign_rhs2 (def1);
1626
1627 /* Now look for a similar comparison in the remaining OPS. */
f1f41a6c 1628 for (i = currindex + 1; ops->iterate (i, &oe); i++)
5d6da7da 1629 {
1630 tree t;
1631
1632 if (TREE_CODE (oe->op) != SSA_NAME)
1633 continue;
1634 def2 = SSA_NAME_DEF_STMT (oe->op);
1635 if (!is_gimple_assign (def2))
1636 continue;
1637 rcode = gimple_assign_rhs_code (def2);
1638 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1639 continue;
5d6da7da 1640
1641 /* If we got here, we have a match. See if we can combine the
1642 two comparisons. */
c82d157a 1643 if (opcode == BIT_IOR_EXPR)
1644 t = maybe_fold_or_comparisons (lcode, op1, op2,
1645 rcode, gimple_assign_rhs1 (def2),
1646 gimple_assign_rhs2 (def2));
1647 else
1648 t = maybe_fold_and_comparisons (lcode, op1, op2,
1649 rcode, gimple_assign_rhs1 (def2),
1650 gimple_assign_rhs2 (def2));
5d6da7da 1651 if (!t)
1652 continue;
c82d157a 1653
1654 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1655 always give us a boolean_type_node value back. If the original
1656 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1657 we need to convert. */
1658 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1659 t = fold_convert (TREE_TYPE (curr->op), t);
1660
89c993b6 1661 if (TREE_CODE (t) != INTEGER_CST
1662 && !operand_equal_p (t, curr->op, 0))
1663 {
1664 enum tree_code subcode;
1665 tree newop1, newop2;
1666 if (!COMPARISON_CLASS_P (t))
1667 continue;
1668 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1669 STRIP_USELESS_TYPE_CONVERSION (newop1);
1670 STRIP_USELESS_TYPE_CONVERSION (newop2);
1671 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1672 continue;
1673 }
1674
5d6da7da 1675 if (dump_file && (dump_flags & TDF_DETAILS))
1676 {
1677 fprintf (dump_file, "Equivalence: ");
1678 print_generic_expr (dump_file, curr->op, 0);
1679 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1680 print_generic_expr (dump_file, oe->op, 0);
1681 fprintf (dump_file, " -> ");
1682 print_generic_expr (dump_file, t, 0);
1683 fprintf (dump_file, "\n");
1684 }
1685
1686 /* Now we can delete oe, as it has been subsumed by the new combined
1687 expression t. */
f1f41a6c 1688 ops->ordered_remove (i);
5d6da7da 1689 reassociate_stats.ops_eliminated ++;
1690
1691 /* If t is the same as curr->op, we're done. Otherwise we must
1692 replace curr->op with t. Special case is if we got a constant
1693 back, in which case we add it to the end instead of in place of
1694 the current entry. */
1695 if (TREE_CODE (t) == INTEGER_CST)
1696 {
f1f41a6c 1697 ops->ordered_remove (currindex);
5d6da7da 1698 add_to_ops_vec (ops, t);
1699 }
c82d157a 1700 else if (!operand_equal_p (t, curr->op, 0))
5d6da7da 1701 {
5d6da7da 1702 gimple sum;
1703 enum tree_code subcode;
1704 tree newop1;
1705 tree newop2;
b2d33090 1706 gcc_assert (COMPARISON_CLASS_P (t));
5d6da7da 1707 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
b2d33090 1708 STRIP_USELESS_TYPE_CONVERSION (newop1);
1709 STRIP_USELESS_TYPE_CONVERSION (newop2);
1710 gcc_checking_assert (is_gimple_val (newop1)
1711 && is_gimple_val (newop2));
03d37e4e 1712 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
5d6da7da 1713 curr->op = gimple_get_lhs (sum);
1714 }
1715 return true;
1716 }
1717
1718 return false;
1719}
dddf5036 1720
54aceb26 1721/* Perform various identities and other optimizations on the list of
1722 operand entries, stored in OPS. The tree code for the binary
1723 operation between all the operands is OPCODE. */
3dec5460 1724
54aceb26 1725static void
1726optimize_ops_list (enum tree_code opcode,
f1f41a6c 1727 vec<operand_entry_t> *ops)
54aceb26 1728{
f1f41a6c 1729 unsigned int length = ops->length ();
54aceb26 1730 unsigned int i;
1731 operand_entry_t oe;
1732 operand_entry_t oelast = NULL;
1733 bool iterate = false;
3dec5460 1734
54aceb26 1735 if (length == 1)
1736 return;
3dec5460 1737
f1f41a6c 1738 oelast = ops->last ();
3dec5460 1739
54aceb26 1740 /* If the last two are constants, pop the constants off, merge them
1741 and try the next two. */
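  /* E.g. for a + 3 + b + 5 the two constants end up at the tail of the
     rank-sorted vector, so 3 and 5 are folded here into a single 8.  */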
1742 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1743 {
f1f41a6c 1744 operand_entry_t oelm1 = (*ops)[length - 2];
54aceb26 1745
1746 if (oelm1->rank == 0
1747 && is_gimple_min_invariant (oelm1->op)
c8ca3ee7 1748 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1749 TREE_TYPE (oelast->op)))
54aceb26 1750 {
5f9acd88 1751 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
54aceb26 1752 oelm1->op, oelast->op);
1753
5f9acd88 1754 if (folded && is_gimple_min_invariant (folded))
54aceb26 1755 {
1756 if (dump_file && (dump_flags & TDF_DETAILS))
1757 fprintf (dump_file, "Merging constants\n");
1758
f1f41a6c 1759 ops->pop ();
1760 ops->pop ();
54aceb26 1761
1762 add_to_ops_vec (ops, folded);
1763 reassociate_stats.constants_eliminated++;
1764
1765 optimize_ops_list (opcode, ops);
1766 return;
1767 }
1768 }
1769 }
1770
1771 eliminate_using_constants (opcode, ops);
1772 oelast = NULL;
1773
f1f41a6c 1774 for (i = 0; ops->iterate (i, &oe);)
54aceb26 1775 {
1776 bool done = false;
1777
1778 if (eliminate_not_pairs (opcode, ops, i, oe))
1779 return;
1780 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
5d6da7da 1781 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1782 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
54aceb26 1783 {
1784 if (done)
1785 return;
1786 iterate = true;
1787 oelast = NULL;
1788 continue;
1789 }
1790 oelast = oe;
1791 i++;
1792 }
1793
f1f41a6c 1794 length = ops->length ();
1795 oelast = ops->last ();
54aceb26 1796
1797 if (iterate)
1798 optimize_ops_list (opcode, ops);
1799}
1800
946e9eb4 1801/* The following functions are subroutines to optimize_range_tests and allow
1802 it to try to change a logical combination of comparisons into a range
1803 test.
1804
1805 For example, both
1806 X == 2 || X == 5 || X == 3 || X == 4
1807 and
1808 X >= 2 && X <= 5
1809 are converted to
1810 (unsigned) (X - 2) <= 3
1811
1812   For more information see the comments above fold_test_range in
1813   fold-const.c; this implementation is for GIMPLE.  */
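/* To see why the single unsigned comparison is equivalent: subtracting
   2 maps the values 2, 3, 4 and 5 to 0, 1, 2 and 3, while in unsigned
   arithmetic any X < 2 wraps around to a huge value and any X > 5
   yields a result above 3, so (unsigned) (X - 2) <= 3 holds exactly
   when 2 <= X <= 5.  */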
1814
1815struct range_entry
1816{
1817 tree exp;
1818 tree low;
1819 tree high;
1820 bool in_p;
1821 bool strict_overflow_p;
1822 unsigned int idx, next;
1823};
1824
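/* For instance, X >= 2 && X <= 5 is described by a range_entry with
   exp = X, low = 2, high = 5 and in_p = true, while X != 4 has
   low = high = 4 and in_p = false; a NULL low or high stands for the
   type's minimum resp. maximum.  */
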
1825/* This is similar to make_range in fold-const.c, but on top of
8a2c7744 1826 GIMPLE instead of trees. If EXP is non-NULL, it should be
1827   an SSA_NAME and the STMT argument is ignored; otherwise the STMT
1828 argument should be a GIMPLE_COND. */
946e9eb4 1829
1830static void
8a2c7744 1831init_range_entry (struct range_entry *r, tree exp, gimple stmt)
946e9eb4 1832{
1833 int in_p;
1834 tree low, high;
1835 bool is_bool, strict_overflow_p;
1836
1837 r->exp = NULL_TREE;
1838 r->in_p = false;
1839 r->strict_overflow_p = false;
1840 r->low = NULL_TREE;
1841 r->high = NULL_TREE;
8a2c7744 1842 if (exp != NULL_TREE
1843 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
946e9eb4 1844 return;
1845
1846 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1847 and see if we can refine the range. Some of the cases below may not
1848 happen, but it doesn't seem worth worrying about this. We "continue"
1849 the outer loop when we've changed something; otherwise we "break"
1850 the switch, which will "break" the while. */
8a2c7744 1851 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
946e9eb4 1852 high = low;
1853 in_p = 0;
1854 strict_overflow_p = false;
1855 is_bool = false;
8a2c7744 1856 if (exp == NULL_TREE)
1857 is_bool = true;
1858 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
946e9eb4 1859 {
1860 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1861 is_bool = true;
1862 else
1863 return;
1864 }
1865 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1866 is_bool = true;
1867
1868 while (1)
1869 {
946e9eb4 1870 enum tree_code code;
1871 tree arg0, arg1, exp_type;
1872 tree nexp;
1873 location_t loc;
1874
8a2c7744 1875 if (exp != NULL_TREE)
1876 {
81cab242 1877 if (TREE_CODE (exp) != SSA_NAME
1878 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
8a2c7744 1879 break;
946e9eb4 1880
8a2c7744 1881 stmt = SSA_NAME_DEF_STMT (exp);
1882 if (!is_gimple_assign (stmt))
1883 break;
1884
1885 code = gimple_assign_rhs_code (stmt);
1886 arg0 = gimple_assign_rhs1 (stmt);
1887 arg1 = gimple_assign_rhs2 (stmt);
1888 exp_type = TREE_TYPE (exp);
1889 }
1890 else
1891 {
1892 code = gimple_cond_code (stmt);
1893 arg0 = gimple_cond_lhs (stmt);
1894 arg1 = gimple_cond_rhs (stmt);
1895 exp_type = boolean_type_node;
1896 }
946e9eb4 1897
9adacac7 1898 if (TREE_CODE (arg0) != SSA_NAME)
1899 break;
946e9eb4 1900 loc = gimple_location (stmt);
1901 switch (code)
1902 {
1903 case BIT_NOT_EXPR:
e423480a 1904 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1905 /* Ensure the range is either +[-,0], +[0,0],
1906 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1907 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1908 or similar expression of unconditional true or
1909 false, it should not be negated. */
1910 && ((high && integer_zerop (high))
1911 || (low && integer_onep (low))))
946e9eb4 1912 {
1913 in_p = !in_p;
1914 exp = arg0;
1915 continue;
1916 }
1917 break;
1918 case SSA_NAME:
1919 exp = arg0;
1920 continue;
1921 CASE_CONVERT:
1922 if (is_bool)
1923 goto do_default;
1924 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1925 {
1926 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1927 is_bool = true;
1928 else
1929 return;
1930 }
1931 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1932 is_bool = true;
1933 goto do_default;
1934 case EQ_EXPR:
1935 case NE_EXPR:
1936 case LT_EXPR:
1937 case LE_EXPR:
1938 case GE_EXPR:
1939 case GT_EXPR:
1940 is_bool = true;
1941 /* FALLTHRU */
1942 default:
1943 if (!is_bool)
1944 return;
1945 do_default:
1946 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1947 &low, &high, &in_p,
1948 &strict_overflow_p);
1949 if (nexp != NULL_TREE)
1950 {
1951 exp = nexp;
1952 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1953 continue;
1954 }
1955 break;
1956 }
1957 break;
1958 }
1959 if (is_bool)
1960 {
1961 r->exp = exp;
1962 r->in_p = in_p;
1963 r->low = low;
1964 r->high = high;
1965 r->strict_overflow_p = strict_overflow_p;
1966 }
1967}
1968
1969/* Comparison function for qsort. Sort entries
1970 without SSA_NAME exp first, then with SSA_NAMEs sorted
1971 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1972 by increasing ->low and if ->low is the same, by increasing
1973 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1974 maximum. */
1975
1976static int
1977range_entry_cmp (const void *a, const void *b)
1978{
1979 const struct range_entry *p = (const struct range_entry *) a;
1980 const struct range_entry *q = (const struct range_entry *) b;
1981
1982 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1983 {
1984 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1985 {
1986 /* Group range_entries for the same SSA_NAME together. */
1987 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1988 return -1;
1989 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1990 return 1;
1991 /* If ->low is different, NULL low goes first, then by
1992 ascending low. */
1993 if (p->low != NULL_TREE)
1994 {
1995 if (q->low != NULL_TREE)
1996 {
1997 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1998 p->low, q->low);
1999 if (tem && integer_onep (tem))
2000 return -1;
2001 tem = fold_binary (GT_EXPR, boolean_type_node,
2002 p->low, q->low);
2003 if (tem && integer_onep (tem))
2004 return 1;
2005 }
2006 else
2007 return 1;
2008 }
2009 else if (q->low != NULL_TREE)
2010 return -1;
2011 /* If ->high is different, NULL high goes last, before that by
2012 ascending high. */
2013 if (p->high != NULL_TREE)
2014 {
2015 if (q->high != NULL_TREE)
2016 {
2017 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2018 p->high, q->high);
2019 if (tem && integer_onep (tem))
2020 return -1;
2021 tem = fold_binary (GT_EXPR, boolean_type_node,
2022 p->high, q->high);
2023 if (tem && integer_onep (tem))
2024 return 1;
2025 }
2026 else
2027 return -1;
2028 }
eff8617c 2029 else if (q->high != NULL_TREE)
946e9eb4 2030 return 1;
2031 /* If both ranges are the same, sort below by ascending idx. */
2032 }
2033 else
2034 return 1;
2035 }
2036 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2037 return -1;
2038
2039 if (p->idx < q->idx)
2040 return -1;
2041 else
2042 {
2043 gcc_checking_assert (p->idx > q->idx);
2044 return 1;
2045 }
2046}
2047
2048/* Helper routine of optimize_range_tests.
2049 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2050 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
e3668db5 2051 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2052   is NULL, OTHERRANGEP must not be NULL, and then OTHERRANGEP points to
2053 an array of COUNT pointers to other ranges. Return
8a2c7744 2054 true if the range merge has been successful.
2055 If OPCODE is ERROR_MARK, this is called from within
2056 maybe_optimize_range_tests and is performing inter-bb range optimization.
82cb4e1c 2057 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2058 oe->rank. */
946e9eb4 2059
2060static bool
2061update_range_test (struct range_entry *range, struct range_entry *otherrange,
e3668db5 2062 struct range_entry **otherrangep,
946e9eb4 2063 unsigned int count, enum tree_code opcode,
e3668db5 2064 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2065 bool in_p, tree low, tree high, bool strict_overflow_p)
946e9eb4 2066{
f1f41a6c 2067 operand_entry_t oe = (*ops)[range->idx];
8a2c7744 2068 tree op = oe->op;
f5a6b05f 2069 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2070 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
8a2c7744 2071 location_t loc = gimple_location (stmt);
2072 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
e3668db5 2073 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2074 in_p, low, high);
946e9eb4 2075 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2076 gimple_stmt_iterator gsi;
e3668db5 2077 unsigned int i;
946e9eb4 2078
2079 if (tem == NULL_TREE)
2080 return false;
2081
2082 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2083 warning_at (loc, OPT_Wstrict_overflow,
2084 "assuming signed overflow does not occur "
2085 "when simplifying range test");
2086
2087 if (dump_file && (dump_flags & TDF_DETAILS))
2088 {
2089 struct range_entry *r;
2090 fprintf (dump_file, "Optimizing range tests ");
2091 print_generic_expr (dump_file, range->exp, 0);
2092 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2093 print_generic_expr (dump_file, range->low, 0);
2094 fprintf (dump_file, ", ");
2095 print_generic_expr (dump_file, range->high, 0);
2096 fprintf (dump_file, "]");
e3668db5 2097 for (i = 0; i < count; i++)
946e9eb4 2098 {
e3668db5 2099 if (otherrange)
2100 r = otherrange + i;
2101 else
2102 r = otherrangep[i];
946e9eb4 2103 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2104 print_generic_expr (dump_file, r->low, 0);
2105 fprintf (dump_file, ", ");
2106 print_generic_expr (dump_file, r->high, 0);
2107 fprintf (dump_file, "]");
2108 }
2109 fprintf (dump_file, "\n into ");
2110 print_generic_expr (dump_file, tem, 0);
2111 fprintf (dump_file, "\n");
2112 }
2113
8a2c7744 2114 if (opcode == BIT_IOR_EXPR
2115 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
946e9eb4 2116 tem = invert_truthvalue_loc (loc, tem);
2117
8a2c7744 2118 tem = fold_convert_loc (loc, optype, tem);
2119 gsi = gsi_for_stmt (stmt);
927d5076 2120 unsigned int uid = gimple_uid (stmt);
a0ebffaa 2121 /* In rare cases range->exp can be equal to lhs of stmt.
2122     In that case we have to insert after the stmt rather than before
927d5076 2123 it. If stmt is a PHI, insert it at the start of the basic block. */
2124 if (op != range->exp)
2125 {
2126 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2127 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2128 GSI_SAME_STMT);
2129 gsi_prev (&gsi);
2130 }
2131 else if (gimple_code (stmt) != GIMPLE_PHI)
e3668db5 2132 {
2133 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2134 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2135 GSI_CONTINUE_LINKING);
2136 }
a0ebffaa 2137 else
2138 {
927d5076 2139 gsi = gsi_after_labels (gimple_bb (stmt));
2140 if (!gsi_end_p (gsi))
2141 uid = gimple_uid (gsi_stmt (gsi));
2142 else
2143 {
2144 gsi = gsi_start_bb (gimple_bb (stmt));
2145 uid = 1;
2146 while (!gsi_end_p (gsi))
2147 {
2148 uid = gimple_uid (gsi_stmt (gsi));
2149 gsi_next (&gsi);
2150 }
2151 }
e3668db5 2152 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
a0ebffaa 2153 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2154 GSI_SAME_STMT);
927d5076 2155 if (gsi_end_p (gsi))
2156 gsi = gsi_last_bb (gimple_bb (stmt));
2157 else
2158 gsi_prev (&gsi);
a0ebffaa 2159 }
2160 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
82cb4e1c 2161 if (gimple_uid (gsi_stmt (gsi)))
2162 break;
2163 else
927d5076 2164 gimple_set_uid (gsi_stmt (gsi), uid);
946e9eb4 2165
8a2c7744 2166 oe->op = tem;
946e9eb4 2167 range->exp = exp;
2168 range->low = low;
2169 range->high = high;
2170 range->in_p = in_p;
2171 range->strict_overflow_p = false;
2172
e3668db5 2173 for (i = 0; i < count; i++)
946e9eb4 2174 {
e3668db5 2175 if (otherrange)
2176 range = otherrange + i;
2177 else
2178 range = otherrangep[i];
f1f41a6c 2179 oe = (*ops)[range->idx];
8a2c7744 2180 /* Now change all the other range test immediate uses, so that
2181 those tests will be optimized away. */
2182 if (opcode == ERROR_MARK)
2183 {
2184 if (oe->op)
82cb4e1c 2185 oe->op = build_int_cst (TREE_TYPE (oe->op),
2186 oe->rank == BIT_IOR_EXPR ? 0 : 1);
8a2c7744 2187 else
82cb4e1c 2188 oe->op = (oe->rank == BIT_IOR_EXPR
2189 ? boolean_false_node : boolean_true_node);
8a2c7744 2190 }
82cb4e1c 2191 else
2192 oe->op = error_mark_node;
946e9eb4 2193 range->exp = NULL_TREE;
2194 }
2195 return true;
2196}
2197
b0c0c879 2198/* Optimize X == CST1 || X == CST2
2199 if popcount (CST1 ^ CST2) == 1 into
2200 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2201 Similarly for ranges. E.g.
2202 X != 2 && X != 3 && X != 10 && X != 11
2203 will be transformed by the previous optimization into
2204 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2205 and this loop can transform that into
2206 !(((X & ~8) - 2U) <= 1U). */
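/* As a concrete instance of the constant case: for X == 1 || X == 5
   we have CST1 ^ CST2 == 4, a single bit, and masking that bit away
   maps both 1 and 5 to 1, so the test becomes (X & ~4) == 1.  In the
   range example above, X & ~8 maps [10, 11] onto [2, 3], which is why
   a single range check then covers both ranges.  */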
2207
2208static bool
2209optimize_range_tests_xor (enum tree_code opcode, tree type,
2210 tree lowi, tree lowj, tree highi, tree highj,
2211 vec<operand_entry_t> *ops,
2212 struct range_entry *rangei,
2213 struct range_entry *rangej)
2214{
2215 tree lowxor, highxor, tem, exp;
e3668db5 2216 /* Check lowi ^ lowj == highi ^ highj and
2217 popcount (lowi ^ lowj) == 1. */
b0c0c879 2218 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2219 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2220 return false;
8016354a 2221 if (!integer_pow2p (lowxor))
b0c0c879 2222 return false;
2223 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2224 if (!tree_int_cst_equal (lowxor, highxor))
2225 return false;
2226
2227 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2228 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2229 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2230 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
e3668db5 2231 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2232 NULL, rangei->in_p, lowj, highj,
b0c0c879 2233 rangei->strict_overflow_p
2234 || rangej->strict_overflow_p))
2235 return true;
2236 return false;
2237}
2238
2239/* Optimize X == CST1 || X == CST2
2240 if popcount (CST2 - CST1) == 1 into
2241 ((X - CST1) & ~(CST2 - CST1)) == 0.
2242 Similarly for ranges. E.g.
2243 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2244 || X == 75 || X == 45
2245 will be transformed by the previous optimization into
2246 (X - 43U) <= 3U || (X - 75U) <= 3U
2247 and this loop can transform that into
2248 ((X - 43U) & ~(75U - 43U)) <= 3U. */
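/* Working through the example above: both ranges span
   highi - lowi == 3 values and lowj - lowi == 75 - 43 == 32 is a
   power of two.  For X in [43, 46], (X - 43U) & ~32U is X - 43
   itself, i.e. 0 through 3; for X in [75, 78], X - 43 is 32 through
   35 and masking off bit 5 again yields 0 through 3, so the single
   masked comparison covers both ranges.  */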
2249static bool
2250optimize_range_tests_diff (enum tree_code opcode, tree type,
2251 tree lowi, tree lowj, tree highi, tree highj,
2252 vec<operand_entry_t> *ops,
2253 struct range_entry *rangei,
2254 struct range_entry *rangej)
2255{
2256 tree tem1, tem2, mask;
2257 /* Check highi - lowi == highj - lowj. */
2258 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2259 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2260 return false;
2261 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2262 if (!tree_int_cst_equal (tem1, tem2))
2263 return false;
2264 /* Check popcount (lowj - lowi) == 1. */
2265 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2266 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2267 return false;
8016354a 2268 if (!integer_pow2p (tem1))
b0c0c879 2269 return false;
2270
db7164e3 2271 type = unsigned_type_for (type);
2272 tem1 = fold_convert (type, tem1);
2273 tem2 = fold_convert (type, tem2);
2274 lowi = fold_convert (type, lowi);
b0c0c879 2275 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
db7164e3 2276 tem1 = fold_binary (MINUS_EXPR, type,
2277 fold_convert (type, rangei->exp), lowi);
b0c0c879 2278 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2279 lowj = build_int_cst (type, 0);
e3668db5 2280 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2281 NULL, rangei->in_p, lowj, tem2,
b0c0c879 2282 rangei->strict_overflow_p
2283 || rangej->strict_overflow_p))
2284 return true;
2285 return false;
2286}
2287
2288/* Perform the common checks shared by optimize_range_tests_xor and
2289   optimize_range_tests_diff.
2290 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2291 Else it calls optimize_range_tests_diff. */
2292
2293static bool
2294optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2295 bool optimize_xor, vec<operand_entry_t> *ops,
2296 struct range_entry *ranges)
2297{
2298 int i, j;
2299 bool any_changes = false;
2300 for (i = first; i < length; i++)
2301 {
2302 tree lowi, highi, lowj, highj, type, tem;
2303
2304 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2305 continue;
2306 type = TREE_TYPE (ranges[i].exp);
2307 if (!INTEGRAL_TYPE_P (type))
2308 continue;
2309 lowi = ranges[i].low;
2310 if (lowi == NULL_TREE)
2311 lowi = TYPE_MIN_VALUE (type);
2312 highi = ranges[i].high;
2313 if (highi == NULL_TREE)
2314 continue;
2315 for (j = i + 1; j < length && j < i + 64; j++)
2316 {
2317 bool changes;
2318 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2319 continue;
2320 lowj = ranges[j].low;
2321 if (lowj == NULL_TREE)
2322 continue;
2323 highj = ranges[j].high;
2324 if (highj == NULL_TREE)
2325 highj = TYPE_MAX_VALUE (type);
2326 /* Check lowj > highi. */
2327 tem = fold_binary (GT_EXPR, boolean_type_node,
2328 lowj, highi);
2329 if (tem == NULL_TREE || !integer_onep (tem))
2330 continue;
2331 if (optimize_xor)
2332 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2333 highi, highj, ops,
2334 ranges + i, ranges + j);
2335 else
2336 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2337 highi, highj, ops,
2338 ranges + i, ranges + j);
2339 if (changes)
2340 {
2341 any_changes = true;
2342 break;
2343 }
2344 }
2345 }
2346 return any_changes;
2347}
2348
e3668db5 2349/* Helper function of optimize_range_tests_to_bit_test. Handle a single
2350   range, EXP, LOW, HIGH; compute the bit mask of bits to test and return
2351 EXP on success, NULL otherwise. */
2352
2353static tree
2354extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2355 wide_int *mask, tree *totallowp)
2356{
2357 tree tem = int_const_binop (MINUS_EXPR, high, low);
2358 if (tem == NULL_TREE
2359 || TREE_CODE (tem) != INTEGER_CST
2360 || TREE_OVERFLOW (tem)
2361 || tree_int_cst_sgn (tem) == -1
2362 || compare_tree_int (tem, prec) != -1)
2363 return NULL_TREE;
2364
2365 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2366 *mask = wi::shifted_mask (0, max, false, prec);
2367 if (TREE_CODE (exp) == BIT_AND_EXPR
2368 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2369 {
2370 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2371 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2372 if (wi::popcount (msk) == 1
2373 && wi::ltu_p (msk, prec - max))
2374 {
2375 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2376 max += msk.to_uhwi ();
2377 exp = TREE_OPERAND (exp, 0);
2378 if (integer_zerop (low)
2379 && TREE_CODE (exp) == PLUS_EXPR
2380 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2381 {
f6835415 2382 tree ret = TREE_OPERAND (exp, 0);
2383 STRIP_NOPS (ret);
e3668db5 2384 widest_int bias
2385 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2386 TYPE_PRECISION (TREE_TYPE (low))));
f6835415 2387 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
e3668db5 2388 if (totallowp)
2389 {
2390 *totallowp = tbias;
f6835415 2391 return ret;
e3668db5 2392 }
2393 else if (!tree_int_cst_lt (totallow, tbias))
2394 return NULL_TREE;
f6835415 2395 bias = wi::to_widest (tbias);
e3668db5 2396 bias -= wi::to_widest (totallow);
2397 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2398 {
2399 *mask = wi::lshift (*mask, bias);
f6835415 2400 return ret;
e3668db5 2401 }
2402 }
2403 }
2404 }
2405 if (totallowp)
2406 return exp;
2407 if (!tree_int_cst_lt (totallow, low))
2408 return exp;
2409 tem = int_const_binop (MINUS_EXPR, low, totallow);
2410 if (tem == NULL_TREE
2411 || TREE_CODE (tem) != INTEGER_CST
2412 || TREE_OVERFLOW (tem)
2413 || compare_tree_int (tem, prec - max) == 1)
2414 return NULL_TREE;
2415
2416 *mask = wi::lshift (*mask, wi::to_widest (tem));
2417 return exp;
2418}
2419
2420/* Attempt to optimize small range tests using a bit test.
2421   E.g.
2422   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2423   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2424   has been optimized by earlier passes into:
2425   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2426   As the whole 43 through 82 range spans fewer than 64 values,
2427   for 64-bit word targets optimize that into:
2428   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
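/* The 0x8F0000004FULL mask above encodes the offsets of the tested
   values from the base 43: 43 through 46 set bits 0-3 (0xF), 49 sets
   bit 6 (0x40), 75 through 78 set bits 32-35 and 82 sets bit 39,
   which together give 0x8F0000004F; the shift and AND then test all
   ten values at once.  */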
2429
2430static bool
2431optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2432 vec<operand_entry_t> *ops,
2433 struct range_entry *ranges)
2434{
2435 int i, j;
2436 bool any_changes = false;
2437 int prec = GET_MODE_BITSIZE (word_mode);
2438 auto_vec<struct range_entry *, 64> candidates;
2439
2440 for (i = first; i < length - 2; i++)
2441 {
2442 tree lowi, highi, lowj, highj, type;
2443
2444 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2445 continue;
2446 type = TREE_TYPE (ranges[i].exp);
2447 if (!INTEGRAL_TYPE_P (type))
2448 continue;
2449 lowi = ranges[i].low;
2450 if (lowi == NULL_TREE)
2451 lowi = TYPE_MIN_VALUE (type);
2452 highi = ranges[i].high;
2453 if (highi == NULL_TREE)
2454 continue;
2455 wide_int mask;
2456 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2457 highi, &mask, &lowi);
2458 if (exp == NULL_TREE)
2459 continue;
2460 bool strict_overflow_p = ranges[i].strict_overflow_p;
2461 candidates.truncate (0);
2462 int end = MIN (i + 64, length);
2463 for (j = i + 1; j < end; j++)
2464 {
2465 tree exp2;
2466 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2467 continue;
2468 if (ranges[j].exp == exp)
2469 ;
2470 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2471 {
2472 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2473 if (exp2 == exp)
2474 ;
2475 else if (TREE_CODE (exp2) == PLUS_EXPR)
2476 {
2477 exp2 = TREE_OPERAND (exp2, 0);
2478 STRIP_NOPS (exp2);
2479 if (exp2 != exp)
2480 continue;
2481 }
2482 else
2483 continue;
2484 }
2485 else
2486 continue;
2487 lowj = ranges[j].low;
2488 if (lowj == NULL_TREE)
2489 continue;
2490 highj = ranges[j].high;
2491 if (highj == NULL_TREE)
2492 highj = TYPE_MAX_VALUE (type);
2493 wide_int mask2;
2494 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2495 highj, &mask2, NULL);
2496 if (exp2 != exp)
2497 continue;
2498 mask |= mask2;
2499 strict_overflow_p |= ranges[j].strict_overflow_p;
2500 candidates.safe_push (&ranges[j]);
2501 }
2502
2503      /* If we would otherwise need 3 or more comparisons, use a bit test.  */
2504 if (candidates.length () >= 2)
2505 {
2506 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2507 wi::to_widest (lowi)
8f936b5e 2508 + prec - 1 - wi::clz (mask));
e3668db5 2509 operand_entry_t oe = (*ops)[ranges[i].idx];
2510 tree op = oe->op;
2511 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2512 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2513 location_t loc = gimple_location (stmt);
2514 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2515
2516 /* See if it isn't cheaper to pretend the minimum value of the
2517	     range is 0, if the maximum value is small enough.
2518	     We can then avoid the subtraction of the minimum value, but the
2519	     mask constant could perhaps be more expensive.  */
2520 if (compare_tree_int (lowi, 0) > 0
2521 && compare_tree_int (high, prec) < 0)
2522 {
2523 int cost_diff;
2524 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2525 rtx reg = gen_raw_REG (word_mode, 10000);
2526 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2527 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2528 GEN_INT (-m)), speed_p);
2529 rtx r = immed_wide_int_const (mask, word_mode);
2530 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
5ae4887d 2531 word_mode, speed_p);
e3668db5 2532 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2533 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
5ae4887d 2534 word_mode, speed_p);
e3668db5 2535 if (cost_diff > 0)
2536 {
2537 mask = wi::lshift (mask, m);
2538 lowi = build_zero_cst (TREE_TYPE (lowi));
2539 }
2540 }
2541
2542 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2543 false, lowi, high);
2544 if (tem == NULL_TREE || is_gimple_val (tem))
2545 continue;
2546 tree etype = unsigned_type_for (TREE_TYPE (exp));
2547 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2548 fold_convert_loc (loc, etype, exp),
2549 fold_convert_loc (loc, etype, lowi));
2550 exp = fold_convert_loc (loc, integer_type_node, exp);
2551 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2552 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2553 build_int_cst (word_type, 1), exp);
2554 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2555 wide_int_to_tree (word_type, mask));
2556 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2557 build_zero_cst (word_type));
2558 if (is_gimple_val (exp))
2559 continue;
2560
2561 /* The shift might have undefined behavior if TEM is true,
2562 but reassociate_bb isn't prepared to have basic blocks
2563	     split when it is running.  So, temporarily emit code
2564 with BIT_IOR_EXPR instead of &&, and fix it up in
2565 branch_fixup. */
2566 gimple_seq seq;
2567 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2568 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2569 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2570 gimple_seq seq2;
2571 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2572 gimple_seq_add_seq_without_update (&seq, seq2);
2573 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2574 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
e9cf809e 2575 gimple g = gimple_build_assign (make_ssa_name (optype),
2576 BIT_IOR_EXPR, tem, exp);
e3668db5 2577 gimple_set_location (g, loc);
2578 gimple_seq_add_stmt_without_update (&seq, g);
2579 exp = gimple_assign_lhs (g);
2580 tree val = build_zero_cst (optype);
2581 if (update_range_test (&ranges[i], NULL, candidates.address (),
2582 candidates.length (), opcode, ops, exp,
2583 seq, false, val, val, strict_overflow_p))
2584 {
2585 any_changes = true;
2586 reassoc_branch_fixups.safe_push (tem);
2587 }
2588 else
2589 gimple_seq_discard (seq);
2590 }
2591 }
2592 return any_changes;
2593}
2594
946e9eb4 2595/* Optimize range tests, similarly to how fold_range_test optimizes
2596 it on trees. The tree code for the binary
8a2c7744 2597 operation between all the operands is OPCODE.
2598 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2599 maybe_optimize_range_tests for inter-bb range optimization.
2600 In that case if oe->op is NULL, oe->id is bb->index whose
2601 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2602 the actual opcode. */
946e9eb4 2603
82cb4e1c 2604static bool
946e9eb4 2605optimize_range_tests (enum tree_code opcode,
f1f41a6c 2606 vec<operand_entry_t> *ops)
946e9eb4 2607{
f1f41a6c 2608 unsigned int length = ops->length (), i, j, first;
946e9eb4 2609 operand_entry_t oe;
2610 struct range_entry *ranges;
2611 bool any_changes = false;
2612
2613 if (length == 1)
82cb4e1c 2614 return false;
946e9eb4 2615
2616 ranges = XNEWVEC (struct range_entry, length);
2617 for (i = 0; i < length; i++)
2618 {
f1f41a6c 2619 oe = (*ops)[i];
946e9eb4 2620 ranges[i].idx = i;
8a2c7744 2621 init_range_entry (ranges + i, oe->op,
f5a6b05f 2622 oe->op ? NULL :
2623 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
946e9eb4 2624 /* For | invert it now, we will invert it again before emitting
2625 the optimized expression. */
8a2c7744 2626 if (opcode == BIT_IOR_EXPR
2627 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
946e9eb4 2628 ranges[i].in_p = !ranges[i].in_p;
2629 }
2630
2631 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2632 for (i = 0; i < length; i++)
2633 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2634 break;
2635
2636 /* Try to merge ranges. */
2637 for (first = i; i < length; i++)
2638 {
2639 tree low = ranges[i].low;
2640 tree high = ranges[i].high;
2641 int in_p = ranges[i].in_p;
2642 bool strict_overflow_p = ranges[i].strict_overflow_p;
2643 int update_fail_count = 0;
2644
2645 for (j = i + 1; j < length; j++)
2646 {
2647 if (ranges[i].exp != ranges[j].exp)
2648 break;
2649 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2650 ranges[j].in_p, ranges[j].low, ranges[j].high))
2651 break;
2652 strict_overflow_p |= ranges[j].strict_overflow_p;
2653 }
2654
2655 if (j == i + 1)
2656 continue;
2657
e3668db5 2658 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2659 opcode, ops, ranges[i].exp, NULL, in_p,
2660 low, high, strict_overflow_p))
946e9eb4 2661 {
2662 i = j - 1;
2663 any_changes = true;
2664 }
2665 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2666 while update_range_test would fail. */
2667 else if (update_fail_count == 64)
2668 i = j - 1;
2669 else
2670 ++update_fail_count;
2671 }
2672
b0c0c879 2673 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2674 ops, ranges);
946e9eb4 2675
b0c0c879 2676 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2677 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2678 ops, ranges);
e3668db5 2679 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2680 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2681 ops, ranges);
946e9eb4 2682
8a2c7744 2683 if (any_changes && opcode != ERROR_MARK)
946e9eb4 2684 {
2685 j = 0;
f1f41a6c 2686 FOR_EACH_VEC_ELT (*ops, i, oe)
946e9eb4 2687 {
2688 if (oe->op == error_mark_node)
2689 continue;
2690 else if (i != j)
f1f41a6c 2691 (*ops)[j] = oe;
946e9eb4 2692 j++;
2693 }
f1f41a6c 2694 ops->truncate (j);
946e9eb4 2695 }
2696
2697 XDELETEVEC (ranges);
82cb4e1c 2698 return any_changes;
946e9eb4 2699}
2700
8a2c7744 2701/* Return true if STMT is a cast like:
2702 <bb N>:
2703 ...
2704 _123 = (int) _234;
2705
2706 <bb M>:
2707 # _345 = PHI <_123(N), 1(...), 1(...)>
2708   where _234 has bool type, _123 has a single use and
2709 bb N has a single successor M. This is commonly used in
2710 the last block of a range test. */
2711
2712static bool
2713final_range_test_p (gimple stmt)
2714{
2715 basic_block bb, rhs_bb;
2716 edge e;
2717 tree lhs, rhs;
2718 use_operand_p use_p;
2719 gimple use_stmt;
2720
2721 if (!gimple_assign_cast_p (stmt))
2722 return false;
2723 bb = gimple_bb (stmt);
2724 if (!single_succ_p (bb))
2725 return false;
2726 e = single_succ_edge (bb);
2727 if (e->flags & EDGE_COMPLEX)
2728 return false;
2729
2730 lhs = gimple_assign_lhs (stmt);
2731 rhs = gimple_assign_rhs1 (stmt);
2732 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2733 || TREE_CODE (rhs) != SSA_NAME
2734 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2735 return false;
2736
2737 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2738 if (!single_imm_use (lhs, &use_p, &use_stmt))
2739 return false;
2740
2741 if (gimple_code (use_stmt) != GIMPLE_PHI
2742 || gimple_bb (use_stmt) != e->dest)
2743 return false;
2744
2745 /* And that the rhs is defined in the same loop. */
2746 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2747 if (rhs_bb == NULL
2748 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2749 return false;
2750
2751 return true;
2752}
2753
2754/* Return true if BB is suitable basic block for inter-bb range test
2755 optimization. If BACKWARD is true, BB should be the only predecessor
2756 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2757 or compared with to find a common basic block to which all conditions
2758 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2759 be the only predecessor of BB. */
2760
2761static bool
2762suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2763 bool backward)
2764{
2765 edge_iterator ei, ei2;
2766 edge e, e2;
2767 gimple stmt;
1a91d914 2768 gphi_iterator gsi;
8a2c7744 2769 bool other_edge_seen = false;
2770 bool is_cond;
2771
2772 if (test_bb == bb)
2773 return false;
2774 /* Check last stmt first. */
2775 stmt = last_stmt (bb);
2776 if (stmt == NULL
2777 || (gimple_code (stmt) != GIMPLE_COND
2778 && (backward || !final_range_test_p (stmt)))
2779 || gimple_visited_p (stmt)
2780 || stmt_could_throw_p (stmt)
2781 || *other_bb == bb)
2782 return false;
2783 is_cond = gimple_code (stmt) == GIMPLE_COND;
2784 if (is_cond)
2785 {
2786 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2787 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2788 to *OTHER_BB (if not set yet, try to find it out). */
2789 if (EDGE_COUNT (bb->succs) != 2)
2790 return false;
2791 FOR_EACH_EDGE (e, ei, bb->succs)
2792 {
2793 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2794 return false;
2795 if (e->dest == test_bb)
2796 {
2797 if (backward)
2798 continue;
2799 else
2800 return false;
2801 }
2802 if (e->dest == bb)
2803 return false;
2804 if (*other_bb == NULL)
2805 {
2806 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2807 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2808 return false;
2809 else if (e->dest == e2->dest)
2810 *other_bb = e->dest;
2811 if (*other_bb == NULL)
2812 return false;
2813 }
2814 if (e->dest == *other_bb)
2815 other_edge_seen = true;
2816 else if (backward)
2817 return false;
2818 }
2819 if (*other_bb == NULL || !other_edge_seen)
2820 return false;
2821 }
2822 else if (single_succ (bb) != *other_bb)
2823 return false;
2824
2825 /* Now check all PHIs of *OTHER_BB. */
2826 e = find_edge (bb, *other_bb);
2827 e2 = find_edge (test_bb, *other_bb);
2828 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2829 {
1a91d914 2830 gphi *phi = gsi.phi ();
8a2c7744 2831 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2832 corresponding to BB and TEST_BB predecessor must be the same. */
2833 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2834 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2835 {
2836 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2837 one of the PHIs should have the lhs of the last stmt in
2838 that block as PHI arg and that PHI should have 0 or 1
2839 corresponding to it in all other range test basic blocks
2840 considered. */
2841 if (!is_cond)
2842 {
2843 if (gimple_phi_arg_def (phi, e->dest_idx)
2844 == gimple_assign_lhs (stmt)
2845 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2846 || integer_onep (gimple_phi_arg_def (phi,
2847 e2->dest_idx))))
2848 continue;
2849 }
2850 else
2851 {
2852 gimple test_last = last_stmt (test_bb);
2853 if (gimple_code (test_last) != GIMPLE_COND
2854 && gimple_phi_arg_def (phi, e2->dest_idx)
2855 == gimple_assign_lhs (test_last)
2856 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2857 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2858 continue;
2859 }
2860
2861 return false;
2862 }
2863 }
2864 return true;
2865}
2866
2867/* Return true if BB doesn't have side-effects that would disallow
2868 range test optimization, all SSA_NAMEs set in the bb are consumed
2869 in the bb and there are no PHIs. */
2870
2871static bool
2872no_side_effect_bb (basic_block bb)
2873{
2874 gimple_stmt_iterator gsi;
2875 gimple last;
2876
2877 if (!gimple_seq_empty_p (phi_nodes (bb)))
2878 return false;
2879 last = last_stmt (bb);
2880 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2881 {
2882 gimple stmt = gsi_stmt (gsi);
2883 tree lhs;
2884 imm_use_iterator imm_iter;
2885 use_operand_p use_p;
2886
2887 if (is_gimple_debug (stmt))
2888 continue;
2889 if (gimple_has_side_effects (stmt))
2890 return false;
2891 if (stmt == last)
2892 return true;
2893 if (!is_gimple_assign (stmt))
2894 return false;
2895 lhs = gimple_assign_lhs (stmt);
2896 if (TREE_CODE (lhs) != SSA_NAME)
2897 return false;
2898 if (gimple_assign_rhs_could_trap_p (stmt))
2899 return false;
2900 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2901 {
2902 gimple use_stmt = USE_STMT (use_p);
2903 if (is_gimple_debug (use_stmt))
2904 continue;
2905 if (gimple_bb (use_stmt) != bb)
2906 return false;
2907 }
2908 }
2909 return false;
2910}
2911
2912/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2913 return true and fill in *OPS recursively. */
2914
2915static bool
f1f41a6c 2916get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
8a2c7744 2917 struct loop *loop)
2918{
2919 gimple stmt = SSA_NAME_DEF_STMT (var);
2920 tree rhs[2];
2921 int i;
2922
2923 if (!is_reassociable_op (stmt, code, loop))
2924 return false;
2925
2926 rhs[0] = gimple_assign_rhs1 (stmt);
2927 rhs[1] = gimple_assign_rhs2 (stmt);
2928 gimple_set_visited (stmt, true);
2929 for (i = 0; i < 2; i++)
2930 if (TREE_CODE (rhs[i]) == SSA_NAME
2931 && !get_ops (rhs[i], code, ops, loop)
2932 && has_single_use (rhs[i]))
2933 {
672758dc 2934 operand_entry_t oe = operand_entry_pool.allocate ();
8a2c7744 2935
2936 oe->op = rhs[i];
2937 oe->rank = code;
2938 oe->id = 0;
2939 oe->count = 1;
f1f41a6c 2940 ops->safe_push (oe);
8a2c7744 2941 }
2942 return true;
2943}
2944
82cb4e1c 2945/* Find the ops that were added by get_ops starting from VAR, see if
2946   they were changed during update_range_test and if so, create new
2947 stmts. */
2948
2949static tree
2950update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2951 unsigned int *pidx, struct loop *loop)
2952{
2953 gimple stmt = SSA_NAME_DEF_STMT (var);
2954 tree rhs[4];
2955 int i;
2956
2957 if (!is_reassociable_op (stmt, code, loop))
2958 return NULL;
2959
2960 rhs[0] = gimple_assign_rhs1 (stmt);
2961 rhs[1] = gimple_assign_rhs2 (stmt);
2962 rhs[2] = rhs[0];
2963 rhs[3] = rhs[1];
2964 for (i = 0; i < 2; i++)
2965 if (TREE_CODE (rhs[i]) == SSA_NAME)
2966 {
2967 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2968 if (rhs[2 + i] == NULL_TREE)
2969 {
2970 if (has_single_use (rhs[i]))
2971 rhs[2 + i] = ops[(*pidx)++]->op;
2972 else
2973 rhs[2 + i] = rhs[i];
2974 }
2975 }
2976 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2977 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2978 {
2979 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
f9e245b2 2980 var = make_ssa_name (TREE_TYPE (var));
e9cf809e 2981 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
2982 rhs[2], rhs[3]);
82cb4e1c 2983 gimple_set_uid (g, gimple_uid (stmt));
2984 gimple_set_visited (g, true);
2985 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2986 }
2987 return var;
2988}
2989
2990/* Structure to track the initial value passed to get_ops and
2991 the range in the ops vector for each basic block. */
2992
2993struct inter_bb_range_test_entry
2994{
2995 tree op;
2996 unsigned int first_idx, last_idx;
2997};
2998
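/* FIRST_IDX and LAST_IDX delimit the half-open range
   [first_idx, last_idx) of entries in the ops vector contributed by
   the basic block; OP is the SSA_NAME the range tests were extracted
   from, or NULL_TREE when the GIMPLE_COND was pushed directly.  */
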
8a2c7744 2999/* Inter-bb range test optimization. */
3000
3001static void
3002maybe_optimize_range_tests (gimple stmt)
3003{
3004 basic_block first_bb = gimple_bb (stmt);
3005 basic_block last_bb = first_bb;
3006 basic_block other_bb = NULL;
3007 basic_block bb;
3008 edge_iterator ei;
3009 edge e;
c2078b80 3010 auto_vec<operand_entry_t> ops;
3011 auto_vec<inter_bb_range_test_entry> bbinfo;
3c8e4ef8 3012 bool any_changes = false;
8a2c7744 3013
3014 /* Consider only basic blocks that end with GIMPLE_COND or
3015 a cast statement satisfying final_range_test_p. All
3016 but the last bb in the first_bb .. last_bb range
3017 should end with GIMPLE_COND. */
3018 if (gimple_code (stmt) == GIMPLE_COND)
3019 {
3020 if (EDGE_COUNT (first_bb->succs) != 2)
3021 return;
3022 }
3023 else if (final_range_test_p (stmt))
3024 other_bb = single_succ (first_bb);
3025 else
3026 return;
3027
3028 if (stmt_could_throw_p (stmt))
3029 return;
3030
3031 /* As relative ordering of post-dominator sons isn't fixed,
3032 maybe_optimize_range_tests can be called first on any
3033 bb in the range we want to optimize. So, start searching
3034     backwards, checking whether first_bb can be set to a predecessor.  */
3035 while (single_pred_p (first_bb))
3036 {
3037 basic_block pred_bb = single_pred (first_bb);
3038 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3039 break;
3040 if (!no_side_effect_bb (first_bb))
3041 break;
3042 first_bb = pred_bb;
3043 }
3044 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3045 Before starting forward search in last_bb successors, find
3046 out the other_bb. */
3047 if (first_bb == last_bb)
3048 {
3049 other_bb = NULL;
3050 /* As non-GIMPLE_COND last stmt always terminates the range,
3051 if forward search didn't discover anything, just give up. */
3052 if (gimple_code (stmt) != GIMPLE_COND)
3053 return;
3054 /* Look at both successors. Either it ends with a GIMPLE_COND
3055 and satisfies suitable_cond_bb, or ends with a cast and
3056 other_bb is that cast's successor. */
3057 FOR_EACH_EDGE (e, ei, first_bb->succs)
3058 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3059 || e->dest == first_bb)
3060 return;
3061 else if (single_pred_p (e->dest))
3062 {
3063 stmt = last_stmt (e->dest);
3064 if (stmt
3065 && gimple_code (stmt) == GIMPLE_COND
3066 && EDGE_COUNT (e->dest->succs) == 2)
3067 {
3068 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3069 break;
3070 else
3071 other_bb = NULL;
3072 }
3073 else if (stmt
3074 && final_range_test_p (stmt)
3075 && find_edge (first_bb, single_succ (e->dest)))
3076 {
3077 other_bb = single_succ (e->dest);
3078 if (other_bb == first_bb)
3079 other_bb = NULL;
3080 }
3081 }
3082 if (other_bb == NULL)
3083 return;
3084 }
3085 /* Now do the forward search, moving last_bb to successor bbs
3086 that aren't other_bb. */
3087 while (EDGE_COUNT (last_bb->succs) == 2)
3088 {
3089 FOR_EACH_EDGE (e, ei, last_bb->succs)
3090 if (e->dest != other_bb)
3091 break;
3092 if (e == NULL)
3093 break;
3094 if (!single_pred_p (e->dest))
3095 break;
3096 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3097 break;
3098 if (!no_side_effect_bb (e->dest))
3099 break;
3100 last_bb = e->dest;
3101 }
3102 if (first_bb == last_bb)
3103 return;
3104 /* Here basic blocks first_bb through last_bb's predecessor
3105 end with GIMPLE_COND, all of them have one of the edges to
3106 other_bb and another to another block in the range,
3107 all blocks except first_bb don't have side-effects and
3108 last_bb ends with either GIMPLE_COND, or cast satisfying
3109 final_range_test_p. */
3110 for (bb = last_bb; ; bb = single_pred (bb))
3111 {
3112 enum tree_code code;
3113 tree lhs, rhs;
82cb4e1c 3114 inter_bb_range_test_entry bb_ent;
8a2c7744 3115
82cb4e1c 3116 bb_ent.op = NULL_TREE;
3117 bb_ent.first_idx = ops.length ();
3118 bb_ent.last_idx = bb_ent.first_idx;
8a2c7744 3119 e = find_edge (bb, other_bb);
3120 stmt = last_stmt (bb);
3121 gimple_set_visited (stmt, true);
3122 if (gimple_code (stmt) != GIMPLE_COND)
3123 {
3124 use_operand_p use_p;
3125 gimple phi;
3126 edge e2;
3127 unsigned int d;
3128
3129 lhs = gimple_assign_lhs (stmt);
3130 rhs = gimple_assign_rhs1 (stmt);
3131 gcc_assert (bb == last_bb);
3132
3133 /* stmt is
3134 _123 = (int) _234;
3135
3136 followed by:
3137 <bb M>:
3138 # _345 = PHI <_123(N), 1(...), 1(...)>
3139
3140 or 0 instead of 1. If it is 0, the _234
3141 range test is anded together with all the
3142	     other range tests; if it is 1, it is ored with
3143 them. */
3144 single_imm_use (lhs, &use_p, &phi);
3145 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3146 e2 = find_edge (first_bb, other_bb);
3147 d = e2->dest_idx;
3148 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3149 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3150 code = BIT_AND_EXPR;
3151 else
3152 {
3153 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3154 code = BIT_IOR_EXPR;
3155 }
3156
3157 /* If _234 SSA_NAME_DEF_STMT is
3158 _234 = _567 | _789;
3159	 (or &, corresponding to 1/0 in the phi arguments),
3160 push into ops the individual range test arguments
3161 of the bitwise or resp. and, recursively. */
3162 if (!get_ops (rhs, code, &ops,
3163 loop_containing_stmt (stmt))
3164 && has_single_use (rhs))
3165 {
3166 /* Otherwise, push the _234 range test itself. */
672758dc 3167 operand_entry_t oe = operand_entry_pool.allocate ();
8a2c7744 3168
3169 oe->op = rhs;
3170 oe->rank = code;
3171 oe->id = 0;
3172 oe->count = 1;
f1f41a6c 3173 ops.safe_push (oe);
82cb4e1c 3174 bb_ent.last_idx++;
8a2c7744 3175 }
82cb4e1c 3176 else
3177 bb_ent.last_idx = ops.length ();
3178 bb_ent.op = rhs;
3179 bbinfo.safe_push (bb_ent);
8a2c7744 3180 continue;
3181 }
3182 /* Otherwise stmt is GIMPLE_COND. */
3183 code = gimple_cond_code (stmt);
3184 lhs = gimple_cond_lhs (stmt);
3185 rhs = gimple_cond_rhs (stmt);
3186 if (TREE_CODE (lhs) == SSA_NAME
3187 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3188 && ((code != EQ_EXPR && code != NE_EXPR)
3189 || rhs != boolean_false_node
3190 /* Either push into ops the individual bitwise
3191 or resp. and operands, depending on which
3192 edge is other_bb. */
3193 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3194 ^ (code == EQ_EXPR))
3195 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3196 loop_containing_stmt (stmt))))
3197 {
3198 /* Or push the GIMPLE_COND stmt itself. */
672758dc 3199 operand_entry_t oe = operand_entry_pool.allocate ();
8a2c7744 3200
3201 oe->op = NULL;
3202 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3203 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3204	  /* oe->op = NULL signals that there is no SSA_NAME
3205	     for the range test; instead, oe->id is the index of
3206	     the basic block at whose end the GIMPLE_COND
3207	     sits.  */
3208 oe->id = bb->index;
3209 oe->count = 1;
f1f41a6c 3210 ops.safe_push (oe);
82cb4e1c 3211 bb_ent.op = NULL;
3212 bb_ent.last_idx++;
8a2c7744 3213 }
82cb4e1c 3214 else if (ops.length () > bb_ent.first_idx)
3215 {
3216 bb_ent.op = lhs;
3217 bb_ent.last_idx = ops.length ();
3218 }
3219 bbinfo.safe_push (bb_ent);
8a2c7744 3220 if (bb == first_bb)
3221 break;
3222 }
f1f41a6c 3223 if (ops.length () > 1)
3c8e4ef8 3224 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3225 if (any_changes)
82cb4e1c 3226 {
3227 unsigned int idx;
3c8e4ef8 3228 /* update_ops relies on has_single_use predicates returning the
3229	 same values as they did during get_ops earlier.  Additionally it
3230 never removes statements, only adds new ones and it should walk
3231 from the single imm use and check the predicate already before
3232 making those changes.
3233	 On the other hand, the handling of GIMPLE_COND directly can turn
3234 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3235 it needs to be done in a separate loop afterwards. */
3236 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
82cb4e1c 3237 {
3c8e4ef8 3238 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3239 && bbinfo[idx].op != NULL_TREE)
82cb4e1c 3240 {
82cb4e1c 3241 tree new_op;
3242
3c8e4ef8 3243 stmt = last_stmt (bb);
3244 new_op = update_ops (bbinfo[idx].op,
3245 (enum tree_code)
3246 ops[bbinfo[idx].first_idx]->rank,
3247 ops, &bbinfo[idx].first_idx,
3248 loop_containing_stmt (stmt));
82cb4e1c 3249 if (new_op == NULL_TREE)
3250 {
3251 gcc_assert (bb == last_bb);
3252 new_op = ops[bbinfo[idx].first_idx++]->op;
3253 }
3254 if (bbinfo[idx].op != new_op)
3255 {
3256 imm_use_iterator iter;
3257 use_operand_p use_p;
3258 gimple use_stmt, cast_stmt = NULL;
3259
3260 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3261 if (is_gimple_debug (use_stmt))
3262 continue;
3263 else if (gimple_code (use_stmt) == GIMPLE_COND
3264 || gimple_code (use_stmt) == GIMPLE_PHI)
3265 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3266 SET_USE (use_p, new_op);
3267 else if (gimple_assign_cast_p (use_stmt))
3268 cast_stmt = use_stmt;
3269 else
3270 gcc_unreachable ();
3271 if (cast_stmt)
3272 {
3273 gcc_assert (bb == last_bb);
3274 tree lhs = gimple_assign_lhs (cast_stmt);
f9e245b2 3275 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
82cb4e1c 3276 enum tree_code rhs_code
3277 = gimple_assign_rhs_code (cast_stmt);
1a91d914 3278 gassign *g;
4c0d6cf7 3279 if (is_gimple_min_invariant (new_op))
3280 {
3281 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3282 g = gimple_build_assign (new_lhs, new_op);
3283 }
3284 else
e9cf809e 3285 g = gimple_build_assign (new_lhs, rhs_code, new_op);
82cb4e1c 3286 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3287 gimple_set_uid (g, gimple_uid (cast_stmt));
3288 gimple_set_visited (g, true);
3289 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3290 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3291 if (is_gimple_debug (use_stmt))
3292 continue;
3293 else if (gimple_code (use_stmt) == GIMPLE_COND
3294 || gimple_code (use_stmt) == GIMPLE_PHI)
3295 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3296 SET_USE (use_p, new_lhs);
3297 else
3298 gcc_unreachable ();
3299 }
3300 }
3301 }
3302 if (bb == first_bb)
3303 break;
3304 }
3c8e4ef8 3305 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3306 {
3307 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3308 && bbinfo[idx].op == NULL_TREE
3309 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3310 {
1a91d914 3311 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3c8e4ef8 3312 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
1a91d914 3313 gimple_cond_make_false (cond_stmt);
3c8e4ef8 3314 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
1a91d914 3315 gimple_cond_make_true (cond_stmt);
3c8e4ef8 3316 else
3317 {
1a91d914 3318 gimple_cond_set_code (cond_stmt, NE_EXPR);
3319 gimple_cond_set_lhs (cond_stmt,
3320 ops[bbinfo[idx].first_idx]->op);
3321 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3c8e4ef8 3322 }
1a91d914 3323 update_stmt (cond_stmt);
3c8e4ef8 3324 }
3325 if (bb == first_bb)
3326 break;
3327 }
82cb4e1c 3328 }
8a2c7744 3329}
3330
54aceb26 3331/* Return true if OPERAND is defined by a PHI node which uses the LHS
3332   of STMT in its operands.  This is also known as a "destructive
3333 update" operation. */
3334
3335static bool
75a70cf9 3336is_phi_for_stmt (gimple stmt, tree operand)
54aceb26 3337{
75a70cf9 3338 gimple def_stmt;
1a91d914 3339 gphi *def_phi;
75a70cf9 3340 tree lhs;
54aceb26 3341 use_operand_p arg_p;
3342 ssa_op_iter i;
3343
3344 if (TREE_CODE (operand) != SSA_NAME)
3345 return false;
3346
75a70cf9 3347 lhs = gimple_assign_lhs (stmt);
3348
54aceb26 3349 def_stmt = SSA_NAME_DEF_STMT (operand);
1a91d914 3350 def_phi = dyn_cast <gphi *> (def_stmt);
3351 if (!def_phi)
54aceb26 3352 return false;
3353
1a91d914 3354 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
54aceb26 3355 if (lhs == USE_FROM_PTR (arg_p))
3356 return true;
3357 return false;
3358}
3359
9ce93694 3360/* Remove the def stmt of VAR if VAR has zero uses, and if so,
3361   recurse on its rhs1 operand.  */
3362
3363static void
3364remove_visited_stmt_chain (tree var)
3365{
3366 gimple stmt;
3367 gimple_stmt_iterator gsi;
3368
3369 while (1)
3370 {
3371 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3372 return;
3373 stmt = SSA_NAME_DEF_STMT (var);
8c5ac7f6 3374 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3375 {
3376 var = gimple_assign_rhs1 (stmt);
8c5ac7f6 3377 gsi = gsi_for_stmt (stmt);
54675e05 3378 reassoc_remove_stmt (&gsi);
8c5ac7f6 3379 release_defs (stmt);
8c5ac7f6 3380 }
3381 else
9ce93694 3382 return;
9ce93694 3383 }
3384}
3385
5b1c765d 3386/* This function checks three consecutive operands in
3387   the passed operand vector OPS starting from OPINDEX and
3388   swaps two operands if it is profitable for the binary operation
3389   consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3390
3391 We pair ops with the same rank if possible.
3392
3393 The alternative we try is to see if STMT is a destructive
3394 update style statement, which is like:
3395 b = phi (a, ...)
3396 a = c + b;
3397 In that case, we want to use the destructive update form to
3398 expose the possible vectorizer sum reduction opportunity.
3399   If so, the third operand will be the phi node.  This
3400 check is not performed if STMT is null.
3401
3402 We could, of course, try to be better as noted above, and do a
3403 lot of work to try to find these opportunities in >3 operand
3404 cases, but it is unlikely to be worth it. */
3405
3406static void
f1f41a6c 3407swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
5b1c765d 3408 unsigned int opindex, gimple stmt)
3409{
3410 operand_entry_t oe1, oe2, oe3;
3411
f1f41a6c 3412 oe1 = ops[opindex];
3413 oe2 = ops[opindex + 1];
3414 oe3 = ops[opindex + 2];
5b1c765d 3415
3416 if ((oe1->rank == oe2->rank
3417 && oe2->rank != oe3->rank)
3418 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3419 && !is_phi_for_stmt (stmt, oe1->op)
3420 && !is_phi_for_stmt (stmt, oe2->op)))
3421 {
3422 struct operand_entry temp = *oe3;
3423 oe3->op = oe1->op;
3424 oe3->rank = oe1->rank;
3425 oe1->op = temp.op;
3426 oe1->rank= temp.rank;
3427 }
3428 else if ((oe1->rank == oe3->rank
3429 && oe2->rank != oe3->rank)
3430 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3431 && !is_phi_for_stmt (stmt, oe1->op)
3432 && !is_phi_for_stmt (stmt, oe3->op)))
3433 {
3434 struct operand_entry temp = *oe2;
3435 oe2->op = oe1->op;
3436 oe2->rank = oe1->rank;
3437 oe1->op = temp.op;
82cb4e1c 3438 oe1->rank = temp.rank;
bb88a6c7 3439 }
bb88a6c7 3440}
3441
82cb4e1c 3442/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3443 two definitions, otherwise return STMT. */
a2bd0c99 3444
3445static inline gimple
82cb4e1c 3446find_insert_point (gimple stmt, tree rhs1, tree rhs2)
a2bd0c99 3447{
82cb4e1c 3448 if (TREE_CODE (rhs1) == SSA_NAME
3449 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3450 stmt = SSA_NAME_DEF_STMT (rhs1);
3451 if (TREE_CODE (rhs2) == SSA_NAME
3452 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3453 stmt = SSA_NAME_DEF_STMT (rhs2);
3454 return stmt;
a2bd0c99 3455}
3456
54aceb26 3457/* Recursively rewrite our linearized statements so that the operators
3458 match those in OPS[OPINDEX], putting the computation in rank
82cb4e1c 3459 order. Return new lhs. */
54aceb26 3460
82cb4e1c 3461static tree
75a70cf9 3462rewrite_expr_tree (gimple stmt, unsigned int opindex,
82cb4e1c 3463 vec<operand_entry_t> ops, bool changed)
54aceb26 3464{
75a70cf9 3465 tree rhs1 = gimple_assign_rhs1 (stmt);
3466 tree rhs2 = gimple_assign_rhs2 (stmt);
82cb4e1c 3467 tree lhs = gimple_assign_lhs (stmt);
54aceb26 3468 operand_entry_t oe;
3469
54aceb26 3470 /* The final recursion case for this function is that you have
3471 exactly two operations left.
229fa37d 3472 If we had exactly one op in the entire list to start with, we
54aceb26 3473 would have never called this function, and the tail recursion
3474 rewrites them one at a time. */
f1f41a6c 3475 if (opindex + 2 == ops.length ())
54aceb26 3476 {
3477 operand_entry_t oe1, oe2;
3478
f1f41a6c 3479 oe1 = ops[opindex];
3480 oe2 = ops[opindex + 1];
54aceb26 3481
75a70cf9 3482 if (rhs1 != oe1->op || rhs2 != oe2->op)
54aceb26 3483 {
82cb4e1c 3484 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3485 unsigned int uid = gimple_uid (stmt);
3486
54aceb26 3487 if (dump_file && (dump_flags & TDF_DETAILS))
3488 {
3489 fprintf (dump_file, "Transforming ");
75a70cf9 3490 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3491 }
3492
229fa37d 3493 /* Even when changed is false, reassociation could have e.g. removed
3494 some redundant operations, so unless we are just swapping the
3495 arguments or unless there is no change at all (then we just
3496 return lhs), force creation of a new SSA_NAME. */
3497 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
82cb4e1c 3498 {
3499 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
f9e245b2 3500 lhs = make_ssa_name (TREE_TYPE (lhs));
82cb4e1c 3501 stmt
e9cf809e 3502 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3503 oe1->op, oe2->op);
82cb4e1c 3504 gimple_set_uid (stmt, uid);
3505 gimple_set_visited (stmt, true);
3506 if (insert_point == gsi_stmt (gsi))
3507 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3508 else
3509 insert_stmt_after (stmt, insert_point);
3510 }
3511 else
3512 {
3513 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3514 == stmt);
3515 gimple_assign_set_rhs1 (stmt, oe1->op);
3516 gimple_assign_set_rhs2 (stmt, oe2->op);
3517 update_stmt (stmt);
3518 }
3519
9ce93694 3520 if (rhs1 != oe1->op && rhs1 != oe2->op)
3521 remove_visited_stmt_chain (rhs1);
54aceb26 3522
3523 if (dump_file && (dump_flags & TDF_DETAILS))
3524 {
3525 fprintf (dump_file, " into ");
75a70cf9 3526 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3527 }
54aceb26 3528 }
82cb4e1c 3529 return lhs;
54aceb26 3530 }
3531
3532 /* If we hit here, we should have 3 or more ops left. */
f1f41a6c 3533 gcc_assert (opindex + 2 < ops.length ());
54aceb26 3534
3535 /* Rewrite the next operator. */
f1f41a6c 3536 oe = ops[opindex];
54aceb26 3537
82cb4e1c 3538 /* Recurse on the LHS of the binary operator, which is guaranteed to
3539 be the non-leaf side. */
3540 tree new_rhs1
3541 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3542 changed || oe->op != rhs2);
54aceb26 3543
82cb4e1c 3544 if (oe->op != rhs2 || new_rhs1 != rhs1)
3545 {
54aceb26 3546 if (dump_file && (dump_flags & TDF_DETAILS))
3547 {
3548 fprintf (dump_file, "Transforming ");
75a70cf9 3549 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3550 }
3551
82cb4e1c 3552 /* If changed is false, this is either opindex == 0
3553 or all outer rhs2's were equal to corresponding oe->op,
3554 and powi_result is NULL.
3555 That means lhs is equivalent before and after reassociation.
3556 Otherwise ensure the old lhs SSA_NAME is not reused and
3557 create a new stmt as well, so that any debug stmts will be
3558 properly adjusted. */
3559 if (changed)
3560 {
3561 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3562 unsigned int uid = gimple_uid (stmt);
3563 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3564
f9e245b2 3565 lhs = make_ssa_name (TREE_TYPE (lhs));
e9cf809e 3566 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3567 new_rhs1, oe->op);
82cb4e1c 3568 gimple_set_uid (stmt, uid);
3569 gimple_set_visited (stmt, true);
3570 if (insert_point == gsi_stmt (gsi))
3571 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3572 else
3573 insert_stmt_after (stmt, insert_point);
3574 }
3575 else
3576 {
3577 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3578 == stmt);
3579 gimple_assign_set_rhs1 (stmt, new_rhs1);
3580 gimple_assign_set_rhs2 (stmt, oe->op);
3581 update_stmt (stmt);
3582 }
54aceb26 3583
3584 if (dump_file && (dump_flags & TDF_DETAILS))
3585 {
3586 fprintf (dump_file, " into ");
75a70cf9 3587 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3588 }
3589 }
82cb4e1c 3590 return lhs;
54aceb26 3591}
3592
5b1c765d 3593/* Find out how many cycles we need to compute a statement chain.
 3594 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
 3595 maximum number of independent statements we may execute per cycle. */
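/* Worked example (numbers invented for this comment): with
   OPS_NUM = 16 and CPU_WIDTH = 2 we spend res = 16 / 4 = 4 cycles
   reducing the operand count by cpu_width per cycle, leaving
   rest = 16 - 4 * 2 = 8 operands; exact_log2 (8) = 3 further cycles
   halve those down to a single operand, for 7 cycles in total.  */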
3596
3597static int
3598get_required_cycles (int ops_num, int cpu_width)
3599{
3600 int res;
3601 int elog;
3602 unsigned int rest;
3603
3604 /* While we have more than 2 * cpu_width operands
 3605 we can reduce the number of operands by cpu_width
3606 per cycle. */
3607 res = ops_num / (2 * cpu_width);
3608
 3609 /* The remaining operand count can then be halved each cycle
 3610 until we have only one operand. */
3611 rest = (unsigned)(ops_num - res * cpu_width);
3612 elog = exact_log2 (rest);
3613 if (elog >= 0)
3614 res += elog;
3615 else
3616 res += floor_log2 (rest) + 1;
3617
3618 return res;
3619}
3620
 3621/* Returns an optimal number of registers to use for the computation
 3622 of the given statements. */
3623
3624static int
3625get_reassociation_width (int ops_num, enum tree_code opc,
3754d046 3626 machine_mode mode)
5b1c765d 3627{
3628 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3629 int width;
3630 int width_min;
3631 int cycles_best;
3632
3633 if (param_width > 0)
3634 width = param_width;
3635 else
3636 width = targetm.sched.reassociation_width (opc, mode);
3637
3638 if (width == 1)
3639 return width;
3640
3641 /* Get the minimal time required for sequence computation. */
3642 cycles_best = get_required_cycles (ops_num, width);
3643
 3644 /* Check if we may use a smaller width and still compute the sequence
 3645 in the same number of cycles. That allows us to reduce register usage.
 3646 get_required_cycles never decreases as the width shrinks, so we can
 3647 perform a binary search for the minimal width that still
 3648 results in the optimal cycle count. */
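  /* For instance (values invented for this comment): with ops_num = 4
     and an initial width of 4, cycles_best is 2; width 2 also needs
     just 2 cycles while width 1 needs 3, so the search settles on
     width 2 and thereby halves the number of live partial sums.  */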
3649 width_min = 1;
3650 while (width > width_min)
3651 {
3652 int width_mid = (width + width_min) / 2;
3653
3654 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3655 width = width_mid;
3656 else if (width_min < width_mid)
3657 width_min = width_mid;
3658 else
3659 break;
3660 }
3661
3662 return width;
3663}
3664
 3665/* Rewrite our linearized statements so that the operands
3666 match those in OPS[OPINDEX], putting the computation in rank
3667 order and trying to allow operations to be executed in
3668 parallel. */
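/* For example (names invented for this comment), with width 2 the
   chain x = a + b + c + d can be emitted as
     t1 = a + b;  t2 = c + d;  x = t1 + t2
   where t1 and t2 are independent and may execute in the same cycle.  */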
3669
3670static void
1a91d914 3671rewrite_expr_tree_parallel (gassign *stmt, int width,
82cb4e1c 3672 vec<operand_entry_t> ops)
5b1c765d 3673{
3674 enum tree_code opcode = gimple_assign_rhs_code (stmt);
f1f41a6c 3675 int op_num = ops.length ();
5b1c765d 3676 int stmt_num = op_num - 1;
3677 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3678 int op_index = op_num - 1;
3679 int stmt_index = 0;
3680 int ready_stmts_end = 0;
3681 int i = 0;
3682 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5b1c765d 3683
 3684 /* We start expression rewriting from the topmost statement.
 3685 So, in this loop we first build the full list of statements
 3686 we will work with. */
3687 stmts[stmt_num - 1] = stmt;
3688 for (i = stmt_num - 2; i >= 0; i--)
3689 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3690
5b1c765d 3691 for (i = 0; i < stmt_num; i++)
3692 {
3693 tree op1, op2;
3694
3695 /* Determine whether we should use results of
3696 already handled statements or not. */
3697 if (ready_stmts_end == 0
3698 && (i - stmt_index >= width || op_index < 1))
3699 ready_stmts_end = i;
3700
 3701 /* Now we choose operands for the next statement. A nonzero
 3702 value in ready_stmts_end means that we should use
 3703 the results of already generated statements as new operands. */
3704 if (ready_stmts_end > 0)
3705 {
3706 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3707 if (ready_stmts_end > stmt_index)
3708 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3709 else if (op_index >= 0)
f1f41a6c 3710 op2 = ops[op_index--]->op;
5b1c765d 3711 else
3712 {
3713 gcc_assert (stmt_index < i);
3714 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3715 }
3716
3717 if (stmt_index >= ready_stmts_end)
3718 ready_stmts_end = 0;
3719 }
3720 else
3721 {
3722 if (op_index > 1)
3723 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
f1f41a6c 3724 op2 = ops[op_index--]->op;
3725 op1 = ops[op_index--]->op;
5b1c765d 3726 }
3727
 3728 /* If we are about to emit the last statement, we should
 3729 put the operands into it. Doing so also terminates
 3730 the loop. */
3731 if (op_index < 0 && stmt_index == i)
3732 i = stmt_num - 1;
3733
3734 if (dump_file && (dump_flags & TDF_DETAILS))
3735 {
3736 fprintf (dump_file, "Transforming ");
3737 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3738 }
3739
 3740 /* We keep the original statement only for the last one.
 3741 All others are recreated. */
3742 if (i == stmt_num - 1)
3743 {
3744 gimple_assign_set_rhs1 (stmts[i], op1);
3745 gimple_assign_set_rhs2 (stmts[i], op2);
3746 update_stmt (stmts[i]);
3747 }
3748 else
03d37e4e 3749 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5b1c765d 3750
3751 if (dump_file && (dump_flags & TDF_DETAILS))
3752 {
3753 fprintf (dump_file, " into ");
3754 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3755 }
3756 }
3757
3758 remove_visited_stmt_chain (last_rhs1);
3759}
3760
54aceb26 3761/* Transform STMT, which is really (A + B) + (C + D), into the left-
 3762 linear form, ((A + B) + C) + D.
3763 Recurse on D if necessary. */
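/* Concretely (SSA names invented for this comment): for
     t1 = a + b;  t2 = c + d;  x = t1 + t2
   a new statement n = t1 + d is inserted before STMT and STMT becomes
   x = n + c; the operand order is immaterial here because the operands
   are rank-sorted afterwards.  */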
3764
3765static void
75a70cf9 3766linearize_expr (gimple stmt)
54aceb26 3767{
82cb4e1c 3768 gimple_stmt_iterator gsi;
75a70cf9 3769 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3770 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
82cb4e1c 3771 gimple oldbinrhs = binrhs;
75a70cf9 3772 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3773 gimple newbinrhs = NULL;
a4c3fb95 3774 struct loop *loop = loop_containing_stmt (stmt);
82cb4e1c 3775 tree lhs = gimple_assign_lhs (stmt);
54aceb26 3776
75a70cf9 3777 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3778 && is_reassociable_op (binrhs, rhscode, loop));
3779
82cb4e1c 3780 gsi = gsi_for_stmt (stmt);
54aceb26 3781
75a70cf9 3782 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
e9cf809e 3783 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3784 gimple_assign_rhs_code (binrhs),
3785 gimple_assign_lhs (binlhs),
3786 gimple_assign_rhs2 (binrhs));
75a70cf9 3787 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
82cb4e1c 3788 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3789 gimple_set_uid (binrhs, gimple_uid (stmt));
54aceb26 3790
75a70cf9 3791 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3792 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
54aceb26 3793
3794 if (dump_file && (dump_flags & TDF_DETAILS))
3795 {
3796 fprintf (dump_file, "Linearized: ");
75a70cf9 3797 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3798 }
3799
3800 reassociate_stats.linearized++;
54aceb26 3801 update_stmt (stmt);
75a70cf9 3802
82cb4e1c 3803 gsi = gsi_for_stmt (oldbinrhs);
54675e05 3804 reassoc_remove_stmt (&gsi);
82cb4e1c 3805 release_defs (oldbinrhs);
3806
75a70cf9 3807 gimple_set_visited (stmt, true);
3808 gimple_set_visited (binlhs, true);
3809 gimple_set_visited (binrhs, true);
54aceb26 3810
3811 /* Tail recurse on the new rhs if it still needs reassociation. */
a4c3fb95 3812 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
75a70cf9 3813 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3814 want to change the algorithm while converting to tuples. */
54aceb26 3815 linearize_expr (stmt);
54aceb26 3816}
3817
75a70cf9 3818/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
54aceb26 3819 it. Otherwise, return NULL. */
3820
75a70cf9 3821static gimple
54aceb26 3822get_single_immediate_use (tree lhs)
3823{
3824 use_operand_p immuse;
75a70cf9 3825 gimple immusestmt;
54aceb26 3826
3827 if (TREE_CODE (lhs) == SSA_NAME
75a70cf9 3828 && single_imm_use (lhs, &immuse, &immusestmt)
3829 && is_gimple_assign (immusestmt))
3830 return immusestmt;
3831
3832 return NULL;
54aceb26 3833}
54aceb26 3834
54aceb26 3835/* Recursively negate the value of TONEGATE, and return the SSA_NAME
3836 representing the negated value. Insertions of any necessary
75a70cf9 3837 instructions go before GSI.
54aceb26 3838 This function is recursive in that, if you hand it "a_5" as the
3839 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3840 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3841
3842static tree
82cb4e1c 3843negate_value (tree tonegate, gimple_stmt_iterator *gsip)
54aceb26 3844{
82cb4e1c 3845 gimple negatedefstmt = NULL;
54aceb26 3846 tree resultofnegate;
82cb4e1c 3847 gimple_stmt_iterator gsi;
3848 unsigned int uid;
54aceb26 3849
54aceb26 3850 /* If we are trying to negate a name, defined by an add, negate the
3851 add operands instead. */
75a70cf9 3852 if (TREE_CODE (tonegate) == SSA_NAME)
3853 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
54aceb26 3854 if (TREE_CODE (tonegate) == SSA_NAME
75a70cf9 3855 && is_gimple_assign (negatedefstmt)
3856 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3857 && has_single_use (gimple_assign_lhs (negatedefstmt))
3858 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
54aceb26 3859 {
75a70cf9 3860 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3861 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
82cb4e1c 3862 tree lhs = gimple_assign_lhs (negatedefstmt);
3863 gimple g;
75a70cf9 3864
3865 gsi = gsi_for_stmt (negatedefstmt);
3866 rhs1 = negate_value (rhs1, &gsi);
75a70cf9 3867
3868 gsi = gsi_for_stmt (negatedefstmt);
3869 rhs2 = negate_value (rhs2, &gsi);
75a70cf9 3870
82cb4e1c 3871 gsi = gsi_for_stmt (negatedefstmt);
f9e245b2 3872 lhs = make_ssa_name (TREE_TYPE (lhs));
82cb4e1c 3873 gimple_set_visited (negatedefstmt, true);
e9cf809e 3874 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
82cb4e1c 3875 gimple_set_uid (g, gimple_uid (negatedefstmt));
3876 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3877 return lhs;
54aceb26 3878 }
3879
3880 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
82cb4e1c 3881 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
75a70cf9 3882 NULL_TREE, true, GSI_SAME_STMT);
82cb4e1c 3883 gsi = *gsip;
3884 uid = gimple_uid (gsi_stmt (gsi));
3885 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3886 {
3887 gimple stmt = gsi_stmt (gsi);
3888 if (gimple_uid (stmt) != 0)
3889 break;
3890 gimple_set_uid (stmt, uid);
3891 }
54aceb26 3892 return resultofnegate;
54aceb26 3893}
3894
3895/* Return true if we should break up the subtract in STMT into an add
 3896 with negate. This is true when the subtract operands are really
3897 adds, or the subtract itself is used in an add expression. In
3898 either case, breaking up the subtract into an add with negate
3899 exposes the adds to reassociation. */
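/* For instance (names invented for this comment): if d = a - b is
   used only by e = d + c, rewriting it as d = a + (-b) exposes the
   whole chain a + (-b) + c to reassociation.  */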
3900
3901static bool
75a70cf9 3902should_break_up_subtract (gimple stmt)
54aceb26 3903{
75a70cf9 3904 tree lhs = gimple_assign_lhs (stmt);
3905 tree binlhs = gimple_assign_rhs1 (stmt);
3906 tree binrhs = gimple_assign_rhs2 (stmt);
3907 gimple immusestmt;
a4c3fb95 3908 struct loop *loop = loop_containing_stmt (stmt);
54aceb26 3909
3910 if (TREE_CODE (binlhs) == SSA_NAME
a4c3fb95 3911 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
54aceb26 3912 return true;
3913
3914 if (TREE_CODE (binrhs) == SSA_NAME
a4c3fb95 3915 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
54aceb26 3916 return true;
3917
3918 if (TREE_CODE (lhs) == SSA_NAME
3919 && (immusestmt = get_single_immediate_use (lhs))
75a70cf9 3920 && is_gimple_assign (immusestmt)
dddf5036 3921 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3922 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
54aceb26 3923 return true;
3924 return false;
54aceb26 3925}
3926
3927/* Transform STMT from A - B into A + -B. */
3928
3929static void
75a70cf9 3930break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
54aceb26 3931{
75a70cf9 3932 tree rhs1 = gimple_assign_rhs1 (stmt);
3933 tree rhs2 = gimple_assign_rhs2 (stmt);
54aceb26 3934
3935 if (dump_file && (dump_flags & TDF_DETAILS))
3936 {
3937 fprintf (dump_file, "Breaking up subtract ");
75a70cf9 3938 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 3939 }
3940
75a70cf9 3941 rhs2 = negate_value (rhs2, gsip);
3942 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
54aceb26 3943 update_stmt (stmt);
3944}
3945
8c5ac7f6 3946/* Determine whether STMT is a builtin call that raises an SSA name
3947 to an integer power and has only one use. If so, and this is early
3948 reassociation and unsafe math optimizations are permitted, place
3949 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3950 If any of these conditions does not hold, return FALSE. */
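/* For example (names invented for this comment):
   t = __builtin_pow (x, 3.0), where t has a single use, yields
   *BASE = x and *EXPONENT = 3, whereas __builtin_pow (x, 2.5) is
   rejected because 2.5 does not round-trip through an integer.  */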
3951
3952static bool
3953acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3954{
3955 tree fndecl, arg1;
3956 REAL_VALUE_TYPE c, cint;
3957
3958 if (!first_pass_instance
3959 || !flag_unsafe_math_optimizations
3960 || !is_gimple_call (stmt)
3961 || !has_single_use (gimple_call_lhs (stmt)))
3962 return false;
3963
3964 fndecl = gimple_call_fndecl (stmt);
3965
3966 if (!fndecl
3967 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3968 return false;
3969
3970 switch (DECL_FUNCTION_CODE (fndecl))
3971 {
3972 CASE_FLT_FN (BUILT_IN_POW):
54faaad5 3973 if (flag_errno_math)
3974 return false;
3975
8c5ac7f6 3976 *base = gimple_call_arg (stmt, 0);
3977 arg1 = gimple_call_arg (stmt, 1);
3978
3979 if (TREE_CODE (arg1) != REAL_CST)
3980 return false;
3981
3982 c = TREE_REAL_CST (arg1);
3983
3984 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3985 return false;
3986
3987 *exponent = real_to_integer (&c);
e913b5cd 3988 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
8c5ac7f6 3989 if (!real_identical (&c, &cint))
3990 return false;
3991
3992 break;
3993
3994 CASE_FLT_FN (BUILT_IN_POWI):
3995 *base = gimple_call_arg (stmt, 0);
3996 arg1 = gimple_call_arg (stmt, 1);
3997
e913b5cd 3998 if (!tree_fits_shwi_p (arg1))
8c5ac7f6 3999 return false;
4000
e913b5cd 4001 *exponent = tree_to_shwi (arg1);
8c5ac7f6 4002 break;
4003
4004 default:
4005 return false;
4006 }
4007
4008 /* Expanding negative exponents is generally unproductive, so we don't
4009 complicate matters with those. Exponents of zero and one should
4010 have been handled by expression folding. */
4011 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4012 return false;
4013
4014 return true;
4015}
4016
54aceb26 4017/* Recursively linearize a binary expression that is the RHS of STMT.
4018 Place the operands of the expression tree in the vector named OPS. */
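/* For example (names invented for this comment): for
   x = ((a * b) * c) * d the OPS vector receives the leaves a, b, c
   and d; a leaf defined by an acceptable pow call is instead recorded
   as its base together with a repeat count.  */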
4019
4020static void
f1f41a6c 4021linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
dddf5036 4022 bool is_associative, bool set_visited)
54aceb26 4023{
75a70cf9 4024 tree binlhs = gimple_assign_rhs1 (stmt);
4025 tree binrhs = gimple_assign_rhs2 (stmt);
8c5ac7f6 4026 gimple binlhsdef = NULL, binrhsdef = NULL;
54aceb26 4027 bool binlhsisreassoc = false;
4028 bool binrhsisreassoc = false;
75a70cf9 4029 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
a4c3fb95 4030 struct loop *loop = loop_containing_stmt (stmt);
8c5ac7f6 4031 tree base = NULL_TREE;
4032 HOST_WIDE_INT exponent = 0;
54aceb26 4033
dddf5036 4034 if (set_visited)
4035 gimple_set_visited (stmt, true);
54aceb26 4036
4037 if (TREE_CODE (binlhs) == SSA_NAME)
4038 {
4039 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
df0675b8 4040 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4041 && !stmt_could_throw_p (binlhsdef));
54aceb26 4042 }
4043
4044 if (TREE_CODE (binrhs) == SSA_NAME)
4045 {
4046 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
df0675b8 4047 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4048 && !stmt_could_throw_p (binrhsdef));
54aceb26 4049 }
4050
4051 /* If the LHS is not reassociable, but the RHS is, we need to swap
4052 them. If neither is reassociable, there is nothing we can do, so
4053 just put them in the ops vector. If the LHS is reassociable,
4054 linearize it. If both are reassociable, then linearize the RHS
4055 and the LHS. */
4056
4057 if (!binlhsisreassoc)
4058 {
dddf5036 4059 /* If this is not a associative operation like division, give up. */
4060 if (!is_associative)
4061 {
4062 add_to_ops_vec (ops, binrhs);
4063 return;
4064 }
4065
54aceb26 4066 if (!binrhsisreassoc)
4067 {
8c5ac7f6 4068 if (rhscode == MULT_EXPR
4069 && TREE_CODE (binrhs) == SSA_NAME
4070 && acceptable_pow_call (binrhsdef, &base, &exponent))
4071 {
4072 add_repeat_to_ops_vec (ops, base, exponent);
4073 gimple_set_visited (binrhsdef, true);
4074 }
4075 else
4076 add_to_ops_vec (ops, binrhs);
4077
4078 if (rhscode == MULT_EXPR
4079 && TREE_CODE (binlhs) == SSA_NAME
4080 && acceptable_pow_call (binlhsdef, &base, &exponent))
4081 {
4082 add_repeat_to_ops_vec (ops, base, exponent);
4083 gimple_set_visited (binlhsdef, true);
4084 }
4085 else
4086 add_to_ops_vec (ops, binlhs);
4087
54aceb26 4088 return;
4089 }
4090
4091 if (dump_file && (dump_flags & TDF_DETAILS))
4092 {
4093 fprintf (dump_file, "swapping operands of ");
75a70cf9 4094 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 4095 }
4096
8f6fa493 4097 swap_ssa_operands (stmt,
4098 gimple_assign_rhs1_ptr (stmt),
4099 gimple_assign_rhs2_ptr (stmt));
54aceb26 4100 update_stmt (stmt);
4101
4102 if (dump_file && (dump_flags & TDF_DETAILS))
4103 {
4104 fprintf (dump_file, " is now ");
75a70cf9 4105 print_gimple_stmt (dump_file, stmt, 0, 0);
54aceb26 4106 }
4107
4108 /* We want to make it so the lhs is always the reassociative op,
4109 so swap. */
dfcf26a5 4110 std::swap (binlhs, binrhs);
54aceb26 4111 }
4112 else if (binrhsisreassoc)
4113 {
4114 linearize_expr (stmt);
75a70cf9 4115 binlhs = gimple_assign_rhs1 (stmt);
4116 binrhs = gimple_assign_rhs2 (stmt);
54aceb26 4117 }
4118
4119 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
a4c3fb95 4120 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4121 rhscode, loop));
dddf5036 4122 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4123 is_associative, set_visited);
8c5ac7f6 4124
4125 if (rhscode == MULT_EXPR
4126 && TREE_CODE (binrhs) == SSA_NAME
4127 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4128 {
4129 add_repeat_to_ops_vec (ops, base, exponent);
4130 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4131 }
4132 else
4133 add_to_ops_vec (ops, binrhs);
54aceb26 4134}
4135
4136/* Repropagate the negates back into subtracts, since no other pass
4137 currently does it. */
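/* For example (names invented for this comment): given x = -a and
   y = b + x, the PLUS_EXPR is rewritten to y = b - a; given x = -a
   and y = x - b, the pair becomes x = a + b and y = -x, pushing the
   negate down so it may merge into some other operation.  */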
4138
4139static void
4140repropagate_negates (void)
4141{
4142 unsigned int i = 0;
4143 tree negate;
4144
f1f41a6c 4145 FOR_EACH_VEC_ELT (plus_negates, i, negate)
54aceb26 4146 {
75a70cf9 4147 gimple user = get_single_immediate_use (negate);
54aceb26 4148
c07e5b8b 4149 if (!user || !is_gimple_assign (user))
4150 continue;
4151
54aceb26 4152 /* The negate operand can be either operand of a PLUS_EXPR
4153 (it can be the LHS if the RHS is a constant for example).
4154
4155 Force the negate operand to the RHS of the PLUS_EXPR, then
4156 transform the PLUS_EXPR into a MINUS_EXPR. */
c07e5b8b 4157 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
54aceb26 4158 {
54aceb26 4159 /* If the negated operand appears on the LHS of the
4160 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4161 to force the negated operand to the RHS of the PLUS_EXPR. */
75a70cf9 4162 if (gimple_assign_rhs1 (user) == negate)
54aceb26 4163 {
8f6fa493 4164 swap_ssa_operands (user,
4165 gimple_assign_rhs1_ptr (user),
4166 gimple_assign_rhs2_ptr (user));
54aceb26 4167 }
4168
4169 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4170 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
75a70cf9 4171 if (gimple_assign_rhs2 (user) == negate)
54aceb26 4172 {
75a70cf9 4173 tree rhs1 = gimple_assign_rhs1 (user);
4174 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4175 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4176 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
54aceb26 4177 update_stmt (user);
4178 }
4179 }
c07e5b8b 4180 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4181 {
4182 if (gimple_assign_rhs1 (user) == negate)
4183 {
4184 /* We have
4185 x = -a
4186 y = x - b
4187 which we transform into
4188 x = a + b
4189 y = -x .
4190 This pushes down the negate which we possibly can merge
4191 into some other operation, hence insert it into the
4192 plus_negates vector. */
4193 gimple feed = SSA_NAME_DEF_STMT (negate);
4194 tree a = gimple_assign_rhs1 (feed);
82cb4e1c 4195 tree b = gimple_assign_rhs2 (user);
4196 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4197 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
f9e245b2 4198 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
e9cf809e 4199 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
82cb4e1c 4200 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
806413d2 4201 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
82cb4e1c 4202 user = gsi_stmt (gsi2);
4203 update_stmt (user);
54675e05 4204 reassoc_remove_stmt (&gsi);
82cb4e1c 4205 release_defs (feed);
4206 plus_negates.safe_push (gimple_assign_lhs (user));
c07e5b8b 4207 }
4208 else
4209 {
4210 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4211 rid of one operation. */
4212 gimple feed = SSA_NAME_DEF_STMT (negate);
4213 tree a = gimple_assign_rhs1 (feed);
4214 tree rhs1 = gimple_assign_rhs1 (user);
4215 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4216 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4217 update_stmt (gsi_stmt (gsi));
4218 }
4219 }
54aceb26 4220 }
4221}
4222
c07e5b8b 4223/* Returns true if OP is of a type for which we can do reassociation.
 4224 That is for wrapping integral or non-saturating fixed-point types,
 4225 and for floating-point types when associative-math is enabled. */
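/* E.g. unsigned int (or signed int with -fwrapv, where overflow
   wraps) qualifies, plain signed int with undefined overflow does
   not, and float/double qualify only under -fassociative-math.  */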
4226
4227static bool
4228can_reassociate_p (tree op)
4229{
4230 tree type = TREE_TYPE (op);
ca3c9092 4231 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
c07e5b8b 4232 || NON_SAT_FIXED_POINT_TYPE_P (type)
f4a50267 4233 || (flag_associative_math && FLOAT_TYPE_P (type)))
c07e5b8b 4234 return true;
4235 return false;
4236}
4237
54aceb26 4238/* Break up subtract operations in block BB.
4239
4240 We do this top down because we don't know whether the subtract is
4241 part of a possible chain of reassociation except at the top.
48e1416a 4242
54aceb26 4243 IE given
4244 d = f + g
4245 c = a + e
4246 b = c - d
4247 q = b - r
4248 k = t - q
48e1416a 4249
54aceb26 4250 we want to break up k = t - q, but we won't until we've transformed q
75a70cf9 4251 = b - r, which won't be broken up until we transform b = c - d.
4252
82cb4e1c 4253 En passant, clear the GIMPLE visited flag on every statement
4254 and set UIDs within each basic block. */
54aceb26 4255
4256static void
4257break_up_subtract_bb (basic_block bb)
4258{
75a70cf9 4259 gimple_stmt_iterator gsi;
54aceb26 4260 basic_block son;
82cb4e1c 4261 unsigned int uid = 1;
54aceb26 4262
75a70cf9 4263 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
54aceb26 4264 {
75a70cf9 4265 gimple stmt = gsi_stmt (gsi);
4266 gimple_set_visited (stmt, false);
82cb4e1c 4267 gimple_set_uid (stmt, uid++);
54aceb26 4268
c07e5b8b 4269 if (!is_gimple_assign (stmt)
4270 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4271 continue;
4272
75a70cf9 4273 /* Look for simple gimple subtract operations. */
c07e5b8b 4274 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
54aceb26 4275 {
c07e5b8b 4276 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4277 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
54aceb26 4278 continue;
4279
4280 /* Check for a subtract used only in an addition. If this
4281 is the case, transform it into add of a negate for better
4282 reassociation. IE transform C = A-B into C = A + -B if C
4283 is only used in an addition. */
75a70cf9 4284 if (should_break_up_subtract (stmt))
4285 break_up_subtract (stmt, &gsi);
54aceb26 4286 }
c07e5b8b 4287 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4288 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
f1f41a6c 4289 plus_negates.safe_push (gimple_assign_lhs (stmt));
54aceb26 4290 }
4291 for (son = first_dom_son (CDI_DOMINATORS, bb);
4292 son;
4293 son = next_dom_son (CDI_DOMINATORS, son))
4294 break_up_subtract_bb (son);
4295}
4296
8c5ac7f6 4297/* Used for repeated factor analysis. */
4298struct repeat_factor_d
4299{
4300 /* An SSA name that occurs in a multiply chain. */
4301 tree factor;
4302
4303 /* Cached rank of the factor. */
4304 unsigned rank;
4305
4306 /* Number of occurrences of the factor in the chain. */
4307 HOST_WIDE_INT count;
4308
4309 /* An SSA name representing the product of this factor and
4310 all factors appearing later in the repeated factor vector. */
4311 tree repr;
4312};
4313
4314typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4315typedef const struct repeat_factor_d *const_repeat_factor_t;
4316
8c5ac7f6 4317
f1f41a6c 4318static vec<repeat_factor> repeat_factor_vec;
8c5ac7f6 4319
4320/* Used for sorting the repeat factor vector. Sort primarily by
4321 ascending occurrence count, secondarily by descending rank. */
4322
4323static int
4324compare_repeat_factors (const void *x1, const void *x2)
4325{
4326 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4327 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4328
4329 if (rf1->count != rf2->count)
4330 return rf1->count - rf2->count;
4331
4332 return rf2->rank - rf1->rank;
4333}
4334
8c5ac7f6 4335/* Look for repeated operands in OPS in the multiply tree rooted at
4336 STMT. Replace them with an optimal sequence of multiplies and powi
97269507 4337 builtin calls, and remove the used operands from OPS. Return an
4338 SSA name representing the value of the replacement sequence. */
8c5ac7f6 4339
97269507 4340static tree
f1f41a6c 4341attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
8c5ac7f6 4342{
4343 unsigned i, j, vec_len;
4344 int ii;
4345 operand_entry_t oe;
4346 repeat_factor_t rf1, rf2;
4347 repeat_factor rfnew;
97269507 4348 tree result = NULL_TREE;
8c5ac7f6 4349 tree target_ssa, iter_result;
4350 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4351 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4352 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4353 gimple mul_stmt, pow_stmt;
4354
4355 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4356 target. */
4357 if (!powi_fndecl)
97269507 4358 return NULL_TREE;
8c5ac7f6 4359
4360 /* Allocate the repeated factor vector. */
f1f41a6c 4361 repeat_factor_vec.create (10);
8c5ac7f6 4362
4363 /* Scan the OPS vector for all SSA names in the product and build
4364 up a vector of occurrence counts for each factor. */
f1f41a6c 4365 FOR_EACH_VEC_ELT (*ops, i, oe)
8c5ac7f6 4366 {
4367 if (TREE_CODE (oe->op) == SSA_NAME)
4368 {
f1f41a6c 4369 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
8c5ac7f6 4370 {
4371 if (rf1->factor == oe->op)
4372 {
4373 rf1->count += oe->count;
4374 break;
4375 }
4376 }
4377
f1f41a6c 4378 if (j >= repeat_factor_vec.length ())
8c5ac7f6 4379 {
4380 rfnew.factor = oe->op;
4381 rfnew.rank = oe->rank;
4382 rfnew.count = oe->count;
4383 rfnew.repr = NULL_TREE;
f1f41a6c 4384 repeat_factor_vec.safe_push (rfnew);
8c5ac7f6 4385 }
4386 }
4387 }
4388
4389 /* Sort the repeated factor vector by (a) increasing occurrence count,
4390 and (b) decreasing rank. */
f1f41a6c 4391 repeat_factor_vec.qsort (compare_repeat_factors);
8c5ac7f6 4392
4393 /* It is generally best to combine as many base factors as possible
4394 into a product before applying __builtin_powi to the result.
4395 However, the sort order chosen for the repeated factor vector
4396 allows us to cache partial results for the product of the base
4397 factors for subsequent use. When we already have a cached partial
4398 result from a previous iteration, it is best to make use of it
4399 before looking for another __builtin_pow opportunity.
4400
4401 As an example, consider x * x * y * y * y * z * z * z * z.
4402 We want to first compose the product x * y * z, raise it to the
4403 second power, then multiply this by y * z, and finally multiply
4404 by z. This can be done in 5 multiplies provided we cache y * z
4405 for use in both expressions:
4406
4407 t1 = y * z
4408 t2 = t1 * x
4409 t3 = t2 * t2
4410 t4 = t1 * t3
4411 result = t4 * z
4412
4413 If we instead ignored the cached y * z and first multiplied by
4414 the __builtin_pow opportunity z * z, we would get the inferior:
4415
4416 t1 = y * z
4417 t2 = t1 * x
4418 t3 = t2 * t2
4419 t4 = z * z
4420 t5 = t3 * t4
4421 result = t5 * y */
4422
f1f41a6c 4423 vec_len = repeat_factor_vec.length ();
8c5ac7f6 4424
4425 /* Repeatedly look for opportunities to create a builtin_powi call. */
4426 while (true)
4427 {
4428 HOST_WIDE_INT power;
4429
4430 /* First look for the largest cached product of factors from
4431 preceding iterations. If found, create a builtin_powi for
4432 it if the minimum occurrence count for its factors is at
4433 least 2, or just use this cached product as our next
4434 multiplicand if the minimum occurrence count is 1. */
f1f41a6c 4435 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
8c5ac7f6 4436 {
4437 if (rf1->repr && rf1->count > 0)
4438 break;
4439 }
4440
4441 if (j < vec_len)
4442 {
4443 power = rf1->count;
4444
4445 if (power == 1)
4446 {
4447 iter_result = rf1->repr;
4448
4449 if (dump_file && (dump_flags & TDF_DETAILS))
4450 {
4451 unsigned elt;
4452 repeat_factor_t rf;
4453 fputs ("Multiplying by cached product ", dump_file);
4454 for (elt = j; elt < vec_len; elt++)
4455 {
f1f41a6c 4456 rf = &repeat_factor_vec[elt];
8c5ac7f6 4457 print_generic_expr (dump_file, rf->factor, 0);
4458 if (elt < vec_len - 1)
4459 fputs (" * ", dump_file);
4460 }
4461 fputs ("\n", dump_file);
4462 }
4463 }
4464 else
4465 {
03d37e4e 4466 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
8c5ac7f6 4467 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4468 build_int_cst (integer_type_node,
4469 power));
4470 gimple_call_set_lhs (pow_stmt, iter_result);
4471 gimple_set_location (pow_stmt, gimple_location (stmt));
4472 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4473
4474 if (dump_file && (dump_flags & TDF_DETAILS))
4475 {
4476 unsigned elt;
4477 repeat_factor_t rf;
4478 fputs ("Building __builtin_pow call for cached product (",
4479 dump_file);
4480 for (elt = j; elt < vec_len; elt++)
4481 {
f1f41a6c 4482 rf = &repeat_factor_vec[elt];
8c5ac7f6 4483 print_generic_expr (dump_file, rf->factor, 0);
4484 if (elt < vec_len - 1)
4485 fputs (" * ", dump_file);
4486 }
f03df321 4487 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
8ef3b7cb 4488 power);
8c5ac7f6 4489 }
4490 }
4491 }
4492 else
4493 {
4494 /* Otherwise, find the first factor in the repeated factor
4495 vector whose occurrence count is at least 2. If no such
4496 factor exists, there are no builtin_powi opportunities
4497 remaining. */
f1f41a6c 4498 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
8c5ac7f6 4499 {
4500 if (rf1->count >= 2)
4501 break;
4502 }
4503
4504 if (j >= vec_len)
4505 break;
4506
4507 power = rf1->count;
4508
4509 if (dump_file && (dump_flags & TDF_DETAILS))
4510 {
4511 unsigned elt;
4512 repeat_factor_t rf;
4513 fputs ("Building __builtin_pow call for (", dump_file);
4514 for (elt = j; elt < vec_len; elt++)
4515 {
f1f41a6c 4516 rf = &repeat_factor_vec[elt];
8c5ac7f6 4517 print_generic_expr (dump_file, rf->factor, 0);
4518 if (elt < vec_len - 1)
4519 fputs (" * ", dump_file);
4520 }
f03df321 4521 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
8c5ac7f6 4522 }
4523
4524 reassociate_stats.pows_created++;
4525
4526 /* Visit each element of the vector in reverse order (so that
4527 high-occurrence elements are visited first, and within the
4528 same occurrence count, lower-ranked elements are visited
4529 first). Form a linear product of all elements in this order
 4530 whose occurrence count is at least that of element J.
4531 Record the SSA name representing the product of each element
4532 with all subsequent elements in the vector. */
4533 if (j == vec_len - 1)
4534 rf1->repr = rf1->factor;
4535 else
4536 {
4537 for (ii = vec_len - 2; ii >= (int)j; ii--)
4538 {
4539 tree op1, op2;
4540
f1f41a6c 4541 rf1 = &repeat_factor_vec[ii];
4542 rf2 = &repeat_factor_vec[ii + 1];
8c5ac7f6 4543
4544 /* Init the last factor's representative to be itself. */
4545 if (!rf2->repr)
4546 rf2->repr = rf2->factor;
4547
4548 op1 = rf1->factor;
4549 op2 = rf2->repr;
4550
03d37e4e 4551 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
e9cf809e 4552 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4553 op1, op2);
8c5ac7f6 4554 gimple_set_location (mul_stmt, gimple_location (stmt));
4555 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4556 rf1->repr = target_ssa;
4557
4558 /* Don't reprocess the multiply we just introduced. */
4559 gimple_set_visited (mul_stmt, true);
4560 }
4561 }
4562
4563 /* Form a call to __builtin_powi for the maximum product
4564 just formed, raised to the power obtained earlier. */
f1f41a6c 4565 rf1 = &repeat_factor_vec[j];
03d37e4e 4566 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
8c5ac7f6 4567 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4568 build_int_cst (integer_type_node,
4569 power));
4570 gimple_call_set_lhs (pow_stmt, iter_result);
4571 gimple_set_location (pow_stmt, gimple_location (stmt));
4572 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4573 }
4574
97269507 4575 /* If we previously formed at least one other builtin_powi call,
4576 form the product of this one and those others. */
4577 if (result)
4578 {
03d37e4e 4579 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
e9cf809e 4580 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4581 result, iter_result);
97269507 4582 gimple_set_location (mul_stmt, gimple_location (stmt));
4583 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4584 gimple_set_visited (mul_stmt, true);
4585 result = new_result;
4586 }
4587 else
4588 result = iter_result;
8c5ac7f6 4589
4590 /* Decrement the occurrence count of each element in the product
4591 by the count found above, and remove this many copies of each
4592 factor from OPS. */
4593 for (i = j; i < vec_len; i++)
4594 {
4595 unsigned k = power;
4596 unsigned n;
4597
f1f41a6c 4598 rf1 = &repeat_factor_vec[i];
8c5ac7f6 4599 rf1->count -= power;
4600
f1f41a6c 4601 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
8c5ac7f6 4602 {
4603 if (oe->op == rf1->factor)
4604 {
4605 if (oe->count <= k)
4606 {
f1f41a6c 4607 ops->ordered_remove (n);
8c5ac7f6 4608 k -= oe->count;
4609
4610 if (k == 0)
4611 break;
4612 }
4613 else
4614 {
4615 oe->count -= k;
4616 break;
4617 }
4618 }
4619 }
4620 }
4621 }
4622
4623 /* At this point all elements in the repeated factor vector have a
4624 remaining occurrence count of 0 or 1, and those with a count of 1
4625 don't have cached representatives. Re-sort the ops vector and
4626 clean up. */
f1f41a6c 4627 ops->qsort (sort_by_operand_rank);
4628 repeat_factor_vec.release ();
97269507 4629
4630 /* Return the final product computed herein. Note that there may
4631 still be some elements with single occurrence count left in OPS;
4632 those will be handled by the normal reassociation logic. */
4633 return result;
4634}
4635
4636/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4637
4638static void
4639transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4640{
4641 tree rhs1;
4642
4643 if (dump_file && (dump_flags & TDF_DETAILS))
4644 {
4645 fprintf (dump_file, "Transforming ");
4646 print_gimple_stmt (dump_file, stmt, 0, 0);
4647 }
4648
4649 rhs1 = gimple_assign_rhs1 (stmt);
4650 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4651 update_stmt (stmt);
4652 remove_visited_stmt_chain (rhs1);
4653
4654 if (dump_file && (dump_flags & TDF_DETAILS))
4655 {
4656 fprintf (dump_file, " into ");
4657 print_gimple_stmt (dump_file, stmt, 0, 0);
4658 }
4659}
4660
4661/* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4662
4663static void
4664transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4665 tree rhs1, tree rhs2)
4666{
4667 if (dump_file && (dump_flags & TDF_DETAILS))
4668 {
4669 fprintf (dump_file, "Transforming ");
4670 print_gimple_stmt (dump_file, stmt, 0, 0);
4671 }
4672
61e371b0 4673 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
97269507 4674 update_stmt (gsi_stmt (*gsi));
4675 remove_visited_stmt_chain (rhs1);
4676
4677 if (dump_file && (dump_flags & TDF_DETAILS))
4678 {
4679 fprintf (dump_file, " into ");
4680 print_gimple_stmt (dump_file, stmt, 0, 0);
4681 }
8c5ac7f6 4682}
4683
54aceb26 4684/* Reassociate expressions in basic block BB and its post-dominator
 4685 children. */
4686
4687static void
4688reassociate_bb (basic_block bb)
4689{
75a70cf9 4690 gimple_stmt_iterator gsi;
54aceb26 4691 basic_block son;
8a2c7744 4692 gimple stmt = last_stmt (bb);
4693
4694 if (stmt && !gimple_visited_p (stmt))
4695 maybe_optimize_range_tests (stmt);
54aceb26 4696
75a70cf9 4697 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
54aceb26 4698 {
8a2c7744 4699 stmt = gsi_stmt (gsi);
54aceb26 4700
df0675b8 4701 if (is_gimple_assign (stmt)
4702 && !stmt_could_throw_p (stmt))
54aceb26 4703 {
75a70cf9 4704 tree lhs, rhs1, rhs2;
4705 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
54aceb26 4706
75a70cf9 4707 /* If this is not a gimple binary expression, there is
4708 nothing for us to do with it. */
4709 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
54aceb26 4710 continue;
4711
75a70cf9 4712 /* If this was part of an already processed statement,
4713 we don't need to touch it again. */
4714 if (gimple_visited_p (stmt))
dddf5036 4715 {
4716 /* This statement might have become dead because of previous
4717 reassociations. */
4718 if (has_zero_uses (gimple_get_lhs (stmt)))
4719 {
54675e05 4720 reassoc_remove_stmt (&gsi);
dddf5036 4721 release_defs (stmt);
fb715874 4722 /* We might end up removing the last stmt above, which
 4723 leaves the iterator at the end of the sequence.
 4724 Reset it to the last stmt of the block in that case;
 4725 if that is still the end of the sequence (we removed
 4726 the block's only remaining statement), we need to
 4727 bail out. */
4728 if (gsi_end_p (gsi))
4729 {
4730 gsi = gsi_last_bb (bb);
4731 if (gsi_end_p (gsi))
4732 break;
4733 }
dddf5036 4734 }
4735 continue;
4736 }
75a70cf9 4737
4738 lhs = gimple_assign_lhs (stmt);
4739 rhs1 = gimple_assign_rhs1 (stmt);
4740 rhs2 = gimple_assign_rhs2 (stmt);
4741
ca3c9092 4742 /* For operations other than bitwise ops and min/max we cannot
 4743 associate all types. Verify that here. */
4744 if (rhs_code != BIT_IOR_EXPR
4745 && rhs_code != BIT_AND_EXPR
4746 && rhs_code != BIT_XOR_EXPR
4747 && rhs_code != MIN_EXPR
4748 && rhs_code != MAX_EXPR
4749 && (!can_reassociate_p (lhs)
4750 || !can_reassociate_p (rhs1)
4751 || !can_reassociate_p (rhs2)))
54aceb26 4752 continue;
4753
75a70cf9 4754 if (associative_tree_code (rhs_code))
54aceb26 4755 {
c2078b80 4756 auto_vec<operand_entry_t> ops;
97269507 4757 tree powi_result = NULL_TREE;
54aceb26 4758
4759 /* There may be no immediate uses left by the time we
4760 get here because we may have eliminated them all. */
72e3ec84 4761 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
54aceb26 4762 continue;
4763
75a70cf9 4764 gimple_set_visited (stmt, true);
dddf5036 4765 linearize_expr_tree (&ops, stmt, true, true);
f1f41a6c 4766 ops.qsort (sort_by_operand_rank);
75a70cf9 4767 optimize_ops_list (rhs_code, &ops);
dddf5036 4768 if (undistribute_ops_list (rhs_code, &ops,
4769 loop_containing_stmt (stmt)))
4770 {
f1f41a6c 4771 ops.qsort (sort_by_operand_rank);
dddf5036 4772 optimize_ops_list (rhs_code, &ops);
4773 }
54aceb26 4774
946e9eb4 4775 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4776 optimize_range_tests (rhs_code, &ops);
4777
8c5ac7f6 4778 if (first_pass_instance
4779 && rhs_code == MULT_EXPR
4780 && flag_unsafe_math_optimizations)
03d37e4e 4781 powi_result = attempt_builtin_powi (stmt, &ops);
8c5ac7f6 4782
97269507 4783 /* If the operand vector is now empty, all operands were
4784 consumed by the __builtin_powi optimization. */
f1f41a6c 4785 if (ops.length () == 0)
97269507 4786 transform_stmt_to_copy (&gsi, stmt, powi_result);
f1f41a6c 4787 else if (ops.length () == 1)
54aceb26 4788 {
f1f41a6c 4789 tree last_op = ops.last ()->op;
97269507 4790
4791 if (powi_result)
4792 transform_stmt_to_multiply (&gsi, stmt, last_op,
4793 powi_result);
4794 else
4795 transform_stmt_to_copy (&gsi, stmt, last_op);
54aceb26 4796 }
4797 else
5b1c765d 4798 {
3754d046 4799 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
f1f41a6c 4800 int ops_num = ops.length ();
5b1c765d 4801 int width = get_reassociation_width (ops_num, rhs_code, mode);
82cb4e1c 4802 tree new_lhs = lhs;
5b1c765d 4803
4804 if (dump_file && (dump_flags & TDF_DETAILS))
4805 fprintf (dump_file,
4806 "Width = %d was chosen for reassociation\n", width);
4807
4808 if (width > 1
f1f41a6c 4809 && ops.length () > 3)
1a91d914 4810 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4811 width, ops);
5b1c765d 4812 else
a2bd0c99 4813 {
4814 /* When there are three operands left, we want
4815 to make sure the ones that get the double
4816 binary op are chosen wisely. */
4817 int len = ops.length ();
4818 if (len >= 3)
4819 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4820
82cb4e1c 4821 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4822 powi_result != NULL);
a2bd0c99 4823 }
97269507 4824
4825 /* If we combined some repeated factors into a
4826 __builtin_powi call, multiply that result by the
4827 reassociated operands. */
4828 if (powi_result)
4829 {
82cb4e1c 4830 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4831 tree type = TREE_TYPE (lhs);
03d37e4e 4832 tree target_ssa = make_temp_ssa_name (type, NULL,
4833 "reassocpow");
82cb4e1c 4834 gimple_set_lhs (lhs_stmt, target_ssa);
4835 update_stmt (lhs_stmt);
4836 if (lhs != new_lhs)
4837 target_ssa = new_lhs;
e9cf809e 4838 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4839 powi_result, target_ssa);
97269507 4840 gimple_set_location (mul_stmt, gimple_location (stmt));
4841 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4842 }
5b1c765d 4843 }
54aceb26 4844 }
4845 }
4846 }
4847 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4848 son;
4849 son = next_dom_son (CDI_POST_DOMINATORS, son))
4850 reassociate_bb (son);
4851}
4852
e3668db5 4853/* Add jumps around shifts for range tests turned into bit tests.
4854 For each SSA_NAME VAR we have code like:
4855 VAR = ...; // final stmt of range comparison
4856 // bit test here...;
4857 OTHERVAR = ...; // final stmt of the bit test sequence
4858 RES = VAR | OTHERVAR;
4859 Turn the above into:
4860 VAR = ...;
4861 if (VAR != 0)
4862 goto <l3>;
4863 else
4864 goto <l2>;
4865 <l2>:
4866 // bit test here...;
4867 OTHERVAR = ...;
4868 <l3>:
4869 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4870
4871static void
4872branch_fixup (void)
4873{
4874 tree var;
4875 unsigned int i;
4876
4877 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4878 {
4879 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4880 gimple use_stmt;
4881 use_operand_p use;
4882 bool ok = single_imm_use (var, &use, &use_stmt);
4883 gcc_assert (ok
4884 && is_gimple_assign (use_stmt)
4885 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4886 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4887
4888 basic_block cond_bb = gimple_bb (def_stmt);
4889 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4890 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4891
4892 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4893 gimple g = gimple_build_cond (NE_EXPR, var,
4894 build_zero_cst (TREE_TYPE (var)),
4895 NULL_TREE, NULL_TREE);
4896 location_t loc = gimple_location (use_stmt);
4897 gimple_set_location (g, loc);
4898 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4899
4900 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4901 etrue->probability = REG_BR_PROB_BASE / 2;
4902 etrue->count = cond_bb->count / 2;
4903 edge efalse = find_edge (cond_bb, then_bb);
4904 efalse->flags = EDGE_FALSE_VALUE;
4905 efalse->probability -= etrue->probability;
4906 efalse->count -= etrue->count;
4907 then_bb->count -= etrue->count;
4908
4909 tree othervar = NULL_TREE;
4910 if (gimple_assign_rhs1 (use_stmt) == var)
4911 othervar = gimple_assign_rhs2 (use_stmt);
4912 else if (gimple_assign_rhs2 (use_stmt) == var)
4913 othervar = gimple_assign_rhs1 (use_stmt);
4914 else
4915 gcc_unreachable ();
4916 tree lhs = gimple_assign_lhs (use_stmt);
1a91d914 4917 gphi *phi = create_phi_node (lhs, merge_bb);
e3668db5 4918 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4919 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4920 gsi = gsi_for_stmt (use_stmt);
4921 gsi_remove (&gsi, true);
4922
4923 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4924 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4925 }
4926 reassoc_branch_fixups.release ();
4927}
4928
f1f41a6c 4929void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4930void debug_ops_vector (vec<operand_entry_t> ops);
54aceb26 4931
4932/* Dump the operand entry vector OPS to FILE. */
4933
4934void
f1f41a6c 4935dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
54aceb26 4936{
4937 operand_entry_t oe;
4938 unsigned int i;
4939
f1f41a6c 4940 FOR_EACH_VEC_ELT (ops, i, oe)
54aceb26 4941 {
4942 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
75a70cf9 4943 print_generic_expr (file, oe->op, 0);
54aceb26 4944 }
4945}
4946
4947/* Dump the operand entry vector OPS to STDERR. */
4948
4b987fac 4949DEBUG_FUNCTION void
f1f41a6c 4950debug_ops_vector (vec<operand_entry_t> ops)
54aceb26 4951{
4952 dump_ops_vector (stderr, ops);
4953}
4954
4955static void
4956do_reassoc (void)
4957{
34154e27 4958 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4959 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
54aceb26 4960}
4961
4962/* Initialize the reassociation pass. */
4963
4964static void
4965init_reassoc (void)
4966{
4967 int i;
b30a8715 4968 long rank = 2;
a28770e1 4969 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
54aceb26 4970
a4c3fb95 4971 /* Find the loops, so that we can prevent moving calculations in
4972 them. */
4973 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4974
54aceb26 4975 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4976
17b5ea6f 4977 next_operand_entry_id = 0;
54aceb26 4978
4979 /* Reverse RPO (Reverse Post Order) will give us something where
4980 deeper loops come later. */
6180f28d 4981 pre_and_rev_post_order_compute (NULL, bbs, false);
fe672ac0 4982 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
06ecf488 4983 operand_rank = new hash_map<tree, long>;
54aceb26 4984
61e371b0 4985 /* Give each default definition a distinct rank. This includes
4986 parameters and the static chain. Walk backwards over all
4987 SSA names so that we get proper rank ordering according
4988 to tree_swap_operands_p. */
4989 for (i = num_ssa_names - 1; i > 0; --i)
54aceb26 4990 {
61e371b0 4991 tree name = ssa_name (i);
4992 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4993 insert_operand_rank (name, ++rank);
54aceb26 4994 }
4995
 4996 /* Set up the rank for each BB. */
a28770e1 4997 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
54aceb26 4998 bb_rank[bbs[i]] = ++rank << 16;
4999
5000 free (bbs);
54aceb26 5001 calculate_dominance_info (CDI_POST_DOMINATORS);
1e094109 5002 plus_negates = vNULL;
54aceb26 5003}
5004
5005/* Cleanup after the reassociation pass, and print stats if
5006 requested. */
5007
5008static void
5009fini_reassoc (void)
5010{
581f8050 5011 statistics_counter_event (cfun, "Linearized",
5012 reassociate_stats.linearized);
5013 statistics_counter_event (cfun, "Constants eliminated",
5014 reassociate_stats.constants_eliminated);
5015 statistics_counter_event (cfun, "Ops eliminated",
5016 reassociate_stats.ops_eliminated);
5017 statistics_counter_event (cfun, "Statements rewritten",
5018 reassociate_stats.rewritten);
8c5ac7f6 5019 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5020 reassociate_stats.pows_encountered);
5021 statistics_counter_event (cfun, "Built-in powi calls created",
5022 reassociate_stats.pows_created);
54aceb26 5023
06ecf488 5024 delete operand_rank;
672758dc 5025 operand_entry_pool.release ();
54aceb26 5026 free (bb_rank);
f1f41a6c 5027 plus_negates.release ();
54aceb26 5028 free_dominance_info (CDI_POST_DOMINATORS);
a4c3fb95 5029 loop_optimizer_finalize ();
54aceb26 5030}
5031
5032/* Gate and execute functions for Reassociation. */
5033
2a1990e9 5034static unsigned int
54aceb26 5035execute_reassoc (void)
5036{
3dec5460 5037 init_reassoc ();
54aceb26 5038
3dec5460 5039 do_reassoc ();
54aceb26 5040 repropagate_negates ();
e3668db5 5041 branch_fixup ();
54aceb26 5042
3dec5460 5043 fini_reassoc ();
2a1990e9 5044 return 0;
3dec5460 5045}
5046
cbe8bda8 5047namespace {
5048
5049const pass_data pass_data_reassoc =
3dec5460 5050{
cbe8bda8 5051 GIMPLE_PASS, /* type */
5052 "reassoc", /* name */
5053 OPTGROUP_NONE, /* optinfo_flags */
cbe8bda8 5054 TV_TREE_REASSOC, /* tv_id */
5055 ( PROP_cfg | PROP_ssa ), /* properties_required */
5056 0, /* properties_provided */
5057 0, /* properties_destroyed */
5058 0, /* todo_flags_start */
8b88439e 5059 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3dec5460 5060};
cbe8bda8 5061
5062class pass_reassoc : public gimple_opt_pass
5063{
5064public:
9af5ce0c 5065 pass_reassoc (gcc::context *ctxt)
5066 : gimple_opt_pass (pass_data_reassoc, ctxt)
cbe8bda8 5067 {}
5068
5069 /* opt_pass methods: */
ae84f584 5070 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
31315c24 5071 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
65b0537f 5072 virtual unsigned int execute (function *) { return execute_reassoc (); }
cbe8bda8 5073
5074}; // class pass_reassoc
5075
5076} // anon namespace
5077
5078gimple_opt_pass *
5079make_pass_reassoc (gcc::context *ctxt)
5080{
5081 return new pass_reassoc (ctxt);
5082}