/* Reassociation for trees.
   Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "params.h"
#include "builtins.h"
#include "gimplify.h"
#include "case-cfn-macros.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc.).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1),  b (1),  c (1),  d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */

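/* As an illustrative sketch of the whole pipeline (not code from this
   pass): given

     w = a - b;
     x = w + c;
     y = x + -a;

   step 1 turns the subtraction into w = a + (-b), step 2 linearizes
   the chain into the operand list {a, -b, c, -a}, step 3 cancels
   a + -a, step 4 rewrites the surviving statements in rank order, and
   step 5 repropagates the remaining negate so the result is
   effectively y = c - b.  */
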
/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
struct operand_entry
{
  unsigned int rank;
  unsigned int id;
  tree op;
  unsigned int count;
  gimple *stmt_to_insert;
};

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs for which, after reassociate_bb is done with
   all basic blocks, the CFG should be adjusted - the basic block
   split right after that SSA_NAME's definition statement and before
   its only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple *phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      long rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last: while in some cases
	 const_binop can't optimize some inexact operations, multiplication
	 by -1.0 or 1.0 can always be merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
	return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}
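
/* For instance (an illustrative sketch): in a float MULT_EXPR operand
   list {x_1, -1.0, 2.5}, both constants have rank 0, but 2.5
   classifies as FLOAT_CONST_TYPE and -1.0 as FLOAT_ONE_CONST_TYPE, so
   after sorting -1.0 ends up last, where it can always be merged even
   if const_binop would refuse to fold the inexact 2.5 combination.  */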

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not be.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
	return false;
      if (rhs2
	  && TREE_CODE (rhs2) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
	return false;
      return true;
    }

  return false;
}


/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
				    TREE_TYPE (gimple_assign_rhs1 (ass))))
	return true;
    }
  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that unary operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* Return true if OP1 and OP2 have the same value when cast to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
	{
	  op1 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2)
	    return true;
	}
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
	{
	  op2 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2
	      || orig_op1 == op2)
	    return true;
	}
    }

  return false;
}


/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{

  /* If we have two of the same op, and the opcode is &, |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
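
/* Illustrative examples (not from the sources): for the list {a, a, b}
   with BIT_AND_EXPR, one copy of a is removed, leaving a & b; with
   BIT_XOR_EXPR both copies cancel, leaving just b, and a two-element
   list {a, a} would be replaced by a zero of the right type.  */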

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates ().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}
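
/* For example (an illustrative sketch): given t_1 = -a_2 and the
   PLUS_EXPR list {a_2, t_1, b_3}, the pair cancels and the list
   becomes {b_3, 0}; likewise with t_1 = ~a_2 the result is {b_3, -1},
   matching a + ~a == -1.  */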

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
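
/* For example (an illustrative sketch): given t_1 = ~a_2 and the
   BIT_AND_EXPR list {a_2, t_1, b_3}, a & ~a makes the whole
   conjunction zero, so the list collapses to {0}; with BIT_IOR_EXPR
   it would collapse to {-1}.  */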

/* Use a constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
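
/* Illustrative examples (not from the sources): {a, b, 0} under
   BIT_AND_EXPR collapses to {0}, {a, b, -1} under BIT_IOR_EXPR
   collapses to {-1}, and a trailing 1 in a MULT_EXPR list or a
   trailing 0 in a PLUS_EXPR list is simply popped.  */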


static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
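
/* For instance (an illustrative sketch): for a statement
   t_1 = __builtin_powi (x_2, 5), decrement_power rewrites it in place
   to t_1 = __builtin_powi (x_2, 4) and returns 4.  */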

/* Replace the SSA name defined by STMT with a new SSA name, replace
   all uses of the old name (rewriting debug uses via a debug
   temporary), and return the new SSA name.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  if (new_debug_lhs == NULL_TREE)
	    {
	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* Replace all SSAs defined in STMTS_TO_FIX and replace their
   uses with new SSAs.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
			   vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}
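
/* For example (an illustrative sketch): for the MULT_EXPR chain
   t_1 = x_2 * y_3; t_4 = t_1 * z_5; with *DEF == t_4, removing y_3
   propagates x_2 into the single use of t_1, deletes t_1's statement,
   and gives the remaining statement a fresh LHS, effectively leaving
   t_4' = x_2 * z_5.  */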

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

1439
1440static void
42acab1c 1441insert_stmt_after (gimple *stmt, gimple *insert_point)
82cb4e1c 1442{
1443 gimple_stmt_iterator gsi;
1444 basic_block bb;
1445
1446 if (gimple_code (insert_point) == GIMPLE_PHI)
1447 bb = gimple_bb (insert_point);
1448 else if (!stmt_ends_bb_p (insert_point))
1449 {
1450 gsi = gsi_for_stmt (insert_point);
1451 gimple_set_uid (stmt, gimple_uid (insert_point));
1452 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453 return;
1454 }
1455 else
1456 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457 thus if it must end a basic block, it should be a call that can
1458 throw, or some assignment that can throw. If it throws, the LHS
1459 of it will not be initialized though, so only valid places using
1460 the SSA_NAME should be dominated by the fallthru edge. */
1461 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462 gsi = gsi_after_labels (bb);
1463 if (gsi_end_p (gsi))
1464 {
1465 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466 gimple_set_uid (stmt,
1467 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1468 }
1469 else
1470 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
c9dbb922 1472}
1473
dddf5036 1474/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1475 the result. Places the statement after the definition of either
1476 OP1 or OP2. Returns the new statement. */
1477
42acab1c 1478static gimple *
03d37e4e 1479build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
dddf5036 1480{
42acab1c 1481 gimple *op1def = NULL, *op2def = NULL;
dddf5036 1482 gimple_stmt_iterator gsi;
1483 tree op;
1a91d914 1484 gassign *sum;
dddf5036 1485
1486 /* Create the addition statement. */
f9e245b2 1487 op = make_ssa_name (type);
e9cf809e 1488 sum = gimple_build_assign (op, opcode, op1, op2);
dddf5036 1489
1490 /* Find an insertion place and insert. */
1491 if (TREE_CODE (op1) == SSA_NAME)
1492 op1def = SSA_NAME_DEF_STMT (op1);
1493 if (TREE_CODE (op2) == SSA_NAME)
1494 op2def = SSA_NAME_DEF_STMT (op2);
1495 if ((!op1def || gimple_nop_p (op1def))
1496 && (!op2def || gimple_nop_p (op2def)))
1497 {
34154e27 1498 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
82cb4e1c 1499 if (gsi_end_p (gsi))
dddf5036 1500 {
82cb4e1c 1501 gimple_stmt_iterator gsi2
34154e27 1502 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
82cb4e1c 1503 gimple_set_uid (sum,
1504 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
dddf5036 1505 }
1506 else
82cb4e1c 1507 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
dddf5036 1509 }
1510 else
1511 {
42acab1c 1512 gimple *insert_point;
82cb4e1c 1513 if ((!op1def || gimple_nop_p (op1def))
1514 || (op2def && !gimple_nop_p (op2def)
1515 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516 insert_point = op2def;
dddf5036 1517 else
82cb4e1c 1518 insert_point = op1def;
1519 insert_stmt_after (sum, insert_point);
dddf5036 1520 }
1521 update_stmt (sum);
1522
1523 return sum;
1524}
1525
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrences and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
	second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
	candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
	multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
	processing of (operand, code) pairs continues.

  The alternate addition chains built are re-processed by the main
  reassociation algorithm which allows optimizing a * x * y + b * y * x
  to (a + b) * x * y in one invocation of the reassociation pass.  */
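
/* A worked example (illustrative only): for the PLUS_EXPR list
   {a*x, b*x, c}, the pair (x, MULT_EXPR) is counted twice, so both
   candidates get x stripped from their chains via zero_one_operation,
   a new sum (a + b) is built, and the first candidate becomes
   (a + b) * x, giving the outer chain {(a + b) * x, 0, c}.  */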

static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, class loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
			  (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    {
	      *slot = idx;
	    }
	  else
	    {
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrences.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple *oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry *oe1, *oe2;
	  gimple *prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple *sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}
1774
ae63312f 1775/* Pair to hold the information for one specific VECTOR_TYPE SSA_NAME:
 1776   first: the element index of the relevant BIT_FIELD_REF within the vector.
 1777   second: the index into the vec *OPS of the relevant BIT_FIELD_REF.  */
1778typedef std::pair<unsigned, unsigned> v_info_elem;
1779typedef auto_vec<v_info_elem, 32> v_info;
1780typedef v_info *v_info_ptr;
1781
1782/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1783static int
1784sort_by_mach_mode (const void *p_i, const void *p_j)
1785{
1786 const tree tr1 = *((const tree *) p_i);
1787 const tree tr2 = *((const tree *) p_j);
1788 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1789 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1790 if (mode1 > mode2)
1791 return 1;
1792 else if (mode1 < mode2)
1793 return -1;
1794 else
1795 return 0;
1796}
1797
1798/* Clean up the hash map holding the VECTOR information.  */
1799static void
1800cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1801{
1802 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1803 it != info_map.end (); ++it)
1804 {
1805 v_info_ptr info = (*it).second;
1806 delete info;
1807 (*it).second = NULL;
1808 }
1809}
1810
1811/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1812 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1813 is transformed to
1814 Vs = (V1 + V2 + ... + Vn)
1815 Vs[0] + Vs[1] + ... + Vs[k]
1816
1817 The basic steps are listed below:
1818
1819   1) Scan the addition chain *OPS for summands that are BIT_FIELD_REFs
1820      of VECTOR-type SSA_NAMEs.  For each qualifying summand, record its
1821      information in v_info_map, keyed by the VECTOR SSA_NAME.
1822
1823   2) For each key (VECTOR SSA_NAME), verify that its BIT_FIELD_REFs are
1824      contiguous and cover the whole VECTOR without any holes.  Collect
1825      the vectors that qualify into a candidate list.
1826
1827   3) Sort the candidate list by the machine mode of the VECTOR type.
1828      For each group of candidates with the same mode, build the addition
1829      statements for them and generate BIT_FIELD_REFs accordingly.
1830
1831   TODO:
1832   The current implementation requires each whole VECTOR to be fully
1833   covered.  It could be extended to accept partial coverage (adjacent
1834   elements that do not fill the whole vector), but that would need a
1835   cost model to decide when the transformation is worthwhile.
1836*/
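/* As an illustrative sketch (hypothetical SSA names, two-element vectors,
   PLUS_EXPR):

     _1 = BIT_FIELD_REF <v1_3(D), 64, 0>;
     _2 = BIT_FIELD_REF <v1_3(D), 64, 64>;
     _4 = BIT_FIELD_REF <v2_5(D), 64, 0>;
     _6 = BIT_FIELD_REF <v2_5(D), 64, 64>;
     sum = _1 + _2 + _4 + _6;

   is rewritten so that the two vectors are added first:

     vs_8 = v1_3(D) + v2_5(D);
     _9 = BIT_FIELD_REF <vs_8, 64, 0>;
     _10 = BIT_FIELD_REF <vs_8, 64, 64>;
     sum = _9 + _10;  */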
1837static bool
1838undistribute_bitref_for_vector (enum tree_code opcode,
1839 vec<operand_entry *> *ops, struct loop *loop)
1840{
1841 if (ops->length () <= 1)
1842 return false;
1843
1844 if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR
1845 && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1846 return false;
1847
1848 hash_map<tree, v_info_ptr> v_info_map;
1849 operand_entry *oe1;
1850 unsigned i;
1851
1852  /* Find the summands in the addition chain that come from VECTOR
1853     BIT_FIELD_REFs and record their information in the map.  */
1854 FOR_EACH_VEC_ELT (*ops, i, oe1)
1855 {
1856 enum tree_code dcode;
1857 gimple *oe1def;
1858
1859 if (TREE_CODE (oe1->op) != SSA_NAME)
1860 continue;
1861 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1862 if (!is_gimple_assign (oe1def))
1863 continue;
1864 dcode = gimple_assign_rhs_code (oe1def);
1865 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1866 continue;
1867
1868 tree rhs = gimple_assign_rhs1 (oe1def);
1869 tree vec = TREE_OPERAND (rhs, 0);
1870 tree vec_type = TREE_TYPE (vec);
1871
1872 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1873 continue;
1874
1875 /* Ignore it if target machine can't support this VECTOR type. */
1876 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1877 continue;
1878
1879      /* Require a constant number of vector elements, so the
	 BIT_FIELD_REF offset and size can be constrained below.  */
1880 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1881 continue;
1882
1883 tree elem_type = TREE_TYPE (vec_type);
1884 unsigned HOST_WIDE_INT elem_size
1885 = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
1886 if (maybe_ne (bit_field_size (rhs), elem_size))
1887 continue;
1888
1889 unsigned idx;
1890 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1891 continue;
1892
1893 /* Ignore it if target machine can't support this type of VECTOR
1894 operation. */
1895 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1896 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1897 continue;
1898
1899 bool existed;
1900 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1901 if (!existed)
1902 info = new v_info;
1903 info->safe_push (std::make_pair (idx, i));
1904 }
1905
1906  /* We need at least two VECTORs to combine.  */
1907 if (v_info_map.elements () <= 1)
1908 {
1909 cleanup_vinfo_map (v_info_map);
1910 return false;
1911 }
1912
1913 /* Verify all VECTOR candidates by checking two conditions:
1914 1) sorted offsets are adjacent, no holes.
1915 2) can fill the whole VECTOR perfectly.
1916 And add the valid candidates to a vector for further handling. */
1917 auto_vec<tree> valid_vecs (v_info_map.elements ());
1918 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1919 it != v_info_map.end (); ++it)
1920 {
1921 tree cand_vec = (*it).first;
1922 v_info_ptr cand_info = (*it).second;
1923      /* CAND_VEC is an SSA_NAME, so query its type for the element count
	 (VECTOR_CST_NELTS would require a VECTOR_CST).  */
      unsigned int num_elems
	= TYPE_VECTOR_SUBPARTS (TREE_TYPE (cand_vec)).to_constant ();
1924 if (cand_info->length () != num_elems)
1925 continue;
1926 sbitmap holes = sbitmap_alloc (num_elems);
1927 bitmap_ones (holes);
1928 bool valid = true;
1929 v_info_elem *curr;
1930 FOR_EACH_VEC_ELT (*cand_info, i, curr)
1931 {
1932 if (!bitmap_bit_p (holes, curr->first))
1933 {
1934 valid = false;
1935 break;
1936 }
1937 else
1938 bitmap_clear_bit (holes, curr->first);
1939 }
1940 if (valid && bitmap_empty_p (holes))
1941 valid_vecs.quick_push (cand_vec);
1942 sbitmap_free (holes);
1943 }
1944
1945  /* We need at least two VECTORs to combine.  */
1946 if (valid_vecs.length () <= 1)
1947 {
1948 cleanup_vinfo_map (v_info_map);
1949 return false;
1950 }
1951
1952 valid_vecs.qsort (sort_by_mach_mode);
1953  /* Go through all candidates in machine mode order; since the vector is
1954     sorted by mode, single-candidate modes are easy to detect and skip.  */
1955 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
1956 {
1957 tree tvec = valid_vecs[i];
1958 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
1959
1960 /* Skip modes with only a single candidate. */
1961 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
1962 continue;
1963
1964 unsigned int idx, j;
1965 gimple *sum = NULL;
1966 v_info_ptr info_ptr;
1967 tree sum_vec = tvec;
1968 v_info_elem *elem;
1969
1970      /* Build the sum for all candidates with the same mode.  */
1971 do
1972 {
1973 sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
1974 valid_vecs[i + 1], opcode);
1975 sum_vec = gimple_get_lhs (sum);
1976 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
1977 /* Update those related ops of current candidate VECTOR. */
1978 FOR_EACH_VEC_ELT (*info_ptr, j, elem)
1979 {
1980 idx = elem->second;
1981 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
1982	      /* Set this so that the op's definition will get DCEd later.  */
1983 gimple_set_visited (def, true);
1984 if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
1985 || opcode == BIT_IOR_EXPR)
1986 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
1987 else if (opcode == MULT_EXPR)
1988 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
1989 else
1990 {
1991 gcc_assert (opcode == BIT_AND_EXPR);
1992 (*ops)[idx]->op
1993 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
1994 }
1995 (*ops)[idx]->rank = 0;
1996 }
1997 if (dump_file && (dump_flags & TDF_DETAILS))
1998 {
1999 fprintf (dump_file, "Generating addition -> ");
2000 print_gimple_stmt (dump_file, sum, 0);
2001 }
2002 i++;
2003 }
2004 while ((i < valid_vecs.length () - 1)
2005 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2006
2007      /* Referring to the first valid VECTOR with this mode, generate the
2008	 corresponding BIT_FIELD_REF statements on the accumulated sum.  */
2009 info_ptr = *(v_info_map.get (tvec));
2010 gcc_assert (sum);
2011 tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
2012 FOR_EACH_VEC_ELT (*info_ptr, j, elem)
2013 {
2014 idx = elem->second;
2015 tree dst = make_ssa_name (elem_type);
2016 gimple *gs = gimple_build_assign (
2017 dst, BIT_FIELD_REF,
2018 build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
2019 bitsize_int (elem->first
2020 * tree_to_uhwi (TYPE_SIZE (elem_type)))));
2021 insert_stmt_after (gs, sum);
2022 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2023	  /* Set this so that the op's definition will get DCEd later.  */
2024 gimple_set_visited (def, true);
2025 (*ops)[idx]->op = gimple_assign_lhs (gs);
2026 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2027 if (dump_file && (dump_flags & TDF_DETAILS))
2028 {
2029 fprintf (dump_file, "Generating bit_field_ref -> ");
2030 print_gimple_stmt (dump_file, gs, 0);
2031 }
2032 }
2033 }
2034
2035 if (dump_file && (dump_flags & TDF_DETAILS))
2036    fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2037
2038 cleanup_vinfo_map (v_info_map);
2039
2040 return true;
2041}
2042
5d6da7da 2043/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2044 expression, examine the other OPS to see if any of them are comparisons
2045 of the same values, which we may be able to combine or eliminate.
2046 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2047
2048static bool
2049eliminate_redundant_comparison (enum tree_code opcode,
04009ada 2050 vec<operand_entry *> *ops,
5d6da7da 2051 unsigned int currindex,
04009ada 2052 operand_entry *curr)
5d6da7da 2053{
2054 tree op1, op2;
2055 enum tree_code lcode, rcode;
42acab1c 2056 gimple *def1, *def2;
5d6da7da 2057 int i;
04009ada 2058 operand_entry *oe;
5d6da7da 2059
2060 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2061 return false;
2062
2063 /* Check that CURR is a comparison. */
2064 if (TREE_CODE (curr->op) != SSA_NAME)
2065 return false;
2066 def1 = SSA_NAME_DEF_STMT (curr->op);
2067 if (!is_gimple_assign (def1))
2068 return false;
2069 lcode = gimple_assign_rhs_code (def1);
2070 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2071 return false;
2072 op1 = gimple_assign_rhs1 (def1);
2073 op2 = gimple_assign_rhs2 (def1);
2074
2075 /* Now look for a similar comparison in the remaining OPS. */
f1f41a6c 2076 for (i = currindex + 1; ops->iterate (i, &oe); i++)
5d6da7da 2077 {
2078 tree t;
2079
2080 if (TREE_CODE (oe->op) != SSA_NAME)
2081 continue;
2082 def2 = SSA_NAME_DEF_STMT (oe->op);
2083 if (!is_gimple_assign (def2))
2084 continue;
2085 rcode = gimple_assign_rhs_code (def2);
2086 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2087 continue;
5d6da7da 2088
2089 /* If we got here, we have a match. See if we can combine the
2090 two comparisons. */
c82d157a 2091 if (opcode == BIT_IOR_EXPR)
2092 t = maybe_fold_or_comparisons (lcode, op1, op2,
2093 rcode, gimple_assign_rhs1 (def2),
2094 gimple_assign_rhs2 (def2));
2095 else
2096 t = maybe_fold_and_comparisons (lcode, op1, op2,
2097 rcode, gimple_assign_rhs1 (def2),
2098 gimple_assign_rhs2 (def2));
5d6da7da 2099 if (!t)
2100 continue;
c82d157a 2101
2102 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2103 always give us a boolean_type_node value back. If the original
2104 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2105 we need to convert. */
2106 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2107 t = fold_convert (TREE_TYPE (curr->op), t);
2108
89c993b6 2109 if (TREE_CODE (t) != INTEGER_CST
2110 && !operand_equal_p (t, curr->op, 0))
2111 {
2112 enum tree_code subcode;
2113 tree newop1, newop2;
2114 if (!COMPARISON_CLASS_P (t))
2115 continue;
2116 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2117 STRIP_USELESS_TYPE_CONVERSION (newop1);
2118 STRIP_USELESS_TYPE_CONVERSION (newop2);
2119 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2120 continue;
2121 }
2122
5d6da7da 2123 if (dump_file && (dump_flags & TDF_DETAILS))
2124 {
2125 fprintf (dump_file, "Equivalence: ");
1ffa4346 2126 print_generic_expr (dump_file, curr->op);
5d6da7da 2127 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1ffa4346 2128 print_generic_expr (dump_file, oe->op);
5d6da7da 2129 fprintf (dump_file, " -> ");
1ffa4346 2130 print_generic_expr (dump_file, t);
5d6da7da 2131 fprintf (dump_file, "\n");
2132 }
2133
2134 /* Now we can delete oe, as it has been subsumed by the new combined
2135 expression t. */
f1f41a6c 2136 ops->ordered_remove (i);
5d6da7da 2137 reassociate_stats.ops_eliminated ++;
2138
2139 /* If t is the same as curr->op, we're done. Otherwise we must
2140	 replace curr->op with t.  A special case is if we got a constant
2141 back, in which case we add it to the end instead of in place of
2142 the current entry. */
2143 if (TREE_CODE (t) == INTEGER_CST)
2144 {
f1f41a6c 2145 ops->ordered_remove (currindex);
5d6da7da 2146 add_to_ops_vec (ops, t);
2147 }
c82d157a 2148 else if (!operand_equal_p (t, curr->op, 0))
5d6da7da 2149 {
42acab1c 2150 gimple *sum;
5d6da7da 2151 enum tree_code subcode;
2152 tree newop1;
2153 tree newop2;
b2d33090 2154 gcc_assert (COMPARISON_CLASS_P (t));
5d6da7da 2155 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
b2d33090 2156 STRIP_USELESS_TYPE_CONVERSION (newop1);
2157 STRIP_USELESS_TYPE_CONVERSION (newop2);
2158 gcc_checking_assert (is_gimple_val (newop1)
2159 && is_gimple_val (newop2));
03d37e4e 2160 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
5d6da7da 2161 curr->op = gimple_get_lhs (sum);
2162 }
2163 return true;
2164 }
2165
2166 return false;
2167}
dddf5036 2168
527d8479 2169
0d7ddd44 2170/* Transform repeated addition of the same value into multiplication by
 2171   a constant.  */
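/* A brief sketch with a hypothetical chain: after sorting, equal operands
   are adjacent, so x + x + x + y is seen as a run of three x entries; the
   run is removed from OPS, tmp = x * 3 is emitted, and tmp is re-added,
   leaving the chain as tmp + y.  */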
2172static bool
527d8479 2173transform_add_to_multiply (vec<operand_entry *> *ops)
0d7ddd44 2174{
2175 operand_entry *oe;
2176 tree op = NULL_TREE;
2177 int j;
2178 int i, start = -1, end = 0, count = 0;
7e6da23d 2179 auto_vec<std::pair <int, int> > indxs;
0d7ddd44 2180 bool changed = false;
2181
2182 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
933b9f7f 2183 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2184 || !flag_unsafe_math_optimizations))
0d7ddd44 2185 return false;
2186
2187 /* Look for repeated operands. */
2188 FOR_EACH_VEC_ELT (*ops, i, oe)
2189 {
2190 if (start == -1)
2191 {
2192 count = 1;
2193 op = oe->op;
2194 start = i;
2195 }
2196 else if (operand_equal_p (oe->op, op, 0))
2197 {
2198 count++;
2199 end = i;
2200 }
2201 else
2202 {
2203 if (count > 1)
2204 indxs.safe_push (std::make_pair (start, end));
2205 count = 1;
2206 op = oe->op;
2207 start = i;
2208 }
2209 }
2210
2211 if (count > 1)
2212 indxs.safe_push (std::make_pair (start, end));
2213
2214 for (j = indxs.length () - 1; j >= 0; --j)
2215 {
2216 /* Convert repeated operand addition to multiplication. */
2217 start = indxs[j].first;
2218 end = indxs[j].second;
2219 op = (*ops)[start]->op;
2220 count = end - start + 1;
2221 for (i = end; i >= start; --i)
2222 ops->unordered_remove (i);
2223 tree tmp = make_ssa_name (TREE_TYPE (op));
2224 tree cst = build_int_cst (integer_type_node, count);
0d7ddd44 2225 gassign *mul_stmt
2226 = gimple_build_assign (tmp, MULT_EXPR,
2227 op, fold_convert (TREE_TYPE (op), cst));
0d7ddd44 2228 gimple_set_visited (mul_stmt, true);
527d8479 2229 add_to_ops_vec (ops, tmp, mul_stmt);
0d7ddd44 2230 changed = true;
2231 }
2232
2233 return changed;
2234}
2235
2236
54aceb26 2237/* Perform various identities and other optimizations on the list of
2238 operand entries, stored in OPS. The tree code for the binary
2239 operation between all the operands is OPCODE. */
3dec5460 2240
54aceb26 2241static void
2242optimize_ops_list (enum tree_code opcode,
04009ada 2243 vec<operand_entry *> *ops)
54aceb26 2244{
f1f41a6c 2245 unsigned int length = ops->length ();
54aceb26 2246 unsigned int i;
04009ada 2247 operand_entry *oe;
2248 operand_entry *oelast = NULL;
54aceb26 2249 bool iterate = false;
3dec5460 2250
54aceb26 2251 if (length == 1)
2252 return;
3dec5460 2253
f1f41a6c 2254 oelast = ops->last ();
3dec5460 2255
54aceb26 2256 /* If the last two are constants, pop the constants off, merge them
2257 and try the next two. */
2258 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2259 {
04009ada 2260 operand_entry *oelm1 = (*ops)[length - 2];
54aceb26 2261
2262 if (oelm1->rank == 0
2263 && is_gimple_min_invariant (oelm1->op)
c8ca3ee7 2264 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2265 TREE_TYPE (oelast->op)))
54aceb26 2266 {
5f9acd88 2267 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
54aceb26 2268 oelm1->op, oelast->op);
2269
5f9acd88 2270 if (folded && is_gimple_min_invariant (folded))
54aceb26 2271 {
2272 if (dump_file && (dump_flags & TDF_DETAILS))
2273 fprintf (dump_file, "Merging constants\n");
2274
f1f41a6c 2275 ops->pop ();
2276 ops->pop ();
54aceb26 2277
2278 add_to_ops_vec (ops, folded);
2279 reassociate_stats.constants_eliminated++;
2280
2281 optimize_ops_list (opcode, ops);
2282 return;
2283 }
2284 }
2285 }
2286
2287 eliminate_using_constants (opcode, ops);
2288 oelast = NULL;
2289
f1f41a6c 2290 for (i = 0; ops->iterate (i, &oe);)
54aceb26 2291 {
2292 bool done = false;
2293
2294 if (eliminate_not_pairs (opcode, ops, i, oe))
2295 return;
2296 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
5d6da7da 2297 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2298 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
54aceb26 2299 {
2300 if (done)
2301 return;
2302 iterate = true;
2303 oelast = NULL;
2304 continue;
2305 }
2306 oelast = oe;
2307 i++;
2308 }
2309
54aceb26 2310 if (iterate)
2311 optimize_ops_list (opcode, ops);
2312}
2313
946e9eb4 2314/* The following functions are subroutines to optimize_range_tests and allow
2315 it to try to change a logical combination of comparisons into a range
2316 test.
2317
2318 For example, both
2319 X == 2 || X == 5 || X == 3 || X == 4
2320 and
2321 X >= 2 && X <= 5
2322 are converted to
2323 (unsigned) (X - 2) <= 3
2324
2325   For more information see the comments above fold_range_test in
2326   fold-const.c; this implementation is for GIMPLE.  */
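/* A short check of the example above: the subtraction is performed in an
   unsigned type, so for X < 2 the value X - 2 wraps around to a huge
   number and the comparison fails, while for X in [2, 5] it yields
   0 .. 3; the single unsigned comparison is therefore exactly
   equivalent to 2 <= X && X <= 5.  */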
2327
2328struct range_entry
2329{
2330 tree exp;
2331 tree low;
2332 tree high;
2333 bool in_p;
2334 bool strict_overflow_p;
2335 unsigned int idx, next;
2336};
2337
2338/* This is similar to make_range in fold-const.c, but on top of
8a2c7744 2339 GIMPLE instead of trees. If EXP is non-NULL, it should be
2340 an SSA_NAME and STMT argument is ignored, otherwise STMT
2341 argument should be a GIMPLE_COND. */
946e9eb4 2342
2343static void
42acab1c 2344init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
946e9eb4 2345{
2346 int in_p;
2347 tree low, high;
2348 bool is_bool, strict_overflow_p;
2349
2350 r->exp = NULL_TREE;
2351 r->in_p = false;
2352 r->strict_overflow_p = false;
2353 r->low = NULL_TREE;
2354 r->high = NULL_TREE;
8a2c7744 2355 if (exp != NULL_TREE
2356 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
946e9eb4 2357 return;
2358
2359 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2360 and see if we can refine the range. Some of the cases below may not
2361 happen, but it doesn't seem worth worrying about this. We "continue"
2362 the outer loop when we've changed something; otherwise we "break"
2363 the switch, which will "break" the while. */
8a2c7744 2364 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
946e9eb4 2365 high = low;
2366 in_p = 0;
2367 strict_overflow_p = false;
2368 is_bool = false;
8a2c7744 2369 if (exp == NULL_TREE)
2370 is_bool = true;
2371 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
946e9eb4 2372 {
2373 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2374 is_bool = true;
2375 else
2376 return;
2377 }
2378 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2379 is_bool = true;
2380
2381 while (1)
2382 {
946e9eb4 2383 enum tree_code code;
2384 tree arg0, arg1, exp_type;
2385 tree nexp;
2386 location_t loc;
2387
8a2c7744 2388 if (exp != NULL_TREE)
2389 {
81cab242 2390 if (TREE_CODE (exp) != SSA_NAME
2391 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
8a2c7744 2392 break;
946e9eb4 2393
8a2c7744 2394 stmt = SSA_NAME_DEF_STMT (exp);
2395 if (!is_gimple_assign (stmt))
2396 break;
2397
2398 code = gimple_assign_rhs_code (stmt);
2399 arg0 = gimple_assign_rhs1 (stmt);
2400 arg1 = gimple_assign_rhs2 (stmt);
2401 exp_type = TREE_TYPE (exp);
2402 }
2403 else
2404 {
2405 code = gimple_cond_code (stmt);
2406 arg0 = gimple_cond_lhs (stmt);
2407 arg1 = gimple_cond_rhs (stmt);
2408 exp_type = boolean_type_node;
2409 }
946e9eb4 2410
a5189c16 2411 if (TREE_CODE (arg0) != SSA_NAME
2412 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
9adacac7 2413 break;
946e9eb4 2414 loc = gimple_location (stmt);
2415 switch (code)
2416 {
2417 case BIT_NOT_EXPR:
e423480a 2418 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2419 /* Ensure the range is either +[-,0], +[0,0],
2420 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2421 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2422 or similar expression of unconditional true or
2423 false, it should not be negated. */
2424 && ((high && integer_zerop (high))
2425 || (low && integer_onep (low))))
946e9eb4 2426 {
2427 in_p = !in_p;
2428 exp = arg0;
2429 continue;
2430 }
2431 break;
2432 case SSA_NAME:
2433 exp = arg0;
2434 continue;
2435 CASE_CONVERT:
2436 if (is_bool)
c7aed3df 2437 {
2438 if ((TYPE_PRECISION (exp_type) == 1
2439 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2440 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2441 return;
2442 }
2443 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
946e9eb4 2444 {
2445 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2446 is_bool = true;
2447 else
2448 return;
2449 }
2450 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2451 is_bool = true;
2452 goto do_default;
2453 case EQ_EXPR:
2454 case NE_EXPR:
2455 case LT_EXPR:
2456 case LE_EXPR:
2457 case GE_EXPR:
2458 case GT_EXPR:
2459 is_bool = true;
2460 /* FALLTHRU */
2461 default:
2462 if (!is_bool)
2463 return;
2464 do_default:
2465 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2466 &low, &high, &in_p,
2467 &strict_overflow_p);
2468 if (nexp != NULL_TREE)
2469 {
2470 exp = nexp;
2471 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2472 continue;
2473 }
2474 break;
2475 }
2476 break;
2477 }
2478 if (is_bool)
2479 {
2480 r->exp = exp;
2481 r->in_p = in_p;
2482 r->low = low;
2483 r->high = high;
2484 r->strict_overflow_p = strict_overflow_p;
2485 }
2486}
2487
2488/* Comparison function for qsort. Sort entries
2489 without SSA_NAME exp first, then with SSA_NAMEs sorted
2490 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2491 by increasing ->low and if ->low is the same, by increasing
2492 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2493 maximum. */
2494
2495static int
2496range_entry_cmp (const void *a, const void *b)
2497{
2498 const struct range_entry *p = (const struct range_entry *) a;
2499 const struct range_entry *q = (const struct range_entry *) b;
2500
2501 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2502 {
2503 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2504 {
2505 /* Group range_entries for the same SSA_NAME together. */
2506 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2507 return -1;
2508 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2509 return 1;
2510 /* If ->low is different, NULL low goes first, then by
2511 ascending low. */
2512 if (p->low != NULL_TREE)
2513 {
2514 if (q->low != NULL_TREE)
2515 {
2516 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2517 p->low, q->low);
2518 if (tem && integer_onep (tem))
2519 return -1;
2520 tem = fold_binary (GT_EXPR, boolean_type_node,
2521 p->low, q->low);
2522 if (tem && integer_onep (tem))
2523 return 1;
2524 }
2525 else
2526 return 1;
2527 }
2528 else if (q->low != NULL_TREE)
2529 return -1;
2530 /* If ->high is different, NULL high goes last, before that by
2531 ascending high. */
2532 if (p->high != NULL_TREE)
2533 {
2534 if (q->high != NULL_TREE)
2535 {
2536 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2537 p->high, q->high);
2538 if (tem && integer_onep (tem))
2539 return -1;
2540 tem = fold_binary (GT_EXPR, boolean_type_node,
2541 p->high, q->high);
2542 if (tem && integer_onep (tem))
2543 return 1;
2544 }
2545 else
2546 return -1;
2547 }
eff8617c 2548 else if (q->high != NULL_TREE)
946e9eb4 2549 return 1;
2550 /* If both ranges are the same, sort below by ascending idx. */
2551 }
2552 else
2553 return 1;
2554 }
2555 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2556 return -1;
2557
2558 if (p->idx < q->idx)
2559 return -1;
2560 else
2561 {
2562 gcc_checking_assert (p->idx > q->idx);
2563 return 1;
2564 }
2565}
2566
070dd4d4 2567/* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2568   inserting the needed statements before or after GSI, as per BEFORE.  */
2569
2570static tree
2571force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2572{
2573 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2574 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2575 if (TREE_CODE (ret) != SSA_NAME)
2576 {
2577 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2578 if (before)
2579 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2580 else
2581 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2582 ret = gimple_assign_lhs (g);
2583 }
2584 return ret;
2585}
2586
946e9eb4 2587/* Helper routine of optimize_range_tests.
2588 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2589 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
e3668db5 2590 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2591 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2592 an array of COUNT pointers to other ranges. Return
8a2c7744 2593 true if the range merge has been successful.
2594 If OPCODE is ERROR_MARK, this is called from within
2595 maybe_optimize_range_tests and is performing inter-bb range optimization.
82cb4e1c 2596 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2597 oe->rank. */
946e9eb4 2598
2599static bool
2600update_range_test (struct range_entry *range, struct range_entry *otherrange,
e3668db5 2601 struct range_entry **otherrangep,
946e9eb4 2602 unsigned int count, enum tree_code opcode,
04009ada 2603 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
e3668db5 2604 bool in_p, tree low, tree high, bool strict_overflow_p)
946e9eb4 2605{
04009ada 2606 operand_entry *oe = (*ops)[range->idx];
8a2c7744 2607 tree op = oe->op;
14a72c4e 2608 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2609 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
8a2c7744 2610 location_t loc = gimple_location (stmt);
2611 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
e3668db5 2612 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2613 in_p, low, high);
946e9eb4 2614 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2615 gimple_stmt_iterator gsi;
14a72c4e 2616 unsigned int i, uid;
946e9eb4 2617
2618 if (tem == NULL_TREE)
2619 return false;
2620
14a72c4e 2621 /* If op is default def SSA_NAME, there is no place to insert the
2622 new comparison. Give up, unless we can use OP itself as the
2623 range test. */
2624 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2625 {
2626 if (op == range->exp
2627 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2628 || TREE_CODE (optype) == BOOLEAN_TYPE)
2629 && (op == tem
2630 || (TREE_CODE (tem) == EQ_EXPR
2631 && TREE_OPERAND (tem, 0) == op
2632 && integer_onep (TREE_OPERAND (tem, 1))))
2633 && opcode != BIT_IOR_EXPR
2634 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2635 {
2636 stmt = NULL;
2637 tem = op;
2638 }
2639 else
2640 return false;
2641 }
2642
946e9eb4 2643 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2644 warning_at (loc, OPT_Wstrict_overflow,
2645 "assuming signed overflow does not occur "
2646 "when simplifying range test");
2647
2648 if (dump_file && (dump_flags & TDF_DETAILS))
2649 {
2650 struct range_entry *r;
2651 fprintf (dump_file, "Optimizing range tests ");
1ffa4346 2652 print_generic_expr (dump_file, range->exp);
946e9eb4 2653 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1ffa4346 2654 print_generic_expr (dump_file, range->low);
946e9eb4 2655 fprintf (dump_file, ", ");
1ffa4346 2656 print_generic_expr (dump_file, range->high);
946e9eb4 2657 fprintf (dump_file, "]");
e3668db5 2658 for (i = 0; i < count; i++)
946e9eb4 2659 {
e3668db5 2660 if (otherrange)
2661 r = otherrange + i;
2662 else
2663 r = otherrangep[i];
4c168df0 2664 if (r->exp
2665 && r->exp != range->exp
2666 && TREE_CODE (r->exp) == SSA_NAME)
2667 {
2668 fprintf (dump_file, " and ");
2669 print_generic_expr (dump_file, r->exp);
2670 }
2671 else
2672 fprintf (dump_file, " and");
2673 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
1ffa4346 2674 print_generic_expr (dump_file, r->low);
946e9eb4 2675 fprintf (dump_file, ", ");
1ffa4346 2676 print_generic_expr (dump_file, r->high);
946e9eb4 2677 fprintf (dump_file, "]");
2678 }
2679 fprintf (dump_file, "\n into ");
1ffa4346 2680 print_generic_expr (dump_file, tem);
946e9eb4 2681 fprintf (dump_file, "\n");
2682 }
2683
8a2c7744 2684 if (opcode == BIT_IOR_EXPR
2685 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
946e9eb4 2686 tem = invert_truthvalue_loc (loc, tem);
2687
8a2c7744 2688 tem = fold_convert_loc (loc, optype, tem);
14a72c4e 2689 if (stmt)
2690 {
2691 gsi = gsi_for_stmt (stmt);
2692 uid = gimple_uid (stmt);
2693 }
2694 else
2695 {
2696 gsi = gsi_none ();
2697 uid = 0;
2698 }
2699 if (stmt == NULL)
2700 gcc_checking_assert (tem == op);
a0ebffaa 2701 /* In rare cases range->exp can be equal to lhs of stmt.
2702     In that case we have to insert after the stmt rather than before
927d5076 2703 it. If stmt is a PHI, insert it at the start of the basic block. */
14a72c4e 2704 else if (op != range->exp)
927d5076 2705 {
2706 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
070dd4d4 2707 tem = force_into_ssa_name (&gsi, tem, true);
927d5076 2708 gsi_prev (&gsi);
2709 }
2710 else if (gimple_code (stmt) != GIMPLE_PHI)
e3668db5 2711 {
2712 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
070dd4d4 2713 tem = force_into_ssa_name (&gsi, tem, false);
e3668db5 2714 }
a0ebffaa 2715 else
2716 {
927d5076 2717 gsi = gsi_after_labels (gimple_bb (stmt));
2718 if (!gsi_end_p (gsi))
2719 uid = gimple_uid (gsi_stmt (gsi));
2720 else
2721 {
2722 gsi = gsi_start_bb (gimple_bb (stmt));
2723 uid = 1;
2724 while (!gsi_end_p (gsi))
2725 {
2726 uid = gimple_uid (gsi_stmt (gsi));
2727 gsi_next (&gsi);
2728 }
2729 }
e3668db5 2730 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
070dd4d4 2731 tem = force_into_ssa_name (&gsi, tem, true);
927d5076 2732 if (gsi_end_p (gsi))
2733 gsi = gsi_last_bb (gimple_bb (stmt));
2734 else
2735 gsi_prev (&gsi);
a0ebffaa 2736 }
2737 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
82cb4e1c 2738 if (gimple_uid (gsi_stmt (gsi)))
2739 break;
2740 else
927d5076 2741 gimple_set_uid (gsi_stmt (gsi), uid);
946e9eb4 2742
8a2c7744 2743 oe->op = tem;
946e9eb4 2744 range->exp = exp;
2745 range->low = low;
2746 range->high = high;
2747 range->in_p = in_p;
2748 range->strict_overflow_p = false;
2749
e3668db5 2750 for (i = 0; i < count; i++)
946e9eb4 2751 {
e3668db5 2752 if (otherrange)
2753 range = otherrange + i;
2754 else
2755 range = otherrangep[i];
f1f41a6c 2756 oe = (*ops)[range->idx];
8a2c7744 2757 /* Now change all the other range test immediate uses, so that
2758 those tests will be optimized away. */
2759 if (opcode == ERROR_MARK)
2760 {
2761 if (oe->op)
82cb4e1c 2762 oe->op = build_int_cst (TREE_TYPE (oe->op),
2763 oe->rank == BIT_IOR_EXPR ? 0 : 1);
8a2c7744 2764 else
82cb4e1c 2765 oe->op = (oe->rank == BIT_IOR_EXPR
2766 ? boolean_false_node : boolean_true_node);
8a2c7744 2767 }
82cb4e1c 2768 else
2769 oe->op = error_mark_node;
946e9eb4 2770 range->exp = NULL_TREE;
fafde080 2771 range->low = NULL_TREE;
2772 range->high = NULL_TREE;
946e9eb4 2773 }
2774 return true;
2775}
2776
b0c0c879 2777/* Optimize X == CST1 || X == CST2
2778 if popcount (CST1 ^ CST2) == 1 into
2779 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2780 Similarly for ranges. E.g.
2781 X != 2 && X != 3 && X != 10 && X != 11
2782 will be transformed by the previous optimization into
2783 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2784 and this loop can transform that into
2785 !(((X & ~8) - 2U) <= 1U). */
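/* A quick check of the example above: the merged ranges are [2, 3] and
   [10, 11]; lowi ^ lowj == 2 ^ 10 == 8 == 3 ^ 11 == highi ^ highj and it
   has popcount 1, so clearing bit 3 folds both ranges onto [2, 3]:
   X & ~8 maps exactly {2, 3, 10, 11} into that range and nothing else.  */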
2786
2787static bool
2788optimize_range_tests_xor (enum tree_code opcode, tree type,
2789 tree lowi, tree lowj, tree highi, tree highj,
04009ada 2790 vec<operand_entry *> *ops,
b0c0c879 2791 struct range_entry *rangei,
2792 struct range_entry *rangej)
2793{
2794 tree lowxor, highxor, tem, exp;
e3668db5 2795 /* Check lowi ^ lowj == highi ^ highj and
2796 popcount (lowi ^ lowj) == 1. */
b0c0c879 2797 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2798 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2799 return false;
8016354a 2800 if (!integer_pow2p (lowxor))
b0c0c879 2801 return false;
2802 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2803 if (!tree_int_cst_equal (lowxor, highxor))
2804 return false;
2805
fe7507dc 2806 exp = rangei->exp;
2807 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2808 int prec = GET_MODE_PRECISION (mode);
2809 if (TYPE_PRECISION (type) < prec
2810 || (wi::to_wide (TYPE_MIN_VALUE (type))
2811 != wi::min_value (prec, TYPE_SIGN (type)))
2812 || (wi::to_wide (TYPE_MAX_VALUE (type))
2813 != wi::max_value (prec, TYPE_SIGN (type))))
2814 {
2815 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2816 exp = fold_convert (type, exp);
2817 lowxor = fold_convert (type, lowxor);
2818 lowi = fold_convert (type, lowi);
2819 highi = fold_convert (type, highi);
2820 }
b0c0c879 2821 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
fe7507dc 2822 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
b0c0c879 2823 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2824 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
e3668db5 2825 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2826 NULL, rangei->in_p, lowj, highj,
b0c0c879 2827 rangei->strict_overflow_p
2828 || rangej->strict_overflow_p))
2829 return true;
2830 return false;
2831}
2832
2833/* Optimize X == CST1 || X == CST2
2834 if popcount (CST2 - CST1) == 1 into
2835 ((X - CST1) & ~(CST2 - CST1)) == 0.
2836 Similarly for ranges. E.g.
2837 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2838 || X == 75 || X == 45
2839 will be transformed by the previous optimization into
2840 (X - 43U) <= 3U || (X - 75U) <= 3U
2841 and this loop can transform that into
2842 ((X - 43U) & ~(75U - 43U)) <= 3U. */
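/* A quick check of the example above: highi - lowi == highj - lowj == 3
   and lowj - lowi == 75 - 43 == 32 has popcount 1.  For X in [43, 46],
   X - 43U is already 0 .. 3; for X in [75, 78], X - 43U is 32 .. 35 and
   clearing bit 5 maps it back onto 0 .. 3.  For any other X a bit outside
   the mask survives, so the combined test accepts exactly the original
   eight values.  */
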
2843static bool
2844optimize_range_tests_diff (enum tree_code opcode, tree type,
fafde080 2845 tree lowi, tree lowj, tree highi, tree highj,
2846 vec<operand_entry *> *ops,
2847 struct range_entry *rangei,
2848 struct range_entry *rangej)
b0c0c879 2849{
2850 tree tem1, tem2, mask;
2851 /* Check highi - lowi == highj - lowj. */
2852 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2853 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2854 return false;
2855 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2856 if (!tree_int_cst_equal (tem1, tem2))
2857 return false;
2858 /* Check popcount (lowj - lowi) == 1. */
2859 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2860 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2861 return false;
8016354a 2862 if (!integer_pow2p (tem1))
b0c0c879 2863 return false;
2864
fe7507dc 2865 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2866 int prec = GET_MODE_PRECISION (mode);
2867 if (TYPE_PRECISION (type) < prec
2868 || (wi::to_wide (TYPE_MIN_VALUE (type))
2869 != wi::min_value (prec, TYPE_SIGN (type)))
2870 || (wi::to_wide (TYPE_MAX_VALUE (type))
2871 != wi::max_value (prec, TYPE_SIGN (type))))
2872 type = build_nonstandard_integer_type (prec, 1);
2873 else
2874 type = unsigned_type_for (type);
db7164e3 2875 tem1 = fold_convert (type, tem1);
2876 tem2 = fold_convert (type, tem2);
2877 lowi = fold_convert (type, lowi);
b0c0c879 2878 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
da78c088 2879 tem1 = fold_build2 (MINUS_EXPR, type,
db7164e3 2880 fold_convert (type, rangei->exp), lowi);
b0c0c879 2881 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2882 lowj = build_int_cst (type, 0);
e3668db5 2883 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2884 NULL, rangei->in_p, lowj, tem2,
b0c0c879 2885 rangei->strict_overflow_p
2886 || rangej->strict_overflow_p))
2887 return true;
2888 return false;
2889}
2890
2891/* Perform the checks common to optimize_range_tests_xor and
 2892   optimize_range_tests_diff, then dispatch to one of them:
 2893   if OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
 2894   else call optimize_range_tests_diff.  */
2895
2896static bool
2897optimize_range_tests_1 (enum tree_code opcode, int first, int length,
04009ada 2898 bool optimize_xor, vec<operand_entry *> *ops,
b0c0c879 2899 struct range_entry *ranges)
2900{
2901 int i, j;
2902 bool any_changes = false;
2903 for (i = first; i < length; i++)
2904 {
2905 tree lowi, highi, lowj, highj, type, tem;
2906
2907 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2908 continue;
2909 type = TREE_TYPE (ranges[i].exp);
2910 if (!INTEGRAL_TYPE_P (type))
2911 continue;
2912 lowi = ranges[i].low;
2913 if (lowi == NULL_TREE)
2914 lowi = TYPE_MIN_VALUE (type);
2915 highi = ranges[i].high;
2916 if (highi == NULL_TREE)
2917 continue;
2918 for (j = i + 1; j < length && j < i + 64; j++)
2919 {
2920 bool changes;
2921 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2922 continue;
2923 lowj = ranges[j].low;
2924 if (lowj == NULL_TREE)
2925 continue;
2926 highj = ranges[j].high;
2927 if (highj == NULL_TREE)
2928 highj = TYPE_MAX_VALUE (type);
2929 /* Check lowj > highi. */
2930 tem = fold_binary (GT_EXPR, boolean_type_node,
2931 lowj, highi);
2932 if (tem == NULL_TREE || !integer_onep (tem))
2933 continue;
2934 if (optimize_xor)
2935 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2936 highi, highj, ops,
2937 ranges + i, ranges + j);
2938 else
2939 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2940 highi, highj, ops,
2941 ranges + i, ranges + j);
2942 if (changes)
2943 {
2944 any_changes = true;
2945 break;
2946 }
2947 }
2948 }
2949 return any_changes;
2950}
2951
e3668db5 2952/* Helper function of optimize_range_tests_to_bit_test. Handle a single
2953 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2954 EXP on success, NULL otherwise. */
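/* E.g. (hypothetical values, PREC == 64): for the range [45, 47] with
   TOTALLOW == 43, the mask covering high - low + 1 == 3 values is 0x7,
   which is then shifted left by low - totallow == 2 to give 0x1c,
   i.e. bits 45 - 43 .. 47 - 43.  */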
2955
2956static tree
2957extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2958 wide_int *mask, tree *totallowp)
2959{
2960 tree tem = int_const_binop (MINUS_EXPR, high, low);
2961 if (tem == NULL_TREE
2962 || TREE_CODE (tem) != INTEGER_CST
2963 || TREE_OVERFLOW (tem)
2964 || tree_int_cst_sgn (tem) == -1
2965 || compare_tree_int (tem, prec) != -1)
2966 return NULL_TREE;
2967
2968 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2969 *mask = wi::shifted_mask (0, max, false, prec);
2970 if (TREE_CODE (exp) == BIT_AND_EXPR
2971 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2972 {
2973 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2974 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2975 if (wi::popcount (msk) == 1
2976 && wi::ltu_p (msk, prec - max))
2977 {
2978 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2979 max += msk.to_uhwi ();
2980 exp = TREE_OPERAND (exp, 0);
2981 if (integer_zerop (low)
2982 && TREE_CODE (exp) == PLUS_EXPR
2983 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2984 {
f6835415 2985 tree ret = TREE_OPERAND (exp, 0);
2986 STRIP_NOPS (ret);
e3668db5 2987 widest_int bias
2988 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2989 TYPE_PRECISION (TREE_TYPE (low))));
f6835415 2990 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
e3668db5 2991 if (totallowp)
2992 {
2993 *totallowp = tbias;
f6835415 2994 return ret;
e3668db5 2995 }
2996 else if (!tree_int_cst_lt (totallow, tbias))
2997 return NULL_TREE;
f6835415 2998 bias = wi::to_widest (tbias);
e3668db5 2999 bias -= wi::to_widest (totallow);
32115eac 3000 if (bias >= 0 && bias < prec - max)
e3668db5 3001 {
3002 *mask = wi::lshift (*mask, bias);
f6835415 3003 return ret;
e3668db5 3004 }
3005 }
3006 }
3007 }
3008 if (totallowp)
3009 return exp;
3010 if (!tree_int_cst_lt (totallow, low))
3011 return exp;
3012 tem = int_const_binop (MINUS_EXPR, low, totallow);
3013 if (tem == NULL_TREE
3014 || TREE_CODE (tem) != INTEGER_CST
3015 || TREE_OVERFLOW (tem)
3016 || compare_tree_int (tem, prec - max) == 1)
3017 return NULL_TREE;
3018
3019 *mask = wi::lshift (*mask, wi::to_widest (tem));
3020 return exp;
3021}
3022
3023/* Attempt to optimize small range tests using a bit test.
 3024   E.g.
 3025   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
 3026   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
 3027   has been optimized by the earlier optimizations into:
 3028   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
 3029   As the whole range 43 through 82 spans fewer than 64 values,
 3030   for 64-bit word targets optimize that into:
 3031   (X - 43U) > 39U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
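/* The mask above can be checked by hand: the tested values 43 .. 46, 49,
   75 .. 78 and 82 correspond to the set bits X - 43, i.e. bits 0, 1, 2,
   3, 6, 32, 33, 34, 35 and 39, and
   0x4F | 0xF00000000 | 0x8000000000 == 0x8F0000004FULL.  */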
3032
3033static bool
3034optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
04009ada 3035 vec<operand_entry *> *ops,
e3668db5 3036 struct range_entry *ranges)
3037{
3038 int i, j;
3039 bool any_changes = false;
3040 int prec = GET_MODE_BITSIZE (word_mode);
3041 auto_vec<struct range_entry *, 64> candidates;
3042
3043 for (i = first; i < length - 2; i++)
3044 {
3045 tree lowi, highi, lowj, highj, type;
3046
3047 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3048 continue;
3049 type = TREE_TYPE (ranges[i].exp);
3050 if (!INTEGRAL_TYPE_P (type))
3051 continue;
3052 lowi = ranges[i].low;
3053 if (lowi == NULL_TREE)
3054 lowi = TYPE_MIN_VALUE (type);
3055 highi = ranges[i].high;
3056 if (highi == NULL_TREE)
3057 continue;
3058 wide_int mask;
3059 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3060 highi, &mask, &lowi);
3061 if (exp == NULL_TREE)
3062 continue;
3063 bool strict_overflow_p = ranges[i].strict_overflow_p;
3064 candidates.truncate (0);
3065 int end = MIN (i + 64, length);
3066 for (j = i + 1; j < end; j++)
3067 {
3068 tree exp2;
3069 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3070 continue;
3071 if (ranges[j].exp == exp)
3072 ;
3073 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3074 {
3075 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3076 if (exp2 == exp)
3077 ;
3078 else if (TREE_CODE (exp2) == PLUS_EXPR)
3079 {
3080 exp2 = TREE_OPERAND (exp2, 0);
3081 STRIP_NOPS (exp2);
3082 if (exp2 != exp)
3083 continue;
3084 }
3085 else
3086 continue;
3087 }
3088 else
3089 continue;
3090 lowj = ranges[j].low;
3091 if (lowj == NULL_TREE)
3092 continue;
3093 highj = ranges[j].high;
3094 if (highj == NULL_TREE)
3095 highj = TYPE_MAX_VALUE (type);
3096 wide_int mask2;
3097 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3098 highj, &mask2, NULL);
3099 if (exp2 != exp)
3100 continue;
3101 mask |= mask2;
3102 strict_overflow_p |= ranges[j].strict_overflow_p;
3103 candidates.safe_push (&ranges[j]);
3104 }
3105
3106      /* If we would otherwise need 3 or more comparisons, use a bit test.  */
3107 if (candidates.length () >= 2)
3108 {
3109 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3110 wi::to_widest (lowi)
8f936b5e 3111 + prec - 1 - wi::clz (mask));
04009ada 3112 operand_entry *oe = (*ops)[ranges[i].idx];
e3668db5 3113 tree op = oe->op;
42acab1c 3114 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
14a72c4e 3115 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
e3668db5 3116 location_t loc = gimple_location (stmt);
3117 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3118
3119	  /* See if it isn't cheaper to pretend the minimum value of the
3120	     range is 0, if the maximum value is small enough.
3121	     We can then avoid subtracting the minimum value, but the
3122	     mask constant could perhaps be more expensive.  */
3123 if (compare_tree_int (lowi, 0) > 0
3124 && compare_tree_int (high, prec) < 0)
3125 {
3126 int cost_diff;
3127 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3128 rtx reg = gen_raw_REG (word_mode, 10000);
3129 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3130 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
3131 GEN_INT (-m)), speed_p);
3132 rtx r = immed_wide_int_const (mask, word_mode);
3133 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
5ae4887d 3134 word_mode, speed_p);
e3668db5 3135 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3136 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
5ae4887d 3137 word_mode, speed_p);
e3668db5 3138 if (cost_diff > 0)
3139 {
3140 mask = wi::lshift (mask, m);
3141 lowi = build_zero_cst (TREE_TYPE (lowi));
3142 }
3143 }
3144
3145 tree tem = build_range_check (loc, optype, unshare_expr (exp),
3146 false, lowi, high);
3147 if (tem == NULL_TREE || is_gimple_val (tem))
3148 continue;
3149 tree etype = unsigned_type_for (TREE_TYPE (exp));
3150 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3151 fold_convert_loc (loc, etype, exp),
3152 fold_convert_loc (loc, etype, lowi));
3153 exp = fold_convert_loc (loc, integer_type_node, exp);
3154 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3155 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3156 build_int_cst (word_type, 1), exp);
3157 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3158 wide_int_to_tree (word_type, mask));
3159 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3160 build_zero_cst (word_type));
3161 if (is_gimple_val (exp))
3162 continue;
3163
3164 /* The shift might have undefined behavior if TEM is true,
3165 but reassociate_bb isn't prepared to have basic blocks
3166 split when it is running. So, temporarily emit a code
3167 with BIT_IOR_EXPR instead of &&, and fix it up in
3168 branch_fixup. */
3169 gimple_seq seq;
3170 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3171 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3172 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3173 gimple_seq seq2;
3174 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3175 gimple_seq_add_seq_without_update (&seq, seq2);
3176 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3177 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
42acab1c 3178 gimple *g = gimple_build_assign (make_ssa_name (optype),
14a72c4e 3179 BIT_IOR_EXPR, tem, exp);
e3668db5 3180 gimple_set_location (g, loc);
3181 gimple_seq_add_stmt_without_update (&seq, g);
3182 exp = gimple_assign_lhs (g);
3183 tree val = build_zero_cst (optype);
3184 if (update_range_test (&ranges[i], NULL, candidates.address (),
3185 candidates.length (), opcode, ops, exp,
3186 seq, false, val, val, strict_overflow_p))
3187 {
3188 any_changes = true;
3189 reassoc_branch_fixups.safe_push (tem);
3190 }
3191 else
3192 gimple_seq_discard (seq);
3193 }
3194 }
3195 return any_changes;
3196}
3197
4c168df0 3198/* Optimize x == 0 && y == 0 && z == 0 into (x | y | z) == 0 and
 3199   similarly x == -1 && y == -1 && z == -1 into (x & y & z) == -1;
   likewise for the inverted tests combined with ||, e.g.
   x != 0 || y != 0 || z != 0 into (x | y | z) != 0.  */
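/* A quick sanity check: (x | y | z) == 0 holds iff no bit is set in any
   of x, y and z, which is exactly x == 0 && y == 0 && z == 0; dually,
   (x & y & z) == -1 requires every bit to be set in all three operands.  */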
3200
3201static bool
3202optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3203 vec<operand_entry *> *ops,
3204 struct range_entry *ranges)
3205{
3206 int i;
3207 unsigned int b;
3208 bool any_changes = false;
3209 auto_vec<int, 128> buckets;
3210 auto_vec<int, 32> chains;
3211 auto_vec<struct range_entry *, 32> candidates;
3212
3213 for (i = first; i < length; i++)
3214 {
3215 if (ranges[i].exp == NULL_TREE
3216 || TREE_CODE (ranges[i].exp) != SSA_NAME
3217 || !ranges[i].in_p
3218 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3219 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3220 || ranges[i].low == NULL_TREE
3221 || ranges[i].low != ranges[i].high)
3222 continue;
3223
3224 bool zero_p = integer_zerop (ranges[i].low);
3225 if (!zero_p && !integer_all_onesp (ranges[i].low))
3226 continue;
3227
3228 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3229 if (buckets.length () <= b)
3230 buckets.safe_grow_cleared (b + 1);
3231 if (chains.length () <= (unsigned) i)
3232 chains.safe_grow (i + 1);
3233 chains[i] = buckets[b];
3234 buckets[b] = i + 1;
3235 }
3236
3237 FOR_EACH_VEC_ELT (buckets, b, i)
3238 if (i && chains[i - 1])
3239 {
3240 int j, k = i;
3241 for (j = chains[i - 1]; j; j = chains[j - 1])
3242 {
3243 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3244 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3245 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3246 k = j;
3247 }
3248 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3249 tree type2 = NULL_TREE;
3250 bool strict_overflow_p = false;
3251 candidates.truncate (0);
3252 for (j = i; j; j = chains[j - 1])
3253 {
3254 tree type = TREE_TYPE (ranges[j - 1].exp);
3255 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3256 if (j == k
3257 || useless_type_conversion_p (type1, type))
3258 ;
3259 else if (type2 == NULL_TREE
3260 || useless_type_conversion_p (type2, type))
3261 {
3262 if (type2 == NULL_TREE)
3263 type2 = type;
3264 candidates.safe_push (&ranges[j - 1]);
3265 }
3266 }
3267 unsigned l = candidates.length ();
3268 for (j = i; j; j = chains[j - 1])
3269 {
3270 tree type = TREE_TYPE (ranges[j - 1].exp);
3271 if (j == k)
3272 continue;
3273 if (useless_type_conversion_p (type1, type))
3274 ;
3275 else if (type2 == NULL_TREE
3276 || useless_type_conversion_p (type2, type))
3277 continue;
3278 candidates.safe_push (&ranges[j - 1]);
3279 }
3280 gimple_seq seq = NULL;
3281 tree op = NULL_TREE;
3282 unsigned int id;
3283 struct range_entry *r;
3284 candidates.safe_push (&ranges[k - 1]);
3285 FOR_EACH_VEC_ELT (candidates, id, r)
3286 {
3287 gimple *g;
3288 if (id == 0)
3289 {
3290 op = r->exp;
3291 continue;
3292 }
3293 if (id == l)
3294 {
3295 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3296 gimple_seq_add_stmt_without_update (&seq, g);
3297 op = gimple_assign_lhs (g);
3298 }
3299 tree type = TREE_TYPE (r->exp);
3300 tree exp = r->exp;
3301 if (id >= l && !useless_type_conversion_p (type1, type))
3302 {
3303 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3304 gimple_seq_add_stmt_without_update (&seq, g);
3305 exp = gimple_assign_lhs (g);
3306 }
3307 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3308 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3309 op, exp);
3310 gimple_seq_add_stmt_without_update (&seq, g);
3311 op = gimple_assign_lhs (g);
3312 }
3313 candidates.pop ();
3314 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3315 candidates.length (), opcode, ops, op,
3316 seq, true, ranges[k - 1].low,
3317 ranges[k - 1].low, strict_overflow_p))
3318 any_changes = true;
3319 else
3320 gimple_seq_discard (seq);
3321 }
3322
3323 return any_changes;
3324}
3325
fafde080 3326/* Attempt to optimize for signed a and b where b is known to be >= 0:
3327 a >= 0 && a < b into (unsigned) a < (unsigned) b
3328 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
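/* A short justification: when a >= 0, (unsigned) a equals a numerically,
   and b >= 0 means (unsigned) b does too, so the unsigned comparison
   agrees with the signed one; when a < 0, (unsigned) a has the sign bit
   set and thus exceeds any b whose sign bit is clear, so the unsigned
   test is false, matching the failed a >= 0 conjunct.  */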
3329
3330static bool
3331optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3332 vec<operand_entry *> *ops,
54ec8b11 3333 struct range_entry *ranges,
3334 basic_block first_bb)
fafde080 3335{
3336 int i;
3337 bool any_changes = false;
3338 hash_map<tree, int> *map = NULL;
3339
3340 for (i = first; i < length; i++)
3341 {
b4d09fc1 3342 if (ranges[i].exp == NULL_TREE
3343 || TREE_CODE (ranges[i].exp) != SSA_NAME
3344 || !ranges[i].in_p)
fafde080 3345 continue;
3346
3347 tree type = TREE_TYPE (ranges[i].exp);
3348 if (!INTEGRAL_TYPE_P (type)
3349 || TYPE_UNSIGNED (type)
3350 || ranges[i].low == NULL_TREE
3351 || !integer_zerop (ranges[i].low)
3352 || ranges[i].high != NULL_TREE)
3353 continue;
3354 /* EXP >= 0 here. */
3355 if (map == NULL)
3356 map = new hash_map <tree, int>;
3357 map->put (ranges[i].exp, i);
3358 }
3359
3360 if (map == NULL)
3361 return false;
3362
3363 for (i = 0; i < length; i++)
3364 {
493a1c52 3365 bool in_p = ranges[i].in_p;
fafde080 3366 if (ranges[i].low == NULL_TREE
493a1c52 3367 || ranges[i].high == NULL_TREE)
fafde080 3368 continue;
493a1c52 3369 if (!integer_zerop (ranges[i].low)
3370 || !integer_zerop (ranges[i].high))
3371 {
3372 if (ranges[i].exp
3373 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3374 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3375 && integer_onep (ranges[i].low)
3376 && integer_onep (ranges[i].high))
3377 in_p = !in_p;
3378 else
3379 continue;
3380 }
fafde080 3381
3382 gimple *stmt;
3383 tree_code ccode;
3384 tree rhs1, rhs2;
3385 if (ranges[i].exp)
3386 {
b4d09fc1 3387 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3388 continue;
fafde080 3389 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3390 if (!is_gimple_assign (stmt))
3391 continue;
3392 ccode = gimple_assign_rhs_code (stmt);
3393 rhs1 = gimple_assign_rhs1 (stmt);
3394 rhs2 = gimple_assign_rhs2 (stmt);
3395 }
3396 else
3397 {
3398 operand_entry *oe = (*ops)[ranges[i].idx];
3399 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3400 if (gimple_code (stmt) != GIMPLE_COND)
3401 continue;
3402 ccode = gimple_cond_code (stmt);
3403 rhs1 = gimple_cond_lhs (stmt);
3404 rhs2 = gimple_cond_rhs (stmt);
3405 }
3406
3407 if (TREE_CODE (rhs1) != SSA_NAME
3408 || rhs2 == NULL_TREE
3409 || TREE_CODE (rhs2) != SSA_NAME)
3410 continue;
3411
3412 switch (ccode)
3413 {
3414 case GT_EXPR:
3415 case GE_EXPR:
42fad061 3416 case LT_EXPR:
3417 case LE_EXPR:
3418 break;
3419 default:
3420 continue;
3421 }
493a1c52 3422 if (in_p)
42fad061 3423 ccode = invert_tree_comparison (ccode, false);
3424 switch (ccode)
3425 {
3426 case GT_EXPR:
3427 case GE_EXPR:
3428 std::swap (rhs1, rhs2);
fafde080 3429 ccode = swap_tree_comparison (ccode);
3430 break;
3431 case LT_EXPR:
3432 case LE_EXPR:
fafde080 3433 break;
3434 default:
42fad061 3435 gcc_unreachable ();
fafde080 3436 }
3437
3438 int *idx = map->get (rhs1);
3439 if (idx == NULL)
3440 continue;
3441
54ec8b11 3442 /* maybe_optimize_range_tests allows statements without side-effects
3443 in the basic blocks as long as they are consumed in the same bb.
3444     Make sure rhs2's def stmt is not among them, otherwise we can't
3445     safely use get_nonzero_bits on it.  E.g. in:
3446 # RANGE [-83, 1] NONZERO 173
3447 # k_32 = PHI <k_47(13), k_12(9)>
3448 ...
3449 if (k_32 >= 0)
3450 goto <bb 5>; [26.46%]
3451 else
3452 goto <bb 9>; [73.54%]
3453
3454 <bb 5> [local count: 140323371]:
3455 # RANGE [0, 1] NONZERO 1
3456 _5 = (int) k_32;
3457 # RANGE [0, 4] NONZERO 4
3458 _21 = _5 << 2;
3459 # RANGE [0, 4] NONZERO 4
3460 iftmp.0_44 = (char) _21;
3461 if (k_32 < iftmp.0_44)
3462 goto <bb 6>; [84.48%]
3463 else
3464 goto <bb 9>; [15.52%]
3465	 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3466 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3467 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3468 those stmts even for negative k_32 and the value ranges would be no
3469 longer guaranteed and so the optimization would be invalid. */
ccf8d652 3470 while (opcode == ERROR_MARK)
54ec8b11 3471 {
3472 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3473 basic_block bb2 = gimple_bb (g);
3474 if (bb2
3475 && bb2 != first_bb
3476 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3477 {
3478 /* As an exception, handle a few common cases. */
3479 if (gimple_assign_cast_p (g)
ccf8d652 3480 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3481 {
3482 tree op0 = gimple_assign_rhs1 (g);
3483 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3484 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3485 > TYPE_PRECISION (TREE_TYPE (op0))))
3486 /* Zero-extension is always ok. */
3487 break;
3488 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3489 == TYPE_PRECISION (TREE_TYPE (op0))
3490 && TREE_CODE (op0) == SSA_NAME)
3491 {
3492 /* Cast from signed to unsigned or vice versa. Retry
3493 with the op0 as new rhs2. */
3494 rhs2 = op0;
3495 continue;
3496 }
3497 }
54ec8b11 3498 else if (is_gimple_assign (g)
3499 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3500 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3501 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3502 /* Masking with INTEGER_CST with MSB clear is always ok
ccf8d652 3503 too. */
3504 break;
3505 rhs2 = NULL_TREE;
54ec8b11 3506 }
ccf8d652 3507 break;
54ec8b11 3508 }
ccf8d652 3509 if (rhs2 == NULL_TREE)
3510 continue;
54ec8b11 3511
fafde080 3512 wide_int nz = get_nonzero_bits (rhs2);
3513 if (wi::neg_p (nz))
3514 continue;
3515
3516 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3517	 and RHS2 is known to be >= 0.  */
3518 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3519
3520 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3521 if ((ranges[*idx].strict_overflow_p
3522 || ranges[i].strict_overflow_p)
3523 && issue_strict_overflow_warning (wc))
3524 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3525 "assuming signed overflow does not occur "
3526 "when simplifying range test");
3527
3528 if (dump_file && (dump_flags & TDF_DETAILS))
3529 {
3530 struct range_entry *r = &ranges[*idx];
3531 fprintf (dump_file, "Optimizing range test ");
1ffa4346 3532 print_generic_expr (dump_file, r->exp);
fafde080 3533 fprintf (dump_file, " +[");
1ffa4346 3534 print_generic_expr (dump_file, r->low);
fafde080 3535 fprintf (dump_file, ", ");
1ffa4346 3536 print_generic_expr (dump_file, r->high);
fafde080 3537 fprintf (dump_file, "] and comparison ");
1ffa4346 3538 print_generic_expr (dump_file, rhs1);
fafde080 3539 fprintf (dump_file, " %s ", op_symbol_code (ccode));
1ffa4346 3540 print_generic_expr (dump_file, rhs2);
fafde080 3541 fprintf (dump_file, "\n into (");
1ffa4346 3542 print_generic_expr (dump_file, utype);
fafde080 3543 fprintf (dump_file, ") ");
1ffa4346 3544 print_generic_expr (dump_file, rhs1);
fafde080 3545 fprintf (dump_file, " %s (", op_symbol_code (ccode));
1ffa4346 3546 print_generic_expr (dump_file, utype);
fafde080 3547 fprintf (dump_file, ") ");
1ffa4346 3548 print_generic_expr (dump_file, rhs2);
fafde080 3549 fprintf (dump_file, "\n");
3550 }
3551
42fad061 3552 operand_entry *oe = (*ops)[ranges[i].idx];
3553 ranges[i].in_p = 0;
3554 if (opcode == BIT_IOR_EXPR
3555 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3556 {
3557 ranges[i].in_p = 1;
3558 ccode = invert_tree_comparison (ccode, false);
3559 }
fafde080 3560
3561 unsigned int uid = gimple_uid (stmt);
3562 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3563 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3564 gimple_set_uid (g, uid);
3565 rhs1 = gimple_assign_lhs (g);
3566 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
ccf8d652 3567 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3568 {
3569 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3570 gimple_set_uid (g, uid);
3571 rhs2 = gimple_assign_lhs (g);
3572 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3573 }
48baf518 3574 if (tree_swap_operands_p (rhs1, rhs2))
fafde080 3575 {
3576 std::swap (rhs1, rhs2);
3577 ccode = swap_tree_comparison (ccode);
3578 }
3579 if (gimple_code (stmt) == GIMPLE_COND)
3580 {
3581 gcond *c = as_a <gcond *> (stmt);
3582 gimple_cond_set_code (c, ccode);
3583 gimple_cond_set_lhs (c, rhs1);
3584 gimple_cond_set_rhs (c, rhs2);
3585 update_stmt (stmt);
3586 }
3587 else
3588 {
221d7858 3589 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3590 if (!INTEGRAL_TYPE_P (ctype)
3591 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3592 && TYPE_PRECISION (ctype) != 1))
3593 ctype = boolean_type_node;
3594 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
fafde080 3595 gimple_set_uid (g, uid);
3596 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
221d7858 3597 if (oe->op && ctype != TREE_TYPE (oe->op))
3598 {
3599 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3600 NOP_EXPR, gimple_assign_lhs (g));
3601 gimple_set_uid (g, uid);
3602 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3603 }
fafde080 3604 ranges[i].exp = gimple_assign_lhs (g);
221d7858 3605 oe->op = ranges[i].exp;
3606 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3607 ranges[i].high = ranges[i].low;
fafde080 3608 }
3609 ranges[i].strict_overflow_p = false;
42fad061 3610 oe = (*ops)[ranges[*idx].idx];
fafde080 3611 /* Now change all the other range test immediate uses, so that
3612 those tests will be optimized away. */
3613 if (opcode == ERROR_MARK)
3614 {
3615 if (oe->op)
3616 oe->op = build_int_cst (TREE_TYPE (oe->op),
3617 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3618 else
3619 oe->op = (oe->rank == BIT_IOR_EXPR
3620 ? boolean_false_node : boolean_true_node);
3621 }
3622 else
3623 oe->op = error_mark_node;
3624 ranges[*idx].exp = NULL_TREE;
3625 ranges[*idx].low = NULL_TREE;
3626 ranges[*idx].high = NULL_TREE;
3627 any_changes = true;
3628 }
3629
3630 delete map;
3631 return any_changes;
3632}
3633
946e9eb4 3634/* Optimize range tests, similarly to how fold_range_test optimizes
3635   them on trees.  The tree code for the binary
8a2c7744 3636 operation between all the operands is OPCODE.
3637 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3638 maybe_optimize_range_tests for inter-bb range optimization.
3639 In that case if oe->op is NULL, oe->id is bb->index whose
3640 GIMPLE_COND is && or ||ed into the test, and oe->rank says
54ec8b11 3641 the actual opcode.
3642 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
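
/* A minimal sketch of range merging (example and helper name invented):
   three singleton ranges on the same SSA_NAME, combined with ||, merge
   into the single range [0, 2], which update_range_test can emit as one
   unsigned comparison.  */

static int
range_merge_sketch (int a)
{
  int before = a == 0 || a == 1 || a == 2;	/* ranges [0,0] [1,1] [2,2] */
  int after = (unsigned) a <= 2;		/* merged range [0,2] */
  return before == after;			/* always 1 */
}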
946e9eb4 3643
82cb4e1c 3644static bool
946e9eb4 3645optimize_range_tests (enum tree_code opcode,
54ec8b11 3646 vec<operand_entry *> *ops, basic_block first_bb)
946e9eb4 3647{
f1f41a6c 3648 unsigned int length = ops->length (), i, j, first;
04009ada 3649 operand_entry *oe;
946e9eb4 3650 struct range_entry *ranges;
3651 bool any_changes = false;
3652
3653 if (length == 1)
82cb4e1c 3654 return false;
946e9eb4 3655
3656 ranges = XNEWVEC (struct range_entry, length);
3657 for (i = 0; i < length; i++)
3658 {
f1f41a6c 3659 oe = (*ops)[i];
946e9eb4 3660 ranges[i].idx = i;
8a2c7744 3661 init_range_entry (ranges + i, oe->op,
14a72c4e 3662 oe->op
3663 ? NULL
3664 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
946e9eb4 3665      /* For |, invert it now; we will invert it again before emitting
3666 the optimized expression. */
8a2c7744 3667 if (opcode == BIT_IOR_EXPR
3668 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
946e9eb4 3669 ranges[i].in_p = !ranges[i].in_p;
3670 }
3671
3672 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3673 for (i = 0; i < length; i++)
3674 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3675 break;
3676
3677 /* Try to merge ranges. */
3678 for (first = i; i < length; i++)
3679 {
3680 tree low = ranges[i].low;
3681 tree high = ranges[i].high;
3682 int in_p = ranges[i].in_p;
3683 bool strict_overflow_p = ranges[i].strict_overflow_p;
3684 int update_fail_count = 0;
3685
3686 for (j = i + 1; j < length; j++)
3687 {
3688 if (ranges[i].exp != ranges[j].exp)
3689 break;
3690 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3691 ranges[j].in_p, ranges[j].low, ranges[j].high))
3692 break;
3693 strict_overflow_p |= ranges[j].strict_overflow_p;
3694 }
3695
3696 if (j == i + 1)
3697 continue;
3698
e3668db5 3699 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3700 opcode, ops, ranges[i].exp, NULL, in_p,
3701 low, high, strict_overflow_p))
946e9eb4 3702 {
3703 i = j - 1;
3704 any_changes = true;
3705 }
3706 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3707 while update_range_test would fail. */
3708 else if (update_fail_count == 64)
3709 i = j - 1;
3710 else
3711 ++update_fail_count;
3712 }
3713
b0c0c879 3714 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3715 ops, ranges);
946e9eb4 3716
b0c0c879 3717 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3718 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3719 ops, ranges);
e3668db5 3720 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3721 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3722 ops, ranges);
4c168df0 3723 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3724 ops, ranges);
fafde080 3725 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
54ec8b11 3726 ranges, first_bb);
946e9eb4 3727
8a2c7744 3728 if (any_changes && opcode != ERROR_MARK)
946e9eb4 3729 {
3730 j = 0;
f1f41a6c 3731 FOR_EACH_VEC_ELT (*ops, i, oe)
946e9eb4 3732 {
3733 if (oe->op == error_mark_node)
3734 continue;
3735 else if (i != j)
f1f41a6c 3736 (*ops)[j] = oe;
946e9eb4 3737 j++;
3738 }
f1f41a6c 3739 ops->truncate (j);
946e9eb4 3740 }
3741
3742 XDELETEVEC (ranges);
82cb4e1c 3743 return any_changes;
946e9eb4 3744}
3745
026f2137 3746/* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3747 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3748 otherwise the comparison code. */
3749
3750static tree_code
3751ovce_extract_ops (tree var, gassign **rets, bool *reti)
3752{
3753 if (TREE_CODE (var) != SSA_NAME)
3754 return ERROR_MARK;
3755
3756 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3757 if (stmt == NULL)
3758 return ERROR_MARK;
3759
3760 /* ??? If we start creating more COND_EXPR, we could perform
3761 this same optimization with them. For now, simplify. */
3762 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3763 return ERROR_MARK;
3764
3765 tree cond = gimple_assign_rhs1 (stmt);
3766 tree_code cmp = TREE_CODE (cond);
3767 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3768 return ERROR_MARK;
3769
3770 /* ??? For now, allow only canonical true and false result vectors.
3771 We could expand this to other constants should the need arise,
3772 but at the moment we don't create them. */
3773 tree t = gimple_assign_rhs2 (stmt);
3774 tree f = gimple_assign_rhs3 (stmt);
3775 bool inv;
3776 if (integer_all_onesp (t))
3777 inv = false;
3778 else if (integer_all_onesp (f))
3779 {
3780 cmp = invert_tree_comparison (cmp, false);
3781 inv = true;
3782 }
3783 else
3784 return ERROR_MARK;
3785 if (!integer_zerop (f))
3786 return ERROR_MARK;
3787
3788 /* Success! */
3789 if (rets)
3790 *rets = stmt;
3791 if (reti)
3792 *reti = inv;
3793 return cmp;
3794}
3795
3796/* Optimize the condition of VEC_COND_EXPRs which have been combined
3797 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
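
/* A scalar sketch of the vector transform (assumed example; the pass
   itself works on VEC_COND_EXPRs with all-ones/all-zeros result vectors,
   this only shows the underlying fold).  Two comparison masks combined
   with the opcode collapse to one comparison when
   maybe_fold_and_comparisons can fold the pair.  */

static int
vec_cond_fold_sketch (int x, int y)
{
  int m1 = x <= y ? -1 : 0;	/* first comparison mask */
  int m2 = x < y ? -1 : 0;	/* second comparison mask */
  int combined = m1 & m2;	/* what the input computes */
  int folded = x < y ? -1 : 0;	/* (x <= y) && (x < y) == (x < y) */
  return combined == folded;	/* always 1 */
}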
3798
3799static bool
3800optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3801{
3802 unsigned int length = ops->length (), i, j;
3803 bool any_changes = false;
3804
3805 if (length == 1)
3806 return false;
3807
3808 for (i = 0; i < length; ++i)
3809 {
3810 tree elt0 = (*ops)[i]->op;
3811
3812 gassign *stmt0;
3813 bool invert;
3814 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3815 if (cmp0 == ERROR_MARK)
3816 continue;
3817
3818 for (j = i + 1; j < length; ++j)
3819 {
3820 tree &elt1 = (*ops)[j]->op;
3821
3822 gassign *stmt1;
3823 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3824 if (cmp1 == ERROR_MARK)
3825 continue;
3826
3827 tree cond0 = gimple_assign_rhs1 (stmt0);
3828 tree x0 = TREE_OPERAND (cond0, 0);
3829 tree y0 = TREE_OPERAND (cond0, 1);
3830
3831 tree cond1 = gimple_assign_rhs1 (stmt1);
3832 tree x1 = TREE_OPERAND (cond1, 0);
3833 tree y1 = TREE_OPERAND (cond1, 1);
3834
3835 tree comb;
3836 if (opcode == BIT_AND_EXPR)
3837 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3838 else if (opcode == BIT_IOR_EXPR)
3839 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3840 else
3841 gcc_unreachable ();
3842 if (comb == NULL)
3843 continue;
3844
3845 /* Success! */
3846 if (dump_file && (dump_flags & TDF_DETAILS))
3847 {
3848 fprintf (dump_file, "Transforming ");
1ffa4346 3849 print_generic_expr (dump_file, cond0);
026f2137 3850 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
1ffa4346 3851 print_generic_expr (dump_file, cond1);
026f2137 3852 fprintf (dump_file, " into ");
1ffa4346 3853 print_generic_expr (dump_file, comb);
026f2137 3854 fputc ('\n', dump_file);
3855 }
3856
3857 gimple_assign_set_rhs1 (stmt0, comb);
3858 if (invert)
3859 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3860 *gimple_assign_rhs3_ptr (stmt0));
3861 update_stmt (stmt0);
3862
3863 elt1 = error_mark_node;
3864 any_changes = true;
3865 }
3866 }
3867
3868 if (any_changes)
3869 {
3870 operand_entry *oe;
3871 j = 0;
3872 FOR_EACH_VEC_ELT (*ops, i, oe)
3873 {
3874 if (oe->op == error_mark_node)
3875 continue;
3876 else if (i != j)
3877 (*ops)[j] = oe;
3878 j++;
3879 }
3880 ops->truncate (j);
3881 }
3882
3883 return any_changes;
3884}
3885
8a2c7744 3886/* Return true if STMT is a cast like:
3887 <bb N>:
3888 ...
3889 _123 = (int) _234;
3890
3891 <bb M>:
3892 # _345 = PHI <_123(N), 1(...), 1(...)>
3893 where _234 has bool type, _123 has single use and
3894 bb N has a single successor M. This is commonly used in
be9f4473 3895 the last block of a range test.
3896
3897   Also return true if STMT is a tcc_comparison like:
3898 <bb N>:
3899 ...
3900 _234 = a_2(D) == 2;
3901
3902 <bb M>:
3903 # _345 = PHI <_234(N), 1(...), 1(...)>
3904 _346 = (int) _345;
3905   where _234 has bool type and a single use, and
3906 bb N has a single successor M. This is commonly used in
8a2c7744 3907 the last block of a range test. */
3908
3909static bool
42acab1c 3910final_range_test_p (gimple *stmt)
8a2c7744 3911{
be9f4473 3912 basic_block bb, rhs_bb, lhs_bb;
8a2c7744 3913 edge e;
3914 tree lhs, rhs;
3915 use_operand_p use_p;
42acab1c 3916 gimple *use_stmt;
8a2c7744 3917
be9f4473 3918 if (!gimple_assign_cast_p (stmt)
3919 && (!is_gimple_assign (stmt)
3920 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3921 != tcc_comparison)))
8a2c7744 3922 return false;
3923 bb = gimple_bb (stmt);
3924 if (!single_succ_p (bb))
3925 return false;
3926 e = single_succ_edge (bb);
3927 if (e->flags & EDGE_COMPLEX)
3928 return false;
3929
3930 lhs = gimple_assign_lhs (stmt);
3931 rhs = gimple_assign_rhs1 (stmt);
be9f4473 3932 if (gimple_assign_cast_p (stmt)
3933 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3934 || TREE_CODE (rhs) != SSA_NAME
3935 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
8a2c7744 3936 return false;
3937
be9f4473 3938 if (!gimple_assign_cast_p (stmt)
3939 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3940 return false;
3941
8a2c7744 3942 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3943 if (!single_imm_use (lhs, &use_p, &use_stmt))
3944 return false;
3945
3946 if (gimple_code (use_stmt) != GIMPLE_PHI
3947 || gimple_bb (use_stmt) != e->dest)
3948 return false;
3949
3950 /* And that the rhs is defined in the same loop. */
be9f4473 3951 if (gimple_assign_cast_p (stmt))
3952 {
3953 if (TREE_CODE (rhs) != SSA_NAME
3954 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3955 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3956 return false;
3957 }
3958 else
3959 {
3960 if (TREE_CODE (lhs) != SSA_NAME
3961 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3962 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3963 return false;
3964 }
8a2c7744 3965
3966 return true;
3967}
3968
3969/* Return true if BB is suitable basic block for inter-bb range test
3970 optimization. If BACKWARD is true, BB should be the only predecessor
3971   of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3972   or is compared against to find a common basic block to which all
3973   conditions branch if true resp. false.  If BACKWARD is false, TEST_BB should
3974 be the only predecessor of BB. */
3975
3976static bool
3977suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3978 bool backward)
3979{
3980 edge_iterator ei, ei2;
3981 edge e, e2;
42acab1c 3982 gimple *stmt;
1a91d914 3983 gphi_iterator gsi;
8a2c7744 3984 bool other_edge_seen = false;
3985 bool is_cond;
3986
3987 if (test_bb == bb)
3988 return false;
3989 /* Check last stmt first. */
3990 stmt = last_stmt (bb);
3991 if (stmt == NULL
3992 || (gimple_code (stmt) != GIMPLE_COND
3993 && (backward || !final_range_test_p (stmt)))
3994 || gimple_visited_p (stmt)
aac19106 3995 || stmt_could_throw_p (cfun, stmt)
8a2c7744 3996 || *other_bb == bb)
3997 return false;
3998 is_cond = gimple_code (stmt) == GIMPLE_COND;
3999 if (is_cond)
4000 {
4001 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4002 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4003 to *OTHER_BB (if not set yet, try to find it out). */
4004 if (EDGE_COUNT (bb->succs) != 2)
4005 return false;
4006 FOR_EACH_EDGE (e, ei, bb->succs)
4007 {
4008 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4009 return false;
4010 if (e->dest == test_bb)
4011 {
4012 if (backward)
4013 continue;
4014 else
4015 return false;
4016 }
4017 if (e->dest == bb)
4018 return false;
4019 if (*other_bb == NULL)
4020 {
4021 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4022 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4023 return false;
4024 else if (e->dest == e2->dest)
4025 *other_bb = e->dest;
4026 if (*other_bb == NULL)
4027 return false;
4028 }
4029 if (e->dest == *other_bb)
4030 other_edge_seen = true;
4031 else if (backward)
4032 return false;
4033 }
4034 if (*other_bb == NULL || !other_edge_seen)
4035 return false;
4036 }
4037 else if (single_succ (bb) != *other_bb)
4038 return false;
4039
4040 /* Now check all PHIs of *OTHER_BB. */
4041 e = find_edge (bb, *other_bb);
4042 e2 = find_edge (test_bb, *other_bb);
4043 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4044 {
1a91d914 4045 gphi *phi = gsi.phi ();
8a2c7744 4046 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4047 corresponding to BB and TEST_BB predecessor must be the same. */
4048 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4049 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4050 {
4051 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4052 one of the PHIs should have the lhs of the last stmt in
4053 that block as PHI arg and that PHI should have 0 or 1
4054 corresponding to it in all other range test basic blocks
4055 considered. */
4056 if (!is_cond)
4057 {
4058 if (gimple_phi_arg_def (phi, e->dest_idx)
4059 == gimple_assign_lhs (stmt)
4060 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4061 || integer_onep (gimple_phi_arg_def (phi,
4062 e2->dest_idx))))
4063 continue;
4064 }
4065 else
4066 {
42acab1c 4067 gimple *test_last = last_stmt (test_bb);
8a2c7744 4068 if (gimple_code (test_last) != GIMPLE_COND
4069 && gimple_phi_arg_def (phi, e2->dest_idx)
4070 == gimple_assign_lhs (test_last)
4071 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
4072 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
4073 continue;
4074 }
4075
4076 return false;
4077 }
4078 }
4079 return true;
4080}
4081
4082/* Return true if BB doesn't have side-effects that would disallow
4083 range test optimization, all SSA_NAMEs set in the bb are consumed
4084   in the bb, and there are no PHIs.  */
4085
4086static bool
4087no_side_effect_bb (basic_block bb)
4088{
4089 gimple_stmt_iterator gsi;
42acab1c 4090 gimple *last;
8a2c7744 4091
4092 if (!gimple_seq_empty_p (phi_nodes (bb)))
4093 return false;
4094 last = last_stmt (bb);
4095 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4096 {
42acab1c 4097 gimple *stmt = gsi_stmt (gsi);
8a2c7744 4098 tree lhs;
4099 imm_use_iterator imm_iter;
4100 use_operand_p use_p;
4101
4102 if (is_gimple_debug (stmt))
4103 continue;
4104 if (gimple_has_side_effects (stmt))
4105 return false;
4106 if (stmt == last)
4107 return true;
4108 if (!is_gimple_assign (stmt))
4109 return false;
4110 lhs = gimple_assign_lhs (stmt);
4111 if (TREE_CODE (lhs) != SSA_NAME)
4112 return false;
4113 if (gimple_assign_rhs_could_trap_p (stmt))
4114 return false;
4115 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4116 {
42acab1c 4117 gimple *use_stmt = USE_STMT (use_p);
8a2c7744 4118 if (is_gimple_debug (use_stmt))
4119 continue;
4120 if (gimple_bb (use_stmt) != bb)
4121 return false;
4122 }
4123 }
4124 return false;
4125}
4126
4127/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4128 return true and fill in *OPS recursively. */
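
/* A sketch of what gets collected (the IL shape below is an assumed
   example): for a single-use chain
     _1 = a & b;  _2 = _1 & c;  var = _2 & d;
   get_ops (var, BIT_AND_EXPR, ...) recurses through _2 and _1 and pushes
   the leaves a, b, c and d as individual operand entries.  */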
4129
4130static bool
04009ada 4131get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
2e966e2a 4132 class loop *loop)
8a2c7744 4133{
42acab1c 4134 gimple *stmt = SSA_NAME_DEF_STMT (var);
8a2c7744 4135 tree rhs[2];
4136 int i;
4137
4138 if (!is_reassociable_op (stmt, code, loop))
4139 return false;
4140
4141 rhs[0] = gimple_assign_rhs1 (stmt);
4142 rhs[1] = gimple_assign_rhs2 (stmt);
4143 gimple_set_visited (stmt, true);
4144 for (i = 0; i < 2; i++)
4145 if (TREE_CODE (rhs[i]) == SSA_NAME
4146 && !get_ops (rhs[i], code, ops, loop)
4147 && has_single_use (rhs[i]))
4148 {
04009ada 4149 operand_entry *oe = operand_entry_pool.allocate ();
8a2c7744 4150
4151 oe->op = rhs[i];
4152 oe->rank = code;
4153 oe->id = 0;
4154 oe->count = 1;
527d8479 4155 oe->stmt_to_insert = NULL;
f1f41a6c 4156 ops->safe_push (oe);
8a2c7744 4157 }
4158 return true;
4159}
4160
82cb4e1c 4161/* Find the ops that were added by get_ops starting from VAR, see if
4162   they were changed during update_range_test and, if so, create new
4163 stmts. */
4164
4165static tree
04009ada 4166update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
2e966e2a 4167 unsigned int *pidx, class loop *loop)
82cb4e1c 4168{
42acab1c 4169 gimple *stmt = SSA_NAME_DEF_STMT (var);
82cb4e1c 4170 tree rhs[4];
4171 int i;
4172
4173 if (!is_reassociable_op (stmt, code, loop))
4174 return NULL;
4175
4176 rhs[0] = gimple_assign_rhs1 (stmt);
4177 rhs[1] = gimple_assign_rhs2 (stmt);
4178 rhs[2] = rhs[0];
4179 rhs[3] = rhs[1];
4180 for (i = 0; i < 2; i++)
4181 if (TREE_CODE (rhs[i]) == SSA_NAME)
4182 {
4183 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4184 if (rhs[2 + i] == NULL_TREE)
4185 {
4186 if (has_single_use (rhs[i]))
4187 rhs[2 + i] = ops[(*pidx)++]->op;
4188 else
4189 rhs[2 + i] = rhs[i];
4190 }
4191 }
4192 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4193 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4194 {
4195 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
f9e245b2 4196 var = make_ssa_name (TREE_TYPE (var));
e9cf809e 4197 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4198 rhs[2], rhs[3]);
82cb4e1c 4199 gimple_set_uid (g, gimple_uid (stmt));
4200 gimple_set_visited (g, true);
4201 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4202 }
4203 return var;
4204}
4205
4206/* Structure to track the initial value passed to get_ops and
4207 the range in the ops vector for each basic block. */
4208
4209struct inter_bb_range_test_entry
4210{
4211 tree op;
4212 unsigned int first_idx, last_idx;
4213};
4214
55a2d273 4215/* Inter-bb range test optimization.
8a2c7744 4216
55a2d273 4217   Returns TRUE if a gimple conditional is optimized to true/false;
4218   otherwise returns FALSE.
4219
4220 This indicates to the caller that it should run a CFG cleanup pass
4221 once reassociation is completed. */
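
/* A source-level sketch of the shape this handles (example and helper
   name invented; the exact rewrite chosen depends on branch cost and on
   what optimize_range_tests decides): a chain of basic blocks each ending
   in a GIMPLE_COND branching to a common destination behaves like one
   ||-combined range test and can collapse to a single bit test.  */

static int
inter_bb_range_test_sketch (unsigned a)
{
  /* Original control flow: three separate conditional blocks.  */
  int before = a == 1 || a == 3 || a == 5;
  /* One possible result, via optimize_range_tests_to_bit_test.  */
  int after = a <= 5 && ((1u << a) & 0x2au) != 0;
  return before == after;	/* always 1 */
}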
4222
4223static bool
42acab1c 4224maybe_optimize_range_tests (gimple *stmt)
8a2c7744 4225{
4226 basic_block first_bb = gimple_bb (stmt);
4227 basic_block last_bb = first_bb;
4228 basic_block other_bb = NULL;
4229 basic_block bb;
4230 edge_iterator ei;
4231 edge e;
04009ada 4232 auto_vec<operand_entry *> ops;
c2078b80 4233 auto_vec<inter_bb_range_test_entry> bbinfo;
3c8e4ef8 4234 bool any_changes = false;
55a2d273 4235 bool cfg_cleanup_needed = false;
8a2c7744 4236
4237 /* Consider only basic blocks that end with GIMPLE_COND or
4238 a cast statement satisfying final_range_test_p. All
4239 but the last bb in the first_bb .. last_bb range
4240 should end with GIMPLE_COND. */
4241 if (gimple_code (stmt) == GIMPLE_COND)
4242 {
4243 if (EDGE_COUNT (first_bb->succs) != 2)
55a2d273 4244 return cfg_cleanup_needed;
8a2c7744 4245 }
4246 else if (final_range_test_p (stmt))
4247 other_bb = single_succ (first_bb);
4248 else
55a2d273 4249 return cfg_cleanup_needed;
8a2c7744 4250
aac19106 4251 if (stmt_could_throw_p (cfun, stmt))
55a2d273 4252 return cfg_cleanup_needed;
8a2c7744 4253
4254 /* As relative ordering of post-dominator sons isn't fixed,
4255 maybe_optimize_range_tests can be called first on any
4256 bb in the range we want to optimize. So, start searching
4257 backwards, if first_bb can be set to a predecessor. */
4258 while (single_pred_p (first_bb))
4259 {
4260 basic_block pred_bb = single_pred (first_bb);
4261 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
4262 break;
4263 if (!no_side_effect_bb (first_bb))
4264 break;
4265 first_bb = pred_bb;
4266 }
4267 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4268 Before starting forward search in last_bb successors, find
4269 out the other_bb. */
4270 if (first_bb == last_bb)
4271 {
4272 other_bb = NULL;
4273 /* As non-GIMPLE_COND last stmt always terminates the range,
4274	 if the backward search didn't discover anything, just give up.  */
4275 if (gimple_code (stmt) != GIMPLE_COND)
55a2d273 4276 return cfg_cleanup_needed;
8a2c7744 4277 /* Look at both successors. Either it ends with a GIMPLE_COND
4278 and satisfies suitable_cond_bb, or ends with a cast and
4279 other_bb is that cast's successor. */
4280 FOR_EACH_EDGE (e, ei, first_bb->succs)
4281 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4282 || e->dest == first_bb)
55a2d273 4283 return cfg_cleanup_needed;
8a2c7744 4284 else if (single_pred_p (e->dest))
4285 {
4286 stmt = last_stmt (e->dest);
4287 if (stmt
4288 && gimple_code (stmt) == GIMPLE_COND
4289 && EDGE_COUNT (e->dest->succs) == 2)
4290 {
4291 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4292 break;
4293 else
4294 other_bb = NULL;
4295 }
4296 else if (stmt
4297 && final_range_test_p (stmt)
4298 && find_edge (first_bb, single_succ (e->dest)))
4299 {
4300 other_bb = single_succ (e->dest);
4301 if (other_bb == first_bb)
4302 other_bb = NULL;
4303 }
4304 }
4305 if (other_bb == NULL)
55a2d273 4306 return cfg_cleanup_needed;
8a2c7744 4307 }
4308 /* Now do the forward search, moving last_bb to successor bbs
4309 that aren't other_bb. */
4310 while (EDGE_COUNT (last_bb->succs) == 2)
4311 {
4312 FOR_EACH_EDGE (e, ei, last_bb->succs)
4313 if (e->dest != other_bb)
4314 break;
4315 if (e == NULL)
4316 break;
4317 if (!single_pred_p (e->dest))
4318 break;
4319 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4320 break;
4321 if (!no_side_effect_bb (e->dest))
4322 break;
4323 last_bb = e->dest;
4324 }
4325 if (first_bb == last_bb)
55a2d273 4326 return cfg_cleanup_needed;
8a2c7744 4327 /* Here basic blocks first_bb through last_bb's predecessor
4328 end with GIMPLE_COND, all of them have one of the edges to
4329 other_bb and another to another block in the range,
4330 all blocks except first_bb don't have side-effects and
4331 last_bb ends with either GIMPLE_COND, or cast satisfying
4332 final_range_test_p. */
4333 for (bb = last_bb; ; bb = single_pred (bb))
4334 {
4335 enum tree_code code;
4336 tree lhs, rhs;
82cb4e1c 4337 inter_bb_range_test_entry bb_ent;
8a2c7744 4338
82cb4e1c 4339 bb_ent.op = NULL_TREE;
4340 bb_ent.first_idx = ops.length ();
4341 bb_ent.last_idx = bb_ent.first_idx;
8a2c7744 4342 e = find_edge (bb, other_bb);
4343 stmt = last_stmt (bb);
4344 gimple_set_visited (stmt, true);
4345 if (gimple_code (stmt) != GIMPLE_COND)
4346 {
4347 use_operand_p use_p;
42acab1c 4348 gimple *phi;
8a2c7744 4349 edge e2;
4350 unsigned int d;
4351
4352 lhs = gimple_assign_lhs (stmt);
4353 rhs = gimple_assign_rhs1 (stmt);
4354 gcc_assert (bb == last_bb);
4355
4356 /* stmt is
4357 _123 = (int) _234;
be9f4473 4358 OR
4359 _234 = a_2(D) == 2;
8a2c7744 4360
4361 followed by:
4362 <bb M>:
4363 # _345 = PHI <_123(N), 1(...), 1(...)>
4364
4365 or 0 instead of 1. If it is 0, the _234
4366 range test is anded together with all the
4367 other range tests, if it is 1, it is ored with
4368 them. */
4369 single_imm_use (lhs, &use_p, &phi);
4370 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4371 e2 = find_edge (first_bb, other_bb);
4372 d = e2->dest_idx;
4373 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4374 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4375 code = BIT_AND_EXPR;
4376 else
4377 {
4378 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4379 code = BIT_IOR_EXPR;
4380 }
4381
4382 /* If _234 SSA_NAME_DEF_STMT is
4383 _234 = _567 | _789;
4384	 (or &, corresponding to 1/0 in the phi arguments),
4385 push into ops the individual range test arguments
4386 of the bitwise or resp. and, recursively. */
5b2b88ce 4387 if (TREE_CODE (rhs) == SSA_NAME
be9f4473 4388 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4389 != tcc_comparison)
ec6d2d63 4390 && !get_ops (rhs, code, &ops,
4391 loop_containing_stmt (stmt))
8a2c7744 4392 && has_single_use (rhs))
4393 {
4394 /* Otherwise, push the _234 range test itself. */
04009ada 4395 operand_entry *oe = operand_entry_pool.allocate ();
8a2c7744 4396
4397 oe->op = rhs;
4398 oe->rank = code;
4399 oe->id = 0;
4400 oe->count = 1;
527d8479 4401 oe->stmt_to_insert = NULL;
f1f41a6c 4402 ops.safe_push (oe);
82cb4e1c 4403 bb_ent.last_idx++;
be9f4473 4404 bb_ent.op = rhs;
4405 }
4406 else if (is_gimple_assign (stmt)
4407 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4408 == tcc_comparison)
5b2b88ce 4409 && !get_ops (lhs, code, &ops,
4410 loop_containing_stmt (stmt))
be9f4473 4411 && has_single_use (lhs))
4412 {
4413 operand_entry *oe = operand_entry_pool.allocate ();
4414 oe->op = lhs;
4415 oe->rank = code;
4416 oe->id = 0;
4417 oe->count = 1;
4418 ops.safe_push (oe);
4419 bb_ent.last_idx++;
4420 bb_ent.op = lhs;
8a2c7744 4421 }
82cb4e1c 4422 else
be9f4473 4423 {
4424 bb_ent.last_idx = ops.length ();
4425 bb_ent.op = rhs;
4426 }
82cb4e1c 4427 bbinfo.safe_push (bb_ent);
8a2c7744 4428 continue;
4429 }
4430 /* Otherwise stmt is GIMPLE_COND. */
4431 code = gimple_cond_code (stmt);
4432 lhs = gimple_cond_lhs (stmt);
4433 rhs = gimple_cond_rhs (stmt);
4434 if (TREE_CODE (lhs) == SSA_NAME
4435 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4436 && ((code != EQ_EXPR && code != NE_EXPR)
4437 || rhs != boolean_false_node
4438 /* Either push into ops the individual bitwise
4439 or resp. and operands, depending on which
4440 edge is other_bb. */
4441 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4442 ^ (code == EQ_EXPR))
4443 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4444 loop_containing_stmt (stmt))))
4445 {
4446 /* Or push the GIMPLE_COND stmt itself. */
04009ada 4447 operand_entry *oe = operand_entry_pool.allocate ();
8a2c7744 4448
4449 oe->op = NULL;
4450 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4451 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4452	      /* oe->op = NULL signals that there is no SSA_NAME
4453		 for the range test; oe->id instead is the number of the
4454		 basic block at whose end the GIMPLE_COND
4455		 is.  */
4456 oe->id = bb->index;
4457 oe->count = 1;
527d8479 4458 oe->stmt_to_insert = NULL;
f1f41a6c 4459 ops.safe_push (oe);
82cb4e1c 4460 bb_ent.op = NULL;
4461 bb_ent.last_idx++;
8a2c7744 4462 }
82cb4e1c 4463 else if (ops.length () > bb_ent.first_idx)
4464 {
4465 bb_ent.op = lhs;
4466 bb_ent.last_idx = ops.length ();
4467 }
4468 bbinfo.safe_push (bb_ent);
8a2c7744 4469 if (bb == first_bb)
4470 break;
4471 }
f1f41a6c 4472 if (ops.length () > 1)
54ec8b11 4473 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
3c8e4ef8 4474 if (any_changes)
82cb4e1c 4475 {
b7975a3d 4476 unsigned int idx, max_idx = 0;
3c8e4ef8 4477 /* update_ops relies on has_single_use predicates returning the
4478	 same values as they did during get_ops earlier.  Additionally it
4479 never removes statements, only adds new ones and it should walk
4480 from the single imm use and check the predicate already before
4481 making those changes.
4482 On the other side, the handling of GIMPLE_COND directly can turn
4483 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4484 it needs to be done in a separate loop afterwards. */
4485 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
82cb4e1c 4486 {
3c8e4ef8 4487 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4488 && bbinfo[idx].op != NULL_TREE)
82cb4e1c 4489 {
82cb4e1c 4490 tree new_op;
4491
b7975a3d 4492 max_idx = idx;
3c8e4ef8 4493 stmt = last_stmt (bb);
4494 new_op = update_ops (bbinfo[idx].op,
4495 (enum tree_code)
4496 ops[bbinfo[idx].first_idx]->rank,
4497 ops, &bbinfo[idx].first_idx,
4498 loop_containing_stmt (stmt));
82cb4e1c 4499 if (new_op == NULL_TREE)
4500 {
4501 gcc_assert (bb == last_bb);
4502 new_op = ops[bbinfo[idx].first_idx++]->op;
4503 }
4504 if (bbinfo[idx].op != new_op)
4505 {
4506 imm_use_iterator iter;
4507 use_operand_p use_p;
be9f4473 4508 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
82cb4e1c 4509
4510 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
f36f5c1b 4511 if (is_gimple_debug (use_stmt))
82cb4e1c 4512 continue;
f36f5c1b 4513 else if (gimple_code (use_stmt) == GIMPLE_COND
4514 || gimple_code (use_stmt) == GIMPLE_PHI)
4515 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4516 SET_USE (use_p, new_op);
be9f4473 4517 else if ((is_gimple_assign (use_stmt)
4518 && (TREE_CODE_CLASS
4519 (gimple_assign_rhs_code (use_stmt))
4520 == tcc_comparison)))
4521 cast_or_tcc_cmp_stmt = use_stmt;
82cb4e1c 4522 else if (gimple_assign_cast_p (use_stmt))
be9f4473 4523 cast_or_tcc_cmp_stmt = use_stmt;
f36f5c1b 4524 else
4525 gcc_unreachable ();
be9f4473 4526
4527 if (cast_or_tcc_cmp_stmt)
82cb4e1c 4528 {
4529 gcc_assert (bb == last_bb);
be9f4473 4530 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
f9e245b2 4531 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
82cb4e1c 4532 enum tree_code rhs_code
be9f4473 4533 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4534 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4535 : CONVERT_EXPR;
1a91d914 4536 gassign *g;
4c0d6cf7 4537 if (is_gimple_min_invariant (new_op))
4538 {
4539 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4540 g = gimple_build_assign (new_lhs, new_op);
4541 }
4542 else
e9cf809e 4543 g = gimple_build_assign (new_lhs, rhs_code, new_op);
be9f4473 4544 gimple_stmt_iterator gsi
4545 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4546 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
82cb4e1c 4547 gimple_set_visited (g, true);
f36f5c1b 4548 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
82cb4e1c 4549 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4550 if (is_gimple_debug (use_stmt))
4551 continue;
4552 else if (gimple_code (use_stmt) == GIMPLE_COND
4553 || gimple_code (use_stmt) == GIMPLE_PHI)
4554 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4555 SET_USE (use_p, new_lhs);
4556 else
4557 gcc_unreachable ();
4558 }
4559 }
4560 }
4561 if (bb == first_bb)
4562 break;
4563 }
3c8e4ef8 4564 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4565 {
4566 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4567 && bbinfo[idx].op == NULL_TREE
4568 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4569 {
1a91d914 4570 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
b7975a3d 4571
4572 if (idx > max_idx)
4573 max_idx = idx;
4574
55a2d273 4575 /* If we collapse the conditional to a true/false
4576 condition, then bubble that knowledge up to our caller. */
3c8e4ef8 4577 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
55a2d273 4578 {
4579 gimple_cond_make_false (cond_stmt);
4580 cfg_cleanup_needed = true;
4581 }
3c8e4ef8 4582 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
55a2d273 4583 {
4584 gimple_cond_make_true (cond_stmt);
4585 cfg_cleanup_needed = true;
4586 }
3c8e4ef8 4587 else
4588 {
1a91d914 4589 gimple_cond_set_code (cond_stmt, NE_EXPR);
4590 gimple_cond_set_lhs (cond_stmt,
4591 ops[bbinfo[idx].first_idx]->op);
4592 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3c8e4ef8 4593 }
1a91d914 4594 update_stmt (cond_stmt);
3c8e4ef8 4595 }
4596 if (bb == first_bb)
4597 break;
4598 }
b7975a3d 4599
4600 /* The above changes could result in basic blocks after the first
4601 modified one, up to and including last_bb, to be executed even if
4602 they would not be in the original program. If the value ranges of
4603 assignment lhs' in those bbs were dependent on the conditions
4604 guarding those basic blocks which now can change, the VRs might
4605 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4606	 are only used within the same bb, it should not be a big deal if
4607 we just reset all the VRs in those bbs. See PR68671. */
4608 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4609 reset_flow_sensitive_info_in_bb (bb);
82cb4e1c 4610 }
55a2d273 4611 return cfg_cleanup_needed;
8a2c7744 4612}
4613
54aceb26 4614/* Return true if OPERAND is defined by a PHI node which uses the LHS
4615   of STMT in its operands.  This is also known as a "destructive
4616 update" operation. */
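
/* A source-level sketch of a destructive update (helper name invented):
   inside the loop, the PHI node for A uses the lhs of the accumulating
   statement on the back edge.  */

static int
destructive_update_sketch (const int *c, int n)
{
  int a = 0;
  for (int i = 0; i < n; i++)
    a = c[i] + a;	/* the PHI for a uses this statement's lhs */
  return a;
}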
4617
4618static bool
42acab1c 4619is_phi_for_stmt (gimple *stmt, tree operand)
54aceb26 4620{
42acab1c 4621 gimple *def_stmt;
1a91d914 4622 gphi *def_phi;
75a70cf9 4623 tree lhs;
54aceb26 4624 use_operand_p arg_p;
4625 ssa_op_iter i;
4626
4627 if (TREE_CODE (operand) != SSA_NAME)
4628 return false;
4629
75a70cf9 4630 lhs = gimple_assign_lhs (stmt);
4631
54aceb26 4632 def_stmt = SSA_NAME_DEF_STMT (operand);
1a91d914 4633 def_phi = dyn_cast <gphi *> (def_stmt);
4634 if (!def_phi)
54aceb26 4635 return false;
4636
1a91d914 4637 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
54aceb26 4638 if (lhs == USE_FROM_PTR (arg_p))
4639 return true;
4640 return false;
4641}
4642
9ce93694 4643/* Remove def stmt of VAR if VAR has zero uses and recurse
4644 on rhs1 operand if so. */
4645
4646static void
4647remove_visited_stmt_chain (tree var)
4648{
42acab1c 4649 gimple *stmt;
9ce93694 4650 gimple_stmt_iterator gsi;
4651
4652 while (1)
4653 {
4654 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4655 return;
4656 stmt = SSA_NAME_DEF_STMT (var);
8c5ac7f6 4657 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4658 {
4659 var = gimple_assign_rhs1 (stmt);
8c5ac7f6 4660 gsi = gsi_for_stmt (stmt);
54675e05 4661 reassoc_remove_stmt (&gsi);
8c5ac7f6 4662 release_defs (stmt);
8c5ac7f6 4663 }
4664 else
9ce93694 4665 return;
9ce93694 4666 }
4667}
4668
5b1c765d 4669/* This function checks three consecutive operands in
4670   the passed operands vector OPS starting from OPINDEX and
4671   swaps two operands if it is profitable for the binary operation
4672   consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4673
4674 We pair ops with the same rank if possible.
4675
4676 The alternative we try is to see if STMT is a destructive
4677 update style statement, which is like:
4678 b = phi (a, ...)
4679 a = c + b;
4680 In that case, we want to use the destructive update form to
4681 expose the possible vectorizer sum reduction opportunity.
4682 In that case, the third operand will be the phi node. This
4683 check is not performed if STMT is null.
4684
4685 We could, of course, try to be better as noted above, and do a
4686 lot of work to try to find these opportunities in >3 operand
4687 cases, but it is unlikely to be worth it. */
4688
4689static void
04009ada 4690swap_ops_for_binary_stmt (vec<operand_entry *> ops,
42acab1c 4691 unsigned int opindex, gimple *stmt)
5b1c765d 4692{
04009ada 4693 operand_entry *oe1, *oe2, *oe3;
5b1c765d 4694
f1f41a6c 4695 oe1 = ops[opindex];
4696 oe2 = ops[opindex + 1];
4697 oe3 = ops[opindex + 2];
5b1c765d 4698
4699 if ((oe1->rank == oe2->rank
4700 && oe2->rank != oe3->rank)
4701 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4702 && !is_phi_for_stmt (stmt, oe1->op)
4703 && !is_phi_for_stmt (stmt, oe2->op)))
ce352457 4704 std::swap (*oe1, *oe3);
5b1c765d 4705 else if ((oe1->rank == oe3->rank
4706 && oe2->rank != oe3->rank)
4707 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4708 && !is_phi_for_stmt (stmt, oe1->op)
4709 && !is_phi_for_stmt (stmt, oe3->op)))
5de40da3 4710 std::swap (*oe1, *oe2);
bb88a6c7 4711}
4712
82cb4e1c 4713/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4714 two definitions, otherwise return STMT. */
a2bd0c99 4715
42acab1c 4716static inline gimple *
4717find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
a2bd0c99 4718{
82cb4e1c 4719 if (TREE_CODE (rhs1) == SSA_NAME
4720 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4721 stmt = SSA_NAME_DEF_STMT (rhs1);
4722 if (TREE_CODE (rhs2) == SSA_NAME
4723 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4724 stmt = SSA_NAME_DEF_STMT (rhs2);
4725 return stmt;
a2bd0c99 4726}
4727
fc3b1c44 4728/* If the stmt that defines operand has to be inserted, insert it
4729 before the use. */
4730static void
4731insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4732{
4733 gcc_assert (is_gimple_assign (stmt_to_insert));
4734 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4735 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4736 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4737 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4738 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4739
4740 /* If the insert point is not stmt, then insert_point would be
4741 the point where operand rhs1 or rhs2 is defined. In this case,
4742 stmt_to_insert has to be inserted afterwards. This would
4743 only happen when the stmt insertion point is flexible. */
4744 if (stmt == insert_point)
4745 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4746 else
4747 insert_stmt_after (stmt_to_insert, insert_point);
4748}
4749
4750
54aceb26 4751/* Recursively rewrite our linearized statements so that the operators
4752 match those in OPS[OPINDEX], putting the computation in rank
e70cda06 4753   order.  Return the new lhs.
4754 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4755 the current stmt and during recursive invocations.
4756 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4757 recursive invocations. */
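
/* A rough source-level picture (the operand order below is an assumption;
   the real order comes from the operand ranks): the rewrite only
   re-associates, so the computed value is unchanged.  */

static int
rewrite_order_sketch (int a, int b, int c, int d)
{
  int before = ((a + b) + c) + d;	/* linearized input chain */
  int after = ((d + c) + b) + a;	/* same operands, rank order */
  return before == after;		/* always 1, absent signed
					   overflow */
}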
54aceb26 4758
82cb4e1c 4759static tree
42acab1c 4760rewrite_expr_tree (gimple *stmt, unsigned int opindex,
e70cda06 4761 vec<operand_entry *> ops, bool changed, bool next_changed)
54aceb26 4762{
75a70cf9 4763 tree rhs1 = gimple_assign_rhs1 (stmt);
4764 tree rhs2 = gimple_assign_rhs2 (stmt);
82cb4e1c 4765 tree lhs = gimple_assign_lhs (stmt);
04009ada 4766 operand_entry *oe;
54aceb26 4767
54aceb26 4768 /* The final recursion case for this function is that you have
4769 exactly two operations left.
229fa37d 4770 If we had exactly one op in the entire list to start with, we
54aceb26 4771      would never have called this function, and the tail recursion
4772 rewrites them one at a time. */
f1f41a6c 4773 if (opindex + 2 == ops.length ())
54aceb26 4774 {
04009ada 4775 operand_entry *oe1, *oe2;
54aceb26 4776
f1f41a6c 4777 oe1 = ops[opindex];
4778 oe2 = ops[opindex + 1];
54aceb26 4779
75a70cf9 4780 if (rhs1 != oe1->op || rhs2 != oe2->op)
54aceb26 4781 {
82cb4e1c 4782 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4783 unsigned int uid = gimple_uid (stmt);
4784
54aceb26 4785 if (dump_file && (dump_flags & TDF_DETAILS))
4786 {
4787 fprintf (dump_file, "Transforming ");
1ffa4346 4788 print_gimple_stmt (dump_file, stmt, 0);
54aceb26 4789 }
4790
fc3b1c44 4791 /* If the stmt that defines operand has to be inserted, insert it
4792 before the use. */
4793 if (oe1->stmt_to_insert)
4794 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4795 if (oe2->stmt_to_insert)
4796 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
229fa37d 4797 /* Even when changed is false, reassociation could have e.g. removed
4798 some redundant operations, so unless we are just swapping the
4799 arguments or unless there is no change at all (then we just
4800 return lhs), force creation of a new SSA_NAME. */
4801 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
82cb4e1c 4802 {
42acab1c 4803 gimple *insert_point
4804 = find_insert_point (stmt, oe1->op, oe2->op);
f9e245b2 4805 lhs = make_ssa_name (TREE_TYPE (lhs));
82cb4e1c 4806 stmt
e9cf809e 4807 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4808 oe1->op, oe2->op);
82cb4e1c 4809 gimple_set_uid (stmt, uid);
4810 gimple_set_visited (stmt, true);
4811 if (insert_point == gsi_stmt (gsi))
4812 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4813 else
4814 insert_stmt_after (stmt, insert_point);
4815 }
4816 else
4817 {
4818 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4819 == stmt);
4820 gimple_assign_set_rhs1 (stmt, oe1->op);
4821 gimple_assign_set_rhs2 (stmt, oe2->op);
4822 update_stmt (stmt);
4823 }
4824
9ce93694 4825 if (rhs1 != oe1->op && rhs1 != oe2->op)
4826 remove_visited_stmt_chain (rhs1);
54aceb26 4827
4828 if (dump_file && (dump_flags & TDF_DETAILS))
4829 {
4830 fprintf (dump_file, " into ");
1ffa4346 4831 print_gimple_stmt (dump_file, stmt, 0);
54aceb26 4832 }
54aceb26 4833 }
82cb4e1c 4834 return lhs;
54aceb26 4835 }
4836
4837 /* If we hit here, we should have 3 or more ops left. */
f1f41a6c 4838 gcc_assert (opindex + 2 < ops.length ());
54aceb26 4839
4840 /* Rewrite the next operator. */
f1f41a6c 4841 oe = ops[opindex];
54aceb26 4842
527d8479 4843 /* If the stmt that defines operand has to be inserted, insert it
4844 before the use. */
4845 if (oe->stmt_to_insert)
4846 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4847
82cb4e1c 4848 /* Recurse on the LHS of the binary operator, which is guaranteed to
4849 be the non-leaf side. */
4850 tree new_rhs1
4851 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
e70cda06 4852 changed || oe->op != rhs2 || next_changed,
4853 false);
54aceb26 4854
82cb4e1c 4855 if (oe->op != rhs2 || new_rhs1 != rhs1)
4856 {
54aceb26 4857 if (dump_file && (dump_flags & TDF_DETAILS))
4858 {
4859 fprintf (dump_file, "Transforming ");
1ffa4346 4860 print_gimple_stmt (dump_file, stmt, 0);
54aceb26 4861 }
4862
82cb4e1c 4863 /* If changed is false, this is either opindex == 0
4864 or all outer rhs2's were equal to corresponding oe->op,
4865 and powi_result is NULL.
4866 That means lhs is equivalent before and after reassociation.
4867 Otherwise ensure the old lhs SSA_NAME is not reused and
4868 create a new stmt as well, so that any debug stmts will be
4869 properly adjusted. */
4870 if (changed)
4871 {
4872 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4873 unsigned int uid = gimple_uid (stmt);
42acab1c 4874 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
82cb4e1c 4875
f9e245b2 4876 lhs = make_ssa_name (TREE_TYPE (lhs));
e9cf809e 4877 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4878 new_rhs1, oe->op);
82cb4e1c 4879 gimple_set_uid (stmt, uid);
4880 gimple_set_visited (stmt, true);
4881 if (insert_point == gsi_stmt (gsi))
4882 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4883 else
4884 insert_stmt_after (stmt, insert_point);
4885 }
4886 else
4887 {
4888 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4889 == stmt);
4890 gimple_assign_set_rhs1 (stmt, new_rhs1);
4891 gimple_assign_set_rhs2 (stmt, oe->op);
4892 update_stmt (stmt);
4893 }
54aceb26 4894
4895 if (dump_file && (dump_flags & TDF_DETAILS))
4896 {
4897 fprintf (dump_file, " into ");
1ffa4346 4898 print_gimple_stmt (dump_file, stmt, 0);
54aceb26 4899 }
4900 }
82cb4e1c 4901 return lhs;
54aceb26 4902}
4903
5b1c765d 4904/* Find out how many cycles we need to compute a chain of statements.
4905   OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
4906   maximum number of independent statements we may execute per cycle.  */
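
/* Worked examples (the numbers are illustrative assumptions):
   ops_num = 8, cpu_width = 2: res = 8 / 4 = 2, rest = 8 - 2 * 2 = 4,
   exact_log2 (4) = 2, so 2 + 2 = 4 cycles in total.
   ops_num = 10, cpu_width = 2: res = 10 / 4 = 2, rest = 10 - 4 = 6,
   which is not a power of two, so floor_log2 (6) + 1 = 3 more cycles,
   5 in total.  */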
4907
4908static int
4909get_required_cycles (int ops_num, int cpu_width)
4910{
4911 int res;
4912 int elog;
4913 unsigned int rest;
4914
4915 /* While we have more than 2 * cpu_width operands
4916     we may reduce the number of operands by cpu_width
4917 per cycle. */
4918 res = ops_num / (2 * cpu_width);
4919
4920  /* The remaining operand count may be halved each cycle
4921     until we have only one operand.  */
4922 rest = (unsigned)(ops_num - res * cpu_width);
4923 elog = exact_log2 (rest);
4924 if (elog >= 0)
4925 res += elog;
4926 else
4927 res += floor_log2 (rest) + 1;
4928
4929 return res;
4930}
4931
4932/* Returns an optimal number of registers to use for computation of
4933   the given statements.  */
4934
4935static int
4936get_reassociation_width (int ops_num, enum tree_code opc,
3754d046 4937 machine_mode mode)
5b1c765d 4938{
4939 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4940 int width;
4941 int width_min;
4942 int cycles_best;
4943
4944 if (param_width > 0)
4945 width = param_width;
4946 else
4947 width = targetm.sched.reassociation_width (opc, mode);
4948
4949 if (width == 1)
4950 return width;
4951
4952 /* Get the minimal time required for sequence computation. */
4953 cycles_best = get_required_cycles (ops_num, width);
4954
4955 /* Check if we may use less width and still compute sequence for
4956 the same time. It will allow us to reduce registers usage.
4957 get_required_cycles is monotonically increasing with lower width
4958 so we can perform a binary search for the minimal width that still
4959 results in the optimal cycle count. */
4960 width_min = 1;
4961 while (width > width_min)
4962 {
4963 int width_mid = (width + width_min) / 2;
4964
4965 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4966 width = width_mid;
4967 else if (width_min < width_mid)
4968 width_min = width_mid;
4969 else
4970 break;
4971 }
4972
4973 return width;
4974}
4975
4976/* Recursively rewrite our linearized statements so that the operators
4977 match those in OPS[OPINDEX], putting the computation in rank
4978 order and trying to allow operations to be executed in
4979 parallel. */
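
/* A sketch of the shape this aims for with width 2 (helper name
   invented): the two leading adds are independent and can issue in the
   same cycle, unlike the serial chain.  */

static int
parallel_rewrite_sketch (int a, int b, int c, int d)
{
  int serial = ((a + b) + c) + d;	/* each add depends on the previous */
  int t1 = a + b;			/* independent ... */
  int t2 = c + d;			/* ... of this one */
  int parallel = t1 + t2;
  return serial == parallel;		/* always 1, absent signed overflow */
}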
4980
4981static void
1a91d914 4982rewrite_expr_tree_parallel (gassign *stmt, int width,
04009ada 4983 vec<operand_entry *> ops)
5b1c765d 4984{
4985 enum tree_code opcode = gimple_assign_rhs_code (stmt);
f1f41a6c 4986 int op_num = ops.length ();
5db2fd79 4987 gcc_assert (op_num > 0);
5b1c765d 4988 int stmt_num = op_num - 1;
42acab1c 4989 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5b1c765d 4990 int op_index = op_num - 1;
4991 int stmt_index = 0;
4992 int ready_stmts_end = 0;
4993 int i = 0;
527d8479 4994 gimple *stmt1 = NULL, *stmt2 = NULL;
5b1c765d 4995 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5b1c765d 4996
4997 /* We start expression rewriting from the top statements.
4998 So, in this loop we create a full list of statements
4999 we will work with. */
5000 stmts[stmt_num - 1] = stmt;
5001 for (i = stmt_num - 2; i >= 0; i--)
5002 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5003
5b1c765d 5004 for (i = 0; i < stmt_num; i++)
5005 {
5006 tree op1, op2;
5007
5008 /* Determine whether we should use results of
5009 already handled statements or not. */
5010 if (ready_stmts_end == 0
5011 && (i - stmt_index >= width || op_index < 1))
5012 ready_stmts_end = i;
5013
5014      /* Now we choose operands for the next statement.  A nonzero
5015	 value of ready_stmts_end here means that we should use
5016	 the result of already generated statements as the new operand.  */
5017 if (ready_stmts_end > 0)
5018 {
5019 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5020 if (ready_stmts_end > stmt_index)
5021 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5022 else if (op_index >= 0)
527d8479 5023 {
5024 operand_entry *oe = ops[op_index--];
5025 stmt2 = oe->stmt_to_insert;
5026 op2 = oe->op;
5027 }
5b1c765d 5028 else
5029 {
5030 gcc_assert (stmt_index < i);
5031 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5032 }
5033
5034 if (stmt_index >= ready_stmts_end)
5035 ready_stmts_end = 0;
5036 }
5037 else
5038 {
5039 if (op_index > 1)
5040 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
527d8479 5041 operand_entry *oe2 = ops[op_index--];
5042 operand_entry *oe1 = ops[op_index--];
5043 op2 = oe2->op;
5044 stmt2 = oe2->stmt_to_insert;
5045 op1 = oe1->op;
5046 stmt1 = oe1->stmt_to_insert;
5b1c765d 5047 }
5048
5049 /* If we emit the last statement then we should put
5050 operands into the last statement. It will also
5051 break the loop. */
5052 if (op_index < 0 && stmt_index == i)
5053 i = stmt_num - 1;
5054
5055 if (dump_file && (dump_flags & TDF_DETAILS))
5056 {
5057 fprintf (dump_file, "Transforming ");
1ffa4346 5058 print_gimple_stmt (dump_file, stmts[i], 0);
5b1c765d 5059 }
5060
fc3b1c44 5061 /* If the stmt that defines operand has to be inserted, insert it
5062 before the use. */
5063 if (stmt1)
5064 insert_stmt_before_use (stmts[i], stmt1);
5065 if (stmt2)
5066 insert_stmt_before_use (stmts[i], stmt2);
5067 stmt1 = stmt2 = NULL;
5068
5b1c765d 5069      /* We keep the original statement only for the last one.  All
5070	 others are recreated.  */
5071 if (i == stmt_num - 1)
5072 {
5073 gimple_assign_set_rhs1 (stmts[i], op1);
5074 gimple_assign_set_rhs2 (stmts[i], op2);
5075 update_stmt (stmts[i]);
5076 }
5077 else
3bdaecd5 5078 {
5079 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
126bc06b 5080 gimple_set_visited (stmts[i], true);
3bdaecd5 5081 }
5b1c765d 5082 if (dump_file && (dump_flags & TDF_DETAILS))
5083 {
5084 fprintf (dump_file, " into ");
1ffa4346 5085 print_gimple_stmt (dump_file, stmts[i], 0);
5b1c765d 5086 }
5087 }
5088
5089 remove_visited_stmt_chain (last_rhs1);
5090}
5091
54aceb26 5092/* Transform STMT, which is really (A + B) + (C + D), into the left
5093 linear form, ((A+B)+C)+D.
5094 Recurse on D if necessary. */
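
/* A sketch of the transform (helper name invented); this is the inverse
   of the parallel shape used at emission time and produces the
   left-linear form the rest of the pass expects.  */

static int
linearize_sketch (int a, int b, int c, int d)
{
  int before = (a + b) + (c + d);	/* two independent subtrees */
  int after = ((a + b) + c) + d;	/* left-linear form */
  return before == after;		/* always 1, absent signed overflow */
}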
5095
5096static void
42acab1c 5097linearize_expr (gimple *stmt)
54aceb26 5098{
82cb4e1c 5099 gimple_stmt_iterator gsi;
42acab1c 5100 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5101 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5102 gimple *oldbinrhs = binrhs;
75a70cf9 5103 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
42acab1c 5104 gimple *newbinrhs = NULL;
2e966e2a 5105 class loop *loop = loop_containing_stmt (stmt);
82cb4e1c 5106 tree lhs = gimple_assign_lhs (stmt);
54aceb26 5107
75a70cf9 5108 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5109 && is_reassociable_op (binrhs, rhscode, loop));
5110
82cb4e1c 5111 gsi = gsi_for_stmt (stmt);
54aceb26 5112
75a70cf9 5113 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
e9cf809e 5114 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5115 gimple_assign_rhs_code (binrhs),
5116 gimple_assign_lhs (binlhs),
5117 gimple_assign_rhs2 (binrhs));
75a70cf9 5118 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
82cb4e1c 5119 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5120 gimple_set_uid (binrhs, gimple_uid (stmt));
54aceb26 5121
75a70cf9 5122 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5123 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
54aceb26 5124
5125 if (dump_file && (dump_flags & TDF_DETAILS))
5126 {
5127 fprintf (dump_file, "Linearized: ");
1ffa4346 5128 print_gimple_stmt (dump_file, stmt, 0);
54aceb26 5129 }
5130
5131 reassociate_stats.linearized++;
54aceb26 5132 update_stmt (stmt);
75a70cf9 5133
82cb4e1c 5134 gsi = gsi_for_stmt (oldbinrhs);
54675e05 5135 reassoc_remove_stmt (&gsi);
82cb4e1c 5136 release_defs (oldbinrhs);
5137
75a70cf9 5138 gimple_set_visited (stmt, true);
5139 gimple_set_visited (binlhs, true);
5140 gimple_set_visited (binrhs, true);
54aceb26 5141
5142 /* Tail recurse on the new rhs if it still needs reassociation. */
a4c3fb95 5143 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
75a70cf9 5144 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5145 want to change the algorithm while converting to tuples. */
54aceb26 5146 linearize_expr (stmt);
54aceb26 5147}
5148
75a70cf9 5149/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
54aceb26 5150 it. Otherwise, return NULL. */
5151
42acab1c 5152static gimple *
54aceb26 5153get_single_immediate_use (tree lhs)
5154{
5155 use_operand_p immuse;
42acab1c 5156 gimple *immusestmt;
54aceb26 5157
5158 if (TREE_CODE (lhs) == SSA_NAME
75a70cf9 5159 && single_imm_use (lhs, &immuse, &immusestmt)
5160 && is_gimple_assign (immusestmt))
5161 return immusestmt;
5162
5163 return NULL;
54aceb26 5164}
54aceb26 5165
54aceb26 5166/* Recursively negate the value of TONEGATE, and return the SSA_NAME
5167 representing the negated value. Insertions of any necessary
75a70cf9 5168 instructions go before GSI.
54aceb26 5169 This function is recursive in that, if you hand it "a_5" as the
 5170 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
 5171 transform that definition into "a_5' = -b_3 + -b_4" and return a_5'. */
5172
5173static tree
82cb4e1c 5174negate_value (tree tonegate, gimple_stmt_iterator *gsip)
54aceb26 5175{
42acab1c 5176 gimple *negatedefstmt = NULL;
54aceb26 5177 tree resultofnegate;
82cb4e1c 5178 gimple_stmt_iterator gsi;
5179 unsigned int uid;
54aceb26 5180
54aceb26 5181 /* If we are trying to negate a name, defined by an add, negate the
5182 add operands instead. */
75a70cf9 5183 if (TREE_CODE (tonegate) == SSA_NAME)
5184 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
54aceb26 5185 if (TREE_CODE (tonegate) == SSA_NAME
75a70cf9 5186 && is_gimple_assign (negatedefstmt)
5187 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5188 && has_single_use (gimple_assign_lhs (negatedefstmt))
5189 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
54aceb26 5190 {
75a70cf9 5191 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5192 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
82cb4e1c 5193 tree lhs = gimple_assign_lhs (negatedefstmt);
42acab1c 5194 gimple *g;
75a70cf9 5195
5196 gsi = gsi_for_stmt (negatedefstmt);
5197 rhs1 = negate_value (rhs1, &gsi);
75a70cf9 5198
5199 gsi = gsi_for_stmt (negatedefstmt);
5200 rhs2 = negate_value (rhs2, &gsi);
75a70cf9 5201
82cb4e1c 5202 gsi = gsi_for_stmt (negatedefstmt);
f9e245b2 5203 lhs = make_ssa_name (TREE_TYPE (lhs));
82cb4e1c 5204 gimple_set_visited (negatedefstmt, true);
e9cf809e 5205 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
82cb4e1c 5206 gimple_set_uid (g, gimple_uid (negatedefstmt));
5207 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5208 return lhs;
54aceb26 5209 }
5210
5211 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
82cb4e1c 5212 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
75a70cf9 5213 NULL_TREE, true, GSI_SAME_STMT);
82cb4e1c 5214 gsi = *gsip;
5215 uid = gimple_uid (gsi_stmt (gsi));
5216 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5217 {
42acab1c 5218 gimple *stmt = gsi_stmt (gsi);
82cb4e1c 5219 if (gimple_uid (stmt) != 0)
5220 break;
5221 gimple_set_uid (stmt, uid);
5222 }
54aceb26 5223 return resultofnegate;
54aceb26 5224}
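
/* Editor's sketch, not part of the pass: negate_value at the source
   level.  Breaking up the subtract below negates T, and since T is a
   single-use add, the negation is pushed onto its operands, giving
   a + (-b + -c) and thus one flat addition chain.  */

unsigned int
negate_value_example (unsigned int a, unsigned int b, unsigned int c)
{
  unsigned int t = b + c;	/* single use, defined by an add */
  return a - t;
}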
5225
5226/* Return true if we should break up the subtract in STMT into an add
 5227 with negate. This is true when the subtract operands are really
5228 adds, or the subtract itself is used in an add expression. In
5229 either case, breaking up the subtract into an add with negate
5230 exposes the adds to reassociation. */
5231
5232static bool
42acab1c 5233should_break_up_subtract (gimple *stmt)
54aceb26 5234{
75a70cf9 5235 tree lhs = gimple_assign_lhs (stmt);
5236 tree binlhs = gimple_assign_rhs1 (stmt);
5237 tree binrhs = gimple_assign_rhs2 (stmt);
42acab1c 5238 gimple *immusestmt;
2e966e2a 5239 class loop *loop = loop_containing_stmt (stmt);
54aceb26 5240
5241 if (TREE_CODE (binlhs) == SSA_NAME
a4c3fb95 5242 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
54aceb26 5243 return true;
5244
5245 if (TREE_CODE (binrhs) == SSA_NAME
a4c3fb95 5246 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
54aceb26 5247 return true;
5248
5249 if (TREE_CODE (lhs) == SSA_NAME
5250 && (immusestmt = get_single_immediate_use (lhs))
75a70cf9 5251 && is_gimple_assign (immusestmt)
dddf5036 5252 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
29a45e93 5253 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5254 && gimple_assign_rhs1 (immusestmt) == lhs)
5255 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
54aceb26 5256 return true;
5257 return false;
54aceb26 5258}
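
/* Editor's sketch, not part of the pass: the predicate above holds
   for the subtract below because its result feeds an addition, so
   the pass rewrites it as t = a + -b and exposes the whole chain
   { a, -b, c } to reassociation.  */

unsigned int
break_up_subtract_example (unsigned int a, unsigned int b, unsigned int c)
{
  unsigned int t = a - b;	/* single use in the addition below */
  return t + c;
}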
5259
5260/* Transform STMT from A - B into A + -B. */
5261
5262static void
42acab1c 5263break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
54aceb26 5264{
75a70cf9 5265 tree rhs1 = gimple_assign_rhs1 (stmt);
5266 tree rhs2 = gimple_assign_rhs2 (stmt);
54aceb26 5267
5268 if (dump_file && (dump_flags & TDF_DETAILS))
5269 {
5270 fprintf (dump_file, "Breaking up subtract ");
1ffa4346 5271 print_gimple_stmt (dump_file, stmt, 0);
54aceb26 5272 }
5273
75a70cf9 5274 rhs2 = negate_value (rhs2, gsip);
5275 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
54aceb26 5276 update_stmt (stmt);
5277}
5278
8c5ac7f6 5279/* Determine whether STMT is a builtin call that raises an SSA name
 5280 to an exact integer power. If so, place the SSA name in *BASE and
 5281 the exponent in *EXPONENT, and return TRUE; otherwise return FALSE.
 5282 The single-use, early-reassociation and unsafe-math conditions are
 5283 checked by the caller, try_special_add_to_ops. */
5284
5285static bool
5403ed8b 5286acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
8c5ac7f6 5287{
390bb332 5288 tree arg1;
8c5ac7f6 5289 REAL_VALUE_TYPE c, cint;
5290
390bb332 5291 switch (gimple_call_combined_fn (stmt))
8c5ac7f6 5292 {
390bb332 5293 CASE_CFN_POW:
54faaad5 5294 if (flag_errno_math)
5295 return false;
5296
8c5ac7f6 5297 *base = gimple_call_arg (stmt, 0);
5298 arg1 = gimple_call_arg (stmt, 1);
5299
5300 if (TREE_CODE (arg1) != REAL_CST)
5301 return false;
5302
5303 c = TREE_REAL_CST (arg1);
5304
5305 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5306 return false;
5307
5308 *exponent = real_to_integer (&c);
e913b5cd 5309 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
8c5ac7f6 5310 if (!real_identical (&c, &cint))
5311 return false;
5312
5313 break;
5314
390bb332 5315 CASE_CFN_POWI:
8c5ac7f6 5316 *base = gimple_call_arg (stmt, 0);
5317 arg1 = gimple_call_arg (stmt, 1);
5318
e913b5cd 5319 if (!tree_fits_shwi_p (arg1))
8c5ac7f6 5320 return false;
5321
e913b5cd 5322 *exponent = tree_to_shwi (arg1);
8c5ac7f6 5323 break;
5324
5325 default:
5326 return false;
5327 }
5328
5329 /* Expanding negative exponents is generally unproductive, so we don't
5330 complicate matters with those. Exponents of zero and one should
5331 have been handled by expression folding. */
5332 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5333 return false;
5334
5335 return true;
5336}
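
/* Editor's sketch, not part of the pass: what the predicate above
   accepts, assuming early reassociation with
   -funsafe-math-optimizations and -fno-math-errno.  The call below
   yields *BASE = x and *EXPONENT = 4; pow (x, 0.5) would be rejected
   because 0.5 is not an exact integer, and negative exponents are
   rejected as well.  */

double
acceptable_pow_example (double x)
{
  return __builtin_pow (x, 4.0);
}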
5337
9eafdd7b 5338/* Try to derive and add operand entry for OP to *OPS. Return false if
5339 unsuccessful. */
5340
5341static bool
5342try_special_add_to_ops (vec<operand_entry *> *ops,
5343 enum tree_code code,
5344 tree op, gimple* def_stmt)
5345{
5346 tree base = NULL_TREE;
5347 HOST_WIDE_INT exponent = 0;
5348
5403ed8b 5349 if (TREE_CODE (op) != SSA_NAME
5350 || ! has_single_use (op))
9eafdd7b 5351 return false;
5352
5353 if (code == MULT_EXPR
5403ed8b 5354 && reassoc_insert_powi_p
5355 && flag_unsafe_math_optimizations
5356 && is_gimple_call (def_stmt)
5357 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
9eafdd7b 5358 {
5359 add_repeat_to_ops_vec (ops, base, exponent);
5360 gimple_set_visited (def_stmt, true);
5361 return true;
5362 }
5363 else if (code == MULT_EXPR
5364 && is_gimple_assign (def_stmt)
5365 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5366 && !HONOR_SNANS (TREE_TYPE (op))
5367 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5368 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5369 {
5370 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5371 tree cst = build_minus_one_cst (TREE_TYPE (op));
5372 add_to_ops_vec (ops, rhs1);
5373 add_to_ops_vec (ops, cst);
5374 gimple_set_visited (def_stmt, true);
5375 return true;
5376 }
5377
5378 return false;
5379}
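
/* Editor's sketch, not part of the pass: the NEGATE_EXPR case above
   at the source level, assuming -ffast-math.  The single-use negation
   feeding the multiply is entered into the operand vector as its
   operand plus a constant -1.0, i.e. the chain { a, b, -1.0 }; the
   trailing -1.0 is later popped off and becomes a single negate of
   the final result.  */

double
negate_in_product_example (double a, double b)
{
  double t = -a;
  return t * b;
}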
5380
54aceb26 5381/* Recursively linearize a binary expression that is the RHS of STMT.
5382 Place the operands of the expression tree in the vector named OPS. */
5383
5384static void
04009ada 5385linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
dddf5036 5386 bool is_associative, bool set_visited)
54aceb26 5387{
75a70cf9 5388 tree binlhs = gimple_assign_rhs1 (stmt);
5389 tree binrhs = gimple_assign_rhs2 (stmt);
42acab1c 5390 gimple *binlhsdef = NULL, *binrhsdef = NULL;
54aceb26 5391 bool binlhsisreassoc = false;
5392 bool binrhsisreassoc = false;
75a70cf9 5393 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2e966e2a 5394 class loop *loop = loop_containing_stmt (stmt);
54aceb26 5395
dddf5036 5396 if (set_visited)
5397 gimple_set_visited (stmt, true);
54aceb26 5398
5399 if (TREE_CODE (binlhs) == SSA_NAME)
5400 {
5401 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
df0675b8 5402 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
aac19106 5403 && !stmt_could_throw_p (cfun, binlhsdef));
54aceb26 5404 }
5405
5406 if (TREE_CODE (binrhs) == SSA_NAME)
5407 {
5408 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
df0675b8 5409 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
aac19106 5410 && !stmt_could_throw_p (cfun, binrhsdef));
54aceb26 5411 }
5412
5413 /* If the LHS is not reassociable, but the RHS is, we need to swap
5414 them. If neither is reassociable, there is nothing we can do, so
5415 just put them in the ops vector. If the LHS is reassociable,
5416 linearize it. If both are reassociable, then linearize the RHS
5417 and the LHS. */
5418
5419 if (!binlhsisreassoc)
5420 {
dddf5036 5421 /* If this is not an associative operation, like division, give up. */
5422 if (!is_associative)
5423 {
5424 add_to_ops_vec (ops, binrhs);
5425 return;
5426 }
5427
54aceb26 5428 if (!binrhsisreassoc)
5429 {
9eafdd7b 5430 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
8c5ac7f6 5431 add_to_ops_vec (ops, binrhs);
5432
9eafdd7b 5433 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
8c5ac7f6 5434 add_to_ops_vec (ops, binlhs);
5435
54aceb26 5436 return;
5437 }
5438
5439 if (dump_file && (dump_flags & TDF_DETAILS))
5440 {
5441 fprintf (dump_file, "swapping operands of ");
1ffa4346 5442 print_gimple_stmt (dump_file, stmt, 0);
54aceb26 5443 }
5444
8f6fa493 5445 swap_ssa_operands (stmt,
5446 gimple_assign_rhs1_ptr (stmt),
5447 gimple_assign_rhs2_ptr (stmt));
54aceb26 5448 update_stmt (stmt);
5449
5450 if (dump_file && (dump_flags & TDF_DETAILS))
5451 {
5452 fprintf (dump_file, " is now ");
1ffa4346 5453 print_gimple_stmt (dump_file, stmt, 0);
54aceb26 5454 }
5455
5456 /* We want to make it so the lhs is always the reassociative op,
5457 so swap. */
dfcf26a5 5458 std::swap (binlhs, binrhs);
54aceb26 5459 }
5460 else if (binrhsisreassoc)
5461 {
5462 linearize_expr (stmt);
75a70cf9 5463 binlhs = gimple_assign_rhs1 (stmt);
5464 binrhs = gimple_assign_rhs2 (stmt);
54aceb26 5465 }
5466
5467 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
a4c3fb95 5468 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5469 rhscode, loop));
dddf5036 5470 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5471 is_associative, set_visited);
8c5ac7f6 5472
9eafdd7b 5473 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
8c5ac7f6 5474 add_to_ops_vec (ops, binrhs);
54aceb26 5475}
5476
5477/* Repropagate the negates back into subtracts, since no other pass
5478 currently does it. */
5479
5480static void
5481repropagate_negates (void)
5482{
5483 unsigned int i = 0;
5484 tree negate;
5485
f1f41a6c 5486 FOR_EACH_VEC_ELT (plus_negates, i, negate)
54aceb26 5487 {
42acab1c 5488 gimple *user = get_single_immediate_use (negate);
54aceb26 5489
c07e5b8b 5490 if (!user || !is_gimple_assign (user))
5491 continue;
5492
54aceb26 5493 /* The negate operand can be either operand of a PLUS_EXPR
5494 (it can be the LHS if the RHS is a constant for example).
5495
5496 Force the negate operand to the RHS of the PLUS_EXPR, then
5497 transform the PLUS_EXPR into a MINUS_EXPR. */
c07e5b8b 5498 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
54aceb26 5499 {
54aceb26 5500 /* If the negated operand appears on the LHS of the
5501 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5502 to force the negated operand to the RHS of the PLUS_EXPR. */
75a70cf9 5503 if (gimple_assign_rhs1 (user) == negate)
54aceb26 5504 {
8f6fa493 5505 swap_ssa_operands (user,
5506 gimple_assign_rhs1_ptr (user),
5507 gimple_assign_rhs2_ptr (user));
54aceb26 5508 }
5509
5510 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5511 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
75a70cf9 5512 if (gimple_assign_rhs2 (user) == negate)
54aceb26 5513 {
75a70cf9 5514 tree rhs1 = gimple_assign_rhs1 (user);
5b849c60 5515 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
75a70cf9 5516 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5517 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
54aceb26 5518 update_stmt (user);
5519 }
5520 }
c07e5b8b 5521 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5522 {
5523 if (gimple_assign_rhs1 (user) == negate)
5524 {
5525 /* We have
5526 x = -a
5527 y = x - b
5528 which we transform into
5529 x = a + b
5530 y = -x .
 5531 This pushes the negate down, where we may be able to merge
 5532 it into some other operation; hence insert it into the
 5533 plus_negates vector. */
42acab1c 5534 gimple *feed = SSA_NAME_DEF_STMT (negate);
c07e5b8b 5535 tree a = gimple_assign_rhs1 (feed);
82cb4e1c 5536 tree b = gimple_assign_rhs2 (user);
5537 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5538 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
f9e245b2 5539 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
42acab1c 5540 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
82cb4e1c 5541 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
806413d2 5542 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
82cb4e1c 5543 user = gsi_stmt (gsi2);
5544 update_stmt (user);
54675e05 5545 reassoc_remove_stmt (&gsi);
82cb4e1c 5546 release_defs (feed);
5547 plus_negates.safe_push (gimple_assign_lhs (user));
c07e5b8b 5548 }
5549 else
5550 {
5551 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5552 rid of one operation. */
42acab1c 5553 gimple *feed = SSA_NAME_DEF_STMT (negate);
c07e5b8b 5554 tree a = gimple_assign_rhs1 (feed);
5555 tree rhs1 = gimple_assign_rhs1 (user);
5556 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5557 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5558 update_stmt (gsi_stmt (gsi));
5559 }
5560 }
54aceb26 5561 }
5562}
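
/* Editor's sketch, not part of the pass: the two MINUS_EXPR cases
   handled above, at the source level.  */

unsigned int
repropagate_example_1 (unsigned int a, unsigned int b)
{
  unsigned int x = -a;
  return x - b;		/* becomes x = a + b; result = -x */
}

unsigned int
repropagate_example_2 (unsigned int a, unsigned int b)
{
  unsigned int x = -a;
  return b - x;		/* becomes b + a; the negate disappears */
}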
5563
c07e5b8b 5564/* Returns true if OP is of a type for which we can do reassociation.
 5565 That is for integral types with wrapping overflow, non-saturating
 5566 fixed-point types, and floating point types when associative-math is enabled. */
5567
5568static bool
5569can_reassociate_p (tree op)
5570{
5571 tree type = TREE_TYPE (op);
d4d349db 5572 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5573 return false;
026f2137 5574 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
c07e5b8b 5575 || NON_SAT_FIXED_POINT_TYPE_P (type)
f4a50267 5576 || (flag_associative_math && FLOAT_TYPE_P (type)))
c07e5b8b 5577 return true;
5578 return false;
5579}
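
/* Editor's note, not part of the pass: with default flags the
   predicate above accepts 'unsigned int' (wrapping overflow) but
   rejects plain 'int' (undefined overflow, unless -fwrapv) and
   'double' (unless -fassociative-math, e.g. via -ffast-math).
   Hence the two constants below may legally be merged into one.  */

unsigned int
can_reassociate_example (unsigned int a, unsigned int b)
{
  return (a + 5u) + (b + 7u);	/* reassociated to (a + b) + 12u */
}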
5580
54aceb26 5581/* Break up subtract operations in block BB.
5582
5583 We do this top down because we don't know whether the subtract is
5584 part of a possible chain of reassociation except at the top.
48e1416a 5585
54aceb26 5586 IE given
5587 d = f + g
5588 c = a + e
5589 b = c - d
5590 q = b - r
5591 k = t - q
48e1416a 5592
54aceb26 5593 we want to break up k = t - q, but we won't until we've transformed q
75a70cf9 5594 = b - r, which won't be broken up until we transform b = c - d.
5595
82cb4e1c 5596 En passant, clear the GIMPLE visited flag on every statement
5597 and set UIDs within each basic block. */
54aceb26 5598
5599static void
5600break_up_subtract_bb (basic_block bb)
5601{
75a70cf9 5602 gimple_stmt_iterator gsi;
54aceb26 5603 basic_block son;
82cb4e1c 5604 unsigned int uid = 1;
54aceb26 5605
75a70cf9 5606 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
54aceb26 5607 {
42acab1c 5608 gimple *stmt = gsi_stmt (gsi);
75a70cf9 5609 gimple_set_visited (stmt, false);
82cb4e1c 5610 gimple_set_uid (stmt, uid++);
54aceb26 5611
c07e5b8b 5612 if (!is_gimple_assign (stmt)
5613 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5614 continue;
5615
75a70cf9 5616 /* Look for simple gimple subtract operations. */
c07e5b8b 5617 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
54aceb26 5618 {
c07e5b8b 5619 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5620 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
54aceb26 5621 continue;
5622
5623 /* Check for a subtract used only in an addition. If this
5624 is the case, transform it into add of a negate for better
5625 reassociation. IE transform C = A-B into C = A + -B if C
5626 is only used in an addition. */
75a70cf9 5627 if (should_break_up_subtract (stmt))
5628 break_up_subtract (stmt, &gsi);
54aceb26 5629 }
c07e5b8b 5630 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5631 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
f1f41a6c 5632 plus_negates.safe_push (gimple_assign_lhs (stmt));
54aceb26 5633 }
5634 for (son = first_dom_son (CDI_DOMINATORS, bb);
5635 son;
5636 son = next_dom_son (CDI_DOMINATORS, son))
5637 break_up_subtract_bb (son);
5638}
5639
8c5ac7f6 5640/* Used for repeated factor analysis. */
04009ada 5641struct repeat_factor
8c5ac7f6 5642{
5643 /* An SSA name that occurs in a multiply chain. */
5644 tree factor;
5645
5646 /* Cached rank of the factor. */
5647 unsigned rank;
5648
5649 /* Number of occurrences of the factor in the chain. */
5650 HOST_WIDE_INT count;
5651
5652 /* An SSA name representing the product of this factor and
5653 all factors appearing later in the repeated factor vector. */
5654 tree repr;
5655};
5656
8c5ac7f6 5657
f1f41a6c 5658static vec<repeat_factor> repeat_factor_vec;
8c5ac7f6 5659
5660/* Used for sorting the repeat factor vector. Sort primarily by
5661 ascending occurrence count, secondarily by descending rank. */
5662
5663static int
5664compare_repeat_factors (const void *x1, const void *x2)
5665{
04009ada 5666 const repeat_factor *rf1 = (const repeat_factor *) x1;
5667 const repeat_factor *rf2 = (const repeat_factor *) x2;
8c5ac7f6 5668
5669 if (rf1->count != rf2->count)
5670 return rf1->count - rf2->count;
5671
5672 return rf2->rank - rf1->rank;
5673}
5674
8c5ac7f6 5675/* Look for repeated operands in OPS in the multiply tree rooted at
5676 STMT. Replace them with an optimal sequence of multiplies and powi
97269507 5677 builtin calls, and remove the used operands from OPS. Return an
5678 SSA name representing the value of the replacement sequence. */
8c5ac7f6 5679
97269507 5680static tree
04009ada 5681attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
8c5ac7f6 5682{
5683 unsigned i, j, vec_len;
5684 int ii;
04009ada 5685 operand_entry *oe;
5686 repeat_factor *rf1, *rf2;
8c5ac7f6 5687 repeat_factor rfnew;
97269507 5688 tree result = NULL_TREE;
8c5ac7f6 5689 tree target_ssa, iter_result;
5690 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5691 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5692 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
42acab1c 5693 gimple *mul_stmt, *pow_stmt;
8c5ac7f6 5694
5695 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5696 target. */
5697 if (!powi_fndecl)
97269507 5698 return NULL_TREE;
8c5ac7f6 5699
5700 /* Allocate the repeated factor vector. */
f1f41a6c 5701 repeat_factor_vec.create (10);
8c5ac7f6 5702
5703 /* Scan the OPS vector for all SSA names in the product and build
5704 up a vector of occurrence counts for each factor. */
f1f41a6c 5705 FOR_EACH_VEC_ELT (*ops, i, oe)
8c5ac7f6 5706 {
5707 if (TREE_CODE (oe->op) == SSA_NAME)
5708 {
f1f41a6c 5709 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
8c5ac7f6 5710 {
5711 if (rf1->factor == oe->op)
5712 {
5713 rf1->count += oe->count;
5714 break;
5715 }
5716 }
5717
f1f41a6c 5718 if (j >= repeat_factor_vec.length ())
8c5ac7f6 5719 {
5720 rfnew.factor = oe->op;
5721 rfnew.rank = oe->rank;
5722 rfnew.count = oe->count;
5723 rfnew.repr = NULL_TREE;
f1f41a6c 5724 repeat_factor_vec.safe_push (rfnew);
8c5ac7f6 5725 }
5726 }
5727 }
5728
5729 /* Sort the repeated factor vector by (a) increasing occurrence count,
5730 and (b) decreasing rank. */
f1f41a6c 5731 repeat_factor_vec.qsort (compare_repeat_factors);
8c5ac7f6 5732
5733 /* It is generally best to combine as many base factors as possible
5734 into a product before applying __builtin_powi to the result.
5735 However, the sort order chosen for the repeated factor vector
5736 allows us to cache partial results for the product of the base
5737 factors for subsequent use. When we already have a cached partial
5738 result from a previous iteration, it is best to make use of it
 5739 before looking for another __builtin_powi opportunity.
5740
5741 As an example, consider x * x * y * y * y * z * z * z * z.
5742 We want to first compose the product x * y * z, raise it to the
5743 second power, then multiply this by y * z, and finally multiply
5744 by z. This can be done in 5 multiplies provided we cache y * z
5745 for use in both expressions:
5746
5747 t1 = y * z
5748 t2 = t1 * x
5749 t3 = t2 * t2
5750 t4 = t1 * t3
5751 result = t4 * z
5752
5753 If we instead ignored the cached y * z and first multiplied by
 5754 the __builtin_powi opportunity z * z, we would get the inferior:
5755
5756 t1 = y * z
5757 t2 = t1 * x
5758 t3 = t2 * t2
5759 t4 = z * z
5760 t5 = t3 * t4
5761 result = t5 * y */
5762
f1f41a6c 5763 vec_len = repeat_factor_vec.length ();
8c5ac7f6 5764
5765 /* Repeatedly look for opportunities to create a builtin_powi call. */
5766 while (true)
5767 {
5768 HOST_WIDE_INT power;
5769
5770 /* First look for the largest cached product of factors from
5771 preceding iterations. If found, create a builtin_powi for
5772 it if the minimum occurrence count for its factors is at
5773 least 2, or just use this cached product as our next
5774 multiplicand if the minimum occurrence count is 1. */
f1f41a6c 5775 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
8c5ac7f6 5776 {
5777 if (rf1->repr && rf1->count > 0)
5778 break;
5779 }
5780
5781 if (j < vec_len)
5782 {
5783 power = rf1->count;
5784
5785 if (power == 1)
5786 {
5787 iter_result = rf1->repr;
5788
5789 if (dump_file && (dump_flags & TDF_DETAILS))
5790 {
5791 unsigned elt;
04009ada 5792 repeat_factor *rf;
8c5ac7f6 5793 fputs ("Multiplying by cached product ", dump_file);
5794 for (elt = j; elt < vec_len; elt++)
5795 {
f1f41a6c 5796 rf = &repeat_factor_vec[elt];
1ffa4346 5797 print_generic_expr (dump_file, rf->factor);
8c5ac7f6 5798 if (elt < vec_len - 1)
5799 fputs (" * ", dump_file);
5800 }
5801 fputs ("\n", dump_file);
5802 }
5803 }
5804 else
5805 {
03d37e4e 5806 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
8c5ac7f6 5807 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5808 build_int_cst (integer_type_node,
5809 power));
5810 gimple_call_set_lhs (pow_stmt, iter_result);
5811 gimple_set_location (pow_stmt, gimple_location (stmt));
0be0fe66 5812 gimple_set_uid (pow_stmt, gimple_uid (stmt));
8c5ac7f6 5813 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5814
5815 if (dump_file && (dump_flags & TDF_DETAILS))
5816 {
5817 unsigned elt;
04009ada 5818 repeat_factor *rf;
8c5ac7f6 5819 fputs ("Building __builtin_pow call for cached product (",
5820 dump_file);
5821 for (elt = j; elt < vec_len; elt++)
5822 {
f1f41a6c 5823 rf = &repeat_factor_vec[elt];
1ffa4346 5824 print_generic_expr (dump_file, rf->factor);
8c5ac7f6 5825 if (elt < vec_len - 1)
5826 fputs (" * ", dump_file);
5827 }
f03df321 5828 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
8ef3b7cb 5829 power);
8c5ac7f6 5830 }
5831 }
5832 }
5833 else
5834 {
5835 /* Otherwise, find the first factor in the repeated factor
5836 vector whose occurrence count is at least 2. If no such
5837 factor exists, there are no builtin_powi opportunities
5838 remaining. */
f1f41a6c 5839 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
8c5ac7f6 5840 {
5841 if (rf1->count >= 2)
5842 break;
5843 }
5844
5845 if (j >= vec_len)
5846 break;
5847
5848 power = rf1->count;
5849
5850 if (dump_file && (dump_flags & TDF_DETAILS))
5851 {
5852 unsigned elt;
04009ada 5853 repeat_factor *rf;
8c5ac7f6 5854 fputs ("Building __builtin_pow call for (", dump_file);
5855 for (elt = j; elt < vec_len; elt++)
5856 {
f1f41a6c 5857 rf = &repeat_factor_vec[elt];
1ffa4346 5858 print_generic_expr (dump_file, rf->factor);
8c5ac7f6 5859 if (elt < vec_len - 1)
5860 fputs (" * ", dump_file);
5861 }
f03df321 5862 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
8c5ac7f6 5863 }
5864
5865 reassociate_stats.pows_created++;
5866
5867 /* Visit each element of the vector in reverse order (so that
5868 high-occurrence elements are visited first, and within the
5869 same occurrence count, lower-ranked elements are visited
5870 first). Form a linear product of all elements in this order
 5871 whose occurrence count is at least that of element J.
5872 Record the SSA name representing the product of each element
5873 with all subsequent elements in the vector. */
5874 if (j == vec_len - 1)
5875 rf1->repr = rf1->factor;
5876 else
5877 {
5878 for (ii = vec_len - 2; ii >= (int)j; ii--)
5879 {
5880 tree op1, op2;
5881
f1f41a6c 5882 rf1 = &repeat_factor_vec[ii];
5883 rf2 = &repeat_factor_vec[ii + 1];
8c5ac7f6 5884
5885 /* Init the last factor's representative to be itself. */
5886 if (!rf2->repr)
5887 rf2->repr = rf2->factor;
5888
5889 op1 = rf1->factor;
5890 op2 = rf2->repr;
5891
03d37e4e 5892 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
e9cf809e 5893 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5894 op1, op2);
8c5ac7f6 5895 gimple_set_location (mul_stmt, gimple_location (stmt));
0be0fe66 5896 gimple_set_uid (mul_stmt, gimple_uid (stmt));
8c5ac7f6 5897 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5898 rf1->repr = target_ssa;
5899
5900 /* Don't reprocess the multiply we just introduced. */
5901 gimple_set_visited (mul_stmt, true);
5902 }
5903 }
5904
5905 /* Form a call to __builtin_powi for the maximum product
5906 just formed, raised to the power obtained earlier. */
f1f41a6c 5907 rf1 = &repeat_factor_vec[j];
03d37e4e 5908 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
8c5ac7f6 5909 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5910 build_int_cst (integer_type_node,
5911 power));
5912 gimple_call_set_lhs (pow_stmt, iter_result);
5913 gimple_set_location (pow_stmt, gimple_location (stmt));
0be0fe66 5914 gimple_set_uid (pow_stmt, gimple_uid (stmt));
8c5ac7f6 5915 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5916 }
5917
97269507 5918 /* If we previously formed at least one other builtin_powi call,
5919 form the product of this one and those others. */
5920 if (result)
5921 {
03d37e4e 5922 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
e9cf809e 5923 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5924 result, iter_result);
97269507 5925 gimple_set_location (mul_stmt, gimple_location (stmt));
0be0fe66 5926 gimple_set_uid (mul_stmt, gimple_uid (stmt));
97269507 5927 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5928 gimple_set_visited (mul_stmt, true);
5929 result = new_result;
5930 }
5931 else
5932 result = iter_result;
8c5ac7f6 5933
5934 /* Decrement the occurrence count of each element in the product
5935 by the count found above, and remove this many copies of each
5936 factor from OPS. */
5937 for (i = j; i < vec_len; i++)
5938 {
5939 unsigned k = power;
5940 unsigned n;
5941
f1f41a6c 5942 rf1 = &repeat_factor_vec[i];
8c5ac7f6 5943 rf1->count -= power;
5944
f1f41a6c 5945 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
8c5ac7f6 5946 {
5947 if (oe->op == rf1->factor)
5948 {
5949 if (oe->count <= k)
5950 {
f1f41a6c 5951 ops->ordered_remove (n);
8c5ac7f6 5952 k -= oe->count;
5953
5954 if (k == 0)
5955 break;
5956 }
5957 else
5958 {
5959 oe->count -= k;
5960 break;
5961 }
5962 }
5963 }
5964 }
5965 }
5966
5967 /* At this point all elements in the repeated factor vector have a
5968 remaining occurrence count of 0 or 1, and those with a count of 1
5969 don't have cached representatives. Re-sort the ops vector and
5970 clean up. */
f1f41a6c 5971 ops->qsort (sort_by_operand_rank);
5972 repeat_factor_vec.release ();
97269507 5973
5974 /* Return the final product computed herein. Note that there may
5975 still be some elements with single occurrence count left in OPS;
5976 those will be handled by the normal reassociation logic. */
5977 return result;
5978}
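
/* Editor's sketch, not part of the pass: a function exercising the
   repeated-factor logic above.  With -O2 -funsafe-math-optimizations
   the pass aims at the five-multiply sequence worked out in the big
   comment above: t1 = y*z; t2 = t1*x; t3 = t2*t2; t4 = t1*t3;
   result = t4*z.  */

double
powi_example (double x, double y, double z)
{
  return x * x * y * y * y * z * z * z * z;
}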
5979
389034a4 5980/* Attempt to optimize
5981 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5982 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5983
5984static void
5985attempt_builtin_copysign (vec<operand_entry *> *ops)
5986{
5987 operand_entry *oe;
5988 unsigned int i;
5989 unsigned int length = ops->length ();
5990 tree cst = ops->last ()->op;
5991
5992 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5993 return;
5994
5995 FOR_EACH_VEC_ELT (*ops, i, oe)
5996 {
5997 if (TREE_CODE (oe->op) == SSA_NAME
5998 && has_single_use (oe->op))
5999 {
6000 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
390bb332 6001 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
389034a4 6002 {
389034a4 6003 tree arg0, arg1;
390bb332 6004 switch (gimple_call_combined_fn (old_call))
389034a4 6005 {
390bb332 6006 CASE_CFN_COPYSIGN:
8c32188e 6007 CASE_CFN_COPYSIGN_FN:
390bb332 6008 arg0 = gimple_call_arg (old_call, 0);
6009 arg1 = gimple_call_arg (old_call, 1);
389034a4 6010 /* The first argument of copysign must be a constant,
6011 otherwise there's nothing to do. */
6012 if (TREE_CODE (arg0) == REAL_CST)
6013 {
390bb332 6014 tree type = TREE_TYPE (arg0);
6015 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
389034a4 6016 /* If we couldn't fold to a single constant, skip it.
6017 That happens e.g. for inexact multiplication when
6018 -frounding-math. */
6019 if (mul == NULL_TREE)
6020 break;
390bb332 6021 /* Instead of adjusting OLD_CALL, let's build a new
6022 call to not leak the LHS and prevent keeping bogus
6023 debug statements. DCE will clean up the old call. */
6024 gcall *new_call;
6025 if (gimple_call_internal_p (old_call))
6026 new_call = gimple_build_call_internal
6027 (IFN_COPYSIGN, 2, mul, arg1);
6028 else
6029 new_call = gimple_build_call
6030 (gimple_call_fndecl (old_call), 2, mul, arg1);
6031 tree lhs = make_ssa_name (type);
6032 gimple_call_set_lhs (new_call, lhs);
6033 gimple_set_location (new_call,
6034 gimple_location (old_call));
6035 insert_stmt_after (new_call, old_call);
389034a4 6036 /* We've used the constant, get rid of it. */
6037 ops->pop ();
6038 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6039 /* Handle the CST1 < 0 case by negating the result. */
6040 if (cst1_neg)
6041 {
6042 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6043 gimple *negate_stmt
6044 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
390bb332 6045 insert_stmt_after (negate_stmt, new_call);
389034a4 6046 oe->op = negrhs;
6047 }
6048 else
6049 oe->op = lhs;
6050 if (dump_file && (dump_flags & TDF_DETAILS))
6051 {
6052 fprintf (dump_file, "Optimizing copysign: ");
1ffa4346 6053 print_generic_expr (dump_file, cst);
390bb332 6054 fprintf (dump_file, " * COPYSIGN (");
1ffa4346 6055 print_generic_expr (dump_file, arg0);
389034a4 6056 fprintf (dump_file, ", ");
1ffa4346 6057 print_generic_expr (dump_file, arg1);
390bb332 6058 fprintf (dump_file, ") into %sCOPYSIGN (",
389034a4 6059 cst1_neg ? "-" : "");
1ffa4346 6060 print_generic_expr (dump_file, mul);
389034a4 6061 fprintf (dump_file, ", ");
1ffa4346 6062 print_generic_expr (dump_file, arg1);
389034a4 6063 fprintf (dump_file, "\n");
6064 }
6065 return;
6066 }
6067 break;
6068 default:
6069 break;
6070 }
6071 }
6072 }
6073 }
6074}
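
/* Editor's sketch, not part of the pass: the copysign transform above
   at the source level, assuming -O2 -ffast-math so the floating-point
   multiply chain is reassociated and the constant ends up last in the
   operand vector.  The constant is folded into copysign's first
   argument, giving __builtin_copysign (6.0, y); a negative multiplier
   would additionally negate the result.  */

double
copysign_example (double y)
{
  return 2.0 * __builtin_copysign (3.0, y);
}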
6075
97269507 6076/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6077
6078static void
42acab1c 6079transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
97269507 6080{
6081 tree rhs1;
6082
6083 if (dump_file && (dump_flags & TDF_DETAILS))
6084 {
6085 fprintf (dump_file, "Transforming ");
1ffa4346 6086 print_gimple_stmt (dump_file, stmt, 0);
97269507 6087 }
6088
6089 rhs1 = gimple_assign_rhs1 (stmt);
6090 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6091 update_stmt (stmt);
6092 remove_visited_stmt_chain (rhs1);
6093
6094 if (dump_file && (dump_flags & TDF_DETAILS))
6095 {
6096 fprintf (dump_file, " into ");
1ffa4346 6097 print_gimple_stmt (dump_file, stmt, 0);
97269507 6098 }
6099}
6100
6101/* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6102
6103static void
42acab1c 6104transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
97269507 6105 tree rhs1, tree rhs2)
6106{
6107 if (dump_file && (dump_flags & TDF_DETAILS))
6108 {
6109 fprintf (dump_file, "Transforming ");
1ffa4346 6110 print_gimple_stmt (dump_file, stmt, 0);
97269507 6111 }
6112
61e371b0 6113 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
97269507 6114 update_stmt (gsi_stmt (*gsi));
6115 remove_visited_stmt_chain (rhs1);
6116
6117 if (dump_file && (dump_flags & TDF_DETAILS))
6118 {
6119 fprintf (dump_file, " into ");
1ffa4346 6120 print_gimple_stmt (dump_file, stmt, 0);
97269507 6121 }
8c5ac7f6 6122}
6123
54aceb26 6124/* Reassociate expressions in basic block BB and its post-dominator tree
55a2d273 6125 children.
54aceb26 6126
55a2d273 6127 Bubble up return status from maybe_optimize_range_tests. */
6128
6129static bool
54aceb26 6130reassociate_bb (basic_block bb)
6131{
75a70cf9 6132 gimple_stmt_iterator gsi;
54aceb26 6133 basic_block son;
42acab1c 6134 gimple *stmt = last_stmt (bb);
55a2d273 6135 bool cfg_cleanup_needed = false;
8a2c7744 6136
6137 if (stmt && !gimple_visited_p (stmt))
55a2d273 6138 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
54aceb26 6139
75a70cf9 6140 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
54aceb26 6141 {
8a2c7744 6142 stmt = gsi_stmt (gsi);
54aceb26 6143
df0675b8 6144 if (is_gimple_assign (stmt)
aac19106 6145 && !stmt_could_throw_p (cfun, stmt))
54aceb26 6146 {
75a70cf9 6147 tree lhs, rhs1, rhs2;
6148 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
54aceb26 6149
75a70cf9 6150 /* If this was part of an already processed statement,
6151 we don't need to touch it again. */
6152 if (gimple_visited_p (stmt))
dddf5036 6153 {
6154 /* This statement might have become dead because of previous
6155 reassociations. */
6156 if (has_zero_uses (gimple_get_lhs (stmt)))
6157 {
54675e05 6158 reassoc_remove_stmt (&gsi);
dddf5036 6159 release_defs (stmt);
fb715874 6160 /* We might have removed the last stmt above, which
 6161 leaves the iterator pointing past the end of the
 6162 sequence. In that case, reset it to the last stmt
 6163 of the block; if the block has become empty as
 6164 well, there is nothing left to process, so bail
 6165 out. */
6166 if (gsi_end_p (gsi))
6167 {
6168 gsi = gsi_last_bb (bb);
6169 if (gsi_end_p (gsi))
6170 break;
6171 }
dddf5036 6172 }
6173 continue;
6174 }
75a70cf9 6175
ae63312f 6176 /* If this is not a gimple binary expression, there is
6177 nothing for us to do with it. */
6178 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6179 continue;
6180
75a70cf9 6181 lhs = gimple_assign_lhs (stmt);
6182 rhs1 = gimple_assign_rhs1 (stmt);
6183 rhs2 = gimple_assign_rhs2 (stmt);
6184
ca3c9092 6185 /* For operations other than bit ops and min/max, we cannot
 6186 associate all types. Verify that here. */
6187 if (rhs_code != BIT_IOR_EXPR
6188 && rhs_code != BIT_AND_EXPR
6189 && rhs_code != BIT_XOR_EXPR
6190 && rhs_code != MIN_EXPR
6191 && rhs_code != MAX_EXPR
6192 && (!can_reassociate_p (lhs)
6193 || !can_reassociate_p (rhs1)
6194 || !can_reassociate_p (rhs2)))
54aceb26 6195 continue;
6196
75a70cf9 6197 if (associative_tree_code (rhs_code))
54aceb26 6198 {
04009ada 6199 auto_vec<operand_entry *> ops;
97269507 6200 tree powi_result = NULL_TREE;
026f2137 6201 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
54aceb26 6202
6203 /* There may be no immediate uses left by the time we
6204 get here because we may have eliminated them all. */
72e3ec84 6205 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
54aceb26 6206 continue;
6207
75a70cf9 6208 gimple_set_visited (stmt, true);
dddf5036 6209 linearize_expr_tree (&ops, stmt, true, true);
f1f41a6c 6210 ops.qsort (sort_by_operand_rank);
e70cda06 6211 int orig_len = ops.length ();
75a70cf9 6212 optimize_ops_list (rhs_code, &ops);
dddf5036 6213 if (undistribute_ops_list (rhs_code, &ops,
6214 loop_containing_stmt (stmt)))
6215 {
f1f41a6c 6216 ops.qsort (sort_by_operand_rank);
dddf5036 6217 optimize_ops_list (rhs_code, &ops);
6218 }
ae63312f 6219 if (undistribute_bitref_for_vector (rhs_code, &ops,
6220 loop_containing_stmt (stmt)))
6221 {
6222 ops.qsort (sort_by_operand_rank);
6223 optimize_ops_list (rhs_code, &ops);
6224 }
0d7ddd44 6225 if (rhs_code == PLUS_EXPR
527d8479 6226 && transform_add_to_multiply (&ops))
0d7ddd44 6227 ops.qsort (sort_by_operand_rank);
6228
946e9eb4 6229 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
026f2137 6230 {
6231 if (is_vector)
6232 optimize_vec_cond_expr (rhs_code, &ops);
6233 else
54ec8b11 6234 optimize_range_tests (rhs_code, &ops, NULL);
026f2137 6235 }
946e9eb4 6236
026f2137 6237 if (rhs_code == MULT_EXPR && !is_vector)
6238 {
6239 attempt_builtin_copysign (&ops);
389034a4 6240
026f2137 6241 if (reassoc_insert_powi_p
6242 && flag_unsafe_math_optimizations)
6243 powi_result = attempt_builtin_powi (stmt, &ops);
6244 }
8c5ac7f6 6245
9eafdd7b 6246 operand_entry *last;
6247 bool negate_result = false;
6248 if (ops.length () > 1
6249 && rhs_code == MULT_EXPR)
6250 {
6251 last = ops.last ();
24c41395 6252 if ((integer_minus_onep (last->op)
9eafdd7b 6253 || real_minus_onep (last->op))
6254 && !HONOR_SNANS (TREE_TYPE (lhs))
6255 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6256 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6257 {
6258 ops.pop ();
6259 negate_result = true;
6260 }
6261 }
6262
9977f724 6263 tree new_lhs = lhs;
97269507 6264 /* If the operand vector is now empty, all operands were
6265 consumed by the __builtin_powi optimization. */
f1f41a6c 6266 if (ops.length () == 0)
97269507 6267 transform_stmt_to_copy (&gsi, stmt, powi_result);
f1f41a6c 6268 else if (ops.length () == 1)
54aceb26 6269 {
f1f41a6c 6270 tree last_op = ops.last ()->op;
527d8479 6271
 6272 /* If the stmt that defines an operand has to be inserted, insert it
6273 before the use. */
6274 if (ops.last ()->stmt_to_insert)
6275 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
97269507 6276 if (powi_result)
6277 transform_stmt_to_multiply (&gsi, stmt, last_op,
6278 powi_result);
6279 else
6280 transform_stmt_to_copy (&gsi, stmt, last_op);
54aceb26 6281 }
6282 else
5b1c765d 6283 {
3754d046 6284 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
f1f41a6c 6285 int ops_num = ops.length ();
4d319159 6286 int width;
b17ce62f 6287
077cf883 6288 /* For binary bit operations, if there are at least 3
 6289 operands and the last operand in OPS is a constant,
6290 move it to the front. This helps ensure that we generate
6291 (X & Y) & C rather than (X & C) & Y. The former will
6292 often match a canonical bit test when we get to RTL. */
04acc76e 6293 if (ops.length () > 2
077cf883 6294 && (rhs_code == BIT_AND_EXPR
6295 || rhs_code == BIT_IOR_EXPR
6296 || rhs_code == BIT_XOR_EXPR)
b17ce62f 6297 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6298 std::swap (*ops[0], *ops[ops_num - 1]);
6299
4d319159 6300 /* Only rewrite the expression tree into a parallel form in the
 6301 last reassoc pass, to avoid useless back-and-forth work
 6302 with the initial linearization. */
6303 if (!reassoc_insert_powi_p
6304 && ops.length () > 3
6305 && (width = get_reassociation_width (ops_num, rhs_code,
6306 mode)) > 1)
6307 {
6308 if (dump_file && (dump_flags & TDF_DETAILS))
6309 fprintf (dump_file,
6310 "Width = %d was chosen for reassociation\n",
6311 width);
6312 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6313 width, ops);
6314 }
5b1c765d 6315 else
a2bd0c99 6316 {
6317 /* When there are three operands left, we want
6318 to make sure the ones that get the double
6319 binary op are chosen wisely. */
6320 int len = ops.length ();
6321 if (len >= 3)
6322 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6323
82cb4e1c 6324 new_lhs = rewrite_expr_tree (stmt, 0, ops,
9977f724 6325 powi_result != NULL
e70cda06 6326 || negate_result,
6327 len != orig_len);
a2bd0c99 6328 }
97269507 6329
6330 /* If we combined some repeated factors into a
6331 __builtin_powi call, multiply that result by the
6332 reassociated operands. */
6333 if (powi_result)
6334 {
42acab1c 6335 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
82cb4e1c 6336 tree type = TREE_TYPE (lhs);
03d37e4e 6337 tree target_ssa = make_temp_ssa_name (type, NULL,
6338 "reassocpow");
82cb4e1c 6339 gimple_set_lhs (lhs_stmt, target_ssa);
6340 update_stmt (lhs_stmt);
6341 if (lhs != new_lhs)
9977f724 6342 {
6343 target_ssa = new_lhs;
6344 new_lhs = lhs;
6345 }
e9cf809e 6346 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6347 powi_result, target_ssa);
97269507 6348 gimple_set_location (mul_stmt, gimple_location (stmt));
0be0fe66 6349 gimple_set_uid (mul_stmt, gimple_uid (stmt));
97269507 6350 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6351 }
5b1c765d 6352 }
9eafdd7b 6353
6354 if (negate_result)
6355 {
6356 stmt = SSA_NAME_DEF_STMT (lhs);
6357 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6358 gimple_set_lhs (stmt, tmp);
9977f724 6359 if (lhs != new_lhs)
6360 tmp = new_lhs;
9eafdd7b 6361 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6362 tmp);
40bbfc17 6363 gimple_set_uid (neg_stmt, gimple_uid (stmt));
9eafdd7b 6364 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6365 update_stmt (stmt);
6366 }
54aceb26 6367 }
6368 }
6369 }
6370 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6371 son;
6372 son = next_dom_son (CDI_POST_DOMINATORS, son))
55a2d273 6373 cfg_cleanup_needed |= reassociate_bb (son);
6374
6375 return cfg_cleanup_needed;
54aceb26 6376}
6377
e3668db5 6378/* Add jumps around shifts for range tests turned into bit tests.
6379 For each SSA_NAME VAR we have code like:
6380 VAR = ...; // final stmt of range comparison
6381 // bit test here...;
6382 OTHERVAR = ...; // final stmt of the bit test sequence
6383 RES = VAR | OTHERVAR;
6384 Turn the above into:
 6385 <l1>: VAR = ...;
6386 if (VAR != 0)
6387 goto <l3>;
6388 else
6389 goto <l2>;
6390 <l2>:
6391 // bit test here...;
6392 OTHERVAR = ...;
6393 <l3>:
6394 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6395
6396static void
6397branch_fixup (void)
6398{
6399 tree var;
6400 unsigned int i;
6401
6402 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6403 {
42acab1c 6404 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6405 gimple *use_stmt;
e3668db5 6406 use_operand_p use;
6407 bool ok = single_imm_use (var, &use, &use_stmt);
6408 gcc_assert (ok
6409 && is_gimple_assign (use_stmt)
6410 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6411 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6412
6413 basic_block cond_bb = gimple_bb (def_stmt);
6414 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6415 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6416
6417 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
42acab1c 6418 gimple *g = gimple_build_cond (NE_EXPR, var,
6419 build_zero_cst (TREE_TYPE (var)),
6420 NULL_TREE, NULL_TREE);
e3668db5 6421 location_t loc = gimple_location (use_stmt);
6422 gimple_set_location (g, loc);
6423 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6424
6425 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
720cfc43 6426 etrue->probability = profile_probability::even ();
e3668db5 6427 edge efalse = find_edge (cond_bb, then_bb);
6428 efalse->flags = EDGE_FALSE_VALUE;
6429 efalse->probability -= etrue->probability;
ea5d3981 6430 then_bb->count -= etrue->count ();
e3668db5 6431
6432 tree othervar = NULL_TREE;
6433 if (gimple_assign_rhs1 (use_stmt) == var)
6434 othervar = gimple_assign_rhs2 (use_stmt);
6435 else if (gimple_assign_rhs2 (use_stmt) == var)
6436 othervar = gimple_assign_rhs1 (use_stmt);
6437 else
6438 gcc_unreachable ();
6439 tree lhs = gimple_assign_lhs (use_stmt);
1a91d914 6440 gphi *phi = create_phi_node (lhs, merge_bb);
e3668db5 6441 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6442 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6443 gsi = gsi_for_stmt (use_stmt);
6444 gsi_remove (&gsi, true);
6445
6446 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6447 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6448 }
6449 reassoc_branch_fixups.release ();
6450}
6451
04009ada 6452void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6453void debug_ops_vector (vec<operand_entry *> ops);
54aceb26 6454
6455/* Dump the operand entry vector OPS to FILE. */
6456
6457void
04009ada 6458dump_ops_vector (FILE *file, vec<operand_entry *> ops)
54aceb26 6459{
04009ada 6460 operand_entry *oe;
54aceb26 6461 unsigned int i;
6462
f1f41a6c 6463 FOR_EACH_VEC_ELT (ops, i, oe)
54aceb26 6464 {
6465 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
1ffa4346 6466 print_generic_expr (file, oe->op);
e3cd52d9 6467 fprintf (file, "\n");
54aceb26 6468 }
6469}
6470
6471/* Dump the operand entry vector OPS to STDERR. */
6472
4b987fac 6473DEBUG_FUNCTION void
04009ada 6474debug_ops_vector (vec<operand_entry *> ops)
54aceb26 6475{
6476 dump_ops_vector (stderr, ops);
6477}
6478
55a2d273 6479/* Bubble up return status from reassociate_bb. */
6480
6481static bool
54aceb26 6482do_reassoc (void)
6483{
34154e27 6484 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
55a2d273 6485 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
54aceb26 6486}
6487
6488/* Initialize the reassociation pass. */
6489
6490static void
6491init_reassoc (void)
6492{
6493 int i;
b30a8715 6494 long rank = 2;
a28770e1 6495 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
54aceb26 6496
a4c3fb95 6497 /* Find the loops, so that we can prevent moving calculations in
6498 them. */
6499 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6500
54aceb26 6501 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6502
17b5ea6f 6503 next_operand_entry_id = 0;
54aceb26 6504
 6505 /* Reverse post order (RPO) will give us an ordering in which
 6506 deeper loops come later. */
6180f28d 6507 pre_and_rev_post_order_compute (NULL, bbs, false);
fe672ac0 6508 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
06ecf488 6509 operand_rank = new hash_map<tree, long>;
54aceb26 6510
61e371b0 6511 /* Give each default definition a distinct rank. This includes
6512 parameters and the static chain. Walk backwards over all
6513 SSA names so that we get proper rank ordering according
6514 to tree_swap_operands_p. */
6515 for (i = num_ssa_names - 1; i > 0; --i)
54aceb26 6516 {
61e371b0 6517 tree name = ssa_name (i);
6518 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6519 insert_operand_rank (name, ++rank);
54aceb26 6520 }
6521
6522 /* Set up rank for each BB */
a28770e1 6523 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
bf27ea99 6524 bb_rank[bbs[i]] = ++rank << 16;
54aceb26 6525
6526 free (bbs);
54aceb26 6527 calculate_dominance_info (CDI_POST_DOMINATORS);
1e094109 6528 plus_negates = vNULL;
54aceb26 6529}
6530
6531/* Cleanup after the reassociation pass, and print stats if
6532 requested. */
6533
6534static void
6535fini_reassoc (void)
6536{
581f8050 6537 statistics_counter_event (cfun, "Linearized",
6538 reassociate_stats.linearized);
6539 statistics_counter_event (cfun, "Constants eliminated",
6540 reassociate_stats.constants_eliminated);
6541 statistics_counter_event (cfun, "Ops eliminated",
6542 reassociate_stats.ops_eliminated);
6543 statistics_counter_event (cfun, "Statements rewritten",
6544 reassociate_stats.rewritten);
8c5ac7f6 6545 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6546 reassociate_stats.pows_encountered);
6547 statistics_counter_event (cfun, "Built-in powi calls created",
6548 reassociate_stats.pows_created);
54aceb26 6549
06ecf488 6550 delete operand_rank;
672758dc 6551 operand_entry_pool.release ();
54aceb26 6552 free (bb_rank);
f1f41a6c 6553 plus_negates.release ();
54aceb26 6554 free_dominance_info (CDI_POST_DOMINATORS);
a4c3fb95 6555 loop_optimizer_finalize ();
54aceb26 6556}
6557
83505659 6558/* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
55a2d273 6559 insertion of __builtin_powi calls.
6560
6561 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6562 optimization of a gimple conditional. Otherwise returns zero. */
54aceb26 6563
2a1990e9 6564static unsigned int
83505659 6565execute_reassoc (bool insert_powi_p)
54aceb26 6566{
83505659 6567 reassoc_insert_powi_p = insert_powi_p;
6568
3dec5460 6569 init_reassoc ();
54aceb26 6570
55a2d273 6571 bool cfg_cleanup_needed;
6572 cfg_cleanup_needed = do_reassoc ();
54aceb26 6573 repropagate_negates ();
e3668db5 6574 branch_fixup ();
54aceb26 6575
3dec5460 6576 fini_reassoc ();
55a2d273 6577 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
3dec5460 6578}
6579
cbe8bda8 6580namespace {
6581
6582const pass_data pass_data_reassoc =
3dec5460 6583{
cbe8bda8 6584 GIMPLE_PASS, /* type */
6585 "reassoc", /* name */
6586 OPTGROUP_NONE, /* optinfo_flags */
cbe8bda8 6587 TV_TREE_REASSOC, /* tv_id */
6588 ( PROP_cfg | PROP_ssa ), /* properties_required */
6589 0, /* properties_provided */
6590 0, /* properties_destroyed */
6591 0, /* todo_flags_start */
8b88439e 6592 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3dec5460 6593};
cbe8bda8 6594
6595class pass_reassoc : public gimple_opt_pass
6596{
6597public:
9af5ce0c 6598 pass_reassoc (gcc::context *ctxt)
83505659 6599 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
cbe8bda8 6600 {}
6601
6602 /* opt_pass methods: */
ae84f584 6603 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
83505659 6604 void set_pass_param (unsigned int n, bool param)
6605 {
6606 gcc_assert (n == 0);
6607 insert_powi_p = param;
6608 }
31315c24 6609 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
83505659 6610 virtual unsigned int execute (function *)
6611 { return execute_reassoc (insert_powi_p); }
cbe8bda8 6612
83505659 6613 private:
6614 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6615 point 3a in the pass header comment. */
6616 bool insert_powi_p;
cbe8bda8 6617}; // class pass_reassoc
6618
6619} // anon namespace
6620
6621gimple_opt_pass *
6622make_pass_reassoc (gcc::context *ctxt)
6623{
6624 return new pass_reassoc (ctxt);
6625}