1 /* Reassociation for trees.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
59
60 It consists of five steps:
61
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
64
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
69
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
72
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
76
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
79
80 5. Repropagate negates, as nothing else will clean them up ATM.
81
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
84
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
87
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
92
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
97
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
100
101 a (1), b (1), c (1), d (2), e (2)
102
103
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
106
107 You want to first merge all leaves of the same rank, as much as
108 possible.
109
110 So first build a binary op of
111
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
116
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
121
122 Then build a binary op of d + e
123 mergetmp2 = d + e
124
125 and put mergetmp2 on the merge worklist.
126
127 so merge worklist = {mergetmp, c, mergetmp2}
128
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
131
132 So we have
133
134 build binary op
135 mergetmp3 = mergetmp + c
136
137 worklist = {mergetmp2, mergetmp3}
138
139 mergetmp4 = mergetmp2 + mergetmp3
140
141 worklist = {mergetmp4}
142
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
145
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
148
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
151
152 So why don't we do this?
153
154 Because it's expensive, and rarely helps. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
159
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
162
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
166
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
170
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
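
/* As a quick illustration of the overall effect (a hypothetical
   source-level sketch, with names chosen only for exposition): given

     t1 = a + b + c;
     t2 = c + b + a + d;

   linearizing both chains and rewriting them in rank order exposes the
   common prefix to the redundancy eliminator, which can then produce

     tmp = (a + b) + c;
     t1 = tmp;
     t2 = tmp + d;  */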
176
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
180
181 /* Statistics */
182 static struct
183 {
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
191
192
193 static object_allocator<operand_entry> operand_entry_pool
194 ("operand entry pool");
195
196 /* This is used to assign a unique ID to each struct operand_entry
197 so that qsort results are identical on different hosts. */
198 static unsigned int next_operand_entry_id;
199
200 /* Starting rank number for a given basic block, so that we can rank
201 operations using unmovable instructions in that BB based on the bb
202 depth. */
203 static long *bb_rank;
204
205 /* Operand->rank hashtable. */
206 static hash_map<tree, long> *operand_rank;
207
208 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
209 all basic blocks, the CFG should be adjusted - the basic block is
210 split right after that SSA_NAME's definition statement and before
211 its only use, which must be a bit ior. */
212 static vec<tree> reassoc_branch_fixups;
213
214 /* Forward decls. */
215 static long get_rank (tree);
216 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
217
218 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
219 possibly added by gsi_remove. */
220
221 static bool
222 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
223 {
224 gimple *stmt = gsi_stmt (*gsi);
225
226 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
227 return gsi_remove (gsi, true);
228
229 gimple_stmt_iterator prev = *gsi;
230 gsi_prev (&prev);
231 unsigned uid = gimple_uid (stmt);
232 basic_block bb = gimple_bb (stmt);
233 bool ret = gsi_remove (gsi, true);
234 if (!gsi_end_p (prev))
235 gsi_next (&prev);
236 else
237 prev = gsi_start_bb (bb);
238 gimple *end_stmt = gsi_stmt (*gsi);
239 while ((stmt = gsi_stmt (prev)) != end_stmt)
240 {
241 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
242 gimple_set_uid (stmt, uid);
243 gsi_next (&prev);
244 }
245 return ret;
246 }
247
248 /* Bias amount for loop-carried phis. We want this to be larger than
249 the depth of any reassociation tree we can see, but not larger than
250 the rank difference between two blocks. */
251 #define PHI_LOOP_BIAS (1 << 15)
252
253 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
254 an innermost loop, and the phi has only a single use which is inside
255 the loop, then the rank is the block rank of the loop latch plus an
256 extra bias for the loop-carried dependence. This causes expressions
257 calculated into an accumulator variable to be independent for each
258 iteration of the loop. If STMT is some other phi, the rank is the
259 block rank of its containing block. */
260 static long
261 phi_rank (gimple *stmt)
262 {
263 basic_block bb = gimple_bb (stmt);
264 class loop *father = bb->loop_father;
265 tree res;
266 unsigned i;
267 use_operand_p use;
268 gimple *use_stmt;
269
270 /* We only care about real loops (those with a latch). */
271 if (!father->latch)
272 return bb_rank[bb->index];
273
274 /* Interesting phis must be in headers of innermost loops. */
275 if (bb != father->header
276 || father->inner)
277 return bb_rank[bb->index];
278
279 /* Ignore virtual SSA_NAMEs. */
280 res = gimple_phi_result (stmt);
281 if (virtual_operand_p (res))
282 return bb_rank[bb->index];
283
284 /* The phi definition must have a single use, and that use must be
285 within the loop. Otherwise this isn't an accumulator pattern. */
286 if (!single_imm_use (res, &use, &use_stmt)
287 || gimple_bb (use_stmt)->loop_father != father)
288 return bb_rank[bb->index];
289
290 /* Look for phi arguments from within the loop. If found, bias this phi. */
291 for (i = 0; i < gimple_phi_num_args (stmt); i++)
292 {
293 tree arg = gimple_phi_arg_def (stmt, i);
294 if (TREE_CODE (arg) == SSA_NAME
295 && !SSA_NAME_IS_DEFAULT_DEF (arg))
296 {
297 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
298 if (gimple_bb (def_stmt)->loop_father == father)
299 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
300 }
301 }
302
303 /* Must be an uninteresting phi. */
304 return bb_rank[bb->index];
305 }
306
307 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
308 loop-carried dependence of an innermost loop, return TRUE; else
309 return FALSE. */
310 static bool
311 loop_carried_phi (tree exp)
312 {
313 gimple *phi_stmt;
314 long block_rank;
315
316 if (TREE_CODE (exp) != SSA_NAME
317 || SSA_NAME_IS_DEFAULT_DEF (exp))
318 return false;
319
320 phi_stmt = SSA_NAME_DEF_STMT (exp);
321
322 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
323 return false;
324
325 /* Non-loop-carried phis have block rank. Loop-carried phis have
326 an additional bias added in. If this phi doesn't have block rank,
327 it's biased and should not be propagated. */
328 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
329
330 if (phi_rank (phi_stmt) != block_rank)
331 return true;
332
333 return false;
334 }
335
336 /* Return the maximum of RANK and the rank that should be propagated
337 from expression OP. For most operands, this is just the rank of OP.
338 For loop-carried phis, the value is zero to avoid undoing the bias
339 in favor of the phi. */
340 static long
341 propagate_rank (long rank, tree op)
342 {
343 long op_rank;
344
345 if (loop_carried_phi (op))
346 return rank;
347
348 op_rank = get_rank (op);
349
350 return MAX (rank, op_rank);
351 }
352
353 /* Look up the operand rank structure for expression E. */
354
355 static inline long
356 find_operand_rank (tree e)
357 {
358 long *slot = operand_rank->get (e);
359 return slot ? *slot : -1;
360 }
361
362 /* Insert {E,RANK} into the operand rank hashtable. */
363
364 static inline void
365 insert_operand_rank (tree e, long rank)
366 {
367 gcc_assert (rank > 0);
368 gcc_assert (!operand_rank->put (e, rank));
369 }
370
371 /* Given an expression E, return the rank of the expression. */
372
373 static long
374 get_rank (tree e)
375 {
376 /* SSA_NAME's have the rank of the expression they are the result
377 of.
378 For globals and uninitialized values, the rank is 0.
379 For function arguments, use the pre-setup rank.
380 For PHI nodes, stores, asm statements, etc, we use the rank of
381 the BB.
382 For simple operations, the rank is the maximum rank of any of
383 its operands, or the bb_rank, whichever is less.
384 I make no claims that this is optimal; however, it gives good
385 results. */
386
387 /* We make an exception to the normal ranking system to break
388 dependences of accumulator variables in loops. Suppose we
389 have a simple one-block loop containing:
390
391 x_1 = phi(x_0, x_2)
392 b = a + x_1
393 c = b + d
394 x_2 = c + e
395
396 As shown, each iteration of the calculation into x is fully
397 dependent upon the iteration before it. We would prefer to
398 see this in the form:
399
400 x_1 = phi(x_0, x_2)
401 b = a + d
402 c = b + e
403 x_2 = c + x_1
404
405 If the loop is unrolled, the calculations of b and c from
406 different iterations can be interleaved.
407
408 To obtain this result during reassociation, we bias the rank
409 of the phi definition x_1 upward, when it is recognized as an
410 accumulator pattern. The artificial rank causes it to be
411 added last, providing the desired independence. */
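
/* A small illustration with made-up ranks: if a_1 and b_2 both have
   rank 2 and c_3 has rank 3, then

     t_4 = a_1 + b_2;   rank (t_4) = MAX (2, 2) + 1 = 3
     t_5 = t_4 + c_3;   rank (t_5) = MAX (3, 3) + 1 = 4

   so operands with equal rank tend to be paired together when the
   linearized chain is rewritten, as described in the header comment. */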
412
413 if (TREE_CODE (e) == SSA_NAME)
414 {
415 ssa_op_iter iter;
416 gimple *stmt;
417 long rank;
418 tree op;
419
420 /* If we already have a rank for this expression, use that. */
421 rank = find_operand_rank (e);
422 if (rank != -1)
423 return rank;
424
425 stmt = SSA_NAME_DEF_STMT (e);
426 if (gimple_code (stmt) == GIMPLE_PHI)
427 rank = phi_rank (stmt);
428
429 else if (!is_gimple_assign (stmt))
430 rank = bb_rank[gimple_bb (stmt)->index];
431
432 else
433 {
434 /* Otherwise, find the maximum rank for the operands. As an
435 exception, remove the bias from loop-carried phis when propagating
436 the rank so that dependent operations are not also biased. */
437 /* Simply walk over all SSA uses - this takes advantage of the
438 fact that non-SSA operands are is_gimple_min_invariant and
439 thus have rank 0. */
440 rank = 0;
441 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
442 rank = propagate_rank (rank, op);
443
444 rank += 1;
445 }
446
447 if (dump_file && (dump_flags & TDF_DETAILS))
448 {
449 fprintf (dump_file, "Rank for ");
450 print_generic_expr (dump_file, e);
451 fprintf (dump_file, " is %ld\n", rank);
452 }
453
454 /* Note the rank in the hashtable so we don't recompute it. */
455 insert_operand_rank (e, rank);
456 return rank;
457 }
458
459 /* Constants, globals, etc., are rank 0 */
460 return 0;
461 }
462
463
464 /* We want integer ones to end up last no matter what, since they are
465 the ones we can do the most with. */
466 #define INTEGER_CONST_TYPE 1 << 4
467 #define FLOAT_ONE_CONST_TYPE 1 << 3
468 #define FLOAT_CONST_TYPE 1 << 2
469 #define OTHER_CONST_TYPE 1 << 1
470
471 /* Classify an invariant tree into integer, float, or other, so that
472 we can sort them to be near other constants of the same type. */
473 static inline int
474 constant_type (tree t)
475 {
476 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
477 return INTEGER_CONST_TYPE;
478 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
479 {
480 /* Sort -1.0 and 1.0 constants last: while in some cases
481 const_binop can't optimize some inexact operations, multiplication
482 by -1.0 or 1.0 can always be merged with others. */
483 if (real_onep (t) || real_minus_onep (t))
484 return FLOAT_ONE_CONST_TYPE;
485 return FLOAT_CONST_TYPE;
486 }
487 else
488 return OTHER_CONST_TYPE;
489 }
490
491 /* qsort comparison function to sort operand entries PA and PB by rank
492 so that the sorted array is ordered by rank in decreasing order. */
493 static int
494 sort_by_operand_rank (const void *pa, const void *pb)
495 {
496 const operand_entry *oea = *(const operand_entry *const *)pa;
497 const operand_entry *oeb = *(const operand_entry *const *)pb;
498
499 if (oeb->rank != oea->rank)
500 return oeb->rank > oea->rank ? 1 : -1;
501
502 /* It's nicer for optimize_expression if constants that are likely
503 to fold when added/multiplied/whatever are put next to each
504 other. Since all constants have rank 0, order them by type. */
505 if (oea->rank == 0)
506 {
507 if (constant_type (oeb->op) != constant_type (oea->op))
508 return constant_type (oea->op) - constant_type (oeb->op);
509 else
510 /* To make sorting result stable, we use unique IDs to determine
511 order. */
512 return oeb->id > oea->id ? 1 : -1;
513 }
514
515 if (TREE_CODE (oea->op) != SSA_NAME)
516 {
517 if (TREE_CODE (oeb->op) != SSA_NAME)
518 return oeb->id > oea->id ? 1 : -1;
519 else
520 return 1;
521 }
522 else if (TREE_CODE (oeb->op) != SSA_NAME)
523 return -1;
524
525 /* Lastly, make sure the versions that are the same go next to each
526 other. */
527 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
528 {
529 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
530 versions of removed SSA_NAMEs, so if possible, prefer to sort
531 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
532 See PR60418. */
533 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
534 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
535 basic_block bba = gimple_bb (stmta);
536 basic_block bbb = gimple_bb (stmtb);
537 if (bbb != bba)
538 {
539 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
540 but the other might not. */
541 if (!bba)
542 return 1;
543 if (!bbb)
544 return -1;
545 /* If neither is, compare bb_rank. */
546 if (bb_rank[bbb->index] != bb_rank[bba->index])
547 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
548 }
549
550 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
551 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
552 if (da != db)
553 return da ? 1 : -1;
554
555 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
556 }
557
558 return oeb->id > oea->id ? 1 : -1;
559 }
560
561 /* Add an operand entry to *OPS for the tree operand OP. */
562
563 static void
564 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
565 {
566 operand_entry *oe = operand_entry_pool.allocate ();
567
568 oe->op = op;
569 oe->rank = get_rank (op);
570 oe->id = next_operand_entry_id++;
571 oe->count = 1;
572 oe->stmt_to_insert = stmt_to_insert;
573 ops->safe_push (oe);
574 }
575
576 /* Add an operand entry to *OPS for the tree operand OP with repeat
577 count REPEAT. */
578
579 static void
580 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
581 HOST_WIDE_INT repeat)
582 {
583 operand_entry *oe = operand_entry_pool.allocate ();
584
585 oe->op = op;
586 oe->rank = get_rank (op);
587 oe->id = next_operand_entry_id++;
588 oe->count = repeat;
589 oe->stmt_to_insert = NULL;
590 ops->safe_push (oe);
591
592 reassociate_stats.pows_encountered++;
593 }
594
595 /* Return true if STMT is a reassociable operation containing a binary
596 operation with tree code CODE, and is inside LOOP. */
597
598 static bool
599 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
600 {
601 basic_block bb = gimple_bb (stmt);
602
603 if (gimple_bb (stmt) == NULL)
604 return false;
605
606 if (!flow_bb_inside_loop_p (loop, bb))
607 return false;
608
609 if (is_gimple_assign (stmt)
610 && gimple_assign_rhs_code (stmt) == code
611 && has_single_use (gimple_assign_lhs (stmt)))
612 {
613 tree rhs1 = gimple_assign_rhs1 (stmt);
614 tree rhs2 = gimple_assign_rhs2 (stmt);
615 if (TREE_CODE (rhs1) == SSA_NAME
616 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
617 return false;
618 if (rhs2
619 && TREE_CODE (rhs2) == SSA_NAME
620 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
621 return false;
622 return true;
623 }
624
625 return false;
626 }
627
628
629 /* Return true if STMT is a nop-conversion. */
630
631 static bool
632 gimple_nop_conversion_p (gimple *stmt)
633 {
634 if (gassign *ass = dyn_cast <gassign *> (stmt))
635 {
636 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
637 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
638 TREE_TYPE (gimple_assign_rhs1 (ass))))
639 return true;
640 }
641 return false;
642 }
643
644 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
645 operand of that operation. Otherwise, return NULL. */
646
647 static tree
648 get_unary_op (tree name, enum tree_code opcode)
649 {
650 gimple *stmt = SSA_NAME_DEF_STMT (name);
651
652 /* Look through nop conversions (sign changes). */
653 if (gimple_nop_conversion_p (stmt)
654 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
655 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
656
657 if (!is_gimple_assign (stmt))
658 return NULL_TREE;
659
660 if (gimple_assign_rhs_code (stmt) == opcode)
661 return gimple_assign_rhs1 (stmt);
662 return NULL_TREE;
663 }
664
665 /* Return true if OP1 and OP2 have the same value when cast to either type. */
666
667 static bool
668 ops_equal_values_p (tree op1, tree op2)
669 {
670 if (op1 == op2)
671 return true;
672
673 tree orig_op1 = op1;
674 if (TREE_CODE (op1) == SSA_NAME)
675 {
676 gimple *stmt = SSA_NAME_DEF_STMT (op1);
677 if (gimple_nop_conversion_p (stmt))
678 {
679 op1 = gimple_assign_rhs1 (stmt);
680 if (op1 == op2)
681 return true;
682 }
683 }
684
685 if (TREE_CODE (op2) == SSA_NAME)
686 {
687 gimple *stmt = SSA_NAME_DEF_STMT (op2);
688 if (gimple_nop_conversion_p (stmt))
689 {
690 op2 = gimple_assign_rhs1 (stmt);
691 if (op1 == op2
692 || orig_op1 == op2)
693 return true;
694 }
695 }
696
697 return false;
698 }
699
700
701 /* If CURR and LAST are a pair of ops that OPCODE allows us to
702 eliminate through equivalences, do so, remove them from OPS, and
703 return true. Otherwise, return false. */
704
705 static bool
706 eliminate_duplicate_pair (enum tree_code opcode,
707 vec<operand_entry *> *ops,
708 bool *all_done,
709 unsigned int i,
710 operand_entry *curr,
711 operand_entry *last)
712 {
713
714 /* If we have two of the same op, and the opcode is &, |, min, or max,
715 we can eliminate one of them.
716 If we have two of the same op, and the opcode is ^, we can
717 eliminate both of them. */
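
/* For example, in x & x & y one of the two x operands can be dropped,
   while in x ^ x ^ y both x operands cancel, leaving just y. */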
718
719 if (last && last->op == curr->op)
720 {
721 switch (opcode)
722 {
723 case MAX_EXPR:
724 case MIN_EXPR:
725 case BIT_IOR_EXPR:
726 case BIT_AND_EXPR:
727 if (dump_file && (dump_flags & TDF_DETAILS))
728 {
729 fprintf (dump_file, "Equivalence: ");
730 print_generic_expr (dump_file, curr->op);
731 fprintf (dump_file, " [&|minmax] ");
732 print_generic_expr (dump_file, last->op);
733 fprintf (dump_file, " -> ");
734 print_generic_stmt (dump_file, last->op);
735 }
736
737 ops->ordered_remove (i);
738 reassociate_stats.ops_eliminated ++;
739
740 return true;
741
742 case BIT_XOR_EXPR:
743 if (dump_file && (dump_flags & TDF_DETAILS))
744 {
745 fprintf (dump_file, "Equivalence: ");
746 print_generic_expr (dump_file, curr->op);
747 fprintf (dump_file, " ^ ");
748 print_generic_expr (dump_file, last->op);
749 fprintf (dump_file, " -> nothing\n");
750 }
751
752 reassociate_stats.ops_eliminated += 2;
753
754 if (ops->length () == 2)
755 {
756 ops->truncate (0);
757 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
758 *all_done = true;
759 }
760 else
761 {
762 ops->ordered_remove (i-1);
763 ops->ordered_remove (i-1);
764 }
765
766 return true;
767
768 default:
769 break;
770 }
771 }
772 return false;
773 }
774
775 static vec<tree> plus_negates;
776
777 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
778 expression, look in OPS for a corresponding positive operation to cancel
779 it out. If we find one, remove the other from OPS, replace
780 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
781 return false. */
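
/* Illustrative examples (hypothetical operands): in a + b + c with
   b defined as -c, the pair cancels and a 0 operand is substituted,
   giving a + 0; with b defined as ~c the pair folds to -1, giving
   a + -1. */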
782
783 static bool
784 eliminate_plus_minus_pair (enum tree_code opcode,
785 vec<operand_entry *> *ops,
786 unsigned int currindex,
787 operand_entry *curr)
788 {
789 tree negateop;
790 tree notop;
791 unsigned int i;
792 operand_entry *oe;
793
794 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
795 return false;
796
797 negateop = get_unary_op (curr->op, NEGATE_EXPR);
798 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
799 if (negateop == NULL_TREE && notop == NULL_TREE)
800 return false;
801
802 /* Any non-negated version will have a rank that is one less than
803 the current rank. So once we hit those ranks, if we don't find
804 one, we can stop. */
805
806 for (i = currindex + 1;
807 ops->iterate (i, &oe)
808 && oe->rank >= curr->rank - 1 ;
809 i++)
810 {
811 if (negateop
812 && ops_equal_values_p (oe->op, negateop))
813 {
814 if (dump_file && (dump_flags & TDF_DETAILS))
815 {
816 fprintf (dump_file, "Equivalence: ");
817 print_generic_expr (dump_file, negateop);
818 fprintf (dump_file, " + -");
819 print_generic_expr (dump_file, oe->op);
820 fprintf (dump_file, " -> 0\n");
821 }
822
823 ops->ordered_remove (i);
824 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
825 ops->ordered_remove (currindex);
826 reassociate_stats.ops_eliminated ++;
827
828 return true;
829 }
830 else if (notop
831 && ops_equal_values_p (oe->op, notop))
832 {
833 tree op_type = TREE_TYPE (oe->op);
834
835 if (dump_file && (dump_flags & TDF_DETAILS))
836 {
837 fprintf (dump_file, "Equivalence: ");
838 print_generic_expr (dump_file, notop);
839 fprintf (dump_file, " + ~");
840 print_generic_expr (dump_file, oe->op);
841 fprintf (dump_file, " -> -1\n");
842 }
843
844 ops->ordered_remove (i);
845 add_to_ops_vec (ops, build_all_ones_cst (op_type));
846 ops->ordered_remove (currindex);
847 reassociate_stats.ops_eliminated ++;
848
849 return true;
850 }
851 }
852
853 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
854 save it for later inspection in repropagate_negates(). */
855 if (negateop != NULL_TREE
856 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
857 plus_negates.safe_push (curr->op);
858
859 return false;
860 }
861
862 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
863 bitwise not expression, look in OPS for a corresponding operand to
864 cancel it out. If we find one, remove the other from OPS, replace
865 OPS[CURRINDEX] with 0 (for AND) or -1 (for IOR), and return true. Otherwise, return
866 false. */
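
/* For instance, x & ~x & y collapses to 0 and x | ~x | y collapses to -1
   (all ones), so the remaining operands can be discarded entirely. */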
867
868 static bool
869 eliminate_not_pairs (enum tree_code opcode,
870 vec<operand_entry *> *ops,
871 unsigned int currindex,
872 operand_entry *curr)
873 {
874 tree notop;
875 unsigned int i;
876 operand_entry *oe;
877
878 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
879 || TREE_CODE (curr->op) != SSA_NAME)
880 return false;
881
882 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
883 if (notop == NULL_TREE)
884 return false;
885
886 /* Any non-not version will have a rank that is one less than
887 the current rank. So once we hit those ranks, if we don't find
888 one, we can stop. */
889
890 for (i = currindex + 1;
891 ops->iterate (i, &oe)
892 && oe->rank >= curr->rank - 1;
893 i++)
894 {
895 if (oe->op == notop)
896 {
897 if (dump_file && (dump_flags & TDF_DETAILS))
898 {
899 fprintf (dump_file, "Equivalence: ");
900 print_generic_expr (dump_file, notop);
901 if (opcode == BIT_AND_EXPR)
902 fprintf (dump_file, " & ~");
903 else if (opcode == BIT_IOR_EXPR)
904 fprintf (dump_file, " | ~");
905 print_generic_expr (dump_file, oe->op);
906 if (opcode == BIT_AND_EXPR)
907 fprintf (dump_file, " -> 0\n");
908 else if (opcode == BIT_IOR_EXPR)
909 fprintf (dump_file, " -> -1\n");
910 }
911
912 if (opcode == BIT_AND_EXPR)
913 oe->op = build_zero_cst (TREE_TYPE (oe->op));
914 else if (opcode == BIT_IOR_EXPR)
915 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
916
917 reassociate_stats.ops_eliminated += ops->length () - 1;
918 ops->truncate (0);
919 ops->quick_push (oe);
920 return true;
921 }
922 }
923
924 return false;
925 }
926
927 /* Use a constant value that may be present in OPS to try to eliminate
928 operands. Note that this function is only really used when we've
929 eliminated ops for other reasons, or merged constants. Across
930 single statements, fold already does all of this, plus more. There
931 is little point in duplicating logic, so I've only included the
932 identities that I could ever construct testcases to trigger. */
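
/* A few of the identities handled below, written at the source level:
   a & 0 -> 0, a & -1 -> a, a | -1 -> -1, a | 0 -> a,
   a * 0 -> 0 (for floats only when NaNs and signed zeros are not honored),
   a * 1 -> a, a + 0 -> a, and a ^ 0 -> a. */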
933
934 static void
935 eliminate_using_constants (enum tree_code opcode,
936 vec<operand_entry *> *ops)
937 {
938 operand_entry *oelast = ops->last ();
939 tree type = TREE_TYPE (oelast->op);
940
941 if (oelast->rank == 0
942 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
943 {
944 switch (opcode)
945 {
946 case BIT_AND_EXPR:
947 if (integer_zerop (oelast->op))
948 {
949 if (ops->length () != 1)
950 {
951 if (dump_file && (dump_flags & TDF_DETAILS))
952 fprintf (dump_file, "Found & 0, removing all other ops\n");
953
954 reassociate_stats.ops_eliminated += ops->length () - 1;
955
956 ops->truncate (0);
957 ops->quick_push (oelast);
958 return;
959 }
960 }
961 else if (integer_all_onesp (oelast->op))
962 {
963 if (ops->length () != 1)
964 {
965 if (dump_file && (dump_flags & TDF_DETAILS))
966 fprintf (dump_file, "Found & -1, removing\n");
967 ops->pop ();
968 reassociate_stats.ops_eliminated++;
969 }
970 }
971 break;
972 case BIT_IOR_EXPR:
973 if (integer_all_onesp (oelast->op))
974 {
975 if (ops->length () != 1)
976 {
977 if (dump_file && (dump_flags & TDF_DETAILS))
978 fprintf (dump_file, "Found | -1, removing all other ops\n");
979
980 reassociate_stats.ops_eliminated += ops->length () - 1;
981
982 ops->truncate (0);
983 ops->quick_push (oelast);
984 return;
985 }
986 }
987 else if (integer_zerop (oelast->op))
988 {
989 if (ops->length () != 1)
990 {
991 if (dump_file && (dump_flags & TDF_DETAILS))
992 fprintf (dump_file, "Found | 0, removing\n");
993 ops->pop ();
994 reassociate_stats.ops_eliminated++;
995 }
996 }
997 break;
998 case MULT_EXPR:
999 if (integer_zerop (oelast->op)
1000 || (FLOAT_TYPE_P (type)
1001 && !HONOR_NANS (type)
1002 && !HONOR_SIGNED_ZEROS (type)
1003 && real_zerop (oelast->op)))
1004 {
1005 if (ops->length () != 1)
1006 {
1007 if (dump_file && (dump_flags & TDF_DETAILS))
1008 fprintf (dump_file, "Found * 0, removing all other ops\n");
1009
1010 reassociate_stats.ops_eliminated += ops->length () - 1;
1011 ops->truncate (0);
1012 ops->quick_push (oelast);
1013 return;
1014 }
1015 }
1016 else if (integer_onep (oelast->op)
1017 || (FLOAT_TYPE_P (type)
1018 && !HONOR_SNANS (type)
1019 && real_onep (oelast->op)))
1020 {
1021 if (ops->length () != 1)
1022 {
1023 if (dump_file && (dump_flags & TDF_DETAILS))
1024 fprintf (dump_file, "Found * 1, removing\n");
1025 ops->pop ();
1026 reassociate_stats.ops_eliminated++;
1027 return;
1028 }
1029 }
1030 break;
1031 case BIT_XOR_EXPR:
1032 case PLUS_EXPR:
1033 case MINUS_EXPR:
1034 if (integer_zerop (oelast->op)
1035 || (FLOAT_TYPE_P (type)
1036 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1037 && fold_real_zero_addition_p (type, oelast->op,
1038 opcode == MINUS_EXPR)))
1039 {
1040 if (ops->length () != 1)
1041 {
1042 if (dump_file && (dump_flags & TDF_DETAILS))
1043 fprintf (dump_file, "Found [|^+] 0, removing\n");
1044 ops->pop ();
1045 reassociate_stats.ops_eliminated++;
1046 return;
1047 }
1048 }
1049 break;
1050 default:
1051 break;
1052 }
1053 }
1054 }
1055
1056
1057 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1058 bool, bool);
1059
1060 /* Structure for tracking and counting operands. */
1061 struct oecount {
1062 unsigned int cnt;
1063 unsigned int id;
1064 enum tree_code oecode;
1065 tree op;
1066 };
1067
1068
1069 /* The heap for the oecount hashtable and the sorted list of operands. */
1070 static vec<oecount> cvec;
1071
1072
1073 /* Oecount hashtable helpers. */
1074
1075 struct oecount_hasher : int_hash <int, 0, 1>
1076 {
1077 static inline hashval_t hash (int);
1078 static inline bool equal (int, int);
1079 };
1080
1081 /* Hash function for oecount. */
1082
1083 inline hashval_t
1084 oecount_hasher::hash (int p)
1085 {
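  /* The slot values stored in the hash table are cvec indexes biased by 42
     (see undistribute_ops_list, which inserts cvec.length () + 41),
     seemingly so that the values 0 and 1 stay free for the empty and
     deleted markers of int_hash <int, 0, 1>. */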
1086 const oecount *c = &cvec[p - 42];
1087 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1088 }
1089
1090 /* Comparison function for oecount. */
1091
1092 inline bool
1093 oecount_hasher::equal (int p1, int p2)
1094 {
1095 const oecount *c1 = &cvec[p1 - 42];
1096 const oecount *c2 = &cvec[p2 - 42];
1097 return c1->oecode == c2->oecode && c1->op == c2->op;
1098 }
1099
1100 /* Comparison function for qsort sorting oecount elements by count. */
1101
1102 static int
1103 oecount_cmp (const void *p1, const void *p2)
1104 {
1105 const oecount *c1 = (const oecount *)p1;
1106 const oecount *c2 = (const oecount *)p2;
1107 if (c1->cnt != c2->cnt)
1108 return c1->cnt > c2->cnt ? 1 : -1;
1109 else
1110 /* If counts are identical, use unique IDs to stabilize qsort. */
1111 return c1->id > c2->id ? 1 : -1;
1112 }
1113
1114 /* Return TRUE iff STMT represents a builtin call that raises OP
1115 to some exponent. */
1116
1117 static bool
1118 stmt_is_power_of_op (gimple *stmt, tree op)
1119 {
1120 if (!is_gimple_call (stmt))
1121 return false;
1122
1123 switch (gimple_call_combined_fn (stmt))
1124 {
1125 CASE_CFN_POW:
1126 CASE_CFN_POWI:
1127 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1128
1129 default:
1130 return false;
1131 }
1132 }
1133
1134 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1135 in place and return the result. Assumes that stmt_is_power_of_op
1136 was previously called for STMT and returned TRUE. */
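
/* For example, for a call __builtin_powi (x, 5) the exponent argument is
   rewritten in place to 4 and the value 4 is returned, i.e. one factor of
   x has been peeled off the call. */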
1137
1138 static HOST_WIDE_INT
1139 decrement_power (gimple *stmt)
1140 {
1141 REAL_VALUE_TYPE c, cint;
1142 HOST_WIDE_INT power;
1143 tree arg1;
1144
1145 switch (gimple_call_combined_fn (stmt))
1146 {
1147 CASE_CFN_POW:
1148 arg1 = gimple_call_arg (stmt, 1);
1149 c = TREE_REAL_CST (arg1);
1150 power = real_to_integer (&c) - 1;
1151 real_from_integer (&cint, VOIDmode, power, SIGNED);
1152 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1153 return power;
1154
1155 CASE_CFN_POWI:
1156 arg1 = gimple_call_arg (stmt, 1);
1157 power = TREE_INT_CST_LOW (arg1) - 1;
1158 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1159 return power;
1160
1161 default:
1162 gcc_unreachable ();
1163 }
1164 }
1165
1166 /* Give STMT a new SSA name for its definition, replace all uses of the
1167 old SSA name with the new one, and return the new SSA name. */
1168
1169 static tree
1170 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1171 {
1172 gimple *use_stmt;
1173 use_operand_p use;
1174 imm_use_iterator iter;
1175 tree new_lhs, new_debug_lhs = NULL_TREE;
1176 tree lhs = gimple_get_lhs (stmt);
1177
1178 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1179 gimple_set_lhs (stmt, new_lhs);
1180
1181 /* Also need to update GIMPLE_DEBUGs. */
1182 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1183 {
1184 tree repl = new_lhs;
1185 if (is_gimple_debug (use_stmt))
1186 {
1187 if (new_debug_lhs == NULL_TREE)
1188 {
1189 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1190 gdebug *def_temp
1191 = gimple_build_debug_bind (new_debug_lhs,
1192 build2 (opcode, TREE_TYPE (lhs),
1193 new_lhs, op),
1194 stmt);
1195 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1196 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1197 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1198 gimple_set_uid (def_temp, gimple_uid (stmt));
1199 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1200 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1201 }
1202 repl = new_debug_lhs;
1203 }
1204 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1205 SET_USE (use, repl);
1206 update_stmt (use_stmt);
1207 }
1208 return new_lhs;
1209 }
1210
1211 /* Replace all SSAs defined in STMTS_TO_FIX and replace their
1212 uses with new SSAs. Also do this for the stmt that defines *DEF
1213 if *DEF is not OP. */
1214
1215 static void
1216 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1217 vec<gimple *> &stmts_to_fix)
1218 {
1219 unsigned i;
1220 gimple *stmt;
1221
1222 if (*def != op
1223 && TREE_CODE (*def) == SSA_NAME
1224 && (stmt = SSA_NAME_DEF_STMT (*def))
1225 && gimple_code (stmt) != GIMPLE_NOP)
1226 *def = make_new_ssa_for_def (stmt, opcode, op);
1227
1228 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1229 make_new_ssa_for_def (stmt, opcode, op);
1230 }
1231
1232 /* Find the single immediate use of STMT's LHS, and replace it
1233 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1234 replace *DEF with OP as well. */
1235
1236 static void
1237 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1238 {
1239 tree lhs;
1240 gimple *use_stmt;
1241 use_operand_p use;
1242 gimple_stmt_iterator gsi;
1243
1244 if (is_gimple_call (stmt))
1245 lhs = gimple_call_lhs (stmt);
1246 else
1247 lhs = gimple_assign_lhs (stmt);
1248
1249 gcc_assert (has_single_use (lhs));
1250 single_imm_use (lhs, &use, &use_stmt);
1251 if (lhs == *def)
1252 *def = op;
1253 SET_USE (use, op);
1254 if (TREE_CODE (op) != SSA_NAME)
1255 update_stmt (use_stmt);
1256 gsi = gsi_for_stmt (stmt);
1257 unlink_stmt_vdef (stmt);
1258 reassoc_remove_stmt (&gsi);
1259 release_defs (stmt);
1260 }
1261
1262 /* Walks the linear chain with result *DEF searching for an operation
1263 with operand OP and code OPCODE, and removes it from the chain. *DEF
1264 is updated if there is only one operand but no operation left. */
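
/* An illustrative sketch (hypothetical SSA names): with *DEF = t3_3 and
   the chain t1_1 = a * x; t2_2 = t1_1 * b; t3_3 = t2_2 * c;, calling this
   with OPCODE = MULT_EXPR and OP = x removes the multiplication by x, so
   the rewritten chain (under fresh SSA names, see PR72835 below) computes
   a * b * c, and *DEF is updated to the new final result. */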
1265
1266 static void
1267 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1268 {
1269 tree orig_def = *def;
1270 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1271 /* PR72835 - Record the stmt chain that has to be updated such that
1272 we don't use the same LHS when the values computed are different. */
1273 auto_vec<gimple *, 64> stmts_to_fix;
1274
1275 do
1276 {
1277 tree name;
1278
1279 if (opcode == MULT_EXPR)
1280 {
1281 if (stmt_is_power_of_op (stmt, op))
1282 {
1283 if (decrement_power (stmt) == 1)
1284 {
1285 if (stmts_to_fix.length () > 0)
1286 stmts_to_fix.pop ();
1287 propagate_op_to_single_use (op, stmt, def);
1288 }
1289 break;
1290 }
1291 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1292 {
1293 if (gimple_assign_rhs1 (stmt) == op)
1294 {
1295 tree cst = build_minus_one_cst (TREE_TYPE (op));
1296 if (stmts_to_fix.length () > 0)
1297 stmts_to_fix.pop ();
1298 propagate_op_to_single_use (cst, stmt, def);
1299 break;
1300 }
1301 else if (integer_minus_onep (op)
1302 || real_minus_onep (op))
1303 {
1304 gimple_assign_set_rhs_code
1305 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1306 break;
1307 }
1308 }
1309 }
1310
1311 name = gimple_assign_rhs1 (stmt);
1312
1313 /* If this is the operation we look for and one of the operands
1314 is ours, simply propagate the other operand into the stmt's
1315 single use. */
1316 if (gimple_assign_rhs_code (stmt) == opcode
1317 && (name == op
1318 || gimple_assign_rhs2 (stmt) == op))
1319 {
1320 if (name == op)
1321 name = gimple_assign_rhs2 (stmt);
1322 if (stmts_to_fix.length () > 0)
1323 stmts_to_fix.pop ();
1324 propagate_op_to_single_use (name, stmt, def);
1325 break;
1326 }
1327
1328 /* We might have a multiply of two __builtin_pow* calls, and
1329 the operand might be hiding in the rightmost one. Likewise
1330 this can happen for a negate. */
1331 if (opcode == MULT_EXPR
1332 && gimple_assign_rhs_code (stmt) == opcode
1333 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1334 && has_single_use (gimple_assign_rhs2 (stmt)))
1335 {
1336 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1337 if (stmt_is_power_of_op (stmt2, op))
1338 {
1339 if (decrement_power (stmt2) == 1)
1340 propagate_op_to_single_use (op, stmt2, def);
1341 else
1342 stmts_to_fix.safe_push (stmt2);
1343 break;
1344 }
1345 else if (is_gimple_assign (stmt2)
1346 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1347 {
1348 if (gimple_assign_rhs1 (stmt2) == op)
1349 {
1350 tree cst = build_minus_one_cst (TREE_TYPE (op));
1351 propagate_op_to_single_use (cst, stmt2, def);
1352 break;
1353 }
1354 else if (integer_minus_onep (op)
1355 || real_minus_onep (op))
1356 {
1357 stmts_to_fix.safe_push (stmt2);
1358 gimple_assign_set_rhs_code
1359 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1360 break;
1361 }
1362 }
1363 }
1364
1365 /* Continue walking the chain. */
1366 gcc_assert (name != op
1367 && TREE_CODE (name) == SSA_NAME);
1368 stmt = SSA_NAME_DEF_STMT (name);
1369 stmts_to_fix.safe_push (stmt);
1370 }
1371 while (1);
1372
1373 if (stmts_to_fix.length () > 0 || *def == orig_def)
1374 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1375 }
1376
1377 /* Returns true if statement S1 dominates statement S2. Like
1378 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1379
1380 static bool
1381 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1382 {
1383 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1384
1385 /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
1386 definition (D) SSA_NAME. Assume it lives at the beginning of the
1387 function and thus dominates everything. */
1388 if (!bb1 || s1 == s2)
1389 return true;
1390
1391 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1392 if (!bb2)
1393 return false;
1394
1395 if (bb1 == bb2)
1396 {
1397 /* PHIs in the same basic block are assumed to be
1398 executed all in parallel, if only one stmt is a PHI,
1399 it dominates the other stmt in the same basic block. */
1400 if (gimple_code (s1) == GIMPLE_PHI)
1401 return true;
1402
1403 if (gimple_code (s2) == GIMPLE_PHI)
1404 return false;
1405
1406 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1407
1408 if (gimple_uid (s1) < gimple_uid (s2))
1409 return true;
1410
1411 if (gimple_uid (s1) > gimple_uid (s2))
1412 return false;
1413
1414 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1415 unsigned int uid = gimple_uid (s1);
1416 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1417 {
1418 gimple *s = gsi_stmt (gsi);
1419 if (gimple_uid (s) != uid)
1420 break;
1421 if (s == s2)
1422 return true;
1423 }
1424
1425 return false;
1426 }
1427
1428 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1429 }
1430
1431 /* Insert STMT after INSERT_POINT. */
1432
1433 static void
1434 insert_stmt_after (gimple *stmt, gimple *insert_point)
1435 {
1436 gimple_stmt_iterator gsi;
1437 basic_block bb;
1438
1439 if (gimple_code (insert_point) == GIMPLE_PHI)
1440 bb = gimple_bb (insert_point);
1441 else if (!stmt_ends_bb_p (insert_point))
1442 {
1443 gsi = gsi_for_stmt (insert_point);
1444 gimple_set_uid (stmt, gimple_uid (insert_point));
1445 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1446 return;
1447 }
1448 else
1449 /* We assume INSERT_POINT is an SSA_NAME_DEF_STMT of some SSA_NAME,
1450 thus if it must end a basic block, it should be a call that can
1451 throw, or some assignment that can throw. If it throws, the LHS
1452 of it will not be initialized though, so only valid places using
1453 the SSA_NAME should be dominated by the fallthru edge. */
1454 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1455 gsi = gsi_after_labels (bb);
1456 if (gsi_end_p (gsi))
1457 {
1458 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1459 gimple_set_uid (stmt,
1460 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1461 }
1462 else
1463 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1464 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1465 }
1466
1467 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1468 the result. Places the statement after the definition of either
1469 OP1 or OP2. Returns the new statement. */
1470
1471 static gimple *
1472 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1473 {
1474 gimple *op1def = NULL, *op2def = NULL;
1475 gimple_stmt_iterator gsi;
1476 tree op;
1477 gassign *sum;
1478
1479 /* Create the addition statement. */
1480 op = make_ssa_name (type);
1481 sum = gimple_build_assign (op, opcode, op1, op2);
1482
1483 /* Find an insertion place and insert. */
1484 if (TREE_CODE (op1) == SSA_NAME)
1485 op1def = SSA_NAME_DEF_STMT (op1);
1486 if (TREE_CODE (op2) == SSA_NAME)
1487 op2def = SSA_NAME_DEF_STMT (op2);
1488 if ((!op1def || gimple_nop_p (op1def))
1489 && (!op2def || gimple_nop_p (op2def)))
1490 {
1491 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1492 if (gsi_end_p (gsi))
1493 {
1494 gimple_stmt_iterator gsi2
1495 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1496 gimple_set_uid (sum,
1497 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1498 }
1499 else
1500 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1501 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1502 }
1503 else
1504 {
1505 gimple *insert_point;
1506 if ((!op1def || gimple_nop_p (op1def))
1507 || (op2def && !gimple_nop_p (op2def)
1508 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1509 insert_point = op2def;
1510 else
1511 insert_point = op1def;
1512 insert_stmt_after (sum, insert_point);
1513 }
1514 update_stmt (sum);
1515
1516 return sum;
1517 }
1518
1519 /* Perform un-distribution of divisions and multiplications.
1520 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1521 to (A + B) / X for real X.
1522
1523 The algorithm is organized as follows.
1524
1525 - First we walk the addition chain *OPS looking for summands that
1526 are defined by a multiplication or a real division. This results
1527 in the candidates bitmap with relevant indices into *OPS.
1528
1529 - Second we build the chains of multiplications or divisions for
1530 these candidates, counting the number of occurrences of (operand, code)
1531 pairs in all of the candidates chains.
1532
1533 - Third we sort the (operand, code) pairs by number of occurrence and
1534 process them starting with the pair with the most uses.
1535
1536 * For each such pair we walk the candidates again to build a
1537 second candidate bitmap noting all multiplication/division chains
1538 that have at least one occurrence of (operand, code).
1539
1540 * We build an alternate addition chain only covering these
1541 candidates with one (operand, code) operation removed from their
1542 multiplication/division chain.
1543
1544 * The first candidate gets replaced by the alternate addition chain
1545 multiplied/divided by the operand.
1546
1547 * All candidate chains get disabled for further processing and
1548 processing of (operand, code) pairs continues.
1549
1550 The alternate addition chains built are re-processed by the main
1551 reassociation algorithm which allows optimizing a * x * y + b * y * x
1552 to (a + b) * x * y in one invocation of the reassociation pass. */
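
/* A concrete sketch of the above: for the chain a*x + b*x + c*x + d the
   pair (x, MULT_EXPR) is counted three times, so the three candidates are
   combined into an alternate chain a + b + c, that chain is multiplied by
   x once, and the operand list effectively becomes (a + b + c) * x + d. */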
1553
1554 static bool
1555 undistribute_ops_list (enum tree_code opcode,
1556 vec<operand_entry *> *ops, class loop *loop)
1557 {
1558 unsigned int length = ops->length ();
1559 operand_entry *oe1;
1560 unsigned i, j;
1561 unsigned nr_candidates, nr_candidates2;
1562 sbitmap_iterator sbi0;
1563 vec<operand_entry *> *subops;
1564 bool changed = false;
1565 unsigned int next_oecount_id = 0;
1566
1567 if (length <= 1
1568 || opcode != PLUS_EXPR)
1569 return false;
1570
1571 /* Build a list of candidates to process. */
1572 auto_sbitmap candidates (length);
1573 bitmap_clear (candidates);
1574 nr_candidates = 0;
1575 FOR_EACH_VEC_ELT (*ops, i, oe1)
1576 {
1577 enum tree_code dcode;
1578 gimple *oe1def;
1579
1580 if (TREE_CODE (oe1->op) != SSA_NAME)
1581 continue;
1582 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1583 if (!is_gimple_assign (oe1def))
1584 continue;
1585 dcode = gimple_assign_rhs_code (oe1def);
1586 if ((dcode != MULT_EXPR
1587 && dcode != RDIV_EXPR)
1588 || !is_reassociable_op (oe1def, dcode, loop))
1589 continue;
1590
1591 bitmap_set_bit (candidates, i);
1592 nr_candidates++;
1593 }
1594
1595 if (nr_candidates < 2)
1596 return false;
1597
1598 if (dump_file && (dump_flags & TDF_DETAILS))
1599 {
1600 fprintf (dump_file, "searching for un-distribute opportunities ");
1601 print_generic_expr (dump_file,
1602 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1603 fprintf (dump_file, " %d\n", nr_candidates);
1604 }
1605
1606 /* Build linearized sub-operand lists and the counting table. */
1607 cvec.create (0);
1608
1609 hash_table<oecount_hasher> ctable (15);
1610
1611 /* ??? Macro arguments cannot have multi-argument template types in
1612 them. This typedef is needed to work around that limitation. */
1613 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1614 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1615 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1616 {
1617 gimple *oedef;
1618 enum tree_code oecode;
1619 unsigned j;
1620
1621 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1622 oecode = gimple_assign_rhs_code (oedef);
1623 linearize_expr_tree (&subops[i], oedef,
1624 associative_tree_code (oecode), false);
1625
1626 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1627 {
1628 oecount c;
1629 int *slot;
1630 int idx;
1631 c.oecode = oecode;
1632 c.cnt = 1;
1633 c.id = next_oecount_id++;
1634 c.op = oe1->op;
1635 cvec.safe_push (c);
1636 idx = cvec.length () + 41;
1637 slot = ctable.find_slot (idx, INSERT);
1638 if (!*slot)
1639 {
1640 *slot = idx;
1641 }
1642 else
1643 {
1644 cvec.pop ();
1645 cvec[*slot - 42].cnt++;
1646 }
1647 }
1648 }
1649
1650 /* Sort the counting table. */
1651 cvec.qsort (oecount_cmp);
1652
1653 if (dump_file && (dump_flags & TDF_DETAILS))
1654 {
1655 oecount *c;
1656 fprintf (dump_file, "Candidates:\n");
1657 FOR_EACH_VEC_ELT (cvec, j, c)
1658 {
1659 fprintf (dump_file, " %u %s: ", c->cnt,
1660 c->oecode == MULT_EXPR
1661 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1662 print_generic_expr (dump_file, c->op);
1663 fprintf (dump_file, "\n");
1664 }
1665 }
1666
1667 /* Process the (operand, code) pairs in order of most occurrence. */
1668 auto_sbitmap candidates2 (length);
1669 while (!cvec.is_empty ())
1670 {
1671 oecount *c = &cvec.last ();
1672 if (c->cnt < 2)
1673 break;
1674
1675 /* Now collect the operands in the outer chain that contain
1676 the common operand in their inner chain. */
1677 bitmap_clear (candidates2);
1678 nr_candidates2 = 0;
1679 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1680 {
1681 gimple *oedef;
1682 enum tree_code oecode;
1683 unsigned j;
1684 tree op = (*ops)[i]->op;
1685
1686 /* If we undistributed in this chain already this may be
1687 a constant. */
1688 if (TREE_CODE (op) != SSA_NAME)
1689 continue;
1690
1691 oedef = SSA_NAME_DEF_STMT (op);
1692 oecode = gimple_assign_rhs_code (oedef);
1693 if (oecode != c->oecode)
1694 continue;
1695
1696 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1697 {
1698 if (oe1->op == c->op)
1699 {
1700 bitmap_set_bit (candidates2, i);
1701 ++nr_candidates2;
1702 break;
1703 }
1704 }
1705 }
1706
1707 if (nr_candidates2 >= 2)
1708 {
1709 operand_entry *oe1, *oe2;
1710 gimple *prod;
1711 int first = bitmap_first_set_bit (candidates2);
1712
1713 /* Build the new addition chain. */
1714 oe1 = (*ops)[first];
1715 if (dump_file && (dump_flags & TDF_DETAILS))
1716 {
1717 fprintf (dump_file, "Building (");
1718 print_generic_expr (dump_file, oe1->op);
1719 }
1720 zero_one_operation (&oe1->op, c->oecode, c->op);
1721 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1722 {
1723 gimple *sum;
1724 oe2 = (*ops)[i];
1725 if (dump_file && (dump_flags & TDF_DETAILS))
1726 {
1727 fprintf (dump_file, " + ");
1728 print_generic_expr (dump_file, oe2->op);
1729 }
1730 zero_one_operation (&oe2->op, c->oecode, c->op);
1731 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1732 oe1->op, oe2->op, opcode);
1733 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1734 oe2->rank = 0;
1735 oe1->op = gimple_get_lhs (sum);
1736 }
1737
1738 /* Apply the multiplication/division. */
1739 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1740 oe1->op, c->op, c->oecode);
1741 if (dump_file && (dump_flags & TDF_DETAILS))
1742 {
1743 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1744 print_generic_expr (dump_file, c->op);
1745 fprintf (dump_file, "\n");
1746 }
1747
1748 /* Record it in the addition chain and disable further
1749 undistribution with this op. */
1750 oe1->op = gimple_assign_lhs (prod);
1751 oe1->rank = get_rank (oe1->op);
1752 subops[first].release ();
1753
1754 changed = true;
1755 }
1756
1757 cvec.pop ();
1758 }
1759
1760 for (i = 0; i < ops->length (); ++i)
1761 subops[i].release ();
1762 free (subops);
1763 cvec.release ();
1764
1765 return changed;
1766 }
1767
1768 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1769 first: element index for each relevant BIT_FIELD_REF.
1770 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1771 typedef std::pair<unsigned, unsigned> v_info_elem;
1772 struct v_info {
1773 tree vec_type;
1774 auto_vec<v_info_elem, 32> vec;
1775 };
1776 typedef v_info *v_info_ptr;
1777
1778 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1779 static int
1780 sort_by_mach_mode (const void *p_i, const void *p_j)
1781 {
1782 const tree tr1 = *((const tree *) p_i);
1783 const tree tr2 = *((const tree *) p_j);
1784 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1785 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1786 if (mode1 > mode2)
1787 return 1;
1788 else if (mode1 < mode2)
1789 return -1;
1790 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1791 return -1;
1792 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1793 return 1;
1794 return 0;
1795 }
1796
1797 /* Cleanup hash map for VECTOR information. */
1798 static void
1799 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1800 {
1801 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1802 it != info_map.end (); ++it)
1803 {
1804 v_info_ptr info = (*it).second;
1805 delete info;
1806 (*it).second = NULL;
1807 }
1808 }
1809
1810 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1811 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1812 is transformed to
1813 Vs = (V1 + V2 + ... + Vn)
1814 Vs[0] + Vs[1] + ... + Vs[k]
1815
1816 The basic steps are listed below:
1817
1818 1) Check the addition chain *OPS, looking for summands that come from
1819 a BIT_FIELD_REF on a VECTOR type. Put the information into
1820 v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1821
1822 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1823 contiguous and cover the whole VECTOR perfectly without any holes.
1824 Obtain one VECTOR list which contains the candidates to be transformed.
1825
1826 3) Sort the VECTOR list by the machine mode of the VECTOR type. For each group
1827 of candidates with the same mode, build the addition statements for them and
1828 generate BIT_FIELD_REFs accordingly.
1829
1830 TODO:
1831 The current implementation requires that each whole VECTOR be fully
1832 covered, but it could be extended to support partial coverage
1833 (adjacent elements that do not fill the whole vector); that may need
1834 some cost model to decide where the boundary is.
1835 */
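
/* A concrete sketch with two 2-element vectors (hypothetical names):

     s = V1[0] + V1[1] + V2[0] + V2[1];

   is rewritten as

     Vs = V1 + V2;
     s = Vs[0] + Vs[1];

   halving the number of scalar additions. */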
1836 static bool
1837 undistribute_bitref_for_vector (enum tree_code opcode,
1838 vec<operand_entry *> *ops, struct loop *loop)
1839 {
1840 if (ops->length () <= 1)
1841 return false;
1842
1843 if (opcode != PLUS_EXPR
1844 && opcode != MULT_EXPR
1845 && opcode != BIT_XOR_EXPR
1846 && opcode != BIT_IOR_EXPR
1847 && opcode != BIT_AND_EXPR)
1848 return false;
1849
1850 hash_map<tree, v_info_ptr> v_info_map;
1851 operand_entry *oe1;
1852 unsigned i;
1853
1854 /* Find the summands in the addition chain that come from a VECTOR
1855 BIT_FIELD_REF, and put the information into the map. */
1856 FOR_EACH_VEC_ELT (*ops, i, oe1)
1857 {
1858 enum tree_code dcode;
1859 gimple *oe1def;
1860
1861 if (TREE_CODE (oe1->op) != SSA_NAME)
1862 continue;
1863 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1864 if (!is_gimple_assign (oe1def))
1865 continue;
1866 dcode = gimple_assign_rhs_code (oe1def);
1867 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1868 continue;
1869
1870 tree rhs = gimple_assign_rhs1 (oe1def);
1871 tree vec = TREE_OPERAND (rhs, 0);
1872 tree vec_type = TREE_TYPE (vec);
1873
1874 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1875 continue;
1876
1877 /* Ignore it if target machine can't support this VECTOR type. */
1878 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1879 continue;
1880
1881 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1882 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1883 continue;
1884
1885 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1886 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1887 continue;
1888
1889 /* The type of BIT_FIELD_REF might not be equal to the element type of
1890 the vector. We want to use a vector type with element type the
1891 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1892 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1893 {
1894 machine_mode simd_mode;
1895 unsigned HOST_WIDE_INT size, nunits;
1896 unsigned HOST_WIDE_INT elem_size
1897 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1898 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1899 continue;
1900 if (size <= elem_size || (size % elem_size) != 0)
1901 continue;
1902 nunits = size / elem_size;
1903 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1904 nunits).exists (&simd_mode))
1905 continue;
1906 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1907
1908 /* Ignore it if target machine can't support this VECTOR type. */
1909 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1910 continue;
1911
1912 /* Check const vector type, constrain BIT_FIELD_REF offset and
1913 size. */
1914 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1915 continue;
1916
1917 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1918 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1919 continue;
1920 }
1921
1922 tree elem_type = TREE_TYPE (vec_type);
1923 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1924 if (maybe_ne (bit_field_size (rhs), elem_size))
1925 continue;
1926
1927 unsigned idx;
1928 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1929 continue;
1930
1931 /* Ignore it if target machine can't support this type of VECTOR
1932 operation. */
1933 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1934 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1935 continue;
1936
1937 bool existed;
1938 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1939 if (!existed)
1940 {
1941 info = new v_info;
1942 info->vec_type = vec_type;
1943 }
1944 else if (!types_compatible_p (vec_type, info->vec_type))
1945 continue;
1946 info->vec.safe_push (std::make_pair (idx, i));
1947 }
1948
1949 /* We need at least two VECTORs to combine. */
1950 if (v_info_map.elements () <= 1)
1951 {
1952 cleanup_vinfo_map (v_info_map);
1953 return false;
1954 }
1955
1956 /* Verify all VECTOR candidates by checking two conditions:
1957 1) the sorted offsets are adjacent, with no holes;
1958 2) together they fill the whole VECTOR perfectly.
1959 Add the valid candidates to a vector for further handling. */
1960 auto_vec<tree> valid_vecs (v_info_map.elements ());
1961 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1962 it != v_info_map.end (); ++it)
1963 {
1964 tree cand_vec = (*it).first;
1965 v_info_ptr cand_info = (*it).second;
1966 unsigned int num_elems
1967 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1968 if (cand_info->vec.length () != num_elems)
1969 continue;
1970 sbitmap holes = sbitmap_alloc (num_elems);
1971 bitmap_ones (holes);
1972 bool valid = true;
1973 v_info_elem *curr;
1974 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1975 {
1976 if (!bitmap_bit_p (holes, curr->first))
1977 {
1978 valid = false;
1979 break;
1980 }
1981 else
1982 bitmap_clear_bit (holes, curr->first);
1983 }
1984 if (valid && bitmap_empty_p (holes))
1985 valid_vecs.quick_push (cand_vec);
1986 sbitmap_free (holes);
1987 }
1988
1989 /* We need at least two VECTORs to combine. */
1990 if (valid_vecs.length () <= 1)
1991 {
1992 cleanup_vinfo_map (v_info_map);
1993 return false;
1994 }
1995
1996 valid_vecs.qsort (sort_by_mach_mode);
1997 /* Go through all candidates in machine mode order; after the sort equal
1998 modes are adjacent, and modes with only a single candidate are skipped. */
1999 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2000 {
2001 tree tvec = valid_vecs[i];
2002 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2003
2004 /* Skip modes with only a single candidate. */
2005 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2006 continue;
2007
2008 unsigned int idx, j;
2009 gimple *sum = NULL;
2010 tree sum_vec = tvec;
2011 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2012 v_info_elem *elem;
2013 tree vec_type = info_ptr->vec_type;
2014
2015 /* Build the sum for all candidates with the same mode. */
2016 do
2017 {
2018 sum = build_and_add_sum (vec_type, sum_vec,
2019 valid_vecs[i + 1], opcode);
2020 if (!useless_type_conversion_p (vec_type,
2021 TREE_TYPE (valid_vecs[i + 1])))
2022 {
2023 /* Update the operands only after build_and_add_sum,
2024 so that we don't have to repeat the placement algorithm
2025 of build_and_add_sum. */
2026 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2027 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2028 valid_vecs[i + 1]);
2029 tree lhs = make_ssa_name (vec_type);
2030 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2031 gimple_set_uid (g, gimple_uid (sum));
2032 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2033 gimple_assign_set_rhs2 (sum, lhs);
2034 if (sum_vec == tvec)
2035 {
2036 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2037 lhs = make_ssa_name (vec_type);
2038 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2039 gimple_set_uid (g, gimple_uid (sum));
2040 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2041 gimple_assign_set_rhs1 (sum, lhs);
2042 }
2043 update_stmt (sum);
2044 }
2045 sum_vec = gimple_get_lhs (sum);
2046 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2047 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2048 /* Update the ops that refer to the current candidate VECTOR. */
2049 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2050 {
2051 idx = elem->second;
2052 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2053 /* Mark the def stmt visited so the op definition will get DCEd later. */
2054 gimple_set_visited (def, true);
2055 if (opcode == PLUS_EXPR
2056 || opcode == BIT_XOR_EXPR
2057 || opcode == BIT_IOR_EXPR)
2058 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2059 else if (opcode == MULT_EXPR)
2060 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2061 else
2062 {
2063 gcc_assert (opcode == BIT_AND_EXPR);
2064 (*ops)[idx]->op
2065 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2066 }
2067 (*ops)[idx]->rank = 0;
2068 }
2069 if (dump_file && (dump_flags & TDF_DETAILS))
2070 {
2071 fprintf (dump_file, "Generating addition -> ");
2072 print_gimple_stmt (dump_file, sum, 0);
2073 }
2074 i++;
2075 }
2076 while ((i < valid_vecs.length () - 1)
2077 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2078
2079 /* Referring to the first valid VECTOR with this mode, generate the
2080 BIT_FIELD_REF statements accordingly. */
2081 info_ptr = *(v_info_map.get (tvec));
2082 gcc_assert (sum);
2083 tree elem_type = TREE_TYPE (vec_type);
2084 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2085 {
2086 idx = elem->second;
2087 tree dst = make_ssa_name (elem_type);
2088 tree pos = bitsize_int (elem->first
2089 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2090 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2091 TYPE_SIZE (elem_type), pos);
2092 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2093 insert_stmt_after (gs, sum);
2094 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2095 /* Mark the def stmt visited so the op definition will get DCEd later. */
2096 gimple_set_visited (def, true);
2097 (*ops)[idx]->op = gimple_assign_lhs (gs);
2098 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2099 if (dump_file && (dump_flags & TDF_DETAILS))
2100 {
2101 fprintf (dump_file, "Generating bit_field_ref -> ");
2102 print_gimple_stmt (dump_file, gs, 0);
2103 }
2104 }
2105 }
2106
2107 if (dump_file && (dump_flags & TDF_DETAILS))
2108 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2109
2110 cleanup_vinfo_map (v_info_map);
2111
2112 return true;
2113 }
2114
2115 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2116 expression, examine the other OPS to see if any of them are comparisons
2117 of the same values, which we may be able to combine or eliminate.
2118 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
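/* Illustrative sketch (not part of GCC): at the source level the
   example above amounts to rewriting

     int
     le_via_or (int a, int b)
     {
       return (a < b) | (a == b);
     }

   as

     int
     le_via_or (int a, int b)
     {
       return a <= b;
     }

   eliminating one of the two comparisons from the ops list.  */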
2119
2120 static bool
2121 eliminate_redundant_comparison (enum tree_code opcode,
2122 vec<operand_entry *> *ops,
2123 unsigned int currindex,
2124 operand_entry *curr)
2125 {
2126 tree op1, op2;
2127 enum tree_code lcode, rcode;
2128 gimple *def1, *def2;
2129 int i;
2130 operand_entry *oe;
2131
2132 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2133 return false;
2134
2135 /* Check that CURR is a comparison. */
2136 if (TREE_CODE (curr->op) != SSA_NAME)
2137 return false;
2138 def1 = SSA_NAME_DEF_STMT (curr->op);
2139 if (!is_gimple_assign (def1))
2140 return false;
2141 lcode = gimple_assign_rhs_code (def1);
2142 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2143 return false;
2144 op1 = gimple_assign_rhs1 (def1);
2145 op2 = gimple_assign_rhs2 (def1);
2146
2147 /* Now look for a similar comparison in the remaining OPS. */
2148 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2149 {
2150 tree t;
2151
2152 if (TREE_CODE (oe->op) != SSA_NAME)
2153 continue;
2154 def2 = SSA_NAME_DEF_STMT (oe->op);
2155 if (!is_gimple_assign (def2))
2156 continue;
2157 rcode = gimple_assign_rhs_code (def2);
2158 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2159 continue;
2160
2161 /* If we got here, we have a match. See if we can combine the
2162 two comparisons. */
2163 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2164 if (opcode == BIT_IOR_EXPR)
2165 t = maybe_fold_or_comparisons (type,
2166 lcode, op1, op2,
2167 rcode, gimple_assign_rhs1 (def2),
2168 gimple_assign_rhs2 (def2));
2169 else
2170 t = maybe_fold_and_comparisons (type,
2171 lcode, op1, op2,
2172 rcode, gimple_assign_rhs1 (def2),
2173 gimple_assign_rhs2 (def2));
2174 if (!t)
2175 continue;
2176
2177 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2178 always give us a boolean_type_node value back. If the original
2179 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2180 we need to convert. */
2181 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2182 t = fold_convert (TREE_TYPE (curr->op), t);
2183
2184 if (TREE_CODE (t) != INTEGER_CST
2185 && !operand_equal_p (t, curr->op, 0))
2186 {
2187 enum tree_code subcode;
2188 tree newop1, newop2;
2189 if (!COMPARISON_CLASS_P (t))
2190 continue;
2191 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2192 STRIP_USELESS_TYPE_CONVERSION (newop1);
2193 STRIP_USELESS_TYPE_CONVERSION (newop2);
2194 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2195 continue;
2196 }
2197
2198 if (dump_file && (dump_flags & TDF_DETAILS))
2199 {
2200 fprintf (dump_file, "Equivalence: ");
2201 print_generic_expr (dump_file, curr->op);
2202 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2203 print_generic_expr (dump_file, oe->op);
2204 fprintf (dump_file, " -> ");
2205 print_generic_expr (dump_file, t);
2206 fprintf (dump_file, "\n");
2207 }
2208
2209 /* Now we can delete oe, as it has been subsumed by the new combined
2210 expression t. */
2211 ops->ordered_remove (i);
2212 reassociate_stats.ops_eliminated ++;
2213
2214 /* If t is the same as curr->op, we're done. Otherwise we must
2215 replace curr->op with t. Special case is if we got a constant
2216 back, in which case we add it to the end instead of in place of
2217 the current entry. */
2218 if (TREE_CODE (t) == INTEGER_CST)
2219 {
2220 ops->ordered_remove (currindex);
2221 add_to_ops_vec (ops, t);
2222 }
2223 else if (!operand_equal_p (t, curr->op, 0))
2224 {
2225 gimple *sum;
2226 enum tree_code subcode;
2227 tree newop1;
2228 tree newop2;
2229 gcc_assert (COMPARISON_CLASS_P (t));
2230 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2231 STRIP_USELESS_TYPE_CONVERSION (newop1);
2232 STRIP_USELESS_TYPE_CONVERSION (newop2);
2233 gcc_checking_assert (is_gimple_val (newop1)
2234 && is_gimple_val (newop2));
2235 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2236 curr->op = gimple_get_lhs (sum);
2237 }
2238 return true;
2239 }
2240
2241 return false;
2242 }
2243
2244
2245 /* Transform repeated addition of the same value into a multiplication
2246 by a constant. */
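/* Illustrative sketch (not part of GCC): for a floating-point chain,
   and assuming -funsafe-math-optimizations, the rewrite turns

     double
     sum (double a, double b)
     {
       return a + a + a + b;
     }

   into the equivalent of

     double
     sum (double a, double b)
     {
       return a * 3.0 + b;
     }

   For integral operands the rewrite is done unconditionally.  */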
2247 static bool
2248 transform_add_to_multiply (vec<operand_entry *> *ops)
2249 {
2250 operand_entry *oe;
2251 tree op = NULL_TREE;
2252 int j;
2253 int i, start = -1, end = 0, count = 0;
2254 auto_vec<std::pair <int, int> > indxs;
2255 bool changed = false;
2256
2257 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2258 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2259 || !flag_unsafe_math_optimizations))
2260 return false;
2261
2262 /* Look for repeated operands. */
2263 FOR_EACH_VEC_ELT (*ops, i, oe)
2264 {
2265 if (start == -1)
2266 {
2267 count = 1;
2268 op = oe->op;
2269 start = i;
2270 }
2271 else if (operand_equal_p (oe->op, op, 0))
2272 {
2273 count++;
2274 end = i;
2275 }
2276 else
2277 {
2278 if (count > 1)
2279 indxs.safe_push (std::make_pair (start, end));
2280 count = 1;
2281 op = oe->op;
2282 start = i;
2283 }
2284 }
2285
2286 if (count > 1)
2287 indxs.safe_push (std::make_pair (start, end));
2288
2289 for (j = indxs.length () - 1; j >= 0; --j)
2290 {
2291 /* Convert repeated operand addition to multiplication. */
2292 start = indxs[j].first;
2293 end = indxs[j].second;
2294 op = (*ops)[start]->op;
2295 count = end - start + 1;
2296 for (i = end; i >= start; --i)
2297 ops->unordered_remove (i);
2298 tree tmp = make_ssa_name (TREE_TYPE (op));
2299 tree cst = build_int_cst (integer_type_node, count);
2300 gassign *mul_stmt
2301 = gimple_build_assign (tmp, MULT_EXPR,
2302 op, fold_convert (TREE_TYPE (op), cst));
2303 gimple_set_visited (mul_stmt, true);
2304 add_to_ops_vec (ops, tmp, mul_stmt);
2305 changed = true;
2306 }
2307
2308 return changed;
2309 }
2310
2311
2312 /* Perform various identities and other optimizations on the list of
2313 operand entries, stored in OPS. The tree code for the binary
2314 operation between all the operands is OPCODE. */
2315
2316 static void
2317 optimize_ops_list (enum tree_code opcode,
2318 vec<operand_entry *> *ops)
2319 {
2320 unsigned int length = ops->length ();
2321 unsigned int i;
2322 operand_entry *oe;
2323 operand_entry *oelast = NULL;
2324 bool iterate = false;
2325
2326 if (length == 1)
2327 return;
2328
2329 oelast = ops->last ();
2330
2331 /* If the last two are constants, pop the constants off, merge them
2332 and try the next two. */
2333 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2334 {
2335 operand_entry *oelm1 = (*ops)[length - 2];
2336
2337 if (oelm1->rank == 0
2338 && is_gimple_min_invariant (oelm1->op)
2339 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2340 TREE_TYPE (oelast->op)))
2341 {
2342 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2343 oelm1->op, oelast->op);
2344
2345 if (folded && is_gimple_min_invariant (folded))
2346 {
2347 if (dump_file && (dump_flags & TDF_DETAILS))
2348 fprintf (dump_file, "Merging constants\n");
2349
2350 ops->pop ();
2351 ops->pop ();
2352
2353 add_to_ops_vec (ops, folded);
2354 reassociate_stats.constants_eliminated++;
2355
2356 optimize_ops_list (opcode, ops);
2357 return;
2358 }
2359 }
2360 }
2361
2362 eliminate_using_constants (opcode, ops);
2363 oelast = NULL;
2364
2365 for (i = 0; ops->iterate (i, &oe);)
2366 {
2367 bool done = false;
2368
2369 if (eliminate_not_pairs (opcode, ops, i, oe))
2370 return;
2371 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2372 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2373 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2374 {
2375 if (done)
2376 return;
2377 iterate = true;
2378 oelast = NULL;
2379 continue;
2380 }
2381 oelast = oe;
2382 i++;
2383 }
2384
2385 if (iterate)
2386 optimize_ops_list (opcode, ops);
2387 }
2388
2389 /* The following functions are subroutines to optimize_range_tests and allow
2390 it to try to change a logical combination of comparisons into a range
2391 test.
2392
2393 For example, both
2394 X == 2 || X == 5 || X == 3 || X == 4
2395 and
2396 X >= 2 && X <= 5
2397 are converted to
2398 (unsigned) (X - 2) <= 3
2399
2400 For more information see the comments above fold_range_test in fold-const.c;
2401 this implementation is for GIMPLE. */
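/* Illustrative sketch (not part of GCC): at the source level both forms
   in the example above, i.e.

     int
     in_2_to_5 (int x)
     {
       return x == 2 || x == 5 || x == 3 || x == 4;
     }

   and

     int
     in_2_to_5 (int x)
     {
       return x >= 2 && x <= 5;
     }

   reduce to the single unsigned comparison

     int
     in_2_to_5 (int x)
     {
       return (unsigned) (x - 2) <= 3;
     }

   which the helpers below build on GIMPLE.  */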
2402
2403
2404
2405 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2406
2407 void
2408 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2409 {
2410 if (!skip_exp)
2411 print_generic_expr (file, r->exp);
2412 fprintf (file, " %c[", r->in_p ? '+' : '-');
2413 print_generic_expr (file, r->low);
2414 fputs (", ", file);
2415 print_generic_expr (file, r->high);
2416 fputc (']', file);
2417 }
2418
2419 /* Dump the range entry R to STDERR. */
2420
2421 DEBUG_FUNCTION void
2422 debug_range_entry (struct range_entry *r)
2423 {
2424 dump_range_entry (stderr, r, false);
2425 fputc ('\n', stderr);
2426 }
2427
2428 /* This is similar to make_range in fold-const.c, but on top of
2429 GIMPLE instead of trees. If EXP is non-NULL, it should be
2430 an SSA_NAME and STMT argument is ignored, otherwise STMT
2431 argument should be a GIMPLE_COND. */
2432
2433 void
2434 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2435 {
2436 int in_p;
2437 tree low, high;
2438 bool is_bool, strict_overflow_p;
2439
2440 r->exp = NULL_TREE;
2441 r->in_p = false;
2442 r->strict_overflow_p = false;
2443 r->low = NULL_TREE;
2444 r->high = NULL_TREE;
2445 if (exp != NULL_TREE
2446 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2447 return;
2448
2449 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2450 and see if we can refine the range. Some of the cases below may not
2451 happen, but it doesn't seem worth worrying about this. We "continue"
2452 the outer loop when we've changed something; otherwise we "break"
2453 the switch, which will "break" the while. */
2454 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2455 high = low;
2456 in_p = 0;
2457 strict_overflow_p = false;
2458 is_bool = false;
2459 if (exp == NULL_TREE)
2460 is_bool = true;
2461 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2462 {
2463 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2464 is_bool = true;
2465 else
2466 return;
2467 }
2468 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2469 is_bool = true;
2470
2471 while (1)
2472 {
2473 enum tree_code code;
2474 tree arg0, arg1, exp_type;
2475 tree nexp;
2476 location_t loc;
2477
2478 if (exp != NULL_TREE)
2479 {
2480 if (TREE_CODE (exp) != SSA_NAME
2481 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2482 break;
2483
2484 stmt = SSA_NAME_DEF_STMT (exp);
2485 if (!is_gimple_assign (stmt))
2486 break;
2487
2488 code = gimple_assign_rhs_code (stmt);
2489 arg0 = gimple_assign_rhs1 (stmt);
2490 arg1 = gimple_assign_rhs2 (stmt);
2491 exp_type = TREE_TYPE (exp);
2492 }
2493 else
2494 {
2495 code = gimple_cond_code (stmt);
2496 arg0 = gimple_cond_lhs (stmt);
2497 arg1 = gimple_cond_rhs (stmt);
2498 exp_type = boolean_type_node;
2499 }
2500
2501 if (TREE_CODE (arg0) != SSA_NAME
2502 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2503 break;
2504 loc = gimple_location (stmt);
2505 switch (code)
2506 {
2507 case BIT_NOT_EXPR:
2508 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2509 /* Ensure the range is either +[-,0], +[0,0],
2510 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2511 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2512 or similar expression of unconditional true or
2513 false, it should not be negated. */
2514 && ((high && integer_zerop (high))
2515 || (low && integer_onep (low))))
2516 {
2517 in_p = !in_p;
2518 exp = arg0;
2519 continue;
2520 }
2521 break;
2522 case SSA_NAME:
2523 exp = arg0;
2524 continue;
2525 CASE_CONVERT:
2526 if (is_bool)
2527 {
2528 if ((TYPE_PRECISION (exp_type) == 1
2529 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2530 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2531 return;
2532 }
2533 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2534 {
2535 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2536 is_bool = true;
2537 else
2538 return;
2539 }
2540 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2541 is_bool = true;
2542 goto do_default;
2543 case EQ_EXPR:
2544 case NE_EXPR:
2545 case LT_EXPR:
2546 case LE_EXPR:
2547 case GE_EXPR:
2548 case GT_EXPR:
2549 is_bool = true;
2550 /* FALLTHRU */
2551 default:
2552 if (!is_bool)
2553 return;
2554 do_default:
2555 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2556 &low, &high, &in_p,
2557 &strict_overflow_p);
2558 if (nexp != NULL_TREE)
2559 {
2560 exp = nexp;
2561 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2562 continue;
2563 }
2564 break;
2565 }
2566 break;
2567 }
2568 if (is_bool)
2569 {
2570 r->exp = exp;
2571 r->in_p = in_p;
2572 r->low = low;
2573 r->high = high;
2574 r->strict_overflow_p = strict_overflow_p;
2575 }
2576 }
2577
2578 /* Comparison function for qsort. Sort entries
2579 without SSA_NAME exp first, then with SSA_NAMEs sorted
2580 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2581 by increasing ->low and if ->low is the same, by increasing
2582 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2583 maximum. */
2584
2585 static int
2586 range_entry_cmp (const void *a, const void *b)
2587 {
2588 const struct range_entry *p = (const struct range_entry *) a;
2589 const struct range_entry *q = (const struct range_entry *) b;
2590
2591 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2592 {
2593 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2594 {
2595 /* Group range_entries for the same SSA_NAME together. */
2596 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2597 return -1;
2598 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2599 return 1;
2600 /* If ->low is different, NULL low goes first, then by
2601 ascending low. */
2602 if (p->low != NULL_TREE)
2603 {
2604 if (q->low != NULL_TREE)
2605 {
2606 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2607 p->low, q->low);
2608 if (tem && integer_onep (tem))
2609 return -1;
2610 tem = fold_binary (GT_EXPR, boolean_type_node,
2611 p->low, q->low);
2612 if (tem && integer_onep (tem))
2613 return 1;
2614 }
2615 else
2616 return 1;
2617 }
2618 else if (q->low != NULL_TREE)
2619 return -1;
2620 /* If ->high is different, NULL high goes last, before that by
2621 ascending high. */
2622 if (p->high != NULL_TREE)
2623 {
2624 if (q->high != NULL_TREE)
2625 {
2626 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2627 p->high, q->high);
2628 if (tem && integer_onep (tem))
2629 return -1;
2630 tem = fold_binary (GT_EXPR, boolean_type_node,
2631 p->high, q->high);
2632 if (tem && integer_onep (tem))
2633 return 1;
2634 }
2635 else
2636 return -1;
2637 }
2638 else if (q->high != NULL_TREE)
2639 return 1;
2640 /* If both ranges are the same, sort below by ascending idx. */
2641 }
2642 else
2643 return 1;
2644 }
2645 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2646 return -1;
2647
2648 if (p->idx < q->idx)
2649 return -1;
2650 else
2651 {
2652 gcc_checking_assert (p->idx > q->idx);
2653 return 1;
2654 }
2655 }
2656
2657 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2658 inserting any needed statements before or after GSI according to BEFORE. */
2659
2660 static tree
2661 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2662 {
2663 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2664 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2665 if (TREE_CODE (ret) != SSA_NAME)
2666 {
2667 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2668 if (before)
2669 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2670 else
2671 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2672 ret = gimple_assign_lhs (g);
2673 }
2674 return ret;
2675 }
2676
2677 /* Helper routine of optimize_range_tests.
2678 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2679 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2680 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2681 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2682 an array of COUNT pointers to other ranges. Return
2683 true if the range merge has been successful.
2684 If OPCODE is ERROR_MARK, this is called from within
2685 maybe_optimize_range_tests and is performing inter-bb range optimization.
2686 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2687 oe->rank. */
2688
2689 static bool
2690 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2691 struct range_entry **otherrangep,
2692 unsigned int count, enum tree_code opcode,
2693 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2694 bool in_p, tree low, tree high, bool strict_overflow_p)
2695 {
2696 operand_entry *oe = (*ops)[range->idx];
2697 tree op = oe->op;
2698 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2699 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2700 location_t loc = gimple_location (stmt);
2701 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2702 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2703 in_p, low, high);
2704 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2705 gimple_stmt_iterator gsi;
2706 unsigned int i, uid;
2707
2708 if (tem == NULL_TREE)
2709 return false;
2710
2711 /* If op is default def SSA_NAME, there is no place to insert the
2712 new comparison. Give up, unless we can use OP itself as the
2713 range test. */
2714 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2715 {
2716 if (op == range->exp
2717 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2718 || TREE_CODE (optype) == BOOLEAN_TYPE)
2719 && (op == tem
2720 || (TREE_CODE (tem) == EQ_EXPR
2721 && TREE_OPERAND (tem, 0) == op
2722 && integer_onep (TREE_OPERAND (tem, 1))))
2723 && opcode != BIT_IOR_EXPR
2724 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2725 {
2726 stmt = NULL;
2727 tem = op;
2728 }
2729 else
2730 return false;
2731 }
2732
2733 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2734 warning_at (loc, OPT_Wstrict_overflow,
2735 "assuming signed overflow does not occur "
2736 "when simplifying range test");
2737
2738 if (dump_file && (dump_flags & TDF_DETAILS))
2739 {
2740 struct range_entry *r;
2741 fprintf (dump_file, "Optimizing range tests ");
2742 dump_range_entry (dump_file, range, false);
2743 for (i = 0; i < count; i++)
2744 {
2745 if (otherrange)
2746 r = otherrange + i;
2747 else
2748 r = otherrangep[i];
2749 if (r->exp
2750 && r->exp != range->exp
2751 && TREE_CODE (r->exp) == SSA_NAME)
2752 {
2753 fprintf (dump_file, " and ");
2754 dump_range_entry (dump_file, r, false);
2755 }
2756 else
2757 {
2758 fprintf (dump_file, " and");
2759 dump_range_entry (dump_file, r, true);
2760 }
2761 }
2762 fprintf (dump_file, "\n into ");
2763 print_generic_expr (dump_file, tem);
2764 fprintf (dump_file, "\n");
2765 }
2766
2767 if (opcode == BIT_IOR_EXPR
2768 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2769 tem = invert_truthvalue_loc (loc, tem);
2770
2771 tem = fold_convert_loc (loc, optype, tem);
2772 if (stmt)
2773 {
2774 gsi = gsi_for_stmt (stmt);
2775 uid = gimple_uid (stmt);
2776 }
2777 else
2778 {
2779 gsi = gsi_none ();
2780 uid = 0;
2781 }
2782 if (stmt == NULL)
2783 gcc_checking_assert (tem == op);
2784 /* In rare cases range->exp can be equal to lhs of stmt.
2785 In that case we have to insert after the stmt rather than before
2786 it. If stmt is a PHI, insert it at the start of the basic block. */
2787 else if (op != range->exp)
2788 {
2789 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2790 tem = force_into_ssa_name (&gsi, tem, true);
2791 gsi_prev (&gsi);
2792 }
2793 else if (gimple_code (stmt) != GIMPLE_PHI)
2794 {
2795 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2796 tem = force_into_ssa_name (&gsi, tem, false);
2797 }
2798 else
2799 {
2800 gsi = gsi_after_labels (gimple_bb (stmt));
2801 if (!gsi_end_p (gsi))
2802 uid = gimple_uid (gsi_stmt (gsi));
2803 else
2804 {
2805 gsi = gsi_start_bb (gimple_bb (stmt));
2806 uid = 1;
2807 while (!gsi_end_p (gsi))
2808 {
2809 uid = gimple_uid (gsi_stmt (gsi));
2810 gsi_next (&gsi);
2811 }
2812 }
2813 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2814 tem = force_into_ssa_name (&gsi, tem, true);
2815 if (gsi_end_p (gsi))
2816 gsi = gsi_last_bb (gimple_bb (stmt));
2817 else
2818 gsi_prev (&gsi);
2819 }
2820 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2821 if (gimple_uid (gsi_stmt (gsi)))
2822 break;
2823 else
2824 gimple_set_uid (gsi_stmt (gsi), uid);
2825
2826 oe->op = tem;
2827 range->exp = exp;
2828 range->low = low;
2829 range->high = high;
2830 range->in_p = in_p;
2831 range->strict_overflow_p = false;
2832
2833 for (i = 0; i < count; i++)
2834 {
2835 if (otherrange)
2836 range = otherrange + i;
2837 else
2838 range = otherrangep[i];
2839 oe = (*ops)[range->idx];
2840 /* Now change all the other range test immediate uses, so that
2841 those tests will be optimized away. */
2842 if (opcode == ERROR_MARK)
2843 {
2844 if (oe->op)
2845 oe->op = build_int_cst (TREE_TYPE (oe->op),
2846 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2847 else
2848 oe->op = (oe->rank == BIT_IOR_EXPR
2849 ? boolean_false_node : boolean_true_node);
2850 }
2851 else
2852 oe->op = error_mark_node;
2853 range->exp = NULL_TREE;
2854 range->low = NULL_TREE;
2855 range->high = NULL_TREE;
2856 }
2857 return true;
2858 }
2859
2860 /* Optimize X == CST1 || X == CST2
2861 if popcount (CST1 ^ CST2) == 1 into
2862 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2863 Similarly for ranges. E.g.
2864 X != 2 && X != 3 && X != 10 && X != 11
2865 will be transformed by the previous optimization into
2866 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2867 and this loop can transform that into
2868 !(((X & ~8) - 2U) <= 1U). */
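/* Illustrative sketch (not part of GCC): for single values the XOR
   trick above means that e.g.

     int
     is_4_or_6 (unsigned x)
     {
       return x == 4 || x == 6;
     }

   can be rewritten as

     int
     is_4_or_6 (unsigned x)
     {
       return (x & ~2u) == 4;
     }

   because 4 ^ 6 == 2 has exactly one bit set, so clearing that bit
   maps both values onto the same constant.  */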
2869
2870 static bool
2871 optimize_range_tests_xor (enum tree_code opcode, tree type,
2872 tree lowi, tree lowj, tree highi, tree highj,
2873 vec<operand_entry *> *ops,
2874 struct range_entry *rangei,
2875 struct range_entry *rangej)
2876 {
2877 tree lowxor, highxor, tem, exp;
2878 /* Check lowi ^ lowj == highi ^ highj and
2879 popcount (lowi ^ lowj) == 1. */
2880 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2881 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2882 return false;
2883 if (!integer_pow2p (lowxor))
2884 return false;
2885 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2886 if (!tree_int_cst_equal (lowxor, highxor))
2887 return false;
2888
2889 exp = rangei->exp;
2890 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2891 int prec = GET_MODE_PRECISION (mode);
2892 if (TYPE_PRECISION (type) < prec
2893 || (wi::to_wide (TYPE_MIN_VALUE (type))
2894 != wi::min_value (prec, TYPE_SIGN (type)))
2895 || (wi::to_wide (TYPE_MAX_VALUE (type))
2896 != wi::max_value (prec, TYPE_SIGN (type))))
2897 {
2898 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2899 exp = fold_convert (type, exp);
2900 lowxor = fold_convert (type, lowxor);
2901 lowi = fold_convert (type, lowi);
2902 highi = fold_convert (type, highi);
2903 }
2904 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2905 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2906 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2907 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2908 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2909 NULL, rangei->in_p, lowj, highj,
2910 rangei->strict_overflow_p
2911 || rangej->strict_overflow_p))
2912 return true;
2913 return false;
2914 }
2915
2916 /* Optimize X == CST1 || X == CST2
2917 if popcount (CST2 - CST1) == 1 into
2918 ((X - CST1) & ~(CST2 - CST1)) == 0.
2919 Similarly for ranges. E.g.
2920 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2921 || X == 75 || X == 45
2922 will be transformed by the previous optimization into
2923 (X - 43U) <= 3U || (X - 75U) <= 3U
2924 and this loop can transform that into
2925 ((X - 43U) & ~(75U - 43U)) <= 3U. */
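/* Illustrative sketch (not part of GCC): for single values the
   difference trick above means that e.g.

     int
     is_43_or_75 (unsigned x)
     {
       return x == 43 || x == 75;
     }

   can be rewritten as

     int
     is_43_or_75 (unsigned x)
     {
       return ((x - 43u) & ~(75u - 43u)) == 0;
     }

   because 75 - 43 == 32 has exactly one bit set, so subtracting 43
   and masking that bit away maps both values onto zero.  */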
2926 static bool
2927 optimize_range_tests_diff (enum tree_code opcode, tree type,
2928 tree lowi, tree lowj, tree highi, tree highj,
2929 vec<operand_entry *> *ops,
2930 struct range_entry *rangei,
2931 struct range_entry *rangej)
2932 {
2933 tree tem1, tem2, mask;
2934 /* Check highi - lowi == highj - lowj. */
2935 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2936 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2937 return false;
2938 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2939 if (!tree_int_cst_equal (tem1, tem2))
2940 return false;
2941 /* Check popcount (lowj - lowi) == 1. */
2942 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2943 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2944 return false;
2945 if (!integer_pow2p (tem1))
2946 return false;
2947
2948 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2949 int prec = GET_MODE_PRECISION (mode);
2950 if (TYPE_PRECISION (type) < prec
2951 || (wi::to_wide (TYPE_MIN_VALUE (type))
2952 != wi::min_value (prec, TYPE_SIGN (type)))
2953 || (wi::to_wide (TYPE_MAX_VALUE (type))
2954 != wi::max_value (prec, TYPE_SIGN (type))))
2955 type = build_nonstandard_integer_type (prec, 1);
2956 else
2957 type = unsigned_type_for (type);
2958 tem1 = fold_convert (type, tem1);
2959 tem2 = fold_convert (type, tem2);
2960 lowi = fold_convert (type, lowi);
2961 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2962 tem1 = fold_build2 (MINUS_EXPR, type,
2963 fold_convert (type, rangei->exp), lowi);
2964 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2965 lowj = build_int_cst (type, 0);
2966 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2967 NULL, rangei->in_p, lowj, tem2,
2968 rangei->strict_overflow_p
2969 || rangej->strict_overflow_p))
2970 return true;
2971 return false;
2972 }
2973
2974 /* Perform the checks common to optimize_range_tests_xor and
2975 optimize_range_tests_diff.
2976 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2977 otherwise call optimize_range_tests_diff. */
2978
2979 static bool
2980 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2981 bool optimize_xor, vec<operand_entry *> *ops,
2982 struct range_entry *ranges)
2983 {
2984 int i, j;
2985 bool any_changes = false;
2986 for (i = first; i < length; i++)
2987 {
2988 tree lowi, highi, lowj, highj, type, tem;
2989
2990 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2991 continue;
2992 type = TREE_TYPE (ranges[i].exp);
2993 if (!INTEGRAL_TYPE_P (type))
2994 continue;
2995 lowi = ranges[i].low;
2996 if (lowi == NULL_TREE)
2997 lowi = TYPE_MIN_VALUE (type);
2998 highi = ranges[i].high;
2999 if (highi == NULL_TREE)
3000 continue;
3001 for (j = i + 1; j < length && j < i + 64; j++)
3002 {
3003 bool changes;
3004 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3005 continue;
3006 lowj = ranges[j].low;
3007 if (lowj == NULL_TREE)
3008 continue;
3009 highj = ranges[j].high;
3010 if (highj == NULL_TREE)
3011 highj = TYPE_MAX_VALUE (type);
3012 /* Check lowj > highi. */
3013 tem = fold_binary (GT_EXPR, boolean_type_node,
3014 lowj, highi);
3015 if (tem == NULL_TREE || !integer_onep (tem))
3016 continue;
3017 if (optimize_xor)
3018 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3019 highi, highj, ops,
3020 ranges + i, ranges + j);
3021 else
3022 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3023 highi, highj, ops,
3024 ranges + i, ranges + j);
3025 if (changes)
3026 {
3027 any_changes = true;
3028 break;
3029 }
3030 }
3031 }
3032 return any_changes;
3033 }
3034
3035 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3036 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3037 EXP on success, NULL otherwise. */
3038
3039 static tree
3040 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3041 wide_int *mask, tree *totallowp)
3042 {
3043 tree tem = int_const_binop (MINUS_EXPR, high, low);
3044 if (tem == NULL_TREE
3045 || TREE_CODE (tem) != INTEGER_CST
3046 || TREE_OVERFLOW (tem)
3047 || tree_int_cst_sgn (tem) == -1
3048 || compare_tree_int (tem, prec) != -1)
3049 return NULL_TREE;
3050
3051 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3052 *mask = wi::shifted_mask (0, max, false, prec);
3053 if (TREE_CODE (exp) == BIT_AND_EXPR
3054 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3055 {
3056 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3057 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3058 if (wi::popcount (msk) == 1
3059 && wi::ltu_p (msk, prec - max))
3060 {
3061 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3062 max += msk.to_uhwi ();
3063 exp = TREE_OPERAND (exp, 0);
3064 if (integer_zerop (low)
3065 && TREE_CODE (exp) == PLUS_EXPR
3066 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3067 {
3068 tree ret = TREE_OPERAND (exp, 0);
3069 STRIP_NOPS (ret);
3070 widest_int bias
3071 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3072 TYPE_PRECISION (TREE_TYPE (low))));
3073 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3074 if (totallowp)
3075 {
3076 *totallowp = tbias;
3077 return ret;
3078 }
3079 else if (!tree_int_cst_lt (totallow, tbias))
3080 return NULL_TREE;
3081 bias = wi::to_widest (tbias);
3082 bias -= wi::to_widest (totallow);
3083 if (bias >= 0 && bias < prec - max)
3084 {
3085 *mask = wi::lshift (*mask, bias);
3086 return ret;
3087 }
3088 }
3089 }
3090 }
3091 if (totallowp)
3092 return exp;
3093 if (!tree_int_cst_lt (totallow, low))
3094 return exp;
3095 tem = int_const_binop (MINUS_EXPR, low, totallow);
3096 if (tem == NULL_TREE
3097 || TREE_CODE (tem) != INTEGER_CST
3098 || TREE_OVERFLOW (tem)
3099 || compare_tree_int (tem, prec - max) == 1)
3100 return NULL_TREE;
3101
3102 *mask = wi::lshift (*mask, wi::to_widest (tem));
3103 return exp;
3104 }
3105
3106 /* Attempt to optimize small range tests using a bit test.
3107 E.g.
3108 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3109 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3110 has been transformed by earlier optimizations into:
3111 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3112 Since the whole range 43 through 82 spans fewer than 64 values,
3113 for 64-bit word targets optimize that into:
3114 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
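/* Illustrative sketch (not part of GCC): the membership form of such a
   bit test can be written by hand as

     int
     in_small_set (unsigned x)
     {
       return x - 43u <= 39u
	      && ((1ULL << (x - 43u)) & 0x8F0000004FULL) != 0;
     }

   where the mask bits correspond to the offsets of the ten listed
   values from 43; the != chain in the comment above is simply the
   negation of this membership test.  */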
3115
3116 static bool
3117 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3118 vec<operand_entry *> *ops,
3119 struct range_entry *ranges)
3120 {
3121 int i, j;
3122 bool any_changes = false;
3123 int prec = GET_MODE_BITSIZE (word_mode);
3124 auto_vec<struct range_entry *, 64> candidates;
3125
3126 for (i = first; i < length - 1; i++)
3127 {
3128 tree lowi, highi, lowj, highj, type;
3129
3130 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3131 continue;
3132 type = TREE_TYPE (ranges[i].exp);
3133 if (!INTEGRAL_TYPE_P (type))
3134 continue;
3135 lowi = ranges[i].low;
3136 if (lowi == NULL_TREE)
3137 lowi = TYPE_MIN_VALUE (type);
3138 highi = ranges[i].high;
3139 if (highi == NULL_TREE)
3140 continue;
3141 wide_int mask;
3142 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3143 highi, &mask, &lowi);
3144 if (exp == NULL_TREE)
3145 continue;
3146 bool strict_overflow_p = ranges[i].strict_overflow_p;
3147 candidates.truncate (0);
3148 int end = MIN (i + 64, length);
3149 for (j = i + 1; j < end; j++)
3150 {
3151 tree exp2;
3152 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3153 continue;
3154 if (ranges[j].exp == exp)
3155 ;
3156 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3157 {
3158 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3159 if (exp2 == exp)
3160 ;
3161 else if (TREE_CODE (exp2) == PLUS_EXPR)
3162 {
3163 exp2 = TREE_OPERAND (exp2, 0);
3164 STRIP_NOPS (exp2);
3165 if (exp2 != exp)
3166 continue;
3167 }
3168 else
3169 continue;
3170 }
3171 else
3172 continue;
3173 lowj = ranges[j].low;
3174 if (lowj == NULL_TREE)
3175 continue;
3176 highj = ranges[j].high;
3177 if (highj == NULL_TREE)
3178 highj = TYPE_MAX_VALUE (type);
3179 wide_int mask2;
3180 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3181 highj, &mask2, NULL);
3182 if (exp2 != exp)
3183 continue;
3184 mask |= mask2;
3185 strict_overflow_p |= ranges[j].strict_overflow_p;
3186 candidates.safe_push (&ranges[j]);
3187 }
3188
3189 /* If every possible relative value of the expression is a valid shift
3190 amount, then we can merge the entry test into the bit test. In that
3191 case use the bit test if we would otherwise need 2 or more comparisons;
3192 in the other cases the threshold is 3 comparisons. */
3193 wide_int min, max;
3194 bool entry_test_needed;
3195 if (TREE_CODE (exp) == SSA_NAME
3196 && get_range_info (exp, &min, &max) == VR_RANGE
3197 && wi::leu_p (max - min, prec - 1))
3198 {
3199 wide_int ilowi = wi::to_wide (lowi);
3200 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3201 {
3202 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3203 mask = wi::lshift (mask, ilowi - min);
3204 }
3205 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3206 {
3207 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3208 mask = wi::lrshift (mask, min - ilowi);
3209 }
3210 entry_test_needed = false;
3211 }
3212 else
3213 entry_test_needed = true;
3214 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3215 {
3216 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3217 wi::to_widest (lowi)
3218 + prec - 1 - wi::clz (mask));
3219 operand_entry *oe = (*ops)[ranges[i].idx];
3220 tree op = oe->op;
3221 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3222 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3223 location_t loc = gimple_location (stmt);
3224 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3225
3226 /* See if it isn't cheaper to pretend the minimum value of the
3227 range is 0, if the maximum value is small enough.
3228 We can then avoid subtracting the minimum value, but the
3229 mask constant could perhaps be more expensive. */
3230 if (compare_tree_int (lowi, 0) > 0
3231 && compare_tree_int (high, prec) < 0)
3232 {
3233 int cost_diff;
3234 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3235 rtx reg = gen_raw_REG (word_mode, 10000);
3236 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3237 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3238 GEN_INT (-m)),
3239 word_mode, speed_p);
3240 rtx r = immed_wide_int_const (mask, word_mode);
3241 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3242 word_mode, speed_p);
3243 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3244 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3245 word_mode, speed_p);
3246 if (cost_diff > 0)
3247 {
3248 mask = wi::lshift (mask, m);
3249 lowi = build_zero_cst (TREE_TYPE (lowi));
3250 }
3251 }
3252
3253 tree tem;
3254 if (entry_test_needed)
3255 {
3256 tem = build_range_check (loc, optype, unshare_expr (exp),
3257 false, lowi, high);
3258 if (tem == NULL_TREE || is_gimple_val (tem))
3259 continue;
3260 }
3261 else
3262 tem = NULL_TREE;
3263 tree etype = unsigned_type_for (TREE_TYPE (exp));
3264 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3265 fold_convert_loc (loc, etype, exp),
3266 fold_convert_loc (loc, etype, lowi));
3267 exp = fold_convert_loc (loc, integer_type_node, exp);
3268 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3269 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3270 build_int_cst (word_type, 1), exp);
3271 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3272 wide_int_to_tree (word_type, mask));
3273 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3274 build_zero_cst (word_type));
3275 if (is_gimple_val (exp))
3276 continue;
3277
3278 /* The shift might have undefined behavior if TEM is true,
3279 but reassociate_bb isn't prepared to have basic blocks
3280 split when it is running. So, temporarily emit a code
3281 with BIT_IOR_EXPR instead of &&, and fix it up in
3282 branch_fixup. */
3283 gimple_seq seq = NULL;
3284 if (tem)
3285 {
3286 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3287 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3288 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3289 }
3290 gimple_seq seq2;
3291 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3292 gimple_seq_add_seq_without_update (&seq, seq2);
3293 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3294 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3295 if (tem)
3296 {
3297 gimple *g = gimple_build_assign (make_ssa_name (optype),
3298 BIT_IOR_EXPR, tem, exp);
3299 gimple_set_location (g, loc);
3300 gimple_seq_add_stmt_without_update (&seq, g);
3301 exp = gimple_assign_lhs (g);
3302 }
3303 tree val = build_zero_cst (optype);
3304 if (update_range_test (&ranges[i], NULL, candidates.address (),
3305 candidates.length (), opcode, ops, exp,
3306 seq, false, val, val, strict_overflow_p))
3307 {
3308 any_changes = true;
3309 if (tem)
3310 reassoc_branch_fixups.safe_push (tem);
3311 }
3312 else
3313 gimple_seq_discard (seq);
3314 }
3315 }
3316 return any_changes;
3317 }
3318
3319 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3320 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3321 Also, handle x < C && y < C && z < C where C is a power of two as
3322 (x | y | z) < C. */
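/* Illustrative sketch (not part of GCC): the last form above relies on
   C being a power of two, e.g. for unsigned operands

     int
     all_below_16 (unsigned x, unsigned y, unsigned z)
     {
       return x < 16 && y < 16 && z < 16;
     }

   is equivalent to

     int
     all_below_16 (unsigned x, unsigned y, unsigned z)
     {
       return (x | y | z) < 16;
     }

   because (x | y | z) < 16 holds exactly when no operand has a bit set
   at or above bit 4.  */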
3323
3324 static bool
3325 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3326 vec<operand_entry *> *ops,
3327 struct range_entry *ranges)
3328 {
3329 int i;
3330 unsigned int b;
3331 bool any_changes = false;
3332 auto_vec<int, 128> buckets;
3333 auto_vec<int, 32> chains;
3334 auto_vec<struct range_entry *, 32> candidates;
3335
3336 for (i = first; i < length; i++)
3337 {
3338 int idx;
3339
3340 if (ranges[i].exp == NULL_TREE
3341 || TREE_CODE (ranges[i].exp) != SSA_NAME
3342 || !ranges[i].in_p
3343 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3344 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3345 continue;
3346
3347 if (ranges[i].low != NULL_TREE
3348 && ranges[i].high != NULL_TREE
3349 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3350 {
3351 idx = !integer_zerop (ranges[i].low);
3352 if (idx && !integer_all_onesp (ranges[i].low))
3353 continue;
3354 }
3355 else if (ranges[i].high != NULL_TREE
3356 && TREE_CODE (ranges[i].high) == INTEGER_CST)
3357 {
3358 wide_int w = wi::to_wide (ranges[i].high);
3359 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3360 int l = wi::clz (w);
3361 idx = 2;
3362 if (l <= 0
3363 || l >= prec
3364 || w != wi::mask (prec - l, false, prec))
3365 continue;
3366 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3367 && ranges[i].low == NULL_TREE)
3368 || (ranges[i].low
3369 && integer_zerop (ranges[i].low))))
3370 continue;
3371 }
3372 else
3373 continue;
3374
3375 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx;
3376 if (buckets.length () <= b)
3377 buckets.safe_grow_cleared (b + 1, true);
3378 if (chains.length () <= (unsigned) i)
3379 chains.safe_grow (i + 1, true);
3380 chains[i] = buckets[b];
3381 buckets[b] = i + 1;
3382 }
3383
3384 FOR_EACH_VEC_ELT (buckets, b, i)
3385 if (i && chains[i - 1])
3386 {
3387 int j, k = i;
3388 if ((b % 3) == 2)
3389 {
3390 /* When ranges[X - 1].high + 1 is a power of two,
3391 we need to process the same bucket up to
3392 precision - 1 times, each time splitting the entries
3393 with the same high bound into one chain and the
3394 rest into another one to be processed later. */
3395 int this_prev = i;
3396 int other_prev = 0;
3397 for (j = chains[i - 1]; j; j = chains[j - 1])
3398 {
3399 if (tree_int_cst_equal (ranges[i - 1].high,
3400 ranges[j - 1].high))
3401 {
3402 chains[this_prev - 1] = j;
3403 this_prev = j;
3404 }
3405 else if (other_prev == 0)
3406 {
3407 buckets[b] = j;
3408 other_prev = j;
3409 }
3410 else
3411 {
3412 chains[other_prev - 1] = j;
3413 other_prev = j;
3414 }
3415 }
3416 chains[this_prev - 1] = 0;
3417 if (other_prev)
3418 chains[other_prev - 1] = 0;
3419 if (chains[i - 1] == 0)
3420 {
3421 if (other_prev)
3422 b--;
3423 continue;
3424 }
3425 }
3426 for (j = chains[i - 1]; j; j = chains[j - 1])
3427 {
3428 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3429 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3430 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3431 k = j;
3432 }
3433 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3434 tree type2 = NULL_TREE;
3435 bool strict_overflow_p = false;
3436 candidates.truncate (0);
3437 for (j = i; j; j = chains[j - 1])
3438 {
3439 tree type = TREE_TYPE (ranges[j - 1].exp);
3440 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3441 if (j == k
3442 || useless_type_conversion_p (type1, type))
3443 ;
3444 else if (type2 == NULL_TREE
3445 || useless_type_conversion_p (type2, type))
3446 {
3447 if (type2 == NULL_TREE)
3448 type2 = type;
3449 candidates.safe_push (&ranges[j - 1]);
3450 }
3451 }
3452 unsigned l = candidates.length ();
3453 for (j = i; j; j = chains[j - 1])
3454 {
3455 tree type = TREE_TYPE (ranges[j - 1].exp);
3456 if (j == k)
3457 continue;
3458 if (useless_type_conversion_p (type1, type))
3459 ;
3460 else if (type2 == NULL_TREE
3461 || useless_type_conversion_p (type2, type))
3462 continue;
3463 candidates.safe_push (&ranges[j - 1]);
3464 }
3465 gimple_seq seq = NULL;
3466 tree op = NULL_TREE;
3467 unsigned int id;
3468 struct range_entry *r;
3469 candidates.safe_push (&ranges[k - 1]);
3470 FOR_EACH_VEC_ELT (candidates, id, r)
3471 {
3472 gimple *g;
3473 if (id == 0)
3474 {
3475 op = r->exp;
3476 continue;
3477 }
3478 if (id == l)
3479 {
3480 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3481 gimple_seq_add_stmt_without_update (&seq, g);
3482 op = gimple_assign_lhs (g);
3483 }
3484 tree type = TREE_TYPE (r->exp);
3485 tree exp = r->exp;
3486 if (id >= l && !useless_type_conversion_p (type1, type))
3487 {
3488 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3489 gimple_seq_add_stmt_without_update (&seq, g);
3490 exp = gimple_assign_lhs (g);
3491 }
3492 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3493 (b % 3) == 1
3494 ? BIT_AND_EXPR : BIT_IOR_EXPR, op, exp);
3495 gimple_seq_add_stmt_without_update (&seq, g);
3496 op = gimple_assign_lhs (g);
3497 }
3498 candidates.pop ();
3499 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3500 candidates.length (), opcode, ops, op,
3501 seq, true, ranges[k - 1].low,
3502 ranges[k - 1].high, strict_overflow_p))
3503 any_changes = true;
3504 else
3505 gimple_seq_discard (seq);
3506 if ((b % 3) == 2 && buckets[b] != i)
3507 /* There is more work to do for this bucket. */
3508 b--;
3509 }
3510
3511 return any_changes;
3512 }
3513
3514 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3515 a >= 0 && a < b into (unsigned) a < (unsigned) b
3516 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
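/* Illustrative sketch (not part of GCC): for signed A with B known to
   be non-negative, the rewrite above corresponds to

     int
     in_bound (int a, int b)
     {
       return a >= 0 && a < b;
     }

   becoming the equivalent of

     int
     in_bound (int a, int b)
     {
       return (unsigned) a < (unsigned) b;
     }

   since a negative A converts to a large unsigned value that can never
   be below the non-negative B.  */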
3517
3518 static bool
3519 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3520 vec<operand_entry *> *ops,
3521 struct range_entry *ranges,
3522 basic_block first_bb)
3523 {
3524 int i;
3525 bool any_changes = false;
3526 hash_map<tree, int> *map = NULL;
3527
3528 for (i = first; i < length; i++)
3529 {
3530 if (ranges[i].exp == NULL_TREE
3531 || TREE_CODE (ranges[i].exp) != SSA_NAME
3532 || !ranges[i].in_p)
3533 continue;
3534
3535 tree type = TREE_TYPE (ranges[i].exp);
3536 if (!INTEGRAL_TYPE_P (type)
3537 || TYPE_UNSIGNED (type)
3538 || ranges[i].low == NULL_TREE
3539 || !integer_zerop (ranges[i].low)
3540 || ranges[i].high != NULL_TREE)
3541 continue;
3542 /* EXP >= 0 here. */
3543 if (map == NULL)
3544 map = new hash_map <tree, int>;
3545 map->put (ranges[i].exp, i);
3546 }
3547
3548 if (map == NULL)
3549 return false;
3550
3551 for (i = 0; i < length; i++)
3552 {
3553 bool in_p = ranges[i].in_p;
3554 if (ranges[i].low == NULL_TREE
3555 || ranges[i].high == NULL_TREE)
3556 continue;
3557 if (!integer_zerop (ranges[i].low)
3558 || !integer_zerop (ranges[i].high))
3559 {
3560 if (ranges[i].exp
3561 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3562 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3563 && integer_onep (ranges[i].low)
3564 && integer_onep (ranges[i].high))
3565 in_p = !in_p;
3566 else
3567 continue;
3568 }
3569
3570 gimple *stmt;
3571 tree_code ccode;
3572 tree rhs1, rhs2;
3573 if (ranges[i].exp)
3574 {
3575 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3576 continue;
3577 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3578 if (!is_gimple_assign (stmt))
3579 continue;
3580 ccode = gimple_assign_rhs_code (stmt);
3581 rhs1 = gimple_assign_rhs1 (stmt);
3582 rhs2 = gimple_assign_rhs2 (stmt);
3583 }
3584 else
3585 {
3586 operand_entry *oe = (*ops)[ranges[i].idx];
3587 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3588 if (gimple_code (stmt) != GIMPLE_COND)
3589 continue;
3590 ccode = gimple_cond_code (stmt);
3591 rhs1 = gimple_cond_lhs (stmt);
3592 rhs2 = gimple_cond_rhs (stmt);
3593 }
3594
3595 if (TREE_CODE (rhs1) != SSA_NAME
3596 || rhs2 == NULL_TREE
3597 || TREE_CODE (rhs2) != SSA_NAME)
3598 continue;
3599
3600 switch (ccode)
3601 {
3602 case GT_EXPR:
3603 case GE_EXPR:
3604 case LT_EXPR:
3605 case LE_EXPR:
3606 break;
3607 default:
3608 continue;
3609 }
3610 if (in_p)
3611 ccode = invert_tree_comparison (ccode, false);
3612 switch (ccode)
3613 {
3614 case GT_EXPR:
3615 case GE_EXPR:
3616 std::swap (rhs1, rhs2);
3617 ccode = swap_tree_comparison (ccode);
3618 break;
3619 case LT_EXPR:
3620 case LE_EXPR:
3621 break;
3622 default:
3623 gcc_unreachable ();
3624 }
3625
3626 int *idx = map->get (rhs1);
3627 if (idx == NULL)
3628 continue;
3629
3630 /* maybe_optimize_range_tests allows statements without side-effects
3631 in the basic blocks as long as they are consumed in the same bb.
3632 Make sure rhs2's def stmt is not among them, otherwise we can't
3633 safely use get_nonzero_bits on it. E.g. in:
3634 # RANGE [-83, 1] NONZERO 173
3635 # k_32 = PHI <k_47(13), k_12(9)>
3636 ...
3637 if (k_32 >= 0)
3638 goto <bb 5>; [26.46%]
3639 else
3640 goto <bb 9>; [73.54%]
3641
3642 <bb 5> [local count: 140323371]:
3643 # RANGE [0, 1] NONZERO 1
3644 _5 = (int) k_32;
3645 # RANGE [0, 4] NONZERO 4
3646 _21 = _5 << 2;
3647 # RANGE [0, 4] NONZERO 4
3648 iftmp.0_44 = (char) _21;
3649 if (k_32 < iftmp.0_44)
3650 goto <bb 6>; [84.48%]
3651 else
3652 goto <bb 9>; [15.52%]
3653 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3654 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3655 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3656 those stmts even for negative k_32 and the value ranges would be no
3657 longer guaranteed and so the optimization would be invalid. */
3658 while (opcode == ERROR_MARK)
3659 {
3660 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3661 basic_block bb2 = gimple_bb (g);
3662 if (bb2
3663 && bb2 != first_bb
3664 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3665 {
3666 /* As an exception, handle a few common cases. */
3667 if (gimple_assign_cast_p (g)
3668 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3669 {
3670 tree op0 = gimple_assign_rhs1 (g);
3671 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3672 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3673 > TYPE_PRECISION (TREE_TYPE (op0))))
3674 /* Zero-extension is always ok. */
3675 break;
3676 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3677 == TYPE_PRECISION (TREE_TYPE (op0))
3678 && TREE_CODE (op0) == SSA_NAME)
3679 {
3680 /* Cast from signed to unsigned or vice versa. Retry
3681 with the op0 as new rhs2. */
3682 rhs2 = op0;
3683 continue;
3684 }
3685 }
3686 else if (is_gimple_assign (g)
3687 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3688 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3689 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3690 /* Masking with INTEGER_CST with MSB clear is always ok
3691 too. */
3692 break;
3693 rhs2 = NULL_TREE;
3694 }
3695 break;
3696 }
3697 if (rhs2 == NULL_TREE)
3698 continue;
3699
3700 wide_int nz = get_nonzero_bits (rhs2);
3701 if (wi::neg_p (nz))
3702 continue;
3703
3704 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3705 and RHS2 is known to be RHS2 >= 0. */
3706 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3707
3708 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3709 if ((ranges[*idx].strict_overflow_p
3710 || ranges[i].strict_overflow_p)
3711 && issue_strict_overflow_warning (wc))
3712 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3713 "assuming signed overflow does not occur "
3714 "when simplifying range test");
3715
3716 if (dump_file && (dump_flags & TDF_DETAILS))
3717 {
3718 struct range_entry *r = &ranges[*idx];
3719 fprintf (dump_file, "Optimizing range test ");
3720 print_generic_expr (dump_file, r->exp);
3721 fprintf (dump_file, " +[");
3722 print_generic_expr (dump_file, r->low);
3723 fprintf (dump_file, ", ");
3724 print_generic_expr (dump_file, r->high);
3725 fprintf (dump_file, "] and comparison ");
3726 print_generic_expr (dump_file, rhs1);
3727 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3728 print_generic_expr (dump_file, rhs2);
3729 fprintf (dump_file, "\n into (");
3730 print_generic_expr (dump_file, utype);
3731 fprintf (dump_file, ") ");
3732 print_generic_expr (dump_file, rhs1);
3733 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3734 print_generic_expr (dump_file, utype);
3735 fprintf (dump_file, ") ");
3736 print_generic_expr (dump_file, rhs2);
3737 fprintf (dump_file, "\n");
3738 }
3739
3740 operand_entry *oe = (*ops)[ranges[i].idx];
3741 ranges[i].in_p = 0;
3742 if (opcode == BIT_IOR_EXPR
3743 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3744 {
3745 ranges[i].in_p = 1;
3746 ccode = invert_tree_comparison (ccode, false);
3747 }
3748
3749 unsigned int uid = gimple_uid (stmt);
3750 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3751 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3752 gimple_set_uid (g, uid);
3753 rhs1 = gimple_assign_lhs (g);
3754 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3755 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3756 {
3757 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3758 gimple_set_uid (g, uid);
3759 rhs2 = gimple_assign_lhs (g);
3760 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3761 }
3762 if (tree_swap_operands_p (rhs1, rhs2))
3763 {
3764 std::swap (rhs1, rhs2);
3765 ccode = swap_tree_comparison (ccode);
3766 }
3767 if (gimple_code (stmt) == GIMPLE_COND)
3768 {
3769 gcond *c = as_a <gcond *> (stmt);
3770 gimple_cond_set_code (c, ccode);
3771 gimple_cond_set_lhs (c, rhs1);
3772 gimple_cond_set_rhs (c, rhs2);
3773 update_stmt (stmt);
3774 }
3775 else
3776 {
3777 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3778 if (!INTEGRAL_TYPE_P (ctype)
3779 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3780 && TYPE_PRECISION (ctype) != 1))
3781 ctype = boolean_type_node;
3782 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3783 gimple_set_uid (g, uid);
3784 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3785 if (oe->op && ctype != TREE_TYPE (oe->op))
3786 {
3787 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3788 NOP_EXPR, gimple_assign_lhs (g));
3789 gimple_set_uid (g, uid);
3790 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3791 }
3792 ranges[i].exp = gimple_assign_lhs (g);
3793 oe->op = ranges[i].exp;
3794 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3795 ranges[i].high = ranges[i].low;
3796 }
3797 ranges[i].strict_overflow_p = false;
3798 oe = (*ops)[ranges[*idx].idx];
3799 /* Now change all the other range test immediate uses, so that
3800 those tests will be optimized away. */
3801 if (opcode == ERROR_MARK)
3802 {
3803 if (oe->op)
3804 oe->op = build_int_cst (TREE_TYPE (oe->op),
3805 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3806 else
3807 oe->op = (oe->rank == BIT_IOR_EXPR
3808 ? boolean_false_node : boolean_true_node);
3809 }
3810 else
3811 oe->op = error_mark_node;
3812 ranges[*idx].exp = NULL_TREE;
3813 ranges[*idx].low = NULL_TREE;
3814 ranges[*idx].high = NULL_TREE;
3815 any_changes = true;
3816 }
3817
3818 delete map;
3819 return any_changes;
3820 }
3821
3822 /* Optimize range tests, similarly to how fold_range_test optimizes
3823 them on trees. The tree code for the binary
3824 operation between all the operands is OPCODE.
3825 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3826 maybe_optimize_range_tests for inter-bb range optimization.
3827 In that case, if oe->op is NULL, oe->id is the index of the bb whose
3828 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3829 the actual opcode.
3830 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
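/* Illustrative example (SSA names invented): a chain such as
   x_1 == 3 || x_1 == 4 is entered as two range entries [3, 3] and
   [4, 4]; merge_ranges combines them into the single range [3, 4],
   and update_range_test then replaces both tests with one comparison
   roughly equivalent to (unsigned) x_1 - 3 <= 1.  */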
3831
3832 static bool
3833 optimize_range_tests (enum tree_code opcode,
3834 vec<operand_entry *> *ops, basic_block first_bb)
3835 {
3836 unsigned int length = ops->length (), i, j, first;
3837 operand_entry *oe;
3838 struct range_entry *ranges;
3839 bool any_changes = false;
3840
3841 if (length == 1)
3842 return false;
3843
3844 ranges = XNEWVEC (struct range_entry, length);
3845 for (i = 0; i < length; i++)
3846 {
3847 oe = (*ops)[i];
3848 ranges[i].idx = i;
3849 init_range_entry (ranges + i, oe->op,
3850 oe->op
3851 ? NULL
3852 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3853 /* For | invert it now, we will invert it again before emitting
3854 the optimized expression. */
3855 if (opcode == BIT_IOR_EXPR
3856 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3857 ranges[i].in_p = !ranges[i].in_p;
3858 }
3859
3860 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3861 for (i = 0; i < length; i++)
3862 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3863 break;
3864
3865 /* Try to merge ranges. */
3866 for (first = i; i < length; i++)
3867 {
3868 tree low = ranges[i].low;
3869 tree high = ranges[i].high;
3870 int in_p = ranges[i].in_p;
3871 bool strict_overflow_p = ranges[i].strict_overflow_p;
3872 int update_fail_count = 0;
3873
3874 for (j = i + 1; j < length; j++)
3875 {
3876 if (ranges[i].exp != ranges[j].exp)
3877 break;
3878 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3879 ranges[j].in_p, ranges[j].low, ranges[j].high))
3880 break;
3881 strict_overflow_p |= ranges[j].strict_overflow_p;
3882 }
3883
3884 if (j == i + 1)
3885 continue;
3886
3887 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3888 opcode, ops, ranges[i].exp, NULL, in_p,
3889 low, high, strict_overflow_p))
3890 {
3891 i = j - 1;
3892 any_changes = true;
3893 }
3894 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3895 while update_range_test would fail. */
3896 else if (update_fail_count == 64)
3897 i = j - 1;
3898 else
3899 ++update_fail_count;
3900 }
3901
3902 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3903 ops, ranges);
3904
3905 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3906 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3907 ops, ranges);
3908 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3909 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3910 ops, ranges);
3911 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3912 ops, ranges);
3913 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3914 ranges, first_bb);
3915
3916 if (any_changes && opcode != ERROR_MARK)
3917 {
3918 j = 0;
3919 FOR_EACH_VEC_ELT (*ops, i, oe)
3920 {
3921 if (oe->op == error_mark_node)
3922 continue;
3923 else if (i != j)
3924 (*ops)[j] = oe;
3925 j++;
3926 }
3927 ops->truncate (j);
3928 }
3929
3930 XDELETEVEC (ranges);
3931 return any_changes;
3932 }
3933
3934 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3935 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3936 otherwise the comparison code. TYPE is a return value that is set
3937 to the type of the comparison. */
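/* Illustrative pattern (SSA names invented): for
     c_1 = a_2 < b_3;
     v_4 = VEC_COND_EXPR <c_1, { -1, ... }, { 0, ... }>;
   the routine returns LT_EXPR, sets *LHS to a_2 and *RHS to b_3, and
   points *RETS at the comparison statement defining c_1.  */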
3938
3939 static tree_code
3940 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
3941 tree *lhs, tree *rhs, gassign **vcond)
3942 {
3943 if (TREE_CODE (var) != SSA_NAME)
3944 return ERROR_MARK;
3945
3946 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3947 if (stmt == NULL)
3948 return ERROR_MARK;
3949 if (vcond)
3950 *vcond = stmt;
3951
3952 /* ??? If we start creating more COND_EXPR, we could perform
3953 this same optimization with them. For now, simplify. */
3954 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3955 return ERROR_MARK;
3956
3957 tree cond = gimple_assign_rhs1 (stmt);
3958 tree_code cmp = TREE_CODE (cond);
3959 if (cmp != SSA_NAME)
3960 return ERROR_MARK;
3961
3962 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
3963 if (assign == NULL
3964 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
3965 return ERROR_MARK;
3966
3967 cmp = gimple_assign_rhs_code (assign);
3968 if (lhs)
3969 *lhs = gimple_assign_rhs1 (assign);
3970 if (rhs)
3971 *rhs = gimple_assign_rhs2 (assign);
3972
3973 /* ??? For now, allow only canonical true and false result vectors.
3974 We could expand this to other constants should the need arise,
3975 but at the moment we don't create them. */
3976 tree t = gimple_assign_rhs2 (stmt);
3977 tree f = gimple_assign_rhs3 (stmt);
3978 bool inv;
3979 if (integer_all_onesp (t))
3980 inv = false;
3981 else if (integer_all_onesp (f))
3982 {
3983 cmp = invert_tree_comparison (cmp, false);
3984 inv = true;
3985 }
3986 else
3987 return ERROR_MARK;
3988 if (!integer_zerop (f))
3989 return ERROR_MARK;
3990
3991 /* Success! */
3992 if (rets)
3993 *rets = assign;
3994 if (reti)
3995 *reti = inv;
3996 if (type)
3997 *type = TREE_TYPE (cond);
3998 return cmp;
3999 }
4000
4001 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4002 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
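/* Sketch of the transformation (names invented): when two entries in
   the ops vector are VEC_COND_EXPR results whose conditions compare
   the same operands, maybe_fold_and_comparisons or
   maybe_fold_or_comparisons may combine the two conditions into one;
   the first VEC_COND_EXPR is then rewritten to use the combined
   condition and the second operand entry is dropped from the vector.  */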
4003
4004 static bool
4005 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4006 {
4007 unsigned int length = ops->length (), i, j;
4008 bool any_changes = false;
4009
4010 if (length == 1)
4011 return false;
4012
4013 for (i = 0; i < length; ++i)
4014 {
4015 tree elt0 = (*ops)[i]->op;
4016
4017 gassign *stmt0, *vcond0;
4018 bool invert;
4019 tree type, lhs0, rhs0;
4020 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4021 &rhs0, &vcond0);
4022 if (cmp0 == ERROR_MARK)
4023 continue;
4024
4025 for (j = i + 1; j < length; ++j)
4026 {
4027 tree &elt1 = (*ops)[j]->op;
4028
4029 gassign *stmt1, *vcond1;
4030 tree lhs1, rhs1;
4031 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4032 &rhs1, &vcond1);
4033 if (cmp1 == ERROR_MARK)
4034 continue;
4035
4036 tree comb;
4037 if (opcode == BIT_AND_EXPR)
4038 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4039 cmp1, lhs1, rhs1);
4040 else if (opcode == BIT_IOR_EXPR)
4041 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4042 cmp1, lhs1, rhs1);
4043 else
4044 gcc_unreachable ();
4045 if (comb == NULL)
4046 continue;
4047
4048 /* Success! */
4049 if (dump_file && (dump_flags & TDF_DETAILS))
4050 {
4051 fprintf (dump_file, "Transforming ");
4052 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4053 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4054 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4055 fprintf (dump_file, " into ");
4056 print_generic_expr (dump_file, comb);
4057 fputc ('\n', dump_file);
4058 }
4059
4060 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4061 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4062 true, GSI_SAME_STMT);
4063 if (invert)
4064 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4065 gimple_assign_rhs3_ptr (vcond0));
4066 gimple_assign_set_rhs1 (vcond0, exp);
4067 update_stmt (vcond0);
4068
4069 elt1 = error_mark_node;
4070 any_changes = true;
4071 }
4072 }
4073
4074 if (any_changes)
4075 {
4076 operand_entry *oe;
4077 j = 0;
4078 FOR_EACH_VEC_ELT (*ops, i, oe)
4079 {
4080 if (oe->op == error_mark_node)
4081 continue;
4082 else if (i != j)
4083 (*ops)[j] = oe;
4084 j++;
4085 }
4086 ops->truncate (j);
4087 }
4088
4089 return any_changes;
4090 }
4091
4092 /* Return true if STMT is a cast like:
4093 <bb N>:
4094 ...
4095 _123 = (int) _234;
4096
4097 <bb M>:
4098 # _345 = PHI <_123(N), 1(...), 1(...)>
4099 where _234 has boolean type, _123 has a single use and
4100 bb N has a single successor M. This is commonly used in
4101 the last block of a range test.
4102
4103 Also return true if STMT is a tcc_comparison like:
4104 <bb N>:
4105 ...
4106 _234 = a_2(D) == 2;
4107
4108 <bb M>:
4109 # _345 = PHI <_234(N), 1(...), 1(...)>
4110 _346 = (int) _345;
4111 where _234 has boolean type, a single use and
4112 bb N has a single successor M. This is commonly used in
4113 the last block of a range test. */
4114
4115 static bool
4116 final_range_test_p (gimple *stmt)
4117 {
4118 basic_block bb, rhs_bb, lhs_bb;
4119 edge e;
4120 tree lhs, rhs;
4121 use_operand_p use_p;
4122 gimple *use_stmt;
4123
4124 if (!gimple_assign_cast_p (stmt)
4125 && (!is_gimple_assign (stmt)
4126 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4127 != tcc_comparison)))
4128 return false;
4129 bb = gimple_bb (stmt);
4130 if (!single_succ_p (bb))
4131 return false;
4132 e = single_succ_edge (bb);
4133 if (e->flags & EDGE_COMPLEX)
4134 return false;
4135
4136 lhs = gimple_assign_lhs (stmt);
4137 rhs = gimple_assign_rhs1 (stmt);
4138 if (gimple_assign_cast_p (stmt)
4139 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4140 || TREE_CODE (rhs) != SSA_NAME
4141 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4142 return false;
4143
4144 if (!gimple_assign_cast_p (stmt)
4145 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4146 return false;
4147
4148 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4149 if (!single_imm_use (lhs, &use_p, &use_stmt))
4150 return false;
4151
4152 if (gimple_code (use_stmt) != GIMPLE_PHI
4153 || gimple_bb (use_stmt) != e->dest)
4154 return false;
4155
4156 /* And that the rhs is defined in the same loop. */
4157 if (gimple_assign_cast_p (stmt))
4158 {
4159 if (TREE_CODE (rhs) != SSA_NAME
4160 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4161 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4162 return false;
4163 }
4164 else
4165 {
4166 if (TREE_CODE (lhs) != SSA_NAME
4167 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4168 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4169 return false;
4170 }
4171
4172 return true;
4173 }
4174
4175 /* Return true if BB is a suitable basic block for inter-bb range test
4176 optimization. If BACKWARD is true, BB should be the only predecessor
4177 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4178 or compared with to find a common basic block to which all conditions
4179 branch if true resp. false. If BACKWARD is false, TEST_BB should
4180 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4181 TEST_BB is a bb ending in a condition where the edge to the non-*OTHER_BB
4182 block points to an empty block that falls through into *OTHER_BB and
4183 the phi args match that path. */
4184
4185 static bool
4186 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4187 bool *test_swapped_p, bool backward)
4188 {
4189 edge_iterator ei, ei2;
4190 edge e, e2;
4191 gimple *stmt;
4192 gphi_iterator gsi;
4193 bool other_edge_seen = false;
4194 bool is_cond;
4195
4196 if (test_bb == bb)
4197 return false;
4198 /* Check last stmt first. */
4199 stmt = last_stmt (bb);
4200 if (stmt == NULL
4201 || (gimple_code (stmt) != GIMPLE_COND
4202 && (backward || !final_range_test_p (stmt)))
4203 || gimple_visited_p (stmt)
4204 || stmt_could_throw_p (cfun, stmt)
4205 || *other_bb == bb)
4206 return false;
4207 is_cond = gimple_code (stmt) == GIMPLE_COND;
4208 if (is_cond)
4209 {
4210 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4211 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4212 to *OTHER_BB (if not set yet, try to find it out). */
4213 if (EDGE_COUNT (bb->succs) != 2)
4214 return false;
4215 FOR_EACH_EDGE (e, ei, bb->succs)
4216 {
4217 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4218 return false;
4219 if (e->dest == test_bb)
4220 {
4221 if (backward)
4222 continue;
4223 else
4224 return false;
4225 }
4226 if (e->dest == bb)
4227 return false;
4228 if (*other_bb == NULL)
4229 {
4230 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4231 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4232 return false;
4233 else if (e->dest == e2->dest)
4234 *other_bb = e->dest;
4235 if (*other_bb == NULL)
4236 return false;
4237 }
4238 if (e->dest == *other_bb)
4239 other_edge_seen = true;
4240 else if (backward)
4241 return false;
4242 }
4243 if (*other_bb == NULL || !other_edge_seen)
4244 return false;
4245 }
4246 else if (single_succ (bb) != *other_bb)
4247 return false;
4248
4249 /* Now check all PHIs of *OTHER_BB. */
4250 e = find_edge (bb, *other_bb);
4251 e2 = find_edge (test_bb, *other_bb);
4252 retry:;
4253 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4254 {
4255 gphi *phi = gsi.phi ();
4256 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4257 corresponding to BB and TEST_BB predecessor must be the same. */
4258 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4259 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4260 {
4261 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4262 one of the PHIs should have the lhs of the last stmt in
4263 that block as PHI arg and that PHI should have 0 or 1
4264 corresponding to it in all other range test basic blocks
4265 considered. */
4266 if (!is_cond)
4267 {
4268 if (gimple_phi_arg_def (phi, e->dest_idx)
4269 == gimple_assign_lhs (stmt)
4270 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4271 || integer_onep (gimple_phi_arg_def (phi,
4272 e2->dest_idx))))
4273 continue;
4274 }
4275 else
4276 {
4277 gimple *test_last = last_stmt (test_bb);
4278 if (gimple_code (test_last) == GIMPLE_COND)
4279 {
4280 if (backward ? e2->src != test_bb : e->src != bb)
4281 return false;
4282
4283 /* For last_bb, handle also:
4284 if (x_3(D) == 3)
4285 goto <bb 6>; [34.00%]
4286 else
4287 goto <bb 7>; [66.00%]
4288
4289 <bb 6> [local count: 79512730]:
4290
4291 <bb 7> [local count: 1073741824]:
4292 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4293 where bb 7 is *OTHER_BB, but the PHI values from the
4294 earlier bbs match the path through the empty bb
4295 in between. */
4296 edge e3;
4297 if (backward)
4298 e3 = EDGE_SUCC (test_bb,
4299 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4300 else
4301 e3 = EDGE_SUCC (bb,
4302 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4303 if (empty_block_p (e3->dest)
4304 && single_succ_p (e3->dest)
4305 && single_succ (e3->dest) == *other_bb
4306 && single_pred_p (e3->dest)
4307 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4308 {
4309 if (backward)
4310 e2 = single_succ_edge (e3->dest);
4311 else
4312 e = single_succ_edge (e3->dest);
4313 if (test_swapped_p)
4314 *test_swapped_p = true;
4315 goto retry;
4316 }
4317 }
4318 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4319 == gimple_assign_lhs (test_last)
4320 && (integer_zerop (gimple_phi_arg_def (phi,
4321 e->dest_idx))
4322 || integer_onep (gimple_phi_arg_def (phi,
4323 e->dest_idx))))
4324 continue;
4325 }
4326
4327 return false;
4328 }
4329 }
4330 return true;
4331 }
4332
4333 /* Return true if BB doesn't have side-effects that would disallow
4334 range test optimization, all SSA_NAMEs set in the bb are consumed
4335 in the bb, and there are no PHIs. */
4336
4337 bool
4338 no_side_effect_bb (basic_block bb)
4339 {
4340 gimple_stmt_iterator gsi;
4341 gimple *last;
4342
4343 if (!gimple_seq_empty_p (phi_nodes (bb)))
4344 return false;
4345 last = last_stmt (bb);
4346 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4347 {
4348 gimple *stmt = gsi_stmt (gsi);
4349 tree lhs;
4350 imm_use_iterator imm_iter;
4351 use_operand_p use_p;
4352
4353 if (is_gimple_debug (stmt))
4354 continue;
4355 if (gimple_has_side_effects (stmt))
4356 return false;
4357 if (stmt == last)
4358 return true;
4359 if (!is_gimple_assign (stmt))
4360 return false;
4361 lhs = gimple_assign_lhs (stmt);
4362 if (TREE_CODE (lhs) != SSA_NAME)
4363 return false;
4364 if (gimple_assign_rhs_could_trap_p (stmt))
4365 return false;
4366 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4367 {
4368 gimple *use_stmt = USE_STMT (use_p);
4369 if (is_gimple_debug (use_stmt))
4370 continue;
4371 if (gimple_bb (use_stmt) != bb)
4372 return false;
4373 }
4374 }
4375 return false;
4376 }
4377
4378 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4379 return true and fill in *OPS recursively. */
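/* E.g. (names invented) for VAR defined as
     t_2 = a_3 | b_4;
     var_1 = t_2 | c_5;
   with CODE == BIT_IOR_EXPR, the single-use leaves a_3, b_4 and c_5
   each become a separate entry in *OPS.  */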
4380
4381 static bool
4382 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4383 class loop *loop)
4384 {
4385 gimple *stmt = SSA_NAME_DEF_STMT (var);
4386 tree rhs[2];
4387 int i;
4388
4389 if (!is_reassociable_op (stmt, code, loop))
4390 return false;
4391
4392 rhs[0] = gimple_assign_rhs1 (stmt);
4393 rhs[1] = gimple_assign_rhs2 (stmt);
4394 gimple_set_visited (stmt, true);
4395 for (i = 0; i < 2; i++)
4396 if (TREE_CODE (rhs[i]) == SSA_NAME
4397 && !get_ops (rhs[i], code, ops, loop)
4398 && has_single_use (rhs[i]))
4399 {
4400 operand_entry *oe = operand_entry_pool.allocate ();
4401
4402 oe->op = rhs[i];
4403 oe->rank = code;
4404 oe->id = 0;
4405 oe->count = 1;
4406 oe->stmt_to_insert = NULL;
4407 ops->safe_push (oe);
4408 }
4409 return true;
4410 }
4411
4412 /* Find the ops that were added by get_ops starting from VAR, see if
4413 they were changed during update_range_test and if yes, create new
4414 stmts. */
4415
4416 static tree
4417 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4418 unsigned int *pidx, class loop *loop)
4419 {
4420 gimple *stmt = SSA_NAME_DEF_STMT (var);
4421 tree rhs[4];
4422 int i;
4423
4424 if (!is_reassociable_op (stmt, code, loop))
4425 return NULL;
4426
4427 rhs[0] = gimple_assign_rhs1 (stmt);
4428 rhs[1] = gimple_assign_rhs2 (stmt);
4429 rhs[2] = rhs[0];
4430 rhs[3] = rhs[1];
4431 for (i = 0; i < 2; i++)
4432 if (TREE_CODE (rhs[i]) == SSA_NAME)
4433 {
4434 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4435 if (rhs[2 + i] == NULL_TREE)
4436 {
4437 if (has_single_use (rhs[i]))
4438 rhs[2 + i] = ops[(*pidx)++]->op;
4439 else
4440 rhs[2 + i] = rhs[i];
4441 }
4442 }
4443 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4444 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4445 {
4446 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4447 var = make_ssa_name (TREE_TYPE (var));
4448 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4449 rhs[2], rhs[3]);
4450 gimple_set_uid (g, gimple_uid (stmt));
4451 gimple_set_visited (g, true);
4452 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4453 }
4454 return var;
4455 }
4456
4457 /* Structure to track the initial value passed to get_ops and
4458 the range in the ops vector for each basic block. */
4459
4460 struct inter_bb_range_test_entry
4461 {
4462 tree op;
4463 unsigned int first_idx, last_idx;
4464 };
4465
4466 /* Inter-bb range test optimization.
4467
4468 Returns TRUE if a gimple conditional is optimized into a constant
4469 true/false, otherwise returns FALSE.
4470
4471 This indicates to the caller that it should run a CFG cleanup pass
4472 once reassociation is completed. */
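/* Rough illustration (block numbers and names invented): for a CFG like
     <bb 2>: if (a_1 == 3) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (a_1 == 4) goto <bb 4>; else goto <bb 5>;
   both conditions branch to the common <bb 4>, so they are collected
   into the ops vector and handed to optimize_range_tests, which may
   rewrite one condition into a merged range test and fold the other
   one into a constant true/false.  */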
4473
4474 static bool
4475 maybe_optimize_range_tests (gimple *stmt)
4476 {
4477 basic_block first_bb = gimple_bb (stmt);
4478 basic_block last_bb = first_bb;
4479 basic_block other_bb = NULL;
4480 basic_block bb;
4481 edge_iterator ei;
4482 edge e;
4483 auto_vec<operand_entry *> ops;
4484 auto_vec<inter_bb_range_test_entry> bbinfo;
4485 bool any_changes = false;
4486 bool cfg_cleanup_needed = false;
4487
4488 /* Consider only basic blocks that end with GIMPLE_COND or
4489 a cast statement satisfying final_range_test_p. All
4490 but the last bb in the first_bb .. last_bb range
4491 should end with GIMPLE_COND. */
4492 if (gimple_code (stmt) == GIMPLE_COND)
4493 {
4494 if (EDGE_COUNT (first_bb->succs) != 2)
4495 return cfg_cleanup_needed;
4496 }
4497 else if (final_range_test_p (stmt))
4498 other_bb = single_succ (first_bb);
4499 else
4500 return cfg_cleanup_needed;
4501
4502 if (stmt_could_throw_p (cfun, stmt))
4503 return cfg_cleanup_needed;
4504
4505 /* As relative ordering of post-dominator sons isn't fixed,
4506 maybe_optimize_range_tests can be called first on any
4507 bb in the range we want to optimize. So, start searching
4508 backwards, if first_bb can be set to a predecessor. */
4509 while (single_pred_p (first_bb))
4510 {
4511 basic_block pred_bb = single_pred (first_bb);
4512 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4513 break;
4514 if (!no_side_effect_bb (first_bb))
4515 break;
4516 first_bb = pred_bb;
4517 }
4518 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4519 Before starting forward search in last_bb successors, find
4520 out the other_bb. */
4521 if (first_bb == last_bb)
4522 {
4523 other_bb = NULL;
4524 /* As a non-GIMPLE_COND last stmt always terminates the range,
4525 if the backward search didn't discover anything, just give up. */
4526 if (gimple_code (stmt) != GIMPLE_COND)
4527 return cfg_cleanup_needed;
4528 /* Look at both successors. Either it ends with a GIMPLE_COND
4529 and satisfies suitable_cond_bb, or ends with a cast and
4530 other_bb is that cast's successor. */
4531 FOR_EACH_EDGE (e, ei, first_bb->succs)
4532 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4533 || e->dest == first_bb)
4534 return cfg_cleanup_needed;
4535 else if (single_pred_p (e->dest))
4536 {
4537 stmt = last_stmt (e->dest);
4538 if (stmt
4539 && gimple_code (stmt) == GIMPLE_COND
4540 && EDGE_COUNT (e->dest->succs) == 2)
4541 {
4542 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4543 NULL, true))
4544 break;
4545 else
4546 other_bb = NULL;
4547 }
4548 else if (stmt
4549 && final_range_test_p (stmt)
4550 && find_edge (first_bb, single_succ (e->dest)))
4551 {
4552 other_bb = single_succ (e->dest);
4553 if (other_bb == first_bb)
4554 other_bb = NULL;
4555 }
4556 }
4557 if (other_bb == NULL)
4558 return cfg_cleanup_needed;
4559 }
4560 /* Now do the forward search, moving last_bb to successor bbs
4561 that aren't other_bb. */
4562 while (EDGE_COUNT (last_bb->succs) == 2)
4563 {
4564 FOR_EACH_EDGE (e, ei, last_bb->succs)
4565 if (e->dest != other_bb)
4566 break;
4567 if (e == NULL)
4568 break;
4569 if (!single_pred_p (e->dest))
4570 break;
4571 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4572 break;
4573 if (!no_side_effect_bb (e->dest))
4574 break;
4575 last_bb = e->dest;
4576 }
4577 if (first_bb == last_bb)
4578 return cfg_cleanup_needed;
4579 /* Here basic blocks first_bb through last_bb's predecessor
4580 end with GIMPLE_COND, each of them has one edge to
4581 other_bb and another to another block in the range,
4582 no block except first_bb has side-effects, and
4583 last_bb ends with either a GIMPLE_COND or a cast satisfying
4584 final_range_test_p. */
4585 for (bb = last_bb; ; bb = single_pred (bb))
4586 {
4587 enum tree_code code;
4588 tree lhs, rhs;
4589 inter_bb_range_test_entry bb_ent;
4590
4591 bb_ent.op = NULL_TREE;
4592 bb_ent.first_idx = ops.length ();
4593 bb_ent.last_idx = bb_ent.first_idx;
4594 e = find_edge (bb, other_bb);
4595 stmt = last_stmt (bb);
4596 gimple_set_visited (stmt, true);
4597 if (gimple_code (stmt) != GIMPLE_COND)
4598 {
4599 use_operand_p use_p;
4600 gimple *phi;
4601 edge e2;
4602 unsigned int d;
4603
4604 lhs = gimple_assign_lhs (stmt);
4605 rhs = gimple_assign_rhs1 (stmt);
4606 gcc_assert (bb == last_bb);
4607
4608 /* stmt is
4609 _123 = (int) _234;
4610 OR
4611 _234 = a_2(D) == 2;
4612
4613 followed by:
4614 <bb M>:
4615 # _345 = PHI <_123(N), 1(...), 1(...)>
4616
4617 or 0 instead of 1. If it is 0, the _234
4618 range test is anded together with all the
4619 other range tests; if it is 1, it is ored with
4620 them. */
4621 single_imm_use (lhs, &use_p, &phi);
4622 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4623 e2 = find_edge (first_bb, other_bb);
4624 d = e2->dest_idx;
4625 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4626 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4627 code = BIT_AND_EXPR;
4628 else
4629 {
4630 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4631 code = BIT_IOR_EXPR;
4632 }
4633
4634 /* If _234's SSA_NAME_DEF_STMT is
4635 _234 = _567 | _789;
4636 (or &, corresponding to 1/0 in the phi arguments),
4637 push into ops the individual range test arguments
4638 of the bitwise or resp. and, recursively. */
4639 if (TREE_CODE (rhs) == SSA_NAME
4640 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4641 != tcc_comparison)
4642 && !get_ops (rhs, code, &ops,
4643 loop_containing_stmt (stmt))
4644 && has_single_use (rhs))
4645 {
4646 /* Otherwise, push the _234 range test itself. */
4647 operand_entry *oe = operand_entry_pool.allocate ();
4648
4649 oe->op = rhs;
4650 oe->rank = code;
4651 oe->id = 0;
4652 oe->count = 1;
4653 oe->stmt_to_insert = NULL;
4654 ops.safe_push (oe);
4655 bb_ent.last_idx++;
4656 bb_ent.op = rhs;
4657 }
4658 else if (is_gimple_assign (stmt)
4659 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4660 == tcc_comparison)
4661 && !get_ops (lhs, code, &ops,
4662 loop_containing_stmt (stmt))
4663 && has_single_use (lhs))
4664 {
4665 operand_entry *oe = operand_entry_pool.allocate ();
4666 oe->op = lhs;
4667 oe->rank = code;
4668 oe->id = 0;
4669 oe->count = 1;
4670 ops.safe_push (oe);
4671 bb_ent.last_idx++;
4672 bb_ent.op = lhs;
4673 }
4674 else
4675 {
4676 bb_ent.last_idx = ops.length ();
4677 bb_ent.op = rhs;
4678 }
4679 bbinfo.safe_push (bb_ent);
4680 continue;
4681 }
4682 else if (bb == last_bb)
4683 {
4684 /* For last_bb, handle also:
4685 if (x_3(D) == 3)
4686 goto <bb 6>; [34.00%]
4687 else
4688 goto <bb 7>; [66.00%]
4689
4690 <bb 6> [local count: 79512730]:
4691
4692 <bb 7> [local count: 1073741824]:
4693 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4694 where bb 7 is OTHER_BB, but the PHI values from the
4695 earlier bbs match the path through the empty bb
4696 in between. */
4697 bool test_swapped_p = false;
4698 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4699 &other_bb, &test_swapped_p, true);
4700 gcc_assert (ok);
4701 if (test_swapped_p)
4702 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4703 }
4704 /* Otherwise stmt is GIMPLE_COND. */
4705 code = gimple_cond_code (stmt);
4706 lhs = gimple_cond_lhs (stmt);
4707 rhs = gimple_cond_rhs (stmt);
4708 if (TREE_CODE (lhs) == SSA_NAME
4709 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4710 && ((code != EQ_EXPR && code != NE_EXPR)
4711 || rhs != boolean_false_node
4712 /* Either push into ops the individual bitwise
4713 or resp. and operands, depending on which
4714 edge is other_bb. */
4715 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4716 ^ (code == EQ_EXPR))
4717 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4718 loop_containing_stmt (stmt))))
4719 {
4720 /* Or push the GIMPLE_COND stmt itself. */
4721 operand_entry *oe = operand_entry_pool.allocate ();
4722
4723 oe->op = NULL;
4724 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4725 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4726 /* oe->op = NULL signals that there is no SSA_NAME
4727 for the range test, and oe->id instead is the
4728 number of the basic block at whose end the GIMPLE_COND
4729 is. */
4730 oe->id = bb->index;
4731 oe->count = 1;
4732 oe->stmt_to_insert = NULL;
4733 ops.safe_push (oe);
4734 bb_ent.op = NULL;
4735 bb_ent.last_idx++;
4736 }
4737 else if (ops.length () > bb_ent.first_idx)
4738 {
4739 bb_ent.op = lhs;
4740 bb_ent.last_idx = ops.length ();
4741 }
4742 bbinfo.safe_push (bb_ent);
4743 if (bb == first_bb)
4744 break;
4745 }
4746 if (ops.length () > 1)
4747 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4748 if (any_changes)
4749 {
4750 unsigned int idx, max_idx = 0;
4751 /* update_ops relies on the has_single_use predicates returning the
4752 same values as they did during get_ops earlier. Additionally it
4753 never removes statements, only adds new ones, and it should walk
4754 from the single imm use and check the predicate already before
4755 making those changes.
4756 On the other hand, the handling of GIMPLE_COND directly can turn
4757 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4758 it needs to be done in a separate loop afterwards. */
4759 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4760 {
4761 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4762 && bbinfo[idx].op != NULL_TREE)
4763 {
4764 tree new_op;
4765
4766 max_idx = idx;
4767 stmt = last_stmt (bb);
4768 new_op = update_ops (bbinfo[idx].op,
4769 (enum tree_code)
4770 ops[bbinfo[idx].first_idx]->rank,
4771 ops, &bbinfo[idx].first_idx,
4772 loop_containing_stmt (stmt));
4773 if (new_op == NULL_TREE)
4774 {
4775 gcc_assert (bb == last_bb);
4776 new_op = ops[bbinfo[idx].first_idx++]->op;
4777 }
4778 if (bbinfo[idx].op != new_op)
4779 {
4780 imm_use_iterator iter;
4781 use_operand_p use_p;
4782 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4783
4784 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4785 if (is_gimple_debug (use_stmt))
4786 continue;
4787 else if (gimple_code (use_stmt) == GIMPLE_COND
4788 || gimple_code (use_stmt) == GIMPLE_PHI)
4789 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4790 SET_USE (use_p, new_op);
4791 else if ((is_gimple_assign (use_stmt)
4792 && (TREE_CODE_CLASS
4793 (gimple_assign_rhs_code (use_stmt))
4794 == tcc_comparison)))
4795 cast_or_tcc_cmp_stmt = use_stmt;
4796 else if (gimple_assign_cast_p (use_stmt))
4797 cast_or_tcc_cmp_stmt = use_stmt;
4798 else
4799 gcc_unreachable ();
4800
4801 if (cast_or_tcc_cmp_stmt)
4802 {
4803 gcc_assert (bb == last_bb);
4804 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4805 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4806 enum tree_code rhs_code
4807 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4808 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4809 : CONVERT_EXPR;
4810 gassign *g;
4811 if (is_gimple_min_invariant (new_op))
4812 {
4813 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4814 g = gimple_build_assign (new_lhs, new_op);
4815 }
4816 else
4817 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4818 gimple_stmt_iterator gsi
4819 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4820 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4821 gimple_set_visited (g, true);
4822 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4823 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4824 if (is_gimple_debug (use_stmt))
4825 continue;
4826 else if (gimple_code (use_stmt) == GIMPLE_COND
4827 || gimple_code (use_stmt) == GIMPLE_PHI)
4828 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4829 SET_USE (use_p, new_lhs);
4830 else
4831 gcc_unreachable ();
4832 }
4833 }
4834 }
4835 if (bb == first_bb)
4836 break;
4837 }
4838 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4839 {
4840 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4841 && bbinfo[idx].op == NULL_TREE
4842 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4843 {
4844 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4845
4846 if (idx > max_idx)
4847 max_idx = idx;
4848
4849 /* If we collapse the conditional to a true/false
4850 condition, then bubble that knowledge up to our caller. */
4851 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4852 {
4853 gimple_cond_make_false (cond_stmt);
4854 cfg_cleanup_needed = true;
4855 }
4856 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4857 {
4858 gimple_cond_make_true (cond_stmt);
4859 cfg_cleanup_needed = true;
4860 }
4861 else
4862 {
4863 gimple_cond_set_code (cond_stmt, NE_EXPR);
4864 gimple_cond_set_lhs (cond_stmt,
4865 ops[bbinfo[idx].first_idx]->op);
4866 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4867 }
4868 update_stmt (cond_stmt);
4869 }
4870 if (bb == first_bb)
4871 break;
4872 }
4873
4874 /* The above changes could result in basic blocks after the first
4875 modified one, up to and including last_bb, to be executed even if
4876 they would not be in the original program. If the value ranges of
4877 assignment lhs' in those bbs were dependent on the conditions
4878 guarding those basic blocks which now can change, the VRs might
4879 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4880 are only used within the same bb, it should be not a big deal if
4881 we just reset all the VRs in those bbs. See PR68671. */
4882 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4883 reset_flow_sensitive_info_in_bb (bb);
4884 }
4885 return cfg_cleanup_needed;
4886 }
4887
4888 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4889 of STMT in its operands. This is also known as a "destructive
4890 update" operation. */
4891
4892 static bool
4893 is_phi_for_stmt (gimple *stmt, tree operand)
4894 {
4895 gimple *def_stmt;
4896 gphi *def_phi;
4897 tree lhs;
4898 use_operand_p arg_p;
4899 ssa_op_iter i;
4900
4901 if (TREE_CODE (operand) != SSA_NAME)
4902 return false;
4903
4904 lhs = gimple_assign_lhs (stmt);
4905
4906 def_stmt = SSA_NAME_DEF_STMT (operand);
4907 def_phi = dyn_cast <gphi *> (def_stmt);
4908 if (!def_phi)
4909 return false;
4910
4911 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4912 if (lhs == USE_FROM_PTR (arg_p))
4913 return true;
4914 return false;
4915 }
4916
4917 /* Remove def stmt of VAR if VAR has zero uses and recurse
4918 on rhs1 operand if so. */
4919
4920 static void
4921 remove_visited_stmt_chain (tree var)
4922 {
4923 gimple *stmt;
4924 gimple_stmt_iterator gsi;
4925
4926 while (1)
4927 {
4928 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4929 return;
4930 stmt = SSA_NAME_DEF_STMT (var);
4931 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4932 {
4933 var = gimple_assign_rhs1 (stmt);
4934 gsi = gsi_for_stmt (stmt);
4935 reassoc_remove_stmt (&gsi);
4936 release_defs (stmt);
4937 }
4938 else
4939 return;
4940 }
4941 }
4942
4943 /* This function checks three consecutive operands in
4944 the passed operands vector OPS starting from OPINDEX and
4945 swaps two operands if it is profitable for the binary operation
4946 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4947
4948 We pair ops with the same rank if possible.
4949
4950 The alternative we try is to see if STMT is a destructive
4951 update style statement, which is like:
4952 b = phi (a, ...)
4953 a = c + b;
4954 In that case, we want to use the destructive update form to
4955 expose the possible vectorizer sum reduction opportunity.
4956 In that case, the third operand will be the phi node. This
4957 check is not performed if STMT is null.
4958
4959 We could, of course, try to be better as noted above, and do a
4960 lot of work to try to find these opportunities in >3 operand
4961 cases, but it is unlikely to be worth it. */
4962
4963 static void
4964 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4965 unsigned int opindex, gimple *stmt)
4966 {
4967 operand_entry *oe1, *oe2, *oe3;
4968
4969 oe1 = ops[opindex];
4970 oe2 = ops[opindex + 1];
4971 oe3 = ops[opindex + 2];
4972
4973 if ((oe1->rank == oe2->rank
4974 && oe2->rank != oe3->rank)
4975 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4976 && !is_phi_for_stmt (stmt, oe1->op)
4977 && !is_phi_for_stmt (stmt, oe2->op)))
4978 std::swap (*oe1, *oe3);
4979 else if ((oe1->rank == oe3->rank
4980 && oe2->rank != oe3->rank)
4981 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4982 && !is_phi_for_stmt (stmt, oe1->op)
4983 && !is_phi_for_stmt (stmt, oe3->op)))
4984 std::swap (*oe1, *oe2);
4985 }
4986
4987 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4988 two definitions, otherwise return STMT. */
4989
4990 static inline gimple *
4991 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4992 {
4993 if (TREE_CODE (rhs1) == SSA_NAME
4994 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4995 stmt = SSA_NAME_DEF_STMT (rhs1);
4996 if (TREE_CODE (rhs2) == SSA_NAME
4997 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4998 stmt = SSA_NAME_DEF_STMT (rhs2);
4999 return stmt;
5000 }
5001
5002 /* If the stmt that defines operand has to be inserted, insert it
5003 before the use. */
5004 static void
5005 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5006 {
5007 gcc_assert (is_gimple_assign (stmt_to_insert));
5008 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5009 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5010 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
5011 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5012 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5013
5014 /* If the insert point is not stmt, then insert_point would be
5015 the point where operand rhs1 or rhs2 is defined. In this case,
5016 stmt_to_insert has to be inserted afterwards. This would
5017 only happen when the stmt insertion point is flexible. */
5018 if (stmt == insert_point)
5019 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5020 else
5021 insert_stmt_after (stmt_to_insert, insert_point);
5022 }
5023
5024
5025 /* Recursively rewrite our linearized statements so that the operators
5026 match those in OPS[OPINDEX], putting the computation in rank
5027 order. Return new lhs.
5028 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5029 the current stmt and during recursive invocations.
5030 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5031 recursive invocations. */
5032
5033 static tree
5034 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5035 vec<operand_entry *> ops, bool changed, bool next_changed)
5036 {
5037 tree rhs1 = gimple_assign_rhs1 (stmt);
5038 tree rhs2 = gimple_assign_rhs2 (stmt);
5039 tree lhs = gimple_assign_lhs (stmt);
5040 operand_entry *oe;
5041
5042 /* The final recursion case for this function is that you have
5043 exactly two operations left.
5044 If we had exactly one op in the entire list to start with, we
5045 would have never called this function, and the tail recursion
5046 rewrites them one at a time. */
5047 if (opindex + 2 == ops.length ())
5048 {
5049 operand_entry *oe1, *oe2;
5050
5051 oe1 = ops[opindex];
5052 oe2 = ops[opindex + 1];
5053
5054 if (rhs1 != oe1->op || rhs2 != oe2->op)
5055 {
5056 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5057 unsigned int uid = gimple_uid (stmt);
5058
5059 if (dump_file && (dump_flags & TDF_DETAILS))
5060 {
5061 fprintf (dump_file, "Transforming ");
5062 print_gimple_stmt (dump_file, stmt, 0);
5063 }
5064
5065 /* If the stmt that defines operand has to be inserted, insert it
5066 before the use. */
5067 if (oe1->stmt_to_insert)
5068 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5069 if (oe2->stmt_to_insert)
5070 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5071 /* Even when changed is false, reassociation could have e.g. removed
5072 some redundant operations, so unless we are just swapping the
5073 arguments or unless there is no change at all (then we just
5074 return lhs), force creation of a new SSA_NAME. */
5075 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5076 {
5077 gimple *insert_point
5078 = find_insert_point (stmt, oe1->op, oe2->op);
5079 lhs = make_ssa_name (TREE_TYPE (lhs));
5080 stmt
5081 = gimple_build_assign (lhs, rhs_code,
5082 oe1->op, oe2->op);
5083 gimple_set_uid (stmt, uid);
5084 gimple_set_visited (stmt, true);
5085 if (insert_point == gsi_stmt (gsi))
5086 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5087 else
5088 insert_stmt_after (stmt, insert_point);
5089 }
5090 else
5091 {
5092 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
5093 == stmt);
5094 gimple_assign_set_rhs1 (stmt, oe1->op);
5095 gimple_assign_set_rhs2 (stmt, oe2->op);
5096 update_stmt (stmt);
5097 }
5098
5099 if (rhs1 != oe1->op && rhs1 != oe2->op)
5100 remove_visited_stmt_chain (rhs1);
5101
5102 if (dump_file && (dump_flags & TDF_DETAILS))
5103 {
5104 fprintf (dump_file, " into ");
5105 print_gimple_stmt (dump_file, stmt, 0);
5106 }
5107 }
5108 return lhs;
5109 }
5110
5111 /* If we hit here, we should have 3 or more ops left. */
5112 gcc_assert (opindex + 2 < ops.length ());
5113
5114 /* Rewrite the next operator. */
5115 oe = ops[opindex];
5116
5117 /* If the stmt that defines operand has to be inserted, insert it
5118 before the use. */
5119 if (oe->stmt_to_insert)
5120 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5121
5122 /* Recurse on the LHS of the binary operator, which is guaranteed to
5123 be the non-leaf side. */
5124 tree new_rhs1
5125 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5126 changed || oe->op != rhs2 || next_changed,
5127 false);
5128
5129 if (oe->op != rhs2 || new_rhs1 != rhs1)
5130 {
5131 if (dump_file && (dump_flags & TDF_DETAILS))
5132 {
5133 fprintf (dump_file, "Transforming ");
5134 print_gimple_stmt (dump_file, stmt, 0);
5135 }
5136
5137 /* If changed is false, this is either opindex == 0
5138 or all outer rhs2's were equal to corresponding oe->op,
5139 and powi_result is NULL.
5140 That means lhs is equivalent before and after reassociation.
5141 Otherwise ensure the old lhs SSA_NAME is not reused and
5142 create a new stmt as well, so that any debug stmts will be
5143 properly adjusted. */
5144 if (changed)
5145 {
5146 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5147 unsigned int uid = gimple_uid (stmt);
5148 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
5149
5150 lhs = make_ssa_name (TREE_TYPE (lhs));
5151 stmt = gimple_build_assign (lhs, rhs_code,
5152 new_rhs1, oe->op);
5153 gimple_set_uid (stmt, uid);
5154 gimple_set_visited (stmt, true);
5155 if (insert_point == gsi_stmt (gsi))
5156 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5157 else
5158 insert_stmt_after (stmt, insert_point);
5159 }
5160 else
5161 {
5162 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
5163 == stmt);
5164 gimple_assign_set_rhs1 (stmt, new_rhs1);
5165 gimple_assign_set_rhs2 (stmt, oe->op);
5166 update_stmt (stmt);
5167 }
5168
5169 if (dump_file && (dump_flags & TDF_DETAILS))
5170 {
5171 fprintf (dump_file, " into ");
5172 print_gimple_stmt (dump_file, stmt, 0);
5173 }
5174 }
5175 return lhs;
5176 }
5177
5178 /* Find out how many cycles we need to compute a statement chain.
5179 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
5180 maximum number of independent statements we may execute per cycle. */
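/* Worked example (illustrative numbers): for ops_num = 12 and
   cpu_width = 2, the first phase takes 12 / (2 * 2) = 3 cycles and
   leaves 12 - 3 * 2 = 6 operands; 6 is not a power of two, so the
   halving phase needs floor_log2 (6) + 1 = 3 further cycles, for a
   total of 6.  */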
5181
5182 static int
5183 get_required_cycles (int ops_num, int cpu_width)
5184 {
5185 int res;
5186 int elog;
5187 unsigned int rest;
5188
5189 /* While we have more than 2 * cpu_width operands
5190 we may reduce the number of operands by cpu_width
5191 per cycle. */
5192 res = ops_num / (2 * cpu_width);
5193
5194 /* The remaining operand count can then be halved each cycle
5195 until we have only one operand. */
5196 rest = (unsigned)(ops_num - res * cpu_width);
5197 elog = exact_log2 (rest);
5198 if (elog >= 0)
5199 res += elog;
5200 else
5201 res += floor_log2 (rest) + 1;
5202
5203 return res;
5204 }
5205
5206 /* Returns an optimal number of registers to use for computation of
5207 given statements. */
5208
5209 static int
5210 get_reassociation_width (int ops_num, enum tree_code opc,
5211 machine_mode mode)
5212 {
5213 int param_width = param_tree_reassoc_width;
5214 int width;
5215 int width_min;
5216 int cycles_best;
5217
5218 if (param_width > 0)
5219 width = param_width;
5220 else
5221 width = targetm.sched.reassociation_width (opc, mode);
5222
5223 if (width == 1)
5224 return width;
5225
5226 /* Get the minimal time required for sequence computation. */
5227 cycles_best = get_required_cycles (ops_num, width);
5228
5229 /* Check if we may use a smaller width and still compute the sequence
5230 in the same time. This allows us to reduce register usage.
5231 get_required_cycles is monotonically increasing with lower width
5232 so we can perform a binary search for the minimal width that still
5233 results in the optimal cycle count. */
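/* Worked example (illustrative numbers): for ops_num = 8 and an
   initial width of 4, cycles_best is get_required_cycles (8, 4) = 3.
   Width 2 needs 4 cycles and width 3 also needs 4 cycles, so the
   search terminates and the full width of 4 is kept.  */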
5234 width_min = 1;
5235 while (width > width_min)
5236 {
5237 int width_mid = (width + width_min) / 2;
5238
5239 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5240 width = width_mid;
5241 else if (width_min < width_mid)
5242 width_min = width_mid;
5243 else
5244 break;
5245 }
5246
5247 return width;
5248 }
5249
5250 /* Recursively rewrite our linearized statements so that the operators
5251 match those in OPS[OPINDEX], putting the computation in rank
5252 order and trying to allow operations to be executed in
5253 parallel. */
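/* Rough illustration (names invented, WIDTH == 2): a serial chain
   computing a + b + c + d is re-expressed as
     t1 = a + b;  t2 = c + d;  result = t1 + t2;
   so that t1 and t2 are independent and can execute in parallel
   instead of forming one long dependency chain.  */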
5254
5255 static void
5256 rewrite_expr_tree_parallel (gassign *stmt, int width,
5257 vec<operand_entry *> ops)
5258 {
5259 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5260 int op_num = ops.length ();
5261 gcc_assert (op_num > 0);
5262 int stmt_num = op_num - 1;
5263 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5264 int op_index = op_num - 1;
5265 int stmt_index = 0;
5266 int ready_stmts_end = 0;
5267 int i = 0;
5268 gimple *stmt1 = NULL, *stmt2 = NULL;
5269 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5270
5271 /* We start expression rewriting from the top statements.
5272 So, in this loop we create a full list of statements
5273 we will work with. */
5274 stmts[stmt_num - 1] = stmt;
5275 for (i = stmt_num - 2; i >= 0; i--)
5276 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5277
5278 for (i = 0; i < stmt_num; i++)
5279 {
5280 tree op1, op2;
5281
5282 /* Determine whether we should use results of
5283 already handled statements or not. */
5284 if (ready_stmts_end == 0
5285 && (i - stmt_index >= width || op_index < 1))
5286 ready_stmts_end = i;
5287
5288 /* Now we choose operands for the next statement. A nonzero
5289 value in ready_stmts_end here means that we should use
5290 the results of already generated statements as new operands. */
5291 if (ready_stmts_end > 0)
5292 {
5293 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5294 if (ready_stmts_end > stmt_index)
5295 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5296 else if (op_index >= 0)
5297 {
5298 operand_entry *oe = ops[op_index--];
5299 stmt2 = oe->stmt_to_insert;
5300 op2 = oe->op;
5301 }
5302 else
5303 {
5304 gcc_assert (stmt_index < i);
5305 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5306 }
5307
5308 if (stmt_index >= ready_stmts_end)
5309 ready_stmts_end = 0;
5310 }
5311 else
5312 {
5313 if (op_index > 1)
5314 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5315 operand_entry *oe2 = ops[op_index--];
5316 operand_entry *oe1 = ops[op_index--];
5317 op2 = oe2->op;
5318 stmt2 = oe2->stmt_to_insert;
5319 op1 = oe1->op;
5320 stmt1 = oe1->stmt_to_insert;
5321 }
5322
5323 /* If we emit the last statement then we should put
5324 operands into the last statement. It will also
5325 break the loop. */
5326 if (op_index < 0 && stmt_index == i)
5327 i = stmt_num - 1;
5328
5329 if (dump_file && (dump_flags & TDF_DETAILS))
5330 {
5331 fprintf (dump_file, "Transforming ");
5332 print_gimple_stmt (dump_file, stmts[i], 0);
5333 }
5334
5335 /* If the stmt that defines operand has to be inserted, insert it
5336 before the use. */
5337 if (stmt1)
5338 insert_stmt_before_use (stmts[i], stmt1);
5339 if (stmt2)
5340 insert_stmt_before_use (stmts[i], stmt2);
5341 stmt1 = stmt2 = NULL;
5342
5343 /* We keep the original statement only for the last one. All
5344 others are recreated. */
5345 if (i == stmt_num - 1)
5346 {
5347 gimple_assign_set_rhs1 (stmts[i], op1);
5348 gimple_assign_set_rhs2 (stmts[i], op2);
5349 update_stmt (stmts[i]);
5350 }
5351 else
5352 {
5353 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5354 gimple_set_visited (stmts[i], true);
5355 }
5356 if (dump_file && (dump_flags & TDF_DETAILS))
5357 {
5358 fprintf (dump_file, " into ");
5359 print_gimple_stmt (dump_file, stmts[i], 0);
5360 }
5361 }
5362
5363 remove_visited_stmt_chain (last_rhs1);
5364 }
5365
5366 /* Transform STMT, which is really (A + B) + (C + D), into the left
5367 linear form, ((A+B)+C)+D.
5368 Recurse on D if necessary. */
5369
5370 static void
5371 linearize_expr (gimple *stmt)
5372 {
5373 gimple_stmt_iterator gsi;
5374 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5375 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5376 gimple *oldbinrhs = binrhs;
5377 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5378 gimple *newbinrhs = NULL;
5379 class loop *loop = loop_containing_stmt (stmt);
5380 tree lhs = gimple_assign_lhs (stmt);
5381
5382 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5383 && is_reassociable_op (binrhs, rhscode, loop));
5384
5385 gsi = gsi_for_stmt (stmt);
5386
5387 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5388 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5389 gimple_assign_rhs_code (binrhs),
5390 gimple_assign_lhs (binlhs),
5391 gimple_assign_rhs2 (binrhs));
5392 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5393 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5394 gimple_set_uid (binrhs, gimple_uid (stmt));
5395
5396 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5397 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5398
5399 if (dump_file && (dump_flags & TDF_DETAILS))
5400 {
5401 fprintf (dump_file, "Linearized: ");
5402 print_gimple_stmt (dump_file, stmt, 0);
5403 }
5404
5405 reassociate_stats.linearized++;
5406 update_stmt (stmt);
5407
5408 gsi = gsi_for_stmt (oldbinrhs);
5409 reassoc_remove_stmt (&gsi);
5410 release_defs (oldbinrhs);
5411
5412 gimple_set_visited (stmt, true);
5413 gimple_set_visited (binlhs, true);
5414 gimple_set_visited (binrhs, true);
5415
5416 /* Tail recurse on the new rhs if it still needs reassociation. */
5417 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5418 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5419 want to change the algorithm while converting to tuples. */
5420 linearize_expr (stmt);
5421 }
5422
5423 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5424 it. Otherwise, return NULL. */
5425
5426 static gimple *
5427 get_single_immediate_use (tree lhs)
5428 {
5429 use_operand_p immuse;
5430 gimple *immusestmt;
5431
5432 if (TREE_CODE (lhs) == SSA_NAME
5433 && single_imm_use (lhs, &immuse, &immusestmt)
5434 && is_gimple_assign (immusestmt))
5435 return immusestmt;
5436
5437 return NULL;
5438 }
5439
5440 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5441 representing the negated value. Insertions of any necessary
5442 instructions go before GSI.
5443 This function is recursive in that, if you hand it "a_5" as the
5444 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5445 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5446
5447 static tree
5448 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5449 {
5450 gimple *negatedefstmt = NULL;
5451 tree resultofnegate;
5452 gimple_stmt_iterator gsi;
5453 unsigned int uid;
5454
5455 /* If we are trying to negate a name, defined by an add, negate the
5456 add operands instead. */
5457 if (TREE_CODE (tonegate) == SSA_NAME)
5458 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5459 if (TREE_CODE (tonegate) == SSA_NAME
5460 && is_gimple_assign (negatedefstmt)
5461 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5462 && has_single_use (gimple_assign_lhs (negatedefstmt))
5463 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5464 {
5465 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5466 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5467 tree lhs = gimple_assign_lhs (negatedefstmt);
5468 gimple *g;
5469
5470 gsi = gsi_for_stmt (negatedefstmt);
5471 rhs1 = negate_value (rhs1, &gsi);
5472
5473 gsi = gsi_for_stmt (negatedefstmt);
5474 rhs2 = negate_value (rhs2, &gsi);
5475
5476 gsi = gsi_for_stmt (negatedefstmt);
5477 lhs = make_ssa_name (TREE_TYPE (lhs));
5478 gimple_set_visited (negatedefstmt, true);
5479 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5480 gimple_set_uid (g, gimple_uid (negatedefstmt));
5481 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5482 return lhs;
5483 }
5484
5485 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5486 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5487 NULL_TREE, true, GSI_SAME_STMT);
5488 gsi = *gsip;
5489 uid = gimple_uid (gsi_stmt (gsi));
5490 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5491 {
5492 gimple *stmt = gsi_stmt (gsi);
5493 if (gimple_uid (stmt) != 0)
5494 break;
5495 gimple_set_uid (stmt, uid);
5496 }
5497 return resultofnegate;
5498 }
5499
5500 /* Return true if we should break up the subtract in STMT into an add
5501 with negate. This is true when the subtract's operands are really
5502 adds, or the subtract itself is used in an add expression. In
5503 either case, breaking up the subtract into an add with negate
5504 exposes the adds to reassociation. */
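/* E.g. (names invented) for
     x_3 = a_1 - b_2;
     y_5 = x_3 + c_4;
   the subtract is broken up into x_3 = a_1 + (-b_2), turning the whole
   computation into a single reassociable chain of additions.  */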
5505
5506 static bool
5507 should_break_up_subtract (gimple *stmt)
5508 {
5509 tree lhs = gimple_assign_lhs (stmt);
5510 tree binlhs = gimple_assign_rhs1 (stmt);
5511 tree binrhs = gimple_assign_rhs2 (stmt);
5512 gimple *immusestmt;
5513 class loop *loop = loop_containing_stmt (stmt);
5514
5515 if (TREE_CODE (binlhs) == SSA_NAME
5516 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5517 return true;
5518
5519 if (TREE_CODE (binrhs) == SSA_NAME
5520 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5521 return true;
5522
5523 if (TREE_CODE (lhs) == SSA_NAME
5524 && (immusestmt = get_single_immediate_use (lhs))
5525 && is_gimple_assign (immusestmt)
5526 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5527 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5528 && gimple_assign_rhs1 (immusestmt) == lhs)
5529 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5530 return true;
5531 return false;
5532 }
5533
5534 /* Transform STMT from A - B into A + -B. */
5535
5536 static void
5537 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5538 {
5539 tree rhs1 = gimple_assign_rhs1 (stmt);
5540 tree rhs2 = gimple_assign_rhs2 (stmt);
5541
5542 if (dump_file && (dump_flags & TDF_DETAILS))
5543 {
5544 fprintf (dump_file, "Breaking up subtract ");
5545 print_gimple_stmt (dump_file, stmt, 0);
5546 }
5547
5548 rhs2 = negate_value (rhs2, gsip);
5549 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5550 update_stmt (stmt);
5551 }
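/* For example (hypothetical names),  x_3 = a_1 - b_2  becomes

     t_4 = -b_2;
     x_3 = a_1 + t_4;

   when b_2 is not itself defined by a single-use add; in that case
   negate_value emits the plain negate just before the statement.  */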
5552
5553 /* Determine whether STMT is a builtin call that raises an SSA name
5554 to a constant integer power. If so, place the SSA name in *BASE and
5555 the exponent in *EXPONENT, and return TRUE; otherwise return FALSE.
5556 The single-use, early-reassociation and unsafe-math checks are
5557 performed by the caller, try_special_add_to_ops. */
5558
5559 static bool
5560 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5561 {
5562 tree arg1;
5563 REAL_VALUE_TYPE c, cint;
5564
5565 switch (gimple_call_combined_fn (stmt))
5566 {
5567 CASE_CFN_POW:
5568 if (flag_errno_math)
5569 return false;
5570
5571 *base = gimple_call_arg (stmt, 0);
5572 arg1 = gimple_call_arg (stmt, 1);
5573
5574 if (TREE_CODE (arg1) != REAL_CST)
5575 return false;
5576
5577 c = TREE_REAL_CST (arg1);
5578
5579 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5580 return false;
5581
5582 *exponent = real_to_integer (&c);
5583 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5584 if (!real_identical (&c, &cint))
5585 return false;
5586
5587 break;
5588
5589 CASE_CFN_POWI:
5590 *base = gimple_call_arg (stmt, 0);
5591 arg1 = gimple_call_arg (stmt, 1);
5592
5593 if (!tree_fits_shwi_p (arg1))
5594 return false;
5595
5596 *exponent = tree_to_shwi (arg1);
5597 break;
5598
5599 default:
5600 return false;
5601 }
5602
5603 /* Expanding negative exponents is generally unproductive, so we don't
5604 complicate matters with those. Exponents of zero and one should
5605 have been handled by expression folding. */
5606 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5607 return false;
5608
5609 return true;
5610 }
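/* For example, with -fno-math-errno a call  t_5 = pow (x_1, 4.0)  yields
   *BASE = x_1 and *EXPONENT = 4, while  pow (x_1, 0.5)  is rejected
   because its exponent is not an integral value (hypothetical names;
   the single-use and unsafe-math gates are applied by the caller).  */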
5611
5612 /* Try to derive and add an operand entry for OP to *OPS. Return false
5613 if unsuccessful. */
5614
5615 static bool
5616 try_special_add_to_ops (vec<operand_entry *> *ops,
5617 enum tree_code code,
5618 tree op, gimple* def_stmt)
5619 {
5620 tree base = NULL_TREE;
5621 HOST_WIDE_INT exponent = 0;
5622
5623 if (TREE_CODE (op) != SSA_NAME
5624 || ! has_single_use (op))
5625 return false;
5626
5627 if (code == MULT_EXPR
5628 && reassoc_insert_powi_p
5629 && flag_unsafe_math_optimizations
5630 && is_gimple_call (def_stmt)
5631 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5632 {
5633 add_repeat_to_ops_vec (ops, base, exponent);
5634 gimple_set_visited (def_stmt, true);
5635 return true;
5636 }
5637 else if (code == MULT_EXPR
5638 && is_gimple_assign (def_stmt)
5639 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5640 && !HONOR_SNANS (TREE_TYPE (op))
5641 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5642 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5643 {
5644 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5645 tree cst = build_minus_one_cst (TREE_TYPE (op));
5646 add_to_ops_vec (ops, rhs1);
5647 add_to_ops_vec (ops, cst);
5648 gimple_set_visited (def_stmt, true);
5649 return true;
5650 }
5651
5652 return false;
5653 }
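/* As a sketch (hypothetical names), for the NEGATE_EXPR case above, when
   linearizing  x_4 = t_3 * b_2  with  t_3 = -a_1,  the operand t_3 is
   replaced by the pair { a_1, -1 } in the operand list, so the sign
   change becomes just another factor for the reassociation to place.  */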
5654
5655 /* Recursively linearize a binary expression that is the RHS of STMT.
5656 Place the operands of the expression tree in the vector named OPS. */
5657
5658 static void
5659 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5660 bool is_associative, bool set_visited)
5661 {
5662 tree binlhs = gimple_assign_rhs1 (stmt);
5663 tree binrhs = gimple_assign_rhs2 (stmt);
5664 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5665 bool binlhsisreassoc = false;
5666 bool binrhsisreassoc = false;
5667 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5668 class loop *loop = loop_containing_stmt (stmt);
5669
5670 if (set_visited)
5671 gimple_set_visited (stmt, true);
5672
5673 if (TREE_CODE (binlhs) == SSA_NAME)
5674 {
5675 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5676 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5677 && !stmt_could_throw_p (cfun, binlhsdef));
5678 }
5679
5680 if (TREE_CODE (binrhs) == SSA_NAME)
5681 {
5682 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5683 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5684 && !stmt_could_throw_p (cfun, binrhsdef));
5685 }
5686
5687 /* If the LHS is not reassociable, but the RHS is, we need to swap
5688 them. If neither is reassociable, there is nothing we can do, so
5689 just put them in the ops vector. If the LHS is reassociable,
5690 linearize it. If both are reassociable, then linearize the RHS
5691 and the LHS. */
5692
5693 if (!binlhsisreassoc)
5694 {
5695 /* If this is not an associative operation, e.g. division, give up. */
5696 if (!is_associative)
5697 {
5698 add_to_ops_vec (ops, binrhs);
5699 return;
5700 }
5701
5702 if (!binrhsisreassoc)
5703 {
5704 bool swap = false;
5705 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5706 /* If we add ops for the rhs we expect to be able to recurse
5707 to it via the lhs during expression rewrite so swap
5708 operands. */
5709 swap = true;
5710 else
5711 add_to_ops_vec (ops, binrhs);
5712
5713 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5714 add_to_ops_vec (ops, binlhs);
5715
5716 if (!swap)
5717 return;
5718 }
5719
5720 if (dump_file && (dump_flags & TDF_DETAILS))
5721 {
5722 fprintf (dump_file, "swapping operands of ");
5723 print_gimple_stmt (dump_file, stmt, 0);
5724 }
5725
5726 swap_ssa_operands (stmt,
5727 gimple_assign_rhs1_ptr (stmt),
5728 gimple_assign_rhs2_ptr (stmt));
5729 update_stmt (stmt);
5730
5731 if (dump_file && (dump_flags & TDF_DETAILS))
5732 {
5733 fprintf (dump_file, " is now ");
5734 print_gimple_stmt (dump_file, stmt, 0);
5735 }
5736 if (!binrhsisreassoc)
5737 return;
5738
5739 /* We want to make it so the lhs is always the reassociative op,
5740 so swap. */
5741 std::swap (binlhs, binrhs);
5742 }
5743 else if (binrhsisreassoc)
5744 {
5745 linearize_expr (stmt);
5746 binlhs = gimple_assign_rhs1 (stmt);
5747 binrhs = gimple_assign_rhs2 (stmt);
5748 }
5749
5750 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5751 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5752 rhscode, loop));
5753 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5754 is_associative, set_visited);
5755
5756 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5757 add_to_ops_vec (ops, binrhs);
5758 }
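/* As a small sketch (hypothetical names), linearizing

     s_4 = s_2 + c_1;
     s_5 = s_4 + d_3;

   starting from s_5 recurses through s_4 (assuming s_4 has a single use
   in the same loop) and leaves OPS holding { c_1, s_2, d_3 }, with each
   statement in the chain marked visited so that reassociate_bb does not
   process it again.  */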
5759
5760 /* Repropagate the negates back into subtracts, since no other pass
5761 currently does it. */
5762
5763 static void
5764 repropagate_negates (void)
5765 {
5766 unsigned int i = 0;
5767 tree negate;
5768
5769 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5770 {
5771 gimple *user = get_single_immediate_use (negate);
5772
5773 if (!user || !is_gimple_assign (user))
5774 continue;
5775
5776 /* The negate operand can be either operand of a PLUS_EXPR
5777 (it can be the LHS if the RHS is a constant for example).
5778
5779 Force the negate operand to the RHS of the PLUS_EXPR, then
5780 transform the PLUS_EXPR into a MINUS_EXPR. */
5781 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5782 {
5783 /* If the negated operand appears on the LHS of the
5784 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5785 to force the negated operand to the RHS of the PLUS_EXPR. */
5786 if (gimple_assign_rhs1 (user) == negate)
5787 {
5788 swap_ssa_operands (user,
5789 gimple_assign_rhs1_ptr (user),
5790 gimple_assign_rhs2_ptr (user));
5791 }
5792
5793 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5794 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5795 if (gimple_assign_rhs2 (user) == negate)
5796 {
5797 tree rhs1 = gimple_assign_rhs1 (user);
5798 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5799 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5800 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5801 update_stmt (user);
5802 }
5803 }
5804 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5805 {
5806 if (gimple_assign_rhs1 (user) == negate)
5807 {
5808 /* We have
5809 x = -a
5810 y = x - b
5811 which we transform into
5812 x = a + b
5813 y = -x .
5814 This pushes down the negate which we possibly can merge
5815 into some other operation, hence insert it into the
5816 plus_negates vector. */
5817 gimple *feed = SSA_NAME_DEF_STMT (negate);
5818 tree a = gimple_assign_rhs1 (feed);
5819 tree b = gimple_assign_rhs2 (user);
5820 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5821 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5822 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5823 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5824 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5825 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5826 user = gsi_stmt (gsi2);
5827 update_stmt (user);
5828 reassoc_remove_stmt (&gsi);
5829 release_defs (feed);
5830 plus_negates.safe_push (gimple_assign_lhs (user));
5831 }
5832 else
5833 {
5834 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5835 rid of one operation. */
5836 gimple *feed = SSA_NAME_DEF_STMT (negate);
5837 tree a = gimple_assign_rhs1 (feed);
5838 tree rhs1 = gimple_assign_rhs1 (user);
5839 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5840 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5841 update_stmt (gsi_stmt (gsi));
5842 }
5843 }
5844 }
5845 }
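/* For the PLUS_EXPR case above, e.g. (hypothetical names)

     t_3 = -a_1;
     x_4 = b_2 + t_3;

   becomes  x_4 = b_2 - a_1  once the negate has been forced to the RHS;
   the now-unused negate is left for DCE to remove.  */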
5846
5847 /* Returns true if OP is of a type for which we can do reassociation.
5848 That is for integral types with wrapping overflow, non-saturating
5849 fixed-point types, and floating-point types under associative-math. */
5850
5851 static bool
5852 can_reassociate_p (tree op)
5853 {
5854 tree type = TREE_TYPE (op);
5855 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5856 return false;
5857 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5858 || NON_SAT_FIXED_POINT_TYPE_P (type)
5859 || (flag_associative_math && FLOAT_TYPE_P (type)))
5860 return true;
5861 return false;
5862 }
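/* For instance, an addition on unsigned int (or on signed int with
   -fwrapv, where overflow wraps) qualifies, while one on signed int with
   undefined overflow, or on double without -fassociative-math, does not.  */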
5863
5864 /* Break up subtract operations in block BB.
5865
5866 We do this top down because we don't know whether the subtract is
5867 part of a possible chain of reassociation except at the top.
5868
5869 IE given
5870 d = f + g
5871 c = a + e
5872 b = c - d
5873 q = b - r
5874 k = t - q
5875
5876 we want to break up k = t - q, but we won't until we've transformed q
5877 = b - r, which won't be broken up until we transform b = c - d.
5878
5879 En passant, clear the GIMPLE visited flag on every statement
5880 and set UIDs within each basic block. */
5881
5882 static void
5883 break_up_subtract_bb (basic_block bb)
5884 {
5885 gimple_stmt_iterator gsi;
5886 basic_block son;
5887 unsigned int uid = 1;
5888
5889 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5890 {
5891 gimple *stmt = gsi_stmt (gsi);
5892 gimple_set_visited (stmt, false);
5893 gimple_set_uid (stmt, uid++);
5894
5895 if (!is_gimple_assign (stmt)
5896 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5897 continue;
5898
5899 /* Look for simple gimple subtract operations. */
5900 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5901 {
5902 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5903 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5904 continue;
5905
5906 /* Check for a subtract used only in an addition. If this
5907 is the case, transform it into add of a negate for better
5908 reassociation. IE transform C = A-B into C = A + -B if C
5909 is only used in an addition. */
5910 if (should_break_up_subtract (stmt))
5911 break_up_subtract (stmt, &gsi);
5912 }
5913 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5914 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5915 plus_negates.safe_push (gimple_assign_lhs (stmt));
5916 }
5917 for (son = first_dom_son (CDI_DOMINATORS, bb);
5918 son;
5919 son = next_dom_son (CDI_DOMINATORS, son))
5920 break_up_subtract_bb (son);
5921 }
5922
5923 /* Used for repeated factor analysis. */
5924 struct repeat_factor
5925 {
5926 /* An SSA name that occurs in a multiply chain. */
5927 tree factor;
5928
5929 /* Cached rank of the factor. */
5930 unsigned rank;
5931
5932 /* Number of occurrences of the factor in the chain. */
5933 HOST_WIDE_INT count;
5934
5935 /* An SSA name representing the product of this factor and
5936 all factors appearing later in the repeated factor vector. */
5937 tree repr;
5938 };
5939
5940
5941 static vec<repeat_factor> repeat_factor_vec;
5942
5943 /* Used for sorting the repeat factor vector. Sort primarily by
5944 ascending occurrence count, secondarily by descending rank. */
5945
5946 static int
5947 compare_repeat_factors (const void *x1, const void *x2)
5948 {
5949 const repeat_factor *rf1 = (const repeat_factor *) x1;
5950 const repeat_factor *rf2 = (const repeat_factor *) x2;
5951
5952 if (rf1->count != rf2->count)
5953 return rf1->count - rf2->count;
5954
5955 return rf2->rank - rf1->rank;
5956 }
5957
5958 /* Look for repeated operands in OPS in the multiply tree rooted at
5959 STMT. Replace them with an optimal sequence of multiplies and powi
5960 builtin calls, and remove the used operands from OPS. Return an
5961 SSA name representing the value of the replacement sequence. */
5962
5963 static tree
5964 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5965 {
5966 unsigned i, j, vec_len;
5967 int ii;
5968 operand_entry *oe;
5969 repeat_factor *rf1, *rf2;
5970 repeat_factor rfnew;
5971 tree result = NULL_TREE;
5972 tree target_ssa, iter_result;
5973 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5974 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5975 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5976 gimple *mul_stmt, *pow_stmt;
5977
5978 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5979 target. */
5980 if (!powi_fndecl)
5981 return NULL_TREE;
5982
5983 /* Allocate the repeated factor vector. */
5984 repeat_factor_vec.create (10);
5985
5986 /* Scan the OPS vector for all SSA names in the product and build
5987 up a vector of occurrence counts for each factor. */
5988 FOR_EACH_VEC_ELT (*ops, i, oe)
5989 {
5990 if (TREE_CODE (oe->op) == SSA_NAME)
5991 {
5992 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5993 {
5994 if (rf1->factor == oe->op)
5995 {
5996 rf1->count += oe->count;
5997 break;
5998 }
5999 }
6000
6001 if (j >= repeat_factor_vec.length ())
6002 {
6003 rfnew.factor = oe->op;
6004 rfnew.rank = oe->rank;
6005 rfnew.count = oe->count;
6006 rfnew.repr = NULL_TREE;
6007 repeat_factor_vec.safe_push (rfnew);
6008 }
6009 }
6010 }
6011
6012 /* Sort the repeated factor vector by (a) increasing occurrence count,
6013 and (b) decreasing rank. */
6014 repeat_factor_vec.qsort (compare_repeat_factors);
6015
6016 /* It is generally best to combine as many base factors as possible
6017 into a product before applying __builtin_powi to the result.
6018 However, the sort order chosen for the repeated factor vector
6019 allows us to cache partial results for the product of the base
6020 factors for subsequent use. When we already have a cached partial
6021 result from a previous iteration, it is best to make use of it
6022 before looking for another __builtin_powi opportunity.
6023
6024 As an example, consider x * x * y * y * y * z * z * z * z.
6025 We want to first compose the product x * y * z, raise it to the
6026 second power, then multiply this by y * z, and finally multiply
6027 by z. This can be done in 5 multiplies provided we cache y * z
6028 for use in both expressions:
6029
6030 t1 = y * z
6031 t2 = t1 * x
6032 t3 = t2 * t2
6033 t4 = t1 * t3
6034 result = t4 * z
6035
6036 If we instead ignored the cached y * z and first multiplied by
6037 the __builtin_powi opportunity z * z, we would get the inferior:
6038
6039 t1 = y * z
6040 t2 = t1 * x
6041 t3 = t2 * t2
6042 t4 = z * z
6043 t5 = t3 * t4
6044 result = t5 * y */
6045
6046 vec_len = repeat_factor_vec.length ();
6047
6048 /* Repeatedly look for opportunities to create a builtin_powi call. */
6049 while (true)
6050 {
6051 HOST_WIDE_INT power;
6052
6053 /* First look for the largest cached product of factors from
6054 preceding iterations. If found, create a builtin_powi for
6055 it if the minimum occurrence count for its factors is at
6056 least 2, or just use this cached product as our next
6057 multiplicand if the minimum occurrence count is 1. */
6058 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6059 {
6060 if (rf1->repr && rf1->count > 0)
6061 break;
6062 }
6063
6064 if (j < vec_len)
6065 {
6066 power = rf1->count;
6067
6068 if (power == 1)
6069 {
6070 iter_result = rf1->repr;
6071
6072 if (dump_file && (dump_flags & TDF_DETAILS))
6073 {
6074 unsigned elt;
6075 repeat_factor *rf;
6076 fputs ("Multiplying by cached product ", dump_file);
6077 for (elt = j; elt < vec_len; elt++)
6078 {
6079 rf = &repeat_factor_vec[elt];
6080 print_generic_expr (dump_file, rf->factor);
6081 if (elt < vec_len - 1)
6082 fputs (" * ", dump_file);
6083 }
6084 fputs ("\n", dump_file);
6085 }
6086 }
6087 else
6088 {
6089 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6090 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6091 build_int_cst (integer_type_node,
6092 power));
6093 gimple_call_set_lhs (pow_stmt, iter_result);
6094 gimple_set_location (pow_stmt, gimple_location (stmt));
6095 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6096 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6097
6098 if (dump_file && (dump_flags & TDF_DETAILS))
6099 {
6100 unsigned elt;
6101 repeat_factor *rf;
6102 fputs ("Building __builtin_pow call for cached product (",
6103 dump_file);
6104 for (elt = j; elt < vec_len; elt++)
6105 {
6106 rf = &repeat_factor_vec[elt];
6107 print_generic_expr (dump_file, rf->factor);
6108 if (elt < vec_len - 1)
6109 fputs (" * ", dump_file);
6110 }
6111 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6112 power);
6113 }
6114 }
6115 }
6116 else
6117 {
6118 /* Otherwise, find the first factor in the repeated factor
6119 vector whose occurrence count is at least 2. If no such
6120 factor exists, there are no builtin_powi opportunities
6121 remaining. */
6122 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6123 {
6124 if (rf1->count >= 2)
6125 break;
6126 }
6127
6128 if (j >= vec_len)
6129 break;
6130
6131 power = rf1->count;
6132
6133 if (dump_file && (dump_flags & TDF_DETAILS))
6134 {
6135 unsigned elt;
6136 repeat_factor *rf;
6137 fputs ("Building __builtin_pow call for (", dump_file);
6138 for (elt = j; elt < vec_len; elt++)
6139 {
6140 rf = &repeat_factor_vec[elt];
6141 print_generic_expr (dump_file, rf->factor);
6142 if (elt < vec_len - 1)
6143 fputs (" * ", dump_file);
6144 }
6145 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6146 }
6147
6148 reassociate_stats.pows_created++;
6149
6150 /* Visit each element of the vector in reverse order (so that
6151 high-occurrence elements are visited first, and within the
6152 same occurrence count, lower-ranked elements are visited
6153 first). Form a linear product of all elements in this order
6154 whose occurrence count is at least that of element J.
6155 Record the SSA name representing the product of each element
6156 with all subsequent elements in the vector. */
6157 if (j == vec_len - 1)
6158 rf1->repr = rf1->factor;
6159 else
6160 {
6161 for (ii = vec_len - 2; ii >= (int)j; ii--)
6162 {
6163 tree op1, op2;
6164
6165 rf1 = &repeat_factor_vec[ii];
6166 rf2 = &repeat_factor_vec[ii + 1];
6167
6168 /* Init the last factor's representative to be itself. */
6169 if (!rf2->repr)
6170 rf2->repr = rf2->factor;
6171
6172 op1 = rf1->factor;
6173 op2 = rf2->repr;
6174
6175 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6176 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6177 op1, op2);
6178 gimple_set_location (mul_stmt, gimple_location (stmt));
6179 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6180 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6181 rf1->repr = target_ssa;
6182
6183 /* Don't reprocess the multiply we just introduced. */
6184 gimple_set_visited (mul_stmt, true);
6185 }
6186 }
6187
6188 /* Form a call to __builtin_powi for the maximum product
6189 just formed, raised to the power obtained earlier. */
6190 rf1 = &repeat_factor_vec[j];
6191 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6192 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6193 build_int_cst (integer_type_node,
6194 power));
6195 gimple_call_set_lhs (pow_stmt, iter_result);
6196 gimple_set_location (pow_stmt, gimple_location (stmt));
6197 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6198 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6199 }
6200
6201 /* If we previously formed at least one other builtin_powi call,
6202 form the product of this one and those others. */
6203 if (result)
6204 {
6205 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6206 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6207 result, iter_result);
6208 gimple_set_location (mul_stmt, gimple_location (stmt));
6209 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6210 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6211 gimple_set_visited (mul_stmt, true);
6212 result = new_result;
6213 }
6214 else
6215 result = iter_result;
6216
6217 /* Decrement the occurrence count of each element in the product
6218 by the count found above, and remove this many copies of each
6219 factor from OPS. */
6220 for (i = j; i < vec_len; i++)
6221 {
6222 unsigned k = power;
6223 unsigned n;
6224
6225 rf1 = &repeat_factor_vec[i];
6226 rf1->count -= power;
6227
6228 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6229 {
6230 if (oe->op == rf1->factor)
6231 {
6232 if (oe->count <= k)
6233 {
6234 ops->ordered_remove (n);
6235 k -= oe->count;
6236
6237 if (k == 0)
6238 break;
6239 }
6240 else
6241 {
6242 oe->count -= k;
6243 break;
6244 }
6245 }
6246 }
6247 }
6248 }
6249
6250 /* At this point all elements in the repeated factor vector have a
6251 remaining occurrence count of 0 or 1, and those with a count of 1
6252 don't have cached representatives. Re-sort the ops vector and
6253 clean up. */
6254 ops->qsort (sort_by_operand_rank);
6255 repeat_factor_vec.release ();
6256
6257 /* Return the final product computed herein. Note that there may
6258 still be some elements with single occurrence count left in OPS;
6259 those will be handled by the normal reassociation logic. */
6260 return result;
6261 }
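/* For the x * x * y * y * y * z * z * z * z example in the comment above,
   the emitted sequence would be roughly (hypothetical names)

     t1 = y * z;
     t2 = t1 * x;
     p1 = __builtin_powi (t2, 2);
     t4 = p1 * t1;
     result = t4 * z;

   with the consumed copies of x, y and z removed from OPS and the final
   product returned to the caller.  */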
6262
6263 /* Attempt to optimize
6264 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6265 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6266
6267 static void
6268 attempt_builtin_copysign (vec<operand_entry *> *ops)
6269 {
6270 operand_entry *oe;
6271 unsigned int i;
6272 unsigned int length = ops->length ();
6273 tree cst = ops->last ()->op;
6274
6275 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6276 return;
6277
6278 FOR_EACH_VEC_ELT (*ops, i, oe)
6279 {
6280 if (TREE_CODE (oe->op) == SSA_NAME
6281 && has_single_use (oe->op))
6282 {
6283 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6284 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6285 {
6286 tree arg0, arg1;
6287 switch (gimple_call_combined_fn (old_call))
6288 {
6289 CASE_CFN_COPYSIGN:
6290 CASE_CFN_COPYSIGN_FN:
6291 arg0 = gimple_call_arg (old_call, 0);
6292 arg1 = gimple_call_arg (old_call, 1);
6293 /* The first argument of copysign must be a constant,
6294 otherwise there's nothing to do. */
6295 if (TREE_CODE (arg0) == REAL_CST)
6296 {
6297 tree type = TREE_TYPE (arg0);
6298 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6299 /* If we couldn't fold to a single constant, skip it.
6300 That happens e.g. for inexact multiplication when
6301 -frounding-math. */
6302 if (mul == NULL_TREE)
6303 break;
6304 /* Instead of adjusting OLD_CALL, let's build a new
6305 call to not leak the LHS and prevent keeping bogus
6306 debug statements. DCE will clean up the old call. */
6307 gcall *new_call;
6308 if (gimple_call_internal_p (old_call))
6309 new_call = gimple_build_call_internal
6310 (IFN_COPYSIGN, 2, mul, arg1);
6311 else
6312 new_call = gimple_build_call
6313 (gimple_call_fndecl (old_call), 2, mul, arg1);
6314 tree lhs = make_ssa_name (type);
6315 gimple_call_set_lhs (new_call, lhs);
6316 gimple_set_location (new_call,
6317 gimple_location (old_call));
6318 insert_stmt_after (new_call, old_call);
6319 /* We've used the constant, get rid of it. */
6320 ops->pop ();
6321 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6322 /* Handle the CST1 < 0 case by negating the result. */
6323 if (cst1_neg)
6324 {
6325 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6326 gimple *negate_stmt
6327 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6328 insert_stmt_after (negate_stmt, new_call);
6329 oe->op = negrhs;
6330 }
6331 else
6332 oe->op = lhs;
6333 if (dump_file && (dump_flags & TDF_DETAILS))
6334 {
6335 fprintf (dump_file, "Optimizing copysign: ");
6336 print_generic_expr (dump_file, cst);
6337 fprintf (dump_file, " * COPYSIGN (");
6338 print_generic_expr (dump_file, arg0);
6339 fprintf (dump_file, ", ");
6340 print_generic_expr (dump_file, arg1);
6341 fprintf (dump_file, ") into %sCOPYSIGN (",
6342 cst1_neg ? "-" : "");
6343 print_generic_expr (dump_file, mul);
6344 fprintf (dump_file, ", ");
6345 print_generic_expr (dump_file, arg1);
6346 fprintf (dump_file, ")\n");
6347 }
6348 return;
6349 }
6350 break;
6351 default:
6352 break;
6353 }
6354 }
6355 }
6356 }
6357 }
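/* As an illustration (hypothetical names), in a product whose constant
   operand is -2.0 and which contains  t_5 = __builtin_copysign (3.0, y_1),
   a new call  copysign (-6.0, y_1)  is built, its result is negated
   because the constant is negative, the -2.0 entry is popped from OPS,
   and the negated result replaces t_5 as the operand.  */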
6358
6359 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6360
6361 static void
6362 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6363 {
6364 tree rhs1;
6365
6366 if (dump_file && (dump_flags & TDF_DETAILS))
6367 {
6368 fprintf (dump_file, "Transforming ");
6369 print_gimple_stmt (dump_file, stmt, 0);
6370 }
6371
6372 rhs1 = gimple_assign_rhs1 (stmt);
6373 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6374 update_stmt (stmt);
6375 remove_visited_stmt_chain (rhs1);
6376
6377 if (dump_file && (dump_flags & TDF_DETAILS))
6378 {
6379 fprintf (dump_file, " into ");
6380 print_gimple_stmt (dump_file, stmt, 0);
6381 }
6382 }
6383
6384 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6385
6386 static void
6387 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6388 tree rhs1, tree rhs2)
6389 {
6390 if (dump_file && (dump_flags & TDF_DETAILS))
6391 {
6392 fprintf (dump_file, "Transforming ");
6393 print_gimple_stmt (dump_file, stmt, 0);
6394 }
6395
6396 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6397 update_stmt (gsi_stmt (*gsi));
6398 remove_visited_stmt_chain (rhs1);
6399
6400 if (dump_file && (dump_flags & TDF_DETAILS))
6401 {
6402 fprintf (dump_file, " into ");
6403 print_gimple_stmt (dump_file, stmt, 0);
6404 }
6405 }
6406
6407 /* Reassociate expressions in basic block BB and its children in the
6408 post-dominator tree.
6409
6410 Bubble up return status from maybe_optimize_range_tests. */
6411
6412 static bool
6413 reassociate_bb (basic_block bb)
6414 {
6415 gimple_stmt_iterator gsi;
6416 basic_block son;
6417 gimple *stmt = last_stmt (bb);
6418 bool cfg_cleanup_needed = false;
6419
6420 if (stmt && !gimple_visited_p (stmt))
6421 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6422
6423 bool do_prev = false;
6424 for (gsi = gsi_last_bb (bb);
6425 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6426 {
6427 do_prev = true;
6428 stmt = gsi_stmt (gsi);
6429
6430 if (is_gimple_assign (stmt)
6431 && !stmt_could_throw_p (cfun, stmt))
6432 {
6433 tree lhs, rhs1, rhs2;
6434 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6435
6436 /* If this was part of an already processed statement,
6437 we don't need to touch it again. */
6438 if (gimple_visited_p (stmt))
6439 {
6440 /* This statement might have become dead because of previous
6441 reassociations. */
6442 if (has_zero_uses (gimple_get_lhs (stmt)))
6443 {
6444 reassoc_remove_stmt (&gsi);
6445 release_defs (stmt);
6446 /* We might end up removing the last stmt above, which
6447 leaves the iterator at the end of the sequence.
6448 Reset it to the last stmt of the block in this case
6449 and make sure we don't do gsi_prev then. */
6450 if (gsi_end_p (gsi))
6451 {
6452 gsi = gsi_last_bb (bb);
6453 do_prev = false;
6454 }
6455 }
6456 continue;
6457 }
6458
6459 /* If this is not a gimple binary expression, there is
6460 nothing for us to do with it. */
6461 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6462 continue;
6463
6464 lhs = gimple_assign_lhs (stmt);
6465 rhs1 = gimple_assign_rhs1 (stmt);
6466 rhs2 = gimple_assign_rhs2 (stmt);
6467
6468 /* For operations other than bitwise and min/max ones we
6469 cannot reassociate all types. Verify that here. */
6470 if (rhs_code != BIT_IOR_EXPR
6471 && rhs_code != BIT_AND_EXPR
6472 && rhs_code != BIT_XOR_EXPR
6473 && rhs_code != MIN_EXPR
6474 && rhs_code != MAX_EXPR
6475 && (!can_reassociate_p (lhs)
6476 || !can_reassociate_p (rhs1)
6477 || !can_reassociate_p (rhs2)))
6478 continue;
6479
6480 if (associative_tree_code (rhs_code))
6481 {
6482 auto_vec<operand_entry *> ops;
6483 tree powi_result = NULL_TREE;
6484 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6485
6486 /* There may be no immediate uses left by the time we
6487 get here because we may have eliminated them all. */
6488 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6489 continue;
6490
6491 gimple_set_visited (stmt, true);
6492 linearize_expr_tree (&ops, stmt, true, true);
6493 ops.qsort (sort_by_operand_rank);
6494 int orig_len = ops.length ();
6495 optimize_ops_list (rhs_code, &ops);
6496 if (undistribute_ops_list (rhs_code, &ops,
6497 loop_containing_stmt (stmt)))
6498 {
6499 ops.qsort (sort_by_operand_rank);
6500 optimize_ops_list (rhs_code, &ops);
6501 }
6502 if (undistribute_bitref_for_vector (rhs_code, &ops,
6503 loop_containing_stmt (stmt)))
6504 {
6505 ops.qsort (sort_by_operand_rank);
6506 optimize_ops_list (rhs_code, &ops);
6507 }
6508 if (rhs_code == PLUS_EXPR
6509 && transform_add_to_multiply (&ops))
6510 ops.qsort (sort_by_operand_rank);
6511
6512 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6513 {
6514 if (is_vector)
6515 optimize_vec_cond_expr (rhs_code, &ops);
6516 else
6517 optimize_range_tests (rhs_code, &ops, NULL);
6518 }
6519
6520 if (rhs_code == MULT_EXPR && !is_vector)
6521 {
6522 attempt_builtin_copysign (&ops);
6523
6524 if (reassoc_insert_powi_p
6525 && flag_unsafe_math_optimizations)
6526 powi_result = attempt_builtin_powi (stmt, &ops);
6527 }
6528
6529 operand_entry *last;
6530 bool negate_result = false;
6531 if (ops.length () > 1
6532 && rhs_code == MULT_EXPR)
6533 {
6534 last = ops.last ();
6535 if ((integer_minus_onep (last->op)
6536 || real_minus_onep (last->op))
6537 && !HONOR_SNANS (TREE_TYPE (lhs))
6538 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6539 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6540 {
6541 ops.pop ();
6542 negate_result = true;
6543 }
6544 }
6545
6546 tree new_lhs = lhs;
6547 /* If the operand vector is now empty, all operands were
6548 consumed by the __builtin_powi optimization. */
6549 if (ops.length () == 0)
6550 transform_stmt_to_copy (&gsi, stmt, powi_result);
6551 else if (ops.length () == 1)
6552 {
6553 tree last_op = ops.last ()->op;
6554
6555 /* If the stmt that defines the operand has to be inserted, insert
6556 it before the use. */
6557 if (ops.last ()->stmt_to_insert)
6558 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6559 if (powi_result)
6560 transform_stmt_to_multiply (&gsi, stmt, last_op,
6561 powi_result);
6562 else
6563 transform_stmt_to_copy (&gsi, stmt, last_op);
6564 }
6565 else
6566 {
6567 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6568 int ops_num = ops.length ();
6569 int width;
6570
6571 /* For binary bit operations, if there are at least 3
6572 operands and the last operand in OPS is a constant,
6573 move it to the front. This helps ensure that we generate
6574 (X & Y) & C rather than (X & C) & Y. The former will
6575 often match a canonical bit test when we get to RTL. */
6576 if (ops.length () > 2
6577 && (rhs_code == BIT_AND_EXPR
6578 || rhs_code == BIT_IOR_EXPR
6579 || rhs_code == BIT_XOR_EXPR)
6580 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6581 std::swap (*ops[0], *ops[ops_num - 1]);
6582
6583 /* Only rewrite the expression tree to parallel in the
6584 last reassoc pass to avoid useless work back-and-forth
6585 with initial linearization. */
6586 if (!reassoc_insert_powi_p
6587 && ops.length () > 3
6588 && (width = get_reassociation_width (ops_num, rhs_code,
6589 mode)) > 1)
6590 {
6591 if (dump_file && (dump_flags & TDF_DETAILS))
6592 fprintf (dump_file,
6593 "Width = %d was chosen for reassociation\n",
6594 width);
6595 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6596 width, ops);
6597 }
6598 else
6599 {
6600 /* When there are three operands left, we want
6601 to make sure the ones that get the double
6602 binary op are chosen wisely. */
6603 int len = ops.length ();
6604 if (len >= 3)
6605 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6606
6607 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6608 powi_result != NULL
6609 || negate_result,
6610 len != orig_len);
6611 }
6612
6613 /* If we combined some repeated factors into a
6614 __builtin_powi call, multiply that result by the
6615 reassociated operands. */
6616 if (powi_result)
6617 {
6618 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6619 tree type = TREE_TYPE (lhs);
6620 tree target_ssa = make_temp_ssa_name (type, NULL,
6621 "reassocpow");
6622 gimple_set_lhs (lhs_stmt, target_ssa);
6623 update_stmt (lhs_stmt);
6624 if (lhs != new_lhs)
6625 {
6626 target_ssa = new_lhs;
6627 new_lhs = lhs;
6628 }
6629 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6630 powi_result, target_ssa);
6631 gimple_set_location (mul_stmt, gimple_location (stmt));
6632 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6633 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6634 }
6635 }
6636
6637 if (negate_result)
6638 {
6639 stmt = SSA_NAME_DEF_STMT (lhs);
6640 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6641 gimple_set_lhs (stmt, tmp);
6642 if (lhs != new_lhs)
6643 tmp = new_lhs;
6644 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6645 tmp);
6646 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6647 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6648 update_stmt (stmt);
6649 }
6650 }
6651 }
6652 }
6653 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6654 son;
6655 son = next_dom_son (CDI_POST_DOMINATORS, son))
6656 cfg_cleanup_needed |= reassociate_bb (son);
6657
6658 return cfg_cleanup_needed;
6659 }
6660
6661 /* Add jumps around shifts for range tests turned into bit tests.
6662 For each SSA_NAME VAR we have code like:
6663 VAR = ...; // final stmt of range comparison
6664 // bit test here...;
6665 OTHERVAR = ...; // final stmt of the bit test sequence
6666 RES = VAR | OTHERVAR;
6667 Turn the above into:
6668 VAR = ...;
6669 if (VAR != 0)
6670 goto <l3>;
6671 else
6672 goto <l2>;
6673 <l2>:
6674 // bit test here...;
6675 OTHERVAR = ...;
6676 <l3>:
6677 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6678
6679 static void
6680 branch_fixup (void)
6681 {
6682 tree var;
6683 unsigned int i;
6684
6685 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6686 {
6687 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6688 gimple *use_stmt;
6689 use_operand_p use;
6690 bool ok = single_imm_use (var, &use, &use_stmt);
6691 gcc_assert (ok
6692 && is_gimple_assign (use_stmt)
6693 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6694 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6695
6696 basic_block cond_bb = gimple_bb (def_stmt);
6697 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6698 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6699
6700 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6701 gimple *g = gimple_build_cond (NE_EXPR, var,
6702 build_zero_cst (TREE_TYPE (var)),
6703 NULL_TREE, NULL_TREE);
6704 location_t loc = gimple_location (use_stmt);
6705 gimple_set_location (g, loc);
6706 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6707
6708 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6709 etrue->probability = profile_probability::even ();
6710 edge efalse = find_edge (cond_bb, then_bb);
6711 efalse->flags = EDGE_FALSE_VALUE;
6712 efalse->probability -= etrue->probability;
6713 then_bb->count -= etrue->count ();
6714
6715 tree othervar = NULL_TREE;
6716 if (gimple_assign_rhs1 (use_stmt) == var)
6717 othervar = gimple_assign_rhs2 (use_stmt);
6718 else if (gimple_assign_rhs2 (use_stmt) == var)
6719 othervar = gimple_assign_rhs1 (use_stmt);
6720 else
6721 gcc_unreachable ();
6722 tree lhs = gimple_assign_lhs (use_stmt);
6723 gphi *phi = create_phi_node (lhs, merge_bb);
6724 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6725 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6726 gsi = gsi_for_stmt (use_stmt);
6727 gsi_remove (&gsi, true);
6728
6729 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6730 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6731 }
6732 reassoc_branch_fixups.release ();
6733 }
6734
6735 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6736 void debug_ops_vector (vec<operand_entry *> ops);
6737
6738 /* Dump the operand entry vector OPS to FILE. */
6739
6740 void
6741 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6742 {
6743 operand_entry *oe;
6744 unsigned int i;
6745
6746 FOR_EACH_VEC_ELT (ops, i, oe)
6747 {
6748 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6749 print_generic_expr (file, oe->op);
6750 fprintf (file, "\n");
6751 }
6752 }
6753
6754 /* Dump the operand entry vector OPS to STDERR. */
6755
6756 DEBUG_FUNCTION void
6757 debug_ops_vector (vec<operand_entry *> ops)
6758 {
6759 dump_ops_vector (stderr, ops);
6760 }
6761
6762 /* Bubble up return status from reassociate_bb. */
6763
6764 static bool
6765 do_reassoc (void)
6766 {
6767 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6768 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6769 }
6770
6771 /* Initialize the reassociation pass. */
6772
6773 static void
6774 init_reassoc (void)
6775 {
6776 int i;
6777 long rank = 2;
6778 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6779
6780 /* Find the loops, so that we can prevent moving calculations in
6781 them. */
6782 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6783
6784 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6785
6786 next_operand_entry_id = 0;
6787
6788 /* Reverse RPO (Reverse Post Order) will give us something where
6789 deeper loops come later. */
6790 pre_and_rev_post_order_compute (NULL, bbs, false);
6791 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6792 operand_rank = new hash_map<tree, long>;
6793
6794 /* Give each default definition a distinct rank. This includes
6795 parameters and the static chain. Walk backwards over all
6796 SSA names so that we get proper rank ordering according
6797 to tree_swap_operands_p. */
6798 for (i = num_ssa_names - 1; i > 0; --i)
6799 {
6800 tree name = ssa_name (i);
6801 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6802 insert_operand_rank (name, ++rank);
6803 }
6804
6805 /* Set up rank for each BB */
6806 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6807 bb_rank[bbs[i]] = ++rank << 16;
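  /* The shift by 16 presumably leaves the low bits as headroom for the
     per-statement rank increments computed later in the pass.  */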
6808
6809 free (bbs);
6810 calculate_dominance_info (CDI_POST_DOMINATORS);
6811 plus_negates = vNULL;
6812 }
6813
6814 /* Cleanup after the reassociation pass, and print stats if
6815 requested. */
6816
6817 static void
6818 fini_reassoc (void)
6819 {
6820 statistics_counter_event (cfun, "Linearized",
6821 reassociate_stats.linearized);
6822 statistics_counter_event (cfun, "Constants eliminated",
6823 reassociate_stats.constants_eliminated);
6824 statistics_counter_event (cfun, "Ops eliminated",
6825 reassociate_stats.ops_eliminated);
6826 statistics_counter_event (cfun, "Statements rewritten",
6827 reassociate_stats.rewritten);
6828 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6829 reassociate_stats.pows_encountered);
6830 statistics_counter_event (cfun, "Built-in powi calls created",
6831 reassociate_stats.pows_created);
6832
6833 delete operand_rank;
6834 operand_entry_pool.release ();
6835 free (bb_rank);
6836 plus_negates.release ();
6837 free_dominance_info (CDI_POST_DOMINATORS);
6838 loop_optimizer_finalize ();
6839 }
6840
6841 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6842 insertion of __builtin_powi calls.
6843
6844 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6845 optimization of a gimple conditional. Otherwise returns zero. */
6846
6847 static unsigned int
6848 execute_reassoc (bool insert_powi_p)
6849 {
6850 reassoc_insert_powi_p = insert_powi_p;
6851
6852 init_reassoc ();
6853
6854 bool cfg_cleanup_needed;
6855 cfg_cleanup_needed = do_reassoc ();
6856 repropagate_negates ();
6857 branch_fixup ();
6858
6859 fini_reassoc ();
6860 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6861 }
6862
6863 namespace {
6864
6865 const pass_data pass_data_reassoc =
6866 {
6867 GIMPLE_PASS, /* type */
6868 "reassoc", /* name */
6869 OPTGROUP_NONE, /* optinfo_flags */
6870 TV_TREE_REASSOC, /* tv_id */
6871 ( PROP_cfg | PROP_ssa ), /* properties_required */
6872 0, /* properties_provided */
6873 0, /* properties_destroyed */
6874 0, /* todo_flags_start */
6875 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6876 };
6877
6878 class pass_reassoc : public gimple_opt_pass
6879 {
6880 public:
6881 pass_reassoc (gcc::context *ctxt)
6882 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6883 {}
6884
6885 /* opt_pass methods: */
6886 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6887 void set_pass_param (unsigned int n, bool param)
6888 {
6889 gcc_assert (n == 0);
6890 insert_powi_p = param;
6891 }
6892 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6893 virtual unsigned int execute (function *)
6894 { return execute_reassoc (insert_powi_p); }
6895
6896 private:
6897 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6898 point 3a in the pass header comment. */
6899 bool insert_powi_p;
6900 }; // class pass_reassoc
6901
6902 } // anon namespace
6903
6904 gimple_opt_pass *
6905 make_pass_reassoc (gcc::context *ctxt)
6906 {
6907 return new pass_reassoc (ctxt);
6908 }