1 /* Reassociation for trees.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
58
59 It consists of five steps:
60
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
63
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
68
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
71
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
75
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
78
79 5. Repropagate negates, as nothing else will clean it up ATM.
80
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
83
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
86
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
91
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
96
97 I.e., if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
99
100 a (1), b (1), c (1), d (2), e (2)
101
102
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
105
106 You want to first merge all leaves of the same rank, as much as
107 possible.
108
109 So first build a binary op of
110
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
112
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
115
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
120
121 Then build a binary op of d + e
122 mergetmp2 = d + e
123
124 and put mergetmp2 on the merge worklist.
125
126 so merge worklist = {mergetmp, c, mergetmp2}
127
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
130
131 So we have
132
133 build binary op
134 mergetmp3 = mergetmp + c
135
136 worklist = {mergetmp2, mergetmp3}
137
138 mergetmp4 = mergetmp2 + mergetmp3
139
140 worklist = {mergetmp4}
141
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
144
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
147
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
150
151 So why don't we do this?
152
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
158
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
161
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
165
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
169
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
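
/* For illustration (a sketch with made-up variable names): steps 1 and 3
   work together to expose simple cancellations.  Breaking the subtraction
   up,

     x = a - b;                x = a + (-b);
     y = x + b;    becomes     y = a + (-b) + b;

   and the operand list optimization cancels (-b) + b, so y reduces to a.  */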
175
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
179
180 /* Statistics */
181 static struct
182 {
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
190
191 /* Operator, rank pair. */
192 struct operand_entry
193 {
194 unsigned int rank;
195 unsigned int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
199 };
200
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
203
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static unsigned int next_operand_entry_id;
207
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
212
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
215
216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
217 all basic blocks, the CFG should be adjusted - the basic block is
218 split right after that SSA_NAME's definition statement and before
219 the only use, which must be a bit ior. */
220 static vec<tree> reassoc_branch_fixups;
221
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
225
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
228
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 {
232 gimple *stmt = gsi_stmt (*gsi);
233
234 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
236
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 {
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
252 }
253 return ret;
254 }
255
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
260
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
270 {
271 basic_block bb = gimple_bb (stmt);
272 class loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
277
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
281
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
286
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
291
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
297
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 {
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 {
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
308 }
309 }
310
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
313 }
314
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
320 {
321 gimple *phi_stmt;
322 long block_rank;
323
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
327
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
329
330 if (gimple_code (phi_stmt) != GIMPLE_PHI)
331 return false;
332
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
340
341 return false;
342 }
343
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
350 {
351 long op_rank;
352
353 if (loop_carried_phi (op))
354 return rank;
355
356 op_rank = get_rank (op);
357
358 return MAX (rank, op_rank);
359 }
360
361 /* Look up the operand rank structure for expression E. */
362
363 static inline long
364 find_operand_rank (tree e)
365 {
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
368 }
369
370 /* Insert {E,RANK} into the operand rank hashtable. */
371
372 static inline void
373 insert_operand_rank (tree e, long rank)
374 {
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
377 }
378
379 /* Given an expression E, return the rank of the expression. */
380
381 static long
382 get_rank (tree e)
383 {
384 /* SSA_NAME's have the rank of the expression they are the result
385 of.
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal, however, it gives good
393 results. */
394
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
398
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
403
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
407
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
412
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
415
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
420
421 if (TREE_CODE (e) == SSA_NAME)
422 {
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
427
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
430
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
434
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
437
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
442
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446 /* Simply walk over all SSA uses - this takes advantage of the
447 fact that non-SSA operands are is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
452
453 if (dump_file && (dump_flags & TDF_DETAILS))
454 {
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
458 }
459
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
463 }
464
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
467 }
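
/* A small worked example of the ranking above (a sketch, not taken from
   real dumps): constants have rank 0 and each simple operation gets one
   more than the maximum rank of its operands, so if rank (a), rank (b)
   and rank (c) are all 1, then for

     t1 = a + b;
     t2 = t1 + c;
     t3 = t2 + 4;

   rank (t1) == 2, rank (t2) == 3 and rank (t3) == 4, and the constant 4
   will sort after every SSA operand in the operand list.  */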
468
469
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 4
473 #define FLOAT_ONE_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
476
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
481 {
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485 {
486 /* Sort -1.0 and 1.0 constants last: while const_binop can't
487 optimize some inexact operations, multiplication by -1.0 or
488 1.0 can always be merged with other constants. */
489 if (real_onep (t) || real_minus_onep (t))
490 return FLOAT_ONE_CONST_TYPE;
491 return FLOAT_CONST_TYPE;
492 }
493 else
494 return OTHER_CONST_TYPE;
495 }
496
497 /* qsort comparison function to sort operand entries PA and PB by rank
498 so that the sorted array is ordered by rank in decreasing order. */
499 static int
500 sort_by_operand_rank (const void *pa, const void *pb)
501 {
502 const operand_entry *oea = *(const operand_entry *const *)pa;
503 const operand_entry *oeb = *(const operand_entry *const *)pb;
504
505 if (oeb->rank != oea->rank)
506 return oeb->rank > oea->rank ? 1 : -1;
507
508 /* It's nicer for optimize_expression if constants that are likely
509 to fold when added/multiplied/whatever are put next to each
510 other. Since all constants have rank 0, order them by type. */
511 if (oea->rank == 0)
512 {
513 if (constant_type (oeb->op) != constant_type (oea->op))
514 return constant_type (oea->op) - constant_type (oeb->op);
515 else
516 /* To make sorting result stable, we use unique IDs to determine
517 order. */
518 return oeb->id > oea->id ? 1 : -1;
519 }
520
521 if (TREE_CODE (oea->op) != SSA_NAME)
522 {
523 if (TREE_CODE (oeb->op) != SSA_NAME)
524 return oeb->id > oea->id ? 1 : -1;
525 else
526 return 1;
527 }
528 else if (TREE_CODE (oeb->op) != SSA_NAME)
529 return -1;
530
531 /* Lastly, make sure the versions that are the same go next to each
532 other. */
533 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
534 {
535 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
536 versions of removed SSA_NAMEs, so if possible prefer to sort
537 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
538 See PR60418. */
539 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
540 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
541 basic_block bba = gimple_bb (stmta);
542 basic_block bbb = gimple_bb (stmtb);
543 if (bbb != bba)
544 {
545 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
546 but the other might not. */
547 if (!bba)
548 return 1;
549 if (!bbb)
550 return -1;
551 /* If neither is, compare bb_rank. */
552 if (bb_rank[bbb->index] != bb_rank[bba->index])
553 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
554 }
555
556 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
557 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
558 if (da != db)
559 return da ? 1 : -1;
560
561 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
562 }
563
564 return oeb->id > oea->id ? 1 : -1;
565 }
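
/* For illustration (a sketch with made-up names): sorting the PLUS_EXPR
   operand list { 5, x_1 (rank 2), 7, y_2 (rank 1) } by decreasing rank
   gives { x_1, y_2, ... } with the two integer constants adjacent at the
   end, where the later operand list optimization can fold them to 12.  */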
566
567 /* Add an operand entry to *OPS for the tree operand OP. */
568
569 static void
570 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
571 {
572 operand_entry *oe = operand_entry_pool.allocate ();
573
574 oe->op = op;
575 oe->rank = get_rank (op);
576 oe->id = next_operand_entry_id++;
577 oe->count = 1;
578 oe->stmt_to_insert = stmt_to_insert;
579 ops->safe_push (oe);
580 }
581
582 /* Add an operand entry to *OPS for the tree operand OP with repeat
583 count REPEAT. */
584
585 static void
586 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
587 HOST_WIDE_INT repeat)
588 {
589 operand_entry *oe = operand_entry_pool.allocate ();
590
591 oe->op = op;
592 oe->rank = get_rank (op);
593 oe->id = next_operand_entry_id++;
594 oe->count = repeat;
595 oe->stmt_to_insert = NULL;
596 ops->safe_push (oe);
597
598 reassociate_stats.pows_encountered++;
599 }
600
601 /* Return true if STMT is a reassociable operation containing a binary
602 operation with tree code CODE, and is inside LOOP. */
603
604 static bool
605 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
606 {
607 basic_block bb = gimple_bb (stmt);
608
609 if (bb == NULL)
610 return false;
611
612 if (!flow_bb_inside_loop_p (loop, bb))
613 return false;
614
615 if (is_gimple_assign (stmt)
616 && gimple_assign_rhs_code (stmt) == code
617 && has_single_use (gimple_assign_lhs (stmt)))
618 {
619 tree rhs1 = gimple_assign_rhs1 (stmt);
620 tree rhs2 = gimple_assign_rhs2 (stmt);
621 if (TREE_CODE (rhs1) == SSA_NAME
622 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
623 return false;
624 if (rhs2
625 && TREE_CODE (rhs2) == SSA_NAME
626 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
627 return false;
628 return true;
629 }
630
631 return false;
632 }
633
634
635 /* Return true if STMT is a nop-conversion. */
636
637 static bool
638 gimple_nop_conversion_p (gimple *stmt)
639 {
640 if (gassign *ass = dyn_cast <gassign *> (stmt))
641 {
642 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
643 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
644 TREE_TYPE (gimple_assign_rhs1 (ass))))
645 return true;
646 }
647 return false;
648 }
649
650 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
651 operand of that operation. Otherwise, return NULL_TREE. */
652
653 static tree
654 get_unary_op (tree name, enum tree_code opcode)
655 {
656 gimple *stmt = SSA_NAME_DEF_STMT (name);
657
658 /* Look through nop conversions (sign changes). */
659 if (gimple_nop_conversion_p (stmt)
660 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
661 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
662
663 if (!is_gimple_assign (stmt))
664 return NULL_TREE;
665
666 if (gimple_assign_rhs_code (stmt) == opcode)
667 return gimple_assign_rhs1 (stmt);
668 return NULL_TREE;
669 }
670
671 /* Return true if OP1 and OP2 have the same value if cast to either type. */
672
673 static bool
674 ops_equal_values_p (tree op1, tree op2)
675 {
676 if (op1 == op2)
677 return true;
678
679 tree orig_op1 = op1;
680 if (TREE_CODE (op1) == SSA_NAME)
681 {
682 gimple *stmt = SSA_NAME_DEF_STMT (op1);
683 if (gimple_nop_conversion_p (stmt))
684 {
685 op1 = gimple_assign_rhs1 (stmt);
686 if (op1 == op2)
687 return true;
688 }
689 }
690
691 if (TREE_CODE (op2) == SSA_NAME)
692 {
693 gimple *stmt = SSA_NAME_DEF_STMT (op2);
694 if (gimple_nop_conversion_p (stmt))
695 {
696 op2 = gimple_assign_rhs1 (stmt);
697 if (op1 == op2
698 || orig_op1 == op2)
699 return true;
700 }
701 }
702
703 return false;
704 }
705
706
707 /* If CURR and LAST are a pair of ops that OPCODE allows us to
708 eliminate through equivalences, do so, remove them from OPS, and
709 return true. Otherwise, return false. */
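
/* For example (a sketch): with BIT_AND_EXPR the operand list { a, a, b }
   becomes { a, b } since a & a == a, while with BIT_XOR_EXPR both copies
   are dropped, so { a, a, b } becomes { b } and { a, a } becomes { 0 }.  */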
710
711 static bool
712 eliminate_duplicate_pair (enum tree_code opcode,
713 vec<operand_entry *> *ops,
714 bool *all_done,
715 unsigned int i,
716 operand_entry *curr,
717 operand_entry *last)
718 {
719
720 /* If we have two of the same op, and the opcode is &, |, min, or max,
721 we can eliminate one of them.
722 If we have two of the same op, and the opcode is ^, we can
723 eliminate both of them. */
724
725 if (last && last->op == curr->op)
726 {
727 switch (opcode)
728 {
729 case MAX_EXPR:
730 case MIN_EXPR:
731 case BIT_IOR_EXPR:
732 case BIT_AND_EXPR:
733 if (dump_file && (dump_flags & TDF_DETAILS))
734 {
735 fprintf (dump_file, "Equivalence: ");
736 print_generic_expr (dump_file, curr->op);
737 fprintf (dump_file, " [&|minmax] ");
738 print_generic_expr (dump_file, last->op);
739 fprintf (dump_file, " -> ");
740 print_generic_stmt (dump_file, last->op);
741 }
742
743 ops->ordered_remove (i);
744 reassociate_stats.ops_eliminated ++;
745
746 return true;
747
748 case BIT_XOR_EXPR:
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 {
751 fprintf (dump_file, "Equivalence: ");
752 print_generic_expr (dump_file, curr->op);
753 fprintf (dump_file, " ^ ");
754 print_generic_expr (dump_file, last->op);
755 fprintf (dump_file, " -> nothing\n");
756 }
757
758 reassociate_stats.ops_eliminated += 2;
759
760 if (ops->length () == 2)
761 {
762 ops->truncate (0);
763 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
764 *all_done = true;
765 }
766 else
767 {
768 ops->ordered_remove (i-1);
769 ops->ordered_remove (i-1);
770 }
771
772 return true;
773
774 default:
775 break;
776 }
777 }
778 return false;
779 }
780
781 static vec<tree> plus_negates;
782
783 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
784 expression, look in OPS for a corresponding positive operation to cancel
785 it out. If we find one, remove the other from OPS, replace
786 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
787 return false. */
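
/* For example (a sketch with made-up names): in the PLUS_EXPR operand
   list for t = a + (-b) + b the entries -b and b cancel, leaving
   { a, 0 }, and in t = c + ~x + x the pair ~x and x combines to -1,
   leaving { c, -1 }.  */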
788
789 static bool
790 eliminate_plus_minus_pair (enum tree_code opcode,
791 vec<operand_entry *> *ops,
792 unsigned int currindex,
793 operand_entry *curr)
794 {
795 tree negateop;
796 tree notop;
797 unsigned int i;
798 operand_entry *oe;
799
800 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
801 return false;
802
803 negateop = get_unary_op (curr->op, NEGATE_EXPR);
804 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
805 if (negateop == NULL_TREE && notop == NULL_TREE)
806 return false;
807
808 /* Any non-negated version will have a rank that is one less than
809 the current rank. So once we hit those ranks, if we don't find
810 one, we can stop. */
811
812 for (i = currindex + 1;
813 ops->iterate (i, &oe)
814 && oe->rank >= curr->rank - 1 ;
815 i++)
816 {
817 if (negateop
818 && ops_equal_values_p (oe->op, negateop))
819 {
820 if (dump_file && (dump_flags & TDF_DETAILS))
821 {
822 fprintf (dump_file, "Equivalence: ");
823 print_generic_expr (dump_file, negateop);
824 fprintf (dump_file, " + -");
825 print_generic_expr (dump_file, oe->op);
826 fprintf (dump_file, " -> 0\n");
827 }
828
829 ops->ordered_remove (i);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
831 ops->ordered_remove (currindex);
832 reassociate_stats.ops_eliminated ++;
833
834 return true;
835 }
836 else if (notop
837 && ops_equal_values_p (oe->op, notop))
838 {
839 tree op_type = TREE_TYPE (oe->op);
840
841 if (dump_file && (dump_flags & TDF_DETAILS))
842 {
843 fprintf (dump_file, "Equivalence: ");
844 print_generic_expr (dump_file, notop);
845 fprintf (dump_file, " + ~");
846 print_generic_expr (dump_file, oe->op);
847 fprintf (dump_file, " -> -1\n");
848 }
849
850 ops->ordered_remove (i);
851 add_to_ops_vec (ops, build_all_ones_cst (op_type));
852 ops->ordered_remove (currindex);
853 reassociate_stats.ops_eliminated ++;
854
855 return true;
856 }
857 }
858
859 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
860 save it for later inspection in repropagate_negates(). */
861 if (negateop != NULL_TREE
862 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
863 plus_negates.safe_push (curr->op);
864
865 return false;
866 }
867
868 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
869 bitwise not expression, look in OPS for a corresponding operand to
870 cancel it out. If we find one, collapse the whole operand list to a
871 single 0 (for BIT_AND_EXPR) or -1 (for BIT_IOR_EXPR) and return true.
872 Otherwise, return false. */
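
/* For example (a sketch): once X and ~X are both found in the operand
   list, the whole chain collapses: a & x & ~x & b becomes 0 and
   a | x | ~x | b becomes -1, regardless of the remaining operands.  */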
873
874 static bool
875 eliminate_not_pairs (enum tree_code opcode,
876 vec<operand_entry *> *ops,
877 unsigned int currindex,
878 operand_entry *curr)
879 {
880 tree notop;
881 unsigned int i;
882 operand_entry *oe;
883
884 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
885 || TREE_CODE (curr->op) != SSA_NAME)
886 return false;
887
888 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
889 if (notop == NULL_TREE)
890 return false;
891
892 /* Any non-not version will have a rank that is one less than
893 the current rank. So once we hit those ranks, if we don't find
894 one, we can stop. */
895
896 for (i = currindex + 1;
897 ops->iterate (i, &oe)
898 && oe->rank >= curr->rank - 1;
899 i++)
900 {
901 if (oe->op == notop)
902 {
903 if (dump_file && (dump_flags & TDF_DETAILS))
904 {
905 fprintf (dump_file, "Equivalence: ");
906 print_generic_expr (dump_file, notop);
907 if (opcode == BIT_AND_EXPR)
908 fprintf (dump_file, " & ~");
909 else if (opcode == BIT_IOR_EXPR)
910 fprintf (dump_file, " | ~");
911 print_generic_expr (dump_file, oe->op);
912 if (opcode == BIT_AND_EXPR)
913 fprintf (dump_file, " -> 0\n");
914 else if (opcode == BIT_IOR_EXPR)
915 fprintf (dump_file, " -> -1\n");
916 }
917
918 if (opcode == BIT_AND_EXPR)
919 oe->op = build_zero_cst (TREE_TYPE (oe->op));
920 else if (opcode == BIT_IOR_EXPR)
921 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
922
923 reassociate_stats.ops_eliminated += ops->length () - 1;
924 ops->truncate (0);
925 ops->quick_push (oe);
926 return true;
927 }
928 }
929
930 return false;
931 }
932
933 /* Use constant value that may be present in OPS to try to eliminate
934 operands. Note that this function is only really used when we've
935 eliminated ops for other reasons, or merged constants. Across
936 single statements, fold already does all of this, plus more. There
937 is little point in duplicating logic, so I've only included the
938 identities that I could ever construct testcases to trigger. */
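
/* For example (a sketch): { a, b, 0 } under BIT_AND_EXPR collapses to
   { 0 }, { a, b, -1 } under BIT_AND_EXPR simply drops the -1, and
   { a, b, 1 } under MULT_EXPR drops the 1.  */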
939
940 static void
941 eliminate_using_constants (enum tree_code opcode,
942 vec<operand_entry *> *ops)
943 {
944 operand_entry *oelast = ops->last ();
945 tree type = TREE_TYPE (oelast->op);
946
947 if (oelast->rank == 0
948 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
949 {
950 switch (opcode)
951 {
952 case BIT_AND_EXPR:
953 if (integer_zerop (oelast->op))
954 {
955 if (ops->length () != 1)
956 {
957 if (dump_file && (dump_flags & TDF_DETAILS))
958 fprintf (dump_file, "Found & 0, removing all other ops\n");
959
960 reassociate_stats.ops_eliminated += ops->length () - 1;
961
962 ops->truncate (0);
963 ops->quick_push (oelast);
964 return;
965 }
966 }
967 else if (integer_all_onesp (oelast->op))
968 {
969 if (ops->length () != 1)
970 {
971 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Found & -1, removing\n");
973 ops->pop ();
974 reassociate_stats.ops_eliminated++;
975 }
976 }
977 break;
978 case BIT_IOR_EXPR:
979 if (integer_all_onesp (oelast->op))
980 {
981 if (ops->length () != 1)
982 {
983 if (dump_file && (dump_flags & TDF_DETAILS))
984 fprintf (dump_file, "Found | -1, removing all other ops\n");
985
986 reassociate_stats.ops_eliminated += ops->length () - 1;
987
988 ops->truncate (0);
989 ops->quick_push (oelast);
990 return;
991 }
992 }
993 else if (integer_zerop (oelast->op))
994 {
995 if (ops->length () != 1)
996 {
997 if (dump_file && (dump_flags & TDF_DETAILS))
998 fprintf (dump_file, "Found | 0, removing\n");
999 ops->pop ();
1000 reassociate_stats.ops_eliminated++;
1001 }
1002 }
1003 break;
1004 case MULT_EXPR:
1005 if (integer_zerop (oelast->op)
1006 || (FLOAT_TYPE_P (type)
1007 && !HONOR_NANS (type)
1008 && !HONOR_SIGNED_ZEROS (type)
1009 && real_zerop (oelast->op)))
1010 {
1011 if (ops->length () != 1)
1012 {
1013 if (dump_file && (dump_flags & TDF_DETAILS))
1014 fprintf (dump_file, "Found * 0, removing all other ops\n");
1015
1016 reassociate_stats.ops_eliminated += ops->length () - 1;
1017 ops->truncate (0);
1018 ops->quick_push (oelast);
1019 return;
1020 }
1021 }
1022 else if (integer_onep (oelast->op)
1023 || (FLOAT_TYPE_P (type)
1024 && !HONOR_SNANS (type)
1025 && real_onep (oelast->op)))
1026 {
1027 if (ops->length () != 1)
1028 {
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1030 fprintf (dump_file, "Found * 1, removing\n");
1031 ops->pop ();
1032 reassociate_stats.ops_eliminated++;
1033 return;
1034 }
1035 }
1036 break;
1037 case BIT_XOR_EXPR:
1038 case PLUS_EXPR:
1039 case MINUS_EXPR:
1040 if (integer_zerop (oelast->op)
1041 || (FLOAT_TYPE_P (type)
1042 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1043 && fold_real_zero_addition_p (type, oelast->op,
1044 opcode == MINUS_EXPR)))
1045 {
1046 if (ops->length () != 1)
1047 {
1048 if (dump_file && (dump_flags & TDF_DETAILS))
1049 fprintf (dump_file, "Found [|^+] 0, removing\n");
1050 ops->pop ();
1051 reassociate_stats.ops_eliminated++;
1052 return;
1053 }
1054 }
1055 break;
1056 default:
1057 break;
1058 }
1059 }
1060 }
1061
1062
1063 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1064 bool, bool);
1065
1066 /* Structure for tracking and counting operands. */
1067 struct oecount {
1068 unsigned int cnt;
1069 unsigned int id;
1070 enum tree_code oecode;
1071 tree op;
1072 };
1073
1074
1075 /* The heap for the oecount hashtable and the sorted list of operands. */
1076 static vec<oecount> cvec;
1077
1078
1079 /* Oecount hashtable helpers. */
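
/* The values stored in this table are indices into CVEC biased by 42
   (see the uses in undistribute_ops_list), which keeps every valid entry
   distinct from the empty (0) and deleted (1) markers of int_hash.  */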
1080
1081 struct oecount_hasher : int_hash <int, 0, 1>
1082 {
1083 static inline hashval_t hash (int);
1084 static inline bool equal (int, int);
1085 };
1086
1087 /* Hash function for oecount. */
1088
1089 inline hashval_t
1090 oecount_hasher::hash (int p)
1091 {
1092 const oecount *c = &cvec[p - 42];
1093 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1094 }
1095
1096 /* Comparison function for oecount. */
1097
1098 inline bool
1099 oecount_hasher::equal (int p1, int p2)
1100 {
1101 const oecount *c1 = &cvec[p1 - 42];
1102 const oecount *c2 = &cvec[p2 - 42];
1103 return c1->oecode == c2->oecode && c1->op == c2->op;
1104 }
1105
1106 /* Comparison function for qsort sorting oecount elements by count. */
1107
1108 static int
1109 oecount_cmp (const void *p1, const void *p2)
1110 {
1111 const oecount *c1 = (const oecount *)p1;
1112 const oecount *c2 = (const oecount *)p2;
1113 if (c1->cnt != c2->cnt)
1114 return c1->cnt > c2->cnt ? 1 : -1;
1115 else
1116 /* If counts are identical, use unique IDs to stabilize qsort. */
1117 return c1->id > c2->id ? 1 : -1;
1118 }
1119
1120 /* Return TRUE iff STMT represents a builtin call that raises OP
1121 to some exponent. */
1122
1123 static bool
1124 stmt_is_power_of_op (gimple *stmt, tree op)
1125 {
1126 if (!is_gimple_call (stmt))
1127 return false;
1128
1129 switch (gimple_call_combined_fn (stmt))
1130 {
1131 CASE_CFN_POW:
1132 CASE_CFN_POWI:
1133 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1134
1135 default:
1136 return false;
1137 }
1138 }
1139
1140 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1141 in place and return the result. Assumes that stmt_is_power_of_op
1142 was previously called for STMT and returned TRUE. */
1143
1144 static HOST_WIDE_INT
1145 decrement_power (gimple *stmt)
1146 {
1147 REAL_VALUE_TYPE c, cint;
1148 HOST_WIDE_INT power;
1149 tree arg1;
1150
1151 switch (gimple_call_combined_fn (stmt))
1152 {
1153 CASE_CFN_POW:
1154 arg1 = gimple_call_arg (stmt, 1);
1155 c = TREE_REAL_CST (arg1);
1156 power = real_to_integer (&c) - 1;
1157 real_from_integer (&cint, VOIDmode, power, SIGNED);
1158 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1159 return power;
1160
1161 CASE_CFN_POWI:
1162 arg1 = gimple_call_arg (stmt, 1);
1163 power = TREE_INT_CST_LOW (arg1) - 1;
1164 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1165 return power;
1166
1167 default:
1168 gcc_unreachable ();
1169 }
1170 }
1171
1172 /* Replace the SSA name defined by STMT with a new SSA name and replace
1173 all uses of the old name with the new one. Return the new SSA name. */
1174
1175 static tree
1176 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1177 {
1178 gimple *use_stmt;
1179 use_operand_p use;
1180 imm_use_iterator iter;
1181 tree new_lhs, new_debug_lhs = NULL_TREE;
1182 tree lhs = gimple_get_lhs (stmt);
1183
1184 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1185 gimple_set_lhs (stmt, new_lhs);
1186
1187 /* Also need to update GIMPLE_DEBUGs. */
1188 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1189 {
1190 tree repl = new_lhs;
1191 if (is_gimple_debug (use_stmt))
1192 {
1193 if (new_debug_lhs == NULL_TREE)
1194 {
1195 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1196 gdebug *def_temp
1197 = gimple_build_debug_bind (new_debug_lhs,
1198 build2 (opcode, TREE_TYPE (lhs),
1199 new_lhs, op),
1200 stmt);
1201 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1202 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1203 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1204 gimple_set_uid (def_temp, gimple_uid (stmt));
1205 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1206 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1207 }
1208 repl = new_debug_lhs;
1209 }
1210 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1211 SET_USE (use, repl);
1212 update_stmt (use_stmt);
1213 }
1214 return new_lhs;
1215 }
1216
1217 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1218 uses with new SSA names. Also do this for the stmt that defines *DEF
1219 if *DEF is not OP. */
1220
1221 static void
1222 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1223 vec<gimple *> &stmts_to_fix)
1224 {
1225 unsigned i;
1226 gimple *stmt;
1227
1228 if (*def != op
1229 && TREE_CODE (*def) == SSA_NAME
1230 && (stmt = SSA_NAME_DEF_STMT (*def))
1231 && gimple_code (stmt) != GIMPLE_NOP)
1232 *def = make_new_ssa_for_def (stmt, opcode, op);
1233
1234 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1235 make_new_ssa_for_def (stmt, opcode, op);
1236 }
1237
1238 /* Find the single immediate use of STMT's LHS, and replace it
1239 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1240 replace *DEF with OP as well. */
1241
1242 static void
1243 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1244 {
1245 tree lhs;
1246 gimple *use_stmt;
1247 use_operand_p use;
1248 gimple_stmt_iterator gsi;
1249
1250 if (is_gimple_call (stmt))
1251 lhs = gimple_call_lhs (stmt);
1252 else
1253 lhs = gimple_assign_lhs (stmt);
1254
1255 gcc_assert (has_single_use (lhs));
1256 single_imm_use (lhs, &use, &use_stmt);
1257 if (lhs == *def)
1258 *def = op;
1259 SET_USE (use, op);
1260 if (TREE_CODE (op) != SSA_NAME)
1261 update_stmt (use_stmt);
1262 gsi = gsi_for_stmt (stmt);
1263 unlink_stmt_vdef (stmt);
1264 reassoc_remove_stmt (&gsi);
1265 release_defs (stmt);
1266 }
1267
1268 /* Walks the linear chain with result *DEF searching for an operation
1269 with operand OP and code OPCODE removing that from the chain. *DEF
1270 is updated if there is only one operand but no operation left. */
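
/* For example (a sketch with made-up names): given the chain

     t1 = a * x;
     t2 = t1 * b;

   with *DEF == t2, removing (MULT_EXPR, x) rewrites the chain so that it
   computes a * b, and *DEF is switched to a fresh SSA name because the
   computed value changed (see PR72835 in the function body).  */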
1271
1272 static void
1273 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1274 {
1275 tree orig_def = *def;
1276 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1277 /* PR72835 - Record the stmt chain that has to be updated such that
1278 we don't use the same LHS when the values computed are different. */
1279 auto_vec<gimple *, 64> stmts_to_fix;
1280
1281 do
1282 {
1283 tree name;
1284
1285 if (opcode == MULT_EXPR)
1286 {
1287 if (stmt_is_power_of_op (stmt, op))
1288 {
1289 if (decrement_power (stmt) == 1)
1290 {
1291 if (stmts_to_fix.length () > 0)
1292 stmts_to_fix.pop ();
1293 propagate_op_to_single_use (op, stmt, def);
1294 }
1295 break;
1296 }
1297 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1298 {
1299 if (gimple_assign_rhs1 (stmt) == op)
1300 {
1301 tree cst = build_minus_one_cst (TREE_TYPE (op));
1302 if (stmts_to_fix.length () > 0)
1303 stmts_to_fix.pop ();
1304 propagate_op_to_single_use (cst, stmt, def);
1305 break;
1306 }
1307 else if (integer_minus_onep (op)
1308 || real_minus_onep (op))
1309 {
1310 gimple_assign_set_rhs_code
1311 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1312 break;
1313 }
1314 }
1315 }
1316
1317 name = gimple_assign_rhs1 (stmt);
1318
1319 /* If this is the operation we look for and one of the operands
1320 is ours simply propagate the other operand into the stmts
1321 single use. */
1322 if (gimple_assign_rhs_code (stmt) == opcode
1323 && (name == op
1324 || gimple_assign_rhs2 (stmt) == op))
1325 {
1326 if (name == op)
1327 name = gimple_assign_rhs2 (stmt);
1328 if (stmts_to_fix.length () > 0)
1329 stmts_to_fix.pop ();
1330 propagate_op_to_single_use (name, stmt, def);
1331 break;
1332 }
1333
1334 /* We might have a multiply of two __builtin_pow* calls, and
1335 the operand might be hiding in the rightmost one. Likewise
1336 this can happen for a negate. */
1337 if (opcode == MULT_EXPR
1338 && gimple_assign_rhs_code (stmt) == opcode
1339 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1340 && has_single_use (gimple_assign_rhs2 (stmt)))
1341 {
1342 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1343 if (stmt_is_power_of_op (stmt2, op))
1344 {
1345 if (decrement_power (stmt2) == 1)
1346 propagate_op_to_single_use (op, stmt2, def);
1347 else
1348 stmts_to_fix.safe_push (stmt2);
1349 break;
1350 }
1351 else if (is_gimple_assign (stmt2)
1352 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1353 {
1354 if (gimple_assign_rhs1 (stmt2) == op)
1355 {
1356 tree cst = build_minus_one_cst (TREE_TYPE (op));
1357 propagate_op_to_single_use (cst, stmt2, def);
1358 break;
1359 }
1360 else if (integer_minus_onep (op)
1361 || real_minus_onep (op))
1362 {
1363 stmts_to_fix.safe_push (stmt2);
1364 gimple_assign_set_rhs_code
1365 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1366 break;
1367 }
1368 }
1369 }
1370
1371 /* Continue walking the chain. */
1372 gcc_assert (name != op
1373 && TREE_CODE (name) == SSA_NAME);
1374 stmt = SSA_NAME_DEF_STMT (name);
1375 stmts_to_fix.safe_push (stmt);
1376 }
1377 while (1);
1378
1379 if (stmts_to_fix.length () > 0 || *def == orig_def)
1380 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1381 }
1382
1383 /* Returns true if statement S1 dominates statement S2. Like
1384 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1385
1386 static bool
1387 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1388 {
1389 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1390
1391 /* If bb1 is NULL, the stmt should be the GIMPLE_NOP def stmt of a
1392 default-definition (D) SSA_NAME. Assume it lives at the beginning of
1393 the function and thus dominates everything. */
1394 if (!bb1 || s1 == s2)
1395 return true;
1396
1397 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1398 if (!bb2)
1399 return false;
1400
1401 if (bb1 == bb2)
1402 {
1403 /* PHIs in the same basic block are assumed to be
1404 executed all in parallel, if only one stmt is a PHI,
1405 it dominates the other stmt in the same basic block. */
1406 if (gimple_code (s1) == GIMPLE_PHI)
1407 return true;
1408
1409 if (gimple_code (s2) == GIMPLE_PHI)
1410 return false;
1411
1412 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1413
1414 if (gimple_uid (s1) < gimple_uid (s2))
1415 return true;
1416
1417 if (gimple_uid (s1) > gimple_uid (s2))
1418 return false;
1419
1420 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1421 unsigned int uid = gimple_uid (s1);
1422 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1423 {
1424 gimple *s = gsi_stmt (gsi);
1425 if (gimple_uid (s) != uid)
1426 break;
1427 if (s == s2)
1428 return true;
1429 }
1430
1431 return false;
1432 }
1433
1434 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1435 }
1436
1437 /* Insert STMT after INSERT_POINT. */
1438
1439 static void
1440 insert_stmt_after (gimple *stmt, gimple *insert_point)
1441 {
1442 gimple_stmt_iterator gsi;
1443 basic_block bb;
1444
1445 if (gimple_code (insert_point) == GIMPLE_PHI)
1446 bb = gimple_bb (insert_point);
1447 else if (!stmt_ends_bb_p (insert_point))
1448 {
1449 gsi = gsi_for_stmt (insert_point);
1450 gimple_set_uid (stmt, gimple_uid (insert_point));
1451 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1452 return;
1453 }
1454 else
1455 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1456 thus if it must end a basic block, it should be a call that can
1457 throw, or some assignment that can throw. If it throws, the LHS
1458 of it will not be initialized though, so only valid places using
1459 the SSA_NAME should be dominated by the fallthru edge. */
1460 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1461 gsi = gsi_after_labels (bb);
1462 if (gsi_end_p (gsi))
1463 {
1464 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1465 gimple_set_uid (stmt,
1466 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1467 }
1468 else
1469 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1470 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1471 }
1472
1473 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1474 the result. Places the statement after the definition of either
1475 OP1 or OP2. Returns the new statement. */
1476
1477 static gimple *
1478 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1479 {
1480 gimple *op1def = NULL, *op2def = NULL;
1481 gimple_stmt_iterator gsi;
1482 tree op;
1483 gassign *sum;
1484
1485 /* Create the addition statement. */
1486 op = make_ssa_name (type);
1487 sum = gimple_build_assign (op, opcode, op1, op2);
1488
1489 /* Find an insertion place and insert. */
1490 if (TREE_CODE (op1) == SSA_NAME)
1491 op1def = SSA_NAME_DEF_STMT (op1);
1492 if (TREE_CODE (op2) == SSA_NAME)
1493 op2def = SSA_NAME_DEF_STMT (op2);
1494 if ((!op1def || gimple_nop_p (op1def))
1495 && (!op2def || gimple_nop_p (op2def)))
1496 {
1497 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1498 if (gsi_end_p (gsi))
1499 {
1500 gimple_stmt_iterator gsi2
1501 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1502 gimple_set_uid (sum,
1503 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1504 }
1505 else
1506 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1507 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1508 }
1509 else
1510 {
1511 gimple *insert_point;
1512 if ((!op1def || gimple_nop_p (op1def))
1513 || (op2def && !gimple_nop_p (op2def)
1514 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1515 insert_point = op2def;
1516 else
1517 insert_point = op1def;
1518 insert_stmt_after (sum, insert_point);
1519 }
1520 update_stmt (sum);
1521
1522 return sum;
1523 }
1524
1525 /* Perform un-distribution of divisions and multiplications.
1526 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1527 to (A + B) / X for real X.
1528
1529 The algorithm is organized as follows.
1530
1531 - First we walk the addition chain *OPS looking for summands that
1532 are defined by a multiplication or a real division. This results
1533 in the candidates bitmap with relevant indices into *OPS.
1534
1535 - Second we build the chains of multiplications or divisions for
1536 these candidates, counting the number of occurrences of (operand, code)
1537 pairs in all of the candidates chains.
1538
1539 - Third we sort the (operand, code) pairs by number of occurrence and
1540 process them starting with the pair with the most uses.
1541
1542 * For each such pair we walk the candidates again to build a
1543 second candidate bitmap noting all multiplication/division chains
1544 that have at least one occurrence of (operand, code).
1545
1546 * We build an alternate addition chain only covering these
1547 candidates with one (operand, code) operation removed from their
1548 multiplication/division chain.
1549
1550 * The first candidate gets replaced by the alternate addition chain
1551 multiplied/divided by the operand.
1552
1553 * All candidate chains get disabled for further processing and
1554 processing of (operand, code) pairs continues.
1555
1556 The alternate addition chains built are re-processed by the main
1557 reassociation algorithm which allows optimizing a * x * y + b * y * x
1558 to (a + b) * x * y in one invocation of the reassociation pass. */
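
/* For illustration (a sketch with made-up names): starting from the
   addition chain { a * x, b * x, c }, the pair (x, MULT_EXPR) occurs
   twice, so the two candidates are rewritten as

     sum = a + b;
     prod = sum * x;

   and the chain becomes { prod, 0, c }, which the main reassociation
   algorithm then processes as usual.  */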
1559
1560 static bool
1561 undistribute_ops_list (enum tree_code opcode,
1562 vec<operand_entry *> *ops, class loop *loop)
1563 {
1564 unsigned int length = ops->length ();
1565 operand_entry *oe1;
1566 unsigned i, j;
1567 unsigned nr_candidates, nr_candidates2;
1568 sbitmap_iterator sbi0;
1569 vec<operand_entry *> *subops;
1570 bool changed = false;
1571 unsigned int next_oecount_id = 0;
1572
1573 if (length <= 1
1574 || opcode != PLUS_EXPR)
1575 return false;
1576
1577 /* Build a list of candidates to process. */
1578 auto_sbitmap candidates (length);
1579 bitmap_clear (candidates);
1580 nr_candidates = 0;
1581 FOR_EACH_VEC_ELT (*ops, i, oe1)
1582 {
1583 enum tree_code dcode;
1584 gimple *oe1def;
1585
1586 if (TREE_CODE (oe1->op) != SSA_NAME)
1587 continue;
1588 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1589 if (!is_gimple_assign (oe1def))
1590 continue;
1591 dcode = gimple_assign_rhs_code (oe1def);
1592 if ((dcode != MULT_EXPR
1593 && dcode != RDIV_EXPR)
1594 || !is_reassociable_op (oe1def, dcode, loop))
1595 continue;
1596
1597 bitmap_set_bit (candidates, i);
1598 nr_candidates++;
1599 }
1600
1601 if (nr_candidates < 2)
1602 return false;
1603
1604 if (dump_file && (dump_flags & TDF_DETAILS))
1605 {
1606 fprintf (dump_file, "searching for un-distribute opportunities ");
1607 print_generic_expr (dump_file,
1608 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1609 fprintf (dump_file, " %d\n", nr_candidates);
1610 }
1611
1612 /* Build linearized sub-operand lists and the counting table. */
1613 cvec.create (0);
1614
1615 hash_table<oecount_hasher> ctable (15);
1616
1617 /* ??? Macro arguments cannot have multi-argument template types in
1618 them. This typedef is needed to work around that limitation. */
1619 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1620 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1621 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1622 {
1623 gimple *oedef;
1624 enum tree_code oecode;
1625 unsigned j;
1626
1627 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1628 oecode = gimple_assign_rhs_code (oedef);
1629 linearize_expr_tree (&subops[i], oedef,
1630 associative_tree_code (oecode), false);
1631
1632 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1633 {
1634 oecount c;
1635 int *slot;
1636 int idx;
1637 c.oecode = oecode;
1638 c.cnt = 1;
1639 c.id = next_oecount_id++;
1640 c.op = oe1->op;
1641 cvec.safe_push (c);
1642 idx = cvec.length () + 41;
1643 slot = ctable.find_slot (idx, INSERT);
1644 if (!*slot)
1645 {
1646 *slot = idx;
1647 }
1648 else
1649 {
1650 cvec.pop ();
1651 cvec[*slot - 42].cnt++;
1652 }
1653 }
1654 }
1655
1656 /* Sort the counting table. */
1657 cvec.qsort (oecount_cmp);
1658
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1660 {
1661 oecount *c;
1662 fprintf (dump_file, "Candidates:\n");
1663 FOR_EACH_VEC_ELT (cvec, j, c)
1664 {
1665 fprintf (dump_file, " %u %s: ", c->cnt,
1666 c->oecode == MULT_EXPR
1667 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1668 print_generic_expr (dump_file, c->op);
1669 fprintf (dump_file, "\n");
1670 }
1671 }
1672
1673 /* Process the (operand, code) pairs in order of most occurrence. */
1674 auto_sbitmap candidates2 (length);
1675 while (!cvec.is_empty ())
1676 {
1677 oecount *c = &cvec.last ();
1678 if (c->cnt < 2)
1679 break;
1680
1681 /* Now collect the operands in the outer chain that contain
1682 the common operand in their inner chain. */
1683 bitmap_clear (candidates2);
1684 nr_candidates2 = 0;
1685 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1686 {
1687 gimple *oedef;
1688 enum tree_code oecode;
1689 unsigned j;
1690 tree op = (*ops)[i]->op;
1691
1692 /* If we undistributed in this chain already this may be
1693 a constant. */
1694 if (TREE_CODE (op) != SSA_NAME)
1695 continue;
1696
1697 oedef = SSA_NAME_DEF_STMT (op);
1698 oecode = gimple_assign_rhs_code (oedef);
1699 if (oecode != c->oecode)
1700 continue;
1701
1702 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1703 {
1704 if (oe1->op == c->op)
1705 {
1706 bitmap_set_bit (candidates2, i);
1707 ++nr_candidates2;
1708 break;
1709 }
1710 }
1711 }
1712
1713 if (nr_candidates2 >= 2)
1714 {
1715 operand_entry *oe1, *oe2;
1716 gimple *prod;
1717 int first = bitmap_first_set_bit (candidates2);
1718
1719 /* Build the new addition chain. */
1720 oe1 = (*ops)[first];
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1722 {
1723 fprintf (dump_file, "Building (");
1724 print_generic_expr (dump_file, oe1->op);
1725 }
1726 zero_one_operation (&oe1->op, c->oecode, c->op);
1727 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1728 {
1729 gimple *sum;
1730 oe2 = (*ops)[i];
1731 if (dump_file && (dump_flags & TDF_DETAILS))
1732 {
1733 fprintf (dump_file, " + ");
1734 print_generic_expr (dump_file, oe2->op);
1735 }
1736 zero_one_operation (&oe2->op, c->oecode, c->op);
1737 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1738 oe1->op, oe2->op, opcode);
1739 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1740 oe2->rank = 0;
1741 oe1->op = gimple_get_lhs (sum);
1742 }
1743
1744 /* Apply the multiplication/division. */
1745 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1746 oe1->op, c->op, c->oecode);
1747 if (dump_file && (dump_flags & TDF_DETAILS))
1748 {
1749 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1750 print_generic_expr (dump_file, c->op);
1751 fprintf (dump_file, "\n");
1752 }
1753
1754 /* Record it in the addition chain and disable further
1755 undistribution with this op. */
1756 oe1->op = gimple_assign_lhs (prod);
1757 oe1->rank = get_rank (oe1->op);
1758 subops[first].release ();
1759
1760 changed = true;
1761 }
1762
1763 cvec.pop ();
1764 }
1765
1766 for (i = 0; i < ops->length (); ++i)
1767 subops[i].release ();
1768 free (subops);
1769 cvec.release ();
1770
1771 return changed;
1772 }
1773
1774 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1775 first: element index for each relevant BIT_FIELD_REF.
1776 second: the index into *OPS for each relevant BIT_FIELD_REF. */
1777 typedef std::pair<unsigned, unsigned> v_info_elem;
1778 struct v_info {
1779 tree vec_type;
1780 auto_vec<v_info_elem, 32> vec;
1781 };
1782 typedef v_info *v_info_ptr;
1783
1784 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1785 static int
1786 sort_by_mach_mode (const void *p_i, const void *p_j)
1787 {
1788 const tree tr1 = *((const tree *) p_i);
1789 const tree tr2 = *((const tree *) p_j);
1790 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1791 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1792 if (mode1 > mode2)
1793 return 1;
1794 else if (mode1 < mode2)
1795 return -1;
1796 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1797 return -1;
1798 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1799 return 1;
1800 return 0;
1801 }
1802
1803 /* Cleanup hash map for VECTOR information. */
1804 static void
1805 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1806 {
1807 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1808 it != info_map.end (); ++it)
1809 {
1810 v_info_ptr info = (*it).second;
1811 delete info;
1812 (*it).second = NULL;
1813 }
1814 }
1815
1816 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1817 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1818 is transformed to
1819 Vs = (V1 + V2 + ... + Vn)
1820 Vs[0] + Vs[1] + ... + Vs[k]
1821
1822 The basic steps are listed below:
1823
1824 1) Scan the addition chain *OPS looking for summands that come from
1825 a BIT_FIELD_REF on a VECTOR type. Record the information in
1826 v_info_map for each qualifying summand, keyed by the VECTOR SSA_NAME.
1827
1828 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1829 contiguous and cover the whole VECTOR without any holes. Collect the
1830 vectors that pass this check into a list of candidates to transform.
1831
1832 3) Sort the candidate list by the machine mode of the VECTOR type; for
1833 each group of candidates with the same mode, build the addition
1834 statements for them and generate BIT_FIELD_REFs accordingly.
1835
1836 TODO:
1837 The current implementation requires each VECTOR to be fully covered.
1838 It could be extended to handle partial coverage (adjacent elements
1839 that do not fill the whole vector), but that would need a cost model
1840 to decide when the transformation is worthwhile.
1841 */
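
/* For illustration (a sketch with made-up names): with two 4-element
   vectors v1 and v2 whose elements are all summed,

     v1[0] + v1[1] + v1[2] + v1[3] + v2[0] + v2[1] + v2[2] + v2[3]

   is rewritten as

     vs = v1 + v2;
     vs[0] + vs[1] + vs[2] + vs[3]

   so a single vector addition replaces four of the scalar additions.  */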
1842 static bool
1843 undistribute_bitref_for_vector (enum tree_code opcode,
1844 vec<operand_entry *> *ops, struct loop *loop)
1845 {
1846 if (ops->length () <= 1)
1847 return false;
1848
1849 if (opcode != PLUS_EXPR
1850 && opcode != MULT_EXPR
1851 && opcode != BIT_XOR_EXPR
1852 && opcode != BIT_IOR_EXPR
1853 && opcode != BIT_AND_EXPR)
1854 return false;
1855
1856 hash_map<tree, v_info_ptr> v_info_map;
1857 operand_entry *oe1;
1858 unsigned i;
1859
1860 /* Find the summands in the addition chain that come from VECTOR
1861 BIT_FIELD_REFs and record the information in the map. */
1862 FOR_EACH_VEC_ELT (*ops, i, oe1)
1863 {
1864 enum tree_code dcode;
1865 gimple *oe1def;
1866
1867 if (TREE_CODE (oe1->op) != SSA_NAME)
1868 continue;
1869 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1870 if (!is_gimple_assign (oe1def))
1871 continue;
1872 dcode = gimple_assign_rhs_code (oe1def);
1873 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1874 continue;
1875
1876 tree rhs = gimple_assign_rhs1 (oe1def);
1877 tree vec = TREE_OPERAND (rhs, 0);
1878 tree vec_type = TREE_TYPE (vec);
1879
1880 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1881 continue;
1882
1883 /* Ignore it if target machine can't support this VECTOR type. */
1884 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1885 continue;
1886
1887 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1888 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1889 continue;
1890
1891 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1892 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1893 continue;
1894
1895 /* The type of BIT_FIELD_REF might not be equal to the element type of
1896 the vector. We want to use a vector type with element type the
1897 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1898 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1899 {
1900 machine_mode simd_mode;
1901 unsigned HOST_WIDE_INT size, nunits;
1902 unsigned HOST_WIDE_INT elem_size
1903 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1904 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1905 continue;
1906 if (size <= elem_size || (size % elem_size) != 0)
1907 continue;
1908 nunits = size / elem_size;
1909 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1910 nunits).exists (&simd_mode))
1911 continue;
1912 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1913
1914 /* Ignore it if target machine can't support this VECTOR type. */
1915 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1916 continue;
1917
1918 	  /* Check for a constant vector type, to constrain the BIT_FIELD_REF
1919 	     offset and size.  */
1920 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1921 continue;
1922
1923 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1924 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1925 continue;
1926 }
1927
1928 tree elem_type = TREE_TYPE (vec_type);
1929 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1930 if (maybe_ne (bit_field_size (rhs), elem_size))
1931 continue;
1932
1933 unsigned idx;
1934 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1935 continue;
1936
1937 /* Ignore it if target machine can't support this type of VECTOR
1938 operation. */
1939 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1940 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1941 continue;
1942
1943 bool existed;
1944 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1945 if (!existed)
1946 {
1947 info = new v_info;
1948 info->vec_type = vec_type;
1949 }
1950 else if (!types_compatible_p (vec_type, info->vec_type))
1951 continue;
1952 info->vec.safe_push (std::make_pair (idx, i));
1953 }
1954
1955   /* We need at least two VECTORs to combine.  */
1956 if (v_info_map.elements () <= 1)
1957 {
1958 cleanup_vinfo_map (v_info_map);
1959 return false;
1960 }
1961
1962 /* Verify all VECTOR candidates by checking two conditions:
1963 1) sorted offsets are adjacent, no holes.
1964 2) can fill the whole VECTOR perfectly.
1965 And add the valid candidates to a vector for further handling. */
1966 auto_vec<tree> valid_vecs (v_info_map.elements ());
1967 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1968 it != v_info_map.end (); ++it)
1969 {
1970 tree cand_vec = (*it).first;
1971 v_info_ptr cand_info = (*it).second;
1972 unsigned int num_elems
1973 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1974 if (cand_info->vec.length () != num_elems)
1975 continue;
1976 sbitmap holes = sbitmap_alloc (num_elems);
1977 bitmap_ones (holes);
1978 bool valid = true;
1979 v_info_elem *curr;
1980 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1981 {
1982 if (!bitmap_bit_p (holes, curr->first))
1983 {
1984 valid = false;
1985 break;
1986 }
1987 else
1988 bitmap_clear_bit (holes, curr->first);
1989 }
1990 if (valid && bitmap_empty_p (holes))
1991 valid_vecs.quick_push (cand_vec);
1992 sbitmap_free (holes);
1993 }
1994
1995   /* We need at least two VECTORs to combine.  */
1996 if (valid_vecs.length () <= 1)
1997 {
1998 cleanup_vinfo_map (v_info_map);
1999 return false;
2000 }
2001
2002 valid_vecs.qsort (sort_by_mach_mode);
2003   /* Go through all candidates in machine mode order, combining all
2004      candidates that share the same mode.  */
2005 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2006 {
2007 tree tvec = valid_vecs[i];
2008 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2009
2010 /* Skip modes with only a single candidate. */
2011 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2012 continue;
2013
2014 unsigned int idx, j;
2015 gimple *sum = NULL;
2016 tree sum_vec = tvec;
2017 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2018 v_info_elem *elem;
2019 tree vec_type = info_ptr->vec_type;
2020
2021 /* Build the sum for all candidates with same mode. */
2022 do
2023 {
2024 sum = build_and_add_sum (vec_type, sum_vec,
2025 valid_vecs[i + 1], opcode);
2026 if (!useless_type_conversion_p (vec_type,
2027 TREE_TYPE (valid_vecs[i + 1])))
2028 {
2029 /* Update the operands only after build_and_add_sum,
2030 so that we don't have to repeat the placement algorithm
2031 of build_and_add_sum. */
2032 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2033 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2034 valid_vecs[i + 1]);
2035 tree lhs = make_ssa_name (vec_type);
2036 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2037 gimple_set_uid (g, gimple_uid (sum));
2038 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2039 gimple_assign_set_rhs2 (sum, lhs);
2040 if (sum_vec == tvec)
2041 {
2042 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2043 lhs = make_ssa_name (vec_type);
2044 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2045 gimple_set_uid (g, gimple_uid (sum));
2046 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2047 gimple_assign_set_rhs1 (sum, lhs);
2048 }
2049 update_stmt (sum);
2050 }
2051 sum_vec = gimple_get_lhs (sum);
2052 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2053 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2054 /* Update those related ops of current candidate VECTOR. */
2055 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2056 {
2057 idx = elem->second;
2058 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2059 	      /* Set this so that the op definition will get DCEd later.  */
2060 gimple_set_visited (def, true);
2061 if (opcode == PLUS_EXPR
2062 || opcode == BIT_XOR_EXPR
2063 || opcode == BIT_IOR_EXPR)
2064 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2065 else if (opcode == MULT_EXPR)
2066 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2067 else
2068 {
2069 gcc_assert (opcode == BIT_AND_EXPR);
2070 (*ops)[idx]->op
2071 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2072 }
2073 (*ops)[idx]->rank = 0;
2074 }
2075 if (dump_file && (dump_flags & TDF_DETAILS))
2076 {
2077 fprintf (dump_file, "Generating addition -> ");
2078 print_gimple_stmt (dump_file, sum, 0);
2079 }
2080 i++;
2081 }
2082 while ((i < valid_vecs.length () - 1)
2083 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2084
2085       /* Referring to the first valid VECTOR with this mode, generate the
2086 	 BIT_FIELD_REF statements accordingly.  */
2087 info_ptr = *(v_info_map.get (tvec));
2088 gcc_assert (sum);
2089 tree elem_type = TREE_TYPE (vec_type);
2090 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2091 {
2092 idx = elem->second;
2093 tree dst = make_ssa_name (elem_type);
2094 tree pos = bitsize_int (elem->first
2095 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2096 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2097 TYPE_SIZE (elem_type), pos);
2098 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2099 insert_stmt_after (gs, sum);
2100 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2101 	  /* Set this so that the op definition will get DCEd later.  */
2102 gimple_set_visited (def, true);
2103 (*ops)[idx]->op = gimple_assign_lhs (gs);
2104 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2105 if (dump_file && (dump_flags & TDF_DETAILS))
2106 {
2107 fprintf (dump_file, "Generating bit_field_ref -> ");
2108 print_gimple_stmt (dump_file, gs, 0);
2109 }
2110 }
2111 }
2112
2113 if (dump_file && (dump_flags & TDF_DETAILS))
2114     fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2115
2116 cleanup_vinfo_map (v_info_map);
2117
2118 return true;
2119 }
2120
2121 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2122 expression, examine the other OPS to see if any of them are comparisons
2123 of the same values, which we may be able to combine or eliminate.
2124 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2125
2126 static bool
2127 eliminate_redundant_comparison (enum tree_code opcode,
2128 vec<operand_entry *> *ops,
2129 unsigned int currindex,
2130 operand_entry *curr)
2131 {
2132 tree op1, op2;
2133 enum tree_code lcode, rcode;
2134 gimple *def1, *def2;
2135 int i;
2136 operand_entry *oe;
2137
2138 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2139 return false;
2140
2141 /* Check that CURR is a comparison. */
2142 if (TREE_CODE (curr->op) != SSA_NAME)
2143 return false;
2144 def1 = SSA_NAME_DEF_STMT (curr->op);
2145 if (!is_gimple_assign (def1))
2146 return false;
2147 lcode = gimple_assign_rhs_code (def1);
2148 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2149 return false;
2150 op1 = gimple_assign_rhs1 (def1);
2151 op2 = gimple_assign_rhs2 (def1);
2152
2153 /* Now look for a similar comparison in the remaining OPS. */
2154 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2155 {
2156 tree t;
2157
2158 if (TREE_CODE (oe->op) != SSA_NAME)
2159 continue;
2160 def2 = SSA_NAME_DEF_STMT (oe->op);
2161 if (!is_gimple_assign (def2))
2162 continue;
2163 rcode = gimple_assign_rhs_code (def2);
2164 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2165 continue;
2166
2167 /* If we got here, we have a match. See if we can combine the
2168 two comparisons. */
2169 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2170 if (opcode == BIT_IOR_EXPR)
2171 t = maybe_fold_or_comparisons (type,
2172 lcode, op1, op2,
2173 rcode, gimple_assign_rhs1 (def2),
2174 gimple_assign_rhs2 (def2));
2175 else
2176 t = maybe_fold_and_comparisons (type,
2177 lcode, op1, op2,
2178 rcode, gimple_assign_rhs1 (def2),
2179 gimple_assign_rhs2 (def2));
2180 if (!t)
2181 continue;
2182
2183 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2184 always give us a boolean_type_node value back. If the original
2185 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2186 we need to convert. */
2187 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2188 t = fold_convert (TREE_TYPE (curr->op), t);
2189
2190 if (TREE_CODE (t) != INTEGER_CST
2191 && !operand_equal_p (t, curr->op, 0))
2192 {
2193 enum tree_code subcode;
2194 tree newop1, newop2;
2195 if (!COMPARISON_CLASS_P (t))
2196 continue;
2197 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2198 STRIP_USELESS_TYPE_CONVERSION (newop1);
2199 STRIP_USELESS_TYPE_CONVERSION (newop2);
2200 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2201 continue;
2202 }
2203
2204 if (dump_file && (dump_flags & TDF_DETAILS))
2205 {
2206 fprintf (dump_file, "Equivalence: ");
2207 print_generic_expr (dump_file, curr->op);
2208 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2209 print_generic_expr (dump_file, oe->op);
2210 fprintf (dump_file, " -> ");
2211 print_generic_expr (dump_file, t);
2212 fprintf (dump_file, "\n");
2213 }
2214
2215 /* Now we can delete oe, as it has been subsumed by the new combined
2216 expression t. */
2217 ops->ordered_remove (i);
2218 reassociate_stats.ops_eliminated ++;
2219
2220 /* If t is the same as curr->op, we're done. Otherwise we must
2221 replace curr->op with t. Special case is if we got a constant
2222 back, in which case we add it to the end instead of in place of
2223 the current entry. */
2224 if (TREE_CODE (t) == INTEGER_CST)
2225 {
2226 ops->ordered_remove (currindex);
2227 add_to_ops_vec (ops, t);
2228 }
2229 else if (!operand_equal_p (t, curr->op, 0))
2230 {
2231 gimple *sum;
2232 enum tree_code subcode;
2233 tree newop1;
2234 tree newop2;
2235 gcc_assert (COMPARISON_CLASS_P (t));
2236 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2237 STRIP_USELESS_TYPE_CONVERSION (newop1);
2238 STRIP_USELESS_TYPE_CONVERSION (newop2);
2239 gcc_checking_assert (is_gimple_val (newop1)
2240 && is_gimple_val (newop2));
2241 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2242 curr->op = gimple_get_lhs (sum);
2243 }
2244 return true;
2245 }
2246
2247 return false;
2248 }
2249
2250
2251 /* Transform repeated addition of same values into multiply with
2252 constant. */
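/* For illustration (an example not drawn from the sources): with the
   operand list { x, x, x, y, y }, the three copies of x are replaced by a
   single new entry t1 = x * 3 and the two copies of y by t2 = y * 2, so
   the chain is rewritten as t1 + t2.  For floating-point operands this is
   only attempted under -funsafe-math-optimizations, as checked below.  */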
2253 static bool
2254 transform_add_to_multiply (vec<operand_entry *> *ops)
2255 {
2256 operand_entry *oe;
2257 tree op = NULL_TREE;
2258 int j;
2259 int i, start = -1, end = 0, count = 0;
2260 auto_vec<std::pair <int, int> > indxs;
2261 bool changed = false;
2262
2263 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2264 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2265 || !flag_unsafe_math_optimizations))
2266 return false;
2267
2268 /* Look for repeated operands. */
2269 FOR_EACH_VEC_ELT (*ops, i, oe)
2270 {
2271 if (start == -1)
2272 {
2273 count = 1;
2274 op = oe->op;
2275 start = i;
2276 }
2277 else if (operand_equal_p (oe->op, op, 0))
2278 {
2279 count++;
2280 end = i;
2281 }
2282 else
2283 {
2284 if (count > 1)
2285 indxs.safe_push (std::make_pair (start, end));
2286 count = 1;
2287 op = oe->op;
2288 start = i;
2289 }
2290 }
2291
2292 if (count > 1)
2293 indxs.safe_push (std::make_pair (start, end));
2294
2295 for (j = indxs.length () - 1; j >= 0; --j)
2296 {
2297 /* Convert repeated operand addition to multiplication. */
2298 start = indxs[j].first;
2299 end = indxs[j].second;
2300 op = (*ops)[start]->op;
2301 count = end - start + 1;
2302 for (i = end; i >= start; --i)
2303 ops->unordered_remove (i);
2304 tree tmp = make_ssa_name (TREE_TYPE (op));
2305 tree cst = build_int_cst (integer_type_node, count);
2306 gassign *mul_stmt
2307 = gimple_build_assign (tmp, MULT_EXPR,
2308 op, fold_convert (TREE_TYPE (op), cst));
2309 gimple_set_visited (mul_stmt, true);
2310 add_to_ops_vec (ops, tmp, mul_stmt);
2311 changed = true;
2312 }
2313
2314 return changed;
2315 }
2316
2317
2318 /* Perform various identities and other optimizations on the list of
2319 operand entries, stored in OPS. The tree code for the binary
2320 operation between all the operands is OPCODE. */
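/* For illustration (hypothetical operand values): with OPCODE == PLUS_EXPR
   and the operand list { a, 5, 7 }, the two trailing constants can be
   folded into a single 12 and the shortened list { a, 12 } is then
   re-optimized.  */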
2321
2322 static void
2323 optimize_ops_list (enum tree_code opcode,
2324 vec<operand_entry *> *ops)
2325 {
2326 unsigned int length = ops->length ();
2327 unsigned int i;
2328 operand_entry *oe;
2329 operand_entry *oelast = NULL;
2330 bool iterate = false;
2331
2332 if (length == 1)
2333 return;
2334
2335 oelast = ops->last ();
2336
2337 /* If the last two are constants, pop the constants off, merge them
2338 and try the next two. */
2339 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2340 {
2341 operand_entry *oelm1 = (*ops)[length - 2];
2342
2343 if (oelm1->rank == 0
2344 && is_gimple_min_invariant (oelm1->op)
2345 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2346 TREE_TYPE (oelast->op)))
2347 {
2348 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2349 oelm1->op, oelast->op);
2350
2351 if (folded && is_gimple_min_invariant (folded))
2352 {
2353 if (dump_file && (dump_flags & TDF_DETAILS))
2354 fprintf (dump_file, "Merging constants\n");
2355
2356 ops->pop ();
2357 ops->pop ();
2358
2359 add_to_ops_vec (ops, folded);
2360 reassociate_stats.constants_eliminated++;
2361
2362 optimize_ops_list (opcode, ops);
2363 return;
2364 }
2365 }
2366 }
2367
2368 eliminate_using_constants (opcode, ops);
2369 oelast = NULL;
2370
2371 for (i = 0; ops->iterate (i, &oe);)
2372 {
2373 bool done = false;
2374
2375 if (eliminate_not_pairs (opcode, ops, i, oe))
2376 return;
2377 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2378 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2379 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2380 {
2381 if (done)
2382 return;
2383 iterate = true;
2384 oelast = NULL;
2385 continue;
2386 }
2387 oelast = oe;
2388 i++;
2389 }
2390
2391 if (iterate)
2392 optimize_ops_list (opcode, ops);
2393 }
2394
2395 /* The following functions are subroutines to optimize_range_tests and allow
2396 it to try to change a logical combination of comparisons into a range
2397 test.
2398
2399 For example, both
2400 X == 2 || X == 5 || X == 3 || X == 4
2401 and
2402 X >= 2 && X <= 5
2403 are converted to
2404 (unsigned) (X - 2) <= 3
2405
2406 For more information see comments above fold_test_range in fold-const.c,
2407 this implementation is for GIMPLE. */
2408
2409 struct range_entry
2410 {
2411 tree exp;
2412 tree low;
2413 tree high;
2414 bool in_p;
2415 bool strict_overflow_p;
2416 unsigned int idx, next;
2417 };
2418
2419 /* This is similar to make_range in fold-const.c, but on top of
2420 GIMPLE instead of trees. If EXP is non-NULL, it should be
2421 an SSA_NAME and STMT argument is ignored, otherwise STMT
2422 argument should be a GIMPLE_COND. */
2423
2424 static void
2425 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2426 {
2427 int in_p;
2428 tree low, high;
2429 bool is_bool, strict_overflow_p;
2430
2431 r->exp = NULL_TREE;
2432 r->in_p = false;
2433 r->strict_overflow_p = false;
2434 r->low = NULL_TREE;
2435 r->high = NULL_TREE;
2436 if (exp != NULL_TREE
2437 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2438 return;
2439
2440 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2441 and see if we can refine the range. Some of the cases below may not
2442 happen, but it doesn't seem worth worrying about this. We "continue"
2443 the outer loop when we've changed something; otherwise we "break"
2444 the switch, which will "break" the while. */
2445 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2446 high = low;
2447 in_p = 0;
2448 strict_overflow_p = false;
2449 is_bool = false;
2450 if (exp == NULL_TREE)
2451 is_bool = true;
2452 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2453 {
2454 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2455 is_bool = true;
2456 else
2457 return;
2458 }
2459 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2460 is_bool = true;
2461
2462 while (1)
2463 {
2464 enum tree_code code;
2465 tree arg0, arg1, exp_type;
2466 tree nexp;
2467 location_t loc;
2468
2469 if (exp != NULL_TREE)
2470 {
2471 if (TREE_CODE (exp) != SSA_NAME
2472 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2473 break;
2474
2475 stmt = SSA_NAME_DEF_STMT (exp);
2476 if (!is_gimple_assign (stmt))
2477 break;
2478
2479 code = gimple_assign_rhs_code (stmt);
2480 arg0 = gimple_assign_rhs1 (stmt);
2481 arg1 = gimple_assign_rhs2 (stmt);
2482 exp_type = TREE_TYPE (exp);
2483 }
2484 else
2485 {
2486 code = gimple_cond_code (stmt);
2487 arg0 = gimple_cond_lhs (stmt);
2488 arg1 = gimple_cond_rhs (stmt);
2489 exp_type = boolean_type_node;
2490 }
2491
2492 if (TREE_CODE (arg0) != SSA_NAME
2493 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2494 break;
2495 loc = gimple_location (stmt);
2496 switch (code)
2497 {
2498 case BIT_NOT_EXPR:
2499 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2500 /* Ensure the range is either +[-,0], +[0,0],
2501 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2502 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2503 or similar expression of unconditional true or
2504 false, it should not be negated. */
2505 && ((high && integer_zerop (high))
2506 || (low && integer_onep (low))))
2507 {
2508 in_p = !in_p;
2509 exp = arg0;
2510 continue;
2511 }
2512 break;
2513 case SSA_NAME:
2514 exp = arg0;
2515 continue;
2516 CASE_CONVERT:
2517 if (is_bool)
2518 {
2519 if ((TYPE_PRECISION (exp_type) == 1
2520 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2521 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2522 return;
2523 }
2524 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2525 {
2526 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2527 is_bool = true;
2528 else
2529 return;
2530 }
2531 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2532 is_bool = true;
2533 goto do_default;
2534 case EQ_EXPR:
2535 case NE_EXPR:
2536 case LT_EXPR:
2537 case LE_EXPR:
2538 case GE_EXPR:
2539 case GT_EXPR:
2540 is_bool = true;
2541 /* FALLTHRU */
2542 default:
2543 if (!is_bool)
2544 return;
2545 do_default:
2546 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2547 &low, &high, &in_p,
2548 &strict_overflow_p);
2549 if (nexp != NULL_TREE)
2550 {
2551 exp = nexp;
2552 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2553 continue;
2554 }
2555 break;
2556 }
2557 break;
2558 }
2559 if (is_bool)
2560 {
2561 r->exp = exp;
2562 r->in_p = in_p;
2563 r->low = low;
2564 r->high = high;
2565 r->strict_overflow_p = strict_overflow_p;
2566 }
2567 }
2568
2569 /* Comparison function for qsort. Sort entries
2570 without SSA_NAME exp first, then with SSA_NAMEs sorted
2571 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2572 by increasing ->low and if ->low is the same, by increasing
2573 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2574 maximum. */
2575
2576 static int
2577 range_entry_cmp (const void *a, const void *b)
2578 {
2579 const struct range_entry *p = (const struct range_entry *) a;
2580 const struct range_entry *q = (const struct range_entry *) b;
2581
2582 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2583 {
2584 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2585 {
2586 /* Group range_entries for the same SSA_NAME together. */
2587 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2588 return -1;
2589 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2590 return 1;
2591 /* If ->low is different, NULL low goes first, then by
2592 ascending low. */
2593 if (p->low != NULL_TREE)
2594 {
2595 if (q->low != NULL_TREE)
2596 {
2597 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2598 p->low, q->low);
2599 if (tem && integer_onep (tem))
2600 return -1;
2601 tem = fold_binary (GT_EXPR, boolean_type_node,
2602 p->low, q->low);
2603 if (tem && integer_onep (tem))
2604 return 1;
2605 }
2606 else
2607 return 1;
2608 }
2609 else if (q->low != NULL_TREE)
2610 return -1;
2611 /* If ->high is different, NULL high goes last, before that by
2612 ascending high. */
2613 if (p->high != NULL_TREE)
2614 {
2615 if (q->high != NULL_TREE)
2616 {
2617 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2618 p->high, q->high);
2619 if (tem && integer_onep (tem))
2620 return -1;
2621 tem = fold_binary (GT_EXPR, boolean_type_node,
2622 p->high, q->high);
2623 if (tem && integer_onep (tem))
2624 return 1;
2625 }
2626 else
2627 return -1;
2628 }
2629 else if (q->high != NULL_TREE)
2630 return 1;
2631 /* If both ranges are the same, sort below by ascending idx. */
2632 }
2633 else
2634 return 1;
2635 }
2636 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2637 return -1;
2638
2639 if (p->idx < q->idx)
2640 return -1;
2641 else
2642 {
2643 gcc_checking_assert (p->idx > q->idx);
2644 return 1;
2645 }
2646 }
2647
2648 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2649 insert needed statements BEFORE or after GSI. */
2650
2651 static tree
2652 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2653 {
2654 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2655 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2656 if (TREE_CODE (ret) != SSA_NAME)
2657 {
2658 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2659 if (before)
2660 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2661 else
2662 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2663 ret = gimple_assign_lhs (g);
2664 }
2665 return ret;
2666 }
2667
2668 /* Helper routine of optimize_range_test.
2669 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2670 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2671 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2672    is NULL, OTHERRANGEP must be non-NULL and must point to
2673 an array of COUNT pointers to other ranges. Return
2674 true if the range merge has been successful.
2675 If OPCODE is ERROR_MARK, this is called from within
2676 maybe_optimize_range_tests and is performing inter-bb range optimization.
2677 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2678 oe->rank. */
2679
2680 static bool
2681 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2682 struct range_entry **otherrangep,
2683 unsigned int count, enum tree_code opcode,
2684 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2685 bool in_p, tree low, tree high, bool strict_overflow_p)
2686 {
2687 operand_entry *oe = (*ops)[range->idx];
2688 tree op = oe->op;
2689 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2690 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2691 location_t loc = gimple_location (stmt);
2692 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2693 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2694 in_p, low, high);
2695 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2696 gimple_stmt_iterator gsi;
2697 unsigned int i, uid;
2698
2699 if (tem == NULL_TREE)
2700 return false;
2701
2702 /* If op is default def SSA_NAME, there is no place to insert the
2703 new comparison. Give up, unless we can use OP itself as the
2704 range test. */
2705 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2706 {
2707 if (op == range->exp
2708 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2709 || TREE_CODE (optype) == BOOLEAN_TYPE)
2710 && (op == tem
2711 || (TREE_CODE (tem) == EQ_EXPR
2712 && TREE_OPERAND (tem, 0) == op
2713 && integer_onep (TREE_OPERAND (tem, 1))))
2714 && opcode != BIT_IOR_EXPR
2715 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2716 {
2717 stmt = NULL;
2718 tem = op;
2719 }
2720 else
2721 return false;
2722 }
2723
2724 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2725 warning_at (loc, OPT_Wstrict_overflow,
2726 "assuming signed overflow does not occur "
2727 "when simplifying range test");
2728
2729 if (dump_file && (dump_flags & TDF_DETAILS))
2730 {
2731 struct range_entry *r;
2732 fprintf (dump_file, "Optimizing range tests ");
2733 print_generic_expr (dump_file, range->exp);
2734 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2735 print_generic_expr (dump_file, range->low);
2736 fprintf (dump_file, ", ");
2737 print_generic_expr (dump_file, range->high);
2738 fprintf (dump_file, "]");
2739 for (i = 0; i < count; i++)
2740 {
2741 if (otherrange)
2742 r = otherrange + i;
2743 else
2744 r = otherrangep[i];
2745 if (r->exp
2746 && r->exp != range->exp
2747 && TREE_CODE (r->exp) == SSA_NAME)
2748 {
2749 fprintf (dump_file, " and ");
2750 print_generic_expr (dump_file, r->exp);
2751 }
2752 else
2753 fprintf (dump_file, " and");
2754 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2755 print_generic_expr (dump_file, r->low);
2756 fprintf (dump_file, ", ");
2757 print_generic_expr (dump_file, r->high);
2758 fprintf (dump_file, "]");
2759 }
2760 fprintf (dump_file, "\n into ");
2761 print_generic_expr (dump_file, tem);
2762 fprintf (dump_file, "\n");
2763 }
2764
2765 if (opcode == BIT_IOR_EXPR
2766 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2767 tem = invert_truthvalue_loc (loc, tem);
2768
2769 tem = fold_convert_loc (loc, optype, tem);
2770 if (stmt)
2771 {
2772 gsi = gsi_for_stmt (stmt);
2773 uid = gimple_uid (stmt);
2774 }
2775 else
2776 {
2777 gsi = gsi_none ();
2778 uid = 0;
2779 }
2780 if (stmt == NULL)
2781 gcc_checking_assert (tem == op);
2782 /* In rare cases range->exp can be equal to lhs of stmt.
2783      In that case we have to insert after the stmt rather than before
2784 it. If stmt is a PHI, insert it at the start of the basic block. */
2785 else if (op != range->exp)
2786 {
2787 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2788 tem = force_into_ssa_name (&gsi, tem, true);
2789 gsi_prev (&gsi);
2790 }
2791 else if (gimple_code (stmt) != GIMPLE_PHI)
2792 {
2793 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2794 tem = force_into_ssa_name (&gsi, tem, false);
2795 }
2796 else
2797 {
2798 gsi = gsi_after_labels (gimple_bb (stmt));
2799 if (!gsi_end_p (gsi))
2800 uid = gimple_uid (gsi_stmt (gsi));
2801 else
2802 {
2803 gsi = gsi_start_bb (gimple_bb (stmt));
2804 uid = 1;
2805 while (!gsi_end_p (gsi))
2806 {
2807 uid = gimple_uid (gsi_stmt (gsi));
2808 gsi_next (&gsi);
2809 }
2810 }
2811 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2812 tem = force_into_ssa_name (&gsi, tem, true);
2813 if (gsi_end_p (gsi))
2814 gsi = gsi_last_bb (gimple_bb (stmt));
2815 else
2816 gsi_prev (&gsi);
2817 }
2818 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2819 if (gimple_uid (gsi_stmt (gsi)))
2820 break;
2821 else
2822 gimple_set_uid (gsi_stmt (gsi), uid);
2823
2824 oe->op = tem;
2825 range->exp = exp;
2826 range->low = low;
2827 range->high = high;
2828 range->in_p = in_p;
2829 range->strict_overflow_p = false;
2830
2831 for (i = 0; i < count; i++)
2832 {
2833 if (otherrange)
2834 range = otherrange + i;
2835 else
2836 range = otherrangep[i];
2837 oe = (*ops)[range->idx];
2838 /* Now change all the other range test immediate uses, so that
2839 those tests will be optimized away. */
2840 if (opcode == ERROR_MARK)
2841 {
2842 if (oe->op)
2843 oe->op = build_int_cst (TREE_TYPE (oe->op),
2844 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2845 else
2846 oe->op = (oe->rank == BIT_IOR_EXPR
2847 ? boolean_false_node : boolean_true_node);
2848 }
2849 else
2850 oe->op = error_mark_node;
2851 range->exp = NULL_TREE;
2852 range->low = NULL_TREE;
2853 range->high = NULL_TREE;
2854 }
2855 return true;
2856 }
2857
2858 /* Optimize X == CST1 || X == CST2
2859 if popcount (CST1 ^ CST2) == 1 into
2860 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2861 Similarly for ranges. E.g.
2862 X != 2 && X != 3 && X != 10 && X != 11
2863 will be transformed by the previous optimization into
2864 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2865 and this loop can transform that into
2866 !(((X & ~8) - 2U) <= 1U). */
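/* A worked instance for illustration (values chosen here, not from the
   sources): for X == 12 || X == 14 we have 12 ^ 14 == 2, a single bit, so
   the test becomes (X & ~2) == 12, which matches exactly the two values
   12 and 14.  */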
2867
2868 static bool
2869 optimize_range_tests_xor (enum tree_code opcode, tree type,
2870 tree lowi, tree lowj, tree highi, tree highj,
2871 vec<operand_entry *> *ops,
2872 struct range_entry *rangei,
2873 struct range_entry *rangej)
2874 {
2875 tree lowxor, highxor, tem, exp;
2876 /* Check lowi ^ lowj == highi ^ highj and
2877 popcount (lowi ^ lowj) == 1. */
2878 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2879 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2880 return false;
2881 if (!integer_pow2p (lowxor))
2882 return false;
2883 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2884 if (!tree_int_cst_equal (lowxor, highxor))
2885 return false;
2886
2887 exp = rangei->exp;
2888 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2889 int prec = GET_MODE_PRECISION (mode);
2890 if (TYPE_PRECISION (type) < prec
2891 || (wi::to_wide (TYPE_MIN_VALUE (type))
2892 != wi::min_value (prec, TYPE_SIGN (type)))
2893 || (wi::to_wide (TYPE_MAX_VALUE (type))
2894 != wi::max_value (prec, TYPE_SIGN (type))))
2895 {
2896 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2897 exp = fold_convert (type, exp);
2898 lowxor = fold_convert (type, lowxor);
2899 lowi = fold_convert (type, lowi);
2900 highi = fold_convert (type, highi);
2901 }
2902 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2903 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2904 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2905 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2906 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2907 NULL, rangei->in_p, lowj, highj,
2908 rangei->strict_overflow_p
2909 || rangej->strict_overflow_p))
2910 return true;
2911 return false;
2912 }
2913
2914 /* Optimize X == CST1 || X == CST2
2915 if popcount (CST2 - CST1) == 1 into
2916 ((X - CST1) & ~(CST2 - CST1)) == 0.
2917 Similarly for ranges. E.g.
2918 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2919 || X == 75 || X == 45
2920 will be transformed by the previous optimization into
2921 (X - 43U) <= 3U || (X - 75U) <= 3U
2922 and this loop can transform that into
2923 ((X - 43U) & ~(75U - 43U)) <= 3U. */
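/* A worked instance for illustration (values chosen here, not from the
   sources): for X == 5 || X == 13 we have 13 - 5 == 8, a single bit, so
   the test becomes ((X - 5) & ~8) == 0, which holds exactly for X == 5
   and X == 13.  */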
2924 static bool
2925 optimize_range_tests_diff (enum tree_code opcode, tree type,
2926 tree lowi, tree lowj, tree highi, tree highj,
2927 vec<operand_entry *> *ops,
2928 struct range_entry *rangei,
2929 struct range_entry *rangej)
2930 {
2931 tree tem1, tem2, mask;
2932 /* Check highi - lowi == highj - lowj. */
2933 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2934 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2935 return false;
2936 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2937 if (!tree_int_cst_equal (tem1, tem2))
2938 return false;
2939 /* Check popcount (lowj - lowi) == 1. */
2940 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2941 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2942 return false;
2943 if (!integer_pow2p (tem1))
2944 return false;
2945
2946 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2947 int prec = GET_MODE_PRECISION (mode);
2948 if (TYPE_PRECISION (type) < prec
2949 || (wi::to_wide (TYPE_MIN_VALUE (type))
2950 != wi::min_value (prec, TYPE_SIGN (type)))
2951 || (wi::to_wide (TYPE_MAX_VALUE (type))
2952 != wi::max_value (prec, TYPE_SIGN (type))))
2953 type = build_nonstandard_integer_type (prec, 1);
2954 else
2955 type = unsigned_type_for (type);
2956 tem1 = fold_convert (type, tem1);
2957 tem2 = fold_convert (type, tem2);
2958 lowi = fold_convert (type, lowi);
2959 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2960 tem1 = fold_build2 (MINUS_EXPR, type,
2961 fold_convert (type, rangei->exp), lowi);
2962 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2963 lowj = build_int_cst (type, 0);
2964 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2965 NULL, rangei->in_p, lowj, tem2,
2966 rangei->strict_overflow_p
2967 || rangej->strict_overflow_p))
2968 return true;
2969 return false;
2970 }
2971
2972 /* Perform the checks common to optimize_range_tests_xor and
2973    optimize_range_tests_diff.
2974    If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor;
2975    otherwise call optimize_range_tests_diff.  */
2976
2977 static bool
2978 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2979 bool optimize_xor, vec<operand_entry *> *ops,
2980 struct range_entry *ranges)
2981 {
2982 int i, j;
2983 bool any_changes = false;
2984 for (i = first; i < length; i++)
2985 {
2986 tree lowi, highi, lowj, highj, type, tem;
2987
2988 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2989 continue;
2990 type = TREE_TYPE (ranges[i].exp);
2991 if (!INTEGRAL_TYPE_P (type))
2992 continue;
2993 lowi = ranges[i].low;
2994 if (lowi == NULL_TREE)
2995 lowi = TYPE_MIN_VALUE (type);
2996 highi = ranges[i].high;
2997 if (highi == NULL_TREE)
2998 continue;
2999 for (j = i + 1; j < length && j < i + 64; j++)
3000 {
3001 bool changes;
3002 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3003 continue;
3004 lowj = ranges[j].low;
3005 if (lowj == NULL_TREE)
3006 continue;
3007 highj = ranges[j].high;
3008 if (highj == NULL_TREE)
3009 highj = TYPE_MAX_VALUE (type);
3010 /* Check lowj > highi. */
3011 tem = fold_binary (GT_EXPR, boolean_type_node,
3012 lowj, highi);
3013 if (tem == NULL_TREE || !integer_onep (tem))
3014 continue;
3015 if (optimize_xor)
3016 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3017 highi, highj, ops,
3018 ranges + i, ranges + j);
3019 else
3020 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3021 highi, highj, ops,
3022 ranges + i, ranges + j);
3023 if (changes)
3024 {
3025 any_changes = true;
3026 break;
3027 }
3028 }
3029 }
3030 return any_changes;
3031 }
3032
3033 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3034 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3035 EXP on success, NULL otherwise. */
3036
3037 static tree
3038 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3039 wide_int *mask, tree *totallowp)
3040 {
3041 tree tem = int_const_binop (MINUS_EXPR, high, low);
3042 if (tem == NULL_TREE
3043 || TREE_CODE (tem) != INTEGER_CST
3044 || TREE_OVERFLOW (tem)
3045 || tree_int_cst_sgn (tem) == -1
3046 || compare_tree_int (tem, prec) != -1)
3047 return NULL_TREE;
3048
3049 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3050 *mask = wi::shifted_mask (0, max, false, prec);
3051 if (TREE_CODE (exp) == BIT_AND_EXPR
3052 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3053 {
3054 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3055 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3056 if (wi::popcount (msk) == 1
3057 && wi::ltu_p (msk, prec - max))
3058 {
3059 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3060 max += msk.to_uhwi ();
3061 exp = TREE_OPERAND (exp, 0);
3062 if (integer_zerop (low)
3063 && TREE_CODE (exp) == PLUS_EXPR
3064 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3065 {
3066 tree ret = TREE_OPERAND (exp, 0);
3067 STRIP_NOPS (ret);
3068 widest_int bias
3069 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3070 TYPE_PRECISION (TREE_TYPE (low))));
3071 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3072 if (totallowp)
3073 {
3074 *totallowp = tbias;
3075 return ret;
3076 }
3077 else if (!tree_int_cst_lt (totallow, tbias))
3078 return NULL_TREE;
3079 bias = wi::to_widest (tbias);
3080 bias -= wi::to_widest (totallow);
3081 if (bias >= 0 && bias < prec - max)
3082 {
3083 *mask = wi::lshift (*mask, bias);
3084 return ret;
3085 }
3086 }
3087 }
3088 }
3089 if (totallowp)
3090 return exp;
3091 if (!tree_int_cst_lt (totallow, low))
3092 return exp;
3093 tem = int_const_binop (MINUS_EXPR, low, totallow);
3094 if (tem == NULL_TREE
3095 || TREE_CODE (tem) != INTEGER_CST
3096 || TREE_OVERFLOW (tem)
3097 || compare_tree_int (tem, prec - max) == 1)
3098 return NULL_TREE;
3099
3100 *mask = wi::lshift (*mask, wi::to_widest (tem));
3101 return exp;
3102 }
3103
3104 /* Attempt to optimize small range tests using bit test.
3105 E.g.
3106 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3107 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3108    has been optimized by earlier optimizations into:
3109 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3110    As the whole 43 through 82 range spans fewer than 64 values,
3111 for 64-bit word targets optimize that into:
3112 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
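/* A note on the mask in the example above, spelled out for illustration:
   0x8F0000004FULL has bits 0,1,2,3,6 and 32,33,34,35,39 set, i.e. exactly
   the values 43,44,45,46,49,75,76,77,78,82 rebased by -43, so the single
   shift-and-AND test stands in for all ten comparisons.  */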
3113
3114 static bool
3115 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3116 vec<operand_entry *> *ops,
3117 struct range_entry *ranges)
3118 {
3119 int i, j;
3120 bool any_changes = false;
3121 int prec = GET_MODE_BITSIZE (word_mode);
3122 auto_vec<struct range_entry *, 64> candidates;
3123
3124 for (i = first; i < length - 2; i++)
3125 {
3126 tree lowi, highi, lowj, highj, type;
3127
3128 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3129 continue;
3130 type = TREE_TYPE (ranges[i].exp);
3131 if (!INTEGRAL_TYPE_P (type))
3132 continue;
3133 lowi = ranges[i].low;
3134 if (lowi == NULL_TREE)
3135 lowi = TYPE_MIN_VALUE (type);
3136 highi = ranges[i].high;
3137 if (highi == NULL_TREE)
3138 continue;
3139 wide_int mask;
3140 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3141 highi, &mask, &lowi);
3142 if (exp == NULL_TREE)
3143 continue;
3144 bool strict_overflow_p = ranges[i].strict_overflow_p;
3145 candidates.truncate (0);
3146 int end = MIN (i + 64, length);
3147 for (j = i + 1; j < end; j++)
3148 {
3149 tree exp2;
3150 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3151 continue;
3152 if (ranges[j].exp == exp)
3153 ;
3154 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3155 {
3156 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3157 if (exp2 == exp)
3158 ;
3159 else if (TREE_CODE (exp2) == PLUS_EXPR)
3160 {
3161 exp2 = TREE_OPERAND (exp2, 0);
3162 STRIP_NOPS (exp2);
3163 if (exp2 != exp)
3164 continue;
3165 }
3166 else
3167 continue;
3168 }
3169 else
3170 continue;
3171 lowj = ranges[j].low;
3172 if (lowj == NULL_TREE)
3173 continue;
3174 highj = ranges[j].high;
3175 if (highj == NULL_TREE)
3176 highj = TYPE_MAX_VALUE (type);
3177 wide_int mask2;
3178 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3179 highj, &mask2, NULL);
3180 if (exp2 != exp)
3181 continue;
3182 mask |= mask2;
3183 strict_overflow_p |= ranges[j].strict_overflow_p;
3184 candidates.safe_push (&ranges[j]);
3185 }
3186
3187       /* If we would otherwise need 3 or more comparisons, use a bit test.  */
3188 if (candidates.length () >= 2)
3189 {
3190 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3191 wi::to_widest (lowi)
3192 + prec - 1 - wi::clz (mask));
3193 operand_entry *oe = (*ops)[ranges[i].idx];
3194 tree op = oe->op;
3195 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3196 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3197 location_t loc = gimple_location (stmt);
3198 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3199
3200 	  /* See if it is cheaper to pretend the minimum value of the
3201 	     range is 0, if the maximum value is small enough.
3202 	     We can then avoid the subtraction of the minimum value, but the
3203 	     mask constant could perhaps be more expensive.  */
3204 if (compare_tree_int (lowi, 0) > 0
3205 && compare_tree_int (high, prec) < 0)
3206 {
3207 int cost_diff;
3208 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3209 rtx reg = gen_raw_REG (word_mode, 10000);
3210 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3211 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3212 GEN_INT (-m)),
3213 word_mode, speed_p);
3214 rtx r = immed_wide_int_const (mask, word_mode);
3215 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3216 word_mode, speed_p);
3217 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3218 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3219 word_mode, speed_p);
3220 if (cost_diff > 0)
3221 {
3222 mask = wi::lshift (mask, m);
3223 lowi = build_zero_cst (TREE_TYPE (lowi));
3224 }
3225 }
3226
3227 tree tem = build_range_check (loc, optype, unshare_expr (exp),
3228 false, lowi, high);
3229 if (tem == NULL_TREE || is_gimple_val (tem))
3230 continue;
3231 tree etype = unsigned_type_for (TREE_TYPE (exp));
3232 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3233 fold_convert_loc (loc, etype, exp),
3234 fold_convert_loc (loc, etype, lowi));
3235 exp = fold_convert_loc (loc, integer_type_node, exp);
3236 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3237 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3238 build_int_cst (word_type, 1), exp);
3239 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3240 wide_int_to_tree (word_type, mask));
3241 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3242 build_zero_cst (word_type));
3243 if (is_gimple_val (exp))
3244 continue;
3245
3246 /* The shift might have undefined behavior if TEM is true,
3247 but reassociate_bb isn't prepared to have basic blocks
3248 	     split when it is running.  So, temporarily emit code
3249 with BIT_IOR_EXPR instead of &&, and fix it up in
3250 branch_fixup. */
3251 gimple_seq seq;
3252 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3253 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3254 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3255 gimple_seq seq2;
3256 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3257 gimple_seq_add_seq_without_update (&seq, seq2);
3258 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3259 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3260 gimple *g = gimple_build_assign (make_ssa_name (optype),
3261 BIT_IOR_EXPR, tem, exp);
3262 gimple_set_location (g, loc);
3263 gimple_seq_add_stmt_without_update (&seq, g);
3264 exp = gimple_assign_lhs (g);
3265 tree val = build_zero_cst (optype);
3266 if (update_range_test (&ranges[i], NULL, candidates.address (),
3267 candidates.length (), opcode, ops, exp,
3268 seq, false, val, val, strict_overflow_p))
3269 {
3270 any_changes = true;
3271 reassoc_branch_fixups.safe_push (tem);
3272 }
3273 else
3274 gimple_seq_discard (seq);
3275 }
3276 }
3277 return any_changes;
3278 }
3279
3280 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3281    and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.  */
3282
3283 static bool
3284 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3285 vec<operand_entry *> *ops,
3286 struct range_entry *ranges)
3287 {
3288 int i;
3289 unsigned int b;
3290 bool any_changes = false;
3291 auto_vec<int, 128> buckets;
3292 auto_vec<int, 32> chains;
3293 auto_vec<struct range_entry *, 32> candidates;
3294
3295 for (i = first; i < length; i++)
3296 {
3297 if (ranges[i].exp == NULL_TREE
3298 || TREE_CODE (ranges[i].exp) != SSA_NAME
3299 || !ranges[i].in_p
3300 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3301 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3302 || ranges[i].low == NULL_TREE
3303 || ranges[i].low != ranges[i].high)
3304 continue;
3305
3306 bool zero_p = integer_zerop (ranges[i].low);
3307 if (!zero_p && !integer_all_onesp (ranges[i].low))
3308 continue;
3309
3310 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3311 if (buckets.length () <= b)
3312 buckets.safe_grow_cleared (b + 1);
3313 if (chains.length () <= (unsigned) i)
3314 chains.safe_grow (i + 1);
3315 chains[i] = buckets[b];
3316 buckets[b] = i + 1;
3317 }
3318
3319 FOR_EACH_VEC_ELT (buckets, b, i)
3320 if (i && chains[i - 1])
3321 {
3322 int j, k = i;
3323 for (j = chains[i - 1]; j; j = chains[j - 1])
3324 {
3325 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3326 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3327 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3328 k = j;
3329 }
3330 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3331 tree type2 = NULL_TREE;
3332 bool strict_overflow_p = false;
3333 candidates.truncate (0);
3334 for (j = i; j; j = chains[j - 1])
3335 {
3336 tree type = TREE_TYPE (ranges[j - 1].exp);
3337 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3338 if (j == k
3339 || useless_type_conversion_p (type1, type))
3340 ;
3341 else if (type2 == NULL_TREE
3342 || useless_type_conversion_p (type2, type))
3343 {
3344 if (type2 == NULL_TREE)
3345 type2 = type;
3346 candidates.safe_push (&ranges[j - 1]);
3347 }
3348 }
3349 unsigned l = candidates.length ();
3350 for (j = i; j; j = chains[j - 1])
3351 {
3352 tree type = TREE_TYPE (ranges[j - 1].exp);
3353 if (j == k)
3354 continue;
3355 if (useless_type_conversion_p (type1, type))
3356 ;
3357 else if (type2 == NULL_TREE
3358 || useless_type_conversion_p (type2, type))
3359 continue;
3360 candidates.safe_push (&ranges[j - 1]);
3361 }
3362 gimple_seq seq = NULL;
3363 tree op = NULL_TREE;
3364 unsigned int id;
3365 struct range_entry *r;
3366 candidates.safe_push (&ranges[k - 1]);
3367 FOR_EACH_VEC_ELT (candidates, id, r)
3368 {
3369 gimple *g;
3370 if (id == 0)
3371 {
3372 op = r->exp;
3373 continue;
3374 }
3375 if (id == l)
3376 {
3377 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3378 gimple_seq_add_stmt_without_update (&seq, g);
3379 op = gimple_assign_lhs (g);
3380 }
3381 tree type = TREE_TYPE (r->exp);
3382 tree exp = r->exp;
3383 if (id >= l && !useless_type_conversion_p (type1, type))
3384 {
3385 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3386 gimple_seq_add_stmt_without_update (&seq, g);
3387 exp = gimple_assign_lhs (g);
3388 }
3389 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3390 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3391 op, exp);
3392 gimple_seq_add_stmt_without_update (&seq, g);
3393 op = gimple_assign_lhs (g);
3394 }
3395 candidates.pop ();
3396 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3397 candidates.length (), opcode, ops, op,
3398 seq, true, ranges[k - 1].low,
3399 ranges[k - 1].low, strict_overflow_p))
3400 any_changes = true;
3401 else
3402 gimple_seq_discard (seq);
3403 }
3404
3405 return any_changes;
3406 }
3407
3408 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3409 a >= 0 && a < b into (unsigned) a < (unsigned) b
3410 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
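/* A sketch of why this is safe, added for illustration: with b >= 0, any
   negative a reinterpreted as unsigned has its sign bit set and so becomes
   larger than b, which means (unsigned) a < (unsigned) b is false exactly
   when a >= 0 && a < b is false.  E.g. for 8-bit values, a == -1 becomes
   255u, which is never below a non-negative b.  */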
3411
3412 static bool
3413 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3414 vec<operand_entry *> *ops,
3415 struct range_entry *ranges,
3416 basic_block first_bb)
3417 {
3418 int i;
3419 bool any_changes = false;
3420 hash_map<tree, int> *map = NULL;
3421
3422 for (i = first; i < length; i++)
3423 {
3424 if (ranges[i].exp == NULL_TREE
3425 || TREE_CODE (ranges[i].exp) != SSA_NAME
3426 || !ranges[i].in_p)
3427 continue;
3428
3429 tree type = TREE_TYPE (ranges[i].exp);
3430 if (!INTEGRAL_TYPE_P (type)
3431 || TYPE_UNSIGNED (type)
3432 || ranges[i].low == NULL_TREE
3433 || !integer_zerop (ranges[i].low)
3434 || ranges[i].high != NULL_TREE)
3435 continue;
3436 /* EXP >= 0 here. */
3437 if (map == NULL)
3438 map = new hash_map <tree, int>;
3439 map->put (ranges[i].exp, i);
3440 }
3441
3442 if (map == NULL)
3443 return false;
3444
3445 for (i = 0; i < length; i++)
3446 {
3447 bool in_p = ranges[i].in_p;
3448 if (ranges[i].low == NULL_TREE
3449 || ranges[i].high == NULL_TREE)
3450 continue;
3451 if (!integer_zerop (ranges[i].low)
3452 || !integer_zerop (ranges[i].high))
3453 {
3454 if (ranges[i].exp
3455 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3456 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3457 && integer_onep (ranges[i].low)
3458 && integer_onep (ranges[i].high))
3459 in_p = !in_p;
3460 else
3461 continue;
3462 }
3463
3464 gimple *stmt;
3465 tree_code ccode;
3466 tree rhs1, rhs2;
3467 if (ranges[i].exp)
3468 {
3469 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3470 continue;
3471 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3472 if (!is_gimple_assign (stmt))
3473 continue;
3474 ccode = gimple_assign_rhs_code (stmt);
3475 rhs1 = gimple_assign_rhs1 (stmt);
3476 rhs2 = gimple_assign_rhs2 (stmt);
3477 }
3478 else
3479 {
3480 operand_entry *oe = (*ops)[ranges[i].idx];
3481 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3482 if (gimple_code (stmt) != GIMPLE_COND)
3483 continue;
3484 ccode = gimple_cond_code (stmt);
3485 rhs1 = gimple_cond_lhs (stmt);
3486 rhs2 = gimple_cond_rhs (stmt);
3487 }
3488
3489 if (TREE_CODE (rhs1) != SSA_NAME
3490 || rhs2 == NULL_TREE
3491 || TREE_CODE (rhs2) != SSA_NAME)
3492 continue;
3493
3494 switch (ccode)
3495 {
3496 case GT_EXPR:
3497 case GE_EXPR:
3498 case LT_EXPR:
3499 case LE_EXPR:
3500 break;
3501 default:
3502 continue;
3503 }
3504 if (in_p)
3505 ccode = invert_tree_comparison (ccode, false);
3506 switch (ccode)
3507 {
3508 case GT_EXPR:
3509 case GE_EXPR:
3510 std::swap (rhs1, rhs2);
3511 ccode = swap_tree_comparison (ccode);
3512 break;
3513 case LT_EXPR:
3514 case LE_EXPR:
3515 break;
3516 default:
3517 gcc_unreachable ();
3518 }
3519
3520 int *idx = map->get (rhs1);
3521 if (idx == NULL)
3522 continue;
3523
3524 /* maybe_optimize_range_tests allows statements without side-effects
3525 in the basic blocks as long as they are consumed in the same bb.
3526 Make sure rhs2's def stmt is not among them, otherwise we can't
3527 	 safely use get_nonzero_bits on it.  E.g. in:
3528 # RANGE [-83, 1] NONZERO 173
3529 # k_32 = PHI <k_47(13), k_12(9)>
3530 ...
3531 if (k_32 >= 0)
3532 goto <bb 5>; [26.46%]
3533 else
3534 goto <bb 9>; [73.54%]
3535
3536 <bb 5> [local count: 140323371]:
3537 # RANGE [0, 1] NONZERO 1
3538 _5 = (int) k_32;
3539 # RANGE [0, 4] NONZERO 4
3540 _21 = _5 << 2;
3541 # RANGE [0, 4] NONZERO 4
3542 iftmp.0_44 = (char) _21;
3543 if (k_32 < iftmp.0_44)
3544 goto <bb 6>; [84.48%]
3545 else
3546 goto <bb 9>; [15.52%]
3547 	 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3548 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3549 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3550 	 those stmts even for negative k_32, the value ranges would no
3551 	 longer be guaranteed, and so the optimization would be invalid.  */
3552 while (opcode == ERROR_MARK)
3553 {
3554 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3555 basic_block bb2 = gimple_bb (g);
3556 if (bb2
3557 && bb2 != first_bb
3558 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3559 {
3560 /* As an exception, handle a few common cases. */
3561 if (gimple_assign_cast_p (g)
3562 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3563 {
3564 tree op0 = gimple_assign_rhs1 (g);
3565 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3566 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3567 > TYPE_PRECISION (TREE_TYPE (op0))))
3568 /* Zero-extension is always ok. */
3569 break;
3570 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3571 == TYPE_PRECISION (TREE_TYPE (op0))
3572 && TREE_CODE (op0) == SSA_NAME)
3573 {
3574 /* Cast from signed to unsigned or vice versa. Retry
3575 with the op0 as new rhs2. */
3576 rhs2 = op0;
3577 continue;
3578 }
3579 }
3580 else if (is_gimple_assign (g)
3581 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3582 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3583 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3584 /* Masking with INTEGER_CST with MSB clear is always ok
3585 too. */
3586 break;
3587 rhs2 = NULL_TREE;
3588 }
3589 break;
3590 }
3591 if (rhs2 == NULL_TREE)
3592 continue;
3593
3594 wide_int nz = get_nonzero_bits (rhs2);
3595 if (wi::neg_p (nz))
3596 continue;
3597
3598 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3599 	 and RHS2 is known to be >= 0.  */
3600 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3601
3602 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3603 if ((ranges[*idx].strict_overflow_p
3604 || ranges[i].strict_overflow_p)
3605 && issue_strict_overflow_warning (wc))
3606 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3607 "assuming signed overflow does not occur "
3608 "when simplifying range test");
3609
3610 if (dump_file && (dump_flags & TDF_DETAILS))
3611 {
3612 struct range_entry *r = &ranges[*idx];
3613 fprintf (dump_file, "Optimizing range test ");
3614 print_generic_expr (dump_file, r->exp);
3615 fprintf (dump_file, " +[");
3616 print_generic_expr (dump_file, r->low);
3617 fprintf (dump_file, ", ");
3618 print_generic_expr (dump_file, r->high);
3619 fprintf (dump_file, "] and comparison ");
3620 print_generic_expr (dump_file, rhs1);
3621 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3622 print_generic_expr (dump_file, rhs2);
3623 fprintf (dump_file, "\n into (");
3624 print_generic_expr (dump_file, utype);
3625 fprintf (dump_file, ") ");
3626 print_generic_expr (dump_file, rhs1);
3627 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3628 print_generic_expr (dump_file, utype);
3629 fprintf (dump_file, ") ");
3630 print_generic_expr (dump_file, rhs2);
3631 fprintf (dump_file, "\n");
3632 }
3633
3634 operand_entry *oe = (*ops)[ranges[i].idx];
3635 ranges[i].in_p = 0;
3636 if (opcode == BIT_IOR_EXPR
3637 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3638 {
3639 ranges[i].in_p = 1;
3640 ccode = invert_tree_comparison (ccode, false);
3641 }
3642
3643 unsigned int uid = gimple_uid (stmt);
3644 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3645 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3646 gimple_set_uid (g, uid);
3647 rhs1 = gimple_assign_lhs (g);
3648 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3649 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3650 {
3651 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3652 gimple_set_uid (g, uid);
3653 rhs2 = gimple_assign_lhs (g);
3654 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3655 }
3656 if (tree_swap_operands_p (rhs1, rhs2))
3657 {
3658 std::swap (rhs1, rhs2);
3659 ccode = swap_tree_comparison (ccode);
3660 }
3661 if (gimple_code (stmt) == GIMPLE_COND)
3662 {
3663 gcond *c = as_a <gcond *> (stmt);
3664 gimple_cond_set_code (c, ccode);
3665 gimple_cond_set_lhs (c, rhs1);
3666 gimple_cond_set_rhs (c, rhs2);
3667 update_stmt (stmt);
3668 }
3669 else
3670 {
3671 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3672 if (!INTEGRAL_TYPE_P (ctype)
3673 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3674 && TYPE_PRECISION (ctype) != 1))
3675 ctype = boolean_type_node;
3676 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3677 gimple_set_uid (g, uid);
3678 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3679 if (oe->op && ctype != TREE_TYPE (oe->op))
3680 {
3681 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3682 NOP_EXPR, gimple_assign_lhs (g));
3683 gimple_set_uid (g, uid);
3684 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3685 }
3686 ranges[i].exp = gimple_assign_lhs (g);
3687 oe->op = ranges[i].exp;
3688 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3689 ranges[i].high = ranges[i].low;
3690 }
3691 ranges[i].strict_overflow_p = false;
3692 oe = (*ops)[ranges[*idx].idx];
3693 /* Now change all the other range test immediate uses, so that
3694 those tests will be optimized away. */
3695 if (opcode == ERROR_MARK)
3696 {
3697 if (oe->op)
3698 oe->op = build_int_cst (TREE_TYPE (oe->op),
3699 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3700 else
3701 oe->op = (oe->rank == BIT_IOR_EXPR
3702 ? boolean_false_node : boolean_true_node);
3703 }
3704 else
3705 oe->op = error_mark_node;
3706 ranges[*idx].exp = NULL_TREE;
3707 ranges[*idx].low = NULL_TREE;
3708 ranges[*idx].high = NULL_TREE;
3709 any_changes = true;
3710 }
3711
3712 delete map;
3713 return any_changes;
3714 }
3715
3716 /* Optimize range tests, similarly to how fold_range_test optimizes
3717 them on trees. The tree code for the binary
3718 operation between all the operands is OPCODE.
3719 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3720 maybe_optimize_range_tests for inter-bb range optimization.
3721 In that case if oe->op is NULL, oe->id is bb->index whose
3722 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3723 the actual opcode.
3724 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
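/* For instance, with OPCODE BIT_AND_EXPR the operand chain built from
     _1 = x_5 >= 0;
     _2 = x_5 <= 9;
     _3 = _1 & _2;
   can typically be rewritten by update_range_test into a single
   unsigned range check, roughly (unsigned) x_5 <= 9, once merge_ranges
   has folded the two ranges into one. */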
3725
3726 static bool
3727 optimize_range_tests (enum tree_code opcode,
3728 vec<operand_entry *> *ops, basic_block first_bb)
3729 {
3730 unsigned int length = ops->length (), i, j, first;
3731 operand_entry *oe;
3732 struct range_entry *ranges;
3733 bool any_changes = false;
3734
3735 if (length == 1)
3736 return false;
3737
3738 ranges = XNEWVEC (struct range_entry, length);
3739 for (i = 0; i < length; i++)
3740 {
3741 oe = (*ops)[i];
3742 ranges[i].idx = i;
3743 init_range_entry (ranges + i, oe->op,
3744 oe->op
3745 ? NULL
3746 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3747 /* For |, invert it now; we will invert it again before emitting
3748 the optimized expression. */
3749 if (opcode == BIT_IOR_EXPR
3750 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3751 ranges[i].in_p = !ranges[i].in_p;
3752 }
3753
3754 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3755 for (i = 0; i < length; i++)
3756 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3757 break;
3758
3759 /* Try to merge ranges. */
3760 for (first = i; i < length; i++)
3761 {
3762 tree low = ranges[i].low;
3763 tree high = ranges[i].high;
3764 int in_p = ranges[i].in_p;
3765 bool strict_overflow_p = ranges[i].strict_overflow_p;
3766 int update_fail_count = 0;
3767
3768 for (j = i + 1; j < length; j++)
3769 {
3770 if (ranges[i].exp != ranges[j].exp)
3771 break;
3772 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3773 ranges[j].in_p, ranges[j].low, ranges[j].high))
3774 break;
3775 strict_overflow_p |= ranges[j].strict_overflow_p;
3776 }
3777
3778 if (j == i + 1)
3779 continue;
3780
3781 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3782 opcode, ops, ranges[i].exp, NULL, in_p,
3783 low, high, strict_overflow_p))
3784 {
3785 i = j - 1;
3786 any_changes = true;
3787 }
3788 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3789 while update_range_test would fail. */
3790 else if (update_fail_count == 64)
3791 i = j - 1;
3792 else
3793 ++update_fail_count;
3794 }
3795
3796 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3797 ops, ranges);
3798
3799 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3800 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3801 ops, ranges);
3802 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3803 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3804 ops, ranges);
3805 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3806 ops, ranges);
3807 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3808 ranges, first_bb);
3809
3810 if (any_changes && opcode != ERROR_MARK)
3811 {
3812 j = 0;
3813 FOR_EACH_VEC_ELT (*ops, i, oe)
3814 {
3815 if (oe->op == error_mark_node)
3816 continue;
3817 else if (i != j)
3818 (*ops)[j] = oe;
3819 j++;
3820 }
3821 ops->truncate (j);
3822 }
3823
3824 XDELETEVEC (ranges);
3825 return any_changes;
3826 }
3827
3828 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3829 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3830 otherwise the comparison code. TYPE is a return value that is set
3831 to the type of the comparison. */
3832
3833 static tree_code
3834 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
3835 tree *lhs, tree *rhs, gassign **vcond)
3836 {
3837 if (TREE_CODE (var) != SSA_NAME)
3838 return ERROR_MARK;
3839
3840 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3841 if (stmt == NULL)
3842 return ERROR_MARK;
3843 if (vcond)
3844 *vcond = stmt;
3845
3846 /* ??? If we start creating more COND_EXPR, we could perform
3847 this same optimization with them. For now, simplify. */
3848 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3849 return ERROR_MARK;
3850
3851 tree cond = gimple_assign_rhs1 (stmt);
3852 tree_code cmp = TREE_CODE (cond);
3853 if (cmp != SSA_NAME)
3854 return ERROR_MARK;
3855
3856 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
3857 if (assign == NULL
3858 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
3859 return ERROR_MARK;
3860
3861 cmp = gimple_assign_rhs_code (assign);
3862 if (lhs)
3863 *lhs = gimple_assign_rhs1 (assign);
3864 if (rhs)
3865 *rhs = gimple_assign_rhs2 (assign);
3866
3867 /* ??? For now, allow only canonical true and false result vectors.
3868 We could expand this to other constants should the need arise,
3869 but at the moment we don't create them. */
3870 tree t = gimple_assign_rhs2 (stmt);
3871 tree f = gimple_assign_rhs3 (stmt);
3872 bool inv;
3873 if (integer_all_onesp (t))
3874 inv = false;
3875 else if (integer_all_onesp (f))
3876 {
3877 cmp = invert_tree_comparison (cmp, false);
3878 inv = true;
/* Canonicalize so that the zero-vector check below sees F as zero. */
std::swap (t, f);
3879 }
3880 else
3881 return ERROR_MARK;
3882 if (!integer_zerop (f))
3883 return ERROR_MARK;
3884
3885 /* Success! */
3886 if (rets)
3887 *rets = assign;
3888 if (reti)
3889 *reti = inv;
3890 if (type)
3891 *type = TREE_TYPE (cond);
3892 return cmp;
3893 }
3894
3895 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3896 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
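/* Schematically, for
     c_1 = VEC_COND_EXPR <v_2 < w_3, { -1, ... }, { 0, ... }>;
     c_4 = VEC_COND_EXPR <v_2 == w_3, { -1, ... }, { 0, ... }>;
     r_5 = c_1 | c_4;
   maybe_fold_or_comparisons can combine the two comparisons into
   v_2 <= w_3, so only the first VEC_COND_EXPR needs to remain. */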
3897
3898 static bool
3899 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3900 {
3901 unsigned int length = ops->length (), i, j;
3902 bool any_changes = false;
3903
3904 if (length == 1)
3905 return false;
3906
3907 for (i = 0; i < length; ++i)
3908 {
3909 tree elt0 = (*ops)[i]->op;
3910
3911 gassign *stmt0, *vcond0;
3912 bool invert;
3913 tree type, lhs0, rhs0;
3914 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
3915 &rhs0, &vcond0);
3916 if (cmp0 == ERROR_MARK)
3917 continue;
3918
3919 for (j = i + 1; j < length; ++j)
3920 {
3921 tree &elt1 = (*ops)[j]->op;
3922
3923 gassign *stmt1, *vcond1;
3924 tree lhs1, rhs1;
3925 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
3926 &rhs1, &vcond1);
3927 if (cmp1 == ERROR_MARK)
3928 continue;
3929
3930 tree comb;
3931 if (opcode == BIT_AND_EXPR)
3932 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
3933 cmp1, lhs1, rhs1);
3934 else if (opcode == BIT_IOR_EXPR)
3935 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
3936 cmp1, lhs1, rhs1);
3937 else
3938 gcc_unreachable ();
3939 if (comb == NULL)
3940 continue;
3941
3942 /* Success! */
3943 if (dump_file && (dump_flags & TDF_DETAILS))
3944 {
3945 fprintf (dump_file, "Transforming ");
3946 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
3947 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3948 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
3949 fprintf (dump_file, " into ");
3950 print_generic_expr (dump_file, comb);
3951 fputc ('\n', dump_file);
3952 }
3953
3954 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
3955 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
3956 true, GSI_SAME_STMT);
3957 if (invert)
3958 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
3959 gimple_assign_rhs3_ptr (vcond0));
3960 gimple_assign_set_rhs1 (vcond0, exp);
3961 update_stmt (vcond0);
3962
3963 elt1 = error_mark_node;
3964 any_changes = true;
3965 }
3966 }
3967
3968 if (any_changes)
3969 {
3970 operand_entry *oe;
3971 j = 0;
3972 FOR_EACH_VEC_ELT (*ops, i, oe)
3973 {
3974 if (oe->op == error_mark_node)
3975 continue;
3976 else if (i != j)
3977 (*ops)[j] = oe;
3978 j++;
3979 }
3980 ops->truncate (j);
3981 }
3982
3983 return any_changes;
3984 }
3985
3986 /* Return true if STMT is a cast like:
3987 <bb N>:
3988 ...
3989 _123 = (int) _234;
3990
3991 <bb M>:
3992 # _345 = PHI <_123(N), 1(...), 1(...)>
3993 where _234 has bool type, _123 has a single use, and
3994 bb N has a single successor M. This is commonly used in
3995 the last block of a range test.
3996
3997 Also return true if STMT is a tcc_comparison statement like:
3998 <bb N>:
3999 ...
4000 _234 = a_2(D) == 2;
4001
4002 <bb M>:
4003 # _345 = PHI <_234(N), 1(...), 1(...)>
4004 _346 = (int) _345;
4005 where _234 has bool type, a single use, and
4006 bb N has a single successor M. This is commonly used in
4007 the last block of a range test. */
4008
4009 static bool
4010 final_range_test_p (gimple *stmt)
4011 {
4012 basic_block bb, rhs_bb, lhs_bb;
4013 edge e;
4014 tree lhs, rhs;
4015 use_operand_p use_p;
4016 gimple *use_stmt;
4017
4018 if (!gimple_assign_cast_p (stmt)
4019 && (!is_gimple_assign (stmt)
4020 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4021 != tcc_comparison)))
4022 return false;
4023 bb = gimple_bb (stmt);
4024 if (!single_succ_p (bb))
4025 return false;
4026 e = single_succ_edge (bb);
4027 if (e->flags & EDGE_COMPLEX)
4028 return false;
4029
4030 lhs = gimple_assign_lhs (stmt);
4031 rhs = gimple_assign_rhs1 (stmt);
4032 if (gimple_assign_cast_p (stmt)
4033 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4034 || TREE_CODE (rhs) != SSA_NAME
4035 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4036 return false;
4037
4038 if (!gimple_assign_cast_p (stmt)
4039 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4040 return false;
4041
4042 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4043 if (!single_imm_use (lhs, &use_p, &use_stmt))
4044 return false;
4045
4046 if (gimple_code (use_stmt) != GIMPLE_PHI
4047 || gimple_bb (use_stmt) != e->dest)
4048 return false;
4049
4050 /* And that the rhs is defined in the same loop. */
4051 if (gimple_assign_cast_p (stmt))
4052 {
4053 if (TREE_CODE (rhs) != SSA_NAME
4054 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4055 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4056 return false;
4057 }
4058 else
4059 {
4060 if (TREE_CODE (lhs) != SSA_NAME
4061 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4062 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4063 return false;
4064 }
4065
4066 return true;
4067 }
4068
4069 /* Return true if BB is a suitable basic block for inter-bb range test
4070 optimization. If BACKWARD is true, BB should be the only predecessor
4071 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4072 or compared against to find the common basic block to which all the
4073 conditions branch when true resp. false. If BACKWARD is false, TEST_BB
4074 should be the only predecessor of BB. */
4075
4076 static bool
4077 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4078 bool backward)
4079 {
4080 edge_iterator ei, ei2;
4081 edge e, e2;
4082 gimple *stmt;
4083 gphi_iterator gsi;
4084 bool other_edge_seen = false;
4085 bool is_cond;
4086
4087 if (test_bb == bb)
4088 return false;
4089 /* Check last stmt first. */
4090 stmt = last_stmt (bb);
4091 if (stmt == NULL
4092 || (gimple_code (stmt) != GIMPLE_COND
4093 && (backward || !final_range_test_p (stmt)))
4094 || gimple_visited_p (stmt)
4095 || stmt_could_throw_p (cfun, stmt)
4096 || *other_bb == bb)
4097 return false;
4098 is_cond = gimple_code (stmt) == GIMPLE_COND;
4099 if (is_cond)
4100 {
4101 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4102 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4103 to *OTHER_BB (if not set yet, try to find it out). */
4104 if (EDGE_COUNT (bb->succs) != 2)
4105 return false;
4106 FOR_EACH_EDGE (e, ei, bb->succs)
4107 {
4108 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4109 return false;
4110 if (e->dest == test_bb)
4111 {
4112 if (backward)
4113 continue;
4114 else
4115 return false;
4116 }
4117 if (e->dest == bb)
4118 return false;
4119 if (*other_bb == NULL)
4120 {
4121 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4122 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4123 return false;
4124 else if (e->dest == e2->dest)
4125 *other_bb = e->dest;
4126 if (*other_bb == NULL)
4127 return false;
4128 }
4129 if (e->dest == *other_bb)
4130 other_edge_seen = true;
4131 else if (backward)
4132 return false;
4133 }
4134 if (*other_bb == NULL || !other_edge_seen)
4135 return false;
4136 }
4137 else if (single_succ (bb) != *other_bb)
4138 return false;
4139
4140 /* Now check all PHIs of *OTHER_BB. */
4141 e = find_edge (bb, *other_bb);
4142 e2 = find_edge (test_bb, *other_bb);
4143 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4144 {
4145 gphi *phi = gsi.phi ();
4146 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4147 corresponding to BB and TEST_BB predecessor must be the same. */
4148 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4149 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4150 {
4151 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4152 one of the PHIs should have the lhs of the last stmt in
4153 that block as PHI arg and that PHI should have 0 or 1
4154 corresponding to it in all other range test basic blocks
4155 considered. */
4156 if (!is_cond)
4157 {
4158 if (gimple_phi_arg_def (phi, e->dest_idx)
4159 == gimple_assign_lhs (stmt)
4160 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4161 || integer_onep (gimple_phi_arg_def (phi,
4162 e2->dest_idx))))
4163 continue;
4164 }
4165 else
4166 {
4167 gimple *test_last = last_stmt (test_bb);
4168 if (gimple_code (test_last) != GIMPLE_COND
4169 && gimple_phi_arg_def (phi, e2->dest_idx)
4170 == gimple_assign_lhs (test_last)
4171 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
4172 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
4173 continue;
4174 }
4175
4176 return false;
4177 }
4178 }
4179 return true;
4180 }
4181
4182 /* Return true if BB doesn't have side-effects that would disallow
4183 range test optimization, all SSA_NAMEs set in the bb are consumed
4184 in the bb and there are no PHIs. */
4185
4186 static bool
4187 no_side_effect_bb (basic_block bb)
4188 {
4189 gimple_stmt_iterator gsi;
4190 gimple *last;
4191
4192 if (!gimple_seq_empty_p (phi_nodes (bb)))
4193 return false;
4194 last = last_stmt (bb);
4195 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4196 {
4197 gimple *stmt = gsi_stmt (gsi);
4198 tree lhs;
4199 imm_use_iterator imm_iter;
4200 use_operand_p use_p;
4201
4202 if (is_gimple_debug (stmt))
4203 continue;
4204 if (gimple_has_side_effects (stmt))
4205 return false;
4206 if (stmt == last)
4207 return true;
4208 if (!is_gimple_assign (stmt))
4209 return false;
4210 lhs = gimple_assign_lhs (stmt);
4211 if (TREE_CODE (lhs) != SSA_NAME)
4212 return false;
4213 if (gimple_assign_rhs_could_trap_p (stmt))
4214 return false;
4215 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4216 {
4217 gimple *use_stmt = USE_STMT (use_p);
4218 if (is_gimple_debug (use_stmt))
4219 continue;
4220 if (gimple_bb (use_stmt) != bb)
4221 return false;
4222 }
4223 }
4224 return false;
4225 }
4226
4227 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4228 return true and fill in *OPS recursively. */
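/* E.g. for CODE == BIT_IOR_EXPR and
     _1 = a_2 | b_3;
     var = _1 | c_4;
   the leaves a_2, b_3 and c_4 are pushed into *OPS, assuming each of
   these names has a single use. */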
4229
4230 static bool
4231 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4232 class loop *loop)
4233 {
4234 gimple *stmt = SSA_NAME_DEF_STMT (var);
4235 tree rhs[2];
4236 int i;
4237
4238 if (!is_reassociable_op (stmt, code, loop))
4239 return false;
4240
4241 rhs[0] = gimple_assign_rhs1 (stmt);
4242 rhs[1] = gimple_assign_rhs2 (stmt);
4243 gimple_set_visited (stmt, true);
4244 for (i = 0; i < 2; i++)
4245 if (TREE_CODE (rhs[i]) == SSA_NAME
4246 && !get_ops (rhs[i], code, ops, loop)
4247 && has_single_use (rhs[i]))
4248 {
4249 operand_entry *oe = operand_entry_pool.allocate ();
4250
4251 oe->op = rhs[i];
4252 oe->rank = code;
4253 oe->id = 0;
4254 oe->count = 1;
4255 oe->stmt_to_insert = NULL;
4256 ops->safe_push (oe);
4257 }
4258 return true;
4259 }
4260
4261 /* Find the ops that were added by get_ops starting from VAR, see if
4262 they were changed during update_range_test and if yes, create new
4263 stmts. */
4264
4265 static tree
4266 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4267 unsigned int *pidx, class loop *loop)
4268 {
4269 gimple *stmt = SSA_NAME_DEF_STMT (var);
4270 tree rhs[4];
4271 int i;
4272
4273 if (!is_reassociable_op (stmt, code, loop))
4274 return NULL;
4275
4276 rhs[0] = gimple_assign_rhs1 (stmt);
4277 rhs[1] = gimple_assign_rhs2 (stmt);
4278 rhs[2] = rhs[0];
4279 rhs[3] = rhs[1];
4280 for (i = 0; i < 2; i++)
4281 if (TREE_CODE (rhs[i]) == SSA_NAME)
4282 {
4283 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4284 if (rhs[2 + i] == NULL_TREE)
4285 {
4286 if (has_single_use (rhs[i]))
4287 rhs[2 + i] = ops[(*pidx)++]->op;
4288 else
4289 rhs[2 + i] = rhs[i];
4290 }
4291 }
4292 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4293 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4294 {
4295 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4296 var = make_ssa_name (TREE_TYPE (var));
4297 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4298 rhs[2], rhs[3]);
4299 gimple_set_uid (g, gimple_uid (stmt));
4300 gimple_set_visited (g, true);
4301 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4302 }
4303 return var;
4304 }
4305
4306 /* Structure to track the initial value passed to get_ops and
4307 the range in the ops vector for each basic block. */
4308
4309 struct inter_bb_range_test_entry
4310 {
4311 tree op;
4312 unsigned int first_idx, last_idx;
4313 };
4314
4315 /* Inter-bb range test optimization.
4316
4317 Returns TRUE if a gimple conditional is optimized to a constant
4318 true/false condition, otherwise returns FALSE.
4319
4320 This indicates to the caller that it should run a CFG cleanup pass
4321 once reassociation is completed. */
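/* The shape handled here is roughly a chain of blocks such as
     <bb 2>: if (a_1 < 10) goto <bb 3>; else goto <bb 5>;
     <bb 3>: if (b_2 != 0) goto <bb 4>; else goto <bb 5>;
     <bb 4>: ...
   where each block in the chain has one edge to the common other_bb
   (<bb 5> above) and one edge to the next block in the chain; the
   individual tests are then combined by optimize_range_tests. */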
4322
4323 static bool
4324 maybe_optimize_range_tests (gimple *stmt)
4325 {
4326 basic_block first_bb = gimple_bb (stmt);
4327 basic_block last_bb = first_bb;
4328 basic_block other_bb = NULL;
4329 basic_block bb;
4330 edge_iterator ei;
4331 edge e;
4332 auto_vec<operand_entry *> ops;
4333 auto_vec<inter_bb_range_test_entry> bbinfo;
4334 bool any_changes = false;
4335 bool cfg_cleanup_needed = false;
4336
4337 /* Consider only basic blocks that end with GIMPLE_COND or
4338 a cast statement satisfying final_range_test_p. All
4339 but the last bb in the first_bb .. last_bb range
4340 should end with GIMPLE_COND. */
4341 if (gimple_code (stmt) == GIMPLE_COND)
4342 {
4343 if (EDGE_COUNT (first_bb->succs) != 2)
4344 return cfg_cleanup_needed;
4345 }
4346 else if (final_range_test_p (stmt))
4347 other_bb = single_succ (first_bb);
4348 else
4349 return cfg_cleanup_needed;
4350
4351 if (stmt_could_throw_p (cfun, stmt))
4352 return cfg_cleanup_needed;
4353
4354 /* As relative ordering of post-dominator sons isn't fixed,
4355 maybe_optimize_range_tests can be called first on any
4356 bb in the range we want to optimize. So, start searching
4357 backwards, if first_bb can be set to a predecessor. */
4358 while (single_pred_p (first_bb))
4359 {
4360 basic_block pred_bb = single_pred (first_bb);
4361 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
4362 break;
4363 if (!no_side_effect_bb (first_bb))
4364 break;
4365 first_bb = pred_bb;
4366 }
4367 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4368 Before starting forward search in last_bb successors, find
4369 out the other_bb. */
4370 if (first_bb == last_bb)
4371 {
4372 other_bb = NULL;
4373 /* As a non-GIMPLE_COND last stmt always terminates the range,
4374 a forward search could not discover anything more, so just give up. */
4375 if (gimple_code (stmt) != GIMPLE_COND)
4376 return cfg_cleanup_needed;
4377 /* Look at both successors. Either it ends with a GIMPLE_COND
4378 and satisfies suitable_cond_bb, or ends with a cast and
4379 other_bb is that cast's successor. */
4380 FOR_EACH_EDGE (e, ei, first_bb->succs)
4381 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4382 || e->dest == first_bb)
4383 return cfg_cleanup_needed;
4384 else if (single_pred_p (e->dest))
4385 {
4386 stmt = last_stmt (e->dest);
4387 if (stmt
4388 && gimple_code (stmt) == GIMPLE_COND
4389 && EDGE_COUNT (e->dest->succs) == 2)
4390 {
4391 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4392 break;
4393 else
4394 other_bb = NULL;
4395 }
4396 else if (stmt
4397 && final_range_test_p (stmt)
4398 && find_edge (first_bb, single_succ (e->dest)))
4399 {
4400 other_bb = single_succ (e->dest);
4401 if (other_bb == first_bb)
4402 other_bb = NULL;
4403 }
4404 }
4405 if (other_bb == NULL)
4406 return cfg_cleanup_needed;
4407 }
4408 /* Now do the forward search, moving last_bb to successor bbs
4409 that aren't other_bb. */
4410 while (EDGE_COUNT (last_bb->succs) == 2)
4411 {
4412 FOR_EACH_EDGE (e, ei, last_bb->succs)
4413 if (e->dest != other_bb)
4414 break;
4415 if (e == NULL)
4416 break;
4417 if (!single_pred_p (e->dest))
4418 break;
4419 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4420 break;
4421 if (!no_side_effect_bb (e->dest))
4422 break;
4423 last_bb = e->dest;
4424 }
4425 if (first_bb == last_bb)
4426 return cfg_cleanup_needed;
4427 /* Here basic blocks first_bb through last_bb's predecessor
4428 end with GIMPLE_COND, all of them have one of the edges to
4429 other_bb and another to another block in the range,
4430 all blocks except first_bb don't have side-effects and
4431 last_bb ends with either GIMPLE_COND, or cast satisfying
4432 final_range_test_p. */
4433 for (bb = last_bb; ; bb = single_pred (bb))
4434 {
4435 enum tree_code code;
4436 tree lhs, rhs;
4437 inter_bb_range_test_entry bb_ent;
4438
4439 bb_ent.op = NULL_TREE;
4440 bb_ent.first_idx = ops.length ();
4441 bb_ent.last_idx = bb_ent.first_idx;
4442 e = find_edge (bb, other_bb);
4443 stmt = last_stmt (bb);
4444 gimple_set_visited (stmt, true);
4445 if (gimple_code (stmt) != GIMPLE_COND)
4446 {
4447 use_operand_p use_p;
4448 gimple *phi;
4449 edge e2;
4450 unsigned int d;
4451
4452 lhs = gimple_assign_lhs (stmt);
4453 rhs = gimple_assign_rhs1 (stmt);
4454 gcc_assert (bb == last_bb);
4455
4456 /* stmt is
4457 _123 = (int) _234;
4458 OR
4459 _234 = a_2(D) == 2;
4460
4461 followed by:
4462 <bb M>:
4463 # _345 = PHI <_123(N), 1(...), 1(...)>
4464
4465 or 0 instead of 1. If it is 0, the _234
4466 range test is anded together with all the
4467 other range tests, if it is 1, it is ored with
4468 them. */
4469 single_imm_use (lhs, &use_p, &phi);
4470 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4471 e2 = find_edge (first_bb, other_bb);
4472 d = e2->dest_idx;
4473 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4474 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4475 code = BIT_AND_EXPR;
4476 else
4477 {
4478 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4479 code = BIT_IOR_EXPR;
4480 }
4481
4482 /* If _234 SSA_NAME_DEF_STMT is
4483 _234 = _567 | _789;
4484 (or &, corresponding to 1/0 in the phi arguments),
4485 push into ops the individual range test arguments
4486 of the bitwise or resp. and, recursively. */
4487 if (TREE_CODE (rhs) == SSA_NAME
4488 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4489 != tcc_comparison)
4490 && !get_ops (rhs, code, &ops,
4491 loop_containing_stmt (stmt))
4492 && has_single_use (rhs))
4493 {
4494 /* Otherwise, push the _234 range test itself. */
4495 operand_entry *oe = operand_entry_pool.allocate ();
4496
4497 oe->op = rhs;
4498 oe->rank = code;
4499 oe->id = 0;
4500 oe->count = 1;
4501 oe->stmt_to_insert = NULL;
4502 ops.safe_push (oe);
4503 bb_ent.last_idx++;
4504 bb_ent.op = rhs;
4505 }
4506 else if (is_gimple_assign (stmt)
4507 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4508 == tcc_comparison)
4509 && !get_ops (lhs, code, &ops,
4510 loop_containing_stmt (stmt))
4511 && has_single_use (lhs))
4512 {
4513 operand_entry *oe = operand_entry_pool.allocate ();
4514 oe->op = lhs;
4515 oe->rank = code;
4516 oe->id = 0;
4517 oe->count = 1;
4518 ops.safe_push (oe);
4519 bb_ent.last_idx++;
4520 bb_ent.op = lhs;
4521 }
4522 else
4523 {
4524 bb_ent.last_idx = ops.length ();
4525 bb_ent.op = rhs;
4526 }
4527 bbinfo.safe_push (bb_ent);
4528 continue;
4529 }
4530 /* Otherwise stmt is GIMPLE_COND. */
4531 code = gimple_cond_code (stmt);
4532 lhs = gimple_cond_lhs (stmt);
4533 rhs = gimple_cond_rhs (stmt);
4534 if (TREE_CODE (lhs) == SSA_NAME
4535 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4536 && ((code != EQ_EXPR && code != NE_EXPR)
4537 || rhs != boolean_false_node
4538 /* Either push into ops the individual bitwise
4539 or resp. and operands, depending on which
4540 edge is other_bb. */
4541 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4542 ^ (code == EQ_EXPR))
4543 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4544 loop_containing_stmt (stmt))))
4545 {
4546 /* Or push the GIMPLE_COND stmt itself. */
4547 operand_entry *oe = operand_entry_pool.allocate ();
4548
4549 oe->op = NULL;
4550 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4551 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4552 /* oe->op = NULL signals that there is no SSA_NAME
4553 for the range test, and oe->id instead is the
4554 basic block number at whose end the GIMPLE_COND
4555 is. */
4556 oe->id = bb->index;
4557 oe->count = 1;
4558 oe->stmt_to_insert = NULL;
4559 ops.safe_push (oe);
4560 bb_ent.op = NULL;
4561 bb_ent.last_idx++;
4562 }
4563 else if (ops.length () > bb_ent.first_idx)
4564 {
4565 bb_ent.op = lhs;
4566 bb_ent.last_idx = ops.length ();
4567 }
4568 bbinfo.safe_push (bb_ent);
4569 if (bb == first_bb)
4570 break;
4571 }
4572 if (ops.length () > 1)
4573 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4574 if (any_changes)
4575 {
4576 unsigned int idx, max_idx = 0;
4577 /* update_ops relies on has_single_use predicates returning the
4578 same values as it did during get_ops earlier. Additionally it
4579 never removes statements, only adds new ones and it should walk
4580 from the single imm use and check the predicate already before
4581 making those changes.
4582 On the other hand, the handling of GIMPLE_COND directly can turn
4583 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4584 it needs to be done in a separate loop afterwards. */
4585 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4586 {
4587 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4588 && bbinfo[idx].op != NULL_TREE)
4589 {
4590 tree new_op;
4591
4592 max_idx = idx;
4593 stmt = last_stmt (bb);
4594 new_op = update_ops (bbinfo[idx].op,
4595 (enum tree_code)
4596 ops[bbinfo[idx].first_idx]->rank,
4597 ops, &bbinfo[idx].first_idx,
4598 loop_containing_stmt (stmt));
4599 if (new_op == NULL_TREE)
4600 {
4601 gcc_assert (bb == last_bb);
4602 new_op = ops[bbinfo[idx].first_idx++]->op;
4603 }
4604 if (bbinfo[idx].op != new_op)
4605 {
4606 imm_use_iterator iter;
4607 use_operand_p use_p;
4608 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4609
4610 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4611 if (is_gimple_debug (use_stmt))
4612 continue;
4613 else if (gimple_code (use_stmt) == GIMPLE_COND
4614 || gimple_code (use_stmt) == GIMPLE_PHI)
4615 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4616 SET_USE (use_p, new_op);
4617 else if ((is_gimple_assign (use_stmt)
4618 && (TREE_CODE_CLASS
4619 (gimple_assign_rhs_code (use_stmt))
4620 == tcc_comparison)))
4621 cast_or_tcc_cmp_stmt = use_stmt;
4622 else if (gimple_assign_cast_p (use_stmt))
4623 cast_or_tcc_cmp_stmt = use_stmt;
4624 else
4625 gcc_unreachable ();
4626
4627 if (cast_or_tcc_cmp_stmt)
4628 {
4629 gcc_assert (bb == last_bb);
4630 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4631 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4632 enum tree_code rhs_code
4633 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4634 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4635 : CONVERT_EXPR;
4636 gassign *g;
4637 if (is_gimple_min_invariant (new_op))
4638 {
4639 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4640 g = gimple_build_assign (new_lhs, new_op);
4641 }
4642 else
4643 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4644 gimple_stmt_iterator gsi
4645 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4646 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4647 gimple_set_visited (g, true);
4648 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4649 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4650 if (is_gimple_debug (use_stmt))
4651 continue;
4652 else if (gimple_code (use_stmt) == GIMPLE_COND
4653 || gimple_code (use_stmt) == GIMPLE_PHI)
4654 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4655 SET_USE (use_p, new_lhs);
4656 else
4657 gcc_unreachable ();
4658 }
4659 }
4660 }
4661 if (bb == first_bb)
4662 break;
4663 }
4664 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4665 {
4666 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4667 && bbinfo[idx].op == NULL_TREE
4668 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4669 {
4670 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4671
4672 if (idx > max_idx)
4673 max_idx = idx;
4674
4675 /* If we collapse the conditional to a true/false
4676 condition, then bubble that knowledge up to our caller. */
4677 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4678 {
4679 gimple_cond_make_false (cond_stmt);
4680 cfg_cleanup_needed = true;
4681 }
4682 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4683 {
4684 gimple_cond_make_true (cond_stmt);
4685 cfg_cleanup_needed = true;
4686 }
4687 else
4688 {
4689 gimple_cond_set_code (cond_stmt, NE_EXPR);
4690 gimple_cond_set_lhs (cond_stmt,
4691 ops[bbinfo[idx].first_idx]->op);
4692 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4693 }
4694 update_stmt (cond_stmt);
4695 }
4696 if (bb == first_bb)
4697 break;
4698 }
4699
4700 /* The above changes could result in basic blocks after the first
4701 modified one, up to and including last_bb, to be executed even if
4702 they would not be in the original program. If the value ranges of
4703 assignment lhs' in those bbs were dependent on the conditions
4704 guarding those basic blocks which now can change, the VRs might
4705 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4706 are only used within the same bb, it should not be a big deal if
4707 we just reset all the VRs in those bbs. See PR68671. */
4708 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4709 reset_flow_sensitive_info_in_bb (bb);
4710 }
4711 return cfg_cleanup_needed;
4712 }
4713
4714 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4715 of STMT in its operands. This is also known as a "destructive
4716 update" operation. */
4717
4718 static bool
4719 is_phi_for_stmt (gimple *stmt, tree operand)
4720 {
4721 gimple *def_stmt;
4722 gphi *def_phi;
4723 tree lhs;
4724 use_operand_p arg_p;
4725 ssa_op_iter i;
4726
4727 if (TREE_CODE (operand) != SSA_NAME)
4728 return false;
4729
4730 lhs = gimple_assign_lhs (stmt);
4731
4732 def_stmt = SSA_NAME_DEF_STMT (operand);
4733 def_phi = dyn_cast <gphi *> (def_stmt);
4734 if (!def_phi)
4735 return false;
4736
4737 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4738 if (lhs == USE_FROM_PTR (arg_p))
4739 return true;
4740 return false;
4741 }
4742
4743 /* Remove the def stmt of VAR if VAR has zero uses, and recurse
4744 on its rhs1 operand if so. */
4745
4746 static void
4747 remove_visited_stmt_chain (tree var)
4748 {
4749 gimple *stmt;
4750 gimple_stmt_iterator gsi;
4751
4752 while (1)
4753 {
4754 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4755 return;
4756 stmt = SSA_NAME_DEF_STMT (var);
4757 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4758 {
4759 var = gimple_assign_rhs1 (stmt);
4760 gsi = gsi_for_stmt (stmt);
4761 reassoc_remove_stmt (&gsi);
4762 release_defs (stmt);
4763 }
4764 else
4765 return;
4766 }
4767 }
4768
4769 /* This function checks three consecutive operands in
4770 the passed operands vector OPS starting from OPINDEX and
4771 swaps two operands if it is profitable for the binary operation
4772 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4773
4774 We pair ops with the same rank if possible.
4775
4776 The alternative we try is to see if STMT is a destructive
4777 update style statement, which is like:
4778 b = phi (a, ...)
4779 a = c + b;
4780 In that case, we want to use the destructive update form to
4781 expose the possible vectorizer sum reduction opportunity.
4782 In that case, the third operand will be the phi node. This
4783 check is not performed if STMT is null.
4784
4785 We could, of course, try to be better as noted above, and do a
4786 lot of work to try to find these opportunities in >3 operand
4787 cases, but it is unlikely to be worth it. */
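/* E.g. if the three operands at OPINDEX have ranks { 3, 3, 5 },
   the first and third entries are swapped so that the two rank-3
   operands end up in the OPINDEX + 1 and OPINDEX + 2 slots that are
   consumed together by the next binary operation. */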
4788
4789 static void
4790 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4791 unsigned int opindex, gimple *stmt)
4792 {
4793 operand_entry *oe1, *oe2, *oe3;
4794
4795 oe1 = ops[opindex];
4796 oe2 = ops[opindex + 1];
4797 oe3 = ops[opindex + 2];
4798
4799 if ((oe1->rank == oe2->rank
4800 && oe2->rank != oe3->rank)
4801 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4802 && !is_phi_for_stmt (stmt, oe1->op)
4803 && !is_phi_for_stmt (stmt, oe2->op)))
4804 std::swap (*oe1, *oe3);
4805 else if ((oe1->rank == oe3->rank
4806 && oe2->rank != oe3->rank)
4807 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4808 && !is_phi_for_stmt (stmt, oe1->op)
4809 && !is_phi_for_stmt (stmt, oe3->op)))
4810 std::swap (*oe1, *oe2);
4811 }
4812
4813 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4814 two definitions, otherwise return STMT. */
4815
4816 static inline gimple *
4817 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4818 {
4819 if (TREE_CODE (rhs1) == SSA_NAME
4820 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4821 stmt = SSA_NAME_DEF_STMT (rhs1);
4822 if (TREE_CODE (rhs2) == SSA_NAME
4823 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4824 stmt = SSA_NAME_DEF_STMT (rhs2);
4825 return stmt;
4826 }
4827
4828 /* If the stmt that defines operand has to be inserted, insert it
4829 before the use. */
4830 static void
4831 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4832 {
4833 gcc_assert (is_gimple_assign (stmt_to_insert));
4834 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4835 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4836 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4837 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4838 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4839
4840 /* If the insert point is not stmt, then insert_point would be
4841 the point where operand rhs1 or rhs2 is defined. In this case,
4842 stmt_to_insert has to be inserted afterwards. This would
4843 only happen when the stmt insertion point is flexible. */
4844 if (stmt == insert_point)
4845 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4846 else
4847 insert_stmt_after (stmt_to_insert, insert_point);
4848 }
4849
4850
4851 /* Recursively rewrite our linearized statements so that the operators
4852 match those in OPS[OPINDEX], putting the computation in rank
4853 order. Return new lhs.
4854 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4855 the current stmt and during recursive invocations.
4856 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4857 recursive invocations. */
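/* Concretely, the innermost statement of the chain is rewritten to
   combine the last two entries of OPS, and each statement above it
   combines the next earlier entry, so OPS[OPINDEX] ends up as the
   direct operand of STMT itself; fresh SSA names are created whenever
   CHANGED requires it. */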
4858
4859 static tree
4860 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4861 vec<operand_entry *> ops, bool changed, bool next_changed)
4862 {
4863 tree rhs1 = gimple_assign_rhs1 (stmt);
4864 tree rhs2 = gimple_assign_rhs2 (stmt);
4865 tree lhs = gimple_assign_lhs (stmt);
4866 operand_entry *oe;
4867
4868 /* The final recursion case for this function is that you have
4869 exactly two operations left.
4870 If we had exactly one op in the entire list to start with, we
4871 would have never called this function, and the tail recursion
4872 rewrites them one at a time. */
4873 if (opindex + 2 == ops.length ())
4874 {
4875 operand_entry *oe1, *oe2;
4876
4877 oe1 = ops[opindex];
4878 oe2 = ops[opindex + 1];
4879
4880 if (rhs1 != oe1->op || rhs2 != oe2->op)
4881 {
4882 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4883 unsigned int uid = gimple_uid (stmt);
4884
4885 if (dump_file && (dump_flags & TDF_DETAILS))
4886 {
4887 fprintf (dump_file, "Transforming ");
4888 print_gimple_stmt (dump_file, stmt, 0);
4889 }
4890
4891 /* If the stmt that defines operand has to be inserted, insert it
4892 before the use. */
4893 if (oe1->stmt_to_insert)
4894 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4895 if (oe2->stmt_to_insert)
4896 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4897 /* Even when changed is false, reassociation could have e.g. removed
4898 some redundant operations, so unless we are just swapping the
4899 arguments or unless there is no change at all (then we just
4900 return lhs), force creation of a new SSA_NAME. */
4901 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4902 {
4903 gimple *insert_point
4904 = find_insert_point (stmt, oe1->op, oe2->op);
4905 lhs = make_ssa_name (TREE_TYPE (lhs));
4906 stmt
4907 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4908 oe1->op, oe2->op);
4909 gimple_set_uid (stmt, uid);
4910 gimple_set_visited (stmt, true);
4911 if (insert_point == gsi_stmt (gsi))
4912 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4913 else
4914 insert_stmt_after (stmt, insert_point);
4915 }
4916 else
4917 {
4918 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4919 == stmt);
4920 gimple_assign_set_rhs1 (stmt, oe1->op);
4921 gimple_assign_set_rhs2 (stmt, oe2->op);
4922 update_stmt (stmt);
4923 }
4924
4925 if (rhs1 != oe1->op && rhs1 != oe2->op)
4926 remove_visited_stmt_chain (rhs1);
4927
4928 if (dump_file && (dump_flags & TDF_DETAILS))
4929 {
4930 fprintf (dump_file, " into ");
4931 print_gimple_stmt (dump_file, stmt, 0);
4932 }
4933 }
4934 return lhs;
4935 }
4936
4937 /* If we hit here, we should have 3 or more ops left. */
4938 gcc_assert (opindex + 2 < ops.length ());
4939
4940 /* Rewrite the next operator. */
4941 oe = ops[opindex];
4942
4943 /* If the stmt that defines operand has to be inserted, insert it
4944 before the use. */
4945 if (oe->stmt_to_insert)
4946 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4947
4948 /* Recurse on the LHS of the binary operator, which is guaranteed to
4949 be the non-leaf side. */
4950 tree new_rhs1
4951 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4952 changed || oe->op != rhs2 || next_changed,
4953 false);
4954
4955 if (oe->op != rhs2 || new_rhs1 != rhs1)
4956 {
4957 if (dump_file && (dump_flags & TDF_DETAILS))
4958 {
4959 fprintf (dump_file, "Transforming ");
4960 print_gimple_stmt (dump_file, stmt, 0);
4961 }
4962
4963 /* If changed is false, this is either opindex == 0
4964 or all outer rhs2's were equal to corresponding oe->op,
4965 and powi_result is NULL.
4966 That means lhs is equivalent before and after reassociation.
4967 Otherwise ensure the old lhs SSA_NAME is not reused and
4968 create a new stmt as well, so that any debug stmts will be
4969 properly adjusted. */
4970 if (changed)
4971 {
4972 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4973 unsigned int uid = gimple_uid (stmt);
4974 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4975
4976 lhs = make_ssa_name (TREE_TYPE (lhs));
4977 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4978 new_rhs1, oe->op);
4979 gimple_set_uid (stmt, uid);
4980 gimple_set_visited (stmt, true);
4981 if (insert_point == gsi_stmt (gsi))
4982 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4983 else
4984 insert_stmt_after (stmt, insert_point);
4985 }
4986 else
4987 {
4988 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4989 == stmt);
4990 gimple_assign_set_rhs1 (stmt, new_rhs1);
4991 gimple_assign_set_rhs2 (stmt, oe->op);
4992 update_stmt (stmt);
4993 }
4994
4995 if (dump_file && (dump_flags & TDF_DETAILS))
4996 {
4997 fprintf (dump_file, " into ");
4998 print_gimple_stmt (dump_file, stmt, 0);
4999 }
5000 }
5001 return lhs;
5002 }
5003
5004 /* Find out how many cycles we need to compute the statement chain.
5005 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
5006 maximum number of independent statements we may execute per cycle. */
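/* For example, with ops_num == 10 and cpu_width == 2 the first phase
   takes 10 / (2 * 2) == 2 cycles, leaving rest == 10 - 2 * 2 == 6
   operands, which need another floor_log2 (6) + 1 == 3 halving cycles,
   i.e. 5 cycles in total. */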
5007
5008 static int
5009 get_required_cycles (int ops_num, int cpu_width)
5010 {
5011 int res;
5012 int elog;
5013 unsigned int rest;
5014
5015 /* While we have more than 2 * cpu_width operands
5016 we may reduce the number of operands by cpu_width
5017 per cycle. */
5018 res = ops_num / (2 * cpu_width);
5019
5020 /* The remaining operand count can be halved each cycle
5021 until we have only one operand. */
5022 rest = (unsigned)(ops_num - res * cpu_width);
5023 elog = exact_log2 (rest);
5024 if (elog >= 0)
5025 res += elog;
5026 else
5027 res += floor_log2 (rest) + 1;
5028
5029 return res;
5030 }
5031
5032 /* Returns an optimal number of registers to use for computation of
5033 given statements. */
5034
5035 static int
5036 get_reassociation_width (int ops_num, enum tree_code opc,
5037 machine_mode mode)
5038 {
5039 int param_width = param_tree_reassoc_width;
5040 int width;
5041 int width_min;
5042 int cycles_best;
5043
5044 if (param_width > 0)
5045 width = param_width;
5046 else
5047 width = targetm.sched.reassociation_width (opc, mode);
5048
5049 if (width == 1)
5050 return width;
5051
5052 /* Get the minimal time required for sequence computation. */
5053 cycles_best = get_required_cycles (ops_num, width);
5054
5055 /* Check if we may use less width and still compute the sequence in
5056 the same number of cycles. This allows us to reduce register usage.
5057 get_required_cycles is monotonically increasing with lower width
5058 so we can perform a binary search for the minimal width that still
5059 results in the optimal cycle count. */
5060 width_min = 1;
5061 while (width > width_min)
5062 {
5063 int width_mid = (width + width_min) / 2;
5064
5065 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5066 width = width_mid;
5067 else if (width_min < width_mid)
5068 width_min = width_mid;
5069 else
5070 break;
5071 }
5072
5073 return width;
5074 }
5075
5076 /* Recursively rewrite our linearized statements so that the operators
5077 match those in OPS[OPINDEX], putting the computation in rank
5078 order and trying to allow operations to be executed in
5079 parallel. */
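/* E.g. with width 2 the chain a + b + c + d, which would otherwise be
   emitted serially as ((a + b) + c) + d, becomes roughly
     _1 = a + b;
     _2 = c + d;
     lhs = _1 + _2;
   so the two independent additions can execute in the same cycle. */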
5080
5081 static void
5082 rewrite_expr_tree_parallel (gassign *stmt, int width,
5083 vec<operand_entry *> ops)
5084 {
5085 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5086 int op_num = ops.length ();
5087 gcc_assert (op_num > 0);
5088 int stmt_num = op_num - 1;
5089 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5090 int op_index = op_num - 1;
5091 int stmt_index = 0;
5092 int ready_stmts_end = 0;
5093 int i = 0;
5094 gimple *stmt1 = NULL, *stmt2 = NULL;
5095 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5096
5097 /* We start expression rewriting from the top statements.
5098 So, in this loop we create a full list of statements
5099 we will work with. */
5100 stmts[stmt_num - 1] = stmt;
5101 for (i = stmt_num - 2; i >= 0; i--)
5102 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5103
5104 for (i = 0; i < stmt_num; i++)
5105 {
5106 tree op1, op2;
5107
5108 /* Determine whether we should use results of
5109 already handled statements or not. */
5110 if (ready_stmts_end == 0
5111 && (i - stmt_index >= width || op_index < 1))
5112 ready_stmts_end = i;
5113
5114 /* Now we choose operands for the next statement. A nonzero
5115 value of ready_stmts_end means that we should use
5116 the results of already generated statements as new operands. */
5117 if (ready_stmts_end > 0)
5118 {
5119 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5120 if (ready_stmts_end > stmt_index)
5121 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5122 else if (op_index >= 0)
5123 {
5124 operand_entry *oe = ops[op_index--];
5125 stmt2 = oe->stmt_to_insert;
5126 op2 = oe->op;
5127 }
5128 else
5129 {
5130 gcc_assert (stmt_index < i);
5131 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5132 }
5133
5134 if (stmt_index >= ready_stmts_end)
5135 ready_stmts_end = 0;
5136 }
5137 else
5138 {
5139 if (op_index > 1)
5140 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5141 operand_entry *oe2 = ops[op_index--];
5142 operand_entry *oe1 = ops[op_index--];
5143 op2 = oe2->op;
5144 stmt2 = oe2->stmt_to_insert;
5145 op1 = oe1->op;
5146 stmt1 = oe1->stmt_to_insert;
5147 }
5148
5149 /* If we emit the last statement then we should put
5150 operands into the last statement. It will also
5151 break the loop. */
5152 if (op_index < 0 && stmt_index == i)
5153 i = stmt_num - 1;
5154
5155 if (dump_file && (dump_flags & TDF_DETAILS))
5156 {
5157 fprintf (dump_file, "Transforming ");
5158 print_gimple_stmt (dump_file, stmts[i], 0);
5159 }
5160
5161 /* If the stmt that defines operand has to be inserted, insert it
5162 before the use. */
5163 if (stmt1)
5164 insert_stmt_before_use (stmts[i], stmt1);
5165 if (stmt2)
5166 insert_stmt_before_use (stmts[i], stmt2);
5167 stmt1 = stmt2 = NULL;
5168
5169 /* We keep original statement only for the last one. All
5170 others are recreated. */
5171 if (i == stmt_num - 1)
5172 {
5173 gimple_assign_set_rhs1 (stmts[i], op1);
5174 gimple_assign_set_rhs2 (stmts[i], op2);
5175 update_stmt (stmts[i]);
5176 }
5177 else
5178 {
5179 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5180 gimple_set_visited (stmts[i], true);
5181 }
5182 if (dump_file && (dump_flags & TDF_DETAILS))
5183 {
5184 fprintf (dump_file, " into ");
5185 print_gimple_stmt (dump_file, stmts[i], 0);
5186 }
5187 }
5188
5189 remove_visited_stmt_chain (last_rhs1);
5190 }
5191
5192 /* Transform STMT, which is really (A + B) + (C + D), into the left
5193 linear form, ((A+B)+C)+D.
5194 Recurse on D if necessary. */
5195
5196 static void
5197 linearize_expr (gimple *stmt)
5198 {
5199 gimple_stmt_iterator gsi;
5200 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5201 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5202 gimple *oldbinrhs = binrhs;
5203 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5204 gimple *newbinrhs = NULL;
5205 class loop *loop = loop_containing_stmt (stmt);
5206 tree lhs = gimple_assign_lhs (stmt);
5207
5208 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5209 && is_reassociable_op (binrhs, rhscode, loop));
5210
5211 gsi = gsi_for_stmt (stmt);
5212
5213 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5214 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5215 gimple_assign_rhs_code (binrhs),
5216 gimple_assign_lhs (binlhs),
5217 gimple_assign_rhs2 (binrhs));
5218 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5219 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5220 gimple_set_uid (binrhs, gimple_uid (stmt));
5221
5222 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5223 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5224
5225 if (dump_file && (dump_flags & TDF_DETAILS))
5226 {
5227 fprintf (dump_file, "Linearized: ");
5228 print_gimple_stmt (dump_file, stmt, 0);
5229 }
5230
5231 reassociate_stats.linearized++;
5232 update_stmt (stmt);
5233
5234 gsi = gsi_for_stmt (oldbinrhs);
5235 reassoc_remove_stmt (&gsi);
5236 release_defs (oldbinrhs);
5237
5238 gimple_set_visited (stmt, true);
5239 gimple_set_visited (binlhs, true);
5240 gimple_set_visited (binrhs, true);
5241
5242 /* Tail recurse on the new rhs if it still needs reassociation. */
5243 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5244 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5245 want to change the algorithm while converting to tuples. */
5246 linearize_expr (stmt);
5247 }
5248
5249 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5250 it. Otherwise, return NULL. */
5251
5252 static gimple *
5253 get_single_immediate_use (tree lhs)
5254 {
5255 use_operand_p immuse;
5256 gimple *immusestmt;
5257
5258 if (TREE_CODE (lhs) == SSA_NAME
5259 && single_imm_use (lhs, &immuse, &immusestmt)
5260 && is_gimple_assign (immusestmt))
5261 return immusestmt;
5262
5263 return NULL;
5264 }
5265
5266 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5267 representing the negated value. Insertions of any necessary
5268 instructions go before GSI.
5269 This function is recursive in that, if you hand it "a_5" as the
5270 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5271 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5272
5273 static tree
5274 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5275 {
5276 gimple *negatedefstmt = NULL;
5277 tree resultofnegate;
5278 gimple_stmt_iterator gsi;
5279 unsigned int uid;
5280
5281 /* If we are trying to negate a name, defined by an add, negate the
5282 add operands instead. */
5283 if (TREE_CODE (tonegate) == SSA_NAME)
5284 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5285 if (TREE_CODE (tonegate) == SSA_NAME
5286 && is_gimple_assign (negatedefstmt)
5287 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5288 && has_single_use (gimple_assign_lhs (negatedefstmt))
5289 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5290 {
5291 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5292 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5293 tree lhs = gimple_assign_lhs (negatedefstmt);
5294 gimple *g;
5295
5296 gsi = gsi_for_stmt (negatedefstmt);
5297 rhs1 = negate_value (rhs1, &gsi);
5298
5299 gsi = gsi_for_stmt (negatedefstmt);
5300 rhs2 = negate_value (rhs2, &gsi);
5301
5302 gsi = gsi_for_stmt (negatedefstmt);
5303 lhs = make_ssa_name (TREE_TYPE (lhs));
5304 gimple_set_visited (negatedefstmt, true);
5305 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5306 gimple_set_uid (g, gimple_uid (negatedefstmt));
5307 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5308 return lhs;
5309 }
5310
5311 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5312 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5313 NULL_TREE, true, GSI_SAME_STMT);
5314 gsi = *gsip;
5315 uid = gimple_uid (gsi_stmt (gsi));
5316 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5317 {
5318 gimple *stmt = gsi_stmt (gsi);
5319 if (gimple_uid (stmt) != 0)
5320 break;
5321 gimple_set_uid (stmt, uid);
5322 }
5323 return resultofnegate;
5324 }
5325
5326 /* Return true if we should break up the subtract in STMT into an add
5327 with negate. This is true when the subtract operands are really
5328 adds, or the subtract itself is used in an add expression. In
5329 either case, breaking up the subtract into an add with negate
5330 exposes the adds to reassociation. */
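/* E.g. for
     t_1 = a_2 + b_3;
     x_4 = t_1 - c_5;
   the subtract is rewritten as x_4 = t_1 + (-c_5), which exposes one
   PLUS chain over a_2, b_3 and the negated c_5 to the rest of the
   pass. */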
5331
5332 static bool
5333 should_break_up_subtract (gimple *stmt)
5334 {
5335 tree lhs = gimple_assign_lhs (stmt);
5336 tree binlhs = gimple_assign_rhs1 (stmt);
5337 tree binrhs = gimple_assign_rhs2 (stmt);
5338 gimple *immusestmt;
5339 class loop *loop = loop_containing_stmt (stmt);
5340
5341 if (TREE_CODE (binlhs) == SSA_NAME
5342 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5343 return true;
5344
5345 if (TREE_CODE (binrhs) == SSA_NAME
5346 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5347 return true;
5348
5349 if (TREE_CODE (lhs) == SSA_NAME
5350 && (immusestmt = get_single_immediate_use (lhs))
5351 && is_gimple_assign (immusestmt)
5352 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5353 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5354 && gimple_assign_rhs1 (immusestmt) == lhs)
5355 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5356 return true;
5357 return false;
5358 }
5359
5360 /* Transform STMT from A - B into A + -B. */
5361
5362 static void
5363 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5364 {
5365 tree rhs1 = gimple_assign_rhs1 (stmt);
5366 tree rhs2 = gimple_assign_rhs2 (stmt);
5367
5368 if (dump_file && (dump_flags & TDF_DETAILS))
5369 {
5370 fprintf (dump_file, "Breaking up subtract ");
5371 print_gimple_stmt (dump_file, stmt, 0);
5372 }
5373
5374 rhs2 = negate_value (rhs2, gsip);
5375 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5376 update_stmt (stmt);
5377 }
5378
5379 /* Determine whether STMT is a builtin call that raises an SSA name
5380 to an integer power and has only one use. If so, and this is early
5381 reassociation and unsafe math optimizations are permitted, place
5382 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5383 If any of these conditions does not hold, return FALSE. */
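/* E.g. a call like
     x_3 = __builtin_pow (y_1, 4.0);
   yields *BASE == y_1 and *EXPONENT == 4, while a non-integral
   exponent such as 0.5, or an exponent below 2, is rejected. */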
5384
5385 static bool
5386 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5387 {
5388 tree arg1;
5389 REAL_VALUE_TYPE c, cint;
5390
5391 switch (gimple_call_combined_fn (stmt))
5392 {
5393 CASE_CFN_POW:
5394 if (flag_errno_math)
5395 return false;
5396
5397 *base = gimple_call_arg (stmt, 0);
5398 arg1 = gimple_call_arg (stmt, 1);
5399
5400 if (TREE_CODE (arg1) != REAL_CST)
5401 return false;
5402
5403 c = TREE_REAL_CST (arg1);
5404
5405 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5406 return false;
5407
5408 *exponent = real_to_integer (&c);
5409 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5410 if (!real_identical (&c, &cint))
5411 return false;
5412
5413 break;
5414
5415 CASE_CFN_POWI:
5416 *base = gimple_call_arg (stmt, 0);
5417 arg1 = gimple_call_arg (stmt, 1);
5418
5419 if (!tree_fits_shwi_p (arg1))
5420 return false;
5421
5422 *exponent = tree_to_shwi (arg1);
5423 break;
5424
5425 default:
5426 return false;
5427 }
5428
5429 /* Expanding negative exponents is generally unproductive, so we don't
5430 complicate matters with those. Exponents of zero and one should
5431 have been handled by expression folding. */
5432 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5433 return false;
5434
5435 return true;
5436 }
5437
5438 /* Try to derive and add operand entry for OP to *OPS. Return false if
5439 unsuccessful. */
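/* E.g. in a MULT_EXPR chain an operand defined as
     op_1 = -z_2;
   is added to *OPS as the pair { z_2, -1 }, and a single-use
     op_3 = __builtin_powi (z_2, 3);
   is recorded via add_repeat_to_ops_vec as the repeated factor z_2
   with count 3, when reassoc_insert_powi_p and
   -funsafe-math-optimizations allow it. */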
5440
5441 static bool
5442 try_special_add_to_ops (vec<operand_entry *> *ops,
5443 enum tree_code code,
5444 tree op, gimple* def_stmt)
5445 {
5446 tree base = NULL_TREE;
5447 HOST_WIDE_INT exponent = 0;
5448
5449 if (TREE_CODE (op) != SSA_NAME
5450 || ! has_single_use (op))
5451 return false;
5452
5453 if (code == MULT_EXPR
5454 && reassoc_insert_powi_p
5455 && flag_unsafe_math_optimizations
5456 && is_gimple_call (def_stmt)
5457 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5458 {
5459 add_repeat_to_ops_vec (ops, base, exponent);
5460 gimple_set_visited (def_stmt, true);
5461 return true;
5462 }
5463 else if (code == MULT_EXPR
5464 && is_gimple_assign (def_stmt)
5465 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5466 && !HONOR_SNANS (TREE_TYPE (op))
5467 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5468 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5469 {
5470 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5471 tree cst = build_minus_one_cst (TREE_TYPE (op));
5472 add_to_ops_vec (ops, rhs1);
5473 add_to_ops_vec (ops, cst);
5474 gimple_set_visited (def_stmt, true);
5475 return true;
5476 }
5477
5478 return false;
5479 }
5480
5481 /* Recursively linearize a binary expression that is the RHS of STMT.
5482 Place the operands of the expression tree in the vector named OPS. */
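/* E.g. starting from
     _1 = a_2 + b_3;
     _4 = c_5 + d_6;
     lhs_7 = _1 + _4;
   the tree is first put into left-linear form by linearize_expr and
   the leaves a_2, b_3, c_5 and d_6 end up in *OPS, assuming the
   intermediate names have single uses. */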
5483
5484 static void
5485 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5486 bool is_associative, bool set_visited)
5487 {
5488 tree binlhs = gimple_assign_rhs1 (stmt);
5489 tree binrhs = gimple_assign_rhs2 (stmt);
5490 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5491 bool binlhsisreassoc = false;
5492 bool binrhsisreassoc = false;
5493 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5494 class loop *loop = loop_containing_stmt (stmt);
5495
5496 if (set_visited)
5497 gimple_set_visited (stmt, true);
5498
5499 if (TREE_CODE (binlhs) == SSA_NAME)
5500 {
5501 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5502 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5503 && !stmt_could_throw_p (cfun, binlhsdef));
5504 }
5505
5506 if (TREE_CODE (binrhs) == SSA_NAME)
5507 {
5508 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5509 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5510 && !stmt_could_throw_p (cfun, binrhsdef));
5511 }
5512
5513 /* If the LHS is not reassociable, but the RHS is, we need to swap
5514 them. If neither is reassociable, there is nothing we can do, so
5515 just put them in the ops vector. If the LHS is reassociable,
5516 linearize it. If both are reassociable, then linearize the RHS
5517 and the LHS. */
5518
5519 if (!binlhsisreassoc)
5520 {
5521 /* If this is not an associative operation (e.g. division), give up. */
5522 if (!is_associative)
5523 {
5524 add_to_ops_vec (ops, binrhs);
5525 return;
5526 }
5527
5528 if (!binrhsisreassoc)
5529 {
5530 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5531 add_to_ops_vec (ops, binrhs);
5532
5533 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5534 add_to_ops_vec (ops, binlhs);
5535
5536 return;
5537 }
5538
5539 if (dump_file && (dump_flags & TDF_DETAILS))
5540 {
5541 fprintf (dump_file, "swapping operands of ");
5542 print_gimple_stmt (dump_file, stmt, 0);
5543 }
5544
5545 swap_ssa_operands (stmt,
5546 gimple_assign_rhs1_ptr (stmt),
5547 gimple_assign_rhs2_ptr (stmt));
5548 update_stmt (stmt);
5549
5550 if (dump_file && (dump_flags & TDF_DETAILS))
5551 {
5552 fprintf (dump_file, " is now ");
5553 print_gimple_stmt (dump_file, stmt, 0);
5554 }
5555
5556 /* We want to make it so the lhs is always the reassociative op,
5557 so swap. */
5558 std::swap (binlhs, binrhs);
5559 }
5560 else if (binrhsisreassoc)
5561 {
5562 linearize_expr (stmt);
5563 binlhs = gimple_assign_rhs1 (stmt);
5564 binrhs = gimple_assign_rhs2 (stmt);
5565 }
5566
5567 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5568 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5569 rhscode, loop));
5570 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5571 is_associative, set_visited);
5572
5573 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5574 add_to_ops_vec (ops, binrhs);
5575 }
5576
5577 /* Repropagate the negates back into subtracts, since no other pass
5578 currently does it. */
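/* Added illustration of the PLUS_EXPR case handled below:

     x = -a;
     y = b + x;

   becomes

     y = b - a;

   i.e. the negate is forced to the second operand of the PLUS_EXPR and the
   addition is rewritten as a subtraction.  The two MINUS_EXPR cases are
   illustrated inline further down.  */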
5579
5580 static void
5581 repropagate_negates (void)
5582 {
5583 unsigned int i = 0;
5584 tree negate;
5585
5586 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5587 {
5588 gimple *user = get_single_immediate_use (negate);
5589
5590 if (!user || !is_gimple_assign (user))
5591 continue;
5592
5593 /* The negate operand can be either operand of a PLUS_EXPR
5594 (it can be the LHS if the RHS is a constant for example).
5595
5596 Force the negate operand to the RHS of the PLUS_EXPR, then
5597 transform the PLUS_EXPR into a MINUS_EXPR. */
5598 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5599 {
5600 /* If the negated operand appears on the LHS of the
5601 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5602 to force the negated operand to the RHS of the PLUS_EXPR. */
5603 if (gimple_assign_rhs1 (user) == negate)
5604 {
5605 swap_ssa_operands (user,
5606 gimple_assign_rhs1_ptr (user),
5607 gimple_assign_rhs2_ptr (user));
5608 }
5609
5610 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5611 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5612 if (gimple_assign_rhs2 (user) == negate)
5613 {
5614 tree rhs1 = gimple_assign_rhs1 (user);
5615 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5616 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5617 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5618 update_stmt (user);
5619 }
5620 }
5621 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5622 {
5623 if (gimple_assign_rhs1 (user) == negate)
5624 {
5625 /* We have
5626 x = -a
5627 y = x - b
5628 which we transform into
5629 x = a + b
5630 y = -x .
5631 This pushes down the negate which we possibly can merge
5632 into some other operation, hence insert it into the
5633 plus_negates vector. */
5634 gimple *feed = SSA_NAME_DEF_STMT (negate);
5635 tree a = gimple_assign_rhs1 (feed);
5636 tree b = gimple_assign_rhs2 (user);
5637 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5638 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5639 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5640 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5641 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5642 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5643 user = gsi_stmt (gsi2);
5644 update_stmt (user);
5645 reassoc_remove_stmt (&gsi);
5646 release_defs (feed);
5647 plus_negates.safe_push (gimple_assign_lhs (user));
5648 }
5649 else
5650 {
5651 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5652 rid of one operation. */
5653 gimple *feed = SSA_NAME_DEF_STMT (negate);
5654 tree a = gimple_assign_rhs1 (feed);
5655 tree rhs1 = gimple_assign_rhs1 (user);
5656 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5657 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5658 update_stmt (gsi_stmt (gsi));
5659 }
5660 }
5661 }
5662 }
5663
5664 /* Returns true if OP is of a type for which we can do reassociation.
5665 That is for integral types with wrapping overflow, non-saturating
5666 fixed-point types, and floating-point types when -fassociative-math is enabled. */
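/* For example, an unsigned int (wrapping overflow) qualifies, a signed int
   qualifies only when its overflow wraps (e.g. with -fwrapv), and float or
   double only under -fassociative-math; SSA names occurring in abnormal
   PHIs are always rejected.  */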
5667
5668 static bool
5669 can_reassociate_p (tree op)
5670 {
5671 tree type = TREE_TYPE (op);
5672 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5673 return false;
5674 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5675 || NON_SAT_FIXED_POINT_TYPE_P (type)
5676 || (flag_associative_math && FLOAT_TYPE_P (type)))
5677 return true;
5678 return false;
5679 }
5680
5681 /* Break up subtract operations in block BB.
5682
5683 We do this top down because we don't know whether the subtract is
5684 part of a possible chain of reassociation except at the top.
5685
5686 IE given
5687 d = f + g
5688 c = a + e
5689 b = c - d
5690 q = b - r
5691 k = t - q
5692
5693 we want to break up k = t - q, but we won't until we've transformed q
5694 = b - r, which won't be broken up until we transform b = c - d.
5695
5696 En passant, clear the GIMPLE visited flag on every statement
5697 and set UIDs within each basic block. */
5698
5699 static void
5700 break_up_subtract_bb (basic_block bb)
5701 {
5702 gimple_stmt_iterator gsi;
5703 basic_block son;
5704 unsigned int uid = 1;
5705
5706 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5707 {
5708 gimple *stmt = gsi_stmt (gsi);
5709 gimple_set_visited (stmt, false);
5710 gimple_set_uid (stmt, uid++);
5711
5712 if (!is_gimple_assign (stmt)
5713 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5714 continue;
5715
5716 /* Look for simple gimple subtract operations. */
5717 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5718 {
5719 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5720 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5721 continue;
5722
5723 /* Check for a subtract used only in an addition. If this
5724 is the case, transform it into add of a negate for better
5725 reassociation. IE transform C = A-B into C = A + -B if C
5726 is only used in an addition. */
5727 if (should_break_up_subtract (stmt))
5728 break_up_subtract (stmt, &gsi);
5729 }
5730 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5731 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5732 plus_negates.safe_push (gimple_assign_lhs (stmt));
5733 }
5734 for (son = first_dom_son (CDI_DOMINATORS, bb);
5735 son;
5736 son = next_dom_son (CDI_DOMINATORS, son))
5737 break_up_subtract_bb (son);
5738 }
5739
5740 /* Used for repeated factor analysis. */
5741 struct repeat_factor
5742 {
5743 /* An SSA name that occurs in a multiply chain. */
5744 tree factor;
5745
5746 /* Cached rank of the factor. */
5747 unsigned rank;
5748
5749 /* Number of occurrences of the factor in the chain. */
5750 HOST_WIDE_INT count;
5751
5752 /* An SSA name representing the product of this factor and
5753 all factors appearing later in the repeated factor vector. */
5754 tree repr;
5755 };
5756
5757
5758 static vec<repeat_factor> repeat_factor_vec;
5759
5760 /* Used for sorting the repeat factor vector. Sort primarily by
5761 ascending occurrence count, secondarily by descending rank. */
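/* For instance, (count, rank) entries (3, 4), (2, 5), (3, 7) sort as
   (2, 5), (3, 7), (3, 4): smaller counts first, ties broken by the larger
   rank coming first.  */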
5762
5763 static int
5764 compare_repeat_factors (const void *x1, const void *x2)
5765 {
5766 const repeat_factor *rf1 = (const repeat_factor *) x1;
5767 const repeat_factor *rf2 = (const repeat_factor *) x2;
5768
5769 if (rf1->count != rf2->count)
5770 return rf1->count - rf2->count;
5771
5772 return rf2->rank - rf1->rank;
5773 }
5774
5775 /* Look for repeated operands in OPS in the multiply tree rooted at
5776 STMT. Replace them with an optimal sequence of multiplies and powi
5777 builtin calls, and remove the used operands from OPS. Return an
5778 SSA name representing the value of the replacement sequence. */
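/* E.g. for x * x * y this replaces the two copies of X in OPS by a call to
   __builtin_powi (x, 2) and returns its result, while the single Y is left
   in OPS for the normal rewrite; a larger example with cached partial
   products is worked through in the body below.  */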
5779
5780 static tree
5781 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5782 {
5783 unsigned i, j, vec_len;
5784 int ii;
5785 operand_entry *oe;
5786 repeat_factor *rf1, *rf2;
5787 repeat_factor rfnew;
5788 tree result = NULL_TREE;
5789 tree target_ssa, iter_result;
5790 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5791 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5792 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5793 gimple *mul_stmt, *pow_stmt;
5794
5795 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5796 target. */
5797 if (!powi_fndecl)
5798 return NULL_TREE;
5799
5800 /* Allocate the repeated factor vector. */
5801 repeat_factor_vec.create (10);
5802
5803 /* Scan the OPS vector for all SSA names in the product and build
5804 up a vector of occurrence counts for each factor. */
5805 FOR_EACH_VEC_ELT (*ops, i, oe)
5806 {
5807 if (TREE_CODE (oe->op) == SSA_NAME)
5808 {
5809 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5810 {
5811 if (rf1->factor == oe->op)
5812 {
5813 rf1->count += oe->count;
5814 break;
5815 }
5816 }
5817
5818 if (j >= repeat_factor_vec.length ())
5819 {
5820 rfnew.factor = oe->op;
5821 rfnew.rank = oe->rank;
5822 rfnew.count = oe->count;
5823 rfnew.repr = NULL_TREE;
5824 repeat_factor_vec.safe_push (rfnew);
5825 }
5826 }
5827 }
5828
5829 /* Sort the repeated factor vector by (a) increasing occurrence count,
5830 and (b) decreasing rank. */
5831 repeat_factor_vec.qsort (compare_repeat_factors);
5832
5833 /* It is generally best to combine as many base factors as possible
5834 into a product before applying __builtin_powi to the result.
5835 However, the sort order chosen for the repeated factor vector
5836 allows us to cache partial results for the product of the base
5837 factors for subsequent use. When we already have a cached partial
5838 result from a previous iteration, it is best to make use of it
5839 before looking for another __builtin_powi opportunity.
5840
5841 As an example, consider x * x * y * y * y * z * z * z * z.
5842 We want to first compose the product x * y * z, raise it to the
5843 second power, then multiply this by y * z, and finally multiply
5844 by z. This can be done in 5 multiplies provided we cache y * z
5845 for use in both expressions:
5846
5847 t1 = y * z
5848 t2 = t1 * x
5849 t3 = t2 * t2
5850 t4 = t1 * t3
5851 result = t4 * z
5852
5853 If we instead ignored the cached y * z and first multiplied by
5854 the __builtin_powi opportunity z * z, we would get the inferior:
5855
5856 t1 = y * z
5857 t2 = t1 * x
5858 t3 = t2 * t2
5859 t4 = z * z
5860 t5 = t3 * t4
5861 result = t5 * y */
5862
5863 vec_len = repeat_factor_vec.length ();
5864
5865 /* Repeatedly look for opportunities to create a builtin_powi call. */
5866 while (true)
5867 {
5868 HOST_WIDE_INT power;
5869
5870 /* First look for the largest cached product of factors from
5871 preceding iterations. If found, create a builtin_powi for
5872 it if the minimum occurrence count for its factors is at
5873 least 2, or just use this cached product as our next
5874 multiplicand if the minimum occurrence count is 1. */
5875 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5876 {
5877 if (rf1->repr && rf1->count > 0)
5878 break;
5879 }
5880
5881 if (j < vec_len)
5882 {
5883 power = rf1->count;
5884
5885 if (power == 1)
5886 {
5887 iter_result = rf1->repr;
5888
5889 if (dump_file && (dump_flags & TDF_DETAILS))
5890 {
5891 unsigned elt;
5892 repeat_factor *rf;
5893 fputs ("Multiplying by cached product ", dump_file);
5894 for (elt = j; elt < vec_len; elt++)
5895 {
5896 rf = &repeat_factor_vec[elt];
5897 print_generic_expr (dump_file, rf->factor);
5898 if (elt < vec_len - 1)
5899 fputs (" * ", dump_file);
5900 }
5901 fputs ("\n", dump_file);
5902 }
5903 }
5904 else
5905 {
5906 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5907 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5908 build_int_cst (integer_type_node,
5909 power));
5910 gimple_call_set_lhs (pow_stmt, iter_result);
5911 gimple_set_location (pow_stmt, gimple_location (stmt));
5912 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5913 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5914
5915 if (dump_file && (dump_flags & TDF_DETAILS))
5916 {
5917 unsigned elt;
5918 repeat_factor *rf;
5919 fputs ("Building __builtin_pow call for cached product (",
5920 dump_file);
5921 for (elt = j; elt < vec_len; elt++)
5922 {
5923 rf = &repeat_factor_vec[elt];
5924 print_generic_expr (dump_file, rf->factor);
5925 if (elt < vec_len - 1)
5926 fputs (" * ", dump_file);
5927 }
5928 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5929 power);
5930 }
5931 }
5932 }
5933 else
5934 {
5935 /* Otherwise, find the first factor in the repeated factor
5936 vector whose occurrence count is at least 2. If no such
5937 factor exists, there are no builtin_powi opportunities
5938 remaining. */
5939 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5940 {
5941 if (rf1->count >= 2)
5942 break;
5943 }
5944
5945 if (j >= vec_len)
5946 break;
5947
5948 power = rf1->count;
5949
5950 if (dump_file && (dump_flags & TDF_DETAILS))
5951 {
5952 unsigned elt;
5953 repeat_factor *rf;
5954 fputs ("Building __builtin_pow call for (", dump_file);
5955 for (elt = j; elt < vec_len; elt++)
5956 {
5957 rf = &repeat_factor_vec[elt];
5958 print_generic_expr (dump_file, rf->factor);
5959 if (elt < vec_len - 1)
5960 fputs (" * ", dump_file);
5961 }
5962 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5963 }
5964
5965 reassociate_stats.pows_created++;
5966
5967 /* Visit each element of the vector in reverse order (so that
5968 high-occurrence elements are visited first, and within the
5969 same occurrence count, lower-ranked elements are visited
5970 first). Form a linear product of all elements in this order
5971 whose occurrence count is at least that of element J.
5972 Record the SSA name representing the product of each element
5973 with all subsequent elements in the vector. */
5974 if (j == vec_len - 1)
5975 rf1->repr = rf1->factor;
5976 else
5977 {
5978 for (ii = vec_len - 2; ii >= (int)j; ii--)
5979 {
5980 tree op1, op2;
5981
5982 rf1 = &repeat_factor_vec[ii];
5983 rf2 = &repeat_factor_vec[ii + 1];
5984
5985 /* Init the last factor's representative to be itself. */
5986 if (!rf2->repr)
5987 rf2->repr = rf2->factor;
5988
5989 op1 = rf1->factor;
5990 op2 = rf2->repr;
5991
5992 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5993 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5994 op1, op2);
5995 gimple_set_location (mul_stmt, gimple_location (stmt));
5996 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5997 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5998 rf1->repr = target_ssa;
5999
6000 /* Don't reprocess the multiply we just introduced. */
6001 gimple_set_visited (mul_stmt, true);
6002 }
6003 }
6004
6005 /* Form a call to __builtin_powi for the maximum product
6006 just formed, raised to the power obtained earlier. */
6007 rf1 = &repeat_factor_vec[j];
6008 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6009 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6010 build_int_cst (integer_type_node,
6011 power));
6012 gimple_call_set_lhs (pow_stmt, iter_result);
6013 gimple_set_location (pow_stmt, gimple_location (stmt));
6014 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6015 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6016 }
6017
6018 /* If we previously formed at least one other builtin_powi call,
6019 form the product of this one and those others. */
6020 if (result)
6021 {
6022 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6023 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6024 result, iter_result);
6025 gimple_set_location (mul_stmt, gimple_location (stmt));
6026 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6027 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6028 gimple_set_visited (mul_stmt, true);
6029 result = new_result;
6030 }
6031 else
6032 result = iter_result;
6033
6034 /* Decrement the occurrence count of each element in the product
6035 by the count found above, and remove this many copies of each
6036 factor from OPS. */
6037 for (i = j; i < vec_len; i++)
6038 {
6039 unsigned k = power;
6040 unsigned n;
6041
6042 rf1 = &repeat_factor_vec[i];
6043 rf1->count -= power;
6044
6045 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6046 {
6047 if (oe->op == rf1->factor)
6048 {
6049 if (oe->count <= k)
6050 {
6051 ops->ordered_remove (n);
6052 k -= oe->count;
6053
6054 if (k == 0)
6055 break;
6056 }
6057 else
6058 {
6059 oe->count -= k;
6060 break;
6061 }
6062 }
6063 }
6064 }
6065 }
6066
6067 /* At this point all elements in the repeated factor vector have a
6068 remaining occurrence count of 0 or 1, and those with a count of 1
6069 don't have cached representatives. Re-sort the ops vector and
6070 clean up. */
6071 ops->qsort (sort_by_operand_rank);
6072 repeat_factor_vec.release ();
6073
6074 /* Return the final product computed herein. Note that there may
6075 still be some elements with single occurrence count left in OPS;
6076 those will be handled by the normal reassociation logic. */
6077 return result;
6078 }
6079
6080 /* Attempt to optimize
6081 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6082 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
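/* E.g. 2.0 * copysign (3.0, y) becomes copysign (6.0, y); for a negative
   constant such as -2.0 the freshly built copysign call is additionally
   negated.  The old call is left untouched for DCE to remove.  */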
6083
6084 static void
6085 attempt_builtin_copysign (vec<operand_entry *> *ops)
6086 {
6087 operand_entry *oe;
6088 unsigned int i;
6089 unsigned int length = ops->length ();
6090 tree cst = ops->last ()->op;
6091
6092 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6093 return;
6094
6095 FOR_EACH_VEC_ELT (*ops, i, oe)
6096 {
6097 if (TREE_CODE (oe->op) == SSA_NAME
6098 && has_single_use (oe->op))
6099 {
6100 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6101 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6102 {
6103 tree arg0, arg1;
6104 switch (gimple_call_combined_fn (old_call))
6105 {
6106 CASE_CFN_COPYSIGN:
6107 CASE_CFN_COPYSIGN_FN:
6108 arg0 = gimple_call_arg (old_call, 0);
6109 arg1 = gimple_call_arg (old_call, 1);
6110 /* The first argument of copysign must be a constant,
6111 otherwise there's nothing to do. */
6112 if (TREE_CODE (arg0) == REAL_CST)
6113 {
6114 tree type = TREE_TYPE (arg0);
6115 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6116 /* If we couldn't fold to a single constant, skip it.
6117 That happens e.g. for inexact multiplication when
6118 -frounding-math. */
6119 if (mul == NULL_TREE)
6120 break;
6121 /* Instead of adjusting OLD_CALL, let's build a new
6122 call to not leak the LHS and prevent keeping bogus
6123 debug statements. DCE will clean up the old call. */
6124 gcall *new_call;
6125 if (gimple_call_internal_p (old_call))
6126 new_call = gimple_build_call_internal
6127 (IFN_COPYSIGN, 2, mul, arg1);
6128 else
6129 new_call = gimple_build_call
6130 (gimple_call_fndecl (old_call), 2, mul, arg1);
6131 tree lhs = make_ssa_name (type);
6132 gimple_call_set_lhs (new_call, lhs);
6133 gimple_set_location (new_call,
6134 gimple_location (old_call));
6135 insert_stmt_after (new_call, old_call);
6136 /* We've used the constant, get rid of it. */
6137 ops->pop ();
6138 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6139 /* Handle the CST1 < 0 case by negating the result. */
6140 if (cst1_neg)
6141 {
6142 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6143 gimple *negate_stmt
6144 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6145 insert_stmt_after (negate_stmt, new_call);
6146 oe->op = negrhs;
6147 }
6148 else
6149 oe->op = lhs;
6150 if (dump_file && (dump_flags & TDF_DETAILS))
6151 {
6152 fprintf (dump_file, "Optimizing copysign: ");
6153 print_generic_expr (dump_file, cst);
6154 fprintf (dump_file, " * COPYSIGN (");
6155 print_generic_expr (dump_file, arg0);
6156 fprintf (dump_file, ", ");
6157 print_generic_expr (dump_file, arg1);
6158 fprintf (dump_file, ") into %sCOPYSIGN (",
6159 cst1_neg ? "-" : "");
6160 print_generic_expr (dump_file, mul);
6161 fprintf (dump_file, ", ");
6162 print_generic_expr (dump_file, arg1);
6163 fprintf (dump_file, ")\n");
6164 }
6165 return;
6166 }
6167 break;
6168 default:
6169 break;
6170 }
6171 }
6172 }
6173 }
6174 }
6175
6176 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6177
6178 static void
6179 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6180 {
6181 tree rhs1;
6182
6183 if (dump_file && (dump_flags & TDF_DETAILS))
6184 {
6185 fprintf (dump_file, "Transforming ");
6186 print_gimple_stmt (dump_file, stmt, 0);
6187 }
6188
6189 rhs1 = gimple_assign_rhs1 (stmt);
6190 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6191 update_stmt (stmt);
6192 remove_visited_stmt_chain (rhs1);
6193
6194 if (dump_file && (dump_flags & TDF_DETAILS))
6195 {
6196 fprintf (dump_file, " into ");
6197 print_gimple_stmt (dump_file, stmt, 0);
6198 }
6199 }
6200
6201 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6202
6203 static void
6204 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6205 tree rhs1, tree rhs2)
6206 {
6207 if (dump_file && (dump_flags & TDF_DETAILS))
6208 {
6209 fprintf (dump_file, "Transforming ");
6210 print_gimple_stmt (dump_file, stmt, 0);
6211 }
6212
6213 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6214 update_stmt (gsi_stmt (*gsi));
6215 remove_visited_stmt_chain (rhs1);
6216
6217 if (dump_file && (dump_flags & TDF_DETAILS))
6218 {
6219 fprintf (dump_file, " into ");
6220 print_gimple_stmt (dump_file, stmt, 0);
6221 }
6222 }
6223
6224 /* Reassociate expressions in basic block BB and its children in the
6225 post-dominator tree.
6226
6227 Bubble up return status from maybe_optimize_range_tests. */
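/* In outline (added summary): the final statement of BB is first offered to
   maybe_optimize_range_tests; then every not-yet-visited associative binary
   statement is linearized into an operand vector, which is sorted by rank
   and optimized (undistribution, range tests, copysign, powi creation, a
   trailing -1 factor turned into a final negation) before the statement
   chain is rewritten from the remaining operands.  */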
6228
6229 static bool
6230 reassociate_bb (basic_block bb)
6231 {
6232 gimple_stmt_iterator gsi;
6233 basic_block son;
6234 gimple *stmt = last_stmt (bb);
6235 bool cfg_cleanup_needed = false;
6236
6237 if (stmt && !gimple_visited_p (stmt))
6238 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6239
6240 bool do_prev = false;
6241 for (gsi = gsi_last_bb (bb);
6242 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6243 {
6244 do_prev = true;
6245 stmt = gsi_stmt (gsi);
6246
6247 if (is_gimple_assign (stmt)
6248 && !stmt_could_throw_p (cfun, stmt))
6249 {
6250 tree lhs, rhs1, rhs2;
6251 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6252
6253 /* If this was part of an already processed statement,
6254 we don't need to touch it again. */
6255 if (gimple_visited_p (stmt))
6256 {
6257 /* This statement might have become dead because of previous
6258 reassociations. */
6259 if (has_zero_uses (gimple_get_lhs (stmt)))
6260 {
6261 reassoc_remove_stmt (&gsi);
6262 release_defs (stmt);
6263 /* We might end up removing the last stmt above which
6264 places the iterator to the end of the sequence.
6265 Reset it to the last stmt in this case and make sure
6266 we don't do gsi_prev in that case. */
6267 if (gsi_end_p (gsi))
6268 {
6269 gsi = gsi_last_bb (bb);
6270 do_prev = false;
6271 }
6272 }
6273 continue;
6274 }
6275
6276 /* If this is not a gimple binary expression, there is
6277 nothing for us to do with it. */
6278 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6279 continue;
6280
6281 lhs = gimple_assign_lhs (stmt);
6282 rhs1 = gimple_assign_rhs1 (stmt);
6283 rhs2 = gimple_assign_rhs2 (stmt);
6284
6285 /* For operations other than bitwise AND/OR/XOR and MIN/MAX we cannot
6286 associate all types. Verify that here. */
6287 if (rhs_code != BIT_IOR_EXPR
6288 && rhs_code != BIT_AND_EXPR
6289 && rhs_code != BIT_XOR_EXPR
6290 && rhs_code != MIN_EXPR
6291 && rhs_code != MAX_EXPR
6292 && (!can_reassociate_p (lhs)
6293 || !can_reassociate_p (rhs1)
6294 || !can_reassociate_p (rhs2)))
6295 continue;
6296
6297 if (associative_tree_code (rhs_code))
6298 {
6299 auto_vec<operand_entry *> ops;
6300 tree powi_result = NULL_TREE;
6301 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6302
6303 /* There may be no immediate uses left by the time we
6304 get here because we may have eliminated them all. */
6305 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6306 continue;
6307
6308 gimple_set_visited (stmt, true);
6309 linearize_expr_tree (&ops, stmt, true, true);
6310 ops.qsort (sort_by_operand_rank);
6311 int orig_len = ops.length ();
6312 optimize_ops_list (rhs_code, &ops);
6313 if (undistribute_ops_list (rhs_code, &ops,
6314 loop_containing_stmt (stmt)))
6315 {
6316 ops.qsort (sort_by_operand_rank);
6317 optimize_ops_list (rhs_code, &ops);
6318 }
6319 if (undistribute_bitref_for_vector (rhs_code, &ops,
6320 loop_containing_stmt (stmt)))
6321 {
6322 ops.qsort (sort_by_operand_rank);
6323 optimize_ops_list (rhs_code, &ops);
6324 }
6325 if (rhs_code == PLUS_EXPR
6326 && transform_add_to_multiply (&ops))
6327 ops.qsort (sort_by_operand_rank);
6328
6329 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6330 {
6331 if (is_vector)
6332 optimize_vec_cond_expr (rhs_code, &ops);
6333 else
6334 optimize_range_tests (rhs_code, &ops, NULL);
6335 }
6336
6337 if (rhs_code == MULT_EXPR && !is_vector)
6338 {
6339 attempt_builtin_copysign (&ops);
6340
6341 if (reassoc_insert_powi_p
6342 && flag_unsafe_math_optimizations)
6343 powi_result = attempt_builtin_powi (stmt, &ops);
6344 }
6345
6346 operand_entry *last;
6347 bool negate_result = false;
6348 if (ops.length () > 1
6349 && rhs_code == MULT_EXPR)
6350 {
6351 last = ops.last ();
6352 if ((integer_minus_onep (last->op)
6353 || real_minus_onep (last->op))
6354 && !HONOR_SNANS (TREE_TYPE (lhs))
6355 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6356 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6357 {
6358 ops.pop ();
6359 negate_result = true;
6360 }
6361 }
6362
6363 tree new_lhs = lhs;
6364 /* If the operand vector is now empty, all operands were
6365 consumed by the __builtin_powi optimization. */
6366 if (ops.length () == 0)
6367 transform_stmt_to_copy (&gsi, stmt, powi_result);
6368 else if (ops.length () == 1)
6369 {
6370 tree last_op = ops.last ()->op;
6371
6372 /* If the stmt that defines operand has to be inserted, insert it
6373 before the use. */
6374 if (ops.last ()->stmt_to_insert)
6375 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6376 if (powi_result)
6377 transform_stmt_to_multiply (&gsi, stmt, last_op,
6378 powi_result);
6379 else
6380 transform_stmt_to_copy (&gsi, stmt, last_op);
6381 }
6382 else
6383 {
6384 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6385 int ops_num = ops.length ();
6386 int width;
6387
6388 /* For binary bit operations, if there are at least 3
6389 operands and the last operand in OPS is a constant,
6390 move it to the front. This helps ensure that we generate
6391 (X & Y) & C rather than (X & C) & Y. The former will
6392 often match a canonical bit test when we get to RTL. */
6393 if (ops.length () > 2
6394 && (rhs_code == BIT_AND_EXPR
6395 || rhs_code == BIT_IOR_EXPR
6396 || rhs_code == BIT_XOR_EXPR)
6397 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6398 std::swap (*ops[0], *ops[ops_num - 1]);
6399
6400 /* Only rewrite the expression tree into parallel form in the
6401 last reassoc pass, to avoid useless back-and-forth work
6402 with the initial linearization. */
6403 if (!reassoc_insert_powi_p
6404 && ops.length () > 3
6405 && (width = get_reassociation_width (ops_num, rhs_code,
6406 mode)) > 1)
6407 {
6408 if (dump_file && (dump_flags & TDF_DETAILS))
6409 fprintf (dump_file,
6410 "Width = %d was chosen for reassociation\n",
6411 width);
6412 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6413 width, ops);
6414 }
6415 else
6416 {
6417 /* When there are three operands left, we want
6418 to make sure the ones that get the double
6419 binary op are chosen wisely. */
6420 int len = ops.length ();
6421 if (len >= 3)
6422 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6423
6424 new_lhs = rewrite_expr_tree (stmt, 0, ops,
6425 powi_result != NULL
6426 || negate_result,
6427 len != orig_len);
6428 }
6429
6430 /* If we combined some repeated factors into a
6431 __builtin_powi call, multiply that result by the
6432 reassociated operands. */
6433 if (powi_result)
6434 {
6435 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6436 tree type = TREE_TYPE (lhs);
6437 tree target_ssa = make_temp_ssa_name (type, NULL,
6438 "reassocpow");
6439 gimple_set_lhs (lhs_stmt, target_ssa);
6440 update_stmt (lhs_stmt);
6441 if (lhs != new_lhs)
6442 {
6443 target_ssa = new_lhs;
6444 new_lhs = lhs;
6445 }
6446 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6447 powi_result, target_ssa);
6448 gimple_set_location (mul_stmt, gimple_location (stmt));
6449 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6450 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6451 }
6452 }
6453
6454 if (negate_result)
6455 {
6456 stmt = SSA_NAME_DEF_STMT (lhs);
6457 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6458 gimple_set_lhs (stmt, tmp);
6459 if (lhs != new_lhs)
6460 tmp = new_lhs;
6461 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6462 tmp);
6463 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6464 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6465 update_stmt (stmt);
6466 }
6467 }
6468 }
6469 }
6470 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6471 son;
6472 son = next_dom_son (CDI_POST_DOMINATORS, son))
6473 cfg_cleanup_needed |= reassociate_bb (son);
6474
6475 return cfg_cleanup_needed;
6476 }
6477
6478 /* Add jumps around shifts for range tests turned into bit tests.
6479 For each SSA_NAME VAR we have code like:
6480 VAR = ...; // final stmt of range comparison
6481 // bit test here...;
6482 OTHERVAR = ...; // final stmt of the bit test sequence
6483 RES = VAR | OTHERVAR;
6484 Turn the above into:
6485 VAR = ...;
6486 if (VAR != 0)
6487 goto <l3>;
6488 else
6489 goto <l2>;
6490 <l2>:
6491 // bit test here...;
6492 OTHERVAR = ...;
6493 <l3>:
6494 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6495
6496 static void
6497 branch_fixup (void)
6498 {
6499 tree var;
6500 unsigned int i;
6501
6502 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6503 {
6504 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6505 gimple *use_stmt;
6506 use_operand_p use;
6507 bool ok = single_imm_use (var, &use, &use_stmt);
6508 gcc_assert (ok
6509 && is_gimple_assign (use_stmt)
6510 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6511 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6512
6513 basic_block cond_bb = gimple_bb (def_stmt);
6514 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6515 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6516
6517 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6518 gimple *g = gimple_build_cond (NE_EXPR, var,
6519 build_zero_cst (TREE_TYPE (var)),
6520 NULL_TREE, NULL_TREE);
6521 location_t loc = gimple_location (use_stmt);
6522 gimple_set_location (g, loc);
6523 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6524
6525 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6526 etrue->probability = profile_probability::even ();
6527 edge efalse = find_edge (cond_bb, then_bb);
6528 efalse->flags = EDGE_FALSE_VALUE;
6529 efalse->probability -= etrue->probability;
6530 then_bb->count -= etrue->count ();
6531
6532 tree othervar = NULL_TREE;
6533 if (gimple_assign_rhs1 (use_stmt) == var)
6534 othervar = gimple_assign_rhs2 (use_stmt);
6535 else if (gimple_assign_rhs2 (use_stmt) == var)
6536 othervar = gimple_assign_rhs1 (use_stmt);
6537 else
6538 gcc_unreachable ();
6539 tree lhs = gimple_assign_lhs (use_stmt);
6540 gphi *phi = create_phi_node (lhs, merge_bb);
6541 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6542 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6543 gsi = gsi_for_stmt (use_stmt);
6544 gsi_remove (&gsi, true);
6545
6546 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6547 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6548 }
6549 reassoc_branch_fixups.release ();
6550 }
6551
6552 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6553 void debug_ops_vector (vec<operand_entry *> ops);
6554
6555 /* Dump the operand entry vector OPS to FILE. */
6556
6557 void
6558 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6559 {
6560 operand_entry *oe;
6561 unsigned int i;
6562
6563 FOR_EACH_VEC_ELT (ops, i, oe)
6564 {
6565 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6566 print_generic_expr (file, oe->op);
6567 fprintf (file, "\n");
6568 }
6569 }
6570
6571 /* Dump the operand entry vector OPS to STDERR. */
6572
6573 DEBUG_FUNCTION void
6574 debug_ops_vector (vec<operand_entry *> ops)
6575 {
6576 dump_ops_vector (stderr, ops);
6577 }
6578
6579 /* Bubble up return status from reassociate_bb. */
6580
6581 static bool
6582 do_reassoc (void)
6583 {
6584 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6585 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6586 }
6587
6588 /* Initialize the reassociation pass. */
6589
6590 static void
6591 init_reassoc (void)
6592 {
6593 int i;
6594 long rank = 2;
6595 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6596
6597 /* Find the loops, so that we can prevent moving calculations in
6598 them. */
6599 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6600
6601 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6602
6603 next_operand_entry_id = 0;
6604
6605 /* Reverse RPO (Reverse Post Order) will give us something where
6606 deeper loops come later. */
6607 pre_and_rev_post_order_compute (NULL, bbs, false);
6608 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6609 operand_rank = new hash_map<tree, long>;
6610
6611 /* Give each default definition a distinct rank. This includes
6612 parameters and the static chain. Walk backwards over all
6613 SSA names so that we get proper rank ordering according
6614 to tree_swap_operands_p. */
6615 for (i = num_ssa_names - 1; i > 0; --i)
6616 {
6617 tree name = ssa_name (i);
6618 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6619 insert_operand_rank (name, ++rank);
6620 }
6621
6622 /* Set up rank for each BB */
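/* The BB rank occupies the upper bits; ranks given to individual SSA names
   are presumably derived from the defining block's bb_rank plus a small
   offset, so the shift keeps ranks in later blocks strictly larger (an
   assumption noted here, not spelled out at this point in the file).  */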
6623 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6624 bb_rank[bbs[i]] = ++rank << 16;
6625
6626 free (bbs);
6627 calculate_dominance_info (CDI_POST_DOMINATORS);
6628 plus_negates = vNULL;
6629 }
6630
6631 /* Cleanup after the reassociation pass, and print stats if
6632 requested. */
6633
6634 static void
6635 fini_reassoc (void)
6636 {
6637 statistics_counter_event (cfun, "Linearized",
6638 reassociate_stats.linearized);
6639 statistics_counter_event (cfun, "Constants eliminated",
6640 reassociate_stats.constants_eliminated);
6641 statistics_counter_event (cfun, "Ops eliminated",
6642 reassociate_stats.ops_eliminated);
6643 statistics_counter_event (cfun, "Statements rewritten",
6644 reassociate_stats.rewritten);
6645 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6646 reassociate_stats.pows_encountered);
6647 statistics_counter_event (cfun, "Built-in powi calls created",
6648 reassociate_stats.pows_created);
6649
6650 delete operand_rank;
6651 operand_entry_pool.release ();
6652 free (bb_rank);
6653 plus_negates.release ();
6654 free_dominance_info (CDI_POST_DOMINATORS);
6655 loop_optimizer_finalize ();
6656 }
6657
6658 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6659 insertion of __builtin_powi calls.
6660
6661 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6662 optimization of a gimple conditional. Otherwise returns zero. */
6663
6664 static unsigned int
6665 execute_reassoc (bool insert_powi_p)
6666 {
6667 reassoc_insert_powi_p = insert_powi_p;
6668
6669 init_reassoc ();
6670
6671 bool cfg_cleanup_needed;
6672 cfg_cleanup_needed = do_reassoc ();
6673 repropagate_negates ();
6674 branch_fixup ();
6675
6676 fini_reassoc ();
6677 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6678 }
6679
6680 namespace {
6681
6682 const pass_data pass_data_reassoc =
6683 {
6684 GIMPLE_PASS, /* type */
6685 "reassoc", /* name */
6686 OPTGROUP_NONE, /* optinfo_flags */
6687 TV_TREE_REASSOC, /* tv_id */
6688 ( PROP_cfg | PROP_ssa ), /* properties_required */
6689 0, /* properties_provided */
6690 0, /* properties_destroyed */
6691 0, /* todo_flags_start */
6692 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6693 };
6694
6695 class pass_reassoc : public gimple_opt_pass
6696 {
6697 public:
6698 pass_reassoc (gcc::context *ctxt)
6699 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6700 {}
6701
6702 /* opt_pass methods: */
6703 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6704 void set_pass_param (unsigned int n, bool param)
6705 {
6706 gcc_assert (n == 0);
6707 insert_powi_p = param;
6708 }
6709 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6710 virtual unsigned int execute (function *)
6711 { return execute_reassoc (insert_powi_p); }
6712
6713 private:
6714 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6715 point 3a in the pass header comment. */
6716 bool insert_powi_p;
6717 }; // class pass_reassoc
6718
6719 } // anon namespace
6720
6721 gimple_opt_pass *
6722 make_pass_reassoc (gcc::context *ctxt)
6723 {
6724 return new pass_reassoc (ctxt);
6725 }