]>
Commit | Line | Data |
---|---|---|
012309e6 | 1 | /* Reassociation for trees. |
a5544970 | 2 | Copyright (C) 2005-2019 Free Software Foundation, Inc. |
012309e6 DB |
3 | Contributed by Daniel Berlin <dan@dberlin.org> |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9dcd6f09 | 9 | the Free Software Foundation; either version 3, or (at your option) |
012309e6 DB |
10 | any later version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
012309e6 DB |
20 | |
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
c7131fb2 | 24 | #include "backend.h" |
957060b5 AM |
25 | #include "target.h" |
26 | #include "rtl.h" | |
c7131fb2 AM |
27 | #include "tree.h" |
28 | #include "gimple.h" | |
957060b5 AM |
29 | #include "cfghooks.h" |
30 | #include "alloc-pool.h" | |
31 | #include "tree-pass.h" | |
4d0cdd0c | 32 | #include "memmodel.h" |
b114bfb4 | 33 | #include "tm_p.h" |
957060b5 AM |
34 | #include "ssa.h" |
35 | #include "optabs-tree.h" | |
36 | #include "gimple-pretty-print.h" | |
37 | #include "diagnostic-core.h" | |
40e23961 | 38 | #include "fold-const.h" |
d8a2d370 | 39 | #include "stor-layout.h" |
60393bbc | 40 | #include "cfganal.h" |
2fb9a547 AM |
41 | #include "gimple-fold.h" |
42 | #include "tree-eh.h" | |
5be5c238 | 43 | #include "gimple-iterator.h" |
18f429e2 | 44 | #include "gimplify-me.h" |
442b4905 | 45 | #include "tree-cfg.h" |
442b4905 | 46 | #include "tree-ssa-loop.h" |
36566b39 | 47 | #include "flags.h" |
442b4905 | 48 | #include "tree-ssa.h" |
0e0ed594 | 49 | #include "langhooks.h" |
7a9c7d01 | 50 | #include "cfgloop.h" |
df7b0cc4 | 51 | #include "params.h" |
9b2b7279 | 52 | #include "builtins.h" |
73049af5 | 53 | #include "gimplify.h" |
314709cd | 54 | #include "case-cfn-macros.h" |
012309e6 | 55 | |
0e0ed594 JL |
56 | /* This is a simple global reassociation pass. It is, in part, based |
57 | on the LLVM pass of the same name (They do some things more/less | |
58 | than we do, in different orders, etc). | |
012309e6 | 59 | |
0e0ed594 | 60 | It consists of five steps: |
012309e6 | 61 | |
0e0ed594 JL |
62 | 1. Breaking up subtract operations into addition + negate, where |
63 | it would promote the reassociation of adds. | |
012309e6 | 64 | |
0e0ed594 JL |
65 | 2. Left linearization of the expression trees, so that (A+B)+(C+D) |
66 | becomes (((A+B)+C)+D), which is easier for us to rewrite later. | |
67 | During linearization, we place the operands of the binary | |
526ceb68 | 68 | expressions into a vector of operand_entry_* |
012309e6 | 69 | |
0e0ed594 JL |
70 | 3. Optimization of the operand lists, eliminating things like a + |
71 | -a, a & a, etc. | |
012309e6 | 72 | |
a6f8851e BS |
73 | 3a. Combine repeated factors with the same occurrence counts |
74 | into a __builtin_powi call that will later be optimized into | |
75 | an optimal number of multiplies. | |
76 | ||
0e0ed594 JL |
77 | 4. Rewrite the expression trees we linearized and optimized so |
78 | they are in proper rank order. | |
012309e6 | 79 | |
0e0ed594 | 80 | 5. Repropagate negates, as nothing else will clean it up ATM. |
012309e6 | 81 | |
0e0ed594 JL |
82 | A bit of theory on #4, since nobody seems to write anything down |
83 | about why it makes sense to do it the way they do it: | |
012309e6 | 84 | |
0e0ed594 JL |
85 | We could do this much nicer theoretically, but don't (for reasons |
86 | explained after how to do it theoretically nice :P). | |
012309e6 | 87 | |
0e0ed594 JL |
88 | In order to promote the most redundancy elimination, you want |
89 | binary expressions whose operands are the same rank (or | |
6416ae7f | 90 | preferably, the same value) exposed to the redundancy eliminator, |
0e0ed594 | 91 | for possible elimination. |
012309e6 | 92 | |
0e0ed594 JL |
93 | So the way to do this if we really cared, is to build the new op |
94 | tree from the leaves to the roots, merging as you go, and putting the | |
95 | new op on the end of the worklist, until you are left with one | |
96 | thing on the worklist. | |
012309e6 | 97 | |
0e0ed594 JL |
98 | IE if you have to rewrite the following set of operands (listed with |
99 | rank in parentheses), with opcode PLUS_EXPR: | |
012309e6 | 100 | |
0e0ed594 | 101 | a (1), b (1), c (1), d (2), e (2) |
012309e6 | 102 | |
012309e6 | 103 | |
0e0ed594 JL |
104 | We start with our merge worklist empty, and the ops list with all of |
105 | those on it. | |
012309e6 | 106 | |
0e0ed594 JL |
107 | You want to first merge all leaves of the same rank, as much as |
108 | possible. | |
109 | ||
110 | So first build a binary op of | |
111 | ||
112 | mergetmp = a + b, and put "mergetmp" on the merge worklist. | |
113 | ||
114 | Because there is no three operand form of PLUS_EXPR, c is not going to | |
115 | be exposed to redundancy elimination as a rank 1 operand. | |
116 | ||
117 | So you might as well throw it on the merge worklist (you could also | |
118 | consider it to now be a rank two operand, and merge it with d and e, | |
119 | but in this case, you then have evicted e from a binary op. So at | |
120 | least in this situation, you can't win.) | |
121 | ||
122 | Then build a binary op of d + e | |
123 | mergetmp2 = d + e | |
124 | ||
125 | and put mergetmp2 on the merge worklist. | |
b8698a0f | 126 | |
0e0ed594 | 127 | so merge worklist = {mergetmp, c, mergetmp2} |
b8698a0f | 128 | |
0e0ed594 JL |
129 | Continue building binary ops of these operations until you have only |
130 | one operation left on the worklist. | |
b8698a0f | 131 | |
0e0ed594 | 132 | So we have |
b8698a0f | 133 | |
0e0ed594 JL |
134 | build binary op |
135 | mergetmp3 = mergetmp + c | |
b8698a0f | 136 | |
0e0ed594 | 137 | worklist = {mergetmp2, mergetmp3} |
b8698a0f | 138 | |
0e0ed594 | 139 | mergetmp4 = mergetmp2 + mergetmp3 |
b8698a0f | 140 | |
0e0ed594 | 141 | worklist = {mergetmp4} |
b8698a0f | 142 | |
0e0ed594 JL |
143 | because we have one operation left, we can now just set the original |
144 | statement equal to the result of that operation. | |
b8698a0f | 145 | |
0e0ed594 JL |
146 | This will at least expose a + b and d + e to redundancy elimination |
147 | as binary operations. | |
b8698a0f | 148 | |
0e0ed594 JL |
149 | For extra points, you can reuse the old statements to build the |
150 | mergetmps, since you shouldn't run out. | |
151 | ||
152 | So why don't we do this? | |
b8698a0f | 153 | |
0e0ed594 JL |
154 | Because it's expensive, and rarely will help. Most trees we are |
155 | reassociating have 3 or less ops. If they have 2 ops, they already | |
156 | will be written into a nice single binary op. If you have 3 ops, a | |
157 | single simple check suffices to tell you whether the first two are of the | |
158 | same rank. If so, you know to order it | |
159 | ||
160 | mergetmp = op1 + op2 | |
161 | newstmt = mergetmp + op3 | |
b8698a0f | 162 | |
0e0ed594 JL |
163 | instead of |
164 | mergetmp = op2 + op3 | |
165 | newstmt = mergetmp + op1 | |
b8698a0f | 166 | |
0e0ed594 JL |
167 | If all three are of the same rank, you can't expose them all in a |
168 | single binary operator anyway, so the above is *still* the best you | |
169 | can do. | |
b8698a0f | 170 | |
0e0ed594 JL |
171 | Thus, this is what we do. When we have three ops left, we check to see |
172 | what order to put them in, and call it a day. As a nod to vector sum | |
1d72ff1a | 173 | reduction, we check if any of the ops are really a phi node that is a |
0e0ed594 JL |
174 | destructive update for the associating op, and keep the destructive |
175 | update together for vector sum reduction recognition. */ | |
012309e6 | 176 | |
2162bfe1 TV |
177 | /* Enable insertion of __builtin_powi calls during execute_reassoc. See |
178 | point 3a in the pass header comment. */ | |
179 | static bool reassoc_insert_powi_p; | |
012309e6 | 180 | |
0e0ed594 JL |
181 | /* Statistics */ |
182 | static struct | |
183 | { | |
184 | int linearized; | |
185 | int constants_eliminated; | |
186 | int ops_eliminated; | |
187 | int rewritten; | |
a6f8851e BS |
188 | int pows_encountered; |
189 | int pows_created; | |
0e0ed594 | 190 | } reassociate_stats; |
012309e6 | 191 | |
/* Operator, rank pair.  */
struct operand_entry
{
  /* Rank computed by get_rank for OP; operand vectors are sorted by
     rank in decreasing order.  */
  unsigned int rank;
  /* Unique id (from next_operand_entry_id) used as a tie-breaker so
     qsort results are identical on different hosts.  */
  unsigned int id;
  /* The operand itself.  */
  tree op;
  /* Repeat count: 1 for ordinary entries (add_to_ops_vec), the factor
     repeat count for entries added by add_repeat_to_ops_vec.  */
  unsigned int count;
  /* Statement that still needs to be inserted for this operand, or
     NULL (see add_to_ops_vec's STMT_TO_INSERT parameter).  */
  gimple *stmt_to_insert;
};
0e0ed594 | 201 | |
/* Pool from which all operand_entry structures are allocated.  */
static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  /* If no debug bind stmts can exist, or STMT is a PHI (for which
     gsi_remove adds no debug stmts), a plain removal suffices.  */
  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  /* Remember the iterator position just before STMT, plus STMT's uid
     and block, so we can revisit whatever gsi_remove leaves in STMT's
     place.  */
  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  /* Step PREV forward onto the first stmt that occupied STMT's slot;
     if STMT was first in the block, restart from the block head.  */
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  /* Any stmts between PREV and the new *GSI position are debug stmts
     freshly added by gsi_remove; they carry uid 0, so stamp them with
     the removed stmt's uid to keep uid-based ordering consistent.  */
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
	  /* An argument defined inside the loop makes this a
	     loop-carried phi: rank it as latch rank plus the bias.  */
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
315 | ||
316 | /* If EXP is an SSA_NAME defined by a PHI statement that represents a | |
317 | loop-carried dependence of an innermost loop, return TRUE; else | |
318 | return FALSE. */ | |
319 | static bool | |
320 | loop_carried_phi (tree exp) | |
321 | { | |
355fe088 | 322 | gimple *phi_stmt; |
a3059635 BS |
323 | long block_rank; |
324 | ||
325 | if (TREE_CODE (exp) != SSA_NAME | |
326 | || SSA_NAME_IS_DEFAULT_DEF (exp)) | |
327 | return false; | |
328 | ||
329 | phi_stmt = SSA_NAME_DEF_STMT (exp); | |
330 | ||
331 | if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI) | |
332 | return false; | |
333 | ||
334 | /* Non-loop-carried phis have block rank. Loop-carried phis have | |
335 | an additional bias added in. If this phi doesn't have block rank, | |
336 | it's biased and should not be propagated. */ | |
337 | block_rank = bb_rank[gimple_bb (phi_stmt)->index]; | |
338 | ||
339 | if (phi_rank (phi_stmt) != block_rank) | |
340 | return true; | |
341 | ||
342 | return false; | |
343 | } | |
344 | ||
345 | /* Return the maximum of RANK and the rank that should be propagated | |
346 | from expression OP. For most operands, this is just the rank of OP. | |
347 | For loop-carried phis, the value is zero to avoid undoing the bias | |
348 | in favor of the phi. */ | |
349 | static long | |
350 | propagate_rank (long rank, tree op) | |
351 | { | |
352 | long op_rank; | |
353 | ||
354 | if (loop_carried_phi (op)) | |
355 | return rank; | |
356 | ||
357 | op_rank = get_rank (op); | |
358 | ||
359 | return MAX (rank, op_rank); | |
360 | } | |
012309e6 | 361 | |
0e0ed594 | 362 | /* Look up the operand rank structure for expression E. */ |
012309e6 | 363 | |
15814ba0 | 364 | static inline long |
0e0ed594 | 365 | find_operand_rank (tree e) |
012309e6 | 366 | { |
b787e7a2 TS |
367 | long *slot = operand_rank->get (e); |
368 | return slot ? *slot : -1; | |
012309e6 DB |
369 | } |
370 | ||
0e0ed594 | 371 | /* Insert {E,RANK} into the operand rank hashtable. */ |
012309e6 | 372 | |
15814ba0 PB |
373 | static inline void |
374 | insert_operand_rank (tree e, long rank) | |
012309e6 | 375 | { |
15814ba0 | 376 | gcc_assert (rank > 0); |
b787e7a2 | 377 | gcc_assert (!operand_rank->put (e, rank)); |
012309e6 DB |
378 | } |
379 | ||
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      long rank;
      tree op;

      /* Function arguments etc. get their pre-computed rank.  */
      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
469 | ||
0e0ed594 JL |
470 | |
471 | /* We want integer ones to end up last no matter what, since they are | |
472 | the ones we can do the most with. */ | |
7b9be700 JJ |
473 | #define INTEGER_CONST_TYPE 1 << 4 |
474 | #define FLOAT_ONE_CONST_TYPE 1 << 3 | |
0e0ed594 JL |
475 | #define FLOAT_CONST_TYPE 1 << 2 |
476 | #define OTHER_CONST_TYPE 1 << 1 | |
477 | ||
478 | /* Classify an invariant tree into integer, float, or other, so that | |
479 | we can sort them to be near other constants of the same type. */ | |
480 | static inline int | |
481 | constant_type (tree t) | |
482 | { | |
483 | if (INTEGRAL_TYPE_P (TREE_TYPE (t))) | |
484 | return INTEGER_CONST_TYPE; | |
485 | else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) | |
7b9be700 JJ |
486 | { |
487 | /* Sort -1.0 and 1.0 constants last, while in some cases | |
488 | const_binop can't optimize some inexact operations, multiplication | |
489 | by -1.0 or 1.0 can be always merged with others. */ | |
490 | if (real_onep (t) || real_minus_onep (t)) | |
491 | return FLOAT_ONE_CONST_TYPE; | |
492 | return FLOAT_CONST_TYPE; | |
493 | } | |
0e0ed594 JL |
494 | else |
495 | return OTHER_CONST_TYPE; | |
496 | } | |
497 | ||
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  /* Primary key: rank, descending.  */
  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  /* Non-SSA operands sort after SSA names of the same rank; among
     themselves they are ordered by insertion id for stability.  */
  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      /* Same block (or both insertable): fall back to dominance
	 order, then to SSA version, so the result stays total.  */
      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}
567 | ||
568 | /* Add an operand entry to *OPS for the tree operand OP. */ | |
569 | ||
570 | static void | |
d2db36dd | 571 | add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL) |
0e0ed594 | 572 | { |
526ceb68 | 573 | operand_entry *oe = operand_entry_pool.allocate (); |
0e0ed594 JL |
574 | |
575 | oe->op = op; | |
576 | oe->rank = get_rank (op); | |
f1665f5c | 577 | oe->id = next_operand_entry_id++; |
a6f8851e | 578 | oe->count = 1; |
d2db36dd | 579 | oe->stmt_to_insert = stmt_to_insert; |
9771b263 | 580 | ops->safe_push (oe); |
0e0ed594 | 581 | } |
012309e6 | 582 | |
a6f8851e BS |
583 | /* Add an operand entry to *OPS for the tree operand OP with repeat |
584 | count REPEAT. */ | |
585 | ||
586 | static void | |
526ceb68 | 587 | add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op, |
a6f8851e BS |
588 | HOST_WIDE_INT repeat) |
589 | { | |
526ceb68 | 590 | operand_entry *oe = operand_entry_pool.allocate (); |
a6f8851e BS |
591 | |
592 | oe->op = op; | |
593 | oe->rank = get_rank (op); | |
594 | oe->id = next_operand_entry_id++; | |
595 | oe->count = repeat; | |
d2db36dd | 596 | oe->stmt_to_insert = NULL; |
9771b263 | 597 | ops->safe_push (oe); |
a6f8851e BS |
598 | |
599 | reassociate_stats.pows_encountered++; | |
600 | } | |
601 | ||
0e0ed594 | 602 | /* Return true if STMT is reassociable operation containing a binary |
7a9c7d01 | 603 | operation with tree code CODE, and is inside LOOP. */ |
012309e6 DB |
604 | |
605 | static bool | |
99b1c316 | 606 | is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop) |
0e0ed594 | 607 | { |
726a989a | 608 | basic_block bb = gimple_bb (stmt); |
7a9c7d01 | 609 | |
726a989a | 610 | if (gimple_bb (stmt) == NULL) |
7a9c7d01 ZD |
611 | return false; |
612 | ||
7a9c7d01 ZD |
613 | if (!flow_bb_inside_loop_p (loop, bb)) |
614 | return false; | |
615 | ||
726a989a RB |
616 | if (is_gimple_assign (stmt) |
617 | && gimple_assign_rhs_code (stmt) == code | |
618 | && has_single_use (gimple_assign_lhs (stmt))) | |
6139a3b7 JJ |
619 | { |
620 | tree rhs1 = gimple_assign_rhs1 (stmt); | |
141f2b50 | 621 | tree rhs2 = gimple_assign_rhs2 (stmt); |
6139a3b7 JJ |
622 | if (TREE_CODE (rhs1) == SSA_NAME |
623 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)) | |
624 | return false; | |
625 | if (rhs2 | |
626 | && TREE_CODE (rhs2) == SSA_NAME | |
627 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2)) | |
628 | return false; | |
629 | return true; | |
630 | } | |
726a989a | 631 | |
0e0ed594 JL |
632 | return false; |
633 | } | |
634 | ||
635 | ||
ce40915e RB |
636 | /* Return true if STMT is a nop-conversion. */ |
637 | ||
638 | static bool | |
639 | gimple_nop_conversion_p (gimple *stmt) | |
640 | { | |
641 | if (gassign *ass = dyn_cast <gassign *> (stmt)) | |
642 | { | |
643 | if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass)) | |
644 | && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)), | |
645 | TREE_TYPE (gimple_assign_rhs1 (ass)))) | |
646 | return true; | |
647 | } | |
648 | return false; | |
649 | } | |
650 | ||
0e0ed594 JL |
651 | /* Given NAME, if NAME is defined by a unary operation OPCODE, return the |
652 | operand of the negate operation. Otherwise, return NULL. */ | |
653 | ||
654 | static tree | |
655 | get_unary_op (tree name, enum tree_code opcode) | |
656 | { | |
355fe088 | 657 | gimple *stmt = SSA_NAME_DEF_STMT (name); |
0e0ed594 | 658 | |
ce40915e RB |
659 | /* Look through nop conversions (sign changes). */ |
660 | if (gimple_nop_conversion_p (stmt) | |
661 | && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME) | |
662 | stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); | |
663 | ||
726a989a | 664 | if (!is_gimple_assign (stmt)) |
0e0ed594 JL |
665 | return NULL_TREE; |
666 | ||
726a989a RB |
667 | if (gimple_assign_rhs_code (stmt) == opcode) |
668 | return gimple_assign_rhs1 (stmt); | |
0e0ed594 JL |
669 | return NULL_TREE; |
670 | } | |
671 | ||
ce40915e RB |
672 | /* Return true if OP1 and OP2 have the same value if casted to either type. */ |
673 | ||
674 | static bool | |
675 | ops_equal_values_p (tree op1, tree op2) | |
676 | { | |
677 | if (op1 == op2) | |
678 | return true; | |
679 | ||
366298bd | 680 | tree orig_op1 = op1; |
ce40915e RB |
681 | if (TREE_CODE (op1) == SSA_NAME) |
682 | { | |
683 | gimple *stmt = SSA_NAME_DEF_STMT (op1); | |
684 | if (gimple_nop_conversion_p (stmt)) | |
685 | { | |
686 | op1 = gimple_assign_rhs1 (stmt); | |
687 | if (op1 == op2) | |
688 | return true; | |
689 | } | |
690 | } | |
691 | ||
692 | if (TREE_CODE (op2) == SSA_NAME) | |
693 | { | |
694 | gimple *stmt = SSA_NAME_DEF_STMT (op2); | |
695 | if (gimple_nop_conversion_p (stmt)) | |
696 | { | |
697 | op2 = gimple_assign_rhs1 (stmt); | |
366298bd RB |
698 | if (op1 == op2 |
699 | || orig_op1 == op2) | |
ce40915e RB |
700 | return true; |
701 | } | |
702 | } | |
703 | ||
704 | return false; | |
705 | } | |
706 | ||
707 | ||
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	/* x OP x == x for these opcodes: drop one copy.  */
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	/* x ^ x == 0: both copies disappear.  */
	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  /* If these were the only two operands, the whole expression
	     folds to zero and the caller can stop.  */
	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      /* Remove LAST (at i-1) and then CURR, which has shifted
		 down into slot i-1 after the first removal.  */
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
781 | ||
/* SSA names that are negate expressions appearing as PLUS_EXPR operands,
   collected here for later inspection by repropagate_negates ().  */
static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  /* Only applies to addition chains of SSA names.  */
  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  /* CURR->OP may be -x (then x + -x -> 0) or ~x (then x + ~x -> -1).  */
  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  /* Found x + -x: drop both operands and add a zero so the
	     overall chain value is preserved.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  /* Found x + ~x: both operands collapse to all-ones (-1).  */
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates ().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}
868 | ||
/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  /* Only bitwise AND/OR chains of SSA names are candidates.  */
  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  /* x & ~x == 0 and x | ~x == -1; either way the whole chain
	     collapses to a constant.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  /* All other operands become dead; keep just the constant.  */
	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
933 | ||
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  /* Constants are sorted to the end of OPS, so only the last entry
     needs inspecting; rank 0 identifies a constant.  */
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      /* x & 0 == 0: the zero absorbs everything else.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      /* x & -1 == x: the all-ones mask is an identity.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      /* x | -1 == -1: all-ones absorbs everything else.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      /* x | 0 == x.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  /* x * 0 == 0, but for floats only when NaNs and signed
	     zeros cannot change the result.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x * 1 == x; for floats only safe without signaling NaNs.  */
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  /* x [^+-] 0 == x; fold_real_zero_addition_p checks whether
	     dropping the zero is valid under the FP semantics in
	     effect.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
1062 | ||
static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  /* Number of occurrences of this (operand, code) pair.  */
  unsigned int cnt;
  /* Unique id, used to stabilize qsort when counts are equal.  */
  unsigned int id;
  /* Operation code of the chain this operand appeared in.  */
  enum tree_code oecode;
  /* The operand itself.  */
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
25c6036a | 1078 | |
/* Oecount hashtable helpers.  The hash table does not store oecount
   records directly; it stores (index into cvec) + 42 as an int, where
   the +42 offset keeps valid entries distinct from the empty (0) and
   deleted (1) sentinel values of int_hash.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  /* Undo the +42 bias to recover the cvec index.  */
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  /* Two entries match when both the operation and the operand agree.  */
  return c1->oecode == c2->oecode && c1->op == c2->op;
}
1106 | ||
1107 | /* Comparison function for qsort sorting oecount elements by count. */ | |
1108 | ||
1109 | static int | |
1110 | oecount_cmp (const void *p1, const void *p2) | |
1111 | { | |
1112 | const oecount *c1 = (const oecount *)p1; | |
1113 | const oecount *c2 = (const oecount *)p2; | |
f1665f5c | 1114 | if (c1->cnt != c2->cnt) |
b51ee78c | 1115 | return c1->cnt > c2->cnt ? 1 : -1; |
f1665f5c DK |
1116 | else |
1117 | /* If counts are identical, use unique IDs to stabilize qsort. */ | |
b51ee78c | 1118 | return c1->id > c2->id ? 1 : -1; |
25c6036a RG |
1119 | } |
1120 | ||
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  /* Recognize pow/powi (and their combined-fn internal variants).  */
  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      /* The base argument must match OP.  */
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
1140 | ||
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      /* pow's exponent is a REAL_CST; round-trip through an integer
	 to decrement it.  */
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      /* powi's exponent is already an integer constant.  */
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
1172 | ||
/* Replace SSA defined by STMT and replace all its uses with new
   SSA.  Also return the new SSA.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  /* Debug uses must still see the old value, i.e.
	     new_lhs OPCODE op; bind that to a DEBUG_EXPR_DECL created
	     lazily on the first debug use encountered.  */
	  if (new_debug_lhs == NULL_TREE)
	    {
	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}
1217 | ||
/* Replace all SSAs defined in STMTS_TO_FIX and replace its
   uses with new SSAs.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
			   vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  /* *DEF itself needs a fresh name unless it was already replaced by
     OP, is not an SSA name, or has no real defining statement.  */
  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}
1238 | ||
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  /* STMT may be either a call (__builtin_pow*) or an assignment.  */
  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  /* SSA replacements keep operand caches valid; constants need an
     explicit update of the use statement.  */
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
1268 | ||
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we dont use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  /* A pow/powi of OP counts as a multiplication by OP; strip
	     one factor by decrementing the exponent.  */
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      /* -OP is OP * -1; removing OP leaves the constant -1.  */
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      /* Removing a -1 factor turns the negate into a copy.  */
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  /* Give every statement whose computed value changed a fresh LHS
     (and fix up debug info), including the chain root when *DEF was
     not replaced above.  */
  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}
1383 | ||
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      /* Equal UIDs happen for statements inserted with the same UID
	 as a neighbor; fall back to a linear scan within the run of
	 same-UID statements.  */
      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
1437 | ||
/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    /* Can't insert among PHIs; insert at the start of their block.  */
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      /* Empty block: inherit the UID from the block's last statement,
	 or use 1 if the block has none.  */
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
1473 | ||
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      /* Neither operand has a real defining statement (constants or
	 default defs): insert at the start of the function.  */
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      /* Insert after the later of the two definitions so both operands
	 are available.  */
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}
1525 | ||
1526 | /* Perform un-distribution of divisions and multiplications. | |
1527 | A * X + B * X is transformed into (A + B) * X and A / X + B / X | |
1528 | to (A + B) / X for real X. | |
1529 | ||
1530 | The algorithm is organized as follows. | |
1531 | ||
1532 | - First we walk the addition chain *OPS looking for summands that | |
1533 | are defined by a multiplication or a real division. This results | |
1534 | in the candidates bitmap with relevant indices into *OPS. | |
1535 | ||
1536 | - Second we build the chains of multiplications or divisions for | |
073a8998 | 1537 | these candidates, counting the number of occurrences of (operand, code) |
25c6036a RG |
1538 | pairs in all of the candidates chains. |
1539 | ||
073a8998 | 1540 | - Third we sort the (operand, code) pairs by number of occurrence and |
25c6036a RG |
1541 | process them starting with the pair with the most uses. |
1542 | ||
1543 | * For each such pair we walk the candidates again to build a | |
1544 | second candidate bitmap noting all multiplication/division chains | |
073a8998 | 1545 | that have at least one occurrence of (operand, code). |
25c6036a RG |
1546 | |
1547 | * We build an alternate addition chain only covering these | |
1548 | candidates with one (operand, code) operation removed from their | |
1549 | multiplication/division chain. | |
1550 | ||
1551 | * The first candidate gets replaced by the alternate addition chain | |
1552 | multiplied/divided by the operand. | |
1553 | ||
1554 | * All candidate chains get disabled for further processing and | |
1555 | processing of (operand, code) pairs continues. | |
1556 | ||
1557 | The alternate addition chains built are re-processed by the main | |
1558 | reassociation algorithm which allows optimizing a * x * y + b * y * x | |
1559 | to (a + b ) * x * y in one invocation of the reassociation pass. */ | |
1560 | ||
static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, class loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  /* Undistribution only applies to addition chains.  */
  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  A candidate is an operand
     defined by a reassociable multiplication or real division.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  /* Need at least two chains to pull a common factor out of.  */
  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
	(*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      /* Flatten the multiplication/division chain feeding this operand
	 into subops[i].  */
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      /* Count occurrences of each (operand, code) pair across all
	 candidate chains via the hash table CTABLE.  */
      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  /* The slot stores the cvec index biased by 42 (length here is
	     index + 1, so +41) — presumably to keep stored values clear
	     of the hash table's special empty/deleted markers; see
	     oecount_hasher.  */
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    {
	      *slot = idx;
	    }
	  else
	    {
	      /* Pair already seen: drop the duplicate entry and bump
		 the existing count.  */
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, " %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.
     cvec.last () is the most frequent pair after the qsort above
     (ordering defined by oecount_cmp, outside this chunk).  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple *oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry *oe1, *oe2;
	  gimple *prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op);
	    }
	  /* Strip one occurrence of the common factor from this chain
	     (see zero_one_operation).  */
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple *sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      /* Neutralize the merged entry: zero does not change the
		 surrounding PLUS_EXPR chain.  */
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  /* The linearized sub-operands of the merged chain are stale.  */
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}
1774 | ||
844381e5 SL |
1775 | /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison |
1776 | expression, examine the other OPS to see if any of them are comparisons | |
1777 | of the same values, which we may be able to combine or eliminate. | |
1778 | For example, we can rewrite (a < b) | (a == b) as (a <= b). */ | |
1779 | ||
static bool
eliminate_redundant_comparison (enum tree_code opcode,
				vec<operand_entry *> *ops,
				unsigned int currindex,
				operand_entry *curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple *def1, *def2;
  int i;
  operand_entry *oe;

  /* Only logical AND/OR of comparisons can be combined this way.  */
  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
	continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
	continue;

      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
	t = maybe_fold_or_comparisons (lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
      if (!t)
	continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
	t = fold_convert (TREE_TYPE (curr->op), t);

      /* Pre-validate that the combined result can later be turned into
	 a GIMPLE assignment (a comparison of two gimple values); if it
	 cannot, keep scanning the remaining operands instead.  */
      if (TREE_CODE (t) != INTEGER_CST
	  && !operand_equal_p (t, curr->op, 0))
	{
	  enum tree_code subcode;
	  tree newop1, newop2;
	  if (!COMPARISON_CLASS_P (t))
	    continue;
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
	    continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t);
	  fprintf (dump_file, "\n");
	}

      /* Now we can delete oe, as it has been subsumed by the new combined
	 expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
	{
	  ops->ordered_remove (currindex);
	  add_to_ops_vec (ops, t);
	}
      else if (!operand_equal_p (t, curr->op, 0))
	{
	  gimple *sum;
	  enum tree_code subcode;
	  tree newop1;
	  tree newop2;
	  gcc_assert (COMPARISON_CLASS_P (t));
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  /* Guaranteed by the pre-validation block above.  */
	  gcc_checking_assert (is_gimple_val (newop1)
			       && is_gimple_val (newop2));
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
	}
      return true;
    }

  return false;
}
25c6036a | 1900 | |
d2db36dd | 1901 | |
df8b0a11 KV |
1902 | /* Transform repeated addition of same values into multiply with |
1903 | constant. */ | |
static bool
transform_add_to_multiply (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  tree op = NULL_TREE;
  int j;
  int i, start = -1, end = 0, count = 0;
  /* Each pair records an inclusive [start, end] index run of equal
     operands in OPS.  */
  auto_vec<std::pair <int, int> > indxs;
  bool changed = false;

  /* Only integral types, or floats under -funsafe-math-optimizations
     (x + x + x -> 3*x changes rounding otherwise).  */
  if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
	  || !flag_unsafe_math_optimizations))
    return false;

  /* Look for repeated operands.  Only adjacent repeats are detected
     by this linear scan, so equal operands are assumed to be adjacent
     in OPS — TODO confirm the vector is sorted by the caller.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (start == -1)
	{
	  count = 1;
	  op = oe->op;
	  start = i;
	}
      else if (operand_equal_p (oe->op, op, 0))
	{
	  count++;
	  end = i;
	}
      else
	{
	  if (count > 1)
	    indxs.safe_push (std::make_pair (start, end));
	  count = 1;
	  op = oe->op;
	  start = i;
	}
    }

  /* Flush the trailing run, if any.  */
  if (count > 1)
    indxs.safe_push (std::make_pair (start, end));

  /* Process runs back to front so removals do not invalidate the
     indices of runs not yet processed.  */
  for (j = indxs.length () - 1; j >= 0; --j)
    {
      /* Convert repeated operand addition to multiplication.  */
      start = indxs[j].first;
      end = indxs[j].second;
      op = (*ops)[start]->op;
      count = end - start + 1;
      for (i = end; i >= start; --i)
	ops->unordered_remove (i);
      tree tmp = make_ssa_name (TREE_TYPE (op));
      tree cst = build_int_cst (integer_type_node, count);
      gassign *mul_stmt
	= gimple_build_assign (tmp, MULT_EXPR,
			       op, fold_convert (TREE_TYPE (op), cst));
      /* Mark as visited so reassociation does not reprocess it.  */
      gimple_set_visited (mul_stmt, true);
      add_to_ops_vec (ops, tmp, mul_stmt);
      changed = true;
    }

  return changed;
}
1967 | ||
1968 | ||
0e0ed594 JL |
1969 | /* Perform various identities and other optimizations on the list of |
1970 | operand entries, stored in OPS. The tree code for the binary | |
1971 | operation between all the operands is OPCODE. */ | |
012309e6 | 1972 | |
0e0ed594 JL |
static void
optimize_ops_list (enum tree_code opcode,
		   vec<operand_entry *> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry *oe;
  operand_entry *oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry *oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
	  && is_gimple_min_invariant (oelm1->op)
	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
					TREE_TYPE (oelast->op)))
	{
	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
				     oelm1->op, oelast->op);

	  if (folded && is_gimple_min_invariant (folded))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Merging constants\n");

	      ops->pop ();
	      ops->pop ();

	      add_to_ops_vec (ops, folded);
	      reassociate_stats.constants_eliminated++;

	      /* Recurse: the merged constant may combine further.  */
	      optimize_ops_list (opcode, ops);
	      return;
	    }
	}
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  /* Apply the pairwise eliminations; I is only advanced when the
     current entry survived all of them.  */
  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
	return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
	{
	  /* DONE set by eliminate_duplicate_pair means the whole list
	     collapsed; nothing more to do.  */
	  if (done)
	    return;
	  iterate = true;
	  oelast = NULL;
	  continue;
	}
      oelast = oe;
      i++;
    }

  /* Something changed — rerun until a fixed point is reached.  */
  if (iterate)
    optimize_ops_list (opcode, ops);
}
2045 | ||
0ccb5dbf JJ |
2046 | /* The following functions are subroutines to optimize_range_tests and allow |
2047 | it to try to change a logical combination of comparisons into a range | |
2048 | test. | |
2049 | ||
2050 | For example, both | |
2051 | X == 2 || X == 5 || X == 3 || X == 4 | |
2052 | and | |
2053 | X >= 2 && X <= 5 | |
2054 | are converted to | |
2055 | (unsigned) (X - 2) <= 3 | |
2056 | ||
2057 | For more information see comments above fold_test_range in fold-const.c, | |
2058 | this implementation is for GIMPLE. */ | |
2059 | ||
struct range_entry
{
  /* The expression the range test applies to: an SSA_NAME, or
     NULL_TREE when the test came from a GIMPLE_COND (see
     init_range_entry).  */
  tree exp;
  /* Inclusive bounds; NULL_TREE low means the type minimum and
     NULL_TREE high means the type maximum (see range_entry_cmp).  */
  tree low;
  tree high;
  /* True for "EXP in [LOW, HIGH]", false for the negated test.  */
  bool in_p;
  /* True if deriving the range assumed signed overflow is undefined;
     used to emit -Wstrict-overflow when the range is simplified.  */
  bool strict_overflow_p;
  /* IDX is the index of the corresponding entry in the OPS vector;
     NEXT presumably chains related entries — its use is in
     optimize_range_tests, outside this chunk.  */
  unsigned int idx, next;
};
2069 | ||
2070 | /* This is similar to make_range in fold-const.c, but on top of | |
d578e863 JJ |
2071 | GIMPLE instead of trees. If EXP is non-NULL, it should be |
2072 | an SSA_NAME and STMT argument is ignored, otherwise STMT | |
2073 | argument should be a GIMPLE_COND. */ | |
0ccb5dbf JJ |
2074 | |
2075 | static void | |
355fe088 | 2076 | init_range_entry (struct range_entry *r, tree exp, gimple *stmt) |
0ccb5dbf JJ |
2077 | { |
2078 | int in_p; | |
2079 | tree low, high; | |
2080 | bool is_bool, strict_overflow_p; | |
2081 | ||
2082 | r->exp = NULL_TREE; | |
2083 | r->in_p = false; | |
2084 | r->strict_overflow_p = false; | |
2085 | r->low = NULL_TREE; | |
2086 | r->high = NULL_TREE; | |
d578e863 JJ |
2087 | if (exp != NULL_TREE |
2088 | && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) | |
0ccb5dbf JJ |
2089 | return; |
2090 | ||
2091 | /* Start with simply saying "EXP != 0" and then look at the code of EXP | |
2092 | and see if we can refine the range. Some of the cases below may not | |
2093 | happen, but it doesn't seem worth worrying about this. We "continue" | |
2094 | the outer loop when we've changed something; otherwise we "break" | |
2095 | the switch, which will "break" the while. */ | |
d578e863 | 2096 | low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; |
0ccb5dbf JJ |
2097 | high = low; |
2098 | in_p = 0; | |
2099 | strict_overflow_p = false; | |
2100 | is_bool = false; | |
d578e863 JJ |
2101 | if (exp == NULL_TREE) |
2102 | is_bool = true; | |
2103 | else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) | |
0ccb5dbf JJ |
2104 | { |
2105 | if (TYPE_UNSIGNED (TREE_TYPE (exp))) | |
2106 | is_bool = true; | |
2107 | else | |
2108 | return; | |
2109 | } | |
2110 | else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) | |
2111 | is_bool = true; | |
2112 | ||
2113 | while (1) | |
2114 | { | |
0ccb5dbf JJ |
2115 | enum tree_code code; |
2116 | tree arg0, arg1, exp_type; | |
2117 | tree nexp; | |
2118 | location_t loc; | |
2119 | ||
d578e863 JJ |
2120 | if (exp != NULL_TREE) |
2121 | { | |
bababbfb EB |
2122 | if (TREE_CODE (exp) != SSA_NAME |
2123 | || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) | |
d578e863 | 2124 | break; |
0ccb5dbf | 2125 | |
d578e863 JJ |
2126 | stmt = SSA_NAME_DEF_STMT (exp); |
2127 | if (!is_gimple_assign (stmt)) | |
2128 | break; | |
2129 | ||
2130 | code = gimple_assign_rhs_code (stmt); | |
2131 | arg0 = gimple_assign_rhs1 (stmt); | |
2132 | arg1 = gimple_assign_rhs2 (stmt); | |
2133 | exp_type = TREE_TYPE (exp); | |
2134 | } | |
2135 | else | |
2136 | { | |
2137 | code = gimple_cond_code (stmt); | |
2138 | arg0 = gimple_cond_lhs (stmt); | |
2139 | arg1 = gimple_cond_rhs (stmt); | |
2140 | exp_type = boolean_type_node; | |
2141 | } | |
0ccb5dbf | 2142 | |
98dc565e RB |
2143 | if (TREE_CODE (arg0) != SSA_NAME |
2144 | || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)) | |
e4a5b262 | 2145 | break; |
0ccb5dbf JJ |
2146 | loc = gimple_location (stmt); |
2147 | switch (code) | |
2148 | { | |
2149 | case BIT_NOT_EXPR: | |
28fd0ba2 JJ |
2150 | if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE |
2151 | /* Ensure the range is either +[-,0], +[0,0], | |
2152 | -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or | |
2153 | -[1,1]. If it is e.g. +[-,-] or -[-,-] | |
2154 | or similar expression of unconditional true or | |
2155 | false, it should not be negated. */ | |
2156 | && ((high && integer_zerop (high)) | |
2157 | || (low && integer_onep (low)))) | |
0ccb5dbf JJ |
2158 | { |
2159 | in_p = !in_p; | |
2160 | exp = arg0; | |
2161 | continue; | |
2162 | } | |
2163 | break; | |
2164 | case SSA_NAME: | |
2165 | exp = arg0; | |
2166 | continue; | |
2167 | CASE_CONVERT: | |
2168 | if (is_bool) | |
5e5ef52c EB |
2169 | { |
2170 | if ((TYPE_PRECISION (exp_type) == 1 | |
2171 | || TREE_CODE (exp_type) == BOOLEAN_TYPE) | |
2172 | && TYPE_PRECISION (TREE_TYPE (arg0)) > 1) | |
2173 | return; | |
2174 | } | |
2175 | else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) | |
0ccb5dbf JJ |
2176 | { |
2177 | if (TYPE_UNSIGNED (TREE_TYPE (arg0))) | |
2178 | is_bool = true; | |
2179 | else | |
2180 | return; | |
2181 | } | |
2182 | else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) | |
2183 | is_bool = true; | |
2184 | goto do_default; | |
2185 | case EQ_EXPR: | |
2186 | case NE_EXPR: | |
2187 | case LT_EXPR: | |
2188 | case LE_EXPR: | |
2189 | case GE_EXPR: | |
2190 | case GT_EXPR: | |
2191 | is_bool = true; | |
2192 | /* FALLTHRU */ | |
2193 | default: | |
2194 | if (!is_bool) | |
2195 | return; | |
2196 | do_default: | |
2197 | nexp = make_range_step (loc, code, arg0, arg1, exp_type, | |
2198 | &low, &high, &in_p, | |
2199 | &strict_overflow_p); | |
2200 | if (nexp != NULL_TREE) | |
2201 | { | |
2202 | exp = nexp; | |
2203 | gcc_assert (TREE_CODE (exp) == SSA_NAME); | |
2204 | continue; | |
2205 | } | |
2206 | break; | |
2207 | } | |
2208 | break; | |
2209 | } | |
2210 | if (is_bool) | |
2211 | { | |
2212 | r->exp = exp; | |
2213 | r->in_p = in_p; | |
2214 | r->low = low; | |
2215 | r->high = high; | |
2216 | r->strict_overflow_p = strict_overflow_p; | |
2217 | } | |
2218 | } | |
2219 | ||
2220 | /* Comparison function for qsort. Sort entries | |
2221 | without SSA_NAME exp first, then with SSA_NAMEs sorted | |
2222 | by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs | |
2223 | by increasing ->low and if ->low is the same, by increasing | |
2224 | ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE | |
2225 | maximum. */ | |
2226 | ||
2227 | static int | |
2228 | range_entry_cmp (const void *a, const void *b) | |
2229 | { | |
2230 | const struct range_entry *p = (const struct range_entry *) a; | |
2231 | const struct range_entry *q = (const struct range_entry *) b; | |
2232 | ||
2233 | if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) | |
2234 | { | |
2235 | if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) | |
2236 | { | |
2237 | /* Group range_entries for the same SSA_NAME together. */ | |
2238 | if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) | |
2239 | return -1; | |
2240 | else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) | |
2241 | return 1; | |
2242 | /* If ->low is different, NULL low goes first, then by | |
2243 | ascending low. */ | |
2244 | if (p->low != NULL_TREE) | |
2245 | { | |
2246 | if (q->low != NULL_TREE) | |
2247 | { | |
2248 | tree tem = fold_binary (LT_EXPR, boolean_type_node, | |
2249 | p->low, q->low); | |
2250 | if (tem && integer_onep (tem)) | |
2251 | return -1; | |
2252 | tem = fold_binary (GT_EXPR, boolean_type_node, | |
2253 | p->low, q->low); | |
2254 | if (tem && integer_onep (tem)) | |
2255 | return 1; | |
2256 | } | |
2257 | else | |
2258 | return 1; | |
2259 | } | |
2260 | else if (q->low != NULL_TREE) | |
2261 | return -1; | |
2262 | /* If ->high is different, NULL high goes last, before that by | |
2263 | ascending high. */ | |
2264 | if (p->high != NULL_TREE) | |
2265 | { | |
2266 | if (q->high != NULL_TREE) | |
2267 | { | |
2268 | tree tem = fold_binary (LT_EXPR, boolean_type_node, | |
2269 | p->high, q->high); | |
2270 | if (tem && integer_onep (tem)) | |
2271 | return -1; | |
2272 | tem = fold_binary (GT_EXPR, boolean_type_node, | |
2273 | p->high, q->high); | |
2274 | if (tem && integer_onep (tem)) | |
2275 | return 1; | |
2276 | } | |
2277 | else | |
2278 | return -1; | |
2279 | } | |
77e60088 | 2280 | else if (q->high != NULL_TREE) |
0ccb5dbf JJ |
2281 | return 1; |
2282 | /* If both ranges are the same, sort below by ascending idx. */ | |
2283 | } | |
2284 | else | |
2285 | return 1; | |
2286 | } | |
2287 | else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) | |
2288 | return -1; | |
2289 | ||
2290 | if (p->idx < q->idx) | |
2291 | return -1; | |
2292 | else | |
2293 | { | |
2294 | gcc_checking_assert (p->idx > q->idx); | |
2295 | return 1; | |
2296 | } | |
2297 | } | |
2298 | ||
aebce396 JJ |
2299 | /* Helper function for update_range_test. Force EXPR into an SSA_NAME, |
2300 | insert needed statements BEFORE or after GSI. */ | |
2301 | ||
2302 | static tree | |
2303 | force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before) | |
2304 | { | |
2305 | enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING; | |
2306 | tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m); | |
2307 | if (TREE_CODE (ret) != SSA_NAME) | |
2308 | { | |
2309 | gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret); | |
2310 | if (before) | |
2311 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
2312 | else | |
2313 | gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING); | |
2314 | ret = gimple_assign_lhs (g); | |
2315 | } | |
2316 | return ret; | |
2317 | } | |
2318 | ||
0ccb5dbf JJ |
2319 | /* Helper routine of optimize_range_test. |
2320 | [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for | |
2321 | RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, | |
73049af5 JJ |
2322 | OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE |
2323 | is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to | |
2324 | an array of COUNT pointers to other ranges. Return | |
d578e863 JJ |
2325 | true if the range merge has been successful. |
2326 | If OPCODE is ERROR_MARK, this is called from within | |
2327 | maybe_optimize_range_tests and is performing inter-bb range optimization. | |
5e40da4f JJ |
2328 | In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in |
2329 | oe->rank. */ | |
0ccb5dbf JJ |
2330 | |
2331 | static bool | |
2332 | update_range_test (struct range_entry *range, struct range_entry *otherrange, | |
73049af5 | 2333 | struct range_entry **otherrangep, |
0ccb5dbf | 2334 | unsigned int count, enum tree_code opcode, |
526ceb68 | 2335 | vec<operand_entry *> *ops, tree exp, gimple_seq seq, |
73049af5 | 2336 | bool in_p, tree low, tree high, bool strict_overflow_p) |
0ccb5dbf | 2337 | { |
526ceb68 | 2338 | operand_entry *oe = (*ops)[range->idx]; |
d578e863 | 2339 | tree op = oe->op; |
3824a0a2 JJ |
2340 | gimple *stmt = op ? SSA_NAME_DEF_STMT (op) |
2341 | : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); | |
d578e863 JJ |
2342 | location_t loc = gimple_location (stmt); |
2343 | tree optype = op ? TREE_TYPE (op) : boolean_type_node; | |
73049af5 JJ |
2344 | tree tem = build_range_check (loc, optype, unshare_expr (exp), |
2345 | in_p, low, high); | |
0ccb5dbf JJ |
2346 | enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; |
2347 | gimple_stmt_iterator gsi; | |
3824a0a2 | 2348 | unsigned int i, uid; |
0ccb5dbf JJ |
2349 | |
2350 | if (tem == NULL_TREE) | |
2351 | return false; | |
2352 | ||
3824a0a2 JJ |
2353 | /* If op is default def SSA_NAME, there is no place to insert the |
2354 | new comparison. Give up, unless we can use OP itself as the | |
2355 | range test. */ | |
2356 | if (op && SSA_NAME_IS_DEFAULT_DEF (op)) | |
2357 | { | |
2358 | if (op == range->exp | |
2359 | && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype)) | |
2360 | || TREE_CODE (optype) == BOOLEAN_TYPE) | |
2361 | && (op == tem | |
2362 | || (TREE_CODE (tem) == EQ_EXPR | |
2363 | && TREE_OPERAND (tem, 0) == op | |
2364 | && integer_onep (TREE_OPERAND (tem, 1)))) | |
2365 | && opcode != BIT_IOR_EXPR | |
2366 | && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR)) | |
2367 | { | |
2368 | stmt = NULL; | |
2369 | tem = op; | |
2370 | } | |
2371 | else | |
2372 | return false; | |
2373 | } | |
2374 | ||
0ccb5dbf JJ |
2375 | if (strict_overflow_p && issue_strict_overflow_warning (wc)) |
2376 | warning_at (loc, OPT_Wstrict_overflow, | |
2377 | "assuming signed overflow does not occur " | |
2378 | "when simplifying range test"); | |
2379 | ||
2380 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2381 | { | |
2382 | struct range_entry *r; | |
2383 | fprintf (dump_file, "Optimizing range tests "); | |
ef6cb4c7 | 2384 | print_generic_expr (dump_file, range->exp); |
0ccb5dbf | 2385 | fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); |
ef6cb4c7 | 2386 | print_generic_expr (dump_file, range->low); |
0ccb5dbf | 2387 | fprintf (dump_file, ", "); |
ef6cb4c7 | 2388 | print_generic_expr (dump_file, range->high); |
0ccb5dbf | 2389 | fprintf (dump_file, "]"); |
73049af5 | 2390 | for (i = 0; i < count; i++) |
0ccb5dbf | 2391 | { |
73049af5 JJ |
2392 | if (otherrange) |
2393 | r = otherrange + i; | |
2394 | else | |
2395 | r = otherrangep[i]; | |
caab3763 JJ |
2396 | if (r->exp |
2397 | && r->exp != range->exp | |
2398 | && TREE_CODE (r->exp) == SSA_NAME) | |
2399 | { | |
2400 | fprintf (dump_file, " and "); | |
2401 | print_generic_expr (dump_file, r->exp); | |
2402 | } | |
2403 | else | |
2404 | fprintf (dump_file, " and"); | |
2405 | fprintf (dump_file, " %c[", r->in_p ? '+' : '-'); | |
ef6cb4c7 | 2406 | print_generic_expr (dump_file, r->low); |
0ccb5dbf | 2407 | fprintf (dump_file, ", "); |
ef6cb4c7 | 2408 | print_generic_expr (dump_file, r->high); |
0ccb5dbf JJ |
2409 | fprintf (dump_file, "]"); |
2410 | } | |
2411 | fprintf (dump_file, "\n into "); | |
ef6cb4c7 | 2412 | print_generic_expr (dump_file, tem); |
0ccb5dbf JJ |
2413 | fprintf (dump_file, "\n"); |
2414 | } | |
2415 | ||
d578e863 JJ |
2416 | if (opcode == BIT_IOR_EXPR |
2417 | || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) | |
0ccb5dbf JJ |
2418 | tem = invert_truthvalue_loc (loc, tem); |
2419 | ||
d578e863 | 2420 | tem = fold_convert_loc (loc, optype, tem); |
3824a0a2 JJ |
2421 | if (stmt) |
2422 | { | |
2423 | gsi = gsi_for_stmt (stmt); | |
2424 | uid = gimple_uid (stmt); | |
2425 | } | |
2426 | else | |
2427 | { | |
2428 | gsi = gsi_none (); | |
2429 | uid = 0; | |
2430 | } | |
2431 | if (stmt == NULL) | |
2432 | gcc_checking_assert (tem == op); | |
5f07cbdb JJ |
2433 | /* In rare cases range->exp can be equal to lhs of stmt. |
2434 | In that case we have to insert after the stmt rather then before | |
952e216e | 2435 | it. If stmt is a PHI, insert it at the start of the basic block. */ |
3824a0a2 | 2436 | else if (op != range->exp) |
952e216e JJ |
2437 | { |
2438 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); | |
aebce396 | 2439 | tem = force_into_ssa_name (&gsi, tem, true); |
952e216e JJ |
2440 | gsi_prev (&gsi); |
2441 | } | |
2442 | else if (gimple_code (stmt) != GIMPLE_PHI) | |
73049af5 JJ |
2443 | { |
2444 | gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); | |
aebce396 | 2445 | tem = force_into_ssa_name (&gsi, tem, false); |
73049af5 | 2446 | } |
5f07cbdb JJ |
2447 | else |
2448 | { | |
952e216e JJ |
2449 | gsi = gsi_after_labels (gimple_bb (stmt)); |
2450 | if (!gsi_end_p (gsi)) | |
2451 | uid = gimple_uid (gsi_stmt (gsi)); | |
2452 | else | |
2453 | { | |
2454 | gsi = gsi_start_bb (gimple_bb (stmt)); | |
2455 | uid = 1; | |
2456 | while (!gsi_end_p (gsi)) | |
2457 | { | |
2458 | uid = gimple_uid (gsi_stmt (gsi)); | |
2459 | gsi_next (&gsi); | |
2460 | } | |
2461 | } | |
73049af5 | 2462 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); |
aebce396 | 2463 | tem = force_into_ssa_name (&gsi, tem, true); |
952e216e JJ |
2464 | if (gsi_end_p (gsi)) |
2465 | gsi = gsi_last_bb (gimple_bb (stmt)); | |
2466 | else | |
2467 | gsi_prev (&gsi); | |
5f07cbdb JJ |
2468 | } |
2469 | for (; !gsi_end_p (gsi); gsi_prev (&gsi)) | |
5e40da4f JJ |
2470 | if (gimple_uid (gsi_stmt (gsi))) |
2471 | break; | |
2472 | else | |
952e216e | 2473 | gimple_set_uid (gsi_stmt (gsi), uid); |
0ccb5dbf | 2474 | |
d578e863 | 2475 | oe->op = tem; |
0ccb5dbf JJ |
2476 | range->exp = exp; |
2477 | range->low = low; | |
2478 | range->high = high; | |
2479 | range->in_p = in_p; | |
2480 | range->strict_overflow_p = false; | |
2481 | ||
73049af5 | 2482 | for (i = 0; i < count; i++) |
0ccb5dbf | 2483 | { |
73049af5 JJ |
2484 | if (otherrange) |
2485 | range = otherrange + i; | |
2486 | else | |
2487 | range = otherrangep[i]; | |
9771b263 | 2488 | oe = (*ops)[range->idx]; |
d578e863 JJ |
2489 | /* Now change all the other range test immediate uses, so that |
2490 | those tests will be optimized away. */ | |
2491 | if (opcode == ERROR_MARK) | |
2492 | { | |
2493 | if (oe->op) | |
5e40da4f JJ |
2494 | oe->op = build_int_cst (TREE_TYPE (oe->op), |
2495 | oe->rank == BIT_IOR_EXPR ? 0 : 1); | |
d578e863 | 2496 | else |
5e40da4f JJ |
2497 | oe->op = (oe->rank == BIT_IOR_EXPR |
2498 | ? boolean_false_node : boolean_true_node); | |
d578e863 | 2499 | } |
5e40da4f JJ |
2500 | else |
2501 | oe->op = error_mark_node; | |
0ccb5dbf | 2502 | range->exp = NULL_TREE; |
a93cdc5c JJ |
2503 | range->low = NULL_TREE; |
2504 | range->high = NULL_TREE; | |
0ccb5dbf JJ |
2505 | } |
2506 | return true; | |
2507 | } | |
2508 | ||
b114bfb4 ZC |
2509 | /* Optimize X == CST1 || X == CST2 |
2510 | if popcount (CST1 ^ CST2) == 1 into | |
2511 | (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). | |
2512 | Similarly for ranges. E.g. | |
2513 | X != 2 && X != 3 && X != 10 && X != 11 | |
2514 | will be transformed by the previous optimization into | |
2515 | !((X - 2U) <= 1U || (X - 10U) <= 1U) | |
2516 | and this loop can transform that into | |
2517 | !(((X & ~8) - 2U) <= 1U). */ | |
2518 | ||
2519 | static bool | |
2520 | optimize_range_tests_xor (enum tree_code opcode, tree type, | |
2521 | tree lowi, tree lowj, tree highi, tree highj, | |
526ceb68 | 2522 | vec<operand_entry *> *ops, |
b114bfb4 ZC |
2523 | struct range_entry *rangei, |
2524 | struct range_entry *rangej) | |
2525 | { | |
2526 | tree lowxor, highxor, tem, exp; | |
73049af5 JJ |
2527 | /* Check lowi ^ lowj == highi ^ highj and |
2528 | popcount (lowi ^ lowj) == 1. */ | |
b114bfb4 ZC |
2529 | lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); |
2530 | if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) | |
2531 | return false; | |
4eb4a256 | 2532 | if (!integer_pow2p (lowxor)) |
b114bfb4 ZC |
2533 | return false; |
2534 | highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); | |
2535 | if (!tree_int_cst_equal (lowxor, highxor)) | |
2536 | return false; | |
2537 | ||
4df6a906 JJ |
2538 | exp = rangei->exp; |
2539 | scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); | |
2540 | int prec = GET_MODE_PRECISION (mode); | |
2541 | if (TYPE_PRECISION (type) < prec | |
2542 | || (wi::to_wide (TYPE_MIN_VALUE (type)) | |
2543 | != wi::min_value (prec, TYPE_SIGN (type))) | |
2544 | || (wi::to_wide (TYPE_MAX_VALUE (type)) | |
2545 | != wi::max_value (prec, TYPE_SIGN (type)))) | |
2546 | { | |
2547 | type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type)); | |
2548 | exp = fold_convert (type, exp); | |
2549 | lowxor = fold_convert (type, lowxor); | |
2550 | lowi = fold_convert (type, lowi); | |
2551 | highi = fold_convert (type, highi); | |
2552 | } | |
b114bfb4 | 2553 | tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); |
4df6a906 | 2554 | exp = fold_build2 (BIT_AND_EXPR, type, exp, tem); |
b114bfb4 ZC |
2555 | lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); |
2556 | highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); | |
73049af5 JJ |
2557 | if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, |
2558 | NULL, rangei->in_p, lowj, highj, | |
b114bfb4 ZC |
2559 | rangei->strict_overflow_p |
2560 | || rangej->strict_overflow_p)) | |
2561 | return true; | |
2562 | return false; | |
2563 | } | |
2564 | ||
2565 | /* Optimize X == CST1 || X == CST2 | |
2566 | if popcount (CST2 - CST1) == 1 into | |
2567 | ((X - CST1) & ~(CST2 - CST1)) == 0. | |
2568 | Similarly for ranges. E.g. | |
2569 | X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 | |
2570 | || X == 75 || X == 45 | |
2571 | will be transformed by the previous optimization into | |
2572 | (X - 43U) <= 3U || (X - 75U) <= 3U | |
2573 | and this loop can transform that into | |
2574 | ((X - 43U) & ~(75U - 43U)) <= 3U. */ | |
2575 | static bool | |
2576 | optimize_range_tests_diff (enum tree_code opcode, tree type, | |
a93cdc5c JJ |
2577 | tree lowi, tree lowj, tree highi, tree highj, |
2578 | vec<operand_entry *> *ops, | |
2579 | struct range_entry *rangei, | |
2580 | struct range_entry *rangej) | |
b114bfb4 ZC |
2581 | { |
2582 | tree tem1, tem2, mask; | |
2583 | /* Check highi - lowi == highj - lowj. */ | |
2584 | tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); | |
2585 | if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) | |
2586 | return false; | |
2587 | tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); | |
2588 | if (!tree_int_cst_equal (tem1, tem2)) | |
2589 | return false; | |
2590 | /* Check popcount (lowj - lowi) == 1. */ | |
2591 | tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); | |
2592 | if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) | |
2593 | return false; | |
4eb4a256 | 2594 | if (!integer_pow2p (tem1)) |
b114bfb4 ZC |
2595 | return false; |
2596 | ||
4df6a906 JJ |
2597 | scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); |
2598 | int prec = GET_MODE_PRECISION (mode); | |
2599 | if (TYPE_PRECISION (type) < prec | |
2600 | || (wi::to_wide (TYPE_MIN_VALUE (type)) | |
2601 | != wi::min_value (prec, TYPE_SIGN (type))) | |
2602 | || (wi::to_wide (TYPE_MAX_VALUE (type)) | |
2603 | != wi::max_value (prec, TYPE_SIGN (type)))) | |
2604 | type = build_nonstandard_integer_type (prec, 1); | |
2605 | else | |
2606 | type = unsigned_type_for (type); | |
db247aed JJ |
2607 | tem1 = fold_convert (type, tem1); |
2608 | tem2 = fold_convert (type, tem2); | |
2609 | lowi = fold_convert (type, lowi); | |
b114bfb4 | 2610 | mask = fold_build1 (BIT_NOT_EXPR, type, tem1); |
56a6d474 | 2611 | tem1 = fold_build2 (MINUS_EXPR, type, |
db247aed | 2612 | fold_convert (type, rangei->exp), lowi); |
b114bfb4 ZC |
2613 | tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); |
2614 | lowj = build_int_cst (type, 0); | |
73049af5 JJ |
2615 | if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, |
2616 | NULL, rangei->in_p, lowj, tem2, | |
b114bfb4 ZC |
2617 | rangei->strict_overflow_p |
2618 | || rangej->strict_overflow_p)) | |
2619 | return true; | |
2620 | return false; | |
2621 | } | |
2622 | ||
2623 | /* It does some common checks for function optimize_range_tests_xor and | |
2624 | optimize_range_tests_diff. | |
2625 | If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor. | |
2626 | Else it calls optimize_range_tests_diff. */ | |
2627 | ||
2628 | static bool | |
2629 | optimize_range_tests_1 (enum tree_code opcode, int first, int length, | |
526ceb68 | 2630 | bool optimize_xor, vec<operand_entry *> *ops, |
b114bfb4 ZC |
2631 | struct range_entry *ranges) |
2632 | { | |
2633 | int i, j; | |
2634 | bool any_changes = false; | |
2635 | for (i = first; i < length; i++) | |
2636 | { | |
2637 | tree lowi, highi, lowj, highj, type, tem; | |
2638 | ||
2639 | if (ranges[i].exp == NULL_TREE || ranges[i].in_p) | |
2640 | continue; | |
2641 | type = TREE_TYPE (ranges[i].exp); | |
2642 | if (!INTEGRAL_TYPE_P (type)) | |
2643 | continue; | |
2644 | lowi = ranges[i].low; | |
2645 | if (lowi == NULL_TREE) | |
2646 | lowi = TYPE_MIN_VALUE (type); | |
2647 | highi = ranges[i].high; | |
2648 | if (highi == NULL_TREE) | |
2649 | continue; | |
2650 | for (j = i + 1; j < length && j < i + 64; j++) | |
2651 | { | |
2652 | bool changes; | |
2653 | if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) | |
2654 | continue; | |
2655 | lowj = ranges[j].low; | |
2656 | if (lowj == NULL_TREE) | |
2657 | continue; | |
2658 | highj = ranges[j].high; | |
2659 | if (highj == NULL_TREE) | |
2660 | highj = TYPE_MAX_VALUE (type); | |
2661 | /* Check lowj > highi. */ | |
2662 | tem = fold_binary (GT_EXPR, boolean_type_node, | |
2663 | lowj, highi); | |
2664 | if (tem == NULL_TREE || !integer_onep (tem)) | |
2665 | continue; | |
2666 | if (optimize_xor) | |
2667 | changes = optimize_range_tests_xor (opcode, type, lowi, lowj, | |
2668 | highi, highj, ops, | |
2669 | ranges + i, ranges + j); | |
2670 | else | |
2671 | changes = optimize_range_tests_diff (opcode, type, lowi, lowj, | |
2672 | highi, highj, ops, | |
2673 | ranges + i, ranges + j); | |
2674 | if (changes) | |
2675 | { | |
2676 | any_changes = true; | |
2677 | break; | |
2678 | } | |
2679 | } | |
2680 | } | |
2681 | return any_changes; | |
2682 | } | |
2683 | ||
73049af5 JJ |
2684 | /* Helper function of optimize_range_tests_to_bit_test. Handle a single |
2685 | range, EXP, LOW, HIGH, compute bit mask of bits to test and return | |
2686 | EXP on success, NULL otherwise. */ | |
2687 | ||
2688 | static tree | |
2689 | extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, | |
2690 | wide_int *mask, tree *totallowp) | |
2691 | { | |
2692 | tree tem = int_const_binop (MINUS_EXPR, high, low); | |
2693 | if (tem == NULL_TREE | |
2694 | || TREE_CODE (tem) != INTEGER_CST | |
2695 | || TREE_OVERFLOW (tem) | |
2696 | || tree_int_cst_sgn (tem) == -1 | |
2697 | || compare_tree_int (tem, prec) != -1) | |
2698 | return NULL_TREE; | |
2699 | ||
2700 | unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; | |
2701 | *mask = wi::shifted_mask (0, max, false, prec); | |
2702 | if (TREE_CODE (exp) == BIT_AND_EXPR | |
2703 | && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) | |
2704 | { | |
2705 | widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); | |
2706 | msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); | |
2707 | if (wi::popcount (msk) == 1 | |
2708 | && wi::ltu_p (msk, prec - max)) | |
2709 | { | |
2710 | *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); | |
2711 | max += msk.to_uhwi (); | |
2712 | exp = TREE_OPERAND (exp, 0); | |
2713 | if (integer_zerop (low) | |
2714 | && TREE_CODE (exp) == PLUS_EXPR | |
2715 | && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) | |
2716 | { | |
c2f41ffd JJ |
2717 | tree ret = TREE_OPERAND (exp, 0); |
2718 | STRIP_NOPS (ret); | |
73049af5 JJ |
2719 | widest_int bias |
2720 | = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), | |
2721 | TYPE_PRECISION (TREE_TYPE (low)))); | |
c2f41ffd | 2722 | tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); |
73049af5 JJ |
2723 | if (totallowp) |
2724 | { | |
2725 | *totallowp = tbias; | |
c2f41ffd | 2726 | return ret; |
73049af5 JJ |
2727 | } |
2728 | else if (!tree_int_cst_lt (totallow, tbias)) | |
2729 | return NULL_TREE; | |
c2f41ffd | 2730 | bias = wi::to_widest (tbias); |
73049af5 | 2731 | bias -= wi::to_widest (totallow); |
032c80e9 | 2732 | if (bias >= 0 && bias < prec - max) |
73049af5 JJ |
2733 | { |
2734 | *mask = wi::lshift (*mask, bias); | |
c2f41ffd | 2735 | return ret; |
73049af5 JJ |
2736 | } |
2737 | } | |
2738 | } | |
2739 | } | |
2740 | if (totallowp) | |
2741 | return exp; | |
2742 | if (!tree_int_cst_lt (totallow, low)) | |
2743 | return exp; | |
2744 | tem = int_const_binop (MINUS_EXPR, low, totallow); | |
2745 | if (tem == NULL_TREE | |
2746 | || TREE_CODE (tem) != INTEGER_CST | |
2747 | || TREE_OVERFLOW (tem) | |
2748 | || compare_tree_int (tem, prec - max) == 1) | |
2749 | return NULL_TREE; | |
2750 | ||
2751 | *mask = wi::lshift (*mask, wi::to_widest (tem)); | |
2752 | return exp; | |
2753 | } | |
2754 | ||
2755 | /* Attempt to optimize small range tests using bit test. | |
2756 | E.g. | |
2757 | X != 43 && X != 76 && X != 44 && X != 78 && X != 49 | |
2758 | && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 | |
2759 | has been by earlier optimizations optimized into: | |
2760 | ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 | |
2761 | As all the 43 through 82 range is less than 64 numbers, | |
2762 | for 64-bit word targets optimize that into: | |
2763 | (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ | |
2764 | ||
2765 | static bool | |
2766 | optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, | |
526ceb68 | 2767 | vec<operand_entry *> *ops, |
73049af5 JJ |
2768 | struct range_entry *ranges) |
2769 | { | |
2770 | int i, j; | |
2771 | bool any_changes = false; | |
2772 | int prec = GET_MODE_BITSIZE (word_mode); | |
2773 | auto_vec<struct range_entry *, 64> candidates; | |
2774 | ||
2775 | for (i = first; i < length - 2; i++) | |
2776 | { | |
2777 | tree lowi, highi, lowj, highj, type; | |
2778 | ||
2779 | if (ranges[i].exp == NULL_TREE || ranges[i].in_p) | |
2780 | continue; | |
2781 | type = TREE_TYPE (ranges[i].exp); | |
2782 | if (!INTEGRAL_TYPE_P (type)) | |
2783 | continue; | |
2784 | lowi = ranges[i].low; | |
2785 | if (lowi == NULL_TREE) | |
2786 | lowi = TYPE_MIN_VALUE (type); | |
2787 | highi = ranges[i].high; | |
2788 | if (highi == NULL_TREE) | |
2789 | continue; | |
2790 | wide_int mask; | |
2791 | tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, | |
2792 | highi, &mask, &lowi); | |
2793 | if (exp == NULL_TREE) | |
2794 | continue; | |
2795 | bool strict_overflow_p = ranges[i].strict_overflow_p; | |
2796 | candidates.truncate (0); | |
2797 | int end = MIN (i + 64, length); | |
2798 | for (j = i + 1; j < end; j++) | |
2799 | { | |
2800 | tree exp2; | |
2801 | if (ranges[j].exp == NULL_TREE || ranges[j].in_p) | |
2802 | continue; | |
2803 | if (ranges[j].exp == exp) | |
2804 | ; | |
2805 | else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) | |
2806 | { | |
2807 | exp2 = TREE_OPERAND (ranges[j].exp, 0); | |
2808 | if (exp2 == exp) | |
2809 | ; | |
2810 | else if (TREE_CODE (exp2) == PLUS_EXPR) | |
2811 | { | |
2812 | exp2 = TREE_OPERAND (exp2, 0); | |
2813 | STRIP_NOPS (exp2); | |
2814 | if (exp2 != exp) | |
2815 | continue; | |
2816 | } | |
2817 | else | |
2818 | continue; | |
2819 | } | |
2820 | else | |
2821 | continue; | |
2822 | lowj = ranges[j].low; | |
2823 | if (lowj == NULL_TREE) | |
2824 | continue; | |
2825 | highj = ranges[j].high; | |
2826 | if (highj == NULL_TREE) | |
2827 | highj = TYPE_MAX_VALUE (type); | |
2828 | wide_int mask2; | |
2829 | exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, | |
2830 | highj, &mask2, NULL); | |
2831 | if (exp2 != exp) | |
2832 | continue; | |
2833 | mask |= mask2; | |
2834 | strict_overflow_p |= ranges[j].strict_overflow_p; | |
2835 | candidates.safe_push (&ranges[j]); | |
2836 | } | |
2837 | ||
2838 | /* If we need otherwise 3 or more comparisons, use a bit test. */ | |
2839 | if (candidates.length () >= 2) | |
2840 | { | |
2841 | tree high = wide_int_to_tree (TREE_TYPE (lowi), | |
2842 | wi::to_widest (lowi) | |
46a54708 | 2843 | + prec - 1 - wi::clz (mask)); |
526ceb68 | 2844 | operand_entry *oe = (*ops)[ranges[i].idx]; |
73049af5 | 2845 | tree op = oe->op; |
355fe088 | 2846 | gimple *stmt = op ? SSA_NAME_DEF_STMT (op) |
3824a0a2 | 2847 | : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); |
73049af5 JJ |
2848 | location_t loc = gimple_location (stmt); |
2849 | tree optype = op ? TREE_TYPE (op) : boolean_type_node; | |
2850 | ||
2851 | /* See if it isn't cheaper to pretend the minimum value of the | |
2852 | range is 0, if maximum value is small enough. | |
2853 | We can avoid then subtraction of the minimum value, but the | |
2854 | mask constant could be perhaps more expensive. */ | |
2855 | if (compare_tree_int (lowi, 0) > 0 | |
2856 | && compare_tree_int (high, prec) < 0) | |
2857 | { | |
2858 | int cost_diff; | |
2859 | HOST_WIDE_INT m = tree_to_uhwi (lowi); | |
2860 | rtx reg = gen_raw_REG (word_mode, 10000); | |
2861 | bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt)); | |
2862 | cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg, | |
2863 | GEN_INT (-m)), speed_p); | |
2864 | rtx r = immed_wide_int_const (mask, word_mode); | |
2865 | cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r), | |
e548c9df | 2866 | word_mode, speed_p); |
73049af5 JJ |
2867 | r = immed_wide_int_const (wi::lshift (mask, m), word_mode); |
2868 | cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r), | |
e548c9df | 2869 | word_mode, speed_p); |
73049af5 JJ |
2870 | if (cost_diff > 0) |
2871 | { | |
2872 | mask = wi::lshift (mask, m); | |
2873 | lowi = build_zero_cst (TREE_TYPE (lowi)); | |
2874 | } | |
2875 | } | |
2876 | ||
2877 | tree tem = build_range_check (loc, optype, unshare_expr (exp), | |
2878 | false, lowi, high); | |
2879 | if (tem == NULL_TREE || is_gimple_val (tem)) | |
2880 | continue; | |
2881 | tree etype = unsigned_type_for (TREE_TYPE (exp)); | |
2882 | exp = fold_build2_loc (loc, MINUS_EXPR, etype, | |
2883 | fold_convert_loc (loc, etype, exp), | |
2884 | fold_convert_loc (loc, etype, lowi)); | |
2885 | exp = fold_convert_loc (loc, integer_type_node, exp); | |
2886 | tree word_type = lang_hooks.types.type_for_mode (word_mode, 1); | |
2887 | exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type, | |
2888 | build_int_cst (word_type, 1), exp); | |
2889 | exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp, | |
2890 | wide_int_to_tree (word_type, mask)); | |
2891 | exp = fold_build2_loc (loc, EQ_EXPR, optype, exp, | |
2892 | build_zero_cst (word_type)); | |
2893 | if (is_gimple_val (exp)) | |
2894 | continue; | |
2895 | ||
2896 | /* The shift might have undefined behavior if TEM is true, | |
2897 | but reassociate_bb isn't prepared to have basic blocks | |
2898 | split when it is running. So, temporarily emit a code | |
2899 | with BIT_IOR_EXPR instead of &&, and fix it up in | |
2900 | branch_fixup. */ | |
2901 | gimple_seq seq; | |
2902 | tem = force_gimple_operand (tem, &seq, true, NULL_TREE); | |
2903 | gcc_assert (TREE_CODE (tem) == SSA_NAME); | |
2904 | gimple_set_visited (SSA_NAME_DEF_STMT (tem), true); | |
2905 | gimple_seq seq2; | |
2906 | exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); | |
2907 | gimple_seq_add_seq_without_update (&seq, seq2); | |
2908 | gcc_assert (TREE_CODE (exp) == SSA_NAME); | |
2909 | gimple_set_visited (SSA_NAME_DEF_STMT (exp), true); | |
355fe088 | 2910 | gimple *g = gimple_build_assign (make_ssa_name (optype), |
3824a0a2 | 2911 | BIT_IOR_EXPR, tem, exp); |
73049af5 JJ |
2912 | gimple_set_location (g, loc); |
2913 | gimple_seq_add_stmt_without_update (&seq, g); | |
2914 | exp = gimple_assign_lhs (g); | |
2915 | tree val = build_zero_cst (optype); | |
2916 | if (update_range_test (&ranges[i], NULL, candidates.address (), | |
2917 | candidates.length (), opcode, ops, exp, | |
2918 | seq, false, val, val, strict_overflow_p)) | |
2919 | { | |
2920 | any_changes = true; | |
2921 | reassoc_branch_fixups.safe_push (tem); | |
2922 | } | |
2923 | else | |
2924 | gimple_seq_discard (seq); | |
2925 | } | |
2926 | } | |
2927 | return any_changes; | |
2928 | } | |
2929 | ||
caab3763 JJ |
2930 | /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0 |
2931 | and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. */ | |
2932 | ||
2933 | static bool | |
2934 | optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, | |
2935 | vec<operand_entry *> *ops, | |
2936 | struct range_entry *ranges) | |
2937 | { | |
2938 | int i; | |
2939 | unsigned int b; | |
2940 | bool any_changes = false; | |
2941 | auto_vec<int, 128> buckets; | |
2942 | auto_vec<int, 32> chains; | |
2943 | auto_vec<struct range_entry *, 32> candidates; | |
2944 | ||
2945 | for (i = first; i < length; i++) | |
2946 | { | |
2947 | if (ranges[i].exp == NULL_TREE | |
2948 | || TREE_CODE (ranges[i].exp) != SSA_NAME | |
2949 | || !ranges[i].in_p | |
2950 | || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 | |
2951 | || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE | |
2952 | || ranges[i].low == NULL_TREE | |
2953 | || ranges[i].low != ranges[i].high) | |
2954 | continue; | |
2955 | ||
2956 | bool zero_p = integer_zerop (ranges[i].low); | |
2957 | if (!zero_p && !integer_all_onesp (ranges[i].low)) | |
2958 | continue; | |
2959 | ||
2960 | b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p; | |
2961 | if (buckets.length () <= b) | |
2962 | buckets.safe_grow_cleared (b + 1); | |
2963 | if (chains.length () <= (unsigned) i) | |
2964 | chains.safe_grow (i + 1); | |
2965 | chains[i] = buckets[b]; | |
2966 | buckets[b] = i + 1; | |
2967 | } | |
2968 | ||
2969 | FOR_EACH_VEC_ELT (buckets, b, i) | |
2970 | if (i && chains[i - 1]) | |
2971 | { | |
2972 | int j, k = i; | |
2973 | for (j = chains[i - 1]; j; j = chains[j - 1]) | |
2974 | { | |
2975 | gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp); | |
2976 | gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp); | |
2977 | if (reassoc_stmt_dominates_stmt_p (gk, gj)) | |
2978 | k = j; | |
2979 | } | |
2980 | tree type1 = TREE_TYPE (ranges[k - 1].exp); | |
2981 | tree type2 = NULL_TREE; | |
2982 | bool strict_overflow_p = false; | |
2983 | candidates.truncate (0); | |
2984 | for (j = i; j; j = chains[j - 1]) | |
2985 | { | |
2986 | tree type = TREE_TYPE (ranges[j - 1].exp); | |
2987 | strict_overflow_p |= ranges[j - 1].strict_overflow_p; | |
2988 | if (j == k | |
2989 | || useless_type_conversion_p (type1, type)) | |
2990 | ; | |
2991 | else if (type2 == NULL_TREE | |
2992 | || useless_type_conversion_p (type2, type)) | |
2993 | { | |
2994 | if (type2 == NULL_TREE) | |
2995 | type2 = type; | |
2996 | candidates.safe_push (&ranges[j - 1]); | |
2997 | } | |
2998 | } | |
2999 | unsigned l = candidates.length (); | |
3000 | for (j = i; j; j = chains[j - 1]) | |
3001 | { | |
3002 | tree type = TREE_TYPE (ranges[j - 1].exp); | |
3003 | if (j == k) | |
3004 | continue; | |
3005 | if (useless_type_conversion_p (type1, type)) | |
3006 | ; | |
3007 | else if (type2 == NULL_TREE | |
3008 | || useless_type_conversion_p (type2, type)) | |
3009 | continue; | |
3010 | candidates.safe_push (&ranges[j - 1]); | |
3011 | } | |
3012 | gimple_seq seq = NULL; | |
3013 | tree op = NULL_TREE; | |
3014 | unsigned int id; | |
3015 | struct range_entry *r; | |
3016 | candidates.safe_push (&ranges[k - 1]); | |
3017 | FOR_EACH_VEC_ELT (candidates, id, r) | |
3018 | { | |
3019 | gimple *g; | |
3020 | if (id == 0) | |
3021 | { | |
3022 | op = r->exp; | |
3023 | continue; | |
3024 | } | |
3025 | if (id == l) | |
3026 | { | |
3027 | g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op); | |
3028 | gimple_seq_add_stmt_without_update (&seq, g); | |
3029 | op = gimple_assign_lhs (g); | |
3030 | } | |
3031 | tree type = TREE_TYPE (r->exp); | |
3032 | tree exp = r->exp; | |
3033 | if (id >= l && !useless_type_conversion_p (type1, type)) | |
3034 | { | |
3035 | g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp); | |
3036 | gimple_seq_add_stmt_without_update (&seq, g); | |
3037 | exp = gimple_assign_lhs (g); | |
3038 | } | |
3039 | g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2), | |
3040 | (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR, | |
3041 | op, exp); | |
3042 | gimple_seq_add_stmt_without_update (&seq, g); | |
3043 | op = gimple_assign_lhs (g); | |
3044 | } | |
3045 | candidates.pop (); | |
3046 | if (update_range_test (&ranges[k - 1], NULL, candidates.address (), | |
3047 | candidates.length (), opcode, ops, op, | |
3048 | seq, true, ranges[k - 1].low, | |
3049 | ranges[k - 1].low, strict_overflow_p)) | |
3050 | any_changes = true; | |
3051 | else | |
3052 | gimple_seq_discard (seq); | |
3053 | } | |
3054 | ||
3055 | return any_changes; | |
3056 | } | |
3057 | ||
a93cdc5c JJ |
3058 | /* Attempt to optimize for signed a and b where b is known to be >= 0: |
3059 | a >= 0 && a < b into (unsigned) a < (unsigned) b | |
3060 | a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ | |
3061 | ||
3062 | static bool | |
3063 | optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, | |
3064 | vec<operand_entry *> *ops, | |
92007ba6 JJ |
3065 | struct range_entry *ranges, |
3066 | basic_block first_bb) | |
a93cdc5c JJ |
3067 | { |
3068 | int i; | |
3069 | bool any_changes = false; | |
3070 | hash_map<tree, int> *map = NULL; | |
3071 | ||
3072 | for (i = first; i < length; i++) | |
3073 | { | |
4dfb8a2a JJ |
3074 | if (ranges[i].exp == NULL_TREE |
3075 | || TREE_CODE (ranges[i].exp) != SSA_NAME | |
3076 | || !ranges[i].in_p) | |
a93cdc5c JJ |
3077 | continue; |
3078 | ||
3079 | tree type = TREE_TYPE (ranges[i].exp); | |
3080 | if (!INTEGRAL_TYPE_P (type) | |
3081 | || TYPE_UNSIGNED (type) | |
3082 | || ranges[i].low == NULL_TREE | |
3083 | || !integer_zerop (ranges[i].low) | |
3084 | || ranges[i].high != NULL_TREE) | |
3085 | continue; | |
3086 | /* EXP >= 0 here. */ | |
3087 | if (map == NULL) | |
3088 | map = new hash_map <tree, int>; | |
3089 | map->put (ranges[i].exp, i); | |
3090 | } | |
3091 | ||
3092 | if (map == NULL) | |
3093 | return false; | |
3094 | ||
3095 | for (i = 0; i < length; i++) | |
3096 | { | |
1f9be505 | 3097 | bool in_p = ranges[i].in_p; |
a93cdc5c | 3098 | if (ranges[i].low == NULL_TREE |
1f9be505 | 3099 | || ranges[i].high == NULL_TREE) |
a93cdc5c | 3100 | continue; |
1f9be505 JJ |
3101 | if (!integer_zerop (ranges[i].low) |
3102 | || !integer_zerop (ranges[i].high)) | |
3103 | { | |
3104 | if (ranges[i].exp | |
3105 | && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1 | |
3106 | && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) | |
3107 | && integer_onep (ranges[i].low) | |
3108 | && integer_onep (ranges[i].high)) | |
3109 | in_p = !in_p; | |
3110 | else | |
3111 | continue; | |
3112 | } | |
a93cdc5c JJ |
3113 | |
3114 | gimple *stmt; | |
3115 | tree_code ccode; | |
3116 | tree rhs1, rhs2; | |
3117 | if (ranges[i].exp) | |
3118 | { | |
4dfb8a2a JJ |
3119 | if (TREE_CODE (ranges[i].exp) != SSA_NAME) |
3120 | continue; | |
a93cdc5c JJ |
3121 | stmt = SSA_NAME_DEF_STMT (ranges[i].exp); |
3122 | if (!is_gimple_assign (stmt)) | |
3123 | continue; | |
3124 | ccode = gimple_assign_rhs_code (stmt); | |
3125 | rhs1 = gimple_assign_rhs1 (stmt); | |
3126 | rhs2 = gimple_assign_rhs2 (stmt); | |
3127 | } | |
3128 | else | |
3129 | { | |
3130 | operand_entry *oe = (*ops)[ranges[i].idx]; | |
3131 | stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); | |
3132 | if (gimple_code (stmt) != GIMPLE_COND) | |
3133 | continue; | |
3134 | ccode = gimple_cond_code (stmt); | |
3135 | rhs1 = gimple_cond_lhs (stmt); | |
3136 | rhs2 = gimple_cond_rhs (stmt); | |
3137 | } | |
3138 | ||
3139 | if (TREE_CODE (rhs1) != SSA_NAME | |
3140 | || rhs2 == NULL_TREE | |
3141 | || TREE_CODE (rhs2) != SSA_NAME) | |
3142 | continue; | |
3143 | ||
3144 | switch (ccode) | |
3145 | { | |
3146 | case GT_EXPR: | |
3147 | case GE_EXPR: | |
da98e3b1 JJ |
3148 | case LT_EXPR: |
3149 | case LE_EXPR: | |
3150 | break; | |
3151 | default: | |
3152 | continue; | |
3153 | } | |
1f9be505 | 3154 | if (in_p) |
da98e3b1 JJ |
3155 | ccode = invert_tree_comparison (ccode, false); |
3156 | switch (ccode) | |
3157 | { | |
3158 | case GT_EXPR: | |
3159 | case GE_EXPR: | |
3160 | std::swap (rhs1, rhs2); | |
a93cdc5c JJ |
3161 | ccode = swap_tree_comparison (ccode); |
3162 | break; | |
3163 | case LT_EXPR: | |
3164 | case LE_EXPR: | |
a93cdc5c JJ |
3165 | break; |
3166 | default: | |
da98e3b1 | 3167 | gcc_unreachable (); |
a93cdc5c JJ |
3168 | } |
3169 | ||
3170 | int *idx = map->get (rhs1); | |
3171 | if (idx == NULL) | |
3172 | continue; | |
3173 | ||
92007ba6 JJ |
3174 | /* maybe_optimize_range_tests allows statements without side-effects |
3175 | in the basic blocks as long as they are consumed in the same bb. | |
3176 | Make sure rhs2's def stmt is not among them, otherwise we can't | |
3177 | use safely get_nonzero_bits on it. E.g. in: | |
3178 | # RANGE [-83, 1] NONZERO 173 | |
3179 | # k_32 = PHI <k_47(13), k_12(9)> | |
3180 | ... | |
3181 | if (k_32 >= 0) | |
3182 | goto <bb 5>; [26.46%] | |
3183 | else | |
3184 | goto <bb 9>; [73.54%] | |
3185 | ||
3186 | <bb 5> [local count: 140323371]: | |
3187 | # RANGE [0, 1] NONZERO 1 | |
3188 | _5 = (int) k_32; | |
3189 | # RANGE [0, 4] NONZERO 4 | |
3190 | _21 = _5 << 2; | |
3191 | # RANGE [0, 4] NONZERO 4 | |
3192 | iftmp.0_44 = (char) _21; | |
3193 | if (k_32 < iftmp.0_44) | |
3194 | goto <bb 6>; [84.48%] | |
3195 | else | |
3196 | goto <bb 9>; [15.52%] | |
3197 | the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that | |
3198 | k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44 | |
3199 | to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute | |
3200 | those stmts even for negative k_32 and the value ranges would be no | |
3201 | longer guaranteed and so the optimization would be invalid. */ | |
ca6b7410 | 3202 | while (opcode == ERROR_MARK) |
92007ba6 JJ |
3203 | { |
3204 | gimple *g = SSA_NAME_DEF_STMT (rhs2); | |
3205 | basic_block bb2 = gimple_bb (g); | |
3206 | if (bb2 | |
3207 | && bb2 != first_bb | |
3208 | && dominated_by_p (CDI_DOMINATORS, bb2, first_bb)) | |
3209 | { | |
3210 | /* As an exception, handle a few common cases. */ | |
3211 | if (gimple_assign_cast_p (g) | |
ca6b7410 JJ |
3212 | && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))) |
3213 | { | |
3214 | tree op0 = gimple_assign_rhs1 (g); | |
3215 | if (TYPE_UNSIGNED (TREE_TYPE (op0)) | |
3216 | && (TYPE_PRECISION (TREE_TYPE (rhs2)) | |
3217 | > TYPE_PRECISION (TREE_TYPE (op0)))) | |
3218 | /* Zero-extension is always ok. */ | |
3219 | break; | |
3220 | else if (TYPE_PRECISION (TREE_TYPE (rhs2)) | |
3221 | == TYPE_PRECISION (TREE_TYPE (op0)) | |
3222 | && TREE_CODE (op0) == SSA_NAME) | |
3223 | { | |
3224 | /* Cast from signed to unsigned or vice versa. Retry | |
3225 | with the op0 as new rhs2. */ | |
3226 | rhs2 = op0; | |
3227 | continue; | |
3228 | } | |
3229 | } | |
92007ba6 JJ |
3230 | else if (is_gimple_assign (g) |
3231 | && gimple_assign_rhs_code (g) == BIT_AND_EXPR | |
3232 | && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST | |
3233 | && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g)))) | |
3234 | /* Masking with INTEGER_CST with MSB clear is always ok | |
ca6b7410 JJ |
3235 | too. */ |
3236 | break; | |
3237 | rhs2 = NULL_TREE; | |
92007ba6 | 3238 | } |
ca6b7410 | 3239 | break; |
92007ba6 | 3240 | } |
ca6b7410 JJ |
3241 | if (rhs2 == NULL_TREE) |
3242 | continue; | |
92007ba6 | 3243 | |
a93cdc5c JJ |
3244 | wide_int nz = get_nonzero_bits (rhs2); |
3245 | if (wi::neg_p (nz)) | |
3246 | continue; | |
3247 | ||
3248 | /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0 | |
3249 | and RHS2 is known to be RHS2 >= 0. */ | |
3250 | tree utype = unsigned_type_for (TREE_TYPE (rhs1)); | |
3251 | ||
3252 | enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; | |
3253 | if ((ranges[*idx].strict_overflow_p | |
3254 | || ranges[i].strict_overflow_p) | |
3255 | && issue_strict_overflow_warning (wc)) | |
3256 | warning_at (gimple_location (stmt), OPT_Wstrict_overflow, | |
3257 | "assuming signed overflow does not occur " | |
3258 | "when simplifying range test"); | |
3259 | ||
3260 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3261 | { | |
3262 | struct range_entry *r = &ranges[*idx]; | |
3263 | fprintf (dump_file, "Optimizing range test "); | |
ef6cb4c7 | 3264 | print_generic_expr (dump_file, r->exp); |
a93cdc5c | 3265 | fprintf (dump_file, " +["); |
ef6cb4c7 | 3266 | print_generic_expr (dump_file, r->low); |
a93cdc5c | 3267 | fprintf (dump_file, ", "); |
ef6cb4c7 | 3268 | print_generic_expr (dump_file, r->high); |
a93cdc5c | 3269 | fprintf (dump_file, "] and comparison "); |
ef6cb4c7 | 3270 | print_generic_expr (dump_file, rhs1); |
a93cdc5c | 3271 | fprintf (dump_file, " %s ", op_symbol_code (ccode)); |
ef6cb4c7 | 3272 | print_generic_expr (dump_file, rhs2); |
a93cdc5c | 3273 | fprintf (dump_file, "\n into ("); |
ef6cb4c7 | 3274 | print_generic_expr (dump_file, utype); |
a93cdc5c | 3275 | fprintf (dump_file, ") "); |
ef6cb4c7 | 3276 | print_generic_expr (dump_file, rhs1); |
a93cdc5c | 3277 | fprintf (dump_file, " %s (", op_symbol_code (ccode)); |
ef6cb4c7 | 3278 | print_generic_expr (dump_file, utype); |
a93cdc5c | 3279 | fprintf (dump_file, ") "); |
ef6cb4c7 | 3280 | print_generic_expr (dump_file, rhs2); |
a93cdc5c JJ |
3281 | fprintf (dump_file, "\n"); |
3282 | } | |
3283 | ||
da98e3b1 JJ |
3284 | operand_entry *oe = (*ops)[ranges[i].idx]; |
3285 | ranges[i].in_p = 0; | |
3286 | if (opcode == BIT_IOR_EXPR | |
3287 | || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) | |
3288 | { | |
3289 | ranges[i].in_p = 1; | |
3290 | ccode = invert_tree_comparison (ccode, false); | |
3291 | } | |
a93cdc5c JJ |
3292 | |
3293 | unsigned int uid = gimple_uid (stmt); | |
3294 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
3295 | gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1); | |
3296 | gimple_set_uid (g, uid); | |
3297 | rhs1 = gimple_assign_lhs (g); | |
3298 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
ca6b7410 JJ |
3299 | if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2))) |
3300 | { | |
3301 | g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2); | |
3302 | gimple_set_uid (g, uid); | |
3303 | rhs2 = gimple_assign_lhs (g); | |
3304 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3305 | } | |
14e72812 | 3306 | if (tree_swap_operands_p (rhs1, rhs2)) |
a93cdc5c JJ |
3307 | { |
3308 | std::swap (rhs1, rhs2); | |
3309 | ccode = swap_tree_comparison (ccode); | |
3310 | } | |
3311 | if (gimple_code (stmt) == GIMPLE_COND) | |
3312 | { | |
3313 | gcond *c = as_a <gcond *> (stmt); | |
3314 | gimple_cond_set_code (c, ccode); | |
3315 | gimple_cond_set_lhs (c, rhs1); | |
3316 | gimple_cond_set_rhs (c, rhs2); | |
3317 | update_stmt (stmt); | |
3318 | } | |
3319 | else | |
3320 | { | |
4a8b97cb JJ |
3321 | tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node; |
3322 | if (!INTEGRAL_TYPE_P (ctype) | |
3323 | || (TREE_CODE (ctype) != BOOLEAN_TYPE | |
3324 | && TYPE_PRECISION (ctype) != 1)) | |
3325 | ctype = boolean_type_node; | |
3326 | g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2); | |
a93cdc5c JJ |
3327 | gimple_set_uid (g, uid); |
3328 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
4a8b97cb JJ |
3329 | if (oe->op && ctype != TREE_TYPE (oe->op)) |
3330 | { | |
3331 | g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)), | |
3332 | NOP_EXPR, gimple_assign_lhs (g)); | |
3333 | gimple_set_uid (g, uid); | |
3334 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3335 | } | |
a93cdc5c | 3336 | ranges[i].exp = gimple_assign_lhs (g); |
4a8b97cb JJ |
3337 | oe->op = ranges[i].exp; |
3338 | ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp)); | |
3339 | ranges[i].high = ranges[i].low; | |
a93cdc5c JJ |
3340 | } |
3341 | ranges[i].strict_overflow_p = false; | |
da98e3b1 | 3342 | oe = (*ops)[ranges[*idx].idx]; |
a93cdc5c JJ |
3343 | /* Now change all the other range test immediate uses, so that |
3344 | those tests will be optimized away. */ | |
3345 | if (opcode == ERROR_MARK) | |
3346 | { | |
3347 | if (oe->op) | |
3348 | oe->op = build_int_cst (TREE_TYPE (oe->op), | |
3349 | oe->rank == BIT_IOR_EXPR ? 0 : 1); | |
3350 | else | |
3351 | oe->op = (oe->rank == BIT_IOR_EXPR | |
3352 | ? boolean_false_node : boolean_true_node); | |
3353 | } | |
3354 | else | |
3355 | oe->op = error_mark_node; | |
3356 | ranges[*idx].exp = NULL_TREE; | |
3357 | ranges[*idx].low = NULL_TREE; | |
3358 | ranges[*idx].high = NULL_TREE; | |
3359 | any_changes = true; | |
3360 | } | |
3361 | ||
3362 | delete map; | |
3363 | return any_changes; | |
3364 | } | |
3365 | ||
0ccb5dbf JJ |
3366 | /* Optimize range tests, similarly how fold_range_test optimizes |
3367 | it on trees. The tree code for the binary | |
d578e863 JJ |
3368 | operation between all the operands is OPCODE. |
3369 | If OPCODE is ERROR_MARK, optimize_range_tests is called from within | |
3370 | maybe_optimize_range_tests for inter-bb range optimization. | |
3371 | In that case if oe->op is NULL, oe->id is bb->index whose | |
3372 | GIMPLE_COND is && or ||ed into the test, and oe->rank says | |
92007ba6 JJ |
3373 | the actual opcode. |
3374 | FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */ | |
0ccb5dbf | 3375 | |
5e40da4f | 3376 | static bool |
0ccb5dbf | 3377 | optimize_range_tests (enum tree_code opcode, |
92007ba6 | 3378 | vec<operand_entry *> *ops, basic_block first_bb) |
0ccb5dbf | 3379 | { |
9771b263 | 3380 | unsigned int length = ops->length (), i, j, first; |
526ceb68 | 3381 | operand_entry *oe; |
0ccb5dbf JJ |
3382 | struct range_entry *ranges; |
3383 | bool any_changes = false; | |
3384 | ||
3385 | if (length == 1) | |
5e40da4f | 3386 | return false; |
0ccb5dbf JJ |
3387 | |
3388 | ranges = XNEWVEC (struct range_entry, length); | |
3389 | for (i = 0; i < length; i++) | |
3390 | { | |
9771b263 | 3391 | oe = (*ops)[i]; |
0ccb5dbf | 3392 | ranges[i].idx = i; |
d578e863 | 3393 | init_range_entry (ranges + i, oe->op, |
3824a0a2 JJ |
3394 | oe->op |
3395 | ? NULL | |
3396 | : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); | |
0ccb5dbf JJ |
3397 | /* For | invert it now, we will invert it again before emitting |
3398 | the optimized expression. */ | |
d578e863 JJ |
3399 | if (opcode == BIT_IOR_EXPR |
3400 | || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) | |
0ccb5dbf JJ |
3401 | ranges[i].in_p = !ranges[i].in_p; |
3402 | } | |
3403 | ||
3404 | qsort (ranges, length, sizeof (*ranges), range_entry_cmp); | |
3405 | for (i = 0; i < length; i++) | |
3406 | if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) | |
3407 | break; | |
3408 | ||
3409 | /* Try to merge ranges. */ | |
3410 | for (first = i; i < length; i++) | |
3411 | { | |
3412 | tree low = ranges[i].low; | |
3413 | tree high = ranges[i].high; | |
3414 | int in_p = ranges[i].in_p; | |
3415 | bool strict_overflow_p = ranges[i].strict_overflow_p; | |
3416 | int update_fail_count = 0; | |
3417 | ||
3418 | for (j = i + 1; j < length; j++) | |
3419 | { | |
3420 | if (ranges[i].exp != ranges[j].exp) | |
3421 | break; | |
3422 | if (!merge_ranges (&in_p, &low, &high, in_p, low, high, | |
3423 | ranges[j].in_p, ranges[j].low, ranges[j].high)) | |
3424 | break; | |
3425 | strict_overflow_p |= ranges[j].strict_overflow_p; | |
3426 | } | |
3427 | ||
3428 | if (j == i + 1) | |
3429 | continue; | |
3430 | ||
73049af5 JJ |
3431 | if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, |
3432 | opcode, ops, ranges[i].exp, NULL, in_p, | |
3433 | low, high, strict_overflow_p)) | |
0ccb5dbf JJ |
3434 | { |
3435 | i = j - 1; | |
3436 | any_changes = true; | |
3437 | } | |
3438 | /* Avoid quadratic complexity if all merge_ranges calls would succeed, | |
3439 | while update_range_test would fail. */ | |
3440 | else if (update_fail_count == 64) | |
3441 | i = j - 1; | |
3442 | else | |
3443 | ++update_fail_count; | |
3444 | } | |
3445 | ||
b114bfb4 ZC |
3446 | any_changes |= optimize_range_tests_1 (opcode, first, length, true, |
3447 | ops, ranges); | |
0ccb5dbf | 3448 | |
b114bfb4 ZC |
3449 | if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) |
3450 | any_changes |= optimize_range_tests_1 (opcode, first, length, false, | |
3451 | ops, ranges); | |
73049af5 JJ |
3452 | if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) |
3453 | any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, | |
3454 | ops, ranges); | |
caab3763 JJ |
3455 | any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, |
3456 | ops, ranges); | |
a93cdc5c | 3457 | any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, |
92007ba6 | 3458 | ranges, first_bb); |
0ccb5dbf | 3459 | |
d578e863 | 3460 | if (any_changes && opcode != ERROR_MARK) |
0ccb5dbf JJ |
3461 | { |
3462 | j = 0; | |
9771b263 | 3463 | FOR_EACH_VEC_ELT (*ops, i, oe) |
0ccb5dbf JJ |
3464 | { |
3465 | if (oe->op == error_mark_node) | |
3466 | continue; | |
3467 | else if (i != j) | |
9771b263 | 3468 | (*ops)[j] = oe; |
0ccb5dbf JJ |
3469 | j++; |
3470 | } | |
9771b263 | 3471 | ops->truncate (j); |
0ccb5dbf JJ |
3472 | } |
3473 | ||
3474 | XDELETEVEC (ranges); | |
5e40da4f | 3475 | return any_changes; |
0ccb5dbf JJ |
3476 | } |
3477 | ||
51d4212a RH |
3478 | /* A subroutine of optimize_vec_cond_expr to extract and canonicalize |
3479 | the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, | |
3480 | otherwise the comparison code. */ | |
3481 | ||
3482 | static tree_code | |
3483 | ovce_extract_ops (tree var, gassign **rets, bool *reti) | |
3484 | { | |
3485 | if (TREE_CODE (var) != SSA_NAME) | |
3486 | return ERROR_MARK; | |
3487 | ||
3488 | gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); | |
3489 | if (stmt == NULL) | |
3490 | return ERROR_MARK; | |
3491 | ||
3492 | /* ??? If we start creating more COND_EXPR, we could perform | |
3493 | this same optimization with them. For now, simplify. */ | |
3494 | if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR) | |
3495 | return ERROR_MARK; | |
3496 | ||
3497 | tree cond = gimple_assign_rhs1 (stmt); | |
3498 | tree_code cmp = TREE_CODE (cond); | |
3499 | if (TREE_CODE_CLASS (cmp) != tcc_comparison) | |
3500 | return ERROR_MARK; | |
3501 | ||
3502 | /* ??? For now, allow only canonical true and false result vectors. | |
3503 | We could expand this to other constants should the need arise, | |
3504 | but at the moment we don't create them. */ | |
3505 | tree t = gimple_assign_rhs2 (stmt); | |
3506 | tree f = gimple_assign_rhs3 (stmt); | |
3507 | bool inv; | |
3508 | if (integer_all_onesp (t)) | |
3509 | inv = false; | |
3510 | else if (integer_all_onesp (f)) | |
3511 | { | |
3512 | cmp = invert_tree_comparison (cmp, false); | |
3513 | inv = true; | |
3514 | } | |
3515 | else | |
3516 | return ERROR_MARK; | |
3517 | if (!integer_zerop (f)) | |
3518 | return ERROR_MARK; | |
3519 | ||
3520 | /* Success! */ | |
3521 | if (rets) | |
3522 | *rets = stmt; | |
3523 | if (reti) | |
3524 | *reti = inv; | |
3525 | return cmp; | |
3526 | } | |
3527 | ||
/* Optimize the condition of VEC_COND_EXPRs which have been combined
   with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).

   For each pair of operands in *OPS that are defined by VEC_COND_EXPRs
   with canonical -1/0 result vectors, try to fold the two comparisons
   into one; on success the first statement is rewritten in place and
   the second operand is marked error_mark_node and compacted away.
   Returns true if anything changed.  */

static bool
optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
{
  unsigned int length = ops->length (), i, j;
  bool any_changes = false;

  if (length == 1)
    return false;

  for (i = 0; i < length; ++i)
    {
      tree elt0 = (*ops)[i]->op;

      gassign *stmt0;
      bool invert;
      tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
      if (cmp0 == ERROR_MARK)
	continue;

      /* Try to combine stmt0's comparison with every later operand.  */
      for (j = i + 1; j < length; ++j)
	{
	  /* Reference, so the slot can be invalidated in place below.  */
	  tree &elt1 = (*ops)[j]->op;

	  gassign *stmt1;
	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
	  if (cmp1 == ERROR_MARK)
	    continue;

	  tree cond0 = gimple_assign_rhs1 (stmt0);
	  tree x0 = TREE_OPERAND (cond0, 0);
	  tree y0 = TREE_OPERAND (cond0, 1);

	  tree cond1 = gimple_assign_rhs1 (stmt1);
	  tree x1 = TREE_OPERAND (cond1, 0);
	  tree y1 = TREE_OPERAND (cond1, 1);

	  tree comb;
	  if (opcode == BIT_AND_EXPR)
	    comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
	  else if (opcode == BIT_IOR_EXPR)
	    comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
	  else
	    gcc_unreachable ();
	  if (comb == NULL)
	    continue;

	  /* Success! */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_generic_expr (dump_file, cond0);
	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
	      print_generic_expr (dump_file, cond1);
	      fprintf (dump_file, " into ");
	      print_generic_expr (dump_file, comb);
	      fputc ('\n', dump_file);
	    }

	  /* Rewrite stmt0 with the folded condition; if the extracted
	     comparison was inverted, swap the result vectors back so
	     the VEC_COND_EXPR keeps its original meaning.  */
	  gimple_assign_set_rhs1 (stmt0, comb);
	  if (invert)
	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
		       *gimple_assign_rhs3_ptr (stmt0));
	  update_stmt (stmt0);

	  elt1 = error_mark_node;
	  any_changes = true;
	}
    }

  /* Compact out the operands that were merged into earlier ones.  */
  if (any_changes)
    {
      operand_entry *oe;
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
	{
	  if (oe->op == error_mark_node)
	    continue;
	  else if (i != j)
	    (*ops)[j] = oe;
	  j++;
	}
      ops->truncate (j);
    }

  return any_changes;
}
3617 | ||
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.

   Also Return true if STMT is tcc_compare like:
   <bb N>:
   ...
   _234 = a_2(D) == 2;

   <bb M>:
   # _345 = PHI <_234(N), 1(...), 1(...)>
   _346 = (int) _345;
   where _234 has booltype, single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */

static bool
final_range_test_p (gimple *stmt)
{
  basic_block bb, rhs_bb, lhs_bb;
  edge e;
  tree lhs, rhs;
  use_operand_p use_p;
  gimple *use_stmt;

  /* Only a cast or a comparison assignment can terminate the range
     test sequence.  */
  if (!gimple_assign_cast_p (stmt)
      && (!is_gimple_assign (stmt)
	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
	      != tcc_comparison)))
    return false;
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
    return false;
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)
    return false;

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  /* For the cast form, require an integral lhs and a boolean SSA_NAME
     rhs (the _123 = (int) _234 pattern above).  */
  if (gimple_assign_cast_p (stmt)
      && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || TREE_CODE (rhs) != SSA_NAME
	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
    return false;

  /* For the comparison form, the result itself must be boolean.  */
  if (!gimple_assign_cast_p (stmt)
      && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
    return false;

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return false;

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)
    return false;

  /* And that the rhs is defined in the same loop.  */
  if (gimple_assign_cast_p (stmt))
    {
      if (TREE_CODE (rhs) != SSA_NAME
	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
	return false;
    }
  else
    {
      /* For the comparison form check the lhs definition instead.  */
      if (TREE_CODE (lhs) != SSA_NAME
	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
	return false;
    }

  return true;
}
3700 | ||
/* Return true if BB is suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch to if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */

static bool
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
		  bool backward)
{
  edge_iterator ei, ei2;
  edge e, e2;
  gimple *stmt;
  gphi_iterator gsi;
  bool other_edge_seen = false;
  bool is_cond;

  if (test_bb == bb)
    return false;
  /* Check last stmt first.  */
  stmt = last_stmt (bb);
  if (stmt == NULL
      || (gimple_code (stmt) != GIMPLE_COND
	  && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (cfun, stmt)
      || *other_bb == bb)
    return false;
  is_cond = gimple_code (stmt) == GIMPLE_COND;
  if (is_cond)
    {
      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
	 to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
	return false;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	    return false;
	  if (e->dest == test_bb)
	    {
	      /* Going backward, the edge into TEST_BB is expected;
		 going forward it would make the blocks non-linear.  */
	      if (backward)
		continue;
	      else
		return false;
	    }
	  /* Reject self-loops.  */
	  if (e->dest == bb)
	    return false;
	  if (*other_bb == NULL)
	    {
	      /* Discover *OTHER_BB as the successor BB and TEST_BB
		 have in common.  */
	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
		  return false;
		else if (e->dest == e2->dest)
		  *other_bb = e->dest;
	      if (*other_bb == NULL)
		return false;
	    }
	  if (e->dest == *other_bb)
	    other_edge_seen = true;
	  else if (backward)
	    return false;
	}
      /* One of the two successor edges must lead to *OTHER_BB.  */
      if (*other_bb == NULL || !other_edge_seen)
	return false;
    }
  else if (single_succ (bb) != *other_bb)
    return false;

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
	 corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
	{
	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
	     one of the PHIs should have the lhs of the last stmt in
	     that block as PHI arg and that PHI should have 0 or 1
	     corresponding to it in all other range test basic blocks
	     considered.  */
	  if (!is_cond)
	    {
	      if (gimple_phi_arg_def (phi, e->dest_idx)
		  == gimple_assign_lhs (stmt)
		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi,
							   e2->dest_idx))))
		continue;
	    }
	  else
	    {
	      gimple *test_last = last_stmt (test_bb);
	      if (gimple_code (test_last) != GIMPLE_COND
		  && gimple_phi_arg_def (phi, e2->dest_idx)
		     == gimple_assign_lhs (test_last)
		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi,
							   e->dest_idx))))
		continue;
	    }

	  return false;
	}
    }
  return true;
}
3813 | ||
3814 | /* Return true if BB doesn't have side-effects that would disallow | |
3815 | range test optimization, all SSA_NAMEs set in the bb are consumed | |
3816 | in the bb and there are no PHIs. */ | |
3817 | ||
3818 | static bool | |
3819 | no_side_effect_bb (basic_block bb) | |
3820 | { | |
3821 | gimple_stmt_iterator gsi; | |
355fe088 | 3822 | gimple *last; |
d578e863 JJ |
3823 | |
3824 | if (!gimple_seq_empty_p (phi_nodes (bb))) | |
3825 | return false; | |
3826 | last = last_stmt (bb); | |
3827 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
3828 | { | |
355fe088 | 3829 | gimple *stmt = gsi_stmt (gsi); |
d578e863 JJ |
3830 | tree lhs; |
3831 | imm_use_iterator imm_iter; | |
3832 | use_operand_p use_p; | |
3833 | ||
3834 | if (is_gimple_debug (stmt)) | |
3835 | continue; | |
3836 | if (gimple_has_side_effects (stmt)) | |
3837 | return false; | |
3838 | if (stmt == last) | |
3839 | return true; | |
3840 | if (!is_gimple_assign (stmt)) | |
3841 | return false; | |
3842 | lhs = gimple_assign_lhs (stmt); | |
3843 | if (TREE_CODE (lhs) != SSA_NAME) | |
3844 | return false; | |
3845 | if (gimple_assign_rhs_could_trap_p (stmt)) | |
3846 | return false; | |
3847 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) | |
3848 | { | |
355fe088 | 3849 | gimple *use_stmt = USE_STMT (use_p); |
d578e863 JJ |
3850 | if (is_gimple_debug (use_stmt)) |
3851 | continue; | |
3852 | if (gimple_bb (use_stmt) != bb) | |
3853 | return false; | |
3854 | } | |
3855 | } | |
3856 | return false; | |
3857 | } | |
3858 | ||
3859 | /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, | |
3860 | return true and fill in *OPS recursively. */ | |
3861 | ||
3862 | static bool | |
526ceb68 | 3863 | get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, |
99b1c316 | 3864 | class loop *loop) |
d578e863 | 3865 | { |
355fe088 | 3866 | gimple *stmt = SSA_NAME_DEF_STMT (var); |
d578e863 JJ |
3867 | tree rhs[2]; |
3868 | int i; | |
3869 | ||
3870 | if (!is_reassociable_op (stmt, code, loop)) | |
3871 | return false; | |
3872 | ||
3873 | rhs[0] = gimple_assign_rhs1 (stmt); | |
3874 | rhs[1] = gimple_assign_rhs2 (stmt); | |
3875 | gimple_set_visited (stmt, true); | |
3876 | for (i = 0; i < 2; i++) | |
3877 | if (TREE_CODE (rhs[i]) == SSA_NAME | |
3878 | && !get_ops (rhs[i], code, ops, loop) | |
3879 | && has_single_use (rhs[i])) | |
3880 | { | |
526ceb68 | 3881 | operand_entry *oe = operand_entry_pool.allocate (); |
d578e863 JJ |
3882 | |
3883 | oe->op = rhs[i]; | |
3884 | oe->rank = code; | |
3885 | oe->id = 0; | |
3886 | oe->count = 1; | |
d2db36dd | 3887 | oe->stmt_to_insert = NULL; |
9771b263 | 3888 | ops->safe_push (oe); |
d578e863 JJ |
3889 | } |
3890 | return true; | |
3891 | } | |
3892 | ||
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.

   Walks VAR's defining statements in the same order get_ops pushed
   leaf operands into OPS, consuming entries via *PIDX; returns the
   (possibly new) SSA_NAME replacing VAR, or NULL if VAR's definition
   is not reassociable with CODE.  */

static tree
update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
	    unsigned int *pidx, class loop *loop)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  /* rhs[0..1] are the original operands, rhs[2..3] their (possibly
     replaced) counterparts.  */
  tree rhs[4];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return NULL;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  rhs[2] = rhs[0];
  rhs[3] = rhs[1];
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
      {
	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
	if (rhs[2 + i] == NULL_TREE)
	  {
	    /* Leaf operand: single-use leaves were recorded by get_ops,
	       so pick up their current value from OPS; this traversal
	       order must mirror get_ops exactly for *PIDX to stay in
	       sync.  */
	    if (has_single_use (rhs[i]))
	      rhs[2 + i] = ops[(*pidx)++]->op;
	    else
	      rhs[2 + i] = rhs[i];
	  }
      }
  /* If an operand changed (in either order), build a fresh statement
     rather than modifying the old one in place.  */
  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var));
      gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
					rhs[2], rhs[3]);
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }
  return var;
}
3937 | ||
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
{
  /* The SSA_NAME (or NULL_TREE) whose operands were collected.  */
  tree op;
  /* Half-open range [first_idx, last_idx) of entries this bb
     contributed to the ops vector.  */
  unsigned int first_idx, last_idx;
};
3946 | ||
8010f31f | 3947 | /* Inter-bb range test optimization. |
d578e863 | 3948 | |
8010f31f JL |
3949 | Returns TRUE if a gimple conditional is optimized to a true/false, |
3950 | otherwise return FALSE. | |
3951 | ||
3952 | This indicates to the caller that it should run a CFG cleanup pass | |
3953 | once reassociation is completed. */ | |
3954 | ||
3955 | static bool | |
355fe088 | 3956 | maybe_optimize_range_tests (gimple *stmt) |
d578e863 JJ |
3957 | { |
3958 | basic_block first_bb = gimple_bb (stmt); | |
3959 | basic_block last_bb = first_bb; | |
3960 | basic_block other_bb = NULL; | |
3961 | basic_block bb; | |
3962 | edge_iterator ei; | |
3963 | edge e; | |
526ceb68 | 3964 | auto_vec<operand_entry *> ops; |
ef062b13 | 3965 | auto_vec<inter_bb_range_test_entry> bbinfo; |
96f9e25a | 3966 | bool any_changes = false; |
8010f31f | 3967 | bool cfg_cleanup_needed = false; |
d578e863 JJ |
3968 | |
3969 | /* Consider only basic blocks that end with GIMPLE_COND or | |
3970 | a cast statement satisfying final_range_test_p. All | |
3971 | but the last bb in the first_bb .. last_bb range | |
3972 | should end with GIMPLE_COND. */ | |
3973 | if (gimple_code (stmt) == GIMPLE_COND) | |
3974 | { | |
3975 | if (EDGE_COUNT (first_bb->succs) != 2) | |
8010f31f | 3976 | return cfg_cleanup_needed; |
d578e863 JJ |
3977 | } |
3978 | else if (final_range_test_p (stmt)) | |
3979 | other_bb = single_succ (first_bb); | |
3980 | else | |
8010f31f | 3981 | return cfg_cleanup_needed; |
d578e863 | 3982 | |
36bbc05d | 3983 | if (stmt_could_throw_p (cfun, stmt)) |
8010f31f | 3984 | return cfg_cleanup_needed; |
d578e863 JJ |
3985 | |
3986 | /* As relative ordering of post-dominator sons isn't fixed, | |
3987 | maybe_optimize_range_tests can be called first on any | |
3988 | bb in the range we want to optimize. So, start searching | |
3989 | backwards, if first_bb can be set to a predecessor. */ | |
3990 | while (single_pred_p (first_bb)) | |
3991 | { | |
3992 | basic_block pred_bb = single_pred (first_bb); | |
3993 | if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) | |
3994 | break; | |
3995 | if (!no_side_effect_bb (first_bb)) | |
3996 | break; | |
3997 | first_bb = pred_bb; | |
3998 | } | |
3999 | /* If first_bb is last_bb, other_bb hasn't been computed yet. | |
4000 | Before starting forward search in last_bb successors, find | |
4001 | out the other_bb. */ | |
4002 | if (first_bb == last_bb) | |
4003 | { | |
4004 | other_bb = NULL; | |
4005 | /* As non-GIMPLE_COND last stmt always terminates the range, | |
4006 | if forward search didn't discover anything, just give up. */ | |
4007 | if (gimple_code (stmt) != GIMPLE_COND) | |
8010f31f | 4008 | return cfg_cleanup_needed; |
d578e863 JJ |
4009 | /* Look at both successors. Either it ends with a GIMPLE_COND |
4010 | and satisfies suitable_cond_bb, or ends with a cast and | |
4011 | other_bb is that cast's successor. */ | |
4012 | FOR_EACH_EDGE (e, ei, first_bb->succs) | |
4013 | if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) | |
4014 | || e->dest == first_bb) | |
8010f31f | 4015 | return cfg_cleanup_needed; |
d578e863 JJ |
4016 | else if (single_pred_p (e->dest)) |
4017 | { | |
4018 | stmt = last_stmt (e->dest); | |
4019 | if (stmt | |
4020 | && gimple_code (stmt) == GIMPLE_COND | |
4021 | && EDGE_COUNT (e->dest->succs) == 2) | |
4022 | { | |
4023 | if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) | |
4024 | break; | |
4025 | else | |
4026 | other_bb = NULL; | |
4027 | } | |
4028 | else if (stmt | |
4029 | && final_range_test_p (stmt) | |
4030 | && find_edge (first_bb, single_succ (e->dest))) | |
4031 | { | |
4032 | other_bb = single_succ (e->dest); | |
4033 | if (other_bb == first_bb) | |
4034 | other_bb = NULL; | |
4035 | } | |
4036 | } | |
4037 | if (other_bb == NULL) | |
8010f31f | 4038 | return cfg_cleanup_needed; |
d578e863 JJ |
4039 | } |
4040 | /* Now do the forward search, moving last_bb to successor bbs | |
4041 | that aren't other_bb. */ | |
4042 | while (EDGE_COUNT (last_bb->succs) == 2) | |
4043 | { | |
4044 | FOR_EACH_EDGE (e, ei, last_bb->succs) | |
4045 | if (e->dest != other_bb) | |
4046 | break; | |
4047 | if (e == NULL) | |
4048 | break; | |
4049 | if (!single_pred_p (e->dest)) | |
4050 | break; | |
4051 | if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) | |
4052 | break; | |
4053 | if (!no_side_effect_bb (e->dest)) | |
4054 | break; | |
4055 | last_bb = e->dest; | |
4056 | } | |
4057 | if (first_bb == last_bb) | |
8010f31f | 4058 | return cfg_cleanup_needed; |
d578e863 JJ |
4059 | /* Here basic blocks first_bb through last_bb's predecessor |
4060 | end with GIMPLE_COND, all of them have one of the edges to | |
4061 | other_bb and another to another block in the range, | |
4062 | all blocks except first_bb don't have side-effects and | |
4063 | last_bb ends with either GIMPLE_COND, or cast satisfying | |
4064 | final_range_test_p. */ | |
4065 | for (bb = last_bb; ; bb = single_pred (bb)) | |
4066 | { | |
4067 | enum tree_code code; | |
4068 | tree lhs, rhs; | |
5e40da4f | 4069 | inter_bb_range_test_entry bb_ent; |
d578e863 | 4070 | |
5e40da4f JJ |
4071 | bb_ent.op = NULL_TREE; |
4072 | bb_ent.first_idx = ops.length (); | |
4073 | bb_ent.last_idx = bb_ent.first_idx; | |
d578e863 JJ |
4074 | e = find_edge (bb, other_bb); |
4075 | stmt = last_stmt (bb); | |
4076 | gimple_set_visited (stmt, true); | |
4077 | if (gimple_code (stmt) != GIMPLE_COND) | |
4078 | { | |
4079 | use_operand_p use_p; | |
355fe088 | 4080 | gimple *phi; |
d578e863 JJ |
4081 | edge e2; |
4082 | unsigned int d; | |
4083 | ||
4084 | lhs = gimple_assign_lhs (stmt); | |
4085 | rhs = gimple_assign_rhs1 (stmt); | |
4086 | gcc_assert (bb == last_bb); | |
4087 | ||
4088 | /* stmt is | |
4089 | _123 = (int) _234; | |
635c1074 KV |
4090 | OR |
4091 | _234 = a_2(D) == 2; | |
d578e863 JJ |
4092 | |
4093 | followed by: | |
4094 | <bb M>: | |
4095 | # _345 = PHI <_123(N), 1(...), 1(...)> | |
4096 | ||
4097 | or 0 instead of 1. If it is 0, the _234 | |
4098 | range test is anded together with all the | |
4099 | other range tests, if it is 1, it is ored with | |
4100 | them. */ | |
4101 | single_imm_use (lhs, &use_p, &phi); | |
4102 | gcc_assert (gimple_code (phi) == GIMPLE_PHI); | |
4103 | e2 = find_edge (first_bb, other_bb); | |
4104 | d = e2->dest_idx; | |
4105 | gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); | |
4106 | if (integer_zerop (gimple_phi_arg_def (phi, d))) | |
4107 | code = BIT_AND_EXPR; | |
4108 | else | |
4109 | { | |
4110 | gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); | |
4111 | code = BIT_IOR_EXPR; | |
4112 | } | |
4113 | ||
4114 | /* If _234 SSA_NAME_DEF_STMT is | |
4115 | _234 = _567 | _789; | |
4116 | (or &, corresponding to 1/0 in the phi arguments, | |
4117 | push into ops the individual range test arguments | |
4118 | of the bitwise or resp. and, recursively. */ | |
16b05965 | 4119 | if (TREE_CODE (rhs) == SSA_NAME |
635c1074 KV |
4120 | && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) |
4121 | != tcc_comparison) | |
4536610d KV |
4122 | && !get_ops (rhs, code, &ops, |
4123 | loop_containing_stmt (stmt)) | |
d578e863 JJ |
4124 | && has_single_use (rhs)) |
4125 | { | |
4126 | /* Otherwise, push the _234 range test itself. */ | |
526ceb68 | 4127 | operand_entry *oe = operand_entry_pool.allocate (); |
d578e863 JJ |
4128 | |
4129 | oe->op = rhs; | |
4130 | oe->rank = code; | |
4131 | oe->id = 0; | |
4132 | oe->count = 1; | |
d2db36dd | 4133 | oe->stmt_to_insert = NULL; |
9771b263 | 4134 | ops.safe_push (oe); |
5e40da4f | 4135 | bb_ent.last_idx++; |
635c1074 KV |
4136 | bb_ent.op = rhs; |
4137 | } | |
4138 | else if (is_gimple_assign (stmt) | |
4139 | && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) | |
4140 | == tcc_comparison) | |
16b05965 ML |
4141 | && !get_ops (lhs, code, &ops, |
4142 | loop_containing_stmt (stmt)) | |
635c1074 KV |
4143 | && has_single_use (lhs)) |
4144 | { | |
4145 | operand_entry *oe = operand_entry_pool.allocate (); | |
4146 | oe->op = lhs; | |
4147 | oe->rank = code; | |
4148 | oe->id = 0; | |
4149 | oe->count = 1; | |
4150 | ops.safe_push (oe); | |
4151 | bb_ent.last_idx++; | |
4152 | bb_ent.op = lhs; | |
d578e863 | 4153 | } |
5e40da4f | 4154 | else |
635c1074 KV |
4155 | { |
4156 | bb_ent.last_idx = ops.length (); | |
4157 | bb_ent.op = rhs; | |
4158 | } | |
5e40da4f | 4159 | bbinfo.safe_push (bb_ent); |
d578e863 JJ |
4160 | continue; |
4161 | } | |
4162 | /* Otherwise stmt is GIMPLE_COND. */ | |
4163 | code = gimple_cond_code (stmt); | |
4164 | lhs = gimple_cond_lhs (stmt); | |
4165 | rhs = gimple_cond_rhs (stmt); | |
4166 | if (TREE_CODE (lhs) == SSA_NAME | |
4167 | && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) | |
4168 | && ((code != EQ_EXPR && code != NE_EXPR) | |
4169 | || rhs != boolean_false_node | |
4170 | /* Either push into ops the individual bitwise | |
4171 | or resp. and operands, depending on which | |
4172 | edge is other_bb. */ | |
4173 | || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) | |
4174 | ^ (code == EQ_EXPR)) | |
4175 | ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, | |
4176 | loop_containing_stmt (stmt)))) | |
4177 | { | |
4178 | /* Or push the GIMPLE_COND stmt itself. */ | |
526ceb68 | 4179 | operand_entry *oe = operand_entry_pool.allocate (); |
d578e863 JJ |
4180 | |
4181 | oe->op = NULL; | |
4182 | oe->rank = (e->flags & EDGE_TRUE_VALUE) | |
4183 | ? BIT_IOR_EXPR : BIT_AND_EXPR; | |
4184 | /* oe->op = NULL signs that there is no SSA_NAME | |
4185 | for the range test, and oe->id instead is the | |
4186 | basic block number, at which's end the GIMPLE_COND | |
4187 | is. */ | |
4188 | oe->id = bb->index; | |
4189 | oe->count = 1; | |
d2db36dd | 4190 | oe->stmt_to_insert = NULL; |
9771b263 | 4191 | ops.safe_push (oe); |
5e40da4f JJ |
4192 | bb_ent.op = NULL; |
4193 | bb_ent.last_idx++; | |
d578e863 | 4194 | } |
5e40da4f JJ |
4195 | else if (ops.length () > bb_ent.first_idx) |
4196 | { | |
4197 | bb_ent.op = lhs; | |
4198 | bb_ent.last_idx = ops.length (); | |
4199 | } | |
4200 | bbinfo.safe_push (bb_ent); | |
d578e863 JJ |
4201 | if (bb == first_bb) |
4202 | break; | |
4203 | } | |
9771b263 | 4204 | if (ops.length () > 1) |
92007ba6 | 4205 | any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb); |
96f9e25a | 4206 | if (any_changes) |
5e40da4f | 4207 | { |
83b58b6b | 4208 | unsigned int idx, max_idx = 0; |
96f9e25a JJ |
4209 | /* update_ops relies on has_single_use predicates returning the |
4210 | same values as it did during get_ops earlier. Additionally it | |
4211 | never removes statements, only adds new ones and it should walk | |
4212 | from the single imm use and check the predicate already before | |
4213 | making those changes. | |
4214 | On the other side, the handling of GIMPLE_COND directly can turn | |
4215 | previously multiply used SSA_NAMEs into single use SSA_NAMEs, so | |
4216 | it needs to be done in a separate loop afterwards. */ | |
4217 | for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) | |
5e40da4f | 4218 | { |
96f9e25a JJ |
4219 | if (bbinfo[idx].first_idx < bbinfo[idx].last_idx |
4220 | && bbinfo[idx].op != NULL_TREE) | |
5e40da4f | 4221 | { |
5e40da4f JJ |
4222 | tree new_op; |
4223 | ||
83b58b6b | 4224 | max_idx = idx; |
96f9e25a JJ |
4225 | stmt = last_stmt (bb); |
4226 | new_op = update_ops (bbinfo[idx].op, | |
4227 | (enum tree_code) | |
4228 | ops[bbinfo[idx].first_idx]->rank, | |
4229 | ops, &bbinfo[idx].first_idx, | |
4230 | loop_containing_stmt (stmt)); | |
5e40da4f JJ |
4231 | if (new_op == NULL_TREE) |
4232 | { | |
4233 | gcc_assert (bb == last_bb); | |
4234 | new_op = ops[bbinfo[idx].first_idx++]->op; | |
4235 | } | |
4236 | if (bbinfo[idx].op != new_op) | |
4237 | { | |
4238 | imm_use_iterator iter; | |
4239 | use_operand_p use_p; | |
635c1074 | 4240 | gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL; |
5e40da4f JJ |
4241 | |
4242 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) | |
4c707980 | 4243 | if (is_gimple_debug (use_stmt)) |
5e40da4f | 4244 | continue; |
4c707980 KV |
4245 | else if (gimple_code (use_stmt) == GIMPLE_COND |
4246 | || gimple_code (use_stmt) == GIMPLE_PHI) | |
4247 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
4248 | SET_USE (use_p, new_op); | |
635c1074 KV |
4249 | else if ((is_gimple_assign (use_stmt) |
4250 | && (TREE_CODE_CLASS | |
4251 | (gimple_assign_rhs_code (use_stmt)) | |
4252 | == tcc_comparison))) | |
4253 | cast_or_tcc_cmp_stmt = use_stmt; | |
5e40da4f | 4254 | else if (gimple_assign_cast_p (use_stmt)) |
635c1074 | 4255 | cast_or_tcc_cmp_stmt = use_stmt; |
4c707980 KV |
4256 | else |
4257 | gcc_unreachable (); | |
635c1074 KV |
4258 | |
4259 | if (cast_or_tcc_cmp_stmt) | |
5e40da4f JJ |
4260 | { |
4261 | gcc_assert (bb == last_bb); | |
635c1074 | 4262 | tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt); |
b731b390 | 4263 | tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); |
5e40da4f | 4264 | enum tree_code rhs_code |
635c1074 KV |
4265 | = gimple_assign_cast_p (cast_or_tcc_cmp_stmt) |
4266 | ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt) | |
4267 | : CONVERT_EXPR; | |
538dd0b7 | 4268 | gassign *g; |
e07ded7b JJ |
4269 | if (is_gimple_min_invariant (new_op)) |
4270 | { | |
4271 | new_op = fold_convert (TREE_TYPE (lhs), new_op); | |
4272 | g = gimple_build_assign (new_lhs, new_op); | |
4273 | } | |
4274 | else | |
0d0e4a03 | 4275 | g = gimple_build_assign (new_lhs, rhs_code, new_op); |
635c1074 KV |
4276 | gimple_stmt_iterator gsi |
4277 | = gsi_for_stmt (cast_or_tcc_cmp_stmt); | |
4278 | gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt)); | |
5e40da4f | 4279 | gimple_set_visited (g, true); |
4c707980 | 4280 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
5e40da4f JJ |
4281 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) |
4282 | if (is_gimple_debug (use_stmt)) | |
4283 | continue; | |
4284 | else if (gimple_code (use_stmt) == GIMPLE_COND | |
4285 | || gimple_code (use_stmt) == GIMPLE_PHI) | |
4286 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
4287 | SET_USE (use_p, new_lhs); | |
4288 | else | |
4289 | gcc_unreachable (); | |
4290 | } | |
4291 | } | |
4292 | } | |
4293 | if (bb == first_bb) | |
4294 | break; | |
4295 | } | |
96f9e25a JJ |
4296 | for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) |
4297 | { | |
4298 | if (bbinfo[idx].first_idx < bbinfo[idx].last_idx | |
4299 | && bbinfo[idx].op == NULL_TREE | |
4300 | && ops[bbinfo[idx].first_idx]->op != NULL_TREE) | |
4301 | { | |
538dd0b7 | 4302 | gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); |
83b58b6b JJ |
4303 | |
4304 | if (idx > max_idx) | |
4305 | max_idx = idx; | |
4306 | ||
8010f31f JL |
4307 | /* If we collapse the conditional to a true/false |
4308 | condition, then bubble that knowledge up to our caller. */ | |
96f9e25a | 4309 | if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) |
8010f31f JL |
4310 | { |
4311 | gimple_cond_make_false (cond_stmt); | |
4312 | cfg_cleanup_needed = true; | |
4313 | } | |
96f9e25a | 4314 | else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) |
8010f31f JL |
4315 | { |
4316 | gimple_cond_make_true (cond_stmt); | |
4317 | cfg_cleanup_needed = true; | |
4318 | } | |
96f9e25a JJ |
4319 | else |
4320 | { | |
538dd0b7 DM |
4321 | gimple_cond_set_code (cond_stmt, NE_EXPR); |
4322 | gimple_cond_set_lhs (cond_stmt, | |
4323 | ops[bbinfo[idx].first_idx]->op); | |
4324 | gimple_cond_set_rhs (cond_stmt, boolean_false_node); | |
96f9e25a | 4325 | } |
538dd0b7 | 4326 | update_stmt (cond_stmt); |
96f9e25a JJ |
4327 | } |
4328 | if (bb == first_bb) | |
4329 | break; | |
4330 | } | |
83b58b6b JJ |
4331 | |
4332 | /* The above changes could result in basic blocks after the first | |
4333 | modified one, up to and including last_bb, to be executed even if | |
4334 | they would not be in the original program. If the value ranges of | |
4335 | assignment lhs' in those bbs were dependent on the conditions | |
4336 | guarding those basic blocks which now can change, the VRs might | |
4337 | be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs | |
4338 | are only used within the same bb, it should be not a big deal if | |
4339 | we just reset all the VRs in those bbs. See PR68671. */ | |
4340 | for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) | |
4341 | reset_flow_sensitive_info_in_bb (bb); | |
5e40da4f | 4342 | } |
8010f31f | 4343 | return cfg_cleanup_needed; |
d578e863 JJ |
4344 | } |
4345 | ||
0e0ed594 JL |
4346 | /* Return true if OPERAND is defined by a PHI node which uses the LHS |
4347 | of STMT in it's operands. This is also known as a "destructive | |
4348 | update" operation. */ | |
4349 | ||
4350 | static bool | |
355fe088 | 4351 | is_phi_for_stmt (gimple *stmt, tree operand) |
0e0ed594 | 4352 | { |
355fe088 | 4353 | gimple *def_stmt; |
538dd0b7 | 4354 | gphi *def_phi; |
726a989a | 4355 | tree lhs; |
0e0ed594 JL |
4356 | use_operand_p arg_p; |
4357 | ssa_op_iter i; | |
4358 | ||
4359 | if (TREE_CODE (operand) != SSA_NAME) | |
4360 | return false; | |
4361 | ||
726a989a RB |
4362 | lhs = gimple_assign_lhs (stmt); |
4363 | ||
0e0ed594 | 4364 | def_stmt = SSA_NAME_DEF_STMT (operand); |
538dd0b7 DM |
4365 | def_phi = dyn_cast <gphi *> (def_stmt); |
4366 | if (!def_phi) | |
0e0ed594 JL |
4367 | return false; |
4368 | ||
538dd0b7 | 4369 | FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) |
0e0ed594 JL |
4370 | if (lhs == USE_FROM_PTR (arg_p)) |
4371 | return true; | |
4372 | return false; | |
4373 | } | |
4374 | ||
ec81df7d JJ |
4375 | /* Remove def stmt of VAR if VAR has zero uses and recurse |
4376 | on rhs1 operand if so. */ | |
4377 | ||
4378 | static void | |
4379 | remove_visited_stmt_chain (tree var) | |
4380 | { | |
355fe088 | 4381 | gimple *stmt; |
ec81df7d JJ |
4382 | gimple_stmt_iterator gsi; |
4383 | ||
4384 | while (1) | |
4385 | { | |
4386 | if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) | |
4387 | return; | |
4388 | stmt = SSA_NAME_DEF_STMT (var); | |
a6f8851e BS |
4389 | if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) |
4390 | { | |
4391 | var = gimple_assign_rhs1 (stmt); | |
a6f8851e | 4392 | gsi = gsi_for_stmt (stmt); |
42fae17c | 4393 | reassoc_remove_stmt (&gsi); |
a6f8851e | 4394 | release_defs (stmt); |
a6f8851e BS |
4395 | } |
4396 | else | |
ec81df7d | 4397 | return; |
ec81df7d JJ |
4398 | } |
4399 | } | |
4400 | ||
df7b0cc4 EI |
4401 | /* This function checks three consequtive operands in |
4402 | passed operands vector OPS starting from OPINDEX and | |
4403 | swaps two operands if it is profitable for binary operation | |
4404 | consuming OPINDEX + 1 abnd OPINDEX + 2 operands. | |
4405 | ||
4406 | We pair ops with the same rank if possible. | |
4407 | ||
4408 | The alternative we try is to see if STMT is a destructive | |
4409 | update style statement, which is like: | |
4410 | b = phi (a, ...) | |
4411 | a = c + b; | |
4412 | In that case, we want to use the destructive update form to | |
4413 | expose the possible vectorizer sum reduction opportunity. | |
4414 | In that case, the third operand will be the phi node. This | |
4415 | check is not performed if STMT is null. | |
4416 | ||
4417 | We could, of course, try to be better as noted above, and do a | |
4418 | lot of work to try to find these opportunities in >3 operand | |
4419 | cases, but it is unlikely to be worth it. */ | |
4420 | ||
4421 | static void | |
526ceb68 | 4422 | swap_ops_for_binary_stmt (vec<operand_entry *> ops, |
355fe088 | 4423 | unsigned int opindex, gimple *stmt) |
df7b0cc4 | 4424 | { |
526ceb68 | 4425 | operand_entry *oe1, *oe2, *oe3; |
df7b0cc4 | 4426 | |
9771b263 DN |
4427 | oe1 = ops[opindex]; |
4428 | oe2 = ops[opindex + 1]; | |
4429 | oe3 = ops[opindex + 2]; | |
df7b0cc4 EI |
4430 | |
4431 | if ((oe1->rank == oe2->rank | |
4432 | && oe2->rank != oe3->rank) | |
4433 | || (stmt && is_phi_for_stmt (stmt, oe3->op) | |
4434 | && !is_phi_for_stmt (stmt, oe1->op) | |
4435 | && !is_phi_for_stmt (stmt, oe2->op))) | |
db5447ca | 4436 | std::swap (*oe1, *oe3); |
df7b0cc4 EI |
4437 | else if ((oe1->rank == oe3->rank |
4438 | && oe2->rank != oe3->rank) | |
4439 | || (stmt && is_phi_for_stmt (stmt, oe2->op) | |
4440 | && !is_phi_for_stmt (stmt, oe1->op) | |
4441 | && !is_phi_for_stmt (stmt, oe3->op))) | |
57509809 | 4442 | std::swap (*oe1, *oe2); |
933f507d ER |
4443 | } |
4444 | ||
5e40da4f JJ |
4445 | /* If definition of RHS1 or RHS2 dominates STMT, return the later of those |
4446 | two definitions, otherwise return STMT. */ | |
933f507d | 4447 | |
355fe088 TS |
4448 | static inline gimple * |
4449 | find_insert_point (gimple *stmt, tree rhs1, tree rhs2) | |
933f507d | 4450 | { |
5e40da4f JJ |
4451 | if (TREE_CODE (rhs1) == SSA_NAME |
4452 | && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) | |
4453 | stmt = SSA_NAME_DEF_STMT (rhs1); | |
4454 | if (TREE_CODE (rhs2) == SSA_NAME | |
4455 | && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) | |
4456 | stmt = SSA_NAME_DEF_STMT (rhs2); | |
4457 | return stmt; | |
933f507d ER |
4458 | } |
4459 | ||
5d476e35 KV |
4460 | /* If the stmt that defines operand has to be inserted, insert it |
4461 | before the use. */ | |
4462 | static void | |
4463 | insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert) | |
4464 | { | |
4465 | gcc_assert (is_gimple_assign (stmt_to_insert)); | |
4466 | tree rhs1 = gimple_assign_rhs1 (stmt_to_insert); | |
4467 | tree rhs2 = gimple_assign_rhs2 (stmt_to_insert); | |
4468 | gimple *insert_point = find_insert_point (stmt, rhs1, rhs2); | |
4469 | gimple_stmt_iterator gsi = gsi_for_stmt (insert_point); | |
4470 | gimple_set_uid (stmt_to_insert, gimple_uid (insert_point)); | |
4471 | ||
4472 | /* If the insert point is not stmt, then insert_point would be | |
4473 | the point where operand rhs1 or rhs2 is defined. In this case, | |
4474 | stmt_to_insert has to be inserted afterwards. This would | |
4475 | only happen when the stmt insertion point is flexible. */ | |
4476 | if (stmt == insert_point) | |
4477 | gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT); | |
4478 | else | |
4479 | insert_stmt_after (stmt_to_insert, insert_point); | |
4480 | } | |
4481 | ||
4482 | ||
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.
   CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
   the current stmt and during recursive invocations.
   NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
   recursive invocations.  */

static tree
rewrite_expr_tree (gimple *stmt, unsigned int opindex,
		   vec<operand_entry *> ops, bool changed, bool next_changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry *oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry *oe1, *oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      /* Nothing to do if the statement already uses exactly these two
	 operands in this order.  */
      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }

	  /* If the stmt that defines operand has to be inserted, insert it
	     before the use.  */
	  if (oe1->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
	  if (oe2->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
	  /* Even when changed is false, reassociation could have e.g. removed
	     some redundant operations, so unless we are just swapping the
	     arguments or unless there is no change at all (then we just
	     return lhs), force creation of a new SSA_NAME.  */
	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
	    {
	      /* Build a fresh statement (and SSA name) at a point that is
		 dominated by both operand definitions.  */
	      gimple *insert_point
		= find_insert_point (stmt, oe1->op, oe2->op);
	      lhs = make_ssa_name (TREE_TYPE (lhs));
	      stmt
		= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
				       oe1->op, oe2->op);
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
	      if (insert_point == gsi_stmt (gsi))
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      else
		insert_stmt_after (stmt, insert_point);
	    }
	  else
	    {
	      /* In-place update: the operand definitions must already
		 dominate STMT for this to be valid.  */
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
				   == stmt);
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);
	      update_stmt (stmt);
	    }

	  /* The old rhs1 chain may have become dead; clean it up.  */
	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }
	}
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* If the stmt that defines operand has to be inserted, insert it
     before the use.  */
  if (oe->stmt_to_insert)
    insert_stmt_before_use (stmt, oe->stmt_to_insert);

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
			 changed || oe->op != rhs2 || next_changed,
			 false);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
      if (changed)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);

	  lhs = make_ssa_name (TREE_TYPE (lhs));
	  stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
				      new_rhs1, oe->op);
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	  if (insert_point == gsi_stmt (gsi))
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  else
	    insert_stmt_after (stmt, insert_point);
	}
      else
	{
	  /* Reuse the existing statement; operands must already
	     dominate it.  */
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
			       == stmt);
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);
	  update_stmt (stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
    }
  return lhs;
}
4635 | ||
df7b0cc4 EI |
4636 | /* Find out how many cycles we need to compute statements chain. |
4637 | OPS_NUM holds number os statements in a chain. CPU_WIDTH is a | |
4638 | maximum number of independent statements we may execute per cycle. */ | |
4639 | ||
4640 | static int | |
4641 | get_required_cycles (int ops_num, int cpu_width) | |
4642 | { | |
4643 | int res; | |
4644 | int elog; | |
4645 | unsigned int rest; | |
4646 | ||
4647 | /* While we have more than 2 * cpu_width operands | |
4648 | we may reduce number of operands by cpu_width | |
4649 | per cycle. */ | |
4650 | res = ops_num / (2 * cpu_width); | |
4651 | ||
4652 | /* Remained operands count may be reduced twice per cycle | |
4653 | until we have only one operand. */ | |
4654 | rest = (unsigned)(ops_num - res * cpu_width); | |
4655 | elog = exact_log2 (rest); | |
4656 | if (elog >= 0) | |
4657 | res += elog; | |
4658 | else | |
4659 | res += floor_log2 (rest) + 1; | |
4660 | ||
4661 | return res; | |
4662 | } | |
4663 | ||
4664 | /* Returns an optimal number of registers to use for computation of | |
4665 | given statements. */ | |
4666 | ||
4667 | static int | |
4668 | get_reassociation_width (int ops_num, enum tree_code opc, | |
ef4bddc2 | 4669 | machine_mode mode) |
df7b0cc4 EI |
4670 | { |
4671 | int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); | |
4672 | int width; | |
4673 | int width_min; | |
4674 | int cycles_best; | |
4675 | ||
4676 | if (param_width > 0) | |
4677 | width = param_width; | |
4678 | else | |
4679 | width = targetm.sched.reassociation_width (opc, mode); | |
4680 | ||
4681 | if (width == 1) | |
4682 | return width; | |
4683 | ||
4684 | /* Get the minimal time required for sequence computation. */ | |
4685 | cycles_best = get_required_cycles (ops_num, width); | |
4686 | ||
4687 | /* Check if we may use less width and still compute sequence for | |
4688 | the same time. It will allow us to reduce registers usage. | |
4689 | get_required_cycles is monotonically increasing with lower width | |
4690 | so we can perform a binary search for the minimal width that still | |
4691 | results in the optimal cycle count. */ | |
4692 | width_min = 1; | |
4693 | while (width > width_min) | |
4694 | { | |
4695 | int width_mid = (width + width_min) / 2; | |
4696 | ||
4697 | if (get_required_cycles (ops_num, width_mid) == cycles_best) | |
4698 | width = width_mid; | |
4699 | else if (width_min < width_mid) | |
4700 | width_min = width_mid; | |
4701 | else | |
4702 | break; | |
4703 | } | |
4704 | ||
4705 | return width; | |
4706 | } | |
4707 | ||
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */

static void
rewrite_expr_tree_parallel (gassign *stmt, int width,
			    vec<operand_entry *> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  gcc_assert (op_num > 0);
  /* N operands combine through N-1 binary statements.  */
  int stmt_num = op_num - 1;
  gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
  int op_index = op_num - 1;
  int stmt_index = 0;
  int ready_stmts_end = 0;
  int i = 0;
  gimple *stmt1 = NULL, *stmt2 = NULL;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  stmts[] is filled by walking the rhs1
     chain downwards from STMT.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
	 already handled statements or not.  Once WIDTH independent
	 statements are in flight (or we run out of leaf operands),
	 start consuming earlier results.  */
      if (ready_stmts_end == 0
	  && (i - stmt_index >= width || op_index < 1))
	ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
	 value in ready_stmts_end means here that we should use
	 the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
	{
	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
	  if (ready_stmts_end > stmt_index)
	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
	  else if (op_index >= 0)
	    {
	      /* Mix in one remaining leaf operand.  */
	      operand_entry *oe = ops[op_index--];
	      stmt2 = oe->stmt_to_insert;
	      op2 = oe->op;
	    }
	  else
	    {
	      gcc_assert (stmt_index < i);
	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
	    }

	  if (stmt_index >= ready_stmts_end)
	    ready_stmts_end = 0;
	}
      else
	{
	  /* Pair two leaf operands, possibly reordering three
	     consecutive entries for better rank pairing first.  */
	  if (op_index > 1)
	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
	  operand_entry *oe2 = ops[op_index--];
	  operand_entry *oe1 = ops[op_index--];
	  op2 = oe2->op;
	  stmt2 = oe2->stmt_to_insert;
	  op1 = oe1->op;
	  stmt1 = oe1->stmt_to_insert;
	}

      /* If we emit the last statement then we should put
	 operands into the last statement.  It will also
	 break the loop.  */
      if (op_index < 0 && stmt_index == i)
	i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}

      /* If the stmt that defines operand has to be inserted, insert it
	 before the use.  */
      if (stmt1)
	insert_stmt_before_use (stmts[i], stmt1);
      if (stmt2)
	insert_stmt_before_use (stmts[i], stmt2);
      stmt1 = stmt2 = NULL;

      /* We keep original statement only for the last one.  All
	 others are recreated.  */
      if (i == stmt_num - 1)
	{
	  gimple_assign_set_rhs1 (stmts[i], op1);
	  gimple_assign_set_rhs2 (stmts[i], op2);
	  update_stmt (stmts[i]);
	}
      else
	{
	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
	  gimple_set_visited (stmts[i], true);
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}
    }

  /* Statements feeding the original rhs1 chain may now be dead.  */
  remove_visited_stmt_chain (last_rhs1);
}
4823 | ||
/* Transform STMT, which is really (A +B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple *stmt)
{
  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  class loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  /* Both operand definitions must be reassociable with the same code
     within this loop; the caller guarantees this.  */
  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  /* Rewrite (A+B) + (C+D) as ((A+B)+C) + D: D becomes STMT's rhs2,
     and a new statement computing (A+B)+C becomes its rhs1.  */
  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  /* The original rhs statement has been replaced; remove it.  */
  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
4880 | ||
726a989a | 4881 | /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return |
0e0ed594 JL |
4882 | it. Otherwise, return NULL. */ |
4883 | ||
355fe088 | 4884 | static gimple * |
0e0ed594 JL |
4885 | get_single_immediate_use (tree lhs) |
4886 | { | |
4887 | use_operand_p immuse; | |
355fe088 | 4888 | gimple *immusestmt; |
0e0ed594 JL |
4889 | |
4890 | if (TREE_CODE (lhs) == SSA_NAME | |
726a989a RB |
4891 | && single_imm_use (lhs, &immuse, &immusestmt) |
4892 | && is_gimple_assign (immusestmt)) | |
4893 | return immusestmt; | |
4894 | ||
4895 | return NULL; | |
0e0ed594 | 4896 | } |
0e0ed594 | 4897 | |
0e0ed594 JL |
4898 | /* Recursively negate the value of TONEGATE, and return the SSA_NAME |
4899 | representing the negated value. Insertions of any necessary | |
726a989a | 4900 | instructions go before GSI. |
0e0ed594 JL |
4901 | This function is recursive in that, if you hand it "a_5" as the |
4902 | value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will | |
4903 | transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ | |
4904 | ||
4905 | static tree | |
5e40da4f | 4906 | negate_value (tree tonegate, gimple_stmt_iterator *gsip) |
0e0ed594 | 4907 | { |
355fe088 | 4908 | gimple *negatedefstmt = NULL; |
0e0ed594 | 4909 | tree resultofnegate; |
5e40da4f JJ |
4910 | gimple_stmt_iterator gsi; |
4911 | unsigned int uid; | |
0e0ed594 | 4912 | |
0e0ed594 JL |
4913 | /* If we are trying to negate a name, defined by an add, negate the |
4914 | add operands instead. */ | |
726a989a RB |
4915 | if (TREE_CODE (tonegate) == SSA_NAME) |
4916 | negatedefstmt = SSA_NAME_DEF_STMT (tonegate); | |
0e0ed594 | 4917 | if (TREE_CODE (tonegate) == SSA_NAME |
726a989a RB |
4918 | && is_gimple_assign (negatedefstmt) |
4919 | && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME | |
4920 | && has_single_use (gimple_assign_lhs (negatedefstmt)) | |
4921 | && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) | |
0e0ed594 | 4922 | { |
726a989a RB |
4923 | tree rhs1 = gimple_assign_rhs1 (negatedefstmt); |
4924 | tree rhs2 = gimple_assign_rhs2 (negatedefstmt); | |
5e40da4f | 4925 | tree lhs = gimple_assign_lhs (negatedefstmt); |
355fe088 | 4926 | gimple *g; |
726a989a RB |
4927 | |
4928 | gsi = gsi_for_stmt (negatedefstmt); | |
4929 | rhs1 = negate_value (rhs1, &gsi); | |
726a989a RB |
4930 | |
4931 | gsi = gsi_for_stmt (negatedefstmt); | |
4932 | rhs2 = negate_value (rhs2, &gsi); | |
726a989a | 4933 | |
5e40da4f | 4934 | gsi = gsi_for_stmt (negatedefstmt); |
b731b390 | 4935 | lhs = make_ssa_name (TREE_TYPE (lhs)); |
5e40da4f | 4936 | gimple_set_visited (negatedefstmt, true); |
0d0e4a03 | 4937 | g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); |
5e40da4f JJ |
4938 | gimple_set_uid (g, gimple_uid (negatedefstmt)); |
4939 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
4940 | return lhs; | |
0e0ed594 JL |
4941 | } |
4942 | ||
4943 | tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); | |
5e40da4f | 4944 | resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, |
726a989a | 4945 | NULL_TREE, true, GSI_SAME_STMT); |
5e40da4f JJ |
4946 | gsi = *gsip; |
4947 | uid = gimple_uid (gsi_stmt (gsi)); | |
4948 | for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) | |
4949 | { | |
355fe088 | 4950 | gimple *stmt = gsi_stmt (gsi); |
5e40da4f JJ |
4951 | if (gimple_uid (stmt) != 0) |
4952 | break; | |
4953 | gimple_set_uid (stmt, uid); | |
4954 | } | |
0e0ed594 | 4955 | return resultofnegate; |
0e0ed594 JL |
4956 | } |
4957 | ||
4958 | /* Return true if we should break up the subtract in STMT into an add | |
4959 | with negate. This is true when we the subtract operands are really | |
4960 | adds, or the subtract itself is used in an add expression. In | |
4961 | either case, breaking up the subtract into an add with negate | |
4962 | exposes the adds to reassociation. */ | |
4963 | ||
4964 | static bool | |
355fe088 | 4965 | should_break_up_subtract (gimple *stmt) |
0e0ed594 | 4966 | { |
726a989a RB |
4967 | tree lhs = gimple_assign_lhs (stmt); |
4968 | tree binlhs = gimple_assign_rhs1 (stmt); | |
4969 | tree binrhs = gimple_assign_rhs2 (stmt); | |
355fe088 | 4970 | gimple *immusestmt; |
99b1c316 | 4971 | class loop *loop = loop_containing_stmt (stmt); |
0e0ed594 JL |
4972 | |
4973 | if (TREE_CODE (binlhs) == SSA_NAME | |
7a9c7d01 | 4974 | && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) |
0e0ed594 JL |
4975 | return true; |
4976 | ||
4977 | if (TREE_CODE (binrhs) == SSA_NAME | |
7a9c7d01 | 4978 | && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) |
0e0ed594 JL |
4979 | return true; |
4980 | ||
4981 | if (TREE_CODE (lhs) == SSA_NAME | |
4982 | && (immusestmt = get_single_immediate_use (lhs)) | |
726a989a | 4983 | && is_gimple_assign (immusestmt) |
25c6036a | 4984 | && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR |
297db279 RB |
4985 | || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR |
4986 | && gimple_assign_rhs1 (immusestmt) == lhs) | |
4987 | || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) | |
0e0ed594 JL |
4988 | return true; |
4989 | return false; | |
0e0ed594 JL |
4990 | } |
4991 | ||
4992 | /* Transform STMT from A - B into A + -B. */ | |
4993 | ||
4994 | static void | |
355fe088 | 4995 | break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip) |
0e0ed594 | 4996 | { |
726a989a RB |
4997 | tree rhs1 = gimple_assign_rhs1 (stmt); |
4998 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
0e0ed594 JL |
4999 | |
5000 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5001 | { | |
5002 | fprintf (dump_file, "Breaking up subtract "); | |
ef6cb4c7 | 5003 | print_gimple_stmt (dump_file, stmt, 0); |
0e0ed594 JL |
5004 | } |
5005 | ||
726a989a RB |
5006 | rhs2 = negate_value (rhs2, gsip); |
5007 | gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); | |
0e0ed594 JL |
5008 | update_stmt (stmt); |
5009 | } | |
5010 | ||
a6f8851e BS |
5011 | /* Determine whether STMT is a builtin call that raises an SSA name |
5012 | to an integer power and has only one use. If so, and this is early | |
5013 | reassociation and unsafe math optimizations are permitted, place | |
5014 | the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. | |
5015 | If any of these conditions does not hold, return FALSE. */ | |
5016 | ||
5017 | static bool | |
18b54004 | 5018 | acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent) |
a6f8851e | 5019 | { |
314709cd | 5020 | tree arg1; |
a6f8851e BS |
5021 | REAL_VALUE_TYPE c, cint; |
5022 | ||
314709cd | 5023 | switch (gimple_call_combined_fn (stmt)) |
a6f8851e | 5024 | { |
314709cd | 5025 | CASE_CFN_POW: |
c293be1a BS |
5026 | if (flag_errno_math) |
5027 | return false; | |
5028 | ||
a6f8851e BS |
5029 | *base = gimple_call_arg (stmt, 0); |
5030 | arg1 = gimple_call_arg (stmt, 1); | |
5031 | ||
5032 | if (TREE_CODE (arg1) != REAL_CST) | |
5033 | return false; | |
5034 | ||
5035 | c = TREE_REAL_CST (arg1); | |
5036 | ||
5037 | if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) | |
5038 | return false; | |
5039 | ||
5040 | *exponent = real_to_integer (&c); | |
807e902e | 5041 | real_from_integer (&cint, VOIDmode, *exponent, SIGNED); |
a6f8851e BS |
5042 | if (!real_identical (&c, &cint)) |
5043 | return false; | |
5044 | ||
5045 | break; | |
5046 | ||
314709cd | 5047 | CASE_CFN_POWI: |
a6f8851e BS |
5048 | *base = gimple_call_arg (stmt, 0); |
5049 | arg1 = gimple_call_arg (stmt, 1); | |
5050 | ||
9541ffee | 5051 | if (!tree_fits_shwi_p (arg1)) |
a6f8851e BS |
5052 | return false; |
5053 | ||
eb1ce453 | 5054 | *exponent = tree_to_shwi (arg1); |
a6f8851e BS |
5055 | break; |
5056 | ||
5057 | default: | |
5058 | return false; | |
5059 | } | |
5060 | ||
5061 | /* Expanding negative exponents is generally unproductive, so we don't | |
5062 | complicate matters with those. Exponents of zero and one should | |
5063 | have been handled by expression folding. */ | |
5064 | if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) | |
5065 | return false; | |
5066 | ||
5067 | return true; | |
5068 | } | |
5069 | ||
8a85cee2 KV |
5070 | /* Try to derive and add operand entry for OP to *OPS. Return false if |
5071 | unsuccessful. */ | |
5072 | ||
5073 | static bool | |
5074 | try_special_add_to_ops (vec<operand_entry *> *ops, | |
5075 | enum tree_code code, | |
5076 | tree op, gimple* def_stmt) | |
5077 | { | |
5078 | tree base = NULL_TREE; | |
5079 | HOST_WIDE_INT exponent = 0; | |
5080 | ||
18b54004 RB |
5081 | if (TREE_CODE (op) != SSA_NAME |
5082 | || ! has_single_use (op)) | |
8a85cee2 KV |
5083 | return false; |
5084 | ||
5085 | if (code == MULT_EXPR | |
18b54004 RB |
5086 | && reassoc_insert_powi_p |
5087 | && flag_unsafe_math_optimizations | |
5088 | && is_gimple_call (def_stmt) | |
5089 | && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent)) | |
8a85cee2 KV |
5090 | { |
5091 | add_repeat_to_ops_vec (ops, base, exponent); | |
5092 | gimple_set_visited (def_stmt, true); | |
5093 | return true; | |
5094 | } | |
5095 | else if (code == MULT_EXPR | |
5096 | && is_gimple_assign (def_stmt) | |
5097 | && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR | |
5098 | && !HONOR_SNANS (TREE_TYPE (op)) | |
5099 | && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op)) | |
5100 | || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))) | |
5101 | { | |
5102 | tree rhs1 = gimple_assign_rhs1 (def_stmt); | |
5103 | tree cst = build_minus_one_cst (TREE_TYPE (op)); | |
5104 | add_to_ops_vec (ops, rhs1); | |
5105 | add_to_ops_vec (ops, cst); | |
5106 | gimple_set_visited (def_stmt, true); | |
5107 | return true; | |
5108 | } | |
5109 | ||
5110 | return false; | |
5111 | } | |
5112 | ||
0e0ed594 JL |
5113 | /* Recursively linearize a binary expression that is the RHS of STMT. |
5114 | Place the operands of the expression tree in the vector named OPS. */ | |
5115 | ||
5116 | static void | |
526ceb68 | 5117 | linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt, |
25c6036a | 5118 | bool is_associative, bool set_visited) |
0e0ed594 | 5119 | { |
726a989a RB |
5120 | tree binlhs = gimple_assign_rhs1 (stmt); |
5121 | tree binrhs = gimple_assign_rhs2 (stmt); | |
355fe088 | 5122 | gimple *binlhsdef = NULL, *binrhsdef = NULL; |
0e0ed594 JL |
5123 | bool binlhsisreassoc = false; |
5124 | bool binrhsisreassoc = false; | |
726a989a | 5125 | enum tree_code rhscode = gimple_assign_rhs_code (stmt); |
99b1c316 | 5126 | class loop *loop = loop_containing_stmt (stmt); |
0e0ed594 | 5127 | |
25c6036a RG |
5128 | if (set_visited) |
5129 | gimple_set_visited (stmt, true); | |
0e0ed594 JL |
5130 | |
5131 | if (TREE_CODE (binlhs) == SSA_NAME) | |
5132 | { | |
5133 | binlhsdef = SSA_NAME_DEF_STMT (binlhs); | |
6b03de57 | 5134 | binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) |
36bbc05d | 5135 | && !stmt_could_throw_p (cfun, binlhsdef)); |
0e0ed594 JL |
5136 | } |
5137 | ||
5138 | if (TREE_CODE (binrhs) == SSA_NAME) | |
5139 | { | |
5140 | binrhsdef = SSA_NAME_DEF_STMT (binrhs); | |
6b03de57 | 5141 | binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) |
36bbc05d | 5142 | && !stmt_could_throw_p (cfun, binrhsdef)); |
0e0ed594 JL |
5143 | } |
5144 | ||
5145 | /* If the LHS is not reassociable, but the RHS is, we need to swap | |
5146 | them. If neither is reassociable, there is nothing we can do, so | |
5147 | just put them in the ops vector. If the LHS is reassociable, | |
5148 | linearize it. If both are reassociable, then linearize the RHS | |
5149 | and the LHS. */ | |
5150 | ||
5151 | if (!binlhsisreassoc) | |
5152 | { | |
25c6036a RG |
5153 | /* If this is not a associative operation like division, give up. */ |
5154 | if (!is_associative) | |
5155 | { | |
5156 | add_to_ops_vec (ops, binrhs); | |
5157 | return; | |
5158 | } | |
5159 | ||
0e0ed594 JL |
5160 | if (!binrhsisreassoc) |
5161 | { | |
8a85cee2 | 5162 | if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) |
a6f8851e BS |
5163 | add_to_ops_vec (ops, binrhs); |
5164 | ||
8a85cee2 | 5165 | if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef)) |
a6f8851e BS |
5166 | add_to_ops_vec (ops, binlhs); |
5167 | ||
0e0ed594 JL |
5168 | return; |
5169 | } | |
5170 | ||
5171 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5172 | { | |
5173 | fprintf (dump_file, "swapping operands of "); | |
ef6cb4c7 | 5174 | print_gimple_stmt (dump_file, stmt, 0); |
0e0ed594 JL |
5175 | } |
5176 | ||
80560f95 AM |
5177 | swap_ssa_operands (stmt, |
5178 | gimple_assign_rhs1_ptr (stmt), | |
5179 | gimple_assign_rhs2_ptr (stmt)); | |
0e0ed594 JL |
5180 | update_stmt (stmt); |
5181 | ||
5182 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5183 | { | |
5184 | fprintf (dump_file, " is now "); | |
ef6cb4c7 | 5185 | print_gimple_stmt (dump_file, stmt, 0); |
0e0ed594 JL |
5186 | } |
5187 | ||
5188 | /* We want to make it so the lhs is always the reassociative op, | |
5189 | so swap. */ | |
fab27f52 | 5190 | std::swap (binlhs, binrhs); |
0e0ed594 JL |
5191 | } |
5192 | else if (binrhsisreassoc) | |
5193 | { | |
5194 | linearize_expr (stmt); | |
726a989a RB |
5195 | binlhs = gimple_assign_rhs1 (stmt); |
5196 | binrhs = gimple_assign_rhs2 (stmt); | |
0e0ed594 JL |
5197 | } |
5198 | ||
5199 | gcc_assert (TREE_CODE (binrhs) != SSA_NAME | |
7a9c7d01 ZD |
5200 | || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), |
5201 | rhscode, loop)); | |
25c6036a RG |
5202 | linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), |
5203 | is_associative, set_visited); | |
a6f8851e | 5204 | |
8a85cee2 | 5205 | if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) |
a6f8851e | 5206 | add_to_ops_vec (ops, binrhs); |
0e0ed594 JL |
5207 | } |
5208 | ||
5209 | /* Repropagate the negates back into subtracts, since no other pass | |
5210 | currently does it. */ | |
5211 | ||
5212 | static void | |
5213 | repropagate_negates (void) | |
5214 | { | |
5215 | unsigned int i = 0; | |
5216 | tree negate; | |
5217 | ||
9771b263 | 5218 | FOR_EACH_VEC_ELT (plus_negates, i, negate) |
0e0ed594 | 5219 | { |
355fe088 | 5220 | gimple *user = get_single_immediate_use (negate); |
0e0ed594 | 5221 | |
143597ff MM |
5222 | if (!user || !is_gimple_assign (user)) |
5223 | continue; | |
5224 | ||
0e0ed594 JL |
5225 | /* The negate operand can be either operand of a PLUS_EXPR |
5226 | (it can be the LHS if the RHS is a constant for example). | |
5227 | ||
5228 | Force the negate operand to the RHS of the PLUS_EXPR, then | |
5229 | transform the PLUS_EXPR into a MINUS_EXPR. */ | |
143597ff | 5230 | if (gimple_assign_rhs_code (user) == PLUS_EXPR) |
0e0ed594 | 5231 | { |
0e0ed594 JL |
5232 | /* If the negated operand appears on the LHS of the |
5233 | PLUS_EXPR, exchange the operands of the PLUS_EXPR | |
5234 | to force the negated operand to the RHS of the PLUS_EXPR. */ | |
726a989a | 5235 | if (gimple_assign_rhs1 (user) == negate) |
0e0ed594 | 5236 | { |
80560f95 AM |
5237 | swap_ssa_operands (user, |
5238 | gimple_assign_rhs1_ptr (user), | |
5239 | gimple_assign_rhs2_ptr (user)); | |
0e0ed594 JL |
5240 | } |
5241 | ||
5242 | /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace | |
5243 | the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ | |
726a989a | 5244 | if (gimple_assign_rhs2 (user) == negate) |
0e0ed594 | 5245 | { |
726a989a | 5246 | tree rhs1 = gimple_assign_rhs1 (user); |
ce40915e | 5247 | tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate)); |
726a989a RB |
5248 | gimple_stmt_iterator gsi = gsi_for_stmt (user); |
5249 | gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); | |
0e0ed594 JL |
5250 | update_stmt (user); |
5251 | } | |
5252 | } | |
143597ff MM |
5253 | else if (gimple_assign_rhs_code (user) == MINUS_EXPR) |
5254 | { | |
5255 | if (gimple_assign_rhs1 (user) == negate) | |
5256 | { | |
5257 | /* We have | |
5258 | x = -a | |
5259 | y = x - b | |
5260 | which we transform into | |
5261 | x = a + b | |
5262 | y = -x . | |
5263 | This pushes down the negate which we possibly can merge | |
5264 | into some other operation, hence insert it into the | |
5265 | plus_negates vector. */ | |
355fe088 | 5266 | gimple *feed = SSA_NAME_DEF_STMT (negate); |
143597ff | 5267 | tree a = gimple_assign_rhs1 (feed); |
5e40da4f JJ |
5268 | tree b = gimple_assign_rhs2 (user); |
5269 | gimple_stmt_iterator gsi = gsi_for_stmt (feed); | |
5270 | gimple_stmt_iterator gsi2 = gsi_for_stmt (user); | |
b731b390 | 5271 | tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); |
355fe088 | 5272 | gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b); |
5e40da4f | 5273 | gsi_insert_before (&gsi2, g, GSI_SAME_STMT); |
00d66391 | 5274 | gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); |
5e40da4f JJ |
5275 | user = gsi_stmt (gsi2); |
5276 | update_stmt (user); | |
42fae17c | 5277 | reassoc_remove_stmt (&gsi); |
5e40da4f JJ |
5278 | release_defs (feed); |
5279 | plus_negates.safe_push (gimple_assign_lhs (user)); | |
143597ff MM |
5280 | } |
5281 | else | |
5282 | { | |
5283 | /* Transform "x = -a; y = b - x" into "y = b + a", getting | |
5284 | rid of one operation. */ | |
355fe088 | 5285 | gimple *feed = SSA_NAME_DEF_STMT (negate); |
143597ff MM |
5286 | tree a = gimple_assign_rhs1 (feed); |
5287 | tree rhs1 = gimple_assign_rhs1 (user); | |
5288 | gimple_stmt_iterator gsi = gsi_for_stmt (user); | |
5289 | gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); | |
5290 | update_stmt (gsi_stmt (gsi)); | |
5291 | } | |
5292 | } | |
0e0ed594 JL |
5293 | } |
5294 | } | |
5295 | ||
143597ff MM |
5296 | /* Returns true if OP is of a type for which we can do reassociation. |
5297 | That is for integral or non-saturating fixed-point types, and for | |
5298 | floating point type when associative-math is enabled. */ | |
5299 | ||
5300 | static bool | |
5301 | can_reassociate_p (tree op) | |
5302 | { | |
5303 | tree type = TREE_TYPE (op); | |
6139a3b7 JJ |
5304 | if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) |
5305 | return false; | |
51d4212a | 5306 | if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) |
143597ff | 5307 | || NON_SAT_FIXED_POINT_TYPE_P (type) |
8a9ecffd | 5308 | || (flag_associative_math && FLOAT_TYPE_P (type))) |
143597ff MM |
5309 | return true; |
5310 | return false; | |
5311 | } | |
5312 | ||
0e0ed594 JL |
5313 | /* Break up subtract operations in block BB. |
5314 | ||
5315 | We do this top down because we don't know whether the subtract is | |
5316 | part of a possible chain of reassociation except at the top. | |
b8698a0f | 5317 | |
0e0ed594 JL |
5318 | IE given |
5319 | d = f + g | |
5320 | c = a + e | |
5321 | b = c - d | |
5322 | q = b - r | |
5323 | k = t - q | |
b8698a0f | 5324 | |
0e0ed594 | 5325 | we want to break up k = t - q, but we won't until we've transformed q |
726a989a RB |
5326 | = b - r, which won't be broken up until we transform b = c - d. |
5327 | ||
5e40da4f JJ |
5328 | En passant, clear the GIMPLE visited flag on every statement |
5329 | and set UIDs within each basic block. */ | |
0e0ed594 JL |
5330 | |
5331 | static void | |
5332 | break_up_subtract_bb (basic_block bb) | |
5333 | { | |
726a989a | 5334 | gimple_stmt_iterator gsi; |
0e0ed594 | 5335 | basic_block son; |
5e40da4f | 5336 | unsigned int uid = 1; |
0e0ed594 | 5337 | |
726a989a | 5338 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
0e0ed594 | 5339 | { |
355fe088 | 5340 | gimple *stmt = gsi_stmt (gsi); |
726a989a | 5341 | gimple_set_visited (stmt, false); |
5e40da4f | 5342 | gimple_set_uid (stmt, uid++); |
0e0ed594 | 5343 | |
143597ff MM |
5344 | if (!is_gimple_assign (stmt) |
5345 | || !can_reassociate_p (gimple_assign_lhs (stmt))) | |
5346 | continue; | |
5347 | ||
726a989a | 5348 | /* Look for simple gimple subtract operations. */ |
143597ff | 5349 | if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) |
0e0ed594 | 5350 | { |
143597ff MM |
5351 | if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) |
5352 | || !can_reassociate_p (gimple_assign_rhs2 (stmt))) | |
0e0ed594 JL |
5353 | continue; |
5354 | ||
5355 | /* Check for a subtract used only in an addition. If this | |
5356 | is the case, transform it into add of a negate for better | |
5357 | reassociation. IE transform C = A-B into C = A + -B if C | |
5358 | is only used in an addition. */ | |
726a989a RB |
5359 | if (should_break_up_subtract (stmt)) |
5360 | break_up_subtract (stmt, &gsi); | |
0e0ed594 | 5361 | } |
143597ff MM |
5362 | else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR |
5363 | && can_reassociate_p (gimple_assign_rhs1 (stmt))) | |
9771b263 | 5364 | plus_negates.safe_push (gimple_assign_lhs (stmt)); |
0e0ed594 JL |
5365 | } |
5366 | for (son = first_dom_son (CDI_DOMINATORS, bb); | |
5367 | son; | |
5368 | son = next_dom_son (CDI_DOMINATORS, son)) | |
5369 | break_up_subtract_bb (son); | |
5370 | } | |
5371 | ||
a6f8851e | 5372 | /* Used for repeated factor analysis. */ |
526ceb68 | 5373 | struct repeat_factor |
a6f8851e BS |
5374 | { |
5375 | /* An SSA name that occurs in a multiply chain. */ | |
5376 | tree factor; | |
5377 | ||
5378 | /* Cached rank of the factor. */ | |
5379 | unsigned rank; | |
5380 | ||
5381 | /* Number of occurrences of the factor in the chain. */ | |
5382 | HOST_WIDE_INT count; | |
5383 | ||
5384 | /* An SSA name representing the product of this factor and | |
5385 | all factors appearing later in the repeated factor vector. */ | |
5386 | tree repr; | |
5387 | }; | |
5388 | ||
a6f8851e | 5389 | |
9771b263 | 5390 | static vec<repeat_factor> repeat_factor_vec; |
a6f8851e BS |
5391 | |
5392 | /* Used for sorting the repeat factor vector. Sort primarily by | |
5393 | ascending occurrence count, secondarily by descending rank. */ | |
5394 | ||
5395 | static int | |
5396 | compare_repeat_factors (const void *x1, const void *x2) | |
5397 | { | |
526ceb68 TS |
5398 | const repeat_factor *rf1 = (const repeat_factor *) x1; |
5399 | const repeat_factor *rf2 = (const repeat_factor *) x2; | |
a6f8851e BS |
5400 | |
5401 | if (rf1->count != rf2->count) | |
5402 | return rf1->count - rf2->count; | |
5403 | ||
5404 | return rf2->rank - rf1->rank; | |
5405 | } | |
5406 | ||
a6f8851e BS |
5407 | /* Look for repeated operands in OPS in the multiply tree rooted at |
5408 | STMT. Replace them with an optimal sequence of multiplies and powi | |
917a5202 BS |
5409 | builtin calls, and remove the used operands from OPS. Return an |
5410 | SSA name representing the value of the replacement sequence. */ | |
a6f8851e | 5411 | |
917a5202 | 5412 | static tree |
526ceb68 | 5413 | attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops) |
a6f8851e BS |
5414 | { |
5415 | unsigned i, j, vec_len; | |
5416 | int ii; | |
526ceb68 TS |
5417 | operand_entry *oe; |
5418 | repeat_factor *rf1, *rf2; | |
a6f8851e | 5419 | repeat_factor rfnew; |
917a5202 | 5420 | tree result = NULL_TREE; |
a6f8851e BS |
5421 | tree target_ssa, iter_result; |
5422 | tree type = TREE_TYPE (gimple_get_lhs (stmt)); | |
5423 | tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); | |
5424 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
355fe088 | 5425 | gimple *mul_stmt, *pow_stmt; |
a6f8851e BS |
5426 | |
5427 | /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and | |
5428 | target. */ | |
5429 | if (!powi_fndecl) | |
917a5202 | 5430 | return NULL_TREE; |
a6f8851e BS |
5431 | |
5432 | /* Allocate the repeated factor vector. */ | |
9771b263 | 5433 | repeat_factor_vec.create (10); |
a6f8851e BS |
5434 | |
5435 | /* Scan the OPS vector for all SSA names in the product and build | |
5436 | up a vector of occurrence counts for each factor. */ | |
9771b263 | 5437 | FOR_EACH_VEC_ELT (*ops, i, oe) |
a6f8851e BS |
5438 | { |
5439 | if (TREE_CODE (oe->op) == SSA_NAME) | |
5440 | { | |
9771b263 | 5441 | FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) |
a6f8851e BS |
5442 | { |
5443 | if (rf1->factor == oe->op) | |
5444 | { | |
5445 | rf1->count += oe->count; | |
5446 | break; | |
5447 | } | |
5448 | } | |
5449 | ||
9771b263 | 5450 | if (j >= repeat_factor_vec.length ()) |
a6f8851e BS |
5451 | { |
5452 | rfnew.factor = oe->op; | |
5453 | rfnew.rank = oe->rank; | |
5454 | rfnew.count = oe->count; | |
5455 | rfnew.repr = NULL_TREE; | |
9771b263 | 5456 | repeat_factor_vec.safe_push (rfnew); |
a6f8851e BS |
5457 | } |
5458 | } | |
5459 | } | |
5460 | ||
5461 | /* Sort the repeated factor vector by (a) increasing occurrence count, | |
5462 | and (b) decreasing rank. */ | |
9771b263 | 5463 | repeat_factor_vec.qsort (compare_repeat_factors); |
a6f8851e BS |
5464 | |
5465 | /* It is generally best to combine as many base factors as possible | |
5466 | into a product before applying __builtin_powi to the result. | |
5467 | However, the sort order chosen for the repeated factor vector | |
5468 | allows us to cache partial results for the product of the base | |
5469 | factors for subsequent use. When we already have a cached partial | |
5470 | result from a previous iteration, it is best to make use of it | |
5471 | before looking for another __builtin_pow opportunity. | |
5472 | ||
5473 | As an example, consider x * x * y * y * y * z * z * z * z. | |
5474 | We want to first compose the product x * y * z, raise it to the | |
5475 | second power, then multiply this by y * z, and finally multiply | |
5476 | by z. This can be done in 5 multiplies provided we cache y * z | |
5477 | for use in both expressions: | |
5478 | ||
5479 | t1 = y * z | |
5480 | t2 = t1 * x | |
5481 | t3 = t2 * t2 | |
5482 | t4 = t1 * t3 | |
5483 | result = t4 * z | |
5484 | ||
5485 | If we instead ignored the cached y * z and first multiplied by | |
5486 | the __builtin_pow opportunity z * z, we would get the inferior: | |
5487 | ||
5488 | t1 = y * z | |
5489 | t2 = t1 * x | |
5490 | t3 = t2 * t2 | |
5491 | t4 = z * z | |
5492 | t5 = t3 * t4 | |
5493 | result = t5 * y */ | |
5494 | ||
9771b263 | 5495 | vec_len = repeat_factor_vec.length (); |
a6f8851e BS |
5496 | |
5497 | /* Repeatedly look for opportunities to create a builtin_powi call. */ | |
5498 | while (true) | |
5499 | { | |
5500 | HOST_WIDE_INT power; | |
5501 | ||
5502 | /* First look for the largest cached product of factors from | |
5503 | preceding iterations. If found, create a builtin_powi for | |
5504 | it if the minimum occurrence count for its factors is at | |
5505 | least 2, or just use this cached product as our next | |
5506 | multiplicand if the minimum occurrence count is 1. */ | |
9771b263 | 5507 | FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) |
a6f8851e BS |
5508 | { |
5509 | if (rf1->repr && rf1->count > 0) | |
5510 | break; | |
5511 | } | |
5512 | ||
5513 | if (j < vec_len) | |
5514 | { | |
5515 | power = rf1->count; | |
5516 | ||
5517 | if (power == 1) | |
5518 | { | |
5519 | iter_result = rf1->repr; | |
5520 | ||
5521 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5522 | { | |
5523 | unsigned elt; | |
526ceb68 | 5524 | repeat_factor *rf; |
a6f8851e BS |
5525 | fputs ("Multiplying by cached product ", dump_file); |
5526 | for (elt = j; elt < vec_len; elt++) | |
5527 | { | |
9771b263 | 5528 | rf = &repeat_factor_vec[elt]; |
ef6cb4c7 | 5529 | print_generic_expr (dump_file, rf->factor); |
a6f8851e BS |
5530 | if (elt < vec_len - 1) |
5531 | fputs (" * ", dump_file); | |
5532 | } | |
5533 | fputs ("\n", dump_file); | |
5534 | } | |
5535 | } | |
5536 | else | |
5537 | { | |
83d5977e | 5538 | iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); |
a6f8851e BS |
5539 | pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, |
5540 | build_int_cst (integer_type_node, | |
5541 | power)); | |
5542 | gimple_call_set_lhs (pow_stmt, iter_result); | |
5543 | gimple_set_location (pow_stmt, gimple_location (stmt)); | |
8ebcad86 | 5544 | gimple_set_uid (pow_stmt, gimple_uid (stmt)); |
a6f8851e BS |
5545 | gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); |
5546 | ||
5547 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5548 | { | |
5549 | unsigned elt; | |
526ceb68 | 5550 | repeat_factor *rf; |
a6f8851e BS |
5551 | fputs ("Building __builtin_pow call for cached product (", |
5552 | dump_file); | |
5553 | for (elt = j; elt < vec_len; elt++) | |
5554 | { | |
9771b263 | 5555 | rf = &repeat_factor_vec[elt]; |
ef6cb4c7 | 5556 | print_generic_expr (dump_file, rf->factor); |
a6f8851e BS |
5557 | if (elt < vec_len - 1) |
5558 | fputs (" * ", dump_file); | |
5559 | } | |
16998094 | 5560 | fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", |
1ede5f2c | 5561 | power); |
a6f8851e BS |
5562 | } |
5563 | } | |
5564 | } | |
5565 | else | |
5566 | { | |
5567 | /* Otherwise, find the first factor in the repeated factor | |
5568 | vector whose occurrence count is at least 2. If no such | |
5569 | factor exists, there are no builtin_powi opportunities | |
5570 | remaining. */ | |
9771b263 | 5571 | FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) |
a6f8851e BS |
5572 | { |
5573 | if (rf1->count >= 2) | |
5574 | break; | |
5575 | } | |
5576 | ||
5577 | if (j >= vec_len) | |
5578 | break; | |
5579 | ||
5580 | power = rf1->count; | |
5581 | ||
5582 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5583 | { | |
5584 | unsigned elt; | |
526ceb68 | 5585 | repeat_factor *rf; |
a6f8851e BS |
5586 | fputs ("Building __builtin_pow call for (", dump_file); |
5587 | for (elt = j; elt < vec_len; elt++) | |
5588 | { | |
9771b263 | 5589 | rf = &repeat_factor_vec[elt]; |
ef6cb4c7 | 5590 | print_generic_expr (dump_file, rf->factor); |
a6f8851e BS |
5591 | if (elt < vec_len - 1) |
5592 | fputs (" * ", dump_file); | |
5593 | } | |
16998094 | 5594 | fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power); |
a6f8851e BS |
5595 | } |
5596 | ||
5597 | reassociate_stats.pows_created++; | |
5598 | ||
5599 | /* Visit each element of the vector in reverse order (so that | |
5600 | high-occurrence elements are visited first, and within the | |
5601 | same occurrence count, lower-ranked elements are visited | |
5602 | first). Form a linear product of all elements in this order | |
5603 | whose occurrencce count is at least that of element J. | |
5604 | Record the SSA name representing the product of each element | |
5605 | with all subsequent elements in the vector. */ | |
5606 | if (j == vec_len - 1) | |
5607 | rf1->repr = rf1->factor; | |
5608 | else | |
5609 | { | |
5610 | for (ii = vec_len - 2; ii >= (int)j; ii--) | |
5611 | { | |
5612 | tree op1, op2; | |
5613 | ||
9771b263 DN |
5614 | rf1 = &repeat_factor_vec[ii]; |
5615 | rf2 = &repeat_factor_vec[ii + 1]; | |
a6f8851e BS |
5616 | |
5617 | /* Init the last factor's representative to be itself. */ | |
5618 | if (!rf2->repr) | |
5619 | rf2->repr = rf2->factor; | |
5620 | ||
5621 | op1 = rf1->factor; | |
5622 | op2 = rf2->repr; | |
5623 | ||
83d5977e | 5624 | target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); |
0d0e4a03 JJ |
5625 | mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR, |
5626 | op1, op2); | |
a6f8851e | 5627 | gimple_set_location (mul_stmt, gimple_location (stmt)); |
8ebcad86 | 5628 | gimple_set_uid (mul_stmt, gimple_uid (stmt)); |
a6f8851e BS |
5629 | gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); |
5630 | rf1->repr = target_ssa; | |
5631 | ||
5632 | /* Don't reprocess the multiply we just introduced. */ | |
5633 | gimple_set_visited (mul_stmt, true); | |
5634 | } | |
5635 | } | |
5636 | ||
5637 | /* Form a call to __builtin_powi for the maximum product | |
5638 | just formed, raised to the power obtained earlier. */ | |
9771b263 | 5639 | rf1 = &repeat_factor_vec[j]; |
83d5977e | 5640 | iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); |
a6f8851e BS |
5641 | pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, |
5642 | build_int_cst (integer_type_node, | |
5643 | power)); | |
5644 | gimple_call_set_lhs (pow_stmt, iter_result); | |
5645 | gimple_set_location (pow_stmt, gimple_location (stmt)); | |
8ebcad86 | 5646 | gimple_set_uid (pow_stmt, gimple_uid (stmt)); |
a6f8851e BS |
5647 | gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); |
5648 | } | |
5649 | ||
917a5202 BS |
5650 | /* If we previously formed at least one other builtin_powi call, |
5651 | form the product of this one and those others. */ | |
5652 | if (result) | |
5653 | { | |
83d5977e | 5654 | tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); |
0d0e4a03 JJ |
5655 | mul_stmt = gimple_build_assign (new_result, MULT_EXPR, |
5656 | result, iter_result); | |
917a5202 | 5657 | gimple_set_location (mul_stmt, gimple_location (stmt)); |
8ebcad86 | 5658 | gimple_set_uid (mul_stmt, gimple_uid (stmt)); |
917a5202 BS |
5659 | gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); |
5660 | gimple_set_visited (mul_stmt, true); | |
5661 | result = new_result; | |
5662 | } | |
5663 | else | |
5664 | result = iter_result; | |
a6f8851e BS |
5665 | |
5666 | /* Decrement the occurrence count of each element in the product | |
5667 | by the count found above, and remove this many copies of each | |
5668 | factor from OPS. */ | |
5669 | for (i = j; i < vec_len; i++) | |
5670 | { | |
5671 | unsigned k = power; | |
5672 | unsigned n; | |
5673 | ||
9771b263 | 5674 | rf1 = &repeat_factor_vec[i]; |
a6f8851e BS |
5675 | rf1->count -= power; |
5676 | ||
9771b263 | 5677 | FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) |
a6f8851e BS |
5678 | { |
5679 | if (oe->op == rf1->factor) | |
5680 | { | |
5681 | if (oe->count <= k) | |
5682 | { | |
9771b263 | 5683 | ops->ordered_remove (n); |
a6f8851e BS |
5684 | k -= oe->count; |
5685 | ||
5686 | if (k == 0) | |
5687 | break; | |
5688 | } | |
5689 | else | |
5690 | { | |
5691 | oe->count -= k; | |
5692 | break; | |
5693 | } | |
5694 | } | |
5695 | } | |
5696 | } | |
5697 | } | |
5698 | ||
5699 | /* At this point all elements in the repeated factor vector have a | |
5700 | remaining occurrence count of 0 or 1, and those with a count of 1 | |
5701 | don't have cached representatives. Re-sort the ops vector and | |
5702 | clean up. */ | |
9771b263 DN |
5703 | ops->qsort (sort_by_operand_rank); |
5704 | repeat_factor_vec.release (); | |
917a5202 BS |
5705 | |
5706 | /* Return the final product computed herein. Note that there may | |
5707 | still be some elements with single occurrence count left in OPS; | |
5708 | those will be handled by the normal reassociation logic. */ | |
5709 | return result; | |
5710 | } | |
5711 | ||
0155ad40 MP |
5712 | /* Attempt to optimize |
5713 | CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or | |
5714 | CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */ | |
5715 | ||
5716 | static void | |
5717 | attempt_builtin_copysign (vec<operand_entry *> *ops) | |
5718 | { | |
5719 | operand_entry *oe; | |
5720 | unsigned int i; | |
5721 | unsigned int length = ops->length (); | |
5722 | tree cst = ops->last ()->op; | |
5723 | ||
5724 | if (length == 1 || TREE_CODE (cst) != REAL_CST) | |
5725 | return; | |
5726 | ||
5727 | FOR_EACH_VEC_ELT (*ops, i, oe) | |
5728 | { | |
5729 | if (TREE_CODE (oe->op) == SSA_NAME | |
5730 | && has_single_use (oe->op)) | |
5731 | { | |
5732 | gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op); | |
314709cd | 5733 | if (gcall *old_call = dyn_cast <gcall *> (def_stmt)) |
0155ad40 | 5734 | { |
0155ad40 | 5735 | tree arg0, arg1; |
314709cd | 5736 | switch (gimple_call_combined_fn (old_call)) |
0155ad40 | 5737 | { |
314709cd | 5738 | CASE_CFN_COPYSIGN: |
ee5fd23a | 5739 | CASE_CFN_COPYSIGN_FN: |
314709cd RS |
5740 | arg0 = gimple_call_arg (old_call, 0); |
5741 | arg1 = gimple_call_arg (old_call, 1); | |
0155ad40 MP |
5742 | /* The first argument of copysign must be a constant, |
5743 | otherwise there's nothing to do. */ | |
5744 | if (TREE_CODE (arg0) == REAL_CST) | |
5745 | { | |
314709cd RS |
5746 | tree type = TREE_TYPE (arg0); |
5747 | tree mul = const_binop (MULT_EXPR, type, cst, arg0); | |
0155ad40 MP |
5748 | /* If we couldn't fold to a single constant, skip it. |
5749 | That happens e.g. for inexact multiplication when | |
5750 | -frounding-math. */ | |
5751 | if (mul == NULL_TREE) | |
5752 | break; | |
314709cd RS |
5753 | /* Instead of adjusting OLD_CALL, let's build a new |
5754 | call to not leak the LHS and prevent keeping bogus | |
5755 | debug statements. DCE will clean up the old call. */ | |
5756 | gcall *new_call; | |
5757 | if (gimple_call_internal_p (old_call)) | |
5758 | new_call = gimple_build_call_internal | |
5759 | (IFN_COPYSIGN, 2, mul, arg1); | |
5760 | else | |
5761 | new_call = gimple_build_call | |
5762 | (gimple_call_fndecl (old_call), 2, mul, arg1); | |
5763 | tree lhs = make_ssa_name (type); | |
5764 | gimple_call_set_lhs (new_call, lhs); | |
5765 | gimple_set_location (new_call, | |
5766 | gimple_location (old_call)); | |
5767 | insert_stmt_after (new_call, old_call); | |
0155ad40 MP |
5768 | /* We've used the constant, get rid of it. */ |
5769 | ops->pop (); | |
5770 | bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst)); | |
5771 | /* Handle the CST1 < 0 case by negating the result. */ | |
5772 | if (cst1_neg) | |
5773 | { | |
5774 | tree negrhs = make_ssa_name (TREE_TYPE (lhs)); | |
5775 | gimple *negate_stmt | |
5776 | = gimple_build_assign (negrhs, NEGATE_EXPR, lhs); | |
314709cd | 5777 | insert_stmt_after (negate_stmt, new_call); |
0155ad40 MP |
5778 | oe->op = negrhs; |
5779 | } | |
5780 | else | |
5781 | oe->op = lhs; | |
5782 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5783 | { | |
5784 | fprintf (dump_file, "Optimizing copysign: "); | |
ef6cb4c7 | 5785 | print_generic_expr (dump_file, cst); |
314709cd | 5786 | fprintf (dump_file, " * COPYSIGN ("); |
ef6cb4c7 | 5787 | print_generic_expr (dump_file, arg0); |
0155ad40 | 5788 | fprintf (dump_file, ", "); |
ef6cb4c7 | 5789 | print_generic_expr (dump_file, arg1); |
314709cd | 5790 | fprintf (dump_file, ") into %sCOPYSIGN (", |
0155ad40 | 5791 | cst1_neg ? "-" : ""); |
ef6cb4c7 | 5792 | print_generic_expr (dump_file, mul); |
0155ad40 | 5793 | fprintf (dump_file, ", "); |
ef6cb4c7 | 5794 | print_generic_expr (dump_file, arg1); |
0155ad40 MP |
5795 | fprintf (dump_file, "\n"); |
5796 | } | |
5797 | return; | |
5798 | } | |
5799 | break; | |
5800 | default: | |
5801 | break; | |
5802 | } | |
5803 | } | |
5804 | } | |
5805 | } | |
5806 | } | |
5807 | ||
917a5202 BS |
5808 | /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */ |
5809 | ||
5810 | static void | |
355fe088 | 5811 | transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs) |
917a5202 BS |
5812 | { |
5813 | tree rhs1; | |
5814 | ||
5815 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5816 | { | |
5817 | fprintf (dump_file, "Transforming "); | |
ef6cb4c7 | 5818 | print_gimple_stmt (dump_file, stmt, 0); |
917a5202 BS |
5819 | } |
5820 | ||
5821 | rhs1 = gimple_assign_rhs1 (stmt); | |
5822 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
5823 | update_stmt (stmt); | |
5824 | remove_visited_stmt_chain (rhs1); | |
5825 | ||
5826 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5827 | { | |
5828 | fprintf (dump_file, " into "); | |
ef6cb4c7 | 5829 | print_gimple_stmt (dump_file, stmt, 0); |
917a5202 BS |
5830 | } |
5831 | } | |
5832 | ||
5833 | /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ | |
5834 | ||
5835 | static void | |
355fe088 | 5836 | transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt, |
917a5202 BS |
5837 | tree rhs1, tree rhs2) |
5838 | { | |
5839 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5840 | { | |
5841 | fprintf (dump_file, "Transforming "); | |
ef6cb4c7 | 5842 | print_gimple_stmt (dump_file, stmt, 0); |
917a5202 BS |
5843 | } |
5844 | ||
be7493ca | 5845 | gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); |
917a5202 BS |
5846 | update_stmt (gsi_stmt (*gsi)); |
5847 | remove_visited_stmt_chain (rhs1); | |
5848 | ||
5849 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
5850 | { | |
5851 | fprintf (dump_file, " into "); | |
ef6cb4c7 | 5852 | print_gimple_stmt (dump_file, stmt, 0); |
917a5202 | 5853 | } |
a6f8851e BS |
5854 | } |
5855 | ||
/* Reassociate expressions in basic block BB and its post-dominator as
   children.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  /* Try turning a chain of range comparisons ending this block into
     bit tests / a single comparison; that may request a CFG cleanup.  */
  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  /* Walk the block backwards; statements already consumed by an
     earlier linearization are marked visited and handled below.  */
  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (cfun, stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if (rhs_code != BIT_IOR_EXPR
	      && rhs_code != BIT_AND_EXPR
	      && rhs_code != BIT_XOR_EXPR
	      && rhs_code != MIN_EXPR
	      && rhs_code != MAX_EXPR
	      && (!can_reassociate_p (lhs)
		  || !can_reassociate_p (rhs1)
		  || !can_reassociate_p (rhs2)))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry *> ops;
	      tree powi_result = NULL_TREE;
	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      /* Flatten the whole op tree rooted at STMT into OPS and
		 canonicalize by rank; feeding statements get marked
		 visited in the process.  */
	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      int orig_len = ops.length ();
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      /* x + x + x -> 3 * x style folding may re-rank OPS.  */
	      if (rhs_code == PLUS_EXPR
		  && transform_add_to_multiply (&ops))
		ops.qsort (sort_by_operand_rank);

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		{
		  if (is_vector)
		    optimize_vec_cond_expr (rhs_code, &ops);
		  else
		    optimize_range_tests (rhs_code, &ops, NULL);
		}

	      if (rhs_code == MULT_EXPR && !is_vector)
		{
		  attempt_builtin_copysign (&ops);

		  if (reassoc_insert_powi_p
		      && flag_unsafe_math_optimizations)
		    powi_result = attempt_builtin_powi (stmt, &ops);
		}

	      /* If the last multiplicative operand is -1, drop it here
		 and remember to materialize a negation of the final
		 result instead (unless SNaNs / signed zeros forbid it).  */
	      operand_entry *last;
	      bool negate_result = false;
	      if (ops.length () > 1
		  && rhs_code == MULT_EXPR)
		{
		  last = ops.last ();
		  if ((integer_minus_onep (last->op)
		       || real_minus_onep (last->op))
		      && !HONOR_SNANS (TREE_TYPE (lhs))
		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
		    {
		      ops.pop ();
		      negate_result = true;
		    }
		}

	      tree new_lhs = lhs;
	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  /* If the stmt that defines operand has to be inserted, insert it
		     before the use.  */
		  if (ops.last ()->stmt_to_insert)
		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width;

		  /* For binary bit operations, if there are at least 3
		     operands and the last operand in OPS is a constant,
		     move it to the front.  This helps ensure that we generate
		     (X & Y) & C rather than (X & C) & Y.  The former will
		     often match a canonical bit test when we get to RTL.  */
		  if (ops.length () > 2
		      && (rhs_code == BIT_AND_EXPR
			  || rhs_code == BIT_IOR_EXPR
			  || rhs_code == BIT_XOR_EXPR)
		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
		    std::swap (*ops[0], *ops[ops_num - 1]);

		  /* Only rewrite the expression tree to parallel in the
		     last reassoc pass to avoid useless work back-and-forth
		     with initial linearization.  */
		  if (!reassoc_insert_powi_p
		      && ops.length () > 3
		      && (width = get_reassociation_width (ops_num, rhs_code,
							   mode)) > 1)
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file,
				 "Width = %d was chosen for reassociation\n",
				 width);
		      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
						  width, ops);
		    }
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3)
			swap_ops_for_binary_stmt (ops, len - 3, stmt);

		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
						   powi_result != NULL
						   || negate_result,
						   len != orig_len);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			{
			  target_ssa = new_lhs;
			  new_lhs = lhs;
			}
		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
						      powi_result, target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}

	      /* Materialize the negation deferred when the -1 factor
		 was popped above: redirect the rewritten statement to a
		 fresh name and assign LHS = -tmp.  */
	      if (negate_result)
		{
		  stmt = SSA_NAME_DEF_STMT (lhs);
		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
		  gimple_set_lhs (stmt, tmp);
		  if (lhs != new_lhs)
		    tmp = new_lhs;
		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
							   tmp);
		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
		  update_stmt (stmt);
		}
	    }
	}
    }
  /* Recurse into the post-dominator children of BB.  */
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
6104 | ||
73049af5 JJ |
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      /* The queued VAR must have exactly one use: the BIT_IOR_EXPR in
	 the same block that combines it with the bit-test result.  */
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      /* Split twice: COND_BB ends after VAR's definition, THEN_BB holds
	 the bit-test sequence, MERGE_BB receives the result PHI.  */
      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
				     build_zero_cst (TREE_TYPE (var)),
				     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      /* New short-circuit edge: VAR != 0 jumps straight to MERGE_BB,
	 skipping the bit test; split profile 50/50 between the edges.  */
      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      /* Identify the bit-test result operand of the BIT_IOR_EXPR.  */
      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();
      /* Replace RES = VAR | OTHERVAR with a PHI merging 1 (short-circuit
	 path) and OTHERVAR (fall-through path).  */
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
6178 | ||
526ceb68 TS |
6179 | void dump_ops_vector (FILE *file, vec<operand_entry *> ops); |
6180 | void debug_ops_vector (vec<operand_entry *> ops); | |
0e0ed594 JL |
6181 | |
6182 | /* Dump the operand entry vector OPS to FILE. */ | |
6183 | ||
6184 | void | |
526ceb68 | 6185 | dump_ops_vector (FILE *file, vec<operand_entry *> ops) |
0e0ed594 | 6186 | { |
526ceb68 | 6187 | operand_entry *oe; |
0e0ed594 JL |
6188 | unsigned int i; |
6189 | ||
9771b263 | 6190 | FOR_EACH_VEC_ELT (ops, i, oe) |
0e0ed594 JL |
6191 | { |
6192 | fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); | |
ef6cb4c7 | 6193 | print_generic_expr (file, oe->op); |
f28ff5b5 | 6194 | fprintf (file, "\n"); |
0e0ed594 JL |
6195 | } |
6196 | } | |
6197 | ||
/* Dump the operand entry vector OPS to STDERR.  Convenience wrapper
   around dump_ops_vector, intended for interactive use in a debugger.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}
6205 | ||
8010f31f JL |
6206 | /* Bubble up return status from reassociate_bb. */ |
6207 | ||
6208 | static bool | |
0e0ed594 JL |
6209 | do_reassoc (void) |
6210 | { | |
fefa31b5 | 6211 | break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
8010f31f | 6212 | return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); |
0e0ed594 JL |
6213 | } |
6214 | ||
/* Initialize the reassociation pass.  Sets up global state: loop info,
   statistics, the operand-rank map and the per-BB rank array.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  The low 16 bits of a rank are reserved
     for within-block ordering, hence the shift.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  /* reassociate_bb walks the post-dominator tree; compute it now.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
6257 | ||
/* Cleanup after the reassociation pass, and print stats if
   requested.  Reports the pass counters and releases every global
   allocated in init_reassoc.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  /* Releasing the pool frees all operand entries in one go.  */
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
6284 | ||
2162bfe1 | 6285 | /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable |
8010f31f JL |
6286 | insertion of __builtin_powi calls. |
6287 | ||
6288 | Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to | |
6289 | optimization of a gimple conditional. Otherwise returns zero. */ | |
0e0ed594 | 6290 | |
c2924966 | 6291 | static unsigned int |
2162bfe1 | 6292 | execute_reassoc (bool insert_powi_p) |
0e0ed594 | 6293 | { |
2162bfe1 TV |
6294 | reassoc_insert_powi_p = insert_powi_p; |
6295 | ||
012309e6 | 6296 | init_reassoc (); |
0e0ed594 | 6297 | |
8010f31f JL |
6298 | bool cfg_cleanup_needed; |
6299 | cfg_cleanup_needed = do_reassoc (); | |
0e0ed594 | 6300 | repropagate_negates (); |
73049af5 | 6301 | branch_fixup (); |
0e0ed594 | 6302 | |
012309e6 | 6303 | fini_reassoc (); |
8010f31f | 6304 | return cfg_cleanup_needed ? TODO_cleanup_cfg : 0; |
012309e6 DB |
6305 | } |
6306 | ||
27a4cd48 DM |
namespace {

/* Metadata for the reassociation pass; see pass_data in tree-pass.h.  */
const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

/* Pass wrapper: gated on -ftree-reassoc, dispatches to
   execute_reassoc.  The pass is cloned so it can run more than once;
   the single bool parameter selects __builtin_powi insertion.  */
class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      /* Only one parameter is supported; see passes.def.  */
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace
6347 | ||
/* Factory entry point used by the pass manager: allocate a new
   reassociation pass object for CTXT.  */

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}