/* Reassociation for trees.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "cfghooks.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "tm_p.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "target.h"
#include "params.h"
#include "diagnostic-core.h"
#include "builtins.h"
#include "gimplify.h"
#include "insn-codes.h"
#include "optabs.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_t

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   Because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */


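/* An end-to-end illustration of the steps above (added for exposition;
   not from the original sources).  Given

     t = (a - b) + (c - a)

   step 1 rewrites the subtracts as a + (-b) and c + (-a), step 2
   linearizes the chain into the operand list {a, -b, c, -a}, step 3
   cancels a + (-a) to 0, and step 5 turns the surviving -b + c back
   into c - b, so the whole expression becomes t = c - b.  */
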
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static object_allocator<operand_entry> operand_entry_pool ("operand entry pool",
							   30);

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

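  /* gsi_remove may queue debug temps in STMT's place; they are created
     with uid 0 (the assert below relies on this).  Record where STMT
     was so the loop below can give those debug stmts STMT's uid and
     keep the uid order of the block intact (note added for
     exposition).  */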
  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (phi_stmt) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
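
/* Illustrative note (added): if x_1 is a loop-carried phi carrying the
   bias and we see b = a + x_1, the bias is deliberately not inherited;
   b gets rank (rank of a) + 1, so only the phi itself is forced to the
   end of the operand order.  */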

/* Look up the operand rank structure for expression E.  */

static inline long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple stmt;
      long rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e, 0);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0.  */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oeb->op) - constant_type (oea->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
	  && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
	  && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	{
	  gimple stmta = SSA_NAME_DEF_STMT (oea->op);
	  gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
	  basic_block bba = gimple_bb (stmta);
	  basic_block bbb = gimple_bb (stmtb);
	  if (bbb != bba)
	    {
	      if (bb_rank[bbb->index] != bb_rank[bba->index])
		return bb_rank[bbb->index] - bb_rank[bba->index];
	    }
	  else
	    {
	      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
	      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
	      if (da != db)
		return da ? 1 : -1;
	    }
	}

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
	return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}

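/* For illustration (added): sorting {x_7 (rank 3), 4 (rank 0),
   y_2 (rank 5)} with the comparator above yields {y_2, x_7, 4} -
   higher ranks first, constants (rank 0) at the end, where the
   optimization routines can fold adjacent ones.  */
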
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (bb == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}


/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL_TREE.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry_t> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry_t curr,
			  operand_entry_t last)
{

  /* If we have two of the same op, and the opcode is &, |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op, 0);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->create (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */
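
/* For example (added for illustration): with OPS = {t_4, a_1, b_2} and
   t_4 = -a_1, the scan below finds a_1, removes both entries, and pushes
   a literal 0, leaving b_2 + 0 to be cleaned up later.  Likewise
   t_4 = ~a_1 collapses with a_1 to -1.  */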

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry_t> *ops,
			   unsigned int currindex,
			   operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank (the negate statement is computed from it, which
     adds one).  So once we hit those ranks, if we don't find one, we
     can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == negateop)
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop, 0);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (oe->op == notop)
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates ().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, the whole operand list collapses to
   0 (for AND) or -1 (for IOR), and we return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry_t> *ops,
		     unsigned int currindex,
		     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}

/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (1);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}


static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
				 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

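/* Note (added): values stored in the table are indices into CVEC
   biased by 42, because int_hash <int, 0, 1> reserves the values 0
   and 1 for the empty and deleted markers; hence the
   'cvec.length () + 41' stores and the 'p - 42' lookups below.  Any
   bias >= 2 would do.  */
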
/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return (c1->oecode == c2->oecode
	  && c1->op == c2->op);
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple stmt, tree op)
{
  tree fndecl;

  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (fndecl))
    {
    CASE_FLT_FN (BUILT_IN_POW):
    CASE_FLT_FN (BUILT_IN_POWI):
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */
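
/* For instance (added for illustration): with the chain
     x_1 = a_2 * b_3
     x_4 = x_1 * c_5
   calling zero_one_operation (&def, MULT_EXPR, b_3) where def is x_4
   walks from x_4 to x_1, finds b_3 there, and propagates a_2 into
   x_1's single use, leaving x_4 = a_2 * c_5.  */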

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      if (opcode == MULT_EXPR
	  && stmt_is_power_of_op (stmt, op))
	{
	  if (decrement_power (stmt) == 1)
	    propagate_op_to_single_use (op, stmt, def);
	  return;
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  propagate_op_to_single_use (name, stmt, def);
	  return;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      return;
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}

1199 | stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */ | |
c9dbb922 | 1200 | |
82cb4e1c | 1201 | static bool |
1202 | reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2) | |
c9dbb922 | 1203 | { |
82cb4e1c | 1204 | basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); |
1205 | ||
1206 | /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D) | |
1207 | SSA_NAME. Assume it lives at the beginning of function and | |
1208 | thus dominates everything. */ | |
1209 | if (!bb1 || s1 == s2) | |
1210 | return true; | |
1211 | ||
1212 | /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */ | |
1213 | if (!bb2) | |
1214 | return false; | |
1215 | ||
1216 | if (bb1 == bb2) | |
1217 | { | |
1218 | /* PHIs in the same basic block are assumed to be | |
1219 | executed all in parallel, if only one stmt is a PHI, | |
1220 | it dominates the other stmt in the same basic block. */ | |
1221 | if (gimple_code (s1) == GIMPLE_PHI) | |
1222 | return true; | |
1223 | ||
1224 | if (gimple_code (s2) == GIMPLE_PHI) | |
1225 | return false; | |
1226 | ||
1227 | gcc_assert (gimple_uid (s1) && gimple_uid (s2)); | |
1228 | ||
1229 | if (gimple_uid (s1) < gimple_uid (s2)) | |
1230 | return true; | |
1231 | ||
1232 | if (gimple_uid (s1) > gimple_uid (s2)) | |
1233 | return false; | |
1234 | ||
1235 | gimple_stmt_iterator gsi = gsi_for_stmt (s1); | |
1236 | unsigned int uid = gimple_uid (s1); | |
1237 | for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1238 | { | |
1239 | gimple s = gsi_stmt (gsi); | |
1240 | if (gimple_uid (s) != uid) | |
1241 | break; | |
1242 | if (s == s2) | |
1243 | return true; | |
1244 | } | |
1245 | ||
1246 | return false; | |
1247 | } | |
1248 | ||
1249 | return dominated_by_p (CDI_DOMINATORS, bb2, bb1); | |
1250 | } | |
1251 | ||
/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple stmt, gimple insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Builds one statement performing OP1 OPCODE OP2, making a new SSA name
   for the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

   - First we walk the addition chain *OPS looking for summands that
   are defined by a multiplication or a real division.  This results
   in the candidates bitmap with relevant indices into *OPS.

   - Second we build the chains of multiplications or divisions for
   these candidates, counting the number of occurrences of (operand, code)
   pairs in all of the candidates chains.

   - Third we sort the (operand, code) pairs by number of occurrences and
   process them starting with the pair with the most uses.

   * For each such pair we walk the candidates again to build a
   second candidate bitmap noting all multiplication/division chains
   that have at least one occurrence of (operand, code).

   * We build an alternate addition chain only covering these
   candidates with one (operand, code) operation removed from their
   multiplication/division chain.

   * The first candidate gets replaced by the alternate addition chain
   multiplied/divided by the operand.

   * All candidate chains get disabled for further processing and
   processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
1375 | static bool | |
1376 | undistribute_ops_list (enum tree_code opcode, | |
f1f41a6c | 1377 | vec<operand_entry_t> *ops, struct loop *loop) |
dddf5036 | 1378 | { |
f1f41a6c | 1379 | unsigned int length = ops->length (); |
dddf5036 | 1380 | operand_entry_t oe1; |
1381 | unsigned i, j; | |
1382 | sbitmap candidates, candidates2; | |
1383 | unsigned nr_candidates, nr_candidates2; | |
1384 | sbitmap_iterator sbi0; | |
f1f41a6c | 1385 | vec<operand_entry_t> *subops; |
dddf5036 | 1386 | bool changed = false; |
17b5ea6f | 1387 | int next_oecount_id = 0; |
dddf5036 | 1388 | |
1389 | if (length <= 1 | |
1390 | || opcode != PLUS_EXPR) | |
1391 | return false; | |
1392 | ||
1393 | /* Build a list of candidates to process. */ | |
1394 | candidates = sbitmap_alloc (length); | |
53c5d9d4 | 1395 | bitmap_clear (candidates); |
dddf5036 | 1396 | nr_candidates = 0; |
f1f41a6c | 1397 | FOR_EACH_VEC_ELT (*ops, i, oe1) |
dddf5036 | 1398 | { |
1399 | enum tree_code dcode; | |
1400 | gimple oe1def; | |
1401 | ||
1402 | if (TREE_CODE (oe1->op) != SSA_NAME) | |
1403 | continue; | |
1404 | oe1def = SSA_NAME_DEF_STMT (oe1->op); | |
1405 | if (!is_gimple_assign (oe1def)) | |
1406 | continue; | |
1407 | dcode = gimple_assign_rhs_code (oe1def); | |
1408 | if ((dcode != MULT_EXPR | |
1409 | && dcode != RDIV_EXPR) | |
1410 | || !is_reassociable_op (oe1def, dcode, loop)) | |
1411 | continue; | |
1412 | ||
08b7917c | 1413 | bitmap_set_bit (candidates, i); |
dddf5036 | 1414 | nr_candidates++; |
1415 | } | |
1416 | ||
1417 | if (nr_candidates < 2) | |
1418 | { | |
1419 | sbitmap_free (candidates); | |
1420 | return false; | |
1421 | } | |
1422 | ||
1423 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1424 | { | |
1425 | fprintf (dump_file, "searching for un-distribute opportunities "); | |
1426 | print_generic_expr (dump_file, | |
f1f41a6c | 1427 | (*ops)[bitmap_first_set_bit (candidates)]->op, 0); |
dddf5036 | 1428 | fprintf (dump_file, " %d\n", nr_candidates); |
1429 | } | |
1430 | ||
1431 | /* Build linearized sub-operand lists and the counting table. */ | |
f1f41a6c | 1432 | cvec.create (0); |
c1f445d2 | 1433 | |
1434 | hash_table<oecount_hasher> ctable (15); | |
1435 | ||
f1f41a6c | 1436 | /* ??? Macro arguments cannot have multi-argument template types in |
1437 | them. This typedef is needed to workaround that limitation. */ | |
1438 | typedef vec<operand_entry_t> vec_operand_entry_t_heap; | |
1439 | subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); | |
0d211963 | 1440 | EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) |
dddf5036 | 1441 | { |
1442 | gimple oedef; | |
1443 | enum tree_code oecode; | |
1444 | unsigned j; | |
1445 | ||
f1f41a6c | 1446 | oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); |
dddf5036 | 1447 | oecode = gimple_assign_rhs_code (oedef); |
1448 | linearize_expr_tree (&subops[i], oedef, | |
1449 | associative_tree_code (oecode), false); | |
1450 | ||
f1f41a6c | 1451 | FOR_EACH_VEC_ELT (subops[i], j, oe1) |
dddf5036 | 1452 | { |
1453 | oecount c; | |
2933f7af | 1454 | int *slot; |
1455 | int idx; | |
dddf5036 | 1456 | c.oecode = oecode; |
1457 | c.cnt = 1; | |
17b5ea6f | 1458 | c.id = next_oecount_id++; |
dddf5036 | 1459 | c.op = oe1->op; |
f1f41a6c | 1460 | cvec.safe_push (c); |
1461 | idx = cvec.length () + 41; | |
2933f7af | 1462 | slot = ctable.find_slot (idx, INSERT); |
dddf5036 | 1463 | if (!*slot) |
1464 | { | |
2933f7af | 1465 | *slot = idx; |
dddf5036 | 1466 | } |
1467 | else | |
1468 | { | |
f1f41a6c | 1469 | cvec.pop (); |
2933f7af | 1470 | cvec[*slot - 42].cnt++; |
dddf5036 | 1471 | } |
1472 | } | |
1473 | } | |
dddf5036 | 1474 | |
1475 | /* Sort the counting table. */ | |
f1f41a6c | 1476 | cvec.qsort (oecount_cmp); |
dddf5036 | 1477 | |
1478 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1479 | { | |
1480 | oecount *c; | |
1481 | fprintf (dump_file, "Candidates:\n"); | |
f1f41a6c | 1482 | FOR_EACH_VEC_ELT (cvec, j, c) |
dddf5036 | 1483 | { |
1484 | fprintf (dump_file, " %u %s: ", c->cnt, | |
1485 | c->oecode == MULT_EXPR | |
1486 | ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); | |
1487 | print_generic_expr (dump_file, c->op, 0); | |
1488 | fprintf (dump_file, "\n"); | |
1489 | } | |
1490 | } | |
1491 | ||
c31fb425 | 1492 | /* Process the (operand, code) pairs in decreasing order of occurrence count. */ |
dddf5036 | 1493 | candidates2 = sbitmap_alloc (length); |
f1f41a6c | 1494 | while (!cvec.is_empty ()) |
dddf5036 | 1495 | { |
f1f41a6c | 1496 | oecount *c = &cvec.last (); |
dddf5036 | 1497 | if (c->cnt < 2) |
1498 | break; | |
1499 | ||
1500 | /* Now collect the operands in the outer chain that contain | |
1501 | the common operand in their inner chain. */ | |
53c5d9d4 | 1502 | bitmap_clear (candidates2); |
dddf5036 | 1503 | nr_candidates2 = 0; |
0d211963 | 1504 | EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) |
dddf5036 | 1505 | { |
1506 | gimple oedef; | |
1507 | enum tree_code oecode; | |
1508 | unsigned j; | |
f1f41a6c | 1509 | tree op = (*ops)[i]->op; |
dddf5036 | 1510 | |
1511 | /* If we undistributed in this chain already this may be | |
1512 | a constant. */ | |
1513 | if (TREE_CODE (op) != SSA_NAME) | |
1514 | continue; | |
1515 | ||
1516 | oedef = SSA_NAME_DEF_STMT (op); | |
1517 | oecode = gimple_assign_rhs_code (oedef); | |
1518 | if (oecode != c->oecode) | |
1519 | continue; | |
1520 | ||
f1f41a6c | 1521 | FOR_EACH_VEC_ELT (subops[i], j, oe1) |
dddf5036 | 1522 | { |
56e650d6 | 1523 | if (oe1->op == c->op) |
dddf5036 | 1524 | { |
08b7917c | 1525 | bitmap_set_bit (candidates2, i); |
dddf5036 | 1526 | ++nr_candidates2; |
1527 | break; | |
1528 | } | |
1529 | } | |
1530 | } | |
1531 | ||
1532 | if (nr_candidates2 >= 2) | |
1533 | { | |
1534 | operand_entry_t oe1, oe2; | |
dddf5036 | 1535 | gimple prod; |
53c5d9d4 | 1536 | int first = bitmap_first_set_bit (candidates2); |
dddf5036 | 1537 | |
1538 | /* Build the new addition chain. */ | |
f1f41a6c | 1539 | oe1 = (*ops)[first]; |
dddf5036 | 1540 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1541 | { | |
1542 | fprintf (dump_file, "Building ("); | |
1543 | print_generic_expr (dump_file, oe1->op, 0); | |
1544 | } | |
dddf5036 | 1545 | zero_one_operation (&oe1->op, c->oecode, c->op); |
0d211963 | 1546 | EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) |
dddf5036 | 1547 | { |
1548 | gimple sum; | |
f1f41a6c | 1549 | oe2 = (*ops)[i]; |
dddf5036 | 1550 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1551 | { | |
1552 | fprintf (dump_file, " + "); | |
1553 | print_generic_expr (dump_file, oe2->op, 0); | |
1554 | } | |
1555 | zero_one_operation (&oe2->op, c->oecode, c->op); | |
03d37e4e | 1556 | sum = build_and_add_sum (TREE_TYPE (oe1->op), |
1557 | oe1->op, oe2->op, opcode); | |
385f3f36 | 1558 | oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); |
dddf5036 | 1559 | oe2->rank = 0; |
1560 | oe1->op = gimple_get_lhs (sum); | |
1561 | } | |
1562 | ||
1563 | /* Apply the multiplication/division. */ | |
03d37e4e | 1564 | prod = build_and_add_sum (TREE_TYPE (oe1->op), |
1565 | oe1->op, c->op, c->oecode); | |
dddf5036 | 1566 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1567 | { | |
1568 | fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); | |
1569 | print_generic_expr (dump_file, c->op, 0); | |
1570 | fprintf (dump_file, "\n"); | |
1571 | } | |
1572 | ||
1573 | /* Record it in the addition chain and disable further | |
1574 | undistribution with this op. */ | |
1575 | oe1->op = gimple_assign_lhs (prod); | |
1576 | oe1->rank = get_rank (oe1->op); | |
f1f41a6c | 1577 | subops[first].release (); |
dddf5036 | 1578 | |
1579 | changed = true; | |
1580 | } | |
1581 | ||
f1f41a6c | 1582 | cvec.pop (); |
dddf5036 | 1583 | } |
1584 | ||
f1f41a6c | 1585 | for (i = 0; i < ops->length (); ++i) |
1586 | subops[i].release (); | |
dddf5036 | 1587 | free (subops); |
f1f41a6c | 1588 | cvec.release (); |
dddf5036 | 1589 | sbitmap_free (candidates); |
1590 | sbitmap_free (candidates2); | |
1591 | ||
1592 | return changed; | |
1593 | } | |
1594 | ||
5d6da7da | 1595 | /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison |
1596 | expression, examine the other OPS to see if any of them are comparisons | |
1597 | of the same values, which we may be able to combine or eliminate. | |
1598 | For example, we can rewrite (a < b) | (a == b) as (a <= b). */ | |
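/* Likewise for BIT_AND_EXPR: e.g. (a <= b) & (a >= b) can become
   (a == b); the folding itself is delegated to
   maybe_fold_and_comparisons resp. maybe_fold_or_comparisons below.  */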
1599 | ||
1600 | static bool | |
1601 | eliminate_redundant_comparison (enum tree_code opcode, | |
f1f41a6c | 1602 | vec<operand_entry_t> *ops, |
5d6da7da | 1603 | unsigned int currindex, |
1604 | operand_entry_t curr) | |
1605 | { | |
1606 | tree op1, op2; | |
1607 | enum tree_code lcode, rcode; | |
1608 | gimple def1, def2; | |
1609 | int i; | |
1610 | operand_entry_t oe; | |
1611 | ||
1612 | if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) | |
1613 | return false; | |
1614 | ||
1615 | /* Check that CURR is a comparison. */ | |
1616 | if (TREE_CODE (curr->op) != SSA_NAME) | |
1617 | return false; | |
1618 | def1 = SSA_NAME_DEF_STMT (curr->op); | |
1619 | if (!is_gimple_assign (def1)) | |
1620 | return false; | |
1621 | lcode = gimple_assign_rhs_code (def1); | |
1622 | if (TREE_CODE_CLASS (lcode) != tcc_comparison) | |
1623 | return false; | |
1624 | op1 = gimple_assign_rhs1 (def1); | |
1625 | op2 = gimple_assign_rhs2 (def1); | |
1626 | ||
1627 | /* Now look for a similar comparison in the remaining OPS. */ | |
f1f41a6c | 1628 | for (i = currindex + 1; ops->iterate (i, &oe); i++) |
5d6da7da | 1629 | { |
1630 | tree t; | |
1631 | ||
1632 | if (TREE_CODE (oe->op) != SSA_NAME) | |
1633 | continue; | |
1634 | def2 = SSA_NAME_DEF_STMT (oe->op); | |
1635 | if (!is_gimple_assign (def2)) | |
1636 | continue; | |
1637 | rcode = gimple_assign_rhs_code (def2); | |
1638 | if (TREE_CODE_CLASS (rcode) != tcc_comparison) | |
1639 | continue; | |
5d6da7da | 1640 | |
1641 | /* If we got here, we have a match. See if we can combine the | |
1642 | two comparisons. */ | |
c82d157a | 1643 | if (opcode == BIT_IOR_EXPR) |
1644 | t = maybe_fold_or_comparisons (lcode, op1, op2, | |
1645 | rcode, gimple_assign_rhs1 (def2), | |
1646 | gimple_assign_rhs2 (def2)); | |
1647 | else | |
1648 | t = maybe_fold_and_comparisons (lcode, op1, op2, | |
1649 | rcode, gimple_assign_rhs1 (def2), | |
1650 | gimple_assign_rhs2 (def2)); | |
5d6da7da | 1651 | if (!t) |
1652 | continue; | |
c82d157a | 1653 | |
1654 | /* maybe_fold_and_comparisons and maybe_fold_or_comparisons | |
1655 | always give us a boolean_type_node value back. If the original | |
1656 | BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, | |
1657 | we need to convert. */ | |
1658 | if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) | |
1659 | t = fold_convert (TREE_TYPE (curr->op), t); | |
1660 | ||
89c993b6 | 1661 | if (TREE_CODE (t) != INTEGER_CST |
1662 | && !operand_equal_p (t, curr->op, 0)) | |
1663 | { | |
1664 | enum tree_code subcode; | |
1665 | tree newop1, newop2; | |
1666 | if (!COMPARISON_CLASS_P (t)) | |
1667 | continue; | |
1668 | extract_ops_from_tree (t, &subcode, &newop1, &newop2); | |
1669 | STRIP_USELESS_TYPE_CONVERSION (newop1); | |
1670 | STRIP_USELESS_TYPE_CONVERSION (newop2); | |
1671 | if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) | |
1672 | continue; | |
1673 | } | |
1674 | ||
5d6da7da | 1675 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1676 | { | |
1677 | fprintf (dump_file, "Equivalence: "); | |
1678 | print_generic_expr (dump_file, curr->op, 0); | |
1679 | fprintf (dump_file, " %s ", op_symbol_code (opcode)); | |
1680 | print_generic_expr (dump_file, oe->op, 0); | |
1681 | fprintf (dump_file, " -> "); | |
1682 | print_generic_expr (dump_file, t, 0); | |
1683 | fprintf (dump_file, "\n"); | |
1684 | } | |
1685 | ||
1686 | /* Now we can delete oe, as it has been subsumed by the new combined | |
1687 | expression t. */ | |
f1f41a6c | 1688 | ops->ordered_remove (i); |
5d6da7da | 1689 | reassociate_stats.ops_eliminated++; |
1690 | ||
1691 | /* If t is the same as curr->op, we're done. Otherwise we must | |
1692 | replace curr->op with t. Special case is if we got a constant | |
1693 | back, in which case we add it to the end instead of in place of | |
1694 | the current entry. */ | |
1695 | if (TREE_CODE (t) == INTEGER_CST) | |
1696 | { | |
f1f41a6c | 1697 | ops->ordered_remove (currindex); |
5d6da7da | 1698 | add_to_ops_vec (ops, t); |
1699 | } | |
c82d157a | 1700 | else if (!operand_equal_p (t, curr->op, 0)) |
5d6da7da | 1701 | { |
5d6da7da | 1702 | gimple sum; |
1703 | enum tree_code subcode; | |
1704 | tree newop1; | |
1705 | tree newop2; | |
b2d33090 | 1706 | gcc_assert (COMPARISON_CLASS_P (t)); |
5d6da7da | 1707 | extract_ops_from_tree (t, &subcode, &newop1, &newop2); |
b2d33090 | 1708 | STRIP_USELESS_TYPE_CONVERSION (newop1); |
1709 | STRIP_USELESS_TYPE_CONVERSION (newop2); | |
1710 | gcc_checking_assert (is_gimple_val (newop1) | |
1711 | && is_gimple_val (newop2)); | |
03d37e4e | 1712 | sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); |
5d6da7da | 1713 | curr->op = gimple_get_lhs (sum); |
1714 | } | |
1715 | return true; | |
1716 | } | |
1717 | ||
1718 | return false; | |
1719 | } | |
dddf5036 | 1720 | |
54aceb26 | 1721 | /* Perform various identities and other optimizations on the list of |
1722 | operand entries, stored in OPS. The tree code for the binary | |
1723 | operation between all the operands is OPCODE. */ | |
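/* For example, with OPCODE PLUS_EXPR and OPS {a_1, b_2, 3, 5}
   -- constants have rank 0 and therefore sort to the end -- the two
   trailing constants are first merged into a single 8, giving
   {a_1, b_2, 8}, before the pairwise eliminations below run.  */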
3dec5460 | 1724 | |
54aceb26 | 1725 | static void |
1726 | optimize_ops_list (enum tree_code opcode, | |
f1f41a6c | 1727 | vec<operand_entry_t> *ops) |
54aceb26 | 1728 | { |
f1f41a6c | 1729 | unsigned int length = ops->length (); |
54aceb26 | 1730 | unsigned int i; |
1731 | operand_entry_t oe; | |
1732 | operand_entry_t oelast = NULL; | |
1733 | bool iterate = false; | |
3dec5460 | 1734 | |
54aceb26 | 1735 | if (length == 1) |
1736 | return; | |
3dec5460 | 1737 | |
f1f41a6c | 1738 | oelast = ops->last (); |
3dec5460 | 1739 | |
54aceb26 | 1740 | /* If the last two are constants, pop the constants off, merge them |
1741 | and try the next two. */ | |
1742 | if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) | |
1743 | { | |
f1f41a6c | 1744 | operand_entry_t oelm1 = (*ops)[length - 2]; |
54aceb26 | 1745 | |
1746 | if (oelm1->rank == 0 | |
1747 | && is_gimple_min_invariant (oelm1->op) | |
c8ca3ee7 | 1748 | && useless_type_conversion_p (TREE_TYPE (oelm1->op), |
1749 | TREE_TYPE (oelast->op))) | |
54aceb26 | 1750 | { |
5f9acd88 | 1751 | tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), |
54aceb26 | 1752 | oelm1->op, oelast->op); |
1753 | ||
5f9acd88 | 1754 | if (folded && is_gimple_min_invariant (folded)) |
54aceb26 | 1755 | { |
1756 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1757 | fprintf (dump_file, "Merging constants\n"); | |
1758 | ||
f1f41a6c | 1759 | ops->pop (); |
1760 | ops->pop (); | |
54aceb26 | 1761 | |
1762 | add_to_ops_vec (ops, folded); | |
1763 | reassociate_stats.constants_eliminated++; | |
1764 | ||
1765 | optimize_ops_list (opcode, ops); | |
1766 | return; | |
1767 | } | |
1768 | } | |
1769 | } | |
1770 | ||
1771 | eliminate_using_constants (opcode, ops); | |
1772 | oelast = NULL; | |
1773 | ||
f1f41a6c | 1774 | for (i = 0; ops->iterate (i, &oe);) |
54aceb26 | 1775 | { |
1776 | bool done = false; | |
1777 | ||
1778 | if (eliminate_not_pairs (opcode, ops, i, oe)) | |
1779 | return; | |
1780 | if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) | |
5d6da7da | 1781 | || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) |
1782 | || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) | |
54aceb26 | 1783 | { |
1784 | if (done) | |
1785 | return; | |
1786 | iterate = true; | |
1787 | oelast = NULL; | |
1788 | continue; | |
1789 | } | |
1790 | oelast = oe; | |
1791 | i++; | |
1792 | } | |
1793 | ||
f1f41a6c | 1794 | length = ops->length (); |
1795 | oelast = ops->last (); | |
54aceb26 | 1796 | |
1797 | if (iterate) | |
1798 | optimize_ops_list (opcode, ops); | |
1799 | } | |
1800 | ||
946e9eb4 | 1801 | /* The following functions are subroutines to optimize_range_tests and allow |
1802 | it to try to change a logical combination of comparisons into a range | |
1803 | test. | |
1804 | ||
1805 | For example, both | |
1806 | X == 2 || X == 5 || X == 3 || X == 4 | |
1807 | and | |
1808 | X >= 2 && X <= 5 | |
1809 | are converted to | |
1810 | (unsigned) (X - 2) <= 3 | |
1811 | ||
1812 | For more information see comments above fold_range_test in fold-const.c, | |
1813 | this implementation is for GIMPLE. */ | |
1814 | ||
1815 | struct range_entry | |
1816 | { | |
1817 | tree exp; | |
1818 | tree low; | |
1819 | tree high; | |
1820 | bool in_p; | |
1821 | bool strict_overflow_p; | |
1822 | unsigned int idx, next; | |
1823 | }; | |
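/* For example, once merged, the test X >= 2 && X <= 5 from the
   comment above is described by a single entry with exp = X, low = 2,
   high = 5 and in_p = true.  */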
1824 | ||
1825 | /* This is similar to make_range in fold-const.c, but on top of | |
8a2c7744 | 1826 | GIMPLE instead of trees. If EXP is non-NULL, it should be |
1827 | an SSA_NAME and the STMT argument is ignored; otherwise STMT | |
1828 | should be a GIMPLE_COND. */ | |
946e9eb4 | 1829 | |
1830 | static void | |
8a2c7744 | 1831 | init_range_entry (struct range_entry *r, tree exp, gimple stmt) |
946e9eb4 | 1832 | { |
1833 | int in_p; | |
1834 | tree low, high; | |
1835 | bool is_bool, strict_overflow_p; | |
1836 | ||
1837 | r->exp = NULL_TREE; | |
1838 | r->in_p = false; | |
1839 | r->strict_overflow_p = false; | |
1840 | r->low = NULL_TREE; | |
1841 | r->high = NULL_TREE; | |
8a2c7744 | 1842 | if (exp != NULL_TREE |
1843 | && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) | |
946e9eb4 | 1844 | return; |
1845 | ||
1846 | /* Start with simply saying "EXP != 0" and then look at the code of EXP | |
1847 | and see if we can refine the range. Some of the cases below may not | |
1848 | happen, but it doesn't seem worth worrying about this. We "continue" | |
1849 | the outer loop when we've changed something; otherwise we "break" | |
1850 | the switch, which will "break" the while. */ | |
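/* E.g. if EXP is _7 and its defining statement is _7 = x_5 <= 9,
   the comparison case below hands the test to make_range_step, which
   turns it into a range on x_5 of [minimum, 9], i.e. low == NULL_TREE
   (standing for the type minimum) and high == 9.  */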
8a2c7744 | 1851 | low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; |
946e9eb4 | 1852 | high = low; |
1853 | in_p = 0; | |
1854 | strict_overflow_p = false; | |
1855 | is_bool = false; | |
8a2c7744 | 1856 | if (exp == NULL_TREE) |
1857 | is_bool = true; | |
1858 | else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) | |
946e9eb4 | 1859 | { |
1860 | if (TYPE_UNSIGNED (TREE_TYPE (exp))) | |
1861 | is_bool = true; | |
1862 | else | |
1863 | return; | |
1864 | } | |
1865 | else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) | |
1866 | is_bool = true; | |
1867 | ||
1868 | while (1) | |
1869 | { | |
946e9eb4 | 1870 | enum tree_code code; |
1871 | tree arg0, arg1, exp_type; | |
1872 | tree nexp; | |
1873 | location_t loc; | |
1874 | ||
8a2c7744 | 1875 | if (exp != NULL_TREE) |
1876 | { | |
81cab242 | 1877 | if (TREE_CODE (exp) != SSA_NAME |
1878 | || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) | |
8a2c7744 | 1879 | break; |
946e9eb4 | 1880 | |
8a2c7744 | 1881 | stmt = SSA_NAME_DEF_STMT (exp); |
1882 | if (!is_gimple_assign (stmt)) | |
1883 | break; | |
1884 | ||
1885 | code = gimple_assign_rhs_code (stmt); | |
1886 | arg0 = gimple_assign_rhs1 (stmt); | |
1887 | arg1 = gimple_assign_rhs2 (stmt); | |
1888 | exp_type = TREE_TYPE (exp); | |
1889 | } | |
1890 | else | |
1891 | { | |
1892 | code = gimple_cond_code (stmt); | |
1893 | arg0 = gimple_cond_lhs (stmt); | |
1894 | arg1 = gimple_cond_rhs (stmt); | |
1895 | exp_type = boolean_type_node; | |
1896 | } | |
946e9eb4 | 1897 | |
9adacac7 | 1898 | if (TREE_CODE (arg0) != SSA_NAME) |
1899 | break; | |
946e9eb4 | 1900 | loc = gimple_location (stmt); |
1901 | switch (code) | |
1902 | { | |
1903 | case BIT_NOT_EXPR: | |
e423480a | 1904 | if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE |
1905 | /* Ensure the range is either +[-,0], +[0,0], | |
1906 | -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or | |
1907 | -[1,1]. If it is e.g. +[-,-] or -[-,-] | |
1908 | or similar expression of unconditional true or | |
1909 | false, it should not be negated. */ | |
1910 | && ((high && integer_zerop (high)) | |
1911 | || (low && integer_onep (low)))) | |
946e9eb4 | 1912 | { |
1913 | in_p = !in_p; | |
1914 | exp = arg0; | |
1915 | continue; | |
1916 | } | |
1917 | break; | |
1918 | case SSA_NAME: | |
1919 | exp = arg0; | |
1920 | continue; | |
1921 | CASE_CONVERT: | |
1922 | if (is_bool) | |
1923 | goto do_default; | |
1924 | if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) | |
1925 | { | |
1926 | if (TYPE_UNSIGNED (TREE_TYPE (arg0))) | |
1927 | is_bool = true; | |
1928 | else | |
1929 | return; | |
1930 | } | |
1931 | else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) | |
1932 | is_bool = true; | |
1933 | goto do_default; | |
1934 | case EQ_EXPR: | |
1935 | case NE_EXPR: | |
1936 | case LT_EXPR: | |
1937 | case LE_EXPR: | |
1938 | case GE_EXPR: | |
1939 | case GT_EXPR: | |
1940 | is_bool = true; | |
1941 | /* FALLTHRU */ | |
1942 | default: | |
1943 | if (!is_bool) | |
1944 | return; | |
1945 | do_default: | |
1946 | nexp = make_range_step (loc, code, arg0, arg1, exp_type, | |
1947 | &low, &high, &in_p, | |
1948 | &strict_overflow_p); | |
1949 | if (nexp != NULL_TREE) | |
1950 | { | |
1951 | exp = nexp; | |
1952 | gcc_assert (TREE_CODE (exp) == SSA_NAME); | |
1953 | continue; | |
1954 | } | |
1955 | break; | |
1956 | } | |
1957 | break; | |
1958 | } | |
1959 | if (is_bool) | |
1960 | { | |
1961 | r->exp = exp; | |
1962 | r->in_p = in_p; | |
1963 | r->low = low; | |
1964 | r->high = high; | |
1965 | r->strict_overflow_p = strict_overflow_p; | |
1966 | } | |
1967 | } | |
1968 | ||
1969 | /* Comparison function for qsort. Sort entries | |
1970 | without SSA_NAME exp first, then with SSA_NAMEs sorted | |
1971 | by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs | |
1972 | by increasing ->low and if ->low is the same, by increasing | |
1973 | ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE | |
1974 | means maximum. */ | |
1975 | ||
1976 | static int | |
1977 | range_entry_cmp (const void *a, const void *b) | |
1978 | { | |
1979 | const struct range_entry *p = (const struct range_entry *) a; | |
1980 | const struct range_entry *q = (const struct range_entry *) b; | |
1981 | ||
1982 | if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) | |
1983 | { | |
1984 | if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) | |
1985 | { | |
1986 | /* Group range_entries for the same SSA_NAME together. */ | |
1987 | if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) | |
1988 | return -1; | |
1989 | else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) | |
1990 | return 1; | |
1991 | /* If ->low is different, NULL low goes first, then by | |
1992 | ascending low. */ | |
1993 | if (p->low != NULL_TREE) | |
1994 | { | |
1995 | if (q->low != NULL_TREE) | |
1996 | { | |
1997 | tree tem = fold_binary (LT_EXPR, boolean_type_node, | |
1998 | p->low, q->low); | |
1999 | if (tem && integer_onep (tem)) | |
2000 | return -1; | |
2001 | tem = fold_binary (GT_EXPR, boolean_type_node, | |
2002 | p->low, q->low); | |
2003 | if (tem && integer_onep (tem)) | |
2004 | return 1; | |
2005 | } | |
2006 | else | |
2007 | return 1; | |
2008 | } | |
2009 | else if (q->low != NULL_TREE) | |
2010 | return -1; | |
2011 | /* If ->high is different, NULL high goes last, before that by | |
2012 | ascending high. */ | |
2013 | if (p->high != NULL_TREE) | |
2014 | { | |
2015 | if (q->high != NULL_TREE) | |
2016 | { | |
2017 | tree tem = fold_binary (LT_EXPR, boolean_type_node, | |
2018 | p->high, q->high); | |
2019 | if (tem && integer_onep (tem)) | |
2020 | return -1; | |
2021 | tem = fold_binary (GT_EXPR, boolean_type_node, | |
2022 | p->high, q->high); | |
2023 | if (tem && integer_onep (tem)) | |
2024 | return 1; | |
2025 | } | |
2026 | else | |
2027 | return -1; | |
2028 | } | |
eff8617c | 2029 | else if (q->high != NULL_TREE) |
946e9eb4 | 2030 | return 1; |
2031 | /* If both ranges are the same, sort below by ascending idx. */ | |
2032 | } | |
2033 | else | |
2034 | return 1; | |
2035 | } | |
2036 | else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) | |
2037 | return -1; | |
2038 | ||
2039 | if (p->idx < q->idx) | |
2040 | return -1; | |
2041 | else | |
2042 | { | |
2043 | gcc_checking_assert (p->idx > q->idx); | |
2044 | return 1; | |
2045 | } | |
2046 | } | |
2047 | ||
2048 | /* Helper routine of optimize_range_tests. | |
2049 | [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for | |
2050 | RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, | |
e3668db5 | 2051 | OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE |
2052 | is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to | |
2053 | an array of COUNT pointers to other ranges. Return | |
8a2c7744 | 2054 | true if the range merge has been successful. |
2055 | If OPCODE is ERROR_MARK, this is called from within | |
2056 | maybe_optimize_range_tests and is performing inter-bb range optimization. | |
82cb4e1c | 2057 | In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in |
2058 | oe->rank. */ | |
946e9eb4 | 2059 | |
2060 | static bool | |
2061 | update_range_test (struct range_entry *range, struct range_entry *otherrange, | |
e3668db5 | 2062 | struct range_entry **otherrangep, |
946e9eb4 | 2063 | unsigned int count, enum tree_code opcode, |
e3668db5 | 2064 | vec<operand_entry_t> *ops, tree exp, gimple_seq seq, |
2065 | bool in_p, tree low, tree high, bool strict_overflow_p) | |
946e9eb4 | 2066 | { |
f1f41a6c | 2067 | operand_entry_t oe = (*ops)[range->idx]; |
8a2c7744 | 2068 | tree op = oe->op; |
f5a6b05f | 2069 | gimple stmt = op ? SSA_NAME_DEF_STMT (op) : |
2070 | last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); | |
8a2c7744 | 2071 | location_t loc = gimple_location (stmt); |
2072 | tree optype = op ? TREE_TYPE (op) : boolean_type_node; | |
e3668db5 | 2073 | tree tem = build_range_check (loc, optype, unshare_expr (exp), |
2074 | in_p, low, high); | |
946e9eb4 | 2075 | enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; |
2076 | gimple_stmt_iterator gsi; | |
e3668db5 | 2077 | unsigned int i; |
946e9eb4 | 2078 | |
2079 | if (tem == NULL_TREE) | |
2080 | return false; | |
2081 | ||
2082 | if (strict_overflow_p && issue_strict_overflow_warning (wc)) | |
2083 | warning_at (loc, OPT_Wstrict_overflow, | |
2084 | "assuming signed overflow does not occur " | |
2085 | "when simplifying range test"); | |
2086 | ||
2087 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2088 | { | |
2089 | struct range_entry *r; | |
2090 | fprintf (dump_file, "Optimizing range tests "); | |
2091 | print_generic_expr (dump_file, range->exp, 0); | |
2092 | fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); | |
2093 | print_generic_expr (dump_file, range->low, 0); | |
2094 | fprintf (dump_file, ", "); | |
2095 | print_generic_expr (dump_file, range->high, 0); | |
2096 | fprintf (dump_file, "]"); | |
e3668db5 | 2097 | for (i = 0; i < count; i++) |
946e9eb4 | 2098 | { |
e3668db5 | 2099 | if (otherrange) |
2100 | r = otherrange + i; | |
2101 | else | |
2102 | r = otherrangep[i]; | |
946e9eb4 | 2103 | fprintf (dump_file, " and %c[", r->in_p ? '+' : '-'); |
2104 | print_generic_expr (dump_file, r->low, 0); | |
2105 | fprintf (dump_file, ", "); | |
2106 | print_generic_expr (dump_file, r->high, 0); | |
2107 | fprintf (dump_file, "]"); | |
2108 | } | |
2109 | fprintf (dump_file, "\n into "); | |
2110 | print_generic_expr (dump_file, tem, 0); | |
2111 | fprintf (dump_file, "\n"); | |
2112 | } | |
2113 | ||
8a2c7744 | 2114 | if (opcode == BIT_IOR_EXPR |
2115 | || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) | |
946e9eb4 | 2116 | tem = invert_truthvalue_loc (loc, tem); |
2117 | ||
8a2c7744 | 2118 | tem = fold_convert_loc (loc, optype, tem); |
2119 | gsi = gsi_for_stmt (stmt); | |
927d5076 | 2120 | unsigned int uid = gimple_uid (stmt); |
a0ebffaa | 2121 | /* In rare cases range->exp can be equal to the lhs of stmt. |
2122 | In that case we have to insert after the stmt rather than before | |
927d5076 | 2123 | it. If stmt is a PHI, insert it at the start of the basic block. */ |
2124 | if (op != range->exp) | |
2125 | { | |
2126 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); | |
2127 | tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, | |
2128 | GSI_SAME_STMT); | |
2129 | gsi_prev (&gsi); | |
2130 | } | |
2131 | else if (gimple_code (stmt) != GIMPLE_PHI) | |
e3668db5 | 2132 | { |
2133 | gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); | |
2134 | tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false, | |
2135 | GSI_CONTINUE_LINKING); | |
2136 | } | |
a0ebffaa | 2137 | else |
2138 | { | |
927d5076 | 2139 | gsi = gsi_after_labels (gimple_bb (stmt)); |
2140 | if (!gsi_end_p (gsi)) | |
2141 | uid = gimple_uid (gsi_stmt (gsi)); | |
2142 | else | |
2143 | { | |
2144 | gsi = gsi_start_bb (gimple_bb (stmt)); | |
2145 | uid = 1; | |
2146 | while (!gsi_end_p (gsi)) | |
2147 | { | |
2148 | uid = gimple_uid (gsi_stmt (gsi)); | |
2149 | gsi_next (&gsi); | |
2150 | } | |
2151 | } | |
e3668db5 | 2152 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); |
a0ebffaa | 2153 | tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, |
2154 | GSI_SAME_STMT); | |
927d5076 | 2155 | if (gsi_end_p (gsi)) |
2156 | gsi = gsi_last_bb (gimple_bb (stmt)); | |
2157 | else | |
2158 | gsi_prev (&gsi); | |
a0ebffaa | 2159 | } |
2160 | for (; !gsi_end_p (gsi); gsi_prev (&gsi)) | |
82cb4e1c | 2161 | if (gimple_uid (gsi_stmt (gsi))) |
2162 | break; | |
2163 | else | |
927d5076 | 2164 | gimple_set_uid (gsi_stmt (gsi), uid); |
946e9eb4 | 2165 | |
8a2c7744 | 2166 | oe->op = tem; |
946e9eb4 | 2167 | range->exp = exp; |
2168 | range->low = low; | |
2169 | range->high = high; | |
2170 | range->in_p = in_p; | |
2171 | range->strict_overflow_p = false; | |
2172 | ||
e3668db5 | 2173 | for (i = 0; i < count; i++) |
946e9eb4 | 2174 | { |
e3668db5 | 2175 | if (otherrange) |
2176 | range = otherrange + i; | |
2177 | else | |
2178 | range = otherrangep[i]; | |
f1f41a6c | 2179 | oe = (*ops)[range->idx]; |
8a2c7744 | 2180 | /* Now change all the other range test immediate uses, so that |
2181 | those tests will be optimized away. */ | |
2182 | if (opcode == ERROR_MARK) | |
2183 | { | |
2184 | if (oe->op) | |
82cb4e1c | 2185 | oe->op = build_int_cst (TREE_TYPE (oe->op), |
2186 | oe->rank == BIT_IOR_EXPR ? 0 : 1); | |
8a2c7744 | 2187 | else |
82cb4e1c | 2188 | oe->op = (oe->rank == BIT_IOR_EXPR |
2189 | ? boolean_false_node : boolean_true_node); | |
8a2c7744 | 2190 | } |
82cb4e1c | 2191 | else |
2192 | oe->op = error_mark_node; | |
946e9eb4 | 2193 | range->exp = NULL_TREE; |
2194 | } | |
2195 | return true; | |
2196 | } | |
2197 | ||
b0c0c879 | 2198 | /* Optimize X == CST1 || X == CST2 |
2199 | if popcount (CST1 ^ CST2) == 1 into | |
2200 | (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). | |
2201 | Similarly for ranges. E.g. | |
2202 | X != 2 && X != 3 && X != 10 && X != 11 | |
2203 | will be transformed by the previous optimization into | |
2204 | !((X - 2U) <= 1U || (X - 10U) <= 1U) | |
2205 | and this loop can transform that into | |
2206 | !(((X & ~8) - 2U) <= 1U). */ | |
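/* Working the first example through: the two merged ranges are
   [2, 3] and [10, 11], so lowi ^ lowj == 2 ^ 10 == 8, a power of two,
   and highi ^ highj == 3 ^ 11 == 8 as well; hence TEM is ~8 and both
   ranges collapse into (X & ~8) being in [2, 3].  */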
2207 | ||
2208 | static bool | |
2209 | optimize_range_tests_xor (enum tree_code opcode, tree type, | |
2210 | tree lowi, tree lowj, tree highi, tree highj, | |
2211 | vec<operand_entry_t> *ops, | |
2212 | struct range_entry *rangei, | |
2213 | struct range_entry *rangej) | |
2214 | { | |
2215 | tree lowxor, highxor, tem, exp; | |
e3668db5 | 2216 | /* Check lowi ^ lowj == highi ^ highj and |
2217 | popcount (lowi ^ lowj) == 1. */ | |
b0c0c879 | 2218 | lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); |
2219 | if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) | |
2220 | return false; | |
8016354a | 2221 | if (!integer_pow2p (lowxor)) |
b0c0c879 | 2222 | return false; |
2223 | highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); | |
2224 | if (!tree_int_cst_equal (lowxor, highxor)) | |
2225 | return false; | |
2226 | ||
2227 | tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); | |
2228 | exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem); | |
2229 | lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); | |
2230 | highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); | |
e3668db5 | 2231 | if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, |
2232 | NULL, rangei->in_p, lowj, highj, | |
b0c0c879 | 2233 | rangei->strict_overflow_p |
2234 | || rangej->strict_overflow_p)) | |
2235 | return true; | |
2236 | return false; | |
2237 | } | |
2238 | ||
2239 | /* Optimize X == CST1 || X == CST2 | |
2240 | if popcount (CST2 - CST1) == 1 into | |
2241 | ((X - CST1) & ~(CST2 - CST1)) == 0. | |
2242 | Similarly for ranges. E.g. | |
2243 | X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 | |
2244 | || X == 75 || X == 45 | |
2245 | will be transformed by the previous optimization into | |
2246 | (X - 43U) <= 3U || (X - 75U) <= 3U | |
2247 | and this loop can transform that into | |
2248 | ((X - 43U) & ~(75U - 43U)) <= 3U. */ | |
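/* Working that example through: the two merged ranges are [43, 46]
   and [75, 78]; highi - lowi == highj - lowj == 3 and
   lowj - lowi == 32 is a power of two, so the mask is ~32U and the
   combined test is ((X - 43U) & ~32U) <= 3U.  */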
2249 | static bool | |
2250 | optimize_range_tests_diff (enum tree_code opcode, tree type, | |
2251 | tree lowi, tree lowj, tree highi, tree highj, | |
2252 | vec<operand_entry_t> *ops, | |
2253 | struct range_entry *rangei, | |
2254 | struct range_entry *rangej) | |
2255 | { | |
2256 | tree tem1, tem2, mask; | |
2257 | /* Check highi - lowi == highj - lowj. */ | |
2258 | tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); | |
2259 | if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) | |
2260 | return false; | |
2261 | tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); | |
2262 | if (!tree_int_cst_equal (tem1, tem2)) | |
2263 | return false; | |
2264 | /* Check popcount (lowj - lowi) == 1. */ | |
2265 | tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); | |
2266 | if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) | |
2267 | return false; | |
8016354a | 2268 | if (!integer_pow2p (tem1)) |
b0c0c879 | 2269 | return false; |
2270 | ||
db7164e3 | 2271 | type = unsigned_type_for (type); |
2272 | tem1 = fold_convert (type, tem1); | |
2273 | tem2 = fold_convert (type, tem2); | |
2274 | lowi = fold_convert (type, lowi); | |
b0c0c879 | 2275 | mask = fold_build1 (BIT_NOT_EXPR, type, tem1); |
db7164e3 | 2276 | tem1 = fold_binary (MINUS_EXPR, type, |
2277 | fold_convert (type, rangei->exp), lowi); | |
b0c0c879 | 2278 | tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); |
2279 | lowj = build_int_cst (type, 0); | |
e3668db5 | 2280 | if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, |
2281 | NULL, rangei->in_p, lowj, tem2, | |
b0c0c879 | 2282 | rangei->strict_overflow_p |
2283 | || rangej->strict_overflow_p)) | |
2284 | return true; | |
2285 | return false; | |
2286 | } | |
2287 | ||
2288 | /* Perform the checks common to optimize_range_tests_xor and | |
2289 | optimize_range_tests_diff on each eligible pair of ranges. | |
2290 | If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor, | |
2291 | otherwise call optimize_range_tests_diff. */ | |
2292 | ||
2293 | static bool | |
2294 | optimize_range_tests_1 (enum tree_code opcode, int first, int length, | |
2295 | bool optimize_xor, vec<operand_entry_t> *ops, | |
2296 | struct range_entry *ranges) | |
2297 | { | |
2298 | int i, j; | |
2299 | bool any_changes = false; | |
2300 | for (i = first; i < length; i++) | |
2301 | { | |
2302 | tree lowi, highi, lowj, highj, type, tem; | |
2303 | ||
2304 | if (ranges[i].exp == NULL_TREE || ranges[i].in_p) | |
2305 | continue; | |
2306 | type = TREE_TYPE (ranges[i].exp); | |
2307 | if (!INTEGRAL_TYPE_P (type)) | |
2308 | continue; | |
2309 | lowi = ranges[i].low; | |
2310 | if (lowi == NULL_TREE) | |
2311 | lowi = TYPE_MIN_VALUE (type); | |
2312 | highi = ranges[i].high; | |
2313 | if (highi == NULL_TREE) | |
2314 | continue; | |
2315 | for (j = i + 1; j < length && j < i + 64; j++) | |
2316 | { | |
2317 | bool changes; | |
2318 | if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) | |
2319 | continue; | |
2320 | lowj = ranges[j].low; | |
2321 | if (lowj == NULL_TREE) | |
2322 | continue; | |
2323 | highj = ranges[j].high; | |
2324 | if (highj == NULL_TREE) | |
2325 | highj = TYPE_MAX_VALUE (type); | |
2326 | /* Check lowj > highi. */ | |
2327 | tem = fold_binary (GT_EXPR, boolean_type_node, | |
2328 | lowj, highi); | |
2329 | if (tem == NULL_TREE || !integer_onep (tem)) | |
2330 | continue; | |
2331 | if (optimize_xor) | |
2332 | changes = optimize_range_tests_xor (opcode, type, lowi, lowj, | |
2333 | highi, highj, ops, | |
2334 | ranges + i, ranges + j); | |
2335 | else | |
2336 | changes = optimize_range_tests_diff (opcode, type, lowi, lowj, | |
2337 | highi, highj, ops, | |
2338 | ranges + i, ranges + j); | |
2339 | if (changes) | |
2340 | { | |
2341 | any_changes = true; | |
2342 | break; | |
2343 | } | |
2344 | } | |
2345 | } | |
2346 | return any_changes; | |
2347 | } | |
2348 | ||
e3668db5 | 2349 | /* Helper function of optimize_range_tests_to_bit_test. Handle a single |
2350 | range, EXP, LOW, HIGH, compute bit mask of bits to test and return | |
2351 | EXP on success, NULL otherwise. */ | |
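/* E.g. for the range [43, 46] with TOTALLOW == LOW == 43 and
   PREC == 64, MAX is 4 and *MASK becomes the low four bits 0xf;
   a later range [75, 78] checked against the same TOTALLOW gets the
   same four-bit mask shifted left by 75 - 43 == 32.  */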
2352 | ||
2353 | static tree | |
2354 | extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, | |
2355 | wide_int *mask, tree *totallowp) | |
2356 | { | |
2357 | tree tem = int_const_binop (MINUS_EXPR, high, low); | |
2358 | if (tem == NULL_TREE | |
2359 | || TREE_CODE (tem) != INTEGER_CST | |
2360 | || TREE_OVERFLOW (tem) | |
2361 | || tree_int_cst_sgn (tem) == -1 | |
2362 | || compare_tree_int (tem, prec) != -1) | |
2363 | return NULL_TREE; | |
2364 | ||
2365 | unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; | |
2366 | *mask = wi::shifted_mask (0, max, false, prec); | |
2367 | if (TREE_CODE (exp) == BIT_AND_EXPR | |
2368 | && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) | |
2369 | { | |
2370 | widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); | |
2371 | msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); | |
2372 | if (wi::popcount (msk) == 1 | |
2373 | && wi::ltu_p (msk, prec - max)) | |
2374 | { | |
2375 | *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); | |
2376 | max += msk.to_uhwi (); | |
2377 | exp = TREE_OPERAND (exp, 0); | |
2378 | if (integer_zerop (low) | |
2379 | && TREE_CODE (exp) == PLUS_EXPR | |
2380 | && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) | |
2381 | { | |
f6835415 | 2382 | tree ret = TREE_OPERAND (exp, 0); |
2383 | STRIP_NOPS (ret); | |
e3668db5 | 2384 | widest_int bias |
2385 | = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), | |
2386 | TYPE_PRECISION (TREE_TYPE (low)))); | |
f6835415 | 2387 | tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); |
e3668db5 | 2388 | if (totallowp) |
2389 | { | |
2390 | *totallowp = tbias; | |
f6835415 | 2391 | return ret; |
e3668db5 | 2392 | } |
2393 | else if (!tree_int_cst_lt (totallow, tbias)) | |
2394 | return NULL_TREE; | |
f6835415 | 2395 | bias = wi::to_widest (tbias); |
e3668db5 | 2396 | bias -= wi::to_widest (totallow); |
2397 | if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max)) | |
2398 | { | |
2399 | *mask = wi::lshift (*mask, bias); | |
f6835415 | 2400 | return ret; |
e3668db5 | 2401 | } |
2402 | } | |
2403 | } | |
2404 | } | |
2405 | if (totallowp) | |
2406 | return exp; | |
2407 | if (!tree_int_cst_lt (totallow, low)) | |
2408 | return exp; | |
2409 | tem = int_const_binop (MINUS_EXPR, low, totallow); | |
2410 | if (tem == NULL_TREE | |
2411 | || TREE_CODE (tem) != INTEGER_CST | |
2412 | || TREE_OVERFLOW (tem) | |
2413 | || compare_tree_int (tem, prec - max) == 1) | |
2414 | return NULL_TREE; | |
2415 | ||
2416 | *mask = wi::lshift (*mask, wi::to_widest (tem)); | |
2417 | return exp; | |
2418 | } | |
2419 | ||
2420 | /* Attempt to optimize small range tests using a bit test. | |
2421 | E.g. | |
2422 | X != 43 && X != 76 && X != 44 && X != 78 && X != 49 | |
2423 | && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 | |
2424 | has been optimized by the earlier optimizations into: | |
2425 | ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 | |
2426 | As the whole 43 through 82 range spans fewer than 64 values, | |
2427 | for 64-bit word targets optimize that into: | |
2428 | (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ | |
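/* In that example the mask 0x8F0000004FULL has bits 0-3, 6, 32-35
   and 39 set, i.e. exactly the offsets of 43..46, 49, 75..78 and 82
   from the base value 43.  */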
2429 | ||
2430 | static bool | |
2431 | optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, | |
2432 | vec<operand_entry_t> *ops, | |
2433 | struct range_entry *ranges) | |
2434 | { | |
2435 | int i, j; | |
2436 | bool any_changes = false; | |
2437 | int prec = GET_MODE_BITSIZE (word_mode); | |
2438 | auto_vec<struct range_entry *, 64> candidates; | |
2439 | ||
2440 | for (i = first; i < length - 2; i++) | |
2441 | { | |
2442 | tree lowi, highi, lowj, highj, type; | |
2443 | ||
2444 | if (ranges[i].exp == NULL_TREE || ranges[i].in_p) | |
2445 | continue; | |
2446 | type = TREE_TYPE (ranges[i].exp); | |
2447 | if (!INTEGRAL_TYPE_P (type)) | |
2448 | continue; | |
2449 | lowi = ranges[i].low; | |
2450 | if (lowi == NULL_TREE) | |
2451 | lowi = TYPE_MIN_VALUE (type); | |
2452 | highi = ranges[i].high; | |
2453 | if (highi == NULL_TREE) | |
2454 | continue; | |
2455 | wide_int mask; | |
2456 | tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, | |
2457 | highi, &mask, &lowi); | |
2458 | if (exp == NULL_TREE) | |
2459 | continue; | |
2460 | bool strict_overflow_p = ranges[i].strict_overflow_p; | |
2461 | candidates.truncate (0); | |
2462 | int end = MIN (i + 64, length); | |
2463 | for (j = i + 1; j < end; j++) | |
2464 | { | |
2465 | tree exp2; | |
2466 | if (ranges[j].exp == NULL_TREE || ranges[j].in_p) | |
2467 | continue; | |
2468 | if (ranges[j].exp == exp) | |
2469 | ; | |
2470 | else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) | |
2471 | { | |
2472 | exp2 = TREE_OPERAND (ranges[j].exp, 0); | |
2473 | if (exp2 == exp) | |
2474 | ; | |
2475 | else if (TREE_CODE (exp2) == PLUS_EXPR) | |
2476 | { | |
2477 | exp2 = TREE_OPERAND (exp2, 0); | |
2478 | STRIP_NOPS (exp2); | |
2479 | if (exp2 != exp) | |
2480 | continue; | |
2481 | } | |
2482 | else | |
2483 | continue; | |
2484 | } | |
2485 | else | |
2486 | continue; | |
2487 | lowj = ranges[j].low; | |
2488 | if (lowj == NULL_TREE) | |
2489 | continue; | |
2490 | highj = ranges[j].high; | |
2491 | if (highj == NULL_TREE) | |
2492 | highj = TYPE_MAX_VALUE (type); | |
2493 | wide_int mask2; | |
2494 | exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, | |
2495 | highj, &mask2, NULL); | |
2496 | if (exp2 != exp) | |
2497 | continue; | |
2498 | mask |= mask2; | |
2499 | strict_overflow_p |= ranges[j].strict_overflow_p; | |
2500 | candidates.safe_push (&ranges[j]); | |
2501 | } | |
2502 | ||
2503 | /* If we would otherwise need 3 or more comparisons, use a bit test. */ | |
2504 | if (candidates.length () >= 2) | |
2505 | { | |
2506 | tree high = wide_int_to_tree (TREE_TYPE (lowi), | |
2507 | wi::to_widest (lowi) | |
8f936b5e | 2508 | + prec - 1 - wi::clz (mask)); |
e3668db5 | 2509 | operand_entry_t oe = (*ops)[ranges[i].idx]; |
2510 | tree op = oe->op; | |
2511 | gimple stmt = op ? SSA_NAME_DEF_STMT (op) | |
2512 | : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); | |
2513 | location_t loc = gimple_location (stmt); | |
2514 | tree optype = op ? TREE_TYPE (op) : boolean_type_node; | |
2515 | ||
2516 | /* See if it isn't cheaper to pretend the minimum value of the | |
2517 | range is 0, if the maximum value is small enough. | |
2518 | We can then avoid subtracting the minimum value, but the | |
2519 | mask constant could perhaps be more expensive. */ | |
2520 | if (compare_tree_int (lowi, 0) > 0 | |
2521 | && compare_tree_int (high, prec) < 0) | |
2522 | { | |
2523 | int cost_diff; | |
2524 | HOST_WIDE_INT m = tree_to_uhwi (lowi); | |
2525 | rtx reg = gen_raw_REG (word_mode, 10000); | |
2526 | bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt)); | |
2527 | cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg, | |
2528 | GEN_INT (-m)), speed_p); | |
2529 | rtx r = immed_wide_int_const (mask, word_mode); | |
2530 | cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r), | |
5ae4887d | 2531 | word_mode, speed_p); |
e3668db5 | 2532 | r = immed_wide_int_const (wi::lshift (mask, m), word_mode); |
2533 | cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r), | |
5ae4887d | 2534 | word_mode, speed_p); |
e3668db5 | 2535 | if (cost_diff > 0) |
2536 | { | |
2537 | mask = wi::lshift (mask, m); | |
2538 | lowi = build_zero_cst (TREE_TYPE (lowi)); | |
2539 | } | |
2540 | } | |
2541 | ||
2542 | tree tem = build_range_check (loc, optype, unshare_expr (exp), | |
2543 | false, lowi, high); | |
2544 | if (tem == NULL_TREE || is_gimple_val (tem)) | |
2545 | continue; | |
2546 | tree etype = unsigned_type_for (TREE_TYPE (exp)); | |
2547 | exp = fold_build2_loc (loc, MINUS_EXPR, etype, | |
2548 | fold_convert_loc (loc, etype, exp), | |
2549 | fold_convert_loc (loc, etype, lowi)); | |
2550 | exp = fold_convert_loc (loc, integer_type_node, exp); | |
2551 | tree word_type = lang_hooks.types.type_for_mode (word_mode, 1); | |
2552 | exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type, | |
2553 | build_int_cst (word_type, 1), exp); | |
2554 | exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp, | |
2555 | wide_int_to_tree (word_type, mask)); | |
2556 | exp = fold_build2_loc (loc, EQ_EXPR, optype, exp, | |
2557 | build_zero_cst (word_type)); | |
2558 | if (is_gimple_val (exp)) | |
2559 | continue; | |
2560 | ||
2561 | /* The shift might have undefined behavior if TEM is true, | |
2562 | but reassociate_bb isn't prepared to have basic blocks | |
2563 | split when it is running. So, temporarily emit a code | |
2564 | with BIT_IOR_EXPR instead of &&, and fix it up in | |
2565 | branch_fixup. */ | |
2566 | gimple_seq seq; | |
2567 | tem = force_gimple_operand (tem, &seq, true, NULL_TREE); | |
2568 | gcc_assert (TREE_CODE (tem) == SSA_NAME); | |
2569 | gimple_set_visited (SSA_NAME_DEF_STMT (tem), true); | |
2570 | gimple_seq seq2; | |
2571 | exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); | |
2572 | gimple_seq_add_seq_without_update (&seq, seq2); | |
2573 | gcc_assert (TREE_CODE (exp) == SSA_NAME); | |
2574 | gimple_set_visited (SSA_NAME_DEF_STMT (exp), true); | |
e9cf809e | 2575 | gimple g = gimple_build_assign (make_ssa_name (optype), |
2576 | BIT_IOR_EXPR, tem, exp); | |
e3668db5 | 2577 | gimple_set_location (g, loc); |
2578 | gimple_seq_add_stmt_without_update (&seq, g); | |
2579 | exp = gimple_assign_lhs (g); | |
2580 | tree val = build_zero_cst (optype); | |
2581 | if (update_range_test (&ranges[i], NULL, candidates.address (), | |
2582 | candidates.length (), opcode, ops, exp, | |
2583 | seq, false, val, val, strict_overflow_p)) | |
2584 | { | |
2585 | any_changes = true; | |
2586 | reassoc_branch_fixups.safe_push (tem); | |
2587 | } | |
2588 | else | |
2589 | gimple_seq_discard (seq); | |
2590 | } | |
2591 | } | |
2592 | return any_changes; | |
2593 | } | |
2594 | ||
946e9eb4 | 2595 | /* Optimize range tests, similarly to how fold_range_test optimizes |
2596 | them on trees. The tree code for the binary | |
8a2c7744 | 2597 | operation between all the operands is OPCODE. |
2598 | If OPCODE is ERROR_MARK, optimize_range_tests is called from within | |
2599 | maybe_optimize_range_tests for inter-bb range optimization. | |
2600 | In that case, if oe->op is NULL, oe->id is the index of the bb whose | |
2601 | GIMPLE_COND is && or ||ed into the test, and oe->rank says | |
2602 | the actual opcode. */ | |
946e9eb4 | 2603 | |
82cb4e1c | 2604 | static bool |
946e9eb4 | 2605 | optimize_range_tests (enum tree_code opcode, |
f1f41a6c | 2606 | vec<operand_entry_t> *ops) |
946e9eb4 | 2607 | { |
f1f41a6c | 2608 | unsigned int length = ops->length (), i, j, first; |
946e9eb4 | 2609 | operand_entry_t oe; |
2610 | struct range_entry *ranges; | |
2611 | bool any_changes = false; | |
2612 | ||
2613 | if (length == 1) | |
82cb4e1c | 2614 | return false; |
946e9eb4 | 2615 | |
2616 | ranges = XNEWVEC (struct range_entry, length); | |
2617 | for (i = 0; i < length; i++) | |
2618 | { | |
f1f41a6c | 2619 | oe = (*ops)[i]; |
946e9eb4 | 2620 | ranges[i].idx = i; |
8a2c7744 | 2621 | init_range_entry (ranges + i, oe->op, |
f5a6b05f | 2622 | oe->op ? NULL : |
2623 | last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); | |
946e9eb4 | 2624 | /* For | invert it now, we will invert it again before emitting |
2625 | the optimized expression. */ | |
8a2c7744 | 2626 | if (opcode == BIT_IOR_EXPR |
2627 | || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) | |
946e9eb4 | 2628 | ranges[i].in_p = !ranges[i].in_p; |
2629 | } | |
2630 | ||
2631 | qsort (ranges, length, sizeof (*ranges), range_entry_cmp); | |
2632 | for (i = 0; i < length; i++) | |
2633 | if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) | |
2634 | break; | |
2635 | ||
2636 | /* Try to merge ranges. */ | |
2637 | for (first = i; i < length; i++) | |
2638 | { | |
2639 | tree low = ranges[i].low; | |
2640 | tree high = ranges[i].high; | |
2641 | int in_p = ranges[i].in_p; | |
2642 | bool strict_overflow_p = ranges[i].strict_overflow_p; | |
2643 | int update_fail_count = 0; | |
2644 | ||
2645 | for (j = i + 1; j < length; j++) | |
2646 | { | |
2647 | if (ranges[i].exp != ranges[j].exp) | |
2648 | break; | |
2649 | if (!merge_ranges (&in_p, &low, &high, in_p, low, high, | |
2650 | ranges[j].in_p, ranges[j].low, ranges[j].high)) | |
2651 | break; | |
2652 | strict_overflow_p |= ranges[j].strict_overflow_p; | |
2653 | } | |
2654 | ||
2655 | if (j == i + 1) | |
2656 | continue; | |
2657 | ||
e3668db5 | 2658 | if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, |
2659 | opcode, ops, ranges[i].exp, NULL, in_p, | |
2660 | low, high, strict_overflow_p)) | |
946e9eb4 | 2661 | { |
2662 | i = j - 1; | |
2663 | any_changes = true; | |
2664 | } | |
2665 | /* Avoid quadratic complexity if all merge_ranges calls would succeed, | |
2666 | while update_range_test would fail. */ | |
2667 | else if (update_fail_count == 64) | |
2668 | i = j - 1; | |
2669 | else | |
2670 | ++update_fail_count; | |
2671 | } | |
2672 | ||
b0c0c879 | 2673 | any_changes |= optimize_range_tests_1 (opcode, first, length, true, |
2674 | ops, ranges); | |
946e9eb4 | 2675 | |
b0c0c879 | 2676 | if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) |
2677 | any_changes |= optimize_range_tests_1 (opcode, first, length, false, | |
2678 | ops, ranges); | |
e3668db5 | 2679 | if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) |
2680 | any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, | |
2681 | ops, ranges); | |
946e9eb4 | 2682 | |
8a2c7744 | 2683 | if (any_changes && opcode != ERROR_MARK) |
946e9eb4 | 2684 | { |
2685 | j = 0; | |
f1f41a6c | 2686 | FOR_EACH_VEC_ELT (*ops, i, oe) |
946e9eb4 | 2687 | { |
2688 | if (oe->op == error_mark_node) | |
2689 | continue; | |
2690 | else if (i != j) | |
f1f41a6c | 2691 | (*ops)[j] = oe; |
946e9eb4 | 2692 | j++; |
2693 | } | |
f1f41a6c | 2694 | ops->truncate (j); |
946e9eb4 | 2695 | } |
2696 | ||
2697 | XDELETEVEC (ranges); | |
82cb4e1c | 2698 | return any_changes; |
946e9eb4 | 2699 | } |
2700 | ||
8a2c7744 | 2701 | /* Return true if STMT is a cast like: |
2702 | <bb N>: | |
2703 | ... | |
2704 | _123 = (int) _234; | |
2705 | ||
2706 | <bb M>: | |
2707 | # _345 = PHI <_123(N), 1(...), 1(...)> | |
2708 | where _234 has bool type, _123 has a single use and | |
2709 | bb N has a single successor M. This is commonly used in | |
2710 | the last block of a range test. */ | |
2711 | ||
2712 | static bool | |
2713 | final_range_test_p (gimple stmt) | |
2714 | { | |
2715 | basic_block bb, rhs_bb; | |
2716 | edge e; | |
2717 | tree lhs, rhs; | |
2718 | use_operand_p use_p; | |
2719 | gimple use_stmt; | |
2720 | ||
2721 | if (!gimple_assign_cast_p (stmt)) | |
2722 | return false; | |
2723 | bb = gimple_bb (stmt); | |
2724 | if (!single_succ_p (bb)) | |
2725 | return false; | |
2726 | e = single_succ_edge (bb); | |
2727 | if (e->flags & EDGE_COMPLEX) | |
2728 | return false; | |
2729 | ||
2730 | lhs = gimple_assign_lhs (stmt); | |
2731 | rhs = gimple_assign_rhs1 (stmt); | |
2732 | if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) | |
2733 | || TREE_CODE (rhs) != SSA_NAME | |
2734 | || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE) | |
2735 | return false; | |
2736 | ||
2737 | /* Test whether lhs is consumed only by a PHI in the only successor bb. */ | |
2738 | if (!single_imm_use (lhs, &use_p, &use_stmt)) | |
2739 | return false; | |
2740 | ||
2741 | if (gimple_code (use_stmt) != GIMPLE_PHI | |
2742 | || gimple_bb (use_stmt) != e->dest) | |
2743 | return false; | |
2744 | ||
2745 | /* And that the rhs is defined in the same loop. */ | |
2746 | rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)); | |
2747 | if (rhs_bb == NULL | |
2748 | || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) | |
2749 | return false; | |
2750 | ||
2751 | return true; | |
2752 | } | |
2753 | ||
2754 | /* Return true if BB is a suitable basic block for inter-bb range test | |
2755 | optimization. If BACKWARD is true, BB should be the only predecessor | |
2756 | of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine, | |
2757 | or is compared against to find the common basic block to which all the | |
2758 | conditions branch if true resp. false. If BACKWARD is false, TEST_BB | |
2759 | should be the only predecessor of BB. */ | |
2760 | ||
2761 | static bool | |
2762 | suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, | |
2763 | bool backward) | |
2764 | { | |
2765 | edge_iterator ei, ei2; | |
2766 | edge e, e2; | |
2767 | gimple stmt; | |
1a91d914 | 2768 | gphi_iterator gsi; |
8a2c7744 | 2769 | bool other_edge_seen = false; |
2770 | bool is_cond; | |
2771 | ||
2772 | if (test_bb == bb) | |
2773 | return false; | |
2774 | /* Check last stmt first. */ | |
2775 | stmt = last_stmt (bb); | |
2776 | if (stmt == NULL | |
2777 | || (gimple_code (stmt) != GIMPLE_COND | |
2778 | && (backward || !final_range_test_p (stmt))) | |
2779 | || gimple_visited_p (stmt) | |
2780 | || stmt_could_throw_p (stmt) | |
2781 | || *other_bb == bb) | |
2782 | return false; | |
2783 | is_cond = gimple_code (stmt) == GIMPLE_COND; | |
2784 | if (is_cond) | |
2785 | { | |
2786 | /* If last stmt is GIMPLE_COND, verify that one of the succ edges | |
2787 | goes to the next bb (if BACKWARD, it is TEST_BB), and the other | |
2788 | to *OTHER_BB (if not set yet, try to find it out). */ | |
2789 | if (EDGE_COUNT (bb->succs) != 2) | |
2790 | return false; | |
2791 | FOR_EACH_EDGE (e, ei, bb->succs) | |
2792 | { | |
2793 | if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) | |
2794 | return false; | |
2795 | if (e->dest == test_bb) | |
2796 | { | |
2797 | if (backward) | |
2798 | continue; | |
2799 | else | |
2800 | return false; | |
2801 | } | |
2802 | if (e->dest == bb) | |
2803 | return false; | |
2804 | if (*other_bb == NULL) | |
2805 | { | |
2806 | FOR_EACH_EDGE (e2, ei2, test_bb->succs) | |
2807 | if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) | |
2808 | return false; | |
2809 | else if (e->dest == e2->dest) | |
2810 | *other_bb = e->dest; | |
2811 | if (*other_bb == NULL) | |
2812 | return false; | |
2813 | } | |
2814 | if (e->dest == *other_bb) | |
2815 | other_edge_seen = true; | |
2816 | else if (backward) | |
2817 | return false; | |
2818 | } | |
2819 | if (*other_bb == NULL || !other_edge_seen) | |
2820 | return false; | |
2821 | } | |
2822 | else if (single_succ (bb) != *other_bb) | |
2823 | return false; | |
2824 | ||
2825 | /* Now check all PHIs of *OTHER_BB. */ | |
2826 | e = find_edge (bb, *other_bb); | |
2827 | e2 = find_edge (test_bb, *other_bb); | |
2828 | for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) | |
2829 | { | |
1a91d914 | 2830 | gphi *phi = gsi.phi (); |
8a2c7744 | 2831 | /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments |
2832 | corresponding to BB and TEST_BB predecessor must be the same. */ | |
2833 | if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), | |
2834 | gimple_phi_arg_def (phi, e2->dest_idx), 0)) | |
2835 | { | |
2836 | /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, | |
2837 | one of the PHIs should have the lhs of the last stmt in | |
2838 | that block as PHI arg and that PHI should have 0 or 1 | |
2839 | corresponding to it in all other range test basic blocks | |
2840 | considered. */ | |
2841 | if (!is_cond) | |
2842 | { | |
2843 | if (gimple_phi_arg_def (phi, e->dest_idx) | |
2844 | == gimple_assign_lhs (stmt) | |
2845 | && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) | |
2846 | || integer_onep (gimple_phi_arg_def (phi, | |
2847 | e2->dest_idx)))) | |
2848 | continue; | |
2849 | } | |
2850 | else | |
2851 | { | |
2852 | gimple test_last = last_stmt (test_bb); | |
2853 | if (gimple_code (test_last) != GIMPLE_COND | |
2854 | && gimple_phi_arg_def (phi, e2->dest_idx) | |
2855 | == gimple_assign_lhs (test_last) | |
2856 | && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) | |
2857 | || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) | |
2858 | continue; | |
2859 | } | |
2860 | ||
2861 | return false; | |
2862 | } | |
2863 | } | |
2864 | return true; | |
2865 | } | |
2866 | ||
2867 | /* Return true if BB doesn't have side-effects that would disallow | |
2868 | range test optimization, all SSA_NAMEs set in the bb are consumed | |
2869 | within the bb, and there are no PHIs. */ | |
2870 | ||
2871 | static bool | |
2872 | no_side_effect_bb (basic_block bb) | |
2873 | { | |
2874 | gimple_stmt_iterator gsi; | |
2875 | gimple last; | |
2876 | ||
2877 | if (!gimple_seq_empty_p (phi_nodes (bb))) | |
2878 | return false; | |
2879 | last = last_stmt (bb); | |
2880 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
2881 | { | |
2882 | gimple stmt = gsi_stmt (gsi); | |
2883 | tree lhs; | |
2884 | imm_use_iterator imm_iter; | |
2885 | use_operand_p use_p; | |
2886 | ||
2887 | if (is_gimple_debug (stmt)) | |
2888 | continue; | |
2889 | if (gimple_has_side_effects (stmt)) | |
2890 | return false; | |
2891 | if (stmt == last) | |
2892 | return true; | |
2893 | if (!is_gimple_assign (stmt)) | |
2894 | return false; | |
2895 | lhs = gimple_assign_lhs (stmt); | |
2896 | if (TREE_CODE (lhs) != SSA_NAME) | |
2897 | return false; | |
2898 | if (gimple_assign_rhs_could_trap_p (stmt)) | |
2899 | return false; | |
2900 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) | |
2901 | { | |
2902 | gimple use_stmt = USE_STMT (use_p); | |
2903 | if (is_gimple_debug (use_stmt)) | |
2904 | continue; | |
2905 | if (gimple_bb (use_stmt) != bb) | |
2906 | return false; | |
2907 | } | |
2908 | } | |
2909 | return false; | |
2910 | } | |
2911 | ||
2912 | /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, | |
2913 | return true and fill in *OPS recursively. */ | |
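/* For example, with CODE == BIT_AND_EXPR and
     _5 = _1 & _2;
     _2 = _3 & _4;
   a call starting from VAR == _5 pushes _1, _3 and _4 onto *OPS,
   assuming those names are single-use and not themselves defined by
   further reassociable BIT_AND_EXPRs.  */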
2914 | ||
2915 | static bool | |
f1f41a6c | 2916 | get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops, |
8a2c7744 | 2917 | struct loop *loop) |
2918 | { | |
2919 | gimple stmt = SSA_NAME_DEF_STMT (var); | |
2920 | tree rhs[2]; | |
2921 | int i; | |
2922 | ||
2923 | if (!is_reassociable_op (stmt, code, loop)) | |
2924 | return false; | |
2925 | ||
2926 | rhs[0] = gimple_assign_rhs1 (stmt); | |
2927 | rhs[1] = gimple_assign_rhs2 (stmt); | |
2928 | gimple_set_visited (stmt, true); | |
2929 | for (i = 0; i < 2; i++) | |
2930 | if (TREE_CODE (rhs[i]) == SSA_NAME | |
2931 | && !get_ops (rhs[i], code, ops, loop) | |
2932 | && has_single_use (rhs[i])) | |
2933 | { | |
672758dc | 2934 | operand_entry_t oe = operand_entry_pool.allocate (); |
8a2c7744 | 2935 | |
2936 | oe->op = rhs[i]; | |
2937 | oe->rank = code; | |
2938 | oe->id = 0; | |
2939 | oe->count = 1; | |
f1f41a6c | 2940 | ops->safe_push (oe); |
8a2c7744 | 2941 | } |
2942 | return true; | |
2943 | } | |
2944 | ||
82cb4e1c | 2945 | /* Find the ops that were added by get_ops starting from VAR, see if |
2946 | they were changed during update_range_test and if so, create new | |
2947 | stmts. */ | |
2948 | ||
2949 | static tree | |
2950 | update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops, | |
2951 | unsigned int *pidx, struct loop *loop) | |
2952 | { | |
2953 | gimple stmt = SSA_NAME_DEF_STMT (var); | |
2954 | tree rhs[4]; | |
2955 | int i; | |
2956 | ||
2957 | if (!is_reassociable_op (stmt, code, loop)) | |
2958 | return NULL; | |
2959 | ||
2960 | rhs[0] = gimple_assign_rhs1 (stmt); | |
2961 | rhs[1] = gimple_assign_rhs2 (stmt); | |
2962 | rhs[2] = rhs[0]; | |
2963 | rhs[3] = rhs[1]; | |
2964 | for (i = 0; i < 2; i++) | |
2965 | if (TREE_CODE (rhs[i]) == SSA_NAME) | |
2966 | { | |
2967 | rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); | |
2968 | if (rhs[2 + i] == NULL_TREE) | |
2969 | { | |
2970 | if (has_single_use (rhs[i])) | |
2971 | rhs[2 + i] = ops[(*pidx)++]->op; | |
2972 | else | |
2973 | rhs[2 + i] = rhs[i]; | |
2974 | } | |
2975 | } | |
2976 | if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) | |
2977 | && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) | |
2978 | { | |
2979 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
f9e245b2 | 2980 | var = make_ssa_name (TREE_TYPE (var)); |
e9cf809e | 2981 | gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt), |
2982 | rhs[2], rhs[3]); | |
82cb4e1c | 2983 | gimple_set_uid (g, gimple_uid (stmt)); |
2984 | gimple_set_visited (g, true); | |
2985 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
2986 | } | |
2987 | return var; | |
2988 | } | |
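
/* Continuing the sketch above: if optimize_range_tests later replaces
   the leaf _1 with a merged test, update_ops walks the same chain,
   rebuilds each statement whose operands changed under a fresh
   SSA_NAME inserted before the old one, and returns the new root,
   advancing *PIDX past the OPS entries it consumed.  */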
2989 | ||
2990 | /* Structure to track the initial value passed to get_ops and | |
2991 | the range in the ops vector for each basic block. */ | |
2992 | ||
2993 | struct inter_bb_range_test_entry | |
2994 | { | |
2995 | tree op; | |
2996 | unsigned int first_idx, last_idx; | |
2997 | }; | |
2998 | ||
8a2c7744 | 2999 | /* Inter-bb range test optimization. */ |
3000 | ||
3001 | static void | |
3002 | maybe_optimize_range_tests (gimple stmt) | |
3003 | { | |
3004 | basic_block first_bb = gimple_bb (stmt); | |
3005 | basic_block last_bb = first_bb; | |
3006 | basic_block other_bb = NULL; | |
3007 | basic_block bb; | |
3008 | edge_iterator ei; | |
3009 | edge e; | |
c2078b80 | 3010 | auto_vec<operand_entry_t> ops; |
3011 | auto_vec<inter_bb_range_test_entry> bbinfo; | |
3c8e4ef8 | 3012 | bool any_changes = false; |
8a2c7744 | 3013 | |
3014 | /* Consider only basic blocks that end with GIMPLE_COND or | |
3015 | a cast statement satisfying final_range_test_p. All | |
3016 | but the last bb in the first_bb .. last_bb range | |
3017 | should end with GIMPLE_COND. */ | |
3018 | if (gimple_code (stmt) == GIMPLE_COND) | |
3019 | { | |
3020 | if (EDGE_COUNT (first_bb->succs) != 2) | |
3021 | return; | |
3022 | } | |
3023 | else if (final_range_test_p (stmt)) | |
3024 | other_bb = single_succ (first_bb); | |
3025 | else | |
3026 | return; | |
3027 | ||
3028 | if (stmt_could_throw_p (stmt)) | |
3029 | return; | |
3030 | ||
3031 | /* As relative ordering of post-dominator sons isn't fixed, | |
3032 | maybe_optimize_range_tests can be called first on any | |
3033 | bb in the range we want to optimize. So, start searching | |
3034 | backwards, to see if first_bb can be set to a predecessor. */ | |
3035 | while (single_pred_p (first_bb)) | |
3036 | { | |
3037 | basic_block pred_bb = single_pred (first_bb); | |
3038 | if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) | |
3039 | break; | |
3040 | if (!no_side_effect_bb (first_bb)) | |
3041 | break; | |
3042 | first_bb = pred_bb; | |
3043 | } | |
3044 | /* If first_bb is last_bb, other_bb hasn't been computed yet. | |
3045 | Before starting forward search in last_bb successors, find | |
3046 | out the other_bb. */ | |
3047 | if (first_bb == last_bb) | |
3048 | { | |
3049 | other_bb = NULL; | |
3050 | /* As a non-GIMPLE_COND last stmt always terminates the range, | |
3051 | if the backward search didn't discover anything, just give up. */ | |
3052 | if (gimple_code (stmt) != GIMPLE_COND) | |
3053 | return; | |
3054 | /* Look at both successors. Either it ends with a GIMPLE_COND | |
3055 | and satisfies suitable_cond_bb, or ends with a cast and | |
3056 | other_bb is that cast's successor. */ | |
3057 | FOR_EACH_EDGE (e, ei, first_bb->succs) | |
3058 | if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) | |
3059 | || e->dest == first_bb) | |
3060 | return; | |
3061 | else if (single_pred_p (e->dest)) | |
3062 | { | |
3063 | stmt = last_stmt (e->dest); | |
3064 | if (stmt | |
3065 | && gimple_code (stmt) == GIMPLE_COND | |
3066 | && EDGE_COUNT (e->dest->succs) == 2) | |
3067 | { | |
3068 | if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) | |
3069 | break; | |
3070 | else | |
3071 | other_bb = NULL; | |
3072 | } | |
3073 | else if (stmt | |
3074 | && final_range_test_p (stmt) | |
3075 | && find_edge (first_bb, single_succ (e->dest))) | |
3076 | { | |
3077 | other_bb = single_succ (e->dest); | |
3078 | if (other_bb == first_bb) | |
3079 | other_bb = NULL; | |
3080 | } | |
3081 | } | |
3082 | if (other_bb == NULL) | |
3083 | return; | |
3084 | } | |
3085 | /* Now do the forward search, moving last_bb to successor bbs | |
3086 | that aren't other_bb. */ | |
3087 | while (EDGE_COUNT (last_bb->succs) == 2) | |
3088 | { | |
3089 | FOR_EACH_EDGE (e, ei, last_bb->succs) | |
3090 | if (e->dest != other_bb) | |
3091 | break; | |
3092 | if (e == NULL) | |
3093 | break; | |
3094 | if (!single_pred_p (e->dest)) | |
3095 | break; | |
3096 | if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) | |
3097 | break; | |
3098 | if (!no_side_effect_bb (e->dest)) | |
3099 | break; | |
3100 | last_bb = e->dest; | |
3101 | } | |
3102 | if (first_bb == last_bb) | |
3103 | return; | |
3104 | /* Here basic blocks first_bb through last_bb's predecessor | |
3105 | end with GIMPLE_COND, each of them has one edge to | |
3106 | other_bb and another one to another block in the range; | |
3107 | all blocks except first_bb have no side-effects, and | |
3108 | last_bb ends with either a GIMPLE_COND or a cast satisfying | |
3109 | final_range_test_p. */ | |
3110 | for (bb = last_bb; ; bb = single_pred (bb)) | |
3111 | { | |
3112 | enum tree_code code; | |
3113 | tree lhs, rhs; | |
82cb4e1c | 3114 | inter_bb_range_test_entry bb_ent; |
8a2c7744 | 3115 | |
82cb4e1c | 3116 | bb_ent.op = NULL_TREE; |
3117 | bb_ent.first_idx = ops.length (); | |
3118 | bb_ent.last_idx = bb_ent.first_idx; | |
8a2c7744 | 3119 | e = find_edge (bb, other_bb); |
3120 | stmt = last_stmt (bb); | |
3121 | gimple_set_visited (stmt, true); | |
3122 | if (gimple_code (stmt) != GIMPLE_COND) | |
3123 | { | |
3124 | use_operand_p use_p; | |
3125 | gimple phi; | |
3126 | edge e2; | |
3127 | unsigned int d; | |
3128 | ||
3129 | lhs = gimple_assign_lhs (stmt); | |
3130 | rhs = gimple_assign_rhs1 (stmt); | |
3131 | gcc_assert (bb == last_bb); | |
3132 | ||
3133 | /* stmt is | |
3134 | _123 = (int) _234; | |
3135 | ||
3136 | followed by: | |
3137 | <bb M>: | |
3138 | # _345 = PHI <_123(N), 1(...), 1(...)> | |
3139 | ||
3140 | or 0 instead of 1. If it is 0, the _234 | |
3141 | range test is anded together with all the | |
3142 | other range tests; if it is 1, it is ored with | |
3143 | them. */ | |
3144 | single_imm_use (lhs, &use_p, &phi); | |
3145 | gcc_assert (gimple_code (phi) == GIMPLE_PHI); | |
3146 | e2 = find_edge (first_bb, other_bb); | |
3147 | d = e2->dest_idx; | |
3148 | gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); | |
3149 | if (integer_zerop (gimple_phi_arg_def (phi, d))) | |
3150 | code = BIT_AND_EXPR; | |
3151 | else | |
3152 | { | |
3153 | gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); | |
3154 | code = BIT_IOR_EXPR; | |
3155 | } | |
3156 | ||
3157 | /* If _234 SSA_NAME_DEF_STMT is | |
3158 | _234 = _567 | _789; | |
3159 | (or &, corresponding to 1/0 in the phi arguments), | |
3160 | push into ops the individual range test arguments | |
3161 | of the bitwise or resp. and, recursively. */ | |
3162 | if (!get_ops (rhs, code, &ops, | |
3163 | loop_containing_stmt (stmt)) | |
3164 | && has_single_use (rhs)) | |
3165 | { | |
3166 | /* Otherwise, push the _234 range test itself. */ | |
672758dc | 3167 | operand_entry_t oe = operand_entry_pool.allocate (); |
8a2c7744 | 3168 | |
3169 | oe->op = rhs; | |
3170 | oe->rank = code; | |
3171 | oe->id = 0; | |
3172 | oe->count = 1; | |
f1f41a6c | 3173 | ops.safe_push (oe); |
82cb4e1c | 3174 | bb_ent.last_idx++; |
8a2c7744 | 3175 | } |
82cb4e1c | 3176 | else |
3177 | bb_ent.last_idx = ops.length (); | |
3178 | bb_ent.op = rhs; | |
3179 | bbinfo.safe_push (bb_ent); | |
8a2c7744 | 3180 | continue; |
3181 | } | |
3182 | /* Otherwise stmt is GIMPLE_COND. */ | |
3183 | code = gimple_cond_code (stmt); | |
3184 | lhs = gimple_cond_lhs (stmt); | |
3185 | rhs = gimple_cond_rhs (stmt); | |
3186 | if (TREE_CODE (lhs) == SSA_NAME | |
3187 | && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) | |
3188 | && ((code != EQ_EXPR && code != NE_EXPR) | |
3189 | || rhs != boolean_false_node | |
3190 | /* Either push into ops the individual bitwise | |
3191 | or resp. and operands, depending on which | |
3192 | edge is other_bb. */ | |
3193 | || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) | |
3194 | ^ (code == EQ_EXPR)) | |
3195 | ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, | |
3196 | loop_containing_stmt (stmt)))) | |
3197 | { | |
3198 | /* Or push the GIMPLE_COND stmt itself. */ | |
672758dc | 3199 | operand_entry_t oe = operand_entry_pool.allocate (); |
8a2c7744 | 3200 | |
3201 | oe->op = NULL; | |
3202 | oe->rank = (e->flags & EDGE_TRUE_VALUE) | |
3203 | ? BIT_IOR_EXPR : BIT_AND_EXPR; | |
3204 | /* oe->op = NULL signals that there is no SSA_NAME | |
3205 | for the range test, and oe->id instead is the | |
3206 | number of the basic block at whose end the | |
3207 | GIMPLE_COND is. */ | |
3208 | oe->id = bb->index; | |
3209 | oe->count = 1; | |
f1f41a6c | 3210 | ops.safe_push (oe); |
82cb4e1c | 3211 | bb_ent.op = NULL; |
3212 | bb_ent.last_idx++; | |
8a2c7744 | 3213 | } |
82cb4e1c | 3214 | else if (ops.length () > bb_ent.first_idx) |
3215 | { | |
3216 | bb_ent.op = lhs; | |
3217 | bb_ent.last_idx = ops.length (); | |
3218 | } | |
3219 | bbinfo.safe_push (bb_ent); | |
8a2c7744 | 3220 | if (bb == first_bb) |
3221 | break; | |
3222 | } | |
f1f41a6c | 3223 | if (ops.length () > 1) |
3c8e4ef8 | 3224 | any_changes = optimize_range_tests (ERROR_MARK, &ops); |
3225 | if (any_changes) | |
82cb4e1c | 3226 | { |
3227 | unsigned int idx; | |
3c8e4ef8 | 3228 | /* update_ops relies on has_single_use predicates returning the |
3229 | same values as they did during get_ops earlier. Additionally it | |
3230 | never removes statements, only adds new ones, and it should walk | |
3231 | from the single imm use and check the predicate already before | |
3232 | making those changes. | |
3233 | On the other hand, handling GIMPLE_COND directly can turn | |
3234 | previously multiply-used SSA_NAMEs into single-use SSA_NAMEs, so | |
3235 | it needs to be done in a separate loop afterwards. */ | |
3236 | for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) | |
82cb4e1c | 3237 | { |
3c8e4ef8 | 3238 | if (bbinfo[idx].first_idx < bbinfo[idx].last_idx |
3239 | && bbinfo[idx].op != NULL_TREE) | |
82cb4e1c | 3240 | { |
82cb4e1c | 3241 | tree new_op; |
3242 | ||
3c8e4ef8 | 3243 | stmt = last_stmt (bb); |
3244 | new_op = update_ops (bbinfo[idx].op, | |
3245 | (enum tree_code) | |
3246 | ops[bbinfo[idx].first_idx]->rank, | |
3247 | ops, &bbinfo[idx].first_idx, | |
3248 | loop_containing_stmt (stmt)); | |
82cb4e1c | 3249 | if (new_op == NULL_TREE) |
3250 | { | |
3251 | gcc_assert (bb == last_bb); | |
3252 | new_op = ops[bbinfo[idx].first_idx++]->op; | |
3253 | } | |
3254 | if (bbinfo[idx].op != new_op) | |
3255 | { | |
3256 | imm_use_iterator iter; | |
3257 | use_operand_p use_p; | |
3258 | gimple use_stmt, cast_stmt = NULL; | |
3259 | ||
3260 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) | |
3261 | if (is_gimple_debug (use_stmt)) | |
3262 | continue; | |
3263 | else if (gimple_code (use_stmt) == GIMPLE_COND | |
3264 | || gimple_code (use_stmt) == GIMPLE_PHI) | |
3265 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
3266 | SET_USE (use_p, new_op); | |
3267 | else if (gimple_assign_cast_p (use_stmt)) | |
3268 | cast_stmt = use_stmt; | |
3269 | else | |
3270 | gcc_unreachable (); | |
3271 | if (cast_stmt) | |
3272 | { | |
3273 | gcc_assert (bb == last_bb); | |
3274 | tree lhs = gimple_assign_lhs (cast_stmt); | |
f9e245b2 | 3275 | tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); |
82cb4e1c | 3276 | enum tree_code rhs_code |
3277 | = gimple_assign_rhs_code (cast_stmt); | |
1a91d914 | 3278 | gassign *g; |
4c0d6cf7 | 3279 | if (is_gimple_min_invariant (new_op)) |
3280 | { | |
3281 | new_op = fold_convert (TREE_TYPE (lhs), new_op); | |
3282 | g = gimple_build_assign (new_lhs, new_op); | |
3283 | } | |
3284 | else | |
e9cf809e | 3285 | g = gimple_build_assign (new_lhs, rhs_code, new_op); |
82cb4e1c | 3286 | gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt); |
3287 | gimple_set_uid (g, gimple_uid (cast_stmt)); | |
3288 | gimple_set_visited (g, true); | |
3289 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3290 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) | |
3291 | if (is_gimple_debug (use_stmt)) | |
3292 | continue; | |
3293 | else if (gimple_code (use_stmt) == GIMPLE_COND | |
3294 | || gimple_code (use_stmt) == GIMPLE_PHI) | |
3295 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
3296 | SET_USE (use_p, new_lhs); | |
3297 | else | |
3298 | gcc_unreachable (); | |
3299 | } | |
3300 | } | |
3301 | } | |
3302 | if (bb == first_bb) | |
3303 | break; | |
3304 | } | |
3c8e4ef8 | 3305 | for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) |
3306 | { | |
3307 | if (bbinfo[idx].first_idx < bbinfo[idx].last_idx | |
3308 | && bbinfo[idx].op == NULL_TREE | |
3309 | && ops[bbinfo[idx].first_idx]->op != NULL_TREE) | |
3310 | { | |
1a91d914 | 3311 | gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); |
3c8e4ef8 | 3312 | if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) |
1a91d914 | 3313 | gimple_cond_make_false (cond_stmt); |
3c8e4ef8 | 3314 | else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) |
1a91d914 | 3315 | gimple_cond_make_true (cond_stmt); |
3c8e4ef8 | 3316 | else |
3317 | { | |
1a91d914 | 3318 | gimple_cond_set_code (cond_stmt, NE_EXPR); |
3319 | gimple_cond_set_lhs (cond_stmt, | |
3320 | ops[bbinfo[idx].first_idx]->op); | |
3321 | gimple_cond_set_rhs (cond_stmt, boolean_false_node); | |
3c8e4ef8 | 3322 | } |
1a91d914 | 3323 | update_stmt (cond_stmt); |
3c8e4ef8 | 3324 | } |
3325 | if (bb == first_bb) | |
3326 | break; | |
3327 | } | |
82cb4e1c | 3328 | } |
8a2c7744 | 3329 | } |
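
/* To sketch the shape this function looks for: a condition such as
   "if (a == 0 || a == 1 || a == 3)" may reach this pass split across
   several basic blocks, each ending in a GIMPLE_COND with one edge to
   a common destination other_bb.  The individual tests are gathered
   into OPS, optimize_range_tests merges those it can into fewer
   comparisons, and the loops above then materialize the rewritten
   tests while folding the now-redundant GIMPLE_CONDs to constant
   true/false.  */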
3330 | ||
54aceb26 | 3331 | /* Return true if OPERAND is defined by a PHI node which uses the LHS |
3332 | of STMT in its operands. This is also known as a "destructive | |
3333 | update" operation. */ | |
3334 | ||
3335 | static bool | |
75a70cf9 | 3336 | is_phi_for_stmt (gimple stmt, tree operand) |
54aceb26 | 3337 | { |
75a70cf9 | 3338 | gimple def_stmt; |
1a91d914 | 3339 | gphi *def_phi; |
75a70cf9 | 3340 | tree lhs; |
54aceb26 | 3341 | use_operand_p arg_p; |
3342 | ssa_op_iter i; | |
3343 | ||
3344 | if (TREE_CODE (operand) != SSA_NAME) | |
3345 | return false; | |
3346 | ||
75a70cf9 | 3347 | lhs = gimple_assign_lhs (stmt); |
3348 | ||
54aceb26 | 3349 | def_stmt = SSA_NAME_DEF_STMT (operand); |
1a91d914 | 3350 | def_phi = dyn_cast <gphi *> (def_stmt); |
3351 | if (!def_phi) | |
54aceb26 | 3352 | return false; |
3353 | ||
1a91d914 | 3354 | FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) |
54aceb26 | 3355 | if (lhs == USE_FROM_PTR (arg_p)) |
3356 | return true; | |
3357 | return false; | |
3358 | } | |
3359 | ||
9ce93694 | 3360 | /* Remove def stmt of VAR if VAR has zero uses and recurse |
3361 | on rhs1 operand if so. */ | |
3362 | ||
3363 | static void | |
3364 | remove_visited_stmt_chain (tree var) | |
3365 | { | |
3366 | gimple stmt; | |
3367 | gimple_stmt_iterator gsi; | |
3368 | ||
3369 | while (1) | |
3370 | { | |
3371 | if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) | |
3372 | return; | |
3373 | stmt = SSA_NAME_DEF_STMT (var); | |
8c5ac7f6 | 3374 | if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) |
3375 | { | |
3376 | var = gimple_assign_rhs1 (stmt); | |
8c5ac7f6 | 3377 | gsi = gsi_for_stmt (stmt); |
54675e05 | 3378 | reassoc_remove_stmt (&gsi); |
8c5ac7f6 | 3379 | release_defs (stmt); |
8c5ac7f6 | 3380 | } |
3381 | else | |
9ce93694 | 3382 | return; |
9ce93694 | 3383 | } |
3384 | } | |
3385 | ||
5b1c765d | 3386 | /* This function checks three consecutive operands in the
3387 | passed operands vector OPS starting from OPINDEX and | |
3388 | swaps two operands if it is profitable for the binary operation | |
3389 | consuming the OPINDEX + 1 and OPINDEX + 2 operands. | |
3390 | ||
3391 | We pair ops with the same rank if possible. | |
3392 | ||
3393 | The alternative we try is to see if STMT is a destructive | |
3394 | update style statement, which is like: | |
3395 | b = phi (a, ...) | |
3396 | a = c + b; | |
3397 | In that case, we want to use the destructive update form to | |
3398 | expose the possible vectorizer sum reduction opportunity. | |
3399 | In that case, the third operand will be the phi node. This | |
3400 | check is not performed if STMT is null. | |
3401 | ||
3402 | We could, of course, try to be better as noted above, and do a | |
3403 | lot of work to try to find these opportunities in >3 operand | |
3404 | cases, but it is unlikely to be worth it. */ | |
3405 | ||
3406 | static void | |
f1f41a6c | 3407 | swap_ops_for_binary_stmt (vec<operand_entry_t> ops, |
5b1c765d | 3408 | unsigned int opindex, gimple stmt) |
3409 | { | |
3410 | operand_entry_t oe1, oe2, oe3; | |
3411 | ||
f1f41a6c | 3412 | oe1 = ops[opindex]; |
3413 | oe2 = ops[opindex + 1]; | |
3414 | oe3 = ops[opindex + 2]; | |
5b1c765d | 3415 | |
3416 | if ((oe1->rank == oe2->rank | |
3417 | && oe2->rank != oe3->rank) | |
3418 | || (stmt && is_phi_for_stmt (stmt, oe3->op) | |
3419 | && !is_phi_for_stmt (stmt, oe1->op) | |
3420 | && !is_phi_for_stmt (stmt, oe2->op))) | |
3421 | { | |
3422 | struct operand_entry temp = *oe3; | |
3423 | oe3->op = oe1->op; | |
3424 | oe3->rank = oe1->rank; | |
3425 | oe1->op = temp.op; | |
3426 | oe1->rank= temp.rank; | |
3427 | } | |
3428 | else if ((oe1->rank == oe3->rank | |
3429 | && oe2->rank != oe3->rank) | |
3430 | || (stmt && is_phi_for_stmt (stmt, oe2->op) | |
3431 | && !is_phi_for_stmt (stmt, oe1->op) | |
3432 | && !is_phi_for_stmt (stmt, oe3->op))) | |
3433 | { | |
3434 | struct operand_entry temp = *oe2; | |
3435 | oe2->op = oe1->op; | |
3436 | oe2->rank = oe1->rank; | |
3437 | oe1->op = temp.op; | |
82cb4e1c | 3438 | oe1->rank = temp.rank; |
bb88a6c7 | 3439 | } |
bb88a6c7 | 3440 | } |
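
/* For example, with ranks { oe1 = 4, oe2 = 2, oe3 = 4 } (a made-up
   sketch), the second test above swaps oe1 and oe2, yielding
   { 2, 4, 4 }, so that the two equal-rank operands are consumed by
   the same binary operation.  */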
3441 | ||
82cb4e1c | 3442 | /* If definition of RHS1 or RHS2 dominates STMT, return the later of those |
3443 | two definitions, otherwise return STMT. */ | |
a2bd0c99 | 3444 | |
3445 | static inline gimple | |
82cb4e1c | 3446 | find_insert_point (gimple stmt, tree rhs1, tree rhs2) |
a2bd0c99 | 3447 | { |
82cb4e1c | 3448 | if (TREE_CODE (rhs1) == SSA_NAME |
3449 | && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) | |
3450 | stmt = SSA_NAME_DEF_STMT (rhs1); | |
3451 | if (TREE_CODE (rhs2) == SSA_NAME | |
3452 | && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) | |
3453 | stmt = SSA_NAME_DEF_STMT (rhs2); | |
3454 | return stmt; | |
a2bd0c99 | 3455 | } |
3456 | ||
54aceb26 | 3457 | /* Recursively rewrite our linearized statements so that the operators |
3458 | match those in OPS[OPINDEX], putting the computation in rank | |
82cb4e1c | 3459 | order. Return new lhs. */ |
54aceb26 | 3460 | |
82cb4e1c | 3461 | static tree |
75a70cf9 | 3462 | rewrite_expr_tree (gimple stmt, unsigned int opindex, |
82cb4e1c | 3463 | vec<operand_entry_t> ops, bool changed) |
54aceb26 | 3464 | { |
75a70cf9 | 3465 | tree rhs1 = gimple_assign_rhs1 (stmt); |
3466 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
82cb4e1c | 3467 | tree lhs = gimple_assign_lhs (stmt); |
54aceb26 | 3468 | operand_entry_t oe; |
3469 | ||
54aceb26 | 3470 | /* The final recursion case for this function is that you have |
3471 | exactly two operands left. | |
229fa37d | 3472 | If we had exactly one op in the entire list to start with, we |
54aceb26 | 3473 | would have never called this function, and the tail recursion |
3474 | rewrites them one at a time. */ | |
f1f41a6c | 3475 | if (opindex + 2 == ops.length ()) |
54aceb26 | 3476 | { |
3477 | operand_entry_t oe1, oe2; | |
3478 | ||
f1f41a6c | 3479 | oe1 = ops[opindex]; |
3480 | oe2 = ops[opindex + 1]; | |
54aceb26 | 3481 | |
75a70cf9 | 3482 | if (rhs1 != oe1->op || rhs2 != oe2->op) |
54aceb26 | 3483 | { |
82cb4e1c | 3484 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
3485 | unsigned int uid = gimple_uid (stmt); | |
3486 | ||
54aceb26 | 3487 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3488 | { | |
3489 | fprintf (dump_file, "Transforming "); | |
75a70cf9 | 3490 | print_gimple_stmt (dump_file, stmt, 0, 0); |
54aceb26 | 3491 | } |
3492 | ||
229fa37d | 3493 | /* Even when changed is false, reassociation could have e.g. removed |
3494 | some redundant operations, so unless we are just swapping the | |
3495 | arguments or unless there is no change at all (then we just | |
3496 | return lhs), force creation of a new SSA_NAME. */ | |
3497 | if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) | |
82cb4e1c | 3498 | { |
3499 | gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op); | |
f9e245b2 | 3500 | lhs = make_ssa_name (TREE_TYPE (lhs)); |
82cb4e1c | 3501 | stmt |
e9cf809e | 3502 | = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), |
3503 | oe1->op, oe2->op); | |
82cb4e1c | 3504 | gimple_set_uid (stmt, uid); |
3505 | gimple_set_visited (stmt, true); | |
3506 | if (insert_point == gsi_stmt (gsi)) | |
3507 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
3508 | else | |
3509 | insert_stmt_after (stmt, insert_point); | |
3510 | } | |
3511 | else | |
3512 | { | |
3513 | gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op) | |
3514 | == stmt); | |
3515 | gimple_assign_set_rhs1 (stmt, oe1->op); | |
3516 | gimple_assign_set_rhs2 (stmt, oe2->op); | |
3517 | update_stmt (stmt); | |
3518 | } | |
3519 | ||
9ce93694 | 3520 | if (rhs1 != oe1->op && rhs1 != oe2->op) |
3521 | remove_visited_stmt_chain (rhs1); | |
54aceb26 | 3522 | |
3523 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3524 | { | |
3525 | fprintf (dump_file, " into "); | |
75a70cf9 | 3526 | print_gimple_stmt (dump_file, stmt, 0, 0); |
54aceb26 | 3527 | } |
54aceb26 | 3528 | } |
82cb4e1c | 3529 | return lhs; |
54aceb26 | 3530 | } |
3531 | ||
3532 | /* If we hit here, we should have 3 or more ops left. */ | |
f1f41a6c | 3533 | gcc_assert (opindex + 2 < ops.length ()); |
54aceb26 | 3534 | |
3535 | /* Rewrite the next operator. */ | |
f1f41a6c | 3536 | oe = ops[opindex]; |
54aceb26 | 3537 | |
82cb4e1c | 3538 | /* Recurse on the LHS of the binary operator, which is guaranteed to |
3539 | be the non-leaf side. */ | |
3540 | tree new_rhs1 | |
3541 | = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, | |
3542 | changed || oe->op != rhs2); | |
54aceb26 | 3543 | |
82cb4e1c | 3544 | if (oe->op != rhs2 || new_rhs1 != rhs1) |
3545 | { | |
54aceb26 | 3546 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3547 | { | |
3548 | fprintf (dump_file, "Transforming "); | |
75a70cf9 | 3549 | print_gimple_stmt (dump_file, stmt, 0, 0); |
54aceb26 | 3550 | } |
3551 | ||
82cb4e1c | 3552 | /* If changed is false, this is either opindex == 0 |
3553 | or all outer rhs2's were equal to corresponding oe->op, | |
3554 | and powi_result is NULL. | |
3555 | That means lhs is equivalent before and after reassociation. | |
3556 | Otherwise ensure the old lhs SSA_NAME is not reused and | |
3557 | create a new stmt as well, so that any debug stmts will be | |
3558 | properly adjusted. */ | |
3559 | if (changed) | |
3560 | { | |
3561 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
3562 | unsigned int uid = gimple_uid (stmt); | |
3563 | gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op); | |
3564 | ||
f9e245b2 | 3565 | lhs = make_ssa_name (TREE_TYPE (lhs)); |
e9cf809e | 3566 | stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), |
3567 | new_rhs1, oe->op); | |
82cb4e1c | 3568 | gimple_set_uid (stmt, uid); |
3569 | gimple_set_visited (stmt, true); | |
3570 | if (insert_point == gsi_stmt (gsi)) | |
3571 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
3572 | else | |
3573 | insert_stmt_after (stmt, insert_point); | |
3574 | } | |
3575 | else | |
3576 | { | |
3577 | gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op) | |
3578 | == stmt); | |
3579 | gimple_assign_set_rhs1 (stmt, new_rhs1); | |
3580 | gimple_assign_set_rhs2 (stmt, oe->op); | |
3581 | update_stmt (stmt); | |
3582 | } | |
54aceb26 | 3583 | |
3584 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3585 | { | |
3586 | fprintf (dump_file, " into "); | |
75a70cf9 | 3587 | print_gimple_stmt (dump_file, stmt, 0, 0); |
54aceb26 | 3588 | } |
3589 | } | |
82cb4e1c | 3590 | return lhs; |
54aceb26 | 3591 | } |
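
/* A sketch with hypothetical names: given OPS = { o0, o1, o2 } and
   the linearized chain

     _5 = x + y;
     lhs = _5 + z;

   the recursion bottoms out at the definition of _5, rebuilding it as
   "_6 = o1 + o2", and the outer statement then becomes
   "lhs' = _6 + o0"; whenever anything changed, a fresh SSA_NAME is
   created so debug stmts referring to the old one can be adjusted.  */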
3592 | ||
5b1c765d | 3593 | /* Find out how many cycles we need to compute a chain of statements.
3594 | OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the | |
3595 | maximum number of independent statements we may execute per cycle. */ | |
3596 | ||
3597 | static int | |
3598 | get_required_cycles (int ops_num, int cpu_width) | |
3599 | { | |
3600 | int res; | |
3601 | int elog; | |
3602 | unsigned int rest; | |
3603 | ||
3604 | /* While we have more than 2 * cpu_width operands | |
3605 | we may reduce the number of operands by cpu_width | |
3606 | per cycle. */ | |
3607 | res = ops_num / (2 * cpu_width); | |
3608 | ||
3609 | /* The remaining operand count may be halved each cycle | |
3610 | until we have only one operand. */ | |
3611 | rest = (unsigned)(ops_num - res * cpu_width); | |
3612 | elog = exact_log2 (rest); | |
3613 | if (elog >= 0) | |
3614 | res += elog; | |
3615 | else | |
3616 | res += floor_log2 (rest) + 1; | |
3617 | ||
3618 | return res; | |
3619 | } | |
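
/* A worked example: for ops_num == 16 and cpu_width == 4 the first
   phase takes 16 / (2 * 4) == 2 cycles, leaving
   rest == 16 - 2 * 4 == 8 operands; 8 is an exact power of two, so
   exact_log2 contributes 3 more halving cycles, 5 cycles in total.  */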
3620 | ||
3621 | /* Returns an optimal number of registers to use for computation of | |
3622 | given statements. */ | |
3623 | ||
3624 | static int | |
3625 | get_reassociation_width (int ops_num, enum tree_code opc, | |
3754d046 | 3626 | machine_mode mode) |
5b1c765d | 3627 | { |
3628 | int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); | |
3629 | int width; | |
3630 | int width_min; | |
3631 | int cycles_best; | |
3632 | ||
3633 | if (param_width > 0) | |
3634 | width = param_width; | |
3635 | else | |
3636 | width = targetm.sched.reassociation_width (opc, mode); | |
3637 | ||
3638 | if (width == 1) | |
3639 | return width; | |
3640 | ||
3641 | /* Get the minimal time required for sequence computation. */ | |
3642 | cycles_best = get_required_cycles (ops_num, width); | |
3643 | ||
3644 | /* Check if we may use less width and still compute sequence for | |
3645 | the same time. It will allow us to reduce registers usage. | |
3646 | get_required_cycles is monotonically increasing with lower width | |
3647 | so we can perform a binary search for the minimal width that still | |
3648 | results in the optimal cycle count. */ | |
3649 | width_min = 1; | |
3650 | while (width > width_min) | |
3651 | { | |
3652 | int width_mid = (width + width_min) / 2; | |
3653 | ||
3654 | if (get_required_cycles (ops_num, width_mid) == cycles_best) | |
3655 | width = width_mid; | |
3656 | else if (width_min < width_mid) | |
3657 | width_min = width_mid; | |
3658 | else | |
3659 | break; | |
3660 | } | |
3661 | ||
3662 | return width; | |
3663 | } | |
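
/* For instance, with ops_num == 6 and a target width of 4,
   get_required_cycles yields 3 cycles for width 4, still 3 for
   width 2, but 5 for width 1, so the search above settles on width 2:
   the narrower schedule is just as fast and ties up fewer
   registers.  */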
3664 | ||
3665 | /* Recursively rewrite our linearized statements so that the operators | |
3666 | match those in OPS[OPINDEX], putting the computation in rank | |
3667 | order and trying to allow operations to be executed in | |
3668 | parallel. */ | |
3669 | ||
3670 | static void | |
1a91d914 | 3671 | rewrite_expr_tree_parallel (gassign *stmt, int width, |
82cb4e1c | 3672 | vec<operand_entry_t> ops) |
5b1c765d | 3673 | { |
3674 | enum tree_code opcode = gimple_assign_rhs_code (stmt); | |
f1f41a6c | 3675 | int op_num = ops.length (); |
5b1c765d | 3676 | int stmt_num = op_num - 1; |
3677 | gimple *stmts = XALLOCAVEC (gimple, stmt_num); | |
3678 | int op_index = op_num - 1; | |
3679 | int stmt_index = 0; | |
3680 | int ready_stmts_end = 0; | |
3681 | int i = 0; | |
3682 | tree last_rhs1 = gimple_assign_rhs1 (stmt); | |
5b1c765d | 3683 | |
3684 | /* We start expression rewriting from the top statements. | |
3685 | So, in this loop we create a full list of statements | |
3686 | we will work with. */ | |
3687 | stmts[stmt_num - 1] = stmt; | |
3688 | for (i = stmt_num - 2; i >= 0; i--) | |
3689 | stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); | |
3690 | ||
5b1c765d | 3691 | for (i = 0; i < stmt_num; i++) |
3692 | { | |
3693 | tree op1, op2; | |
3694 | ||
3695 | /* Determine whether we should use results of | |
3696 | already handled statements or not. */ | |
3697 | if (ready_stmts_end == 0 | |
3698 | && (i - stmt_index >= width || op_index < 1)) | |
3699 | ready_stmts_end = i; | |
3700 | ||
3701 | /* Now we choose operands for the next statement. A nonzero | |
3702 | value in ready_stmts_end means that we should use | |
3703 | the result of already generated statements as the new operand. */ | |
3704 | if (ready_stmts_end > 0) | |
3705 | { | |
3706 | op1 = gimple_assign_lhs (stmts[stmt_index++]); | |
3707 | if (ready_stmts_end > stmt_index) | |
3708 | op2 = gimple_assign_lhs (stmts[stmt_index++]); | |
3709 | else if (op_index >= 0) | |
f1f41a6c | 3710 | op2 = ops[op_index--]->op; |
5b1c765d | 3711 | else |
3712 | { | |
3713 | gcc_assert (stmt_index < i); | |
3714 | op2 = gimple_assign_lhs (stmts[stmt_index++]); | |
3715 | } | |
3716 | ||
3717 | if (stmt_index >= ready_stmts_end) | |
3718 | ready_stmts_end = 0; | |
3719 | } | |
3720 | else | |
3721 | { | |
3722 | if (op_index > 1) | |
3723 | swap_ops_for_binary_stmt (ops, op_index - 2, NULL); | |
f1f41a6c | 3724 | op2 = ops[op_index--]->op; |
3725 | op1 = ops[op_index--]->op; | |
5b1c765d | 3726 | } |
3727 | ||
3728 | /* If we emit the last statement then we should put | |
3729 | operands into the last statement. It will also | |
3730 | break the loop. */ | |
3731 | if (op_index < 0 && stmt_index == i) | |
3732 | i = stmt_num - 1; | |
3733 | ||
3734 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3735 | { | |
3736 | fprintf (dump_file, "Transforming "); | |
3737 | print_gimple_stmt (dump_file, stmts[i], 0, 0); | |
3738 | } | |
3739 | ||
3740 | /* We keep original statement only for the last one. All | |
3741 | others are recreated. */ | |
3742 | if (i == stmt_num - 1) | |
3743 | { | |
3744 | gimple_assign_set_rhs1 (stmts[i], op1); | |
3745 | gimple_assign_set_rhs2 (stmts[i], op2); | |
3746 | update_stmt (stmts[i]); | |
3747 | } | |
3748 | else | |
03d37e4e | 3749 | stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); |
5b1c765d | 3750 | |
3751 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3752 | { | |
3753 | fprintf (dump_file, " into "); | |
3754 | print_gimple_stmt (dump_file, stmts[i], 0, 0); | |
3755 | } | |
3756 | } | |
3757 | ||
3758 | remove_visited_stmt_chain (last_rhs1); | |
3759 | } | |
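
/* As a sketch of the effect (hypothetical names), a linearized chain
   computing a + b + c + d + e rewritten with width 2 comes out
   roughly as

     _1 = d + e;
     _2 = b + c;
     _3 = _1 + _2;
     x = _3 + a;

   where the first two additions are independent and can issue in the
   same cycle instead of forming one serial chain.  */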
3760 | ||
54aceb26 | 3761 | /* Transform STMT, which is really (A + B) + (C + D), into the left
3762 | linear form, ((A+B)+C)+D. | |
3763 | Recurse on D if necessary. */ | |
3764 | ||
3765 | static void | |
75a70cf9 | 3766 | linearize_expr (gimple stmt) |
54aceb26 | 3767 | { |
82cb4e1c | 3768 | gimple_stmt_iterator gsi; |
75a70cf9 | 3769 | gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); |
3770 | gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); | |
82cb4e1c | 3771 | gimple oldbinrhs = binrhs; |
75a70cf9 | 3772 | enum tree_code rhscode = gimple_assign_rhs_code (stmt); |
3773 | gimple newbinrhs = NULL; | |
a4c3fb95 | 3774 | struct loop *loop = loop_containing_stmt (stmt); |
82cb4e1c | 3775 | tree lhs = gimple_assign_lhs (stmt); |
54aceb26 | 3776 | |
75a70cf9 | 3777 | gcc_assert (is_reassociable_op (binlhs, rhscode, loop) |
3778 | && is_reassociable_op (binrhs, rhscode, loop)); | |
3779 | ||
82cb4e1c | 3780 | gsi = gsi_for_stmt (stmt); |
54aceb26 | 3781 | |
75a70cf9 | 3782 | gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); |
e9cf809e | 3783 | binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), |
3784 | gimple_assign_rhs_code (binrhs), | |
3785 | gimple_assign_lhs (binlhs), | |
3786 | gimple_assign_rhs2 (binrhs)); | |
75a70cf9 | 3787 | gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); |
82cb4e1c | 3788 | gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); |
3789 | gimple_set_uid (binrhs, gimple_uid (stmt)); | |
54aceb26 | 3790 | |
75a70cf9 | 3791 | if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) |
3792 | newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); | |
54aceb26 | 3793 | |
3794 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3795 | { | |
3796 | fprintf (dump_file, "Linearized: "); | |
75a70cf9 | 3797 | print_gimple_stmt (dump_file, stmt, 0, 0); |
54aceb26 | 3798 | } |
3799 | ||
3800 | reassociate_stats.linearized++; | |
54aceb26 | 3801 | update_stmt (stmt); |
75a70cf9 | 3802 | |
82cb4e1c | 3803 | gsi = gsi_for_stmt (oldbinrhs); |
54675e05 | 3804 | reassoc_remove_stmt (&gsi); |
82cb4e1c | 3805 | release_defs (oldbinrhs); |
3806 | ||
75a70cf9 | 3807 | gimple_set_visited (stmt, true); |
3808 | gimple_set_visited (binlhs, true); | |
3809 | gimple_set_visited (binrhs, true); | |
54aceb26 | 3810 | |
3811 | /* Tail recurse on the new rhs if it still needs reassociation. */ | |
a4c3fb95 | 3812 | if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) |
75a70cf9 | 3813 | /* ??? This should probably be linearize_expr (newbinrhs) but I don't |
3814 | want to change the algorithm while converting to tuples. */ | |
54aceb26 | 3815 | linearize_expr (stmt); |
54aceb26 | 3816 | } |
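
/* Pictorially (a sketch): starting from

     _5 = a + b;
     _6 = c + d;
     x = _5 + _6;

   the RHS of x's statement is rewritten into a chain, e.g.
   "_7 = _5 + d; x = _7 + c", producing a fully linear tree that the
   rank-based rewriting above can reorder freely.  */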
3817 | ||
75a70cf9 | 3818 | /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return |
54aceb26 | 3819 | it. Otherwise, return NULL. */ |
3820 | ||
75a70cf9 | 3821 | static gimple |
54aceb26 | 3822 | get_single_immediate_use (tree lhs) |
3823 | { | |
3824 | use_operand_p immuse; | |
75a70cf9 | 3825 | gimple immusestmt; |
54aceb26 | 3826 | |
3827 | if (TREE_CODE (lhs) == SSA_NAME | |
75a70cf9 | 3828 | && single_imm_use (lhs, &immuse, &immusestmt) |
3829 | && is_gimple_assign (immusestmt)) | |
3830 | return immusestmt; | |
3831 | ||
3832 | return NULL; | |
54aceb26 | 3833 | } |
54aceb26 | 3834 | |
54aceb26 | 3835 | /* Recursively negate the value of TONEGATE, and return the SSA_NAME |
3836 | representing the negated value. Insertions of any necessary | |
75a70cf9 | 3837 | instructions go before GSI. |
54aceb26 | 3838 | This function is recursive in that, if you hand it "a_5" as the |
3839 | value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will | |
3840 | transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ | |
3841 | ||
3842 | static tree | |
82cb4e1c | 3843 | negate_value (tree tonegate, gimple_stmt_iterator *gsip) |
54aceb26 | 3844 | { |
82cb4e1c | 3845 | gimple negatedefstmt = NULL; |
54aceb26 | 3846 | tree resultofnegate; |
82cb4e1c | 3847 | gimple_stmt_iterator gsi; |
3848 | unsigned int uid; | |
54aceb26 | 3849 | |
54aceb26 | 3850 | /* If we are trying to negate a name, defined by an add, negate the |
3851 | add operands instead. */ | |
75a70cf9 | 3852 | if (TREE_CODE (tonegate) == SSA_NAME) |
3853 | negatedefstmt = SSA_NAME_DEF_STMT (tonegate); | |
54aceb26 | 3854 | if (TREE_CODE (tonegate) == SSA_NAME |
75a70cf9 | 3855 | && is_gimple_assign (negatedefstmt) |
3856 | && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME | |
3857 | && has_single_use (gimple_assign_lhs (negatedefstmt)) | |
3858 | && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) | |
54aceb26 | 3859 | { |
75a70cf9 | 3860 | tree rhs1 = gimple_assign_rhs1 (negatedefstmt); |
3861 | tree rhs2 = gimple_assign_rhs2 (negatedefstmt); | |
82cb4e1c | 3862 | tree lhs = gimple_assign_lhs (negatedefstmt); |
3863 | gimple g; | |
75a70cf9 | 3864 | |
3865 | gsi = gsi_for_stmt (negatedefstmt); | |
3866 | rhs1 = negate_value (rhs1, &gsi); | |
75a70cf9 | 3867 | |
3868 | gsi = gsi_for_stmt (negatedefstmt); | |
3869 | rhs2 = negate_value (rhs2, &gsi); | |
75a70cf9 | 3870 | |
82cb4e1c | 3871 | gsi = gsi_for_stmt (negatedefstmt); |
f9e245b2 | 3872 | lhs = make_ssa_name (TREE_TYPE (lhs)); |
82cb4e1c | 3873 | gimple_set_visited (negatedefstmt, true); |
e9cf809e | 3874 | g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); |
82cb4e1c | 3875 | gimple_set_uid (g, gimple_uid (negatedefstmt)); |
3876 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3877 | return lhs; | |
54aceb26 | 3878 | } |
3879 | ||
3880 | tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); | |
82cb4e1c | 3881 | resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, |
75a70cf9 | 3882 | NULL_TREE, true, GSI_SAME_STMT); |
82cb4e1c | 3883 | gsi = *gsip; |
3884 | uid = gimple_uid (gsi_stmt (gsi)); | |
3885 | for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) | |
3886 | { | |
3887 | gimple stmt = gsi_stmt (gsi); | |
3888 | if (gimple_uid (stmt) != 0) | |
3889 | break; | |
3890 | gimple_set_uid (stmt, uid); | |
3891 | } | |
54aceb26 | 3892 | return resultofnegate; |
54aceb26 | 3893 | } |
3894 | ||
3895 | /* Return true if we should break up the subtract in STMT into an add | |
3896 | with negate. This is true when the subtract operands are really | |
3897 | adds, or the subtract itself is used in an add expression. In | |
3898 | either case, breaking up the subtract into an add with negate | |
3899 | exposes the adds to reassociation. */ | |
3900 | ||
3901 | static bool | |
75a70cf9 | 3902 | should_break_up_subtract (gimple stmt) |
54aceb26 | 3903 | { |
75a70cf9 | 3904 | tree lhs = gimple_assign_lhs (stmt); |
3905 | tree binlhs = gimple_assign_rhs1 (stmt); | |
3906 | tree binrhs = gimple_assign_rhs2 (stmt); | |
3907 | gimple immusestmt; | |
a4c3fb95 | 3908 | struct loop *loop = loop_containing_stmt (stmt); |
54aceb26 | 3909 | |
3910 | if (TREE_CODE (binlhs) == SSA_NAME | |
a4c3fb95 | 3911 | && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) |
54aceb26 | 3912 | return true; |
3913 | ||
3914 | if (TREE_CODE (binrhs) == SSA_NAME | |
a4c3fb95 | 3915 | && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) |
54aceb26 | 3916 | return true; |
3917 | ||
3918 | if (TREE_CODE (lhs) == SSA_NAME | |
3919 | && (immusestmt = get_single_immediate_use (lhs)) | |
75a70cf9 | 3920 | && is_gimple_assign (immusestmt) |
dddf5036 | 3921 | && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR |
3922 | || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) | |
54aceb26 | 3923 | return true; |
3924 | return false; | |
54aceb26 | 3925 | } |
3926 | ||
3927 | /* Transform STMT from A - B into A + -B. */ | |
3928 | ||
3929 | static void | |
75a70cf9 | 3930 | break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip) |
54aceb26 | 3931 | { |
75a70cf9 | 3932 | tree rhs1 = gimple_assign_rhs1 (stmt); |
3933 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
54aceb26 | 3934 | |
3935 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3936 | { | |
3937 | fprintf (dump_file, "Breaking up subtract "); | |
75a70cf9 | 3938 | print_gimple_stmt (dump_file, stmt, 0, 0); |
54aceb26 | 3939 | } |
3940 | ||
75a70cf9 | 3941 | rhs2 = negate_value (rhs2, gsip); |
3942 | gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); | |
54aceb26 | 3943 | update_stmt (stmt); |
3944 | } | |
3945 | ||
8c5ac7f6 | 3946 | /* Determine whether STMT is a builtin call that raises an SSA name |
3947 | to an integer power and has only one use. If so, and this is early | |
3948 | reassociation and unsafe math optimizations are permitted, place | |
3949 | the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. | |
3950 | If any of these conditions does not hold, return FALSE. */ | |
3951 | ||
3952 | static bool | |
3953 | acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent) | |
3954 | { | |
3955 | tree fndecl, arg1; | |
3956 | REAL_VALUE_TYPE c, cint; | |
3957 | ||
3958 | if (!first_pass_instance | |
3959 | || !flag_unsafe_math_optimizations | |
3960 | || !is_gimple_call (stmt) | |
3961 | || !has_single_use (gimple_call_lhs (stmt))) | |
3962 | return false; | |
3963 | ||
3964 | fndecl = gimple_call_fndecl (stmt); | |
3965 | ||
3966 | if (!fndecl | |
3967 | || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) | |
3968 | return false; | |
3969 | ||
3970 | switch (DECL_FUNCTION_CODE (fndecl)) | |
3971 | { | |
3972 | CASE_FLT_FN (BUILT_IN_POW): | |
54faaad5 | 3973 | if (flag_errno_math) |
3974 | return false; | |
3975 | ||
8c5ac7f6 | 3976 | *base = gimple_call_arg (stmt, 0); |
3977 | arg1 = gimple_call_arg (stmt, 1); | |
3978 | ||
3979 | if (TREE_CODE (arg1) != REAL_CST) | |
3980 | return false; | |
3981 | ||
3982 | c = TREE_REAL_CST (arg1); | |
3983 | ||
3984 | if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) | |
3985 | return false; | |
3986 | ||
3987 | *exponent = real_to_integer (&c); | |
e913b5cd | 3988 | real_from_integer (&cint, VOIDmode, *exponent, SIGNED); |
8c5ac7f6 | 3989 | if (!real_identical (&c, &cint)) |
3990 | return false; | |
3991 | ||
3992 | break; | |
3993 | ||
3994 | CASE_FLT_FN (BUILT_IN_POWI): | |
3995 | *base = gimple_call_arg (stmt, 0); | |
3996 | arg1 = gimple_call_arg (stmt, 1); | |
3997 | ||
e913b5cd | 3998 | if (!tree_fits_shwi_p (arg1)) |
8c5ac7f6 | 3999 | return false; |
4000 | ||
e913b5cd | 4001 | *exponent = tree_to_shwi (arg1); |
8c5ac7f6 | 4002 | break; |
4003 | ||
4004 | default: | |
4005 | return false; | |
4006 | } | |
4007 | ||
4008 | /* Expanding negative exponents is generally unproductive, so we don't | |
4009 | complicate matters with those. Exponents of zero and one should | |
4010 | have been handled by expression folding. */ | |
4011 | if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) | |
4012 | return false; | |
4013 | ||
4014 | return true; | |
4015 | } | |
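
/* For example, "t = __builtin_pow (x_3, 4.0)" with t having a single
   use yields *BASE == x_3 and *EXPONENT == 4, whereas
   "__builtin_pow (x_3, 2.5)" is rejected because 2.5 does not
   round-trip to an integer.  */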
4016 | ||
54aceb26 | 4017 | /* Recursively linearize a binary expression that is the RHS of STMT. |
4018 | Place the operands of the expression tree in the vector named OPS. */ | |
4019 | ||
4020 | static void | |
f1f41a6c | 4021 | linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt, |
dddf5036 | 4022 | bool is_associative, bool set_visited) |
54aceb26 | 4023 | { |
75a70cf9 | 4024 | tree binlhs = gimple_assign_rhs1 (stmt); |
4025 | tree binrhs = gimple_assign_rhs2 (stmt); | |
8c5ac7f6 | 4026 | gimple binlhsdef = NULL, binrhsdef = NULL; |
54aceb26 | 4027 | bool binlhsisreassoc = false; |
4028 | bool binrhsisreassoc = false; | |
75a70cf9 | 4029 | enum tree_code rhscode = gimple_assign_rhs_code (stmt); |
a4c3fb95 | 4030 | struct loop *loop = loop_containing_stmt (stmt); |
8c5ac7f6 | 4031 | tree base = NULL_TREE; |
4032 | HOST_WIDE_INT exponent = 0; | |
54aceb26 | 4033 | |
dddf5036 | 4034 | if (set_visited) |
4035 | gimple_set_visited (stmt, true); | |
54aceb26 | 4036 | |
4037 | if (TREE_CODE (binlhs) == SSA_NAME) | |
4038 | { | |
4039 | binlhsdef = SSA_NAME_DEF_STMT (binlhs); | |
df0675b8 | 4040 | binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) |
4041 | && !stmt_could_throw_p (binlhsdef)); | |
54aceb26 | 4042 | } |
4043 | ||
4044 | if (TREE_CODE (binrhs) == SSA_NAME) | |
4045 | { | |
4046 | binrhsdef = SSA_NAME_DEF_STMT (binrhs); | |
df0675b8 | 4047 | binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) |
4048 | && !stmt_could_throw_p (binrhsdef)); | |
54aceb26 | 4049 | } |
4050 | ||
4051 | /* If the LHS is not reassociable, but the RHS is, we need to swap | |
4052 | them. If neither is reassociable, there is nothing we can do, so | |
4053 | just put them in the ops vector. If the LHS is reassociable, | |
4054 | linearize it. If both are reassociable, then linearize the RHS | |
4055 | and the LHS. */ | |
4056 | ||
4057 | if (!binlhsisreassoc) | |
4058 | { | |
dddf5036 | 4059 | /* If this is not an associative operation, like division, give up. |
4060 | if (!is_associative) | |
4061 | { | |
4062 | add_to_ops_vec (ops, binrhs); | |
4063 | return; | |
4064 | } | |
4065 | ||
54aceb26 | 4066 | if (!binrhsisreassoc) |
4067 | { | |
8c5ac7f6 | 4068 | if (rhscode == MULT_EXPR |
4069 | && TREE_CODE (binrhs) == SSA_NAME | |
4070 | && acceptable_pow_call (binrhsdef, &base, &exponent)) | |
4071 | { | |
4072 | add_repeat_to_ops_vec (ops, base, exponent); | |
4073 | gimple_set_visited (binrhsdef, true); | |
4074 | } | |
4075 | else | |
4076 | add_to_ops_vec (ops, binrhs); | |
4077 | ||
4078 | if (rhscode == MULT_EXPR | |
4079 | && TREE_CODE (binlhs) == SSA_NAME | |
4080 | && acceptable_pow_call (binlhsdef, &base, &exponent)) | |
4081 | { | |
4082 | add_repeat_to_ops_vec (ops, base, exponent); | |
4083 | gimple_set_visited (binlhsdef, true); | |
4084 | } | |
4085 | else | |
4086 | add_to_ops_vec (ops, binlhs); | |
4087 | ||
54aceb26 | 4088 | return; |
4089 | } | |
4090 | ||
4091 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4092 | { | |
4093 | fprintf (dump_file, "swapping operands of "); | |
75a70cf9 | 4094 | print_gimple_stmt (dump_file, stmt, 0, 0); |
54aceb26 | 4095 | } |
4096 | ||
8f6fa493 | 4097 | swap_ssa_operands (stmt, |
4098 | gimple_assign_rhs1_ptr (stmt), | |
4099 | gimple_assign_rhs2_ptr (stmt)); | |
54aceb26 | 4100 | update_stmt (stmt); |
4101 | ||
4102 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4103 | { | |
4104 | fprintf (dump_file, " is now "); | |
75a70cf9 | 4105 | print_gimple_stmt (dump_file, stmt, 0, 0); |
54aceb26 | 4106 | } |
4107 | ||
4108 | /* We want to make it so the lhs is always the reassociative op, | |
4109 | so swap. */ | |
dfcf26a5 | 4110 | std::swap (binlhs, binrhs); |
54aceb26 | 4111 | } |
4112 | else if (binrhsisreassoc) | |
4113 | { | |
4114 | linearize_expr (stmt); | |
75a70cf9 | 4115 | binlhs = gimple_assign_rhs1 (stmt); |
4116 | binrhs = gimple_assign_rhs2 (stmt); | |
54aceb26 | 4117 | } |
4118 | ||
4119 | gcc_assert (TREE_CODE (binrhs) != SSA_NAME | |
a4c3fb95 | 4120 | || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), |
4121 | rhscode, loop)); | |
dddf5036 | 4122 | linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), |
4123 | is_associative, set_visited); | |
8c5ac7f6 | 4124 | |
4125 | if (rhscode == MULT_EXPR | |
4126 | && TREE_CODE (binrhs) == SSA_NAME | |
4127 | && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent)) | |
4128 | { | |
4129 | add_repeat_to_ops_vec (ops, base, exponent); | |
4130 | gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true); | |
4131 | } | |
4132 | else | |
4133 | add_to_ops_vec (ops, binrhs); | |
54aceb26 | 4134 | } |
4135 | ||
4136 | /* Repropagate the negates back into subtracts, since no other pass | |
4137 | currently does it. */ | |
4138 | ||
4139 | static void | |
4140 | repropagate_negates (void) | |
4141 | { | |
4142 | unsigned int i = 0; | |
4143 | tree negate; | |
4144 | ||
f1f41a6c | 4145 | FOR_EACH_VEC_ELT (plus_negates, i, negate) |
54aceb26 | 4146 | { |
75a70cf9 | 4147 | gimple user = get_single_immediate_use (negate); |
54aceb26 | 4148 | |
c07e5b8b | 4149 | if (!user || !is_gimple_assign (user)) |
4150 | continue; | |
4151 | ||
54aceb26 | 4152 | /* The negate operand can be either operand of a PLUS_EXPR |
4153 | (it can be the LHS if the RHS is a constant for example). | |
4154 | ||
4155 | Force the negate operand to the RHS of the PLUS_EXPR, then | |
4156 | transform the PLUS_EXPR into a MINUS_EXPR. */ | |
c07e5b8b | 4157 | if (gimple_assign_rhs_code (user) == PLUS_EXPR) |
54aceb26 | 4158 | { |
54aceb26 | 4159 | /* If the negated operand appears on the LHS of the |
4160 | PLUS_EXPR, exchange the operands of the PLUS_EXPR | |
4161 | to force the negated operand to the RHS of the PLUS_EXPR. */ | |
75a70cf9 | 4162 | if (gimple_assign_rhs1 (user) == negate) |
54aceb26 | 4163 | { |
8f6fa493 | 4164 | swap_ssa_operands (user, |
4165 | gimple_assign_rhs1_ptr (user), | |
4166 | gimple_assign_rhs2_ptr (user)); | |
54aceb26 | 4167 | } |
4168 | ||
4169 | /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace | |
4170 | the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ | |
75a70cf9 | 4171 | if (gimple_assign_rhs2 (user) == negate) |
54aceb26 | 4172 | { |
75a70cf9 | 4173 | tree rhs1 = gimple_assign_rhs1 (user); |
4174 | tree rhs2 = get_unary_op (negate, NEGATE_EXPR); | |
4175 | gimple_stmt_iterator gsi = gsi_for_stmt (user); | |
4176 | gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); | |
54aceb26 | 4177 | update_stmt (user); |
4178 | } | |
4179 | } | |
c07e5b8b | 4180 | else if (gimple_assign_rhs_code (user) == MINUS_EXPR) |
4181 | { | |
4182 | if (gimple_assign_rhs1 (user) == negate) | |
4183 | { | |
4184 | /* We have | |
4185 | x = -a | |
4186 | y = x - b | |
4187 | which we transform into | |
4188 | x = a + b | |
4189 | y = -x . | |
4190 | This pushes down the negate which we possibly can merge | |
4191 | into some other operation, hence insert it into the | |
4192 | plus_negates vector. */ | |
4193 | gimple feed = SSA_NAME_DEF_STMT (negate); | |
4194 | tree a = gimple_assign_rhs1 (feed); | |
82cb4e1c | 4195 | tree b = gimple_assign_rhs2 (user); |
4196 | gimple_stmt_iterator gsi = gsi_for_stmt (feed); | |
4197 | gimple_stmt_iterator gsi2 = gsi_for_stmt (user); | |
f9e245b2 | 4198 | tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); |
e9cf809e | 4199 | gimple g = gimple_build_assign (x, PLUS_EXPR, a, b); |
82cb4e1c | 4200 | gsi_insert_before (&gsi2, g, GSI_SAME_STMT); |
806413d2 | 4201 | gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); |
82cb4e1c | 4202 | user = gsi_stmt (gsi2); |
4203 | update_stmt (user); | |
54675e05 | 4204 | reassoc_remove_stmt (&gsi); |
82cb4e1c | 4205 | release_defs (feed); |
4206 | plus_negates.safe_push (gimple_assign_lhs (user)); | |
c07e5b8b | 4207 | } |
4208 | else | |
4209 | { | |
4210 | /* Transform "x = -a; y = b - x" into "y = b + a", getting | |
4211 | rid of one operation. */ | |
4212 | gimple feed = SSA_NAME_DEF_STMT (negate); | |
4213 | tree a = gimple_assign_rhs1 (feed); | |
4214 | tree rhs1 = gimple_assign_rhs1 (user); | |
4215 | gimple_stmt_iterator gsi = gsi_for_stmt (user); | |
4216 | gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); | |
4217 | update_stmt (gsi_stmt (gsi)); | |
4218 | } | |
4219 | } | |
54aceb26 | 4220 | } |
4221 | } | |
4222 | ||
c07e5b8b | 4223 | /* Returns true if OP is of a type for which we can do reassociation. |
4224 | That is for integral or non-saturating fixed-point types, and for | |
4225 | floating-point types when associative-math is enabled. */ | |
4226 | ||
4227 | static bool | |
4228 | can_reassociate_p (tree op) | |
4229 | { | |
4230 | tree type = TREE_TYPE (op); | |
ca3c9092 | 4231 | if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) |
c07e5b8b | 4232 | || NON_SAT_FIXED_POINT_TYPE_P (type) |
f4a50267 | 4233 | || (flag_associative_math && FLOAT_TYPE_P (type))) |
c07e5b8b | 4234 | return true; |
4235 | return false; | |
4236 | } | |
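
/* E.g. an "unsigned int" operand qualifies because unsigned overflow
   wraps, a plain "int" qualifies only when signed overflow is defined
   to wrap (-fwrapv), and a "double" operand qualifies only under
   -fassociative-math.  */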
4237 | ||
54aceb26 | 4238 | /* Break up subtract operations in block BB. |
4239 | ||
4240 | We do this top down because we don't know whether the subtract is | |
4241 | part of a possible chain of reassociation except at the top. | |
48e1416a | 4242 | |
54aceb26 | 4243 | IE given |
4244 | d = f + g | |
4245 | c = a + e | |
4246 | b = c - d | |
4247 | q = b - r | |
4248 | k = t - q | |
48e1416a | 4249 | |
54aceb26 | 4250 | we want to break up k = t - q, but we won't until we've transformed q |
75a70cf9 | 4251 | = b - r, which won't be broken up until we transform b = c - d. |
4252 | ||
82cb4e1c | 4253 | En passant, clear the GIMPLE visited flag on every statement |
4254 | and set UIDs within each basic block. */ | |
54aceb26 | 4255 | |
4256 | static void | |
4257 | break_up_subtract_bb (basic_block bb) | |
4258 | { | |
75a70cf9 | 4259 | gimple_stmt_iterator gsi; |
54aceb26 | 4260 | basic_block son; |
82cb4e1c | 4261 | unsigned int uid = 1; |
54aceb26 | 4262 | |
75a70cf9 | 4263 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
54aceb26 | 4264 | { |
75a70cf9 | 4265 | gimple stmt = gsi_stmt (gsi); |
4266 | gimple_set_visited (stmt, false); | |
82cb4e1c | 4267 | gimple_set_uid (stmt, uid++); |
54aceb26 | 4268 | |
c07e5b8b | 4269 | if (!is_gimple_assign (stmt) |
4270 | || !can_reassociate_p (gimple_assign_lhs (stmt))) | |
4271 | continue; | |
4272 | ||
75a70cf9 | 4273 | /* Look for simple gimple subtract operations. */ |
c07e5b8b | 4274 | if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) |
54aceb26 | 4275 | { |
c07e5b8b | 4276 | if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) |
4277 | || !can_reassociate_p (gimple_assign_rhs2 (stmt))) | |
54aceb26 | 4278 | continue; |
4279 | ||
4280 | /* Check for a subtract used only in an addition. If this | |
4281 | is the case, transform it into add of a negate for better | |
4282 | reassociation. IE transform C = A-B into C = A + -B if C | |
4283 | is only used in an addition. */ | |
75a70cf9 | 4284 | if (should_break_up_subtract (stmt)) |
4285 | break_up_subtract (stmt, &gsi); | |
54aceb26 | 4286 | } |
c07e5b8b | 4287 | else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR |
4288 | && can_reassociate_p (gimple_assign_rhs1 (stmt))) | |
f1f41a6c | 4289 | plus_negates.safe_push (gimple_assign_lhs (stmt)); |
54aceb26 | 4290 | } |
4291 | for (son = first_dom_son (CDI_DOMINATORS, bb); | |
4292 | son; | |
4293 | son = next_dom_son (CDI_DOMINATORS, son)) | |
4294 | break_up_subtract_bb (son); | |
4295 | } | |
4296 | ||
8c5ac7f6 | 4297 | /* Used for repeated factor analysis. */ |
4298 | struct repeat_factor_d | |
4299 | { | |
4300 | /* An SSA name that occurs in a multiply chain. */ | |
4301 | tree factor; | |
4302 | ||
4303 | /* Cached rank of the factor. */ | |
4304 | unsigned rank; | |
4305 | ||
4306 | /* Number of occurrences of the factor in the chain. */ | |
4307 | HOST_WIDE_INT count; | |
4308 | ||
4309 | /* An SSA name representing the product of this factor and | |
4310 | all factors appearing later in the repeated factor vector. */ | |
4311 | tree repr; | |
4312 | }; | |
4313 | ||
4314 | typedef struct repeat_factor_d repeat_factor, *repeat_factor_t; | |
4315 | typedef const struct repeat_factor_d *const_repeat_factor_t; | |
4316 | ||
8c5ac7f6 | 4317 | |
f1f41a6c | 4318 | static vec<repeat_factor> repeat_factor_vec; |
8c5ac7f6 | 4319 | |
4320 | /* Used for sorting the repeat factor vector. Sort primarily by | |
4321 | ascending occurrence count, secondarily by descending rank. */ | |
4322 | ||
4323 | static int | |
4324 | compare_repeat_factors (const void *x1, const void *x2) | |
4325 | { | |
4326 | const_repeat_factor_t rf1 = (const_repeat_factor_t) x1; | |
4327 | const_repeat_factor_t rf2 = (const_repeat_factor_t) x2; | |
4328 | ||
4329 | if (rf1->count != rf2->count) | |
4330 | return rf1->count - rf2->count; | |
4331 | ||
4332 | return rf2->rank - rf1->rank; | |
4333 | } | |
4334 | ||
8c5ac7f6 | 4335 | /* Look for repeated operands in OPS in the multiply tree rooted at |
4336 | STMT. Replace them with an optimal sequence of multiplies and powi | |
97269507 | 4337 | builtin calls, and remove the used operands from OPS. Return an |
4338 | SSA name representing the value of the replacement sequence. */ | |
8c5ac7f6 | 4339 | |
97269507 | 4340 | static tree |
f1f41a6c | 4341 | attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops) |
8c5ac7f6 | 4342 | { |
4343 | unsigned i, j, vec_len; | |
4344 | int ii; | |
4345 | operand_entry_t oe; | |
4346 | repeat_factor_t rf1, rf2; | |
4347 | repeat_factor rfnew; | |
97269507 | 4348 | tree result = NULL_TREE; |
8c5ac7f6 | 4349 | tree target_ssa, iter_result; |
4350 | tree type = TREE_TYPE (gimple_get_lhs (stmt)); | |
4351 | tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); | |
4352 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
4353 | gimple mul_stmt, pow_stmt; | |
4354 | ||
4355 | /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and | |
4356 | target. */ | |
4357 | if (!powi_fndecl) | |
97269507 | 4358 | return NULL_TREE; |
8c5ac7f6 | 4359 | |
4360 | /* Allocate the repeated factor vector. */ | |
f1f41a6c | 4361 | repeat_factor_vec.create (10); |
8c5ac7f6 | 4362 | |
4363 | /* Scan the OPS vector for all SSA names in the product and build | |
4364 | up a vector of occurrence counts for each factor. */ | |
f1f41a6c | 4365 | FOR_EACH_VEC_ELT (*ops, i, oe) |
8c5ac7f6 | 4366 | { |
4367 | if (TREE_CODE (oe->op) == SSA_NAME) | |
4368 | { | |
f1f41a6c | 4369 | FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) |
8c5ac7f6 | 4370 | { |
4371 | if (rf1->factor == oe->op) | |
4372 | { | |
4373 | rf1->count += oe->count; | |
4374 | break; | |
4375 | } | |
4376 | } | |
4377 | ||
f1f41a6c | 4378 | if (j >= repeat_factor_vec.length ()) |
8c5ac7f6 | 4379 | { |
4380 | rfnew.factor = oe->op; | |
4381 | rfnew.rank = oe->rank; | |
4382 | rfnew.count = oe->count; | |
4383 | rfnew.repr = NULL_TREE; | |
f1f41a6c | 4384 | repeat_factor_vec.safe_push (rfnew); |
8c5ac7f6 | 4385 | } |
4386 | } | |
4387 | } | |
4388 | ||
4389 | /* Sort the repeated factor vector by (a) increasing occurrence count, | |
4390 | and (b) decreasing rank. */ | |
f1f41a6c | 4391 | repeat_factor_vec.qsort (compare_repeat_factors); |
8c5ac7f6 | 4392 | |
4393 | /* It is generally best to combine as many base factors as possible | |
4394 | into a product before applying __builtin_powi to the result. | |
4395 | However, the sort order chosen for the repeated factor vector | |
4396 | allows us to cache partial results for the product of the base | |
4397 | factors for subsequent use. When we already have a cached partial | |
4398 | result from a previous iteration, it is best to make use of it | |
4399 | before looking for another __builtin_powi opportunity. | |
4400 | ||
4401 | As an example, consider x * x * y * y * y * z * z * z * z. | |
4402 | We want to first compose the product x * y * z, raise it to the | |
4403 | second power, then multiply this by y * z, and finally multiply | |
4404 | by z. This can be done in 5 multiplies provided we cache y * z | |
4405 | for use in both expressions: | |
4406 | ||
4407 | t1 = y * z | |
4408 | t2 = t1 * x | |
4409 | t3 = t2 * t2 | |
4410 | t4 = t1 * t3 | |
4411 | result = t4 * z | |
4412 | ||
4413 | If we instead ignored the cached y * z and first multiplied by | |
4414 | the __builtin_pow opportunity z * z, we would get the inferior: | |
4415 | ||
4416 | t1 = y * z | |
4417 | t2 = t1 * x | |
4418 | t3 = t2 * t2 | |
4419 | t4 = z * z | |
4420 | t5 = t3 * t4 | |
4421 | result = t5 * y */ | |
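/* Concretely, the loop below realizes the five-multiply sequence
   above in three iterations: the first finds x (the lowest count
   that is at least 2), chains the cached representatives t1 = y * z
   and t2 = x * t1, and emits t3 = __builtin_powi (t2, 2); the
   second reuses the cached t1 (remaining count 1) to form
   t4 = t3 * t1; the third multiplies in the remaining z to produce
   the final result.  */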
4422 | ||
f1f41a6c | 4423 | vec_len = repeat_factor_vec.length (); |
8c5ac7f6 | 4424 | |
4425 | /* Repeatedly look for opportunities to create a builtin_powi call. */ | |
4426 | while (true) | |
4427 | { | |
4428 | HOST_WIDE_INT power; | |
4429 | ||
4430 | /* First look for the largest cached product of factors from | |
4431 | preceding iterations. If found, create a builtin_powi for | |
4432 | it if the minimum occurrence count for its factors is at | |
4433 | least 2, or just use this cached product as our next | |
4434 | multiplicand if the minimum occurrence count is 1. */ | |
f1f41a6c | 4435 | FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) |
8c5ac7f6 | 4436 | { |
4437 | if (rf1->repr && rf1->count > 0) | |
4438 | break; | |
4439 | } | |
4440 | ||
4441 | if (j < vec_len) | |
4442 | { | |
4443 | power = rf1->count; | |
4444 | ||
4445 | if (power == 1) | |
4446 | { | |
4447 | iter_result = rf1->repr; | |
4448 | ||
4449 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4450 | { | |
4451 | unsigned elt; | |
4452 | repeat_factor_t rf; | |
4453 | fputs ("Multiplying by cached product ", dump_file); | |
4454 | for (elt = j; elt < vec_len; elt++) | |
4455 | { | |
f1f41a6c | 4456 | rf = &repeat_factor_vec[elt]; |
8c5ac7f6 | 4457 | print_generic_expr (dump_file, rf->factor, 0); |
4458 | if (elt < vec_len - 1) | |
4459 | fputs (" * ", dump_file); | |
4460 | } | |
4461 | fputs ("\n", dump_file); | |
4462 | } | |
4463 | } | |
4464 | else | |
4465 | { | |
03d37e4e | 4466 | iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); |
8c5ac7f6 | 4467 | pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, |
4468 | build_int_cst (integer_type_node, | |
4469 | power)); | |
4470 | gimple_call_set_lhs (pow_stmt, iter_result); | |
4471 | gimple_set_location (pow_stmt, gimple_location (stmt)); | |
4472 | gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); | |
4473 | ||
4474 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4475 | { | |
4476 | unsigned elt; | |
4477 | repeat_factor_t rf; | |
4478 | fputs ("Building __builtin_pow call for cached product (", | |
4479 | dump_file); | |
4480 | for (elt = j; elt < vec_len; elt++) | |
4481 | { | |
f1f41a6c | 4482 | rf = &repeat_factor_vec[elt]; |
8c5ac7f6 | 4483 | print_generic_expr (dump_file, rf->factor, 0); |
4484 | if (elt < vec_len - 1) | |
4485 | fputs (" * ", dump_file); | |
4486 | } | |
f03df321 | 4487 | fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", |
8ef3b7cb | 4488 | power); |
8c5ac7f6 | 4489 | } |
4490 | } | |
4491 | } | |
4492 | else | |
4493 | { | |
4494 | /* Otherwise, find the first factor in the repeated factor | |
4495 | vector whose occurrence count is at least 2. If no such | |
4496 | factor exists, there are no builtin_powi opportunities | |
4497 | remaining. */ | |
f1f41a6c | 4498 | FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) |
8c5ac7f6 | 4499 | { |
4500 | if (rf1->count >= 2) | |
4501 | break; | |
4502 | } | |
4503 | ||
4504 | if (j >= vec_len) | |
4505 | break; | |
4506 | ||
4507 | power = rf1->count; | |
4508 | ||
4509 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4510 | { | |
4511 | unsigned elt; | |
4512 | repeat_factor_t rf; | |
4513 | fputs ("Building __builtin_pow call for (", dump_file); | |
4514 | for (elt = j; elt < vec_len; elt++) | |
4515 | { | |
f1f41a6c | 4516 | rf = &repeat_factor_vec[elt]; |
8c5ac7f6 | 4517 | print_generic_expr (dump_file, rf->factor, 0); |
4518 | if (elt < vec_len - 1) | |
4519 | fputs (" * ", dump_file); | |
4520 | } | |
f03df321 | 4521 | fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power); |
8c5ac7f6 | 4522 | } |
4523 | ||
4524 | reassociate_stats.pows_created++; | |
4525 | ||
4526 | /* Visit each element of the vector in reverse order (so that | |
4527 | high-occurrence elements are visited first, and within the | |
4528 | same occurrence count, lower-ranked elements are visited | |
4529 | first). Form a linear product of all elements in this order | |
4530 | whose occurrence count is at least that of element J. | |
4531 | Record the SSA name representing the product of each element | |
4532 | with all subsequent elements in the vector. */ | |
4533 | if (j == vec_len - 1) | |
4534 | rf1->repr = rf1->factor; | |
4535 | else | |
4536 | { | |
4537 | for (ii = vec_len - 2; ii >= (int)j; ii--) | |
4538 | { | |
4539 | tree op1, op2; | |
4540 | ||
f1f41a6c | 4541 | rf1 = &repeat_factor_vec[ii]; |
4542 | rf2 = &repeat_factor_vec[ii + 1]; | |
8c5ac7f6 | 4543 | |
4544 | /* Init the last factor's representative to be itself. */ | |
4545 | if (!rf2->repr) | |
4546 | rf2->repr = rf2->factor; | |
4547 | ||
4548 | op1 = rf1->factor; | |
4549 | op2 = rf2->repr; | |
4550 | ||
03d37e4e | 4551 | target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); |
e9cf809e | 4552 | mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR, |
4553 | op1, op2); | |
8c5ac7f6 | 4554 | gimple_set_location (mul_stmt, gimple_location (stmt)); |
4555 | gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); | |
4556 | rf1->repr = target_ssa; | |
4557 | ||
4558 | /* Don't reprocess the multiply we just introduced. */ | |
4559 | gimple_set_visited (mul_stmt, true); | |
4560 | } | |
4561 | } | |
4562 | ||
4563 | /* Form a call to __builtin_powi for the maximum product | |
4564 | just formed, raised to the power obtained earlier. */ | |
f1f41a6c | 4565 | rf1 = &repeat_factor_vec[j]; |
03d37e4e | 4566 | iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); |
8c5ac7f6 | 4567 | pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, |
4568 | build_int_cst (integer_type_node, | |
4569 | power)); | |
4570 | gimple_call_set_lhs (pow_stmt, iter_result); | |
4571 | gimple_set_location (pow_stmt, gimple_location (stmt)); | |
4572 | gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); | |
4573 | } | |
4574 | ||
97269507 | 4575 | /* If we previously formed at least one other builtin_powi call, |
4576 | form the product of this one and those others. */ | |
4577 | if (result) | |
4578 | { | |
03d37e4e | 4579 | tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); |
e9cf809e | 4580 | mul_stmt = gimple_build_assign (new_result, MULT_EXPR, |
4581 | result, iter_result); | |
97269507 | 4582 | gimple_set_location (mul_stmt, gimple_location (stmt)); |
4583 | gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); | |
4584 | gimple_set_visited (mul_stmt, true); | |
4585 | result = new_result; | |
4586 | } | |
4587 | else | |
4588 | result = iter_result; | |
8c5ac7f6 | 4589 | |
4590 | /* Decrement the occurrence count of each element in the product | |
4591 | by the count found above, and remove this many copies of each | |
4592 | factor from OPS. */ | |
4593 | for (i = j; i < vec_len; i++) | |
4594 | { | |
4595 | unsigned k = power; | |
4596 | unsigned n; | |
4597 | ||
f1f41a6c | 4598 | rf1 = &repeat_factor_vec[i]; |
8c5ac7f6 | 4599 | rf1->count -= power; |
4600 | ||
f1f41a6c | 4601 | FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) |
8c5ac7f6 | 4602 | { |
4603 | if (oe->op == rf1->factor) | |
4604 | { | |
4605 | if (oe->count <= k) | |
4606 | { | |
f1f41a6c | 4607 | ops->ordered_remove (n); |
8c5ac7f6 | 4608 | k -= oe->count; |
4609 | ||
4610 | if (k == 0) | |
4611 | break; | |
4612 | } | |
4613 | else | |
4614 | { | |
4615 | oe->count -= k; | |
4616 | break; | |
4617 | } | |
4618 | } | |
4619 | } | |
4620 | } | |
4621 | } | |
4622 | ||
4623 | /* At this point all elements in the repeated factor vector have a | |
4624 | remaining occurrence count of 0 or 1, and those with a count of 1 | |
4625 | don't have cached representatives. Re-sort the ops vector and | |
4626 | clean up. */ | |
f1f41a6c | 4627 | ops->qsort (sort_by_operand_rank); |
4628 | repeat_factor_vec.release (); | |
97269507 | 4629 | |
4630 | /* Return the final product computed herein. Note that there may | |
4631 | still be some elements with single occurrence count left in OPS; | |
4632 | those will be handled by the normal reassociation logic. */ | |
4633 | return result; | |
4634 | } | |
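/* A minimal, self-contained C sketch of the arithmetic this function
   arranges for the x^2 * y^3 * z^4 example in the comment above; the
   names are illustrative, and the explicit squaring stands in for the
   emitted __builtin_powi (t2, 2) call:

     static double
     reassoc_powi_sketch (double x, double y, double z)
     {
       double t1 = y * z;    // cached partial product of base factors
       double t2 = x * t1;   // full base product x * y * z
       double t3 = t2 * t2;  // __builtin_powi (t2, 2) = x^2 y^2 z^2
       double t4 = t3 * t1;  // multiply in cached y * z -> x^2 y^3 z^3
       return t4 * z;        // x^2 y^3 z^4 in five multiplies
     }  */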
4635 | ||
4636 | /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */ | |
4637 | ||
4638 | static void | |
4639 | transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs) | |
4640 | { | |
4641 | tree rhs1; | |
4642 | ||
4643 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4644 | { | |
4645 | fprintf (dump_file, "Transforming "); | |
4646 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
4647 | } | |
4648 | ||
4649 | rhs1 = gimple_assign_rhs1 (stmt); | |
4650 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
4651 | update_stmt (stmt); | |
4652 | remove_visited_stmt_chain (rhs1); | |
4653 | ||
4654 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4655 | { | |
4656 | fprintf (dump_file, " into "); | |
4657 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
4658 | } | |
4659 | } | |
4660 | ||
4661 | /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ | |
4662 | ||
4663 | static void | |
4664 | transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt, | |
4665 | tree rhs1, tree rhs2) | |
4666 | { | |
4667 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4668 | { | |
4669 | fprintf (dump_file, "Transforming "); | |
4670 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
4671 | } | |
4672 | ||
61e371b0 | 4673 | gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); |
97269507 | 4674 | update_stmt (gsi_stmt (*gsi)); |
4675 | remove_visited_stmt_chain (rhs1); | |
4676 | ||
4677 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4678 | { | |
4679 | fprintf (dump_file, " into "); | |
4680 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
4681 | } | |
8c5ac7f6 | 4682 | } |
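/* For instance (SSA names illustrative): if attempt_builtin_powi
   consumed every operand of
     lhs = a * a * b * b
   transform_stmt_to_copy rewrites the statement into
     lhs = reassocpow_1;
   whereas if a single operand c remained, transform_stmt_to_multiply
   produces
     lhs = c * reassocpow_1;  */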
4683 | ||
54aceb26 | 4684 | /* Reassociate expressions in basic block BB and the blocks that are | |
4685 | its children in the post-dominator tree. */ | |
4686 | ||
4687 | static void | |
4688 | reassociate_bb (basic_block bb) | |
4689 | { | |
75a70cf9 | 4690 | gimple_stmt_iterator gsi; |
54aceb26 | 4691 | basic_block son; |
8a2c7744 | 4692 | gimple stmt = last_stmt (bb); |
4693 | ||
4694 | if (stmt && !gimple_visited_p (stmt)) | |
4695 | maybe_optimize_range_tests (stmt); | |
54aceb26 | 4696 | |
75a70cf9 | 4697 | for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) |
54aceb26 | 4698 | { |
8a2c7744 | 4699 | stmt = gsi_stmt (gsi); |
54aceb26 | 4700 | |
df0675b8 | 4701 | if (is_gimple_assign (stmt) |
4702 | && !stmt_could_throw_p (stmt)) | |
54aceb26 | 4703 | { |
75a70cf9 | 4704 | tree lhs, rhs1, rhs2; |
4705 | enum tree_code rhs_code = gimple_assign_rhs_code (stmt); | |
54aceb26 | 4706 | |
75a70cf9 | 4707 | /* If this is not a gimple binary expression, there is |
4708 | nothing for us to do with it. */ | |
4709 | if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) | |
54aceb26 | 4710 | continue; |
4711 | ||
75a70cf9 | 4712 | /* If this was part of an already processed statement, |
4713 | we don't need to touch it again. */ | |
4714 | if (gimple_visited_p (stmt)) | |
dddf5036 | 4715 | { |
4716 | /* This statement might have become dead because of previous | |
4717 | reassociations. */ | |
4718 | if (has_zero_uses (gimple_get_lhs (stmt))) | |
4719 | { | |
54675e05 | 4720 | reassoc_remove_stmt (&gsi); |
dddf5036 | 4721 | release_defs (stmt); |
fb715874 | 4722 | /* Removing the stmt above might have removed the | |
4723 | last stmt of the basic block, which leaves the | |
4724 | iterator at the end of the sequence.  In that | |
4725 | case, reset it to the last stmt of the block; | |
4726 | if the block is now empty, there is nothing | |
4727 | left to process, so bail out. */ | |
4728 | if (gsi_end_p (gsi)) | |
4729 | { | |
4730 | gsi = gsi_last_bb (bb); | |
4731 | if (gsi_end_p (gsi)) | |
4732 | break; | |
4733 | } | |
dddf5036 | 4734 | } |
4735 | continue; | |
4736 | } | |
75a70cf9 | 4737 | |
4738 | lhs = gimple_assign_lhs (stmt); | |
4739 | rhs1 = gimple_assign_rhs1 (stmt); | |
4740 | rhs2 = gimple_assign_rhs2 (stmt); | |
4741 | ||
ca3c9092 | 4742 | /* For operations other than bitwise ops and min/max we cannot | |
4743 | reassociate all types.  Verify that here. */ | |
4744 | if (rhs_code != BIT_IOR_EXPR | |
4745 | && rhs_code != BIT_AND_EXPR | |
4746 | && rhs_code != BIT_XOR_EXPR | |
4747 | && rhs_code != MIN_EXPR | |
4748 | && rhs_code != MAX_EXPR | |
4749 | && (!can_reassociate_p (lhs) | |
4750 | || !can_reassociate_p (rhs1) | |
4751 | || !can_reassociate_p (rhs2))) | |
54aceb26 | 4752 | continue; |
4753 | ||
75a70cf9 | 4754 | if (associative_tree_code (rhs_code)) |
54aceb26 | 4755 | { |
c2078b80 | 4756 | auto_vec<operand_entry_t> ops; |
97269507 | 4757 | tree powi_result = NULL_TREE; |
54aceb26 | 4758 | |
4759 | /* There may be no immediate uses left by the time we | |
4760 | get here because we may have eliminated them all. */ | |
72e3ec84 | 4761 | if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs)) |
54aceb26 | 4762 | continue; |
4763 | ||
75a70cf9 | 4764 | gimple_set_visited (stmt, true); |
dddf5036 | 4765 | linearize_expr_tree (&ops, stmt, true, true); |
f1f41a6c | 4766 | ops.qsort (sort_by_operand_rank); |
75a70cf9 | 4767 | optimize_ops_list (rhs_code, &ops); |
dddf5036 | 4768 | if (undistribute_ops_list (rhs_code, &ops, |
4769 | loop_containing_stmt (stmt))) | |
4770 | { | |
f1f41a6c | 4771 | ops.qsort (sort_by_operand_rank); |
dddf5036 | 4772 | optimize_ops_list (rhs_code, &ops); |
4773 | } | |
54aceb26 | 4774 | |
946e9eb4 | 4775 | if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) |
4776 | optimize_range_tests (rhs_code, &ops); | |
4777 | ||
8c5ac7f6 | 4778 | if (first_pass_instance |
4779 | && rhs_code == MULT_EXPR | |
4780 | && flag_unsafe_math_optimizations) | |
03d37e4e | 4781 | powi_result = attempt_builtin_powi (stmt, &ops); |
8c5ac7f6 | 4782 | |
97269507 | 4783 | /* If the operand vector is now empty, all operands were |
4784 | consumed by the __builtin_powi optimization. */ | |
f1f41a6c | 4785 | if (ops.length () == 0) |
97269507 | 4786 | transform_stmt_to_copy (&gsi, stmt, powi_result); |
f1f41a6c | 4787 | else if (ops.length () == 1) |
54aceb26 | 4788 | { |
f1f41a6c | 4789 | tree last_op = ops.last ()->op; |
97269507 | 4790 | |
4791 | if (powi_result) | |
4792 | transform_stmt_to_multiply (&gsi, stmt, last_op, | |
4793 | powi_result); | |
4794 | else | |
4795 | transform_stmt_to_copy (&gsi, stmt, last_op); | |
54aceb26 | 4796 | } |
4797 | else | |
5b1c765d | 4798 | { |
3754d046 | 4799 | machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); |
f1f41a6c | 4800 | int ops_num = ops.length (); |
5b1c765d | 4801 | int width = get_reassociation_width (ops_num, rhs_code, mode); |
82cb4e1c | 4802 | tree new_lhs = lhs; |
5b1c765d | 4803 | |
4804 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
4805 | fprintf (dump_file, | |
4806 | "Width = %d was chosen for reassociation\n", width); | |
4807 | ||
4808 | if (width > 1 | |
f1f41a6c | 4809 | && ops.length () > 3) |
1a91d914 | 4810 | rewrite_expr_tree_parallel (as_a <gassign *> (stmt), |
4811 | width, ops); | |
5b1c765d | 4812 | else |
a2bd0c99 | 4813 | { |
4814 | /* When there are three operands left, we want | |
4815 | to make sure the ones that get the double | |
4816 | binary op are chosen wisely. */ | |
4817 | int len = ops.length (); | |
4818 | if (len >= 3) | |
4819 | swap_ops_for_binary_stmt (ops, len - 3, stmt); | |
4820 | ||
82cb4e1c | 4821 | new_lhs = rewrite_expr_tree (stmt, 0, ops, |
4822 | powi_result != NULL); | |
a2bd0c99 | 4823 | } |
97269507 | 4824 | |
4825 | /* If we combined some repeated factors into a | |
4826 | __builtin_powi call, multiply that result by the | |
4827 | reassociated operands. */ | |
4828 | if (powi_result) | |
4829 | { | |
82cb4e1c | 4830 | gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs); |
4831 | tree type = TREE_TYPE (lhs); | |
03d37e4e | 4832 | tree target_ssa = make_temp_ssa_name (type, NULL, |
4833 | "reassocpow"); | |
82cb4e1c | 4834 | gimple_set_lhs (lhs_stmt, target_ssa); |
4835 | update_stmt (lhs_stmt); | |
4836 | if (lhs != new_lhs) | |
4837 | target_ssa = new_lhs; | |
e9cf809e | 4838 | mul_stmt = gimple_build_assign (lhs, MULT_EXPR, |
4839 | powi_result, target_ssa); | |
97269507 | 4840 | gimple_set_location (mul_stmt, gimple_location (stmt)); |
4841 | gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); | |
4842 | } | |
5b1c765d | 4843 | } |
54aceb26 | 4844 | } |
4845 | } | |
4846 | } | |
4847 | for (son = first_dom_son (CDI_POST_DOMINATORS, bb); | |
4848 | son; | |
4849 | son = next_dom_son (CDI_POST_DOMINATORS, son)) | |
4850 | reassociate_bb (son); | |
4851 | } | |
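/* A sketch of the width heuristic's effect (operand names made up):
   for ops = {a, b, c, d} with a target-derived width of 2,
   rewrite_expr_tree_parallel emits something like the height-balanced
     t1 = a + b;  t2 = c + d;  lhs = t1 + t2;
   exposing two independent additions, instead of the serial chain
     t1 = a + b;  t2 = t1 + c;  lhs = t2 + d;
   that the plain rewrite_expr_tree path would produce.  */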
4852 | ||
e3668db5 | 4853 | /* Add jumps around shifts for range tests turned into bit tests. |
4854 | For each SSA_NAME VAR we have code like: | |
4855 | VAR = ...; // final stmt of range comparison | |
4856 | // bit test here...; | |
4857 | OTHERVAR = ...; // final stmt of the bit test sequence | |
4858 | RES = VAR | OTHERVAR; | |
4859 | Turn the above into: | |
4860 | VAR = ...; | |
4861 | if (VAR != 0) | |
4862 | goto <l3>; | |
4863 | else | |
4864 | goto <l2>; | |
4865 | <l2>: | |
4866 | // bit test here...; | |
4867 | OTHERVAR = ...; | |
4868 | <l3>: | |
4869 | # RES = PHI<1(l1), OTHERVAR(l2)>; */ | |
4870 | ||
4871 | static void | |
4872 | branch_fixup (void) | |
4873 | { | |
4874 | tree var; | |
4875 | unsigned int i; | |
4876 | ||
4877 | FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var) | |
4878 | { | |
4879 | gimple def_stmt = SSA_NAME_DEF_STMT (var); | |
4880 | gimple use_stmt; | |
4881 | use_operand_p use; | |
4882 | bool ok = single_imm_use (var, &use, &use_stmt); | |
4883 | gcc_assert (ok | |
4884 | && is_gimple_assign (use_stmt) | |
4885 | && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR | |
4886 | && gimple_bb (def_stmt) == gimple_bb (use_stmt)); | |
4887 | ||
4888 | basic_block cond_bb = gimple_bb (def_stmt); | |
4889 | basic_block then_bb = split_block (cond_bb, def_stmt)->dest; | |
4890 | basic_block merge_bb = split_block (then_bb, use_stmt)->dest; | |
4891 | ||
4892 | gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt); | |
4893 | gimple g = gimple_build_cond (NE_EXPR, var, | |
4894 | build_zero_cst (TREE_TYPE (var)), | |
4895 | NULL_TREE, NULL_TREE); | |
4896 | location_t loc = gimple_location (use_stmt); | |
4897 | gimple_set_location (g, loc); | |
4898 | gsi_insert_after (&gsi, g, GSI_NEW_STMT); | |
4899 | ||
4900 | edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE); | |
4901 | etrue->probability = REG_BR_PROB_BASE / 2; | |
4902 | etrue->count = cond_bb->count / 2; | |
4903 | edge efalse = find_edge (cond_bb, then_bb); | |
4904 | efalse->flags = EDGE_FALSE_VALUE; | |
4905 | efalse->probability -= etrue->probability; | |
4906 | efalse->count -= etrue->count; | |
4907 | then_bb->count -= etrue->count; | |
4908 | ||
4909 | tree othervar = NULL_TREE; | |
4910 | if (gimple_assign_rhs1 (use_stmt) == var) | |
4911 | othervar = gimple_assign_rhs2 (use_stmt); | |
4912 | else if (gimple_assign_rhs2 (use_stmt) == var) | |
4913 | othervar = gimple_assign_rhs1 (use_stmt); | |
4914 | else | |
4915 | gcc_unreachable (); | |
4916 | tree lhs = gimple_assign_lhs (use_stmt); | |
1a91d914 | 4917 | gphi *phi = create_phi_node (lhs, merge_bb); |
e3668db5 | 4918 | add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc); |
4919 | add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc); | |
4920 | gsi = gsi_for_stmt (use_stmt); | |
4921 | gsi_remove (&gsi, true); | |
4922 | ||
4923 | set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb); | |
4924 | set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb); | |
4925 | } | |
4926 | reassoc_branch_fixups.release (); | |
4927 | } | |
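/* In terms of the diagram above (block names illustrative): COND_BB
   holds VAR's definition and now ends in if (VAR != 0); THEN_BB is
   <l2>, the bit-test sequence ending with OTHERVAR; MERGE_BB is <l3>,
   where RES becomes a PHI of 1 (via COND_BB's true edge) and OTHERVAR
   (via THEN_BB's fall-through edge).  */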
4928 | ||
f1f41a6c | 4929 | void dump_ops_vector (FILE *file, vec<operand_entry_t> ops); |
4930 | void debug_ops_vector (vec<operand_entry_t> ops); | |
54aceb26 | 4931 | |
4932 | /* Dump the operand entry vector OPS to FILE. */ | |
4933 | ||
4934 | void | |
f1f41a6c | 4935 | dump_ops_vector (FILE *file, vec<operand_entry_t> ops) |
54aceb26 | 4936 | { |
4937 | operand_entry_t oe; | |
4938 | unsigned int i; | |
4939 | ||
f1f41a6c | 4940 | FOR_EACH_VEC_ELT (ops, i, oe) |
54aceb26 | 4941 | { |
4942 | fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); | |
75a70cf9 | 4943 | print_generic_expr (file, oe->op, 0); fprintf (file, "\n"); | |
54aceb26 | 4944 | } |
4945 | } | |
4946 | ||
4947 | /* Dump the operand entry vector OPS to STDERR. */ | |
4948 | ||
4b987fac | 4949 | DEBUG_FUNCTION void |
f1f41a6c | 4950 | debug_ops_vector (vec<operand_entry_t> ops) |
54aceb26 | 4951 | { |
4952 | dump_ops_vector (stderr, ops); | |
4953 | } | |
4954 | ||
4955 | static void | |
4956 | do_reassoc (void) | |
4957 | { | |
34154e27 | 4958 | break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
4959 | reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); | |
54aceb26 | 4960 | } |
4961 | ||
4962 | /* Initialize the reassociation pass. */ | |
4963 | ||
4964 | static void | |
4965 | init_reassoc (void) | |
4966 | { | |
4967 | int i; | |
b30a8715 | 4968 | long rank = 2; |
a28770e1 | 4969 | int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); |
54aceb26 | 4970 | |
a4c3fb95 | 4971 | /* Find the loops, so that we can prevent moving calculations in |
4972 | them. */ | |
4973 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); | |
4974 | ||
54aceb26 | 4975 | memset (&reassociate_stats, 0, sizeof (reassociate_stats)); |
4976 | ||
17b5ea6f | 4977 | next_operand_entry_id = 0; |
54aceb26 | 4978 | |
4979 | /* Compute a reverse post order (RPO) over the CFG; blocks in | |
4980 | deeper loops come later in this order and so get higher ranks. */ | |
6180f28d | 4981 | pre_and_rev_post_order_compute (NULL, bbs, false); |
fe672ac0 | 4982 | bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun)); |
06ecf488 | 4983 | operand_rank = new hash_map<tree, long>; |
54aceb26 | 4984 | |
61e371b0 | 4985 | /* Give each default definition a distinct rank. This includes |
4986 | parameters and the static chain. Walk backwards over all | |
4987 | SSA names so that we get proper rank ordering according | |
4988 | to tree_swap_operands_p. */ | |
4989 | for (i = num_ssa_names - 1; i > 0; --i) | |
54aceb26 | 4990 | { |
61e371b0 | 4991 | tree name = ssa_name (i); |
4992 | if (name && SSA_NAME_IS_DEFAULT_DEF (name)) | |
4993 | insert_operand_rank (name, ++rank); | |
54aceb26 | 4994 | } |
4995 | ||
4996 | /* Set up the rank for each BB.  */ | |
a28770e1 | 4997 | for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++) |
54aceb26 | 4998 | bb_rank[bbs[i]] = ++rank << 16; |
4999 | ||
5000 | free (bbs); | |
54aceb26 | 5001 | calculate_dominance_info (CDI_POST_DOMINATORS); |
1e094109 | 5002 | plus_negates = vNULL; |
54aceb26 | 5003 | } |
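/* A sketch of the rank space this sets up: default definitions get
   the small distinct ranks 3, 4, ... assigned above, while each basic
   block's base rank lives in the high 16 bits ((++rank) << 16) and
   increases along the reverse post order, leaving the low bits free
   for ordering values within a block (an assumption about how the
   rest of the file consumes bb_rank).  */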
5004 | ||
5005 | /* Clean up after the reassociation pass, and print stats if | |
5006 | requested. */ | |
5007 | ||
5008 | static void | |
5009 | fini_reassoc (void) | |
5010 | { | |
581f8050 | 5011 | statistics_counter_event (cfun, "Linearized", |
5012 | reassociate_stats.linearized); | |
5013 | statistics_counter_event (cfun, "Constants eliminated", | |
5014 | reassociate_stats.constants_eliminated); | |
5015 | statistics_counter_event (cfun, "Ops eliminated", | |
5016 | reassociate_stats.ops_eliminated); | |
5017 | statistics_counter_event (cfun, "Statements rewritten", | |
5018 | reassociate_stats.rewritten); | |
8c5ac7f6 | 5019 | statistics_counter_event (cfun, "Built-in pow[i] calls encountered", |
5020 | reassociate_stats.pows_encountered); | |
5021 | statistics_counter_event (cfun, "Built-in powi calls created", | |
5022 | reassociate_stats.pows_created); | |
54aceb26 | 5023 | |
06ecf488 | 5024 | delete operand_rank; |
672758dc | 5025 | operand_entry_pool.release (); |
54aceb26 | 5026 | free (bb_rank); |
f1f41a6c | 5027 | plus_negates.release (); |
54aceb26 | 5028 | free_dominance_info (CDI_POST_DOMINATORS); |
a4c3fb95 | 5029 | loop_optimizer_finalize (); |
54aceb26 | 5030 | } |
5031 | ||
5032 | /* Gate and execute functions for Reassociation. */ | |
5033 | ||
2a1990e9 | 5034 | static unsigned int |
54aceb26 | 5035 | execute_reassoc (void) |
5036 | { | |
3dec5460 | 5037 | init_reassoc (); |
54aceb26 | 5038 | |
3dec5460 | 5039 | do_reassoc (); |
54aceb26 | 5040 | repropagate_negates (); |
e3668db5 | 5041 | branch_fixup (); |
54aceb26 | 5042 | |
3dec5460 | 5043 | fini_reassoc (); |
2a1990e9 | 5044 | return 0; |
3dec5460 | 5045 | } |
5046 | ||
cbe8bda8 | 5047 | namespace { |
5048 | ||
5049 | const pass_data pass_data_reassoc = | |
3dec5460 | 5050 | { |
cbe8bda8 | 5051 | GIMPLE_PASS, /* type */ |
5052 | "reassoc", /* name */ | |
5053 | OPTGROUP_NONE, /* optinfo_flags */ | |
cbe8bda8 | 5054 | TV_TREE_REASSOC, /* tv_id */ |
5055 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
5056 | 0, /* properties_provided */ | |
5057 | 0, /* properties_destroyed */ | |
5058 | 0, /* todo_flags_start */ | |
8b88439e | 5059 | TODO_update_ssa_only_virtuals, /* todo_flags_finish */ |
3dec5460 | 5060 | }; |
cbe8bda8 | 5061 | |
5062 | class pass_reassoc : public gimple_opt_pass | |
5063 | { | |
5064 | public: | |
9af5ce0c | 5065 | pass_reassoc (gcc::context *ctxt) |
5066 | : gimple_opt_pass (pass_data_reassoc, ctxt) | |
cbe8bda8 | 5067 | {} |
5068 | ||
5069 | /* opt_pass methods: */ | |
ae84f584 | 5070 | opt_pass * clone () { return new pass_reassoc (m_ctxt); } |
31315c24 | 5071 | virtual bool gate (function *) { return flag_tree_reassoc != 0; } |
65b0537f | 5072 | virtual unsigned int execute (function *) { return execute_reassoc (); } |
cbe8bda8 | 5073 | |
5074 | }; // class pass_reassoc | |
5075 | ||
5076 | } // anon namespace | |
5077 | ||
5078 | gimple_opt_pass * | |
5079 | make_pass_reassoc (gcc::context *ctxt) | |
5080 | { | |
5081 | return new pass_reassoc (ctxt); | |
5082 | } |