]>
Commit | Line | Data |
---|---|---|
012309e6 | 1 | /* Reassociation for trees. |
2c9da85b JJ |
2 | Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011 |
3 | Free Software Foundation, Inc. | |
012309e6 DB |
4 | Contributed by Daniel Berlin <dan@dberlin.org> |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
9dcd6f09 | 10 | the Free Software Foundation; either version 3, or (at your option) |
012309e6 DB |
11 | any later version. |
12 | ||
13 | GCC is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ | |
012309e6 DB |
21 | |
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "tm.h" | |
012309e6 DB |
26 | #include "tree.h" |
27 | #include "basic-block.h" | |
cf835838 | 28 | #include "gimple-pretty-print.h" |
012309e6 DB |
29 | #include "tree-inline.h" |
30 | #include "tree-flow.h" | |
726a989a | 31 | #include "gimple.h" |
012309e6 DB |
32 | #include "tree-iterator.h" |
33 | #include "tree-pass.h" | |
0e0ed594 JL |
34 | #include "alloc-pool.h" |
35 | #include "vec.h" | |
36 | #include "langhooks.h" | |
15814ba0 | 37 | #include "pointer-set.h" |
7a9c7d01 | 38 | #include "cfgloop.h" |
2dc0f633 | 39 | #include "flags.h" |
df7b0cc4 EI |
40 | #include "target.h" |
41 | #include "params.h" | |
0ccb5dbf | 42 | #include "diagnostic-core.h" |
012309e6 | 43 | |
0e0ed594 JL |
44 | /* This is a simple global reassociation pass. It is, in part, based |
45 | on the LLVM pass of the same name (They do some things more/less | |
46 | than we do, in different orders, etc). | |
012309e6 | 47 | |
0e0ed594 | 48 | It consists of five steps: |
012309e6 | 49 | |
0e0ed594 JL |
50 | 1. Breaking up subtract operations into addition + negate, where |
51 | it would promote the reassociation of adds. | |
012309e6 | 52 | |
0e0ed594 JL |
53 | 2. Left linearization of the expression trees, so that (A+B)+(C+D) |
54 | becomes (((A+B)+C)+D), which is easier for us to rewrite later. | |
55 | During linearization, we place the operands of the binary | |
56 | expressions into a vector of operand_entry_t | |
012309e6 | 57 | |
0e0ed594 JL |
58 | 3. Optimization of the operand lists, eliminating things like a + |
59 | -a, a & a, etc. | |
012309e6 | 60 | |
a6f8851e BS |
61 | 3a. Combine repeated factors with the same occurrence counts |
62 | into a __builtin_powi call that will later be optimized into | |
63 | an optimal number of multiplies. | |
64 | ||
0e0ed594 JL |
65 | 4. Rewrite the expression trees we linearized and optimized so |
66 | they are in proper rank order. | |
012309e6 | 67 | |
0e0ed594 | 68 | 5. Repropagate negates, as nothing else will clean it up ATM. |
012309e6 | 69 | |
0e0ed594 JL |
70 | A bit of theory on #4, since nobody seems to write anything down |
71 | about why it makes sense to do it the way they do it: | |
012309e6 | 72 | |
0e0ed594 JL |
73 | We could do this much nicer theoretically, but don't (for reasons |
74 | explained after how to do it theoretically nice :P). | |
012309e6 | 75 | |
0e0ed594 JL |
76 | In order to promote the most redundancy elimination, you want |
77 | binary expressions whose operands are the same rank (or | |
6416ae7f | 78 | preferably, the same value) exposed to the redundancy eliminator, |
0e0ed594 | 79 | for possible elimination. |
012309e6 | 80 | |
0e0ed594 JL |
81 | So the way to do this if we really cared, is to build the new op |
82 | tree from the leaves to the roots, merging as you go, and putting the | |
83 | new op on the end of the worklist, until you are left with one | |
84 | thing on the worklist. | |
012309e6 | 85 | |
0e0ed594 JL |
86 | IE if you have to rewrite the following set of operands (listed with |
87 | rank in parentheses), with opcode PLUS_EXPR: | |
012309e6 | 88 | |
0e0ed594 | 89 | a (1), b (1), c (1), d (2), e (2) |
012309e6 | 90 | |
012309e6 | 91 | |
0e0ed594 JL |
92 | We start with our merge worklist empty, and the ops list with all of |
93 | those on it. | |
012309e6 | 94 | |
0e0ed594 JL |
95 | You want to first merge all leaves of the same rank, as much as |
96 | possible. | |
97 | ||
98 | So first build a binary op of | |
99 | ||
100 | mergetmp = a + b, and put "mergetmp" on the merge worklist. | |
101 | ||
102 | Because there is no three operand form of PLUS_EXPR, c is not going to | |
103 | be exposed to redundancy elimination as a rank 1 operand. | |
104 | ||
105 | So you might as well throw it on the merge worklist (you could also | |
106 | consider it to now be a rank two operand, and merge it with d and e, | |
107 | but in this case, you then have evicted e from a binary op. So at | |
108 | least in this situation, you can't win.) | |
109 | ||
110 | Then build a binary op of d + e | |
111 | mergetmp2 = d + e | |
112 | ||
113 | and put mergetmp2 on the merge worklist. | |
b8698a0f | 114 | |
0e0ed594 | 115 | so merge worklist = {mergetmp, c, mergetmp2} |
b8698a0f | 116 | |
0e0ed594 JL |
117 | Continue building binary ops of these operations until you have only |
118 | one operation left on the worklist. | |
b8698a0f | 119 | |
0e0ed594 | 120 | So we have |
b8698a0f | 121 | |
0e0ed594 JL |
122 | build binary op |
123 | mergetmp3 = mergetmp + c | |
b8698a0f | 124 | |
0e0ed594 | 125 | worklist = {mergetmp2, mergetmp3} |
b8698a0f | 126 | |
0e0ed594 | 127 | mergetmp4 = mergetmp2 + mergetmp3 |
b8698a0f | 128 | |
0e0ed594 | 129 | worklist = {mergetmp4} |
b8698a0f | 130 | |
0e0ed594 JL |
131 | because we have one operation left, we can now just set the original |
132 | statement equal to the result of that operation. | |
b8698a0f | 133 | |
0e0ed594 JL |
134 | This will at least expose a + b and d + e to redundancy elimination |
135 | as binary operations. | |
b8698a0f | 136 | |
0e0ed594 JL |
137 | For extra points, you can reuse the old statements to build the |
138 | mergetmps, since you shouldn't run out. | |
139 | ||
140 | So why don't we do this? | |
b8698a0f | 141 | |
0e0ed594 JL |
142 | Because it's expensive, and rarely will help. Most trees we are |
143 | reassociating have 3 or less ops. If they have 2 ops, they already | |
144 | will be written into a nice single binary op. If you have 3 ops, a | |
145 | single simple check suffices to tell you whether the first two are of the | |
146 | same rank. If so, you know to order it | |
147 | ||
148 | mergetmp = op1 + op2 | |
149 | newstmt = mergetmp + op3 | |
b8698a0f | 150 | |
0e0ed594 JL |
151 | instead of |
152 | mergetmp = op2 + op3 | |
153 | newstmt = mergetmp + op1 | |
b8698a0f | 154 | |
0e0ed594 JL |
155 | If all three are of the same rank, you can't expose them all in a |
156 | single binary operator anyway, so the above is *still* the best you | |
157 | can do. | |
b8698a0f | 158 | |
0e0ed594 JL |
159 | Thus, this is what we do. When we have three ops left, we check to see |
160 | what order to put them in, and call it a day. As a nod to vector sum | |
1d72ff1a | 161 | reduction, we check if any of the ops are really a phi node that is a |
0e0ed594 JL |
162 | destructive update for the associating op, and keep the destructive |
163 | update together for vector sum reduction recognition. */ | |
012309e6 | 164 | |
012309e6 | 165 | |
/* Statistics gathered during the pass, reported at the end of a run
   (see the other reassoc routines that increment these counters).  */
static struct
{
  int linearized;            /* Expression trees flattened into op lists.  */
  int constants_eliminated;  /* Constant operands folded away.  */
  int ops_eliminated;        /* Operands removed via identities.  */
  int rewritten;             /* Trees rewritten into rank order.  */
  int pows_encountered;      /* Repeated-factor patterns seen.  */
  int pows_created;          /* __builtin_powi calls synthesized.  */
} reassociate_stats;
012309e6 | 176 | |
0e0ed594 JL |
177 | /* Operator, rank pair. */ |
178 | typedef struct operand_entry | |
012309e6 | 179 | { |
0e0ed594 | 180 | unsigned int rank; |
f1665f5c | 181 | int id; |
0e0ed594 | 182 | tree op; |
a6f8851e | 183 | unsigned int count; |
0e0ed594 JL |
184 | } *operand_entry_t; |
185 | ||
186 | static alloc_pool operand_entry_pool; | |
187 | ||
f1665f5c DK |
188 | /* This is used to assign a unique ID to each struct operand_entry |
189 | so that qsort results are identical on different hosts. */ | |
190 | static int next_operand_entry_id; | |
012309e6 DB |
191 | |
192 | /* Starting rank number for a given basic block, so that we can rank | |
193 | operations using unmovable instructions in that BB based on the bb | |
194 | depth. */ | |
15814ba0 | 195 | static long *bb_rank; |
012309e6 | 196 | |
0e0ed594 | 197 | /* Operand->rank hashtable. */ |
15814ba0 | 198 | static struct pointer_map_t *operand_rank; |
012309e6 | 199 | |
a3059635 BS |
200 | /* Forward decls. */ |
201 | static long get_rank (tree); | |
202 | ||
203 | ||
204 | /* Bias amount for loop-carried phis. We want this to be larger than | |
205 | the depth of any reassociation tree we can see, but not larger than | |
206 | the rank difference between two blocks. */ | |
207 | #define PHI_LOOP_BIAS (1 << 15) | |
208 | ||
209 | /* Rank assigned to a phi statement. If STMT is a loop-carried phi of | |
210 | an innermost loop, and the phi has only a single use which is inside | |
211 | the loop, then the rank is the block rank of the loop latch plus an | |
212 | extra bias for the loop-carried dependence. This causes expressions | |
213 | calculated into an accumulator variable to be independent for each | |
214 | iteration of the loop. If STMT is some other phi, the rank is the | |
215 | block rank of its containing block. */ | |
216 | static long | |
217 | phi_rank (gimple stmt) | |
218 | { | |
219 | basic_block bb = gimple_bb (stmt); | |
220 | struct loop *father = bb->loop_father; | |
221 | tree res; | |
222 | unsigned i; | |
223 | use_operand_p use; | |
224 | gimple use_stmt; | |
225 | ||
226 | /* We only care about real loops (those with a latch). */ | |
227 | if (!father->latch) | |
228 | return bb_rank[bb->index]; | |
229 | ||
230 | /* Interesting phis must be in headers of innermost loops. */ | |
231 | if (bb != father->header | |
232 | || father->inner) | |
233 | return bb_rank[bb->index]; | |
234 | ||
235 | /* Ignore virtual SSA_NAMEs. */ | |
236 | res = gimple_phi_result (stmt); | |
ea057359 | 237 | if (virtual_operand_p (res)) |
a3059635 BS |
238 | return bb_rank[bb->index]; |
239 | ||
240 | /* The phi definition must have a single use, and that use must be | |
241 | within the loop. Otherwise this isn't an accumulator pattern. */ | |
242 | if (!single_imm_use (res, &use, &use_stmt) | |
243 | || gimple_bb (use_stmt)->loop_father != father) | |
244 | return bb_rank[bb->index]; | |
245 | ||
246 | /* Look for phi arguments from within the loop. If found, bias this phi. */ | |
247 | for (i = 0; i < gimple_phi_num_args (stmt); i++) | |
248 | { | |
249 | tree arg = gimple_phi_arg_def (stmt, i); | |
250 | if (TREE_CODE (arg) == SSA_NAME | |
251 | && !SSA_NAME_IS_DEFAULT_DEF (arg)) | |
252 | { | |
253 | gimple def_stmt = SSA_NAME_DEF_STMT (arg); | |
254 | if (gimple_bb (def_stmt)->loop_father == father) | |
255 | return bb_rank[father->latch->index] + PHI_LOOP_BIAS; | |
256 | } | |
257 | } | |
258 | ||
259 | /* Must be an uninteresting phi. */ | |
260 | return bb_rank[bb->index]; | |
261 | } | |
262 | ||
263 | /* If EXP is an SSA_NAME defined by a PHI statement that represents a | |
264 | loop-carried dependence of an innermost loop, return TRUE; else | |
265 | return FALSE. */ | |
266 | static bool | |
267 | loop_carried_phi (tree exp) | |
268 | { | |
269 | gimple phi_stmt; | |
270 | long block_rank; | |
271 | ||
272 | if (TREE_CODE (exp) != SSA_NAME | |
273 | || SSA_NAME_IS_DEFAULT_DEF (exp)) | |
274 | return false; | |
275 | ||
276 | phi_stmt = SSA_NAME_DEF_STMT (exp); | |
277 | ||
278 | if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI) | |
279 | return false; | |
280 | ||
281 | /* Non-loop-carried phis have block rank. Loop-carried phis have | |
282 | an additional bias added in. If this phi doesn't have block rank, | |
283 | it's biased and should not be propagated. */ | |
284 | block_rank = bb_rank[gimple_bb (phi_stmt)->index]; | |
285 | ||
286 | if (phi_rank (phi_stmt) != block_rank) | |
287 | return true; | |
288 | ||
289 | return false; | |
290 | } | |
291 | ||
292 | /* Return the maximum of RANK and the rank that should be propagated | |
293 | from expression OP. For most operands, this is just the rank of OP. | |
294 | For loop-carried phis, the value is zero to avoid undoing the bias | |
295 | in favor of the phi. */ | |
296 | static long | |
297 | propagate_rank (long rank, tree op) | |
298 | { | |
299 | long op_rank; | |
300 | ||
301 | if (loop_carried_phi (op)) | |
302 | return rank; | |
303 | ||
304 | op_rank = get_rank (op); | |
305 | ||
306 | return MAX (rank, op_rank); | |
307 | } | |
012309e6 | 308 | |
0e0ed594 | 309 | /* Look up the operand rank structure for expression E. */ |
012309e6 | 310 | |
15814ba0 | 311 | static inline long |
0e0ed594 | 312 | find_operand_rank (tree e) |
012309e6 | 313 | { |
15814ba0 | 314 | void **slot = pointer_map_contains (operand_rank, e); |
34c6743c | 315 | return slot ? (long) (intptr_t) *slot : -1; |
012309e6 DB |
316 | } |
317 | ||
0e0ed594 | 318 | /* Insert {E,RANK} into the operand rank hashtable. */ |
012309e6 | 319 | |
15814ba0 PB |
320 | static inline void |
321 | insert_operand_rank (tree e, long rank) | |
012309e6 DB |
322 | { |
323 | void **slot; | |
15814ba0 PB |
324 | gcc_assert (rank > 0); |
325 | slot = pointer_map_insert (operand_rank, e); | |
326 | gcc_assert (!*slot); | |
34c6743c | 327 | *slot = (void *) (intptr_t) rank; |
012309e6 DB |
328 | } |
329 | ||
012309e6 DB |
330 | /* Given an expression E, return the rank of the expression. */ |
331 | ||
15814ba0 | 332 | static long |
012309e6 DB |
333 | get_rank (tree e) |
334 | { | |
0e0ed594 | 335 | /* Constants have rank 0. */ |
012309e6 DB |
336 | if (is_gimple_min_invariant (e)) |
337 | return 0; | |
0e0ed594 | 338 | |
012309e6 DB |
339 | /* SSA_NAME's have the rank of the expression they are the result |
340 | of. | |
341 | For globals and uninitialized values, the rank is 0. | |
342 | For function arguments, use the pre-setup rank. | |
343 | For PHI nodes, stores, asm statements, etc, we use the rank of | |
344 | the BB. | |
345 | For simple operations, the rank is the maximum rank of any of | |
346 | its operands, or the bb_rank, whichever is less. | |
347 | I make no claims that this is optimal, however, it gives good | |
348 | results. */ | |
349 | ||
a3059635 BS |
350 | /* We make an exception to the normal ranking system to break |
351 | dependences of accumulator variables in loops. Suppose we | |
352 | have a simple one-block loop containing: | |
353 | ||
354 | x_1 = phi(x_0, x_2) | |
355 | b = a + x_1 | |
356 | c = b + d | |
357 | x_2 = c + e | |
358 | ||
359 | As shown, each iteration of the calculation into x is fully | |
360 | dependent upon the iteration before it. We would prefer to | |
361 | see this in the form: | |
362 | ||
363 | x_1 = phi(x_0, x_2) | |
364 | b = a + d | |
365 | c = b + e | |
366 | x_2 = c + x_1 | |
367 | ||
368 | If the loop is unrolled, the calculations of b and c from | |
369 | different iterations can be interleaved. | |
370 | ||
371 | To obtain this result during reassociation, we bias the rank | |
372 | of the phi definition x_1 upward, when it is recognized as an | |
373 | accumulator pattern. The artificial rank causes it to be | |
374 | added last, providing the desired independence. */ | |
375 | ||
012309e6 DB |
376 | if (TREE_CODE (e) == SSA_NAME) |
377 | { | |
726a989a | 378 | gimple stmt; |
777a4e9a | 379 | long rank; |
726a989a | 380 | int i, n; |
a3059635 | 381 | tree op; |
0e0ed594 | 382 | |
be7493ca | 383 | if (SSA_NAME_IS_DEFAULT_DEF (e)) |
15814ba0 | 384 | return find_operand_rank (e); |
0e0ed594 | 385 | |
012309e6 | 386 | stmt = SSA_NAME_DEF_STMT (e); |
a3059635 BS |
387 | if (gimple_code (stmt) == GIMPLE_PHI) |
388 | return phi_rank (stmt); | |
389 | ||
726a989a | 390 | if (!is_gimple_assign (stmt) |
5006671f | 391 | || gimple_vdef (stmt)) |
726a989a | 392 | return bb_rank[gimple_bb (stmt)->index]; |
012309e6 DB |
393 | |
394 | /* If we already have a rank for this expression, use that. */ | |
15814ba0 PB |
395 | rank = find_operand_rank (e); |
396 | if (rank != -1) | |
397 | return rank; | |
012309e6 | 398 | |
a3059635 BS |
399 | /* Otherwise, find the maximum rank for the operands. As an |
400 | exception, remove the bias from loop-carried phis when propagating | |
401 | the rank so that dependent operations are not also biased. */ | |
012309e6 | 402 | rank = 0; |
726a989a RB |
403 | if (gimple_assign_single_p (stmt)) |
404 | { | |
405 | tree rhs = gimple_assign_rhs1 (stmt); | |
406 | n = TREE_OPERAND_LENGTH (rhs); | |
407 | if (n == 0) | |
a3059635 | 408 | rank = propagate_rank (rank, rhs); |
726a989a RB |
409 | else |
410 | { | |
777a4e9a | 411 | for (i = 0; i < n; i++) |
a3059635 BS |
412 | { |
413 | op = TREE_OPERAND (rhs, i); | |
414 | ||
415 | if (op != NULL_TREE) | |
416 | rank = propagate_rank (rank, op); | |
417 | } | |
726a989a RB |
418 | } |
419 | } | |
0e0ed594 | 420 | else |
012309e6 | 421 | { |
726a989a | 422 | n = gimple_num_ops (stmt); |
777a4e9a | 423 | for (i = 1; i < n; i++) |
726a989a | 424 | { |
a3059635 BS |
425 | op = gimple_op (stmt, i); |
426 | gcc_assert (op); | |
427 | rank = propagate_rank (rank, op); | |
726a989a | 428 | } |
012309e6 | 429 | } |
0e0ed594 | 430 | |
012309e6 DB |
431 | if (dump_file && (dump_flags & TDF_DETAILS)) |
432 | { | |
433 | fprintf (dump_file, "Rank for "); | |
434 | print_generic_expr (dump_file, e, 0); | |
15814ba0 | 435 | fprintf (dump_file, " is %ld\n", (rank + 1)); |
012309e6 | 436 | } |
0e0ed594 | 437 | |
012309e6 | 438 | /* Note the rank in the hashtable so we don't recompute it. */ |
0e0ed594 | 439 | insert_operand_rank (e, (rank + 1)); |
012309e6 DB |
440 | return (rank + 1); |
441 | } | |
442 | ||
443 | /* Globals, etc, are rank 0 */ | |
444 | return 0; | |
445 | } | |
446 | ||
DEF_VEC_P(operand_entry_t);
DEF_VEC_ALLOC_P(operand_entry_t, heap);

/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  The expansions are parenthesized
   so the shift cannot misbind when a macro is used inside a larger
   expression (standard macro hygiene; values are unchanged).  */
#define INTEGER_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)
455 | ||
456 | /* Classify an invariant tree into integer, float, or other, so that | |
457 | we can sort them to be near other constants of the same type. */ | |
458 | static inline int | |
459 | constant_type (tree t) | |
460 | { | |
461 | if (INTEGRAL_TYPE_P (TREE_TYPE (t))) | |
462 | return INTEGER_CONST_TYPE; | |
463 | else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) | |
464 | return FLOAT_CONST_TYPE; | |
465 | else | |
466 | return OTHER_CONST_TYPE; | |
467 | } | |
468 | ||
469 | /* qsort comparison function to sort operand entries PA and PB by rank | |
470 | so that the sorted array is ordered by rank in decreasing order. */ | |
471 | static int | |
472 | sort_by_operand_rank (const void *pa, const void *pb) | |
473 | { | |
474 | const operand_entry_t oea = *(const operand_entry_t *)pa; | |
475 | const operand_entry_t oeb = *(const operand_entry_t *)pb; | |
476 | ||
477 | /* It's nicer for optimize_expression if constants that are likely | |
478 | to fold when added/multiplied//whatever are put next to each | |
479 | other. Since all constants have rank 0, order them by type. */ | |
be7493ca | 480 | if (oeb->rank == 0 && oea->rank == 0) |
f1665f5c DK |
481 | { |
482 | if (constant_type (oeb->op) != constant_type (oea->op)) | |
483 | return constant_type (oeb->op) - constant_type (oea->op); | |
484 | else | |
485 | /* To make sorting result stable, we use unique IDs to determine | |
486 | order. */ | |
487 | return oeb->id - oea->id; | |
488 | } | |
0e0ed594 JL |
489 | |
490 | /* Lastly, make sure the versions that are the same go next to each | |
491 | other. We use SSA_NAME_VERSION because it's stable. */ | |
492 | if ((oeb->rank - oea->rank == 0) | |
493 | && TREE_CODE (oea->op) == SSA_NAME | |
494 | && TREE_CODE (oeb->op) == SSA_NAME) | |
f1665f5c DK |
495 | { |
496 | if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op)) | |
497 | return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op); | |
498 | else | |
499 | return oeb->id - oea->id; | |
500 | } | |
0e0ed594 | 501 | |
f1665f5c DK |
502 | if (oeb->rank != oea->rank) |
503 | return oeb->rank - oea->rank; | |
504 | else | |
505 | return oeb->id - oea->id; | |
0e0ed594 JL |
506 | } |
507 | ||
508 | /* Add an operand entry to *OPS for the tree operand OP. */ | |
509 | ||
510 | static void | |
511 | add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op) | |
512 | { | |
c22940cd | 513 | operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); |
0e0ed594 JL |
514 | |
515 | oe->op = op; | |
516 | oe->rank = get_rank (op); | |
f1665f5c | 517 | oe->id = next_operand_entry_id++; |
a6f8851e | 518 | oe->count = 1; |
0e0ed594 JL |
519 | VEC_safe_push (operand_entry_t, heap, *ops, oe); |
520 | } | |
012309e6 | 521 | |
a6f8851e BS |
522 | /* Add an operand entry to *OPS for the tree operand OP with repeat |
523 | count REPEAT. */ | |
524 | ||
525 | static void | |
526 | add_repeat_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op, | |
527 | HOST_WIDE_INT repeat) | |
528 | { | |
529 | operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); | |
530 | ||
531 | oe->op = op; | |
532 | oe->rank = get_rank (op); | |
533 | oe->id = next_operand_entry_id++; | |
534 | oe->count = repeat; | |
535 | VEC_safe_push (operand_entry_t, heap, *ops, oe); | |
536 | ||
537 | reassociate_stats.pows_encountered++; | |
538 | } | |
539 | ||
0e0ed594 | 540 | /* Return true if STMT is reassociable operation containing a binary |
7a9c7d01 | 541 | operation with tree code CODE, and is inside LOOP. */ |
012309e6 DB |
542 | |
543 | static bool | |
726a989a | 544 | is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop) |
0e0ed594 | 545 | { |
726a989a | 546 | basic_block bb = gimple_bb (stmt); |
7a9c7d01 | 547 | |
726a989a | 548 | if (gimple_bb (stmt) == NULL) |
7a9c7d01 ZD |
549 | return false; |
550 | ||
7a9c7d01 ZD |
551 | if (!flow_bb_inside_loop_p (loop, bb)) |
552 | return false; | |
553 | ||
726a989a RB |
554 | if (is_gimple_assign (stmt) |
555 | && gimple_assign_rhs_code (stmt) == code | |
556 | && has_single_use (gimple_assign_lhs (stmt))) | |
012309e6 | 557 | return true; |
726a989a | 558 | |
0e0ed594 JL |
559 | return false; |
560 | } | |
561 | ||
562 | ||
563 | /* Given NAME, if NAME is defined by a unary operation OPCODE, return the | |
564 | operand of the negate operation. Otherwise, return NULL. */ | |
565 | ||
566 | static tree | |
567 | get_unary_op (tree name, enum tree_code opcode) | |
568 | { | |
726a989a | 569 | gimple stmt = SSA_NAME_DEF_STMT (name); |
0e0ed594 | 570 | |
726a989a | 571 | if (!is_gimple_assign (stmt)) |
0e0ed594 JL |
572 | return NULL_TREE; |
573 | ||
726a989a RB |
574 | if (gimple_assign_rhs_code (stmt) == opcode) |
575 | return gimple_assign_rhs1 (stmt); | |
0e0ed594 JL |
576 | return NULL_TREE; |
577 | } | |
578 | ||
579 | /* If CURR and LAST are a pair of ops that OPCODE allows us to | |
580 | eliminate through equivalences, do so, remove them from OPS, and | |
581 | return true. Otherwise, return false. */ | |
582 | ||
583 | static bool | |
584 | eliminate_duplicate_pair (enum tree_code opcode, | |
585 | VEC (operand_entry_t, heap) **ops, | |
586 | bool *all_done, | |
587 | unsigned int i, | |
588 | operand_entry_t curr, | |
589 | operand_entry_t last) | |
590 | { | |
591 | ||
e969dbde AP |
592 | /* If we have two of the same op, and the opcode is & |, min, or max, |
593 | we can eliminate one of them. | |
0e0ed594 JL |
594 | If we have two of the same op, and the opcode is ^, we can |
595 | eliminate both of them. */ | |
012309e6 | 596 | |
0e0ed594 | 597 | if (last && last->op == curr->op) |
012309e6 | 598 | { |
0e0ed594 JL |
599 | switch (opcode) |
600 | { | |
e969dbde AP |
601 | case MAX_EXPR: |
602 | case MIN_EXPR: | |
0e0ed594 JL |
603 | case BIT_IOR_EXPR: |
604 | case BIT_AND_EXPR: | |
605 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
606 | { | |
607 | fprintf (dump_file, "Equivalence: "); | |
608 | print_generic_expr (dump_file, curr->op, 0); | |
e969dbde | 609 | fprintf (dump_file, " [&|minmax] "); |
0e0ed594 JL |
610 | print_generic_expr (dump_file, last->op, 0); |
611 | fprintf (dump_file, " -> "); | |
612 | print_generic_stmt (dump_file, last->op, 0); | |
613 | } | |
614 | ||
615 | VEC_ordered_remove (operand_entry_t, *ops, i); | |
616 | reassociate_stats.ops_eliminated ++; | |
617 | ||
618 | return true; | |
619 | ||
620 | case BIT_XOR_EXPR: | |
621 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
622 | { | |
623 | fprintf (dump_file, "Equivalence: "); | |
624 | print_generic_expr (dump_file, curr->op, 0); | |
625 | fprintf (dump_file, " ^ "); | |
626 | print_generic_expr (dump_file, last->op, 0); | |
627 | fprintf (dump_file, " -> nothing\n"); | |
628 | } | |
629 | ||
630 | reassociate_stats.ops_eliminated += 2; | |
631 | ||
632 | if (VEC_length (operand_entry_t, *ops) == 2) | |
633 | { | |
634 | VEC_free (operand_entry_t, heap, *ops); | |
635 | *ops = NULL; | |
e8160c9a | 636 | add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); |
0e0ed594 JL |
637 | *all_done = true; |
638 | } | |
639 | else | |
640 | { | |
641 | VEC_ordered_remove (operand_entry_t, *ops, i-1); | |
642 | VEC_ordered_remove (operand_entry_t, *ops, i-1); | |
643 | } | |
644 | ||
645 | return true; | |
646 | ||
647 | default: | |
648 | break; | |
649 | } | |
650 | } | |
012309e6 DB |
651 | return false; |
652 | } | |
653 | ||
b0aef8a8 MK |
654 | static VEC(tree, heap) *plus_negates; |
655 | ||
44741f03 AM |
656 | /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not |
657 | expression, look in OPS for a corresponding positive operation to cancel | |
658 | it out. If we find one, remove the other from OPS, replace | |
659 | OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, | |
660 | return false. */ | |
012309e6 DB |
661 | |
662 | static bool | |
0e0ed594 JL |
663 | eliminate_plus_minus_pair (enum tree_code opcode, |
664 | VEC (operand_entry_t, heap) **ops, | |
665 | unsigned int currindex, | |
666 | operand_entry_t curr) | |
667 | { | |
668 | tree negateop; | |
44741f03 | 669 | tree notop; |
0e0ed594 JL |
670 | unsigned int i; |
671 | operand_entry_t oe; | |
672 | ||
673 | if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) | |
012309e6 | 674 | return false; |
0e0ed594 JL |
675 | |
676 | negateop = get_unary_op (curr->op, NEGATE_EXPR); | |
44741f03 AM |
677 | notop = get_unary_op (curr->op, BIT_NOT_EXPR); |
678 | if (negateop == NULL_TREE && notop == NULL_TREE) | |
0e0ed594 JL |
679 | return false; |
680 | ||
681 | /* Any non-negated version will have a rank that is one less than | |
682 | the current rank. So once we hit those ranks, if we don't find | |
683 | one, we can stop. */ | |
684 | ||
685 | for (i = currindex + 1; | |
686 | VEC_iterate (operand_entry_t, *ops, i, oe) | |
687 | && oe->rank >= curr->rank - 1 ; | |
688 | i++) | |
012309e6 | 689 | { |
0e0ed594 JL |
690 | if (oe->op == negateop) |
691 | { | |
692 | ||
693 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
694 | { | |
695 | fprintf (dump_file, "Equivalence: "); | |
696 | print_generic_expr (dump_file, negateop, 0); | |
697 | fprintf (dump_file, " + -"); | |
698 | print_generic_expr (dump_file, oe->op, 0); | |
699 | fprintf (dump_file, " -> 0\n"); | |
700 | } | |
701 | ||
702 | VEC_ordered_remove (operand_entry_t, *ops, i); | |
e8160c9a | 703 | add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); |
0e0ed594 JL |
704 | VEC_ordered_remove (operand_entry_t, *ops, currindex); |
705 | reassociate_stats.ops_eliminated ++; | |
706 | ||
44741f03 AM |
707 | return true; |
708 | } | |
709 | else if (oe->op == notop) | |
710 | { | |
711 | tree op_type = TREE_TYPE (oe->op); | |
712 | ||
713 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
714 | { | |
715 | fprintf (dump_file, "Equivalence: "); | |
716 | print_generic_expr (dump_file, notop, 0); | |
717 | fprintf (dump_file, " + ~"); | |
718 | print_generic_expr (dump_file, oe->op, 0); | |
719 | fprintf (dump_file, " -> -1\n"); | |
720 | } | |
721 | ||
722 | VEC_ordered_remove (operand_entry_t, *ops, i); | |
723 | add_to_ops_vec (ops, build_int_cst_type (op_type, -1)); | |
724 | VEC_ordered_remove (operand_entry_t, *ops, currindex); | |
725 | reassociate_stats.ops_eliminated ++; | |
726 | ||
0e0ed594 JL |
727 | return true; |
728 | } | |
012309e6 DB |
729 | } |
730 | ||
b0aef8a8 MK |
731 | /* CURR->OP is a negate expr in a plus expr: save it for later |
732 | inspection in repropagate_negates(). */ | |
44741f03 AM |
733 | if (negateop != NULL_TREE) |
734 | VEC_safe_push (tree, heap, plus_negates, curr->op); | |
b0aef8a8 | 735 | |
0e0ed594 JL |
736 | return false; |
737 | } | |
738 | ||
739 | /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a | |
740 | bitwise not expression, look in OPS for a corresponding operand to | |
741 | cancel it out. If we find one, remove the other from OPS, replace | |
742 | OPS[CURRINDEX] with 0, and return true. Otherwise, return | |
743 | false. */ | |
744 | ||
745 | static bool | |
746 | eliminate_not_pairs (enum tree_code opcode, | |
747 | VEC (operand_entry_t, heap) **ops, | |
748 | unsigned int currindex, | |
749 | operand_entry_t curr) | |
750 | { | |
751 | tree notop; | |
752 | unsigned int i; | |
753 | operand_entry_t oe; | |
754 | ||
755 | if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) | |
756 | || TREE_CODE (curr->op) != SSA_NAME) | |
757 | return false; | |
758 | ||
759 | notop = get_unary_op (curr->op, BIT_NOT_EXPR); | |
760 | if (notop == NULL_TREE) | |
761 | return false; | |
762 | ||
763 | /* Any non-not version will have a rank that is one less than | |
764 | the current rank. So once we hit those ranks, if we don't find | |
765 | one, we can stop. */ | |
766 | ||
767 | for (i = currindex + 1; | |
768 | VEC_iterate (operand_entry_t, *ops, i, oe) | |
769 | && oe->rank >= curr->rank - 1; | |
770 | i++) | |
012309e6 | 771 | { |
0e0ed594 | 772 | if (oe->op == notop) |
012309e6 | 773 | { |
0e0ed594 | 774 | if (dump_file && (dump_flags & TDF_DETAILS)) |
012309e6 | 775 | { |
0e0ed594 JL |
776 | fprintf (dump_file, "Equivalence: "); |
777 | print_generic_expr (dump_file, notop, 0); | |
778 | if (opcode == BIT_AND_EXPR) | |
779 | fprintf (dump_file, " & ~"); | |
780 | else if (opcode == BIT_IOR_EXPR) | |
781 | fprintf (dump_file, " | ~"); | |
782 | print_generic_expr (dump_file, oe->op, 0); | |
783 | if (opcode == BIT_AND_EXPR) | |
784 | fprintf (dump_file, " -> 0\n"); | |
785 | else if (opcode == BIT_IOR_EXPR) | |
786 | fprintf (dump_file, " -> -1\n"); | |
012309e6 | 787 | } |
0e0ed594 JL |
788 | |
789 | if (opcode == BIT_AND_EXPR) | |
e8160c9a | 790 | oe->op = build_zero_cst (TREE_TYPE (oe->op)); |
0e0ed594 JL |
791 | else if (opcode == BIT_IOR_EXPR) |
792 | oe->op = build_low_bits_mask (TREE_TYPE (oe->op), | |
793 | TYPE_PRECISION (TREE_TYPE (oe->op))); | |
794 | ||
b8698a0f | 795 | reassociate_stats.ops_eliminated |
0e0ed594 JL |
796 | += VEC_length (operand_entry_t, *ops) - 1; |
797 | VEC_free (operand_entry_t, heap, *ops); | |
798 | *ops = NULL; | |
799 | VEC_safe_push (operand_entry_t, heap, *ops, oe); | |
800 | return true; | |
012309e6 DB |
801 | } |
802 | } | |
0e0ed594 JL |
803 | |
804 | return false; | |
012309e6 DB |
805 | } |
806 | ||
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.

   OPCODE is the code of the operation combining all entries in *OPS.
   Relies on the constant (if any) sitting last in *OPS, which is where
   the operand sorting places rank-0 entries.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   VEC(operand_entry_t, heap) **ops)
{
  operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
  tree type = TREE_TYPE (oelast->op);

  /* Only a rank-0 (constant/invariant) last operand of integral or
     float type can trigger any of the identities below.  */
  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  /* x & 0 == 0: the zero absorbs everything else.  */
	  if (integer_zerop (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated
		    += VEC_length (operand_entry_t, *ops) - 1;

		  /* Replace the whole list by just the zero.  */
		  VEC_free (operand_entry_t, heap, *ops);
		  *ops = NULL;
		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
		  return;
		}
	    }
	  /* x & -1 == x: drop the all-ones constant.  */
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  /* x | -1 == -1: the all-ones constant absorbs everything.  */
	  if (integer_all_onesp (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated
		    += VEC_length (operand_entry_t, *ops) - 1;

		  VEC_free (operand_entry_t, heap, *ops);
		  *ops = NULL;
		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
		  return;
		}
	    }
	  /* x | 0 == x: drop the zero.  */
	  else if (integer_zerop (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  /* x * 0 == 0, but for floats only when neither NaNs nor
	     signed zeros have to be honored.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (TYPE_MODE (type))
		  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
		  && real_zerop (oelast->op)))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated
		    += VEC_length (operand_entry_t, *ops) - 1;
		  VEC_free (operand_entry_t, heap, *ops);
		  *ops = NULL;
		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
		  return;
		}
	    }
	  /* x * 1 == x; for floats only valid when signaling NaNs need
	     not be preserved.  */
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (TYPE_MODE (type))
		       && real_onep (oelast->op)))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		  /* NOTE(review): this case returns while the analogous
		     "& -1" / "| 0" cases fall through to break; behavior
		     is the same since break follows immediately.  */
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  /* x ^ 0, x + 0, x - 0 == x; float addition of zero is only
	     droppable when fold_real_zero_addition_p says so.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
941 | ||

static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
				 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;		/* Number of times the (op, oecode) pair occurs.  */
  int id;		/* Unique id, used only to stabilize qsort
			   (see oecount_cmp).  */
  enum tree_code oecode;	/* Code of the operation (MULT/RDIV).  */
  tree op;		/* The operand itself.  */
} oecount;

DEF_VEC_O(oecount);
DEF_VEC_ALLOC_O(oecount,heap);

/* The heap for the oecount hashtable and the sorted list of operands.  */
static VEC (oecount, heap) *cvec;
959 | ||
960 | /* Hash function for oecount. */ | |
961 | ||
962 | static hashval_t | |
963 | oecount_hash (const void *p) | |
964 | { | |
965 | const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42); | |
966 | return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; | |
967 | } | |
968 | ||
969 | /* Comparison function for oecount. */ | |
970 | ||
971 | static int | |
972 | oecount_eq (const void *p1, const void *p2) | |
973 | { | |
974 | const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42); | |
975 | const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42); | |
976 | return (c1->oecode == c2->oecode | |
977 | && c1->op == c2->op); | |
978 | } | |
979 | ||
980 | /* Comparison function for qsort sorting oecount elements by count. */ | |
981 | ||
982 | static int | |
983 | oecount_cmp (const void *p1, const void *p2) | |
984 | { | |
985 | const oecount *c1 = (const oecount *)p1; | |
986 | const oecount *c2 = (const oecount *)p2; | |
f1665f5c DK |
987 | if (c1->cnt != c2->cnt) |
988 | return c1->cnt - c2->cnt; | |
989 | else | |
990 | /* If counts are identical, use unique IDs to stabilize qsort. */ | |
991 | return c1->id - c2->id; | |
25c6036a RG |
992 | } |
993 | ||
c2723bde BS |
994 | /* Return TRUE iff STMT represents a builtin call that raises OP |
995 | to some exponent. */ | |
996 | ||
997 | static bool | |
998 | stmt_is_power_of_op (gimple stmt, tree op) | |
999 | { | |
1000 | tree fndecl; | |
1001 | ||
1002 | if (!is_gimple_call (stmt)) | |
1003 | return false; | |
1004 | ||
1005 | fndecl = gimple_call_fndecl (stmt); | |
1006 | ||
1007 | if (!fndecl | |
1008 | || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) | |
1009 | return false; | |
1010 | ||
1011 | switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) | |
1012 | { | |
1013 | CASE_FLT_FN (BUILT_IN_POW): | |
1014 | CASE_FLT_FN (BUILT_IN_POWI): | |
1015 | return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); | |
1016 | ||
1017 | default: | |
1018 | return false; | |
1019 | } | |
1020 | } | |
1021 | ||
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.

   For pow the exponent is a REAL_CST which is converted to integer,
   decremented, and written back as a real; for powi the integer
   exponent is decremented directly.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      /* NOTE(review): assumes the exponent is an integral-valued
	 REAL_CST — presumably guaranteed by the caller's checks.  */
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, 0, 0);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      /* stmt_is_power_of_op only accepts pow/powi.  */
      gcc_unreachable ();
    }
}
1053 | ||
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  /* STMT may be either a __builtin_pow* call or an assignment.  */
  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  /* A constant replacement may need operand caches recomputed; an
     SSA name substitution keeps the stmt valid as-is.  */
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  gsi_remove (&gsi, true);
  release_defs (stmt);

  /* NOTE(review): the virtual def is unlinked only after release_defs
     has already run on STMT — confirm the vdef chain is still intact
     at this point.  */
  if (is_gimple_call (stmt))
    unlink_stmt_vdef (stmt);
}
1085 | ||

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      /* For a multiplication chain, OP may be supplied by a
	 __builtin_pow* call: decrement its exponent instead of
	 removing a whole statement.  When the exponent drops to 1,
	 replace the call's single use with OP itself.  */
      if (opcode == MULT_EXPR
	  && stmt_is_power_of_op (stmt, op))
	{
	  if (decrement_power (stmt) == 1)
	    propagate_op_to_single_use (op, stmt, def);
	  return;
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  propagate_op_to_single_use (name, stmt, def);
	  return;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
	{
	  gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      return;
	    }
	}

      /* Continue walking the chain.  The caller guarantees OP occurs
	 somewhere in it, so rhs1 must be another SSA link.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
1144 | ||
1145 | /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for | |
1146 | the result. Places the statement after the definition of either | |
1147 | OP1 or OP2. Returns the new statement. */ | |
1148 | ||
1149 | static gimple | |
83d5977e | 1150 | build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) |
25c6036a RG |
1151 | { |
1152 | gimple op1def = NULL, op2def = NULL; | |
1153 | gimple_stmt_iterator gsi; | |
1154 | tree op; | |
1155 | gimple sum; | |
1156 | ||
1157 | /* Create the addition statement. */ | |
83d5977e RG |
1158 | op = make_ssa_name (type, NULL); |
1159 | sum = gimple_build_assign_with_ops (opcode, op, op1, op2); | |
25c6036a RG |
1160 | |
1161 | /* Find an insertion place and insert. */ | |
1162 | if (TREE_CODE (op1) == SSA_NAME) | |
1163 | op1def = SSA_NAME_DEF_STMT (op1); | |
1164 | if (TREE_CODE (op2) == SSA_NAME) | |
1165 | op2def = SSA_NAME_DEF_STMT (op2); | |
1166 | if ((!op1def || gimple_nop_p (op1def)) | |
1167 | && (!op2def || gimple_nop_p (op2def))) | |
1168 | { | |
1d21a8e5 | 1169 | gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR)); |
25c6036a RG |
1170 | gsi_insert_before (&gsi, sum, GSI_NEW_STMT); |
1171 | } | |
1172 | else if ((!op1def || gimple_nop_p (op1def)) | |
1173 | || (op2def && !gimple_nop_p (op2def) | |
1174 | && stmt_dominates_stmt_p (op1def, op2def))) | |
1175 | { | |
1176 | if (gimple_code (op2def) == GIMPLE_PHI) | |
1177 | { | |
1d21a8e5 | 1178 | gsi = gsi_after_labels (gimple_bb (op2def)); |
25c6036a RG |
1179 | gsi_insert_before (&gsi, sum, GSI_NEW_STMT); |
1180 | } | |
1181 | else | |
1182 | { | |
e7089ecf RG |
1183 | if (!stmt_ends_bb_p (op2def)) |
1184 | { | |
1185 | gsi = gsi_for_stmt (op2def); | |
1186 | gsi_insert_after (&gsi, sum, GSI_NEW_STMT); | |
1187 | } | |
1188 | else | |
1189 | { | |
1190 | edge e; | |
1191 | edge_iterator ei; | |
1192 | ||
1193 | FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs) | |
1194 | if (e->flags & EDGE_FALLTHRU) | |
1195 | gsi_insert_on_edge_immediate (e, sum); | |
1196 | } | |
25c6036a RG |
1197 | } |
1198 | } | |
1199 | else | |
1200 | { | |
1201 | if (gimple_code (op1def) == GIMPLE_PHI) | |
1202 | { | |
1d21a8e5 | 1203 | gsi = gsi_after_labels (gimple_bb (op1def)); |
25c6036a RG |
1204 | gsi_insert_before (&gsi, sum, GSI_NEW_STMT); |
1205 | } | |
1206 | else | |
1207 | { | |
e7089ecf RG |
1208 | if (!stmt_ends_bb_p (op1def)) |
1209 | { | |
1210 | gsi = gsi_for_stmt (op1def); | |
1211 | gsi_insert_after (&gsi, sum, GSI_NEW_STMT); | |
1212 | } | |
1213 | else | |
1214 | { | |
1215 | edge e; | |
1216 | edge_iterator ei; | |
1217 | ||
1218 | FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs) | |
1219 | if (e->flags & EDGE_FALLTHRU) | |
1220 | gsi_insert_on_edge_immediate (e, sum); | |
1221 | } | |
25c6036a RG |
1222 | } |
1223 | } | |
1224 | update_stmt (sum); | |
1225 | ||
1226 | return sum; | |
1227 | } | |
1228 | ||
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b ) * x * y in one invocation of the reassociation pass.  */

static bool
undistribute_ops_list (enum tree_code opcode,
		       VEC (operand_entry_t, heap) **ops, struct loop *loop)
{
  unsigned int length = VEC_length (operand_entry_t, *ops);
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  VEC (operand_entry_t, heap) **subops;
  htab_t ctable;
  bool changed = false;
  int next_oecount_id = 0;

  /* Only addition chains are un-distributed.  */
  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  sbitmap_zero (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      SET_BIT (candidates, i);
      nr_candidates++;
    }

  /* Un-distribution needs at least two chains to factor from.  */
  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
	VEC_index (operand_entry_t, *ops,
		   sbitmap_first_set_bit (candidates))->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec = NULL;
  ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
  subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
		     VEC_length (operand_entry_t, *ops));
  EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
	{
	  oecount c;
	  void **slot;
	  size_t idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  VEC_safe_push (oecount, heap, cvec, &c);
	  /* The hash table stores indices into CVEC, biased by 42 so
	     that a stored index cast to a pointer is never 0 (which
	     "if (!*slot)" below treats as an empty slot).  oecount_hash
	     and oecount_eq undo the bias.  */
	  idx = VEC_length (oecount, cvec) + 41;
	  slot = htab_find_slot (ctable, (void *)idx, INSERT);
	  if (!*slot)
	    {
	      *slot = (void *)idx;
	    }
	  else
	    {
	      /* Already counted: drop the speculative push and bump
		 the existing entry's count.  */
	      VEC_pop (oecount, cvec);
	      VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
	    }
	}
    }
  htab_delete (ctable);

  /* Sort the counting table.  */
  VEC_qsort (oecount, cvec, oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (oecount, cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op, 0);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.
     The sorted vector is consumed back-to-front, highest count last.  */
  candidates2 = sbitmap_alloc (length);
  while (!VEC_empty (oecount, cvec))
    {
      oecount *c = VEC_last (oecount, cvec);
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      sbitmap_zero (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
	{
	  gimple oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = VEC_index (operand_entry_t, *ops, i)->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  SET_BIT (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry_t oe1, oe2;
	  gimple prod;
	  int first = sbitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = VEC_index (operand_entry_t, *ops, first);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op, 0);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple sum;
	      oe2 = VEC_index (operand_entry_t, *ops, i);
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op, 0);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      /* Neutralize the merged entry: + 0 contributes nothing
		 and will be eliminated later.  */
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op, 0);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  VEC_free (operand_entry_t, heap, subops[first]);

	  changed = true;
	}

      VEC_pop (oecount, cvec);
    }

  for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
    VEC_free (operand_entry_t, heap, subops[i]);
  free (subops);
  VEC_free (oecount, heap, cvec);
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
1482 | ||

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).
   Returns true if a combination happened (and OPS was modified).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
				VEC (operand_entry_t, heap) **ops,
				unsigned int currindex,
				operand_entry_t curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple def1, def2;
  int i;
  operand_entry_t oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1;
       VEC_iterate (operand_entry_t, *ops, i, oe);
       i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
	continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
	continue;

      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
	t = maybe_fold_or_comparisons (lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
      if (!t)
	continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
	t = fold_convert (TREE_TYPE (curr->op), t);

      /* Before committing, make sure the combined result is something
	 we can actually materialize as a GIMPLE comparison.  */
      if (TREE_CODE (t) != INTEGER_CST
	  && !operand_equal_p (t, curr->op, 0))
	{
	  enum tree_code subcode;
	  tree newop1, newop2;
	  if (!COMPARISON_CLASS_P (t))
	    continue;
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
	    continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op, 0);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op, 0);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t, 0);
	  fprintf (dump_file, "\n");
	}

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      VEC_ordered_remove (operand_entry_t, *ops, i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
	{
	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
	  add_to_ops_vec (ops, t);
	}
      else if (!operand_equal_p (t, curr->op, 0))
	{
	  gimple sum;
	  enum tree_code subcode;
	  tree newop1;
	  tree newop2;
	  gcc_assert (COMPARISON_CLASS_P (t));
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  gcc_checking_assert (is_gimple_val (newop1)
			       && is_gimple_val (newop2));
	  /* Emit the combined comparison as a new statement and let
	     CURR carry its result.  */
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
	}
      return true;
    }

  return false;
}
25c6036a | 1610 | |

/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  May recurse (both via
   the constant-merging path and when any elimination succeeded) until
   the list reaches a fixed point.  */

static void
optimize_ops_list (enum tree_code opcode,
		   VEC (operand_entry_t, heap) **ops)
{
  unsigned int length = VEC_length (operand_entry_t, *ops);
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = VEC_last (operand_entry_t, *ops);

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);

      if (oelm1->rank == 0
	  && is_gimple_min_invariant (oelm1->op)
	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
					TREE_TYPE (oelast->op)))
	{
	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
				     oelm1->op, oelast->op);

	  /* fold_binary may fail or produce a non-invariant; only
	     commit when the fold yielded another constant.  */
	  if (folded && is_gimple_min_invariant (folded))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Merging constants\n");

	      VEC_pop (operand_entry_t, *ops);
	      VEC_pop (operand_entry_t, *ops);

	      add_to_ops_vec (ops, folded);
	      reassociate_stats.constants_eliminated++;

	      optimize_ops_list (opcode, ops);
	      return;
	    }
	}
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  /* Walk the list applying pairwise eliminations; I is only advanced
     when nothing was removed at the current position.  */
  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
	return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
	{
	  if (done)
	    return;
	  iterate = true;
	  oelast = NULL;
	  continue;
	}
      oelast = oe;
      i++;
    }

  /* NOTE(review): these two stores appear dead — LENGTH and OELAST are
     not read again before the function returns or recurses.  */
  length = VEC_length (operand_entry_t, *ops);
  oelast = VEC_last (operand_entry_t, *ops);

  if (iterate)
    optimize_ops_list (opcode, ops);
}
1690 | ||
0ccb5dbf JJ |
1691 | /* The following functions are subroutines to optimize_range_tests and allow |
1692 | it to try to change a logical combination of comparisons into a range | |
1693 | test. | |
1694 | ||
1695 | For example, both | |
1696 | X == 2 || X == 5 || X == 3 || X == 4 | |
1697 | and | |
1698 | X >= 2 && X <= 5 | |
1699 | are converted to | |
1700 | (unsigned) (X - 2) <= 3 | |
1701 | ||
1702 | For more information see comments above fold_test_range in fold-const.c, | |
1703 | this implementation is for GIMPLE. */ | |
1704 | ||
/* A single range test derived from one operand of the reassociated
   expression: the operand is equivalent to EXP being (or, if !IN_P,
   not being) in [LOW, HIGH].  */
struct range_entry
{
  /* SSA_NAME the range applies to, or NULL_TREE if no usable range
     could be derived (see init_range_entry).  */
  tree exp;
  /* Lower bound; NULL_TREE means the type's minimum.  */
  tree low;
  /* Upper bound; NULL_TREE means the type's maximum.  */
  tree high;
  /* True if the operand tests membership IN the range, false if it
     tests being outside of it.  */
  bool in_p;
  /* True if deriving this range assumed signed overflow is undefined.  */
  bool strict_overflow_p;
  /* IDX is the index of the corresponding entry in the ops vector;
     NEXT is not referenced in the code visible here — presumably used
     by later range-test chaining; confirm against the full file.  */
  unsigned int idx, next;
};
1714 | ||
1715 | /* This is similar to make_range in fold-const.c, but on top of | |
1716 | GIMPLE instead of trees. */ | |
1717 | ||
static void
init_range_entry (struct range_entry *r, tree exp)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  /* Initialize R to "no usable range"; only filled in at the end if
     EXP turned out to be a boolean-like value.  */
  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  /* Only integral SSA_NAMEs can yield a range test.  */
  if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = build_int_cst (TREE_TYPE (exp), 0);
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      /* 1-bit unsigned types act like booleans; a signed 1-bit type
	 holds -1/0 and is not handled.  */
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
	is_bool = true;
      else
	return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  /* Walk the SSA def chain of EXP, refining [LOW, HIGH]/IN_P at each
     defining statement until no further refinement is possible.  */
  while (1)
    {
      gimple stmt;
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (TREE_CODE (exp) != SSA_NAME)
	break;

      stmt = SSA_NAME_DEF_STMT (exp);
      if (!is_gimple_assign (stmt))
	break;

      code = gimple_assign_rhs_code (stmt);
      arg0 = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (arg0) != SSA_NAME)
	break;
      arg1 = gimple_assign_rhs2 (stmt);
      exp_type = TREE_TYPE (exp);
      loc = gimple_location (stmt);
      switch (code)
	{
	case BIT_NOT_EXPR:
	  /* On booleans, ~x is logical negation: flip IN_P and keep
	     walking through the operand.  */
	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
	    {
	      in_p = !in_p;
	      exp = arg0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  /* Plain copy; look through it.  */
	  exp = arg0;
	  continue;
	CASE_CONVERT:
	  if (is_bool)
	    goto do_default;
	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
	    {
	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
		is_bool = true;
	      else
		return;
	    }
	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
	    is_bool = true;
	  goto do_default;
	case EQ_EXPR:
	case NE_EXPR:
	case LT_EXPR:
	case LE_EXPR:
	case GE_EXPR:
	case GT_EXPR:
	  /* A comparison result is boolean by definition.  */
	  is_bool = true;
	  /* FALLTHRU */
	default:
	  if (!is_bool)
	    return;
	do_default:
	  /* Let the fold-const machinery do one refinement step.  */
	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
				  &low, &high, &in_p,
				  &strict_overflow_p);
	  if (nexp != NULL_TREE)
	    {
	      exp = nexp;
	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
	      continue;
	    }
	  break;
	}
      break;
    }
  /* Commit the range only for boolean-like EXPs; otherwise R stays in
     its "no range" state set above.  */
  if (is_bool)
    {
      r->exp = exp;
      r->in_p = in_p;
      r->low = low;
      r->high = high;
      r->strict_overflow_p = strict_overflow_p;
    }
}
1835 | ||
1836 | /* Comparison function for qsort. Sort entries | |
1837 | without SSA_NAME exp first, then with SSA_NAMEs sorted | |
1838 | by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs | |
1839 | by increasing ->low and if ->low is the same, by increasing | |
1840 | ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE | |
1841 | maximum. */ | |
1842 | ||
static int
range_entry_cmp (const void *a, const void *b)
{
  const struct range_entry *p = (const struct range_entry *) a;
  const struct range_entry *q = (const struct range_entry *) b;

  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
    {
      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
	{
	  /* Group range_entries for the same SSA_NAME together.  */
	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
	    return -1;
	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
	    return 1;
	  /* If ->low is different, NULL low goes first, then by
	     ascending low.  fold_binary may fail to fold the
	     comparison (returns NULL_TREE), in which case the bounds
	     compare equal here and the idx tie-break below decides.  */
	  if (p->low != NULL_TREE)
	    {
	      if (q->low != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->low, q->low);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->low, q->low);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return 1;
	    }
	  else if (q->low != NULL_TREE)
	    return -1;
	  /* If ->high is different, NULL high goes last, before that by
	     ascending high.  */
	  if (p->high != NULL_TREE)
	    {
	      if (q->high != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->high, q->high);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->high, q->high);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return -1;
	    }
	  else if (p->high != NULL_TREE)
	    return 1;
	  /* If both ranges are the same, sort below by ascending idx.  */
	}
      else
	return 1;
    }
  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
    return -1;

  /* Final tie-break: ascending idx.  idx values are distinct, so the
     result is never 0 — qsort stability does not matter.  */
  if (p->idx < q->idx)
    return -1;
  else
    {
      gcc_checking_assert (p->idx > q->idx);
      return 1;
    }
}
1914 | ||
1915 | /* Helper routine of optimize_range_test. | |
1916 | [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for | |
1917 | RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, | |
1918 | OPCODE and OPS are arguments of optimize_range_tests. Return | |
1919 | true if the range merge has been successful. */ | |
1920 | ||
static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
		   unsigned int count, enum tree_code opcode,
		   VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
		   tree low, tree high, bool strict_overflow_p)
{
  tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
  location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
  /* Build the combined test as a tree first; if build_range_check
     can't express it we bail out without touching anything.  */
  tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;

  if (tem == NULL_TREE)
    return false;

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
		"assuming signed overflow does not occur "
		"when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp, 0);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low, 0);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high, 0);
      fprintf (dump_file, "]");
      for (r = otherrange; r < otherrange + count; r++)
	{
	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
	  print_generic_expr (dump_file, r->low, 0);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high, 0);
	  fprintf (dump_file, "]");
	}
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem, 0);
      fprintf (dump_file, "\n");
    }

  /* optimize_range_tests inverted the ranges for BIT_IOR_EXPR up
     front; invert the emitted test back here to compensate.  */
  if (opcode == BIT_IOR_EXPR)
    tem = invert_truthvalue_loc (loc, tem);

  /* Gimplify the combined test right before OP's defining statement
     and let it replace OP in the ops vector.  */
  tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
  tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
				  GSI_SAME_STMT);

  VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
  range->exp = exp;
  range->low = low;
  range->high = high;
  range->in_p = in_p;
  range->strict_overflow_p = false;

  /* Mark the absorbed entries dead: error_mark_node ops are squeezed
     out of the vector by optimize_range_tests afterwards.  */
  for (range = otherrange; range < otherrange + count; range++)
    {
      VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
      range->exp = NULL_TREE;
    }
  return true;
}
1986 | ||
1987 | /* Optimize range tests, similarly how fold_range_test optimizes | |
1988 | it on trees. The tree code for the binary | |
1989 | operation between all the operands is OPCODE. */ | |
1990 | ||
static void
optimize_range_tests (enum tree_code opcode,
		      VEC (operand_entry_t, heap) **ops)
{
  unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
  operand_entry_t oe;
  struct range_entry *ranges;
  bool any_changes = false;

  if (length == 1)
    return;

  /* Derive a range test from every operand.  */
  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
    {
      ranges[i].idx = i;
      init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
      /* For | invert it now, we will invert it again before emitting
	 the optimized expression.  */
      if (opcode == BIT_IOR_EXPR)
	ranges[i].in_p = !ranges[i].in_p;
    }

  /* Sort so that entries for the same SSA_NAME are adjacent (entries
     without a usable range sort first).  */
  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
      break;

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
    {
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      /* Accumulate as many consecutive same-SSA_NAME ranges into
	 [LOW, HIGH] as merge_ranges can combine.  */
      for (j = i + 1; j < length; j++)
	{
	  if (ranges[i].exp != ranges[j].exp)
	    break;
	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
			     ranges[j].in_p, ranges[j].low, ranges[j].high))
	    break;
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	}

      /* Nothing merged.  */
      if (j == i + 1)
	continue;

      if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
			     ops, ranges[i].exp, in_p, low, high,
			     strict_overflow_p))
	{
	  i = j - 1;
	  any_changes = true;
	}
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
	 while update_range_test would fail.  */
      else if (update_fail_count == 64)
	i = j - 1;
      else
	++update_fail_count;
    }

  /* Optimize X == CST1 || X == CST2
     if popcount (CST1 ^ CST2) == 1 into
     (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
     Similarly for ranges.  E.g.
     X != 2 && X != 3 && X != 10 && X != 11
     will be transformed by the above loop into
     (X - 2U) <= 1U && (X - 10U) <= 1U
     and this loop can transform that into
     ((X & ~8) - 2U) <= 1U.  */
  for (i = first; i < length; i++)
    {
      tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
	continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
	continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
	continue;
      /* The j < i + 64 cap bounds work per entry to keep this loop
	 from going quadratic on long operand lists.  */
      for (j = i + 1; j < length && j < i + 64; j++)
	{
	  if (ranges[j].exp == NULL_TREE)
	    continue;
	  if (ranges[i].exp != ranges[j].exp)
	    break;
	  if (ranges[j].in_p)
	    continue;
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	    continue;
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  /* The two ranges must be disjoint (LOWJ above HIGHI).  */
	  tem = fold_binary (GT_EXPR, boolean_type_node,
			     lowj, highi);
	  if (tem == NULL_TREE || !integer_onep (tem))
	    continue;
	  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
	  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
	    continue;
	  gcc_checking_assert (!integer_zerop (lowxor));
	  /* (x & (x - 1)) == 0 checks that LOWXOR is a power of two,
	     i.e. the low bounds differ in exactly one bit.  */
	  tem = fold_binary (MINUS_EXPR, type, lowxor,
			     build_int_cst (type, 1));
	  if (tem == NULL_TREE)
	    continue;
	  tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
	  if (tem == NULL_TREE || !integer_zerop (tem))
	    continue;
	  /* The high bounds must differ in the same single bit.  */
	  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
	  if (!tree_int_cst_equal (lowxor, highxor))
	    continue;
	  /* Mask that differing bit off of X and of both bounds.  */
	  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
	  exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
	  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
	  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
	  if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
				 ranges[i].in_p, lowj, highj,
				 ranges[i].strict_overflow_p
				 || ranges[j].strict_overflow_p))
	    {
	      any_changes = true;
	      break;
	    }
	}
    }

  /* Compact out the entries update_range_test marked dead.  */
  if (any_changes)
    {
      j = 0;
      FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
	{
	  if (oe->op == error_mark_node)
	    continue;
	  else if (i != j)
	    VEC_replace (operand_entry_t, *ops, j, oe);
	  j++;
	}
      VEC_truncate (operand_entry_t, *ops, j);
    }

  XDELETEVEC (ranges);
}
2143 | ||
0e0ed594 JL |
2144 | /* Return true if OPERAND is defined by a PHI node which uses the LHS |
2145 | of STMT in it's operands. This is also known as a "destructive | |
2146 | update" operation. */ | |
2147 | ||
2148 | static bool | |
726a989a | 2149 | is_phi_for_stmt (gimple stmt, tree operand) |
0e0ed594 | 2150 | { |
726a989a RB |
2151 | gimple def_stmt; |
2152 | tree lhs; | |
0e0ed594 JL |
2153 | use_operand_p arg_p; |
2154 | ssa_op_iter i; | |
2155 | ||
2156 | if (TREE_CODE (operand) != SSA_NAME) | |
2157 | return false; | |
2158 | ||
726a989a RB |
2159 | lhs = gimple_assign_lhs (stmt); |
2160 | ||
0e0ed594 | 2161 | def_stmt = SSA_NAME_DEF_STMT (operand); |
726a989a | 2162 | if (gimple_code (def_stmt) != GIMPLE_PHI) |
0e0ed594 JL |
2163 | return false; |
2164 | ||
2165 | FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE) | |
2166 | if (lhs == USE_FROM_PTR (arg_p)) | |
2167 | return true; | |
2168 | return false; | |
2169 | } | |
2170 | ||
ec81df7d JJ |
2171 | /* Remove def stmt of VAR if VAR has zero uses and recurse |
2172 | on rhs1 operand if so. */ | |
2173 | ||
2174 | static void | |
2175 | remove_visited_stmt_chain (tree var) | |
2176 | { | |
2177 | gimple stmt; | |
2178 | gimple_stmt_iterator gsi; | |
2179 | ||
2180 | while (1) | |
2181 | { | |
2182 | if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) | |
2183 | return; | |
2184 | stmt = SSA_NAME_DEF_STMT (var); | |
a6f8851e BS |
2185 | if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) |
2186 | { | |
2187 | var = gimple_assign_rhs1 (stmt); | |
a6f8851e BS |
2188 | gsi = gsi_for_stmt (stmt); |
2189 | gsi_remove (&gsi, true); | |
2190 | release_defs (stmt); | |
a6f8851e BS |
2191 | } |
2192 | else | |
ec81df7d | 2193 | return; |
ec81df7d JJ |
2194 | } |
2195 | } | |
2196 | ||
df7b0cc4 EI |
2197 | /* This function checks three consequtive operands in |
2198 | passed operands vector OPS starting from OPINDEX and | |
2199 | swaps two operands if it is profitable for binary operation | |
2200 | consuming OPINDEX + 1 abnd OPINDEX + 2 operands. | |
2201 | ||
2202 | We pair ops with the same rank if possible. | |
2203 | ||
2204 | The alternative we try is to see if STMT is a destructive | |
2205 | update style statement, which is like: | |
2206 | b = phi (a, ...) | |
2207 | a = c + b; | |
2208 | In that case, we want to use the destructive update form to | |
2209 | expose the possible vectorizer sum reduction opportunity. | |
2210 | In that case, the third operand will be the phi node. This | |
2211 | check is not performed if STMT is null. | |
2212 | ||
2213 | We could, of course, try to be better as noted above, and do a | |
2214 | lot of work to try to find these opportunities in >3 operand | |
2215 | cases, but it is unlikely to be worth it. */ | |
2216 | ||
2217 | static void | |
2218 | swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops, | |
2219 | unsigned int opindex, gimple stmt) | |
2220 | { | |
2221 | operand_entry_t oe1, oe2, oe3; | |
2222 | ||
2223 | oe1 = VEC_index (operand_entry_t, ops, opindex); | |
2224 | oe2 = VEC_index (operand_entry_t, ops, opindex + 1); | |
2225 | oe3 = VEC_index (operand_entry_t, ops, opindex + 2); | |
2226 | ||
2227 | if ((oe1->rank == oe2->rank | |
2228 | && oe2->rank != oe3->rank) | |
2229 | || (stmt && is_phi_for_stmt (stmt, oe3->op) | |
2230 | && !is_phi_for_stmt (stmt, oe1->op) | |
2231 | && !is_phi_for_stmt (stmt, oe2->op))) | |
2232 | { | |
2233 | struct operand_entry temp = *oe3; | |
2234 | oe3->op = oe1->op; | |
2235 | oe3->rank = oe1->rank; | |
2236 | oe1->op = temp.op; | |
2237 | oe1->rank= temp.rank; | |
2238 | } | |
2239 | else if ((oe1->rank == oe3->rank | |
2240 | && oe2->rank != oe3->rank) | |
2241 | || (stmt && is_phi_for_stmt (stmt, oe2->op) | |
2242 | && !is_phi_for_stmt (stmt, oe1->op) | |
2243 | && !is_phi_for_stmt (stmt, oe3->op))) | |
2244 | { | |
2245 | struct operand_entry temp = *oe2; | |
2246 | oe2->op = oe1->op; | |
2247 | oe2->rank = oe1->rank; | |
2248 | oe1->op = temp.op; | |
2249 | oe1->rank= temp.rank; | |
2250 | } | |
2251 | } | |
2252 | ||
0e0ed594 JL |
2253 | /* Recursively rewrite our linearized statements so that the operators |
2254 | match those in OPS[OPINDEX], putting the computation in rank | |
2255 | order. */ | |
2256 | ||
2257 | static void | |
726a989a | 2258 | rewrite_expr_tree (gimple stmt, unsigned int opindex, |
ec81df7d | 2259 | VEC(operand_entry_t, heap) * ops, bool moved) |
0e0ed594 | 2260 | { |
726a989a RB |
2261 | tree rhs1 = gimple_assign_rhs1 (stmt); |
2262 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
0e0ed594 JL |
2263 | operand_entry_t oe; |
2264 | ||
df7b0cc4 EI |
2265 | /* If we have three operands left, then we want to make sure the ones |
2266 | that get the double binary op are chosen wisely. */ | |
0e0ed594 | 2267 | if (opindex + 3 == VEC_length (operand_entry_t, ops)) |
df7b0cc4 | 2268 | swap_ops_for_binary_stmt (ops, opindex, stmt); |
0e0ed594 JL |
2269 | |
2270 | /* The final recursion case for this function is that you have | |
2271 | exactly two operations left. | |
2272 | If we had one exactly one op in the entire list to start with, we | |
2273 | would have never called this function, and the tail recursion | |
2274 | rewrites them one at a time. */ | |
2275 | if (opindex + 2 == VEC_length (operand_entry_t, ops)) | |
2276 | { | |
2277 | operand_entry_t oe1, oe2; | |
2278 | ||
2279 | oe1 = VEC_index (operand_entry_t, ops, opindex); | |
2280 | oe2 = VEC_index (operand_entry_t, ops, opindex + 1); | |
2281 | ||
726a989a | 2282 | if (rhs1 != oe1->op || rhs2 != oe2->op) |
0e0ed594 | 2283 | { |
0e0ed594 JL |
2284 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2285 | { | |
2286 | fprintf (dump_file, "Transforming "); | |
726a989a | 2287 | print_gimple_stmt (dump_file, stmt, 0, 0); |
0e0ed594 JL |
2288 | } |
2289 | ||
726a989a RB |
2290 | gimple_assign_set_rhs1 (stmt, oe1->op); |
2291 | gimple_assign_set_rhs2 (stmt, oe2->op); | |
0e0ed594 | 2292 | update_stmt (stmt); |
ec81df7d JJ |
2293 | if (rhs1 != oe1->op && rhs1 != oe2->op) |
2294 | remove_visited_stmt_chain (rhs1); | |
0e0ed594 JL |
2295 | |
2296 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2297 | { | |
2298 | fprintf (dump_file, " into "); | |
726a989a | 2299 | print_gimple_stmt (dump_file, stmt, 0, 0); |
0e0ed594 | 2300 | } |
0e0ed594 JL |
2301 | } |
2302 | return; | |
2303 | } | |
2304 | ||
2305 | /* If we hit here, we should have 3 or more ops left. */ | |
2306 | gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops)); | |
2307 | ||
2308 | /* Rewrite the next operator. */ | |
2309 | oe = VEC_index (operand_entry_t, ops, opindex); | |
2310 | ||
726a989a | 2311 | if (oe->op != rhs2) |
0e0ed594 | 2312 | { |
ec81df7d JJ |
2313 | if (!moved) |
2314 | { | |
2315 | gimple_stmt_iterator gsinow, gsirhs1; | |
2316 | gimple stmt1 = stmt, stmt2; | |
2317 | unsigned int count; | |
2318 | ||
2319 | gsinow = gsi_for_stmt (stmt); | |
2320 | count = VEC_length (operand_entry_t, ops) - opindex - 2; | |
2321 | while (count-- != 0) | |
2322 | { | |
2323 | stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1)); | |
2324 | gsirhs1 = gsi_for_stmt (stmt2); | |
2325 | gsi_move_before (&gsirhs1, &gsinow); | |
2326 | gsi_prev (&gsinow); | |
2327 | stmt1 = stmt2; | |
2328 | } | |
2329 | moved = true; | |
2330 | } | |
0e0ed594 JL |
2331 | |
2332 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2333 | { | |
2334 | fprintf (dump_file, "Transforming "); | |
726a989a | 2335 | print_gimple_stmt (dump_file, stmt, 0, 0); |
0e0ed594 JL |
2336 | } |
2337 | ||
726a989a | 2338 | gimple_assign_set_rhs2 (stmt, oe->op); |
0e0ed594 JL |
2339 | update_stmt (stmt); |
2340 | ||
2341 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2342 | { | |
2343 | fprintf (dump_file, " into "); | |
726a989a | 2344 | print_gimple_stmt (dump_file, stmt, 0, 0); |
0e0ed594 JL |
2345 | } |
2346 | } | |
2347 | /* Recurse on the LHS of the binary operator, which is guaranteed to | |
2348 | be the non-leaf side. */ | |
ec81df7d | 2349 | rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved); |
0e0ed594 JL |
2350 | } |
2351 | ||
df7b0cc4 EI |
2352 | /* Find out how many cycles we need to compute statements chain. |
2353 | OPS_NUM holds number os statements in a chain. CPU_WIDTH is a | |
2354 | maximum number of independent statements we may execute per cycle. */ | |
2355 | ||
2356 | static int | |
2357 | get_required_cycles (int ops_num, int cpu_width) | |
2358 | { | |
2359 | int res; | |
2360 | int elog; | |
2361 | unsigned int rest; | |
2362 | ||
2363 | /* While we have more than 2 * cpu_width operands | |
2364 | we may reduce number of operands by cpu_width | |
2365 | per cycle. */ | |
2366 | res = ops_num / (2 * cpu_width); | |
2367 | ||
2368 | /* Remained operands count may be reduced twice per cycle | |
2369 | until we have only one operand. */ | |
2370 | rest = (unsigned)(ops_num - res * cpu_width); | |
2371 | elog = exact_log2 (rest); | |
2372 | if (elog >= 0) | |
2373 | res += elog; | |
2374 | else | |
2375 | res += floor_log2 (rest) + 1; | |
2376 | ||
2377 | return res; | |
2378 | } | |
2379 | ||
2380 | /* Returns an optimal number of registers to use for computation of | |
2381 | given statements. */ | |
2382 | ||
2383 | static int | |
2384 | get_reassociation_width (int ops_num, enum tree_code opc, | |
2385 | enum machine_mode mode) | |
2386 | { | |
2387 | int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); | |
2388 | int width; | |
2389 | int width_min; | |
2390 | int cycles_best; | |
2391 | ||
2392 | if (param_width > 0) | |
2393 | width = param_width; | |
2394 | else | |
2395 | width = targetm.sched.reassociation_width (opc, mode); | |
2396 | ||
2397 | if (width == 1) | |
2398 | return width; | |
2399 | ||
2400 | /* Get the minimal time required for sequence computation. */ | |
2401 | cycles_best = get_required_cycles (ops_num, width); | |
2402 | ||
2403 | /* Check if we may use less width and still compute sequence for | |
2404 | the same time. It will allow us to reduce registers usage. | |
2405 | get_required_cycles is monotonically increasing with lower width | |
2406 | so we can perform a binary search for the minimal width that still | |
2407 | results in the optimal cycle count. */ | |
2408 | width_min = 1; | |
2409 | while (width > width_min) | |
2410 | { | |
2411 | int width_mid = (width + width_min) / 2; | |
2412 | ||
2413 | if (get_required_cycles (ops_num, width_mid) == cycles_best) | |
2414 | width = width_mid; | |
2415 | else if (width_min < width_mid) | |
2416 | width_min = width_mid; | |
2417 | else | |
2418 | break; | |
2419 | } | |
2420 | ||
2421 | return width; | |
2422 | } | |
2423 | ||
2424 | /* Recursively rewrite our linearized statements so that the operators | |
2425 | match those in OPS[OPINDEX], putting the computation in rank | |
2426 | order and trying to allow operations to be executed in | |
2427 | parallel. */ | |
2428 | ||
2429 | static void | |
2430 | rewrite_expr_tree_parallel (gimple stmt, int width, | |
2431 | VEC(operand_entry_t, heap) * ops) | |
2432 | { | |
2433 | enum tree_code opcode = gimple_assign_rhs_code (stmt); | |
2434 | int op_num = VEC_length (operand_entry_t, ops); | |
2435 | int stmt_num = op_num - 1; | |
2436 | gimple *stmts = XALLOCAVEC (gimple, stmt_num); | |
2437 | int op_index = op_num - 1; | |
2438 | int stmt_index = 0; | |
2439 | int ready_stmts_end = 0; | |
2440 | int i = 0; | |
2441 | tree last_rhs1 = gimple_assign_rhs1 (stmt); | |
df7b0cc4 EI |
2442 | |
2443 | /* We start expression rewriting from the top statements. | |
2444 | So, in this loop we create a full list of statements | |
2445 | we will work with. */ | |
2446 | stmts[stmt_num - 1] = stmt; | |
2447 | for (i = stmt_num - 2; i >= 0; i--) | |
2448 | stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); | |
2449 | ||
df7b0cc4 EI |
2450 | for (i = 0; i < stmt_num; i++) |
2451 | { | |
2452 | tree op1, op2; | |
2453 | ||
2454 | /* Determine whether we should use results of | |
2455 | already handled statements or not. */ | |
2456 | if (ready_stmts_end == 0 | |
2457 | && (i - stmt_index >= width || op_index < 1)) | |
2458 | ready_stmts_end = i; | |
2459 | ||
2460 | /* Now we choose operands for the next statement. Non zero | |
2461 | value in ready_stmts_end means here that we should use | |
2462 | the result of already generated statements as new operand. */ | |
2463 | if (ready_stmts_end > 0) | |
2464 | { | |
2465 | op1 = gimple_assign_lhs (stmts[stmt_index++]); | |
2466 | if (ready_stmts_end > stmt_index) | |
2467 | op2 = gimple_assign_lhs (stmts[stmt_index++]); | |
2468 | else if (op_index >= 0) | |
2469 | op2 = VEC_index (operand_entry_t, ops, op_index--)->op; | |
2470 | else | |
2471 | { | |
2472 | gcc_assert (stmt_index < i); | |
2473 | op2 = gimple_assign_lhs (stmts[stmt_index++]); | |
2474 | } | |
2475 | ||
2476 | if (stmt_index >= ready_stmts_end) | |
2477 | ready_stmts_end = 0; | |
2478 | } | |
2479 | else | |
2480 | { | |
2481 | if (op_index > 1) | |
2482 | swap_ops_for_binary_stmt (ops, op_index - 2, NULL); | |
2483 | op2 = VEC_index (operand_entry_t, ops, op_index--)->op; | |
2484 | op1 = VEC_index (operand_entry_t, ops, op_index--)->op; | |
2485 | } | |
2486 | ||
2487 | /* If we emit the last statement then we should put | |
2488 | operands into the last statement. It will also | |
2489 | break the loop. */ | |
2490 | if (op_index < 0 && stmt_index == i) | |
2491 | i = stmt_num - 1; | |
2492 | ||
2493 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2494 | { | |
2495 | fprintf (dump_file, "Transforming "); | |
2496 | print_gimple_stmt (dump_file, stmts[i], 0, 0); | |
2497 | } | |
2498 | ||
2499 | /* We keep original statement only for the last one. All | |
2500 | others are recreated. */ | |
2501 | if (i == stmt_num - 1) | |
2502 | { | |
2503 | gimple_assign_set_rhs1 (stmts[i], op1); | |
2504 | gimple_assign_set_rhs2 (stmts[i], op2); | |
2505 | update_stmt (stmts[i]); | |
2506 | } | |
2507 | else | |
83d5977e | 2508 | stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); |
df7b0cc4 EI |
2509 | |
2510 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2511 | { | |
2512 | fprintf (dump_file, " into "); | |
2513 | print_gimple_stmt (dump_file, stmts[i], 0, 0); | |
2514 | } | |
2515 | } | |
2516 | ||
2517 | remove_visited_stmt_chain (last_rhs1); | |
2518 | } | |
2519 | ||
0e0ed594 JL |
2520 | /* Transform STMT, which is really (A +B) + (C + D) into the left |
2521 | linear form, ((A+B)+C)+D. | |
2522 | Recurse on D if necessary. */ | |
2523 | ||
2524 | static void | |
726a989a | 2525 | linearize_expr (gimple stmt) |
0e0ed594 | 2526 | { |
726a989a RB |
2527 | gimple_stmt_iterator gsinow, gsirhs; |
2528 | gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); | |
2529 | gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); | |
2530 | enum tree_code rhscode = gimple_assign_rhs_code (stmt); | |
2531 | gimple newbinrhs = NULL; | |
7a9c7d01 | 2532 | struct loop *loop = loop_containing_stmt (stmt); |
0e0ed594 | 2533 | |
726a989a RB |
2534 | gcc_assert (is_reassociable_op (binlhs, rhscode, loop) |
2535 | && is_reassociable_op (binrhs, rhscode, loop)); | |
2536 | ||
2537 | gsinow = gsi_for_stmt (stmt); | |
2538 | gsirhs = gsi_for_stmt (binrhs); | |
2539 | gsi_move_before (&gsirhs, &gsinow); | |
0e0ed594 | 2540 | |
726a989a RB |
2541 | gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); |
2542 | gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs)); | |
2543 | gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); | |
0e0ed594 | 2544 | |
726a989a RB |
2545 | if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) |
2546 | newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); | |
0e0ed594 JL |
2547 | |
2548 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2549 | { | |
2550 | fprintf (dump_file, "Linearized: "); | |
726a989a | 2551 | print_gimple_stmt (dump_file, stmt, 0, 0); |
0e0ed594 JL |
2552 | } |
2553 | ||
2554 | reassociate_stats.linearized++; | |
2555 | update_stmt (binrhs); | |
2556 | update_stmt (binlhs); | |
2557 | update_stmt (stmt); | |
726a989a RB |
2558 | |
2559 | gimple_set_visited (stmt, true); | |
2560 | gimple_set_visited (binlhs, true); | |
2561 | gimple_set_visited (binrhs, true); | |
0e0ed594 JL |
2562 | |
2563 | /* Tail recurse on the new rhs if it still needs reassociation. */ | |
7a9c7d01 | 2564 | if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) |
726a989a RB |
2565 | /* ??? This should probably be linearize_expr (newbinrhs) but I don't |
2566 | want to change the algorithm while converting to tuples. */ | |
0e0ed594 | 2567 | linearize_expr (stmt); |
0e0ed594 JL |
2568 | } |
2569 | ||
726a989a | 2570 | /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return |
0e0ed594 JL |
2571 | it. Otherwise, return NULL. */ |
2572 | ||
726a989a | 2573 | static gimple |
0e0ed594 JL |
2574 | get_single_immediate_use (tree lhs) |
2575 | { | |
2576 | use_operand_p immuse; | |
726a989a | 2577 | gimple immusestmt; |
0e0ed594 JL |
2578 | |
2579 | if (TREE_CODE (lhs) == SSA_NAME | |
726a989a RB |
2580 | && single_imm_use (lhs, &immuse, &immusestmt) |
2581 | && is_gimple_assign (immusestmt)) | |
2582 | return immusestmt; | |
2583 | ||
2584 | return NULL; | |
0e0ed594 | 2585 | } |
0e0ed594 | 2586 | |
0e0ed594 JL |
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsi)
{
  gimple negatedefstmt= NULL;
  tree resultofnegate;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  The single-use check ensures we are not
     changing a value some other statement still needs un-negated.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      gimple_stmt_iterator gsi;
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);

      /* Re-fetch the iterator before each recursive call: the
	 recursion may insert statements, invalidating a stale one.  */
      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);
      gimple_assign_set_rhs1 (negatedefstmt, rhs1);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);
      gimple_assign_set_rhs2 (negatedefstmt, rhs2);

      update_stmt (negatedefstmt);
      return gimple_assign_lhs (negatedefstmt);
    }

  /* Base case: build an explicit NEGATE_EXPR and gimplify it into
     new statements inserted before GSI.  */
  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  return resultofnegate;
}
2631 | ||
2632 | /* Return true if we should break up the subtract in STMT into an add | |
2633 | with negate. This is true when we the subtract operands are really | |
2634 | adds, or the subtract itself is used in an add expression. In | |
2635 | either case, breaking up the subtract into an add with negate | |
2636 | exposes the adds to reassociation. */ | |
2637 | ||
2638 | static bool | |
726a989a | 2639 | should_break_up_subtract (gimple stmt) |
0e0ed594 | 2640 | { |
726a989a RB |
2641 | tree lhs = gimple_assign_lhs (stmt); |
2642 | tree binlhs = gimple_assign_rhs1 (stmt); | |
2643 | tree binrhs = gimple_assign_rhs2 (stmt); | |
2644 | gimple immusestmt; | |
7a9c7d01 | 2645 | struct loop *loop = loop_containing_stmt (stmt); |
0e0ed594 JL |
2646 | |
2647 | if (TREE_CODE (binlhs) == SSA_NAME | |
7a9c7d01 | 2648 | && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) |
0e0ed594 JL |
2649 | return true; |
2650 | ||
2651 | if (TREE_CODE (binrhs) == SSA_NAME | |
7a9c7d01 | 2652 | && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) |
0e0ed594 JL |
2653 | return true; |
2654 | ||
2655 | if (TREE_CODE (lhs) == SSA_NAME | |
2656 | && (immusestmt = get_single_immediate_use (lhs)) | |
726a989a | 2657 | && is_gimple_assign (immusestmt) |
25c6036a RG |
2658 | && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR |
2659 | || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) | |
0e0ed594 JL |
2660 | return true; |
2661 | return false; | |
0e0ed594 JL |
2662 | } |
2663 | ||
2664 | /* Transform STMT from A - B into A + -B. */ | |
2665 | ||
2666 | static void | |
726a989a | 2667 | break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip) |
0e0ed594 | 2668 | { |
726a989a RB |
2669 | tree rhs1 = gimple_assign_rhs1 (stmt); |
2670 | tree rhs2 = gimple_assign_rhs2 (stmt); | |
0e0ed594 JL |
2671 | |
2672 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2673 | { | |
2674 | fprintf (dump_file, "Breaking up subtract "); | |
726a989a | 2675 | print_gimple_stmt (dump_file, stmt, 0, 0); |
0e0ed594 JL |
2676 | } |
2677 | ||
726a989a RB |
2678 | rhs2 = negate_value (rhs2, gsip); |
2679 | gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); | |
0e0ed594 JL |
2680 | update_stmt (stmt); |
2681 | } | |
2682 | ||
a6f8851e BS |
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

static bool
acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree fndecl, arg1;
  REAL_VALUE_TYPE c, cint;

  /* Only do this during the first (early) reassociation pass, and
     only when -funsafe-math-optimizations allows it.  */
  if (!first_pass_instance
      || !flag_unsafe_math_optimizations
      || !is_gimple_call (stmt)
      || !has_single_use (gimple_call_lhs (stmt)))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (fndecl))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
	return false;

      c = TREE_REAL_CST (arg1);

      /* Reject exponents too large in magnitude to represent in a
	 HOST_WIDE_INT.  */
      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
	return false;

      /* Round-trip through an integer to verify the exponent has no
	 fractional part.  */
      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent,
			 *exponent < 0 ? -1 : 0, 0);
      if (!real_identical (&c, &cint))
	return false;

      break;

    CASE_FLT_FN (BUILT_IN_POWI):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      /* powi's exponent is already integral; just require a constant
	 that fits in a signed HOST_WIDE_INT.  */
      if (!host_integerp (arg1, 0))
	return false;

      *exponent = TREE_INT_CST_LOW (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
2751 | ||
0e0ed594 JL |
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.

   IS_ASSOCIATIVE is false for operations (e.g. division chains) where
   operands must not be freely swapped; SET_VISITED controls whether
   visited flags are set as we walk.  */

static void
linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef = NULL, binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (set_visited)
    gimple_set_visited (stmt, true);

  /* Throwing statements must stay in place, so a throwing definition
     is treated as non-reassociable.  */
  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      tree temp;

      /* If this is not a associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      if (!binrhsisreassoc)
	{
	  /* For multiply chains, single-use pow/powi calls are folded
	     into the operand list as repeated factors instead of plain
	     operands.  */
	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binrhs) == SSA_NAME
	      && acceptable_pow_call (binrhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binrhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binrhs);

	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binlhs) == SSA_NAME
	      && acceptable_pow_call (binlhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binlhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binlhs);

	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      swap_tree_operands (stmt,
			  gimple_assign_rhs1_ptr (stmt),
			  gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      /* Both sides reassociable: flatten the RHS chain into the LHS
	 side first, then continue down the (now longer) LHS chain.  */
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);

  if (rhscode == MULT_EXPR
      && TREE_CODE (binrhs) == SSA_NAME
      && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
    }
  else
    add_to_ops_vec (ops, binrhs);
}
2874 | ||
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  Walks the PLUS_NEGATES vector of negate results
   collected earlier and merges each negate into its single use when
   that use is a PLUS_EXPR or MINUS_EXPR.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
    {
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_tree_operands (user,
				  gimple_assign_rhs1_ptr (user),
				  gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
		   x = -a
		   y = x - b
		 which we transform into
		   x = a + b
		   y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs2 = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
	      gimple_replace_lhs (feed, negate);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
	      update_stmt (gsi_stmt (gsi));
	      gsi2 = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
	      update_stmt (gsi_stmt (gsi2));
	      /* Keep the rewritten add dominating its new use.  */
	      gsi_move_before (&gsi, &gsi2);
	      VEC_safe_push (tree, heap, plus_negates,
			     gimple_assign_lhs (gsi_stmt (gsi2)));
	    }
	  else
	    {
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
		 rid of one operation.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}
2960 | ||
143597ff MM |
2961 | /* Returns true if OP is of a type for which we can do reassociation. |
2962 | That is for integral or non-saturating fixed-point types, and for | |
2963 | floating point type when associative-math is enabled. */ | |
2964 | ||
2965 | static bool | |
2966 | can_reassociate_p (tree op) | |
2967 | { | |
2968 | tree type = TREE_TYPE (op); | |
2d698d3b | 2969 | if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) |
143597ff | 2970 | || NON_SAT_FIXED_POINT_TYPE_P (type) |
8a9ecffd | 2971 | || (flag_associative_math && FLOAT_TYPE_P (type))) |
143597ff MM |
2972 | return true; |
2973 | return false; | |
2974 | } | |
2975 | ||
0e0ed594 JL |
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement.

   Recurses into dominated blocks so the whole dominator subtree of BB
   is processed in dominance order.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      /* Reset the visited flag for the later operand-collection walk.  */
      gimple_set_visited (stmt, false);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
	continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
      /* Remember existing negates for later repropagation.  */
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
3031 | ||
a6f8851e BS |
/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;

DEF_VEC_O (repeat_factor);
DEF_VEC_ALLOC_O (repeat_factor, heap);

/* Scratch vector of repeated factors; allocated and freed within
   each invocation of attempt_builtin_powi.  */
static VEC (repeat_factor, heap) *repeat_factor_vec;
3056 | ||
3057 | /* Used for sorting the repeat factor vector. Sort primarily by | |
3058 | ascending occurrence count, secondarily by descending rank. */ | |
3059 | ||
3060 | static int | |
3061 | compare_repeat_factors (const void *x1, const void *x2) | |
3062 | { | |
3063 | const_repeat_factor_t rf1 = (const_repeat_factor_t) x1; | |
3064 | const_repeat_factor_t rf2 = (const_repeat_factor_t) x2; | |
3065 | ||
3066 | if (rf1->count != rf2->count) | |
3067 | return rf1->count - rf2->count; | |
3068 | ||
3069 | return rf2->rank - rf1->rank; | |
3070 | } | |
3071 | ||
a6f8851e BS |
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, VEC(operand_entry_t, heap) **ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec = VEC_alloc (repeat_factor, heap, 10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  /* Not seen before: start a new entry for this factor.  */
	  if (j >= VEC_length (repeat_factor, repeat_factor_vec))
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      VEC_safe_push (repeat_factor, heap, repeat_factor_vec, &rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  VEC_qsort (repeat_factor, repeat_factor_vec, compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y  */

  vec_len = VEC_length (repeat_factor, repeat_factor_vec);

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      /* Raise the cached product to POWER with a powi call
		 inserted before STMT.  */
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor_t rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
		  print_generic_expr (dump_file, rf->factor, 0);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrencce count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int)j; ii--)
		{
		  tree op1, op2;

		  rf1 = VEC_index (repeat_factor, repeat_factor_vec, ii);
		  rf2 = VEC_index (repeat_factor, repeat_factor_vec, ii + 1);

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
							   target_ssa,
							   op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = VEC_index (repeat_factor, repeat_factor_vec, j);
	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					build_int_cst (integer_type_node,
						       power));
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
						   result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = VEC_index (repeat_factor, repeat_factor_vec, i);
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (operand_entry_t, *ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      VEC_ordered_remove (operand_entry_t, *ops, n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  VEC_qsort (operand_entry_t, *ops, sort_by_operand_rank);
  VEC_free (repeat_factor, heap, repeat_factor_vec);

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
3373 | ||
3374 | /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */ | |
3375 | ||
3376 | static void | |
3377 | transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs) | |
3378 | { | |
3379 | tree rhs1; | |
3380 | ||
3381 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3382 | { | |
3383 | fprintf (dump_file, "Transforming "); | |
3384 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
3385 | } | |
3386 | ||
3387 | rhs1 = gimple_assign_rhs1 (stmt); | |
3388 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
3389 | update_stmt (stmt); | |
3390 | remove_visited_stmt_chain (rhs1); | |
3391 | ||
3392 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3393 | { | |
3394 | fprintf (dump_file, " into "); | |
3395 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
3396 | } | |
3397 | } | |
3398 | ||
3399 | /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ | |
3400 | ||
3401 | static void | |
3402 | transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt, | |
3403 | tree rhs1, tree rhs2) | |
3404 | { | |
3405 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3406 | { | |
3407 | fprintf (dump_file, "Transforming "); | |
3408 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
3409 | } | |
3410 | ||
be7493ca | 3411 | gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); |
917a5202 BS |
3412 | update_stmt (gsi_stmt (*gsi)); |
3413 | remove_visited_stmt_chain (rhs1); | |
3414 | ||
3415 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3416 | { | |
3417 | fprintf (dump_file, " into "); | |
3418 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
3419 | } | |
a6f8851e BS |
3420 | } |
3421 | ||
0e0ed594 JL |
3422 | /* Reassociate expressions in basic block BB and its post-dominator as |
3423 | children. */ | |
3424 | ||
3425 | static void | |
3426 | reassociate_bb (basic_block bb) | |
3427 | { | |
726a989a | 3428 | gimple_stmt_iterator gsi; |
0e0ed594 JL |
3429 | basic_block son; |
3430 | ||
726a989a | 3431 | for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) |
0e0ed594 | 3432 | { |
726a989a | 3433 | gimple stmt = gsi_stmt (gsi); |
0e0ed594 | 3434 | |
6b03de57 RG |
3435 | if (is_gimple_assign (stmt) |
3436 | && !stmt_could_throw_p (stmt)) | |
0e0ed594 | 3437 | { |
726a989a RB |
3438 | tree lhs, rhs1, rhs2; |
3439 | enum tree_code rhs_code = gimple_assign_rhs_code (stmt); | |
0e0ed594 | 3440 | |
726a989a RB |
3441 | /* If this is not a gimple binary expression, there is |
3442 | nothing for us to do with it. */ | |
3443 | if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) | |
0e0ed594 JL |
3444 | continue; |
3445 | ||
726a989a RB |
3446 | /* If this was part of an already processed statement, |
3447 | we don't need to touch it again. */ | |
3448 | if (gimple_visited_p (stmt)) | |
25c6036a RG |
3449 | { |
3450 | /* This statement might have become dead because of previous | |
3451 | reassociations. */ | |
3452 | if (has_zero_uses (gimple_get_lhs (stmt))) | |
3453 | { | |
3454 | gsi_remove (&gsi, true); | |
3455 | release_defs (stmt); | |
e4658728 RG |
3456 | /* We might end up removing the last stmt above which |
3457 | places the iterator to the end of the sequence. | |
3458 | Reset it to the last stmt in this case which might | |
3459 | be the end of the sequence as well if we removed | |
3460 | the last statement of the sequence. In which case | |
3461 | we need to bail out. */ | |
3462 | if (gsi_end_p (gsi)) | |
3463 | { | |
3464 | gsi = gsi_last_bb (bb); | |
3465 | if (gsi_end_p (gsi)) | |
3466 | break; | |
3467 | } | |
25c6036a RG |
3468 | } |
3469 | continue; | |
3470 | } | |
726a989a RB |
3471 | |
3472 | lhs = gimple_assign_lhs (stmt); | |
3473 | rhs1 = gimple_assign_rhs1 (stmt); | |
3474 | rhs2 = gimple_assign_rhs2 (stmt); | |
3475 | ||
2d698d3b RG |
3476 | /* For non-bit or min/max operations we can't associate |
3477 | all types. Verify that here. */ | |
3478 | if (rhs_code != BIT_IOR_EXPR | |
3479 | && rhs_code != BIT_AND_EXPR | |
3480 | && rhs_code != BIT_XOR_EXPR | |
3481 | && rhs_code != MIN_EXPR | |
3482 | && rhs_code != MAX_EXPR | |
3483 | && (!can_reassociate_p (lhs) | |
3484 | || !can_reassociate_p (rhs1) | |
3485 | || !can_reassociate_p (rhs2))) | |
0e0ed594 JL |
3486 | continue; |
3487 | ||
726a989a | 3488 | if (associative_tree_code (rhs_code)) |
0e0ed594 JL |
3489 | { |
3490 | VEC(operand_entry_t, heap) *ops = NULL; | |
917a5202 | 3491 | tree powi_result = NULL_TREE; |
0e0ed594 JL |
3492 | |
3493 | /* There may be no immediate uses left by the time we | |
3494 | get here because we may have eliminated them all. */ | |
bfc646bf | 3495 | if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs)) |
0e0ed594 JL |
3496 | continue; |
3497 | ||
726a989a | 3498 | gimple_set_visited (stmt, true); |
25c6036a | 3499 | linearize_expr_tree (&ops, stmt, true, true); |
5095da95 | 3500 | VEC_qsort (operand_entry_t, ops, sort_by_operand_rank); |
726a989a | 3501 | optimize_ops_list (rhs_code, &ops); |
25c6036a RG |
3502 | if (undistribute_ops_list (rhs_code, &ops, |
3503 | loop_containing_stmt (stmt))) | |
3504 | { | |
5095da95 | 3505 | VEC_qsort (operand_entry_t, ops, sort_by_operand_rank); |
25c6036a RG |
3506 | optimize_ops_list (rhs_code, &ops); |
3507 | } | |
0e0ed594 | 3508 | |
0ccb5dbf JJ |
3509 | if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) |
3510 | optimize_range_tests (rhs_code, &ops); | |
3511 | ||
a6f8851e BS |
3512 | if (first_pass_instance |
3513 | && rhs_code == MULT_EXPR | |
3514 | && flag_unsafe_math_optimizations) | |
83d5977e | 3515 | powi_result = attempt_builtin_powi (stmt, &ops); |
a6f8851e | 3516 | |
917a5202 BS |
3517 | /* If the operand vector is now empty, all operands were |
3518 | consumed by the __builtin_powi optimization. */ | |
3519 | if (VEC_length (operand_entry_t, ops) == 0) | |
3520 | transform_stmt_to_copy (&gsi, stmt, powi_result); | |
3521 | else if (VEC_length (operand_entry_t, ops) == 1) | |
0e0ed594 | 3522 | { |
917a5202 BS |
3523 | tree last_op = VEC_last (operand_entry_t, ops)->op; |
3524 | ||
3525 | if (powi_result) | |
3526 | transform_stmt_to_multiply (&gsi, stmt, last_op, | |
3527 | powi_result); | |
3528 | else | |
3529 | transform_stmt_to_copy (&gsi, stmt, last_op); | |
0e0ed594 JL |
3530 | } |
3531 | else | |
df7b0cc4 EI |
3532 | { |
3533 | enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); | |
3534 | int ops_num = VEC_length (operand_entry_t, ops); | |
3535 | int width = get_reassociation_width (ops_num, rhs_code, mode); | |
3536 | ||
3537 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3538 | fprintf (dump_file, | |
3539 | "Width = %d was chosen for reassociation\n", width); | |
3540 | ||
3541 | if (width > 1 | |
3542 | && VEC_length (operand_entry_t, ops) > 3) | |
3543 | rewrite_expr_tree_parallel (stmt, width, ops); | |
3544 | else | |
3545 | rewrite_expr_tree (stmt, 0, ops, false); | |
917a5202 BS |
3546 | |
3547 | /* If we combined some repeated factors into a | |
3548 | __builtin_powi call, multiply that result by the | |
3549 | reassociated operands. */ | |
3550 | if (powi_result) | |
3551 | { | |
3552 | gimple mul_stmt; | |
3553 | tree type = TREE_TYPE (gimple_get_lhs (stmt)); | |
83d5977e RG |
3554 | tree target_ssa = make_temp_ssa_name (type, NULL, |
3555 | "reassocpow"); | |
917a5202 BS |
3556 | gimple_set_lhs (stmt, target_ssa); |
3557 | update_stmt (stmt); | |
3558 | mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs, | |
3559 | powi_result, | |
3560 | target_ssa); | |
3561 | gimple_set_location (mul_stmt, gimple_location (stmt)); | |
3562 | gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); | |
3563 | } | |
df7b0cc4 | 3564 | } |
0e0ed594 JL |
3565 | |
3566 | VEC_free (operand_entry_t, heap, ops); | |
3567 | } | |
3568 | } | |
3569 | } | |
3570 | for (son = first_dom_son (CDI_POST_DOMINATORS, bb); | |
3571 | son; | |
3572 | son = next_dom_son (CDI_POST_DOMINATORS, son)) | |
3573 | reassociate_bb (son); | |
3574 | } | |
3575 | ||
3576 | void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops); | |
3577 | void debug_ops_vector (VEC (operand_entry_t, heap) *ops); | |
3578 | ||
3579 | /* Dump the operand entry vector OPS to FILE. */ | |
3580 | ||
3581 | void | |
3582 | dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops) | |
3583 | { | |
3584 | operand_entry_t oe; | |
3585 | unsigned int i; | |
3586 | ||
ac47786e | 3587 | FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe) |
0e0ed594 JL |
3588 | { |
3589 | fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); | |
726a989a | 3590 | print_generic_expr (file, oe->op, 0); |
0e0ed594 JL |
3591 | } |
3592 | } | |
3593 | ||
3594 | /* Dump the operand entry vector OPS to STDERR. */ | |
3595 | ||
24e47c76 | 3596 | DEBUG_FUNCTION void |
0e0ed594 JL |
3597 | debug_ops_vector (VEC (operand_entry_t, heap) *ops) |
3598 | { | |
3599 | dump_ops_vector (stderr, ops); | |
3600 | } | |
3601 | ||
3602 | static void | |
3603 | do_reassoc (void) | |
3604 | { | |
3605 | break_up_subtract_bb (ENTRY_BLOCK_PTR); | |
3606 | reassociate_bb (EXIT_BLOCK_PTR); | |
3607 | } | |
3608 | ||
3609 | /* Initialize the reassociation pass. */ | |
3610 | ||
3611 | static void | |
3612 | init_reassoc (void) | |
3613 | { | |
3614 | int i; | |
15814ba0 | 3615 | long rank = 2; |
c302207e | 3616 | int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS); |
0e0ed594 | 3617 | |
7a9c7d01 ZD |
3618 | /* Find the loops, so that we can prevent moving calculations in |
3619 | them. */ | |
3620 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); | |
3621 | ||
0e0ed594 JL |
3622 | memset (&reassociate_stats, 0, sizeof (reassociate_stats)); |
3623 | ||
3624 | operand_entry_pool = create_alloc_pool ("operand entry pool", | |
3625 | sizeof (struct operand_entry), 30); | |
f1665f5c | 3626 | next_operand_entry_id = 0; |
0e0ed594 JL |
3627 | |
3628 | /* Reverse RPO (Reverse Post Order) will give us something where | |
3629 | deeper loops come later. */ | |
f91a0beb | 3630 | pre_and_rev_post_order_compute (NULL, bbs, false); |
c302207e | 3631 | bb_rank = XCNEWVEC (long, last_basic_block); |
15814ba0 | 3632 | operand_rank = pointer_map_create (); |
0e0ed594 | 3633 | |
be7493ca RG |
3634 | /* Give each default definition a distinct rank. This includes |
3635 | parameters and the static chain. Walk backwards over all | |
3636 | SSA names so that we get proper rank ordering according | |
3637 | to tree_swap_operands_p. */ | |
3638 | for (i = num_ssa_names - 1; i > 0; --i) | |
0e0ed594 | 3639 | { |
be7493ca RG |
3640 | tree name = ssa_name (i); |
3641 | if (name && SSA_NAME_IS_DEFAULT_DEF (name)) | |
3642 | insert_operand_rank (name, ++rank); | |
0e0ed594 JL |
3643 | } |
3644 | ||
3645 | /* Set up rank for each BB */ | |
24bd1a0b | 3646 | for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++) |
0e0ed594 JL |
3647 | bb_rank[bbs[i]] = ++rank << 16; |
3648 | ||
3649 | free (bbs); | |
0e0ed594 | 3650 | calculate_dominance_info (CDI_POST_DOMINATORS); |
b0aef8a8 | 3651 | plus_negates = NULL; |
0e0ed594 JL |
3652 | } |
3653 | ||
3654 | /* Cleanup after the reassociation pass, and print stats if | |
3655 | requested. */ | |
3656 | ||
3657 | static void | |
3658 | fini_reassoc (void) | |
3659 | { | |
01902653 RG |
3660 | statistics_counter_event (cfun, "Linearized", |
3661 | reassociate_stats.linearized); | |
3662 | statistics_counter_event (cfun, "Constants eliminated", | |
3663 | reassociate_stats.constants_eliminated); | |
3664 | statistics_counter_event (cfun, "Ops eliminated", | |
3665 | reassociate_stats.ops_eliminated); | |
3666 | statistics_counter_event (cfun, "Statements rewritten", | |
3667 | reassociate_stats.rewritten); | |
a6f8851e BS |
3668 | statistics_counter_event (cfun, "Built-in pow[i] calls encountered", |
3669 | reassociate_stats.pows_encountered); | |
3670 | statistics_counter_event (cfun, "Built-in powi calls created", | |
3671 | reassociate_stats.pows_created); | |
0e0ed594 | 3672 | |
15814ba0 | 3673 | pointer_map_destroy (operand_rank); |
0e0ed594 JL |
3674 | free_alloc_pool (operand_entry_pool); |
3675 | free (bb_rank); | |
b0aef8a8 | 3676 | VEC_free (tree, heap, plus_negates); |
0e0ed594 | 3677 | free_dominance_info (CDI_POST_DOMINATORS); |
7a9c7d01 | 3678 | loop_optimizer_finalize (); |
0e0ed594 JL |
3679 | } |
3680 | ||
3681 | /* Gate and execute functions for Reassociation. */ | |
3682 | ||
c2924966 | 3683 | static unsigned int |
0e0ed594 JL |
3684 | execute_reassoc (void) |
3685 | { | |
012309e6 | 3686 | init_reassoc (); |
0e0ed594 | 3687 | |
012309e6 | 3688 | do_reassoc (); |
0e0ed594 JL |
3689 | repropagate_negates (); |
3690 | ||
012309e6 | 3691 | fini_reassoc (); |
c2924966 | 3692 | return 0; |
012309e6 DB |
3693 | } |
3694 | ||
13c59415 UB |
3695 | static bool |
3696 | gate_tree_ssa_reassoc (void) | |
3697 | { | |
3698 | return flag_tree_reassoc != 0; | |
3699 | } | |
3700 | ||
8ddbbcae | 3701 | struct gimple_opt_pass pass_reassoc = |
012309e6 | 3702 | { |
8ddbbcae JH |
3703 | { |
3704 | GIMPLE_PASS, | |
012309e6 | 3705 | "reassoc", /* name */ |
13c59415 UB |
3706 | gate_tree_ssa_reassoc, /* gate */ |
3707 | execute_reassoc, /* execute */ | |
012309e6 DB |
3708 | NULL, /* sub */ |
3709 | NULL, /* next */ | |
3710 | 0, /* static_pass_number */ | |
13c59415 | 3711 | TV_TREE_REASSOC, /* tv_id */ |
4effdf02 | 3712 | PROP_cfg | PROP_ssa, /* properties_required */ |
012309e6 DB |
3713 | 0, /* properties_provided */ |
3714 | 0, /* properties_destroyed */ | |
3715 | 0, /* todo_flags_start */ | |
c7dd803e EB |
3716 | TODO_verify_ssa |
3717 | | TODO_verify_flow | |
c7dd803e | 3718 | | TODO_ggc_collect /* todo_flags_finish */ |
8ddbbcae | 3719 | } |
012309e6 | 3720 | }; |