]>
Commit | Line | Data |
---|---|---|
f9453c07 BS |
1 | /* Straight-line strength reduction. |
2 | Copyright (C) 2012 Free Software Foundation, Inc. | |
3 | Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com> | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 3, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | /* There are many algorithms for performing strength reduction on | |
22 | loops. This is not one of them. IVOPTS handles strength reduction | |
23 | of induction variables just fine. This pass is intended to pick | |
24 | up the crumbs it leaves behind, by considering opportunities for | |
25 | strength reduction along dominator paths. | |
26 | ||
27 | Strength reduction will be implemented in four stages, gradually | |
28 | adding more complex candidates: | |
29 | ||
30 | 1) Explicit multiplies, known constant multipliers, no | |
31 | conditional increments. (complete) | |
32 | 2) Explicit multiplies, unknown constant multipliers, | |
33 | no conditional increments. (data gathering complete, | |
34 | replacements pending) | |
35 | 3) Implicit multiplies in addressing expressions. (pending) | |
36 | 4) Explicit multiplies, conditional increments. (pending) | |
37 | ||
38 | It would also be possible to apply strength reduction to divisions | |
39 | and modulos, but such opportunities are relatively uncommon. | |
40 | ||
41 | Strength reduction is also currently restricted to integer operations. | |
42 | If desired, it could be extended to floating-point operations under | |
43 | control of something like -funsafe-math-optimizations. */ | |
44 | ||
45 | #include "config.h" | |
46 | #include "system.h" | |
47 | #include "coretypes.h" | |
48 | #include "tree.h" | |
49 | #include "gimple.h" | |
50 | #include "basic-block.h" | |
51 | #include "tree-pass.h" | |
52 | #include "timevar.h" | |
53 | #include "cfgloop.h" | |
54 | #include "tree-pretty-print.h" | |
55 | #include "gimple-pretty-print.h" | |
56 | #include "tree-flow.h" | |
57 | #include "domwalk.h" | |
58 | #include "pointer-set.h" | |
59 | \f | |
60 | /* Information about a strength reduction candidate. Each statement | |
61 | in the candidate table represents an expression of one of the | |
62 | following forms (the special case of CAND_REF will be described | |
63 | later): | |
64 | ||
65 | (CAND_MULT) S1: X = (B + i) * S | |
66 | (CAND_ADD) S1: X = B + (i * S) | |
67 | ||
68 | Here X and B are SSA names, i is an integer constant, and S is | |
69 | either an SSA name or a constant. We call B the "base," i the | |
70 | "index", and S the "stride." | |
71 | ||
72 | Any statement S0 that dominates S1 and is of the form: | |
73 | ||
74 | (CAND_MULT) S0: Y = (B + i') * S | |
75 | (CAND_ADD) S0: Y = B + (i' * S) | |
76 | ||
77 | is called a "basis" for S1. In both cases, S1 may be replaced by | |
78 | ||
79 | S1': X = Y + (i - i') * S, | |
80 | ||
81 | where (i - i') * S is folded to the extent possible. | |
82 | ||
83 | All gimple statements are visited in dominator order, and each | |
84 | statement that may contribute to one of the forms of S1 above is | |
85 | given at least one entry in the candidate table. Such statements | |
86 | include addition, pointer addition, subtraction, multiplication, | |
87 | negation, copies, and nontrivial type casts. If a statement may | |
88 | represent more than one expression of the forms of S1 above, | |
89 | multiple "interpretations" are stored in the table and chained | |
90 | together. Examples: | |
91 | ||
92 | * An add of two SSA names may treat either operand as the base. | |
93 | * A multiply of two SSA names, likewise. | |
94 | * A copy or cast may be thought of as either a CAND_MULT with | |
95 | i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0. | |
96 | ||
97 | Candidate records are allocated from an obstack. They are addressed | |
98 | both from a hash table keyed on S1, and from a vector of candidate | |
99 | pointers arranged in predominator order. | |
100 | ||
101 | Opportunity note | |
102 | ---------------- | |
103 | Currently we don't recognize: | |
104 | ||
105 | S0: Y = (S * i') - B | |
106 | S1: X = (S * i) - B | |
107 | ||
108 | as a strength reduction opportunity, even though this S1 would | |
109 | also be replaceable by the S1' above. This can be added if it | |
110 | comes up in practice. */ | |
111 | ||
112 | ||
113 | /* Index into the candidate vector, offset by 1. VECs are zero-based, | |
114 | while cand_idx's are one-based, with zero indicating null. */ | |
115 | typedef unsigned cand_idx; | |
116 | ||
117 | /* The kind of candidate. */ | |
118 | enum cand_kind | |
119 | { | |
120 | CAND_MULT, | |
121 | CAND_ADD | |
122 | }; | |
123 | ||
124 | struct slsr_cand_d | |
125 | { | |
126 | /* The candidate statement S1. */ | |
127 | gimple cand_stmt; | |
128 | ||
129 | /* The base SSA name B. */ | |
130 | tree base_name; | |
131 | ||
132 | /* The stride S. */ | |
133 | tree stride; | |
134 | ||
135 | /* The index constant i. */ | |
136 | double_int index; | |
137 | ||
138 | /* The type of the candidate. This is normally the type of base_name, | |
139 | but casts may have occurred when combining feeding instructions. | |
140 | A candidate can only be a basis for candidates of the same final type. */ | |
141 | tree cand_type; | |
142 | ||
143 | /* The kind of candidate (CAND_MULT, etc.). */ | |
144 | enum cand_kind kind; | |
145 | ||
146 | /* Index of this candidate in the candidate vector. */ | |
147 | cand_idx cand_num; | |
148 | ||
149 | /* Index of the next candidate record for the same statement. | |
150 | A statement may be useful in more than one way (e.g., due to | |
151 | commutativity). So we can have multiple "interpretations" | |
152 | of a statement. */ | |
153 | cand_idx next_interp; | |
154 | ||
155 | /* Index of the basis statement S0, if any, in the candidate vector. */ | |
156 | cand_idx basis; | |
157 | ||
158 | /* First candidate for which this candidate is a basis, if one exists. */ | |
159 | cand_idx dependent; | |
160 | ||
161 | /* Next candidate having the same basis as this one. */ | |
162 | cand_idx sibling; | |
163 | ||
164 | /* If this is a conditional candidate, the defining PHI statement | |
165 | for the base SSA name B. For future use; always NULL for now. */ | |
166 | gimple def_phi; | |
167 | ||
168 | /* Savings that can be expected from eliminating dead code if this | |
169 | candidate is replaced. */ | |
170 | int dead_savings; | |
171 | }; | |
172 | ||
173 | typedef struct slsr_cand_d slsr_cand, *slsr_cand_t; | |
174 | typedef const struct slsr_cand_d *const_slsr_cand_t; | |
175 | ||
176 | /* Pointers to candidates are chained together as part of a mapping | |
177 | from SSA names to the candidates that use them as a base name. */ | |
178 | ||
179 | struct cand_chain_d | |
180 | { | |
181 | /* SSA name that serves as a base name for the chain of candidates. */ | |
182 | tree base_name; | |
183 | ||
184 | /* Pointer to a candidate. */ | |
185 | slsr_cand_t cand; | |
186 | ||
187 | /* Chain pointer. */ | |
188 | struct cand_chain_d *next; | |
189 | ||
190 | }; | |
191 | ||
192 | typedef struct cand_chain_d cand_chain, *cand_chain_t; | |
193 | typedef const struct cand_chain_d *const_cand_chain_t; | |
194 | ||
195 | /* Candidates are maintained in a vector. If candidate X dominates | |
196 | candidate Y, then X appears before Y in the vector; but the | |
197 | converse does not necessarily hold. */ | |
198 | DEF_VEC_P (slsr_cand_t); | |
199 | DEF_VEC_ALLOC_P (slsr_cand_t, heap); | |
200 | static VEC (slsr_cand_t, heap) *cand_vec; | |
201 | ||
202 | enum cost_consts | |
203 | { | |
204 | COST_NEUTRAL = 0, | |
205 | COST_INFINITE = 1000 | |
206 | }; | |
207 | ||
208 | /* Pointer map embodying a mapping from statements to candidates. */ | |
209 | static struct pointer_map_t *stmt_cand_map; | |
210 | ||
211 | /* Obstack for candidates. */ | |
212 | static struct obstack cand_obstack; | |
213 | ||
214 | /* Array mapping from base SSA names to chains of candidates. */ | |
215 | static cand_chain_t *base_cand_map; | |
216 | ||
217 | /* Obstack for candidate chains. */ | |
218 | static struct obstack chain_obstack; | |
219 | \f | |
220 | /* Produce a pointer to the IDX'th candidate in the candidate vector. */ | |
221 | ||
222 | static slsr_cand_t | |
223 | lookup_cand (cand_idx idx) | |
224 | { | |
225 | return VEC_index (slsr_cand_t, cand_vec, idx - 1); | |
226 | } | |
227 | ||
228 | /* Use the base name from candidate C to look for possible candidates | |
229 | that can serve as a basis for C. Each potential basis must also | |
230 | appear in a block that dominates the candidate statement and have | |
231 | the same stride and type. If more than one possible basis exists, | |
232 | the one with highest index in the vector is chosen; this will be | |
233 | the most immediately dominating basis. */ | |
234 | ||
235 | static int | |
236 | find_basis_for_candidate (slsr_cand_t c) | |
237 | { | |
238 | cand_chain_t chain; | |
239 | slsr_cand_t basis = NULL; | |
240 | ||
241 | gcc_assert (TREE_CODE (c->base_name) == SSA_NAME); | |
242 | chain = base_cand_map[SSA_NAME_VERSION (c->base_name)]; | |
243 | ||
244 | for (; chain; chain = chain->next) | |
245 | { | |
246 | slsr_cand_t one_basis = chain->cand; | |
247 | ||
248 | if (one_basis->kind != c->kind | |
249 | || !operand_equal_p (one_basis->stride, c->stride, 0) | |
250 | || !types_compatible_p (one_basis->cand_type, c->cand_type) | |
251 | || !dominated_by_p (CDI_DOMINATORS, | |
252 | gimple_bb (c->cand_stmt), | |
253 | gimple_bb (one_basis->cand_stmt))) | |
254 | continue; | |
255 | ||
256 | if (!basis || basis->cand_num < one_basis->cand_num) | |
257 | basis = one_basis; | |
258 | } | |
259 | ||
260 | if (basis) | |
261 | { | |
262 | c->sibling = basis->dependent; | |
263 | basis->dependent = c->cand_num; | |
264 | return basis->cand_num; | |
265 | } | |
266 | ||
267 | return 0; | |
268 | } | |
269 | ||
270 | /* Record a mapping from the base name of C to C itself, indicating that | |
271 | C may potentially serve as a basis using that base name. */ | |
272 | ||
273 | static void | |
274 | record_potential_basis (slsr_cand_t c) | |
275 | { | |
276 | cand_chain_t node, head; | |
277 | int index; | |
278 | ||
279 | node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain)); | |
280 | node->base_name = c->base_name; | |
281 | node->cand = c; | |
282 | node->next = NULL; | |
283 | index = SSA_NAME_VERSION (c->base_name); | |
284 | head = base_cand_map[index]; | |
285 | ||
286 | if (head) | |
287 | { | |
288 | node->next = head->next; | |
289 | head->next = node; | |
290 | } | |
291 | else | |
292 | base_cand_map[index] = node; | |
293 | } | |
294 | ||
295 | /* Allocate storage for a new candidate and initialize its fields. | |
296 | Attempt to find a basis for the candidate. */ | |
297 | ||
298 | static slsr_cand_t | |
299 | alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base, | |
300 | double_int index, tree stride, tree ctype, | |
301 | unsigned savings) | |
302 | { | |
303 | slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack, | |
304 | sizeof (slsr_cand)); | |
305 | c->cand_stmt = gs; | |
306 | c->base_name = base; | |
307 | c->stride = stride; | |
308 | c->index = index; | |
309 | c->cand_type = ctype; | |
310 | c->kind = kind; | |
311 | c->cand_num = VEC_length (slsr_cand_t, cand_vec) + 1; | |
312 | c->next_interp = 0; | |
313 | c->dependent = 0; | |
314 | c->sibling = 0; | |
315 | c->def_phi = NULL; | |
316 | c->dead_savings = savings; | |
317 | ||
318 | VEC_safe_push (slsr_cand_t, heap, cand_vec, c); | |
319 | c->basis = find_basis_for_candidate (c); | |
320 | record_potential_basis (c); | |
321 | ||
322 | return c; | |
323 | } | |
324 | ||
325 | /* Determine the target cost of statement GS when compiling according | |
326 | to SPEED. */ | |
327 | ||
328 | static int | |
329 | stmt_cost (gimple gs, bool speed) | |
330 | { | |
331 | tree lhs, rhs1, rhs2; | |
332 | enum machine_mode lhs_mode; | |
333 | ||
334 | gcc_assert (is_gimple_assign (gs)); | |
335 | lhs = gimple_assign_lhs (gs); | |
336 | rhs1 = gimple_assign_rhs1 (gs); | |
337 | lhs_mode = TYPE_MODE (TREE_TYPE (lhs)); | |
338 | ||
339 | switch (gimple_assign_rhs_code (gs)) | |
340 | { | |
341 | case MULT_EXPR: | |
342 | rhs2 = gimple_assign_rhs2 (gs); | |
343 | ||
344 | if (host_integerp (rhs2, 0)) | |
345 | return multiply_by_const_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, | |
346 | speed); | |
347 | ||
348 | gcc_assert (TREE_CODE (rhs1) != INTEGER_CST); | |
349 | return multiply_regs_cost (TYPE_MODE (TREE_TYPE (lhs)), speed); | |
350 | ||
351 | case PLUS_EXPR: | |
352 | case POINTER_PLUS_EXPR: | |
353 | case MINUS_EXPR: | |
354 | rhs2 = gimple_assign_rhs2 (gs); | |
355 | ||
356 | if (host_integerp (rhs2, 0)) | |
357 | return add_const_cost (TYPE_MODE (TREE_TYPE (rhs1)), speed); | |
358 | ||
359 | gcc_assert (TREE_CODE (rhs1) != INTEGER_CST); | |
360 | return add_regs_cost (lhs_mode, speed); | |
361 | ||
362 | case NEGATE_EXPR: | |
363 | return negate_reg_cost (lhs_mode, speed); | |
364 | ||
365 | case NOP_EXPR: | |
366 | return extend_or_trunc_reg_cost (TREE_TYPE (lhs), TREE_TYPE (rhs1), | |
367 | speed); | |
368 | ||
369 | /* Note that we don't assign costs to copies that in most cases | |
370 | will go away. */ | |
371 | default: | |
372 | ; | |
373 | } | |
374 | ||
375 | gcc_unreachable (); | |
376 | return 0; | |
377 | } | |
378 | ||
379 | /* Look up the defining statement for BASE_IN and return a pointer | |
380 | to its candidate in the candidate table, if any; otherwise NULL. | |
381 | Only CAND_ADD and CAND_MULT candidates are returned. */ | |
382 | ||
383 | static slsr_cand_t | |
384 | base_cand_from_table (tree base_in) | |
385 | { | |
386 | slsr_cand_t *result; | |
387 | ||
388 | gimple def = SSA_NAME_DEF_STMT (base_in); | |
389 | if (!def) | |
390 | return (slsr_cand_t) NULL; | |
391 | ||
392 | result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def); | |
393 | if (!result) | |
394 | return (slsr_cand_t) NULL; | |
395 | ||
396 | return *result; | |
397 | } | |
398 | ||
399 | /* Add an entry to the statement-to-candidate mapping. */ | |
400 | ||
401 | static void | |
402 | add_cand_for_stmt (gimple gs, slsr_cand_t c) | |
403 | { | |
404 | void **slot = pointer_map_insert (stmt_cand_map, gs); | |
405 | gcc_assert (!*slot); | |
406 | *slot = c; | |
407 | } | |
408 | \f | |
409 | /* Create a candidate entry for a statement GS, where GS multiplies | |
410 | two SSA names BASE_IN and STRIDE_IN. Propagate any known information | |
411 | about the two SSA names into the new candidate. Return the new | |
412 | candidate. */ | |
413 | ||
414 | static slsr_cand_t | |
415 | create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed) | |
416 | { | |
417 | tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE; | |
418 | double_int index; | |
419 | unsigned savings = 0; | |
420 | slsr_cand_t c; | |
421 | slsr_cand_t base_cand = base_cand_from_table (base_in); | |
422 | ||
423 | /* Look at all interpretations of the base candidate, if necessary, | |
424 | to find information to propagate into this candidate. */ | |
425 | while (base_cand && !base) | |
426 | { | |
427 | ||
428 | if (base_cand->kind == CAND_MULT | |
429 | && operand_equal_p (base_cand->stride, integer_one_node, 0)) | |
430 | { | |
431 | /* Y = (B + i') * 1 | |
432 | X = Y * Z | |
433 | ================ | |
434 | X = (B + i') * Z */ | |
435 | base = base_cand->base_name; | |
436 | index = base_cand->index; | |
437 | stride = stride_in; | |
438 | ctype = base_cand->cand_type; | |
439 | if (has_single_use (base_in)) | |
440 | savings = (base_cand->dead_savings | |
441 | + stmt_cost (base_cand->cand_stmt, speed)); | |
442 | } | |
443 | else if (base_cand->kind == CAND_ADD | |
444 | && TREE_CODE (base_cand->stride) == INTEGER_CST) | |
445 | { | |
446 | /* Y = B + (i' * S), S constant | |
447 | X = Y * Z | |
448 | ============================ | |
449 | X = B + ((i' * S) * Z) */ | |
450 | base = base_cand->base_name; | |
451 | index = double_int_mul (base_cand->index, | |
452 | tree_to_double_int (base_cand->stride)); | |
453 | stride = stride_in; | |
454 | ctype = base_cand->cand_type; | |
455 | if (has_single_use (base_in)) | |
456 | savings = (base_cand->dead_savings | |
457 | + stmt_cost (base_cand->cand_stmt, speed)); | |
458 | } | |
459 | ||
460 | if (base_cand->next_interp) | |
461 | base_cand = lookup_cand (base_cand->next_interp); | |
462 | else | |
463 | base_cand = NULL; | |
464 | } | |
465 | ||
466 | if (!base) | |
467 | { | |
468 | /* No interpretations had anything useful to propagate, so | |
469 | produce X = (Y + 0) * Z. */ | |
470 | base = base_in; | |
471 | index = double_int_zero; | |
472 | stride = stride_in; | |
473 | ctype = TREE_TYPE (SSA_NAME_VAR (base_in)); | |
474 | } | |
475 | ||
476 | c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride, | |
477 | ctype, savings); | |
478 | return c; | |
479 | } | |
480 | ||
481 | /* Create a candidate entry for a statement GS, where GS multiplies | |
482 | SSA name BASE_IN by constant STRIDE_IN. Propagate any known | |
483 | information about BASE_IN into the new candidate. Return the new | |
484 | candidate. */ | |
485 | ||
486 | static slsr_cand_t | |
487 | create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed) | |
488 | { | |
489 | tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE; | |
490 | double_int index, temp; | |
491 | unsigned savings = 0; | |
492 | slsr_cand_t c; | |
493 | slsr_cand_t base_cand = base_cand_from_table (base_in); | |
494 | ||
495 | /* Look at all interpretations of the base candidate, if necessary, | |
496 | to find information to propagate into this candidate. */ | |
497 | while (base_cand && !base) | |
498 | { | |
499 | if (base_cand->kind == CAND_MULT | |
500 | && TREE_CODE (base_cand->stride) == INTEGER_CST) | |
501 | { | |
502 | /* Y = (B + i') * S, S constant | |
503 | X = Y * c | |
504 | ============================ | |
505 | X = (B + i') * (S * c) */ | |
506 | base = base_cand->base_name; | |
507 | index = base_cand->index; | |
508 | temp = double_int_mul (tree_to_double_int (base_cand->stride), | |
509 | tree_to_double_int (stride_in)); | |
510 | stride = double_int_to_tree (TREE_TYPE (stride_in), temp); | |
511 | ctype = base_cand->cand_type; | |
512 | if (has_single_use (base_in)) | |
513 | savings = (base_cand->dead_savings | |
514 | + stmt_cost (base_cand->cand_stmt, speed)); | |
515 | } | |
516 | else if (base_cand->kind == CAND_ADD | |
517 | && operand_equal_p (base_cand->stride, integer_one_node, 0)) | |
518 | { | |
519 | /* Y = B + (i' * 1) | |
520 | X = Y * c | |
521 | =========================== | |
522 | X = (B + i') * c */ | |
523 | base = base_cand->base_name; | |
524 | index = base_cand->index; | |
525 | stride = stride_in; | |
526 | ctype = base_cand->cand_type; | |
527 | if (has_single_use (base_in)) | |
528 | savings = (base_cand->dead_savings | |
529 | + stmt_cost (base_cand->cand_stmt, speed)); | |
530 | } | |
531 | else if (base_cand->kind == CAND_ADD | |
532 | && double_int_one_p (base_cand->index) | |
533 | && TREE_CODE (base_cand->stride) == INTEGER_CST) | |
534 | { | |
535 | /* Y = B + (1 * S), S constant | |
536 | X = Y * c | |
537 | =========================== | |
538 | X = (B + S) * c */ | |
539 | base = base_cand->base_name; | |
540 | index = tree_to_double_int (base_cand->stride); | |
541 | stride = stride_in; | |
542 | ctype = base_cand->cand_type; | |
543 | if (has_single_use (base_in)) | |
544 | savings = (base_cand->dead_savings | |
545 | + stmt_cost (base_cand->cand_stmt, speed)); | |
546 | } | |
547 | ||
548 | if (base_cand->next_interp) | |
549 | base_cand = lookup_cand (base_cand->next_interp); | |
550 | else | |
551 | base_cand = NULL; | |
552 | } | |
553 | ||
554 | if (!base) | |
555 | { | |
556 | /* No interpretations had anything useful to propagate, so | |
557 | produce X = (Y + 0) * c. */ | |
558 | base = base_in; | |
559 | index = double_int_zero; | |
560 | stride = stride_in; | |
561 | ctype = TREE_TYPE (SSA_NAME_VAR (base_in)); | |
562 | } | |
563 | ||
564 | c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride, | |
565 | ctype, savings); | |
566 | return c; | |
567 | } | |
568 | ||
569 | /* Given GS which is a multiply of scalar integers, make an appropriate | |
570 | entry in the candidate table. If this is a multiply of two SSA names, | |
571 | create two CAND_MULT interpretations and attempt to find a basis for | |
572 | each of them. Otherwise, create a single CAND_MULT and attempt to | |
573 | find a basis. */ | |
574 | ||
575 | static void | |
576 | slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed) | |
577 | { | |
578 | slsr_cand_t c, c2; | |
579 | ||
580 | /* If this is a multiply of an SSA name with itself, it is highly | |
581 | unlikely that we will get a strength reduction opportunity, so | |
582 | don't record it as a candidate. This simplifies the logic for | |
583 | finding a basis, so if this is removed that must be considered. */ | |
584 | if (rhs1 == rhs2) | |
585 | return; | |
586 | ||
587 | if (TREE_CODE (rhs2) == SSA_NAME) | |
588 | { | |
589 | /* Record an interpretation of this statement in the candidate table | |
590 | assuming RHS1 is the base name and RHS2 is the stride. */ | |
591 | c = create_mul_ssa_cand (gs, rhs1, rhs2, speed); | |
592 | ||
593 | /* Add the first interpretation to the statement-candidate mapping. */ | |
594 | add_cand_for_stmt (gs, c); | |
595 | ||
596 | /* Record another interpretation of this statement assuming RHS1 | |
597 | is the stride and RHS2 is the base name. */ | |
598 | c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed); | |
599 | c->next_interp = c2->cand_num; | |
600 | } | |
601 | else | |
602 | { | |
603 | /* Record an interpretation for the multiply-immediate. */ | |
604 | c = create_mul_imm_cand (gs, rhs1, rhs2, speed); | |
605 | ||
606 | /* Add the interpretation to the statement-candidate mapping. */ | |
607 | add_cand_for_stmt (gs, c); | |
608 | } | |
609 | } | |
610 | ||
611 | /* Create a candidate entry for a statement GS, where GS adds two | |
612 | SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and | |
613 | subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known | |
614 | information about the two SSA names into the new candidate. | |
615 | Return the new candidate. */ | |
616 | ||
617 | static slsr_cand_t | |
618 | create_add_ssa_cand (gimple gs, tree base_in, tree addend_in, | |
619 | bool subtract_p, bool speed) | |
620 | { | |
621 | tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL; | |
622 | double_int index; | |
623 | unsigned savings = 0; | |
624 | slsr_cand_t c; | |
625 | slsr_cand_t base_cand = base_cand_from_table (base_in); | |
626 | slsr_cand_t addend_cand = base_cand_from_table (addend_in); | |
627 | ||
628 | /* The most useful transformation is a multiply-immediate feeding | |
629 | an add or subtract. Look for that first. */ | |
630 | while (addend_cand && !base) | |
631 | { | |
632 | if (addend_cand->kind == CAND_MULT | |
633 | && double_int_zero_p (addend_cand->index) | |
634 | && TREE_CODE (addend_cand->stride) == INTEGER_CST) | |
635 | { | |
636 | /* Z = (B + 0) * S, S constant | |
637 | X = Y +/- Z | |
638 | =========================== | |
639 | X = Y + ((+/-1 * S) * B) */ | |
640 | base = base_in; | |
641 | index = tree_to_double_int (addend_cand->stride); | |
642 | if (subtract_p) | |
643 | index = double_int_neg (index); | |
644 | stride = addend_cand->base_name; | |
645 | ctype = TREE_TYPE (SSA_NAME_VAR (base_in)); | |
646 | if (has_single_use (addend_in)) | |
647 | savings = (addend_cand->dead_savings | |
648 | + stmt_cost (addend_cand->cand_stmt, speed)); | |
649 | } | |
650 | ||
651 | if (addend_cand->next_interp) | |
652 | addend_cand = lookup_cand (addend_cand->next_interp); | |
653 | else | |
654 | addend_cand = NULL; | |
655 | } | |
656 | ||
657 | while (base_cand && !base) | |
658 | { | |
659 | if (base_cand->kind == CAND_ADD | |
660 | && (double_int_zero_p (base_cand->index) | |
661 | || operand_equal_p (base_cand->stride, | |
662 | integer_zero_node, 0))) | |
663 | { | |
664 | /* Y = B + (i' * S), i' * S = 0 | |
665 | X = Y +/- Z | |
666 | ============================ | |
667 | X = B + (+/-1 * Z) */ | |
668 | base = base_cand->base_name; | |
669 | index = subtract_p ? double_int_minus_one : double_int_one; | |
670 | stride = addend_in; | |
671 | ctype = base_cand->cand_type; | |
672 | if (has_single_use (base_in)) | |
673 | savings = (base_cand->dead_savings | |
674 | + stmt_cost (base_cand->cand_stmt, speed)); | |
675 | } | |
676 | else if (subtract_p) | |
677 | { | |
678 | slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in); | |
679 | ||
680 | while (subtrahend_cand && !base) | |
681 | { | |
682 | if (subtrahend_cand->kind == CAND_MULT | |
683 | && double_int_zero_p (subtrahend_cand->index) | |
684 | && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST) | |
685 | { | |
686 | /* Z = (B + 0) * S, S constant | |
687 | X = Y - Z | |
688 | =========================== | |
689 | Value: X = Y + ((-1 * S) * B) */ | |
690 | base = base_in; | |
691 | index = tree_to_double_int (subtrahend_cand->stride); | |
692 | index = double_int_neg (index); | |
693 | stride = subtrahend_cand->base_name; | |
694 | ctype = TREE_TYPE (SSA_NAME_VAR (base_in)); | |
695 | if (has_single_use (addend_in)) | |
696 | savings = (subtrahend_cand->dead_savings | |
697 | + stmt_cost (subtrahend_cand->cand_stmt, speed)); | |
698 | } | |
699 | ||
700 | if (subtrahend_cand->next_interp) | |
701 | subtrahend_cand = lookup_cand (subtrahend_cand->next_interp); | |
702 | else | |
703 | subtrahend_cand = NULL; | |
704 | } | |
705 | } | |
706 | ||
707 | if (base_cand->next_interp) | |
708 | base_cand = lookup_cand (base_cand->next_interp); | |
709 | else | |
710 | base_cand = NULL; | |
711 | } | |
712 | ||
713 | if (!base) | |
714 | { | |
715 | /* No interpretations had anything useful to propagate, so | |
716 | produce X = Y + (1 * Z). */ | |
717 | base = base_in; | |
718 | index = subtract_p ? double_int_minus_one : double_int_one; | |
719 | stride = addend_in; | |
720 | ctype = TREE_TYPE (SSA_NAME_VAR (base_in)); | |
721 | } | |
722 | ||
723 | c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride, | |
724 | ctype, savings); | |
725 | return c; | |
726 | } | |
727 | ||
728 | /* Create a candidate entry for a statement GS, where GS adds SSA | |
729 | name BASE_IN to constant INDEX_IN. Propagate any known information | |
730 | about BASE_IN into the new candidate. Return the new candidate. */ | |
731 | ||
732 | static slsr_cand_t | |
733 | create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed) | |
734 | { | |
735 | enum cand_kind kind = CAND_ADD; | |
736 | tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE; | |
737 | double_int index, multiple; | |
738 | unsigned savings = 0; | |
739 | slsr_cand_t c; | |
740 | slsr_cand_t base_cand = base_cand_from_table (base_in); | |
741 | ||
742 | while (base_cand && !base) | |
743 | { | |
744 | bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride)); | |
745 | ||
746 | if (TREE_CODE (base_cand->stride) == INTEGER_CST | |
747 | && double_int_multiple_of (index_in, | |
748 | tree_to_double_int (base_cand->stride), | |
749 | unsigned_p, | |
750 | &multiple)) | |
751 | { | |
752 | /* Y = (B + i') * S, S constant, c = kS for some integer k | |
753 | X = Y + c | |
754 | ============================ | |
755 | X = (B + (i'+ k)) * S | |
756 | OR | |
757 | Y = B + (i' * S), S constant, c = kS for some integer k | |
758 | X = Y + c | |
759 | ============================ | |
760 | X = (B + (i'+ k)) * S */ | |
761 | kind = base_cand->kind; | |
762 | base = base_cand->base_name; | |
763 | index = double_int_add (base_cand->index, multiple); | |
764 | stride = base_cand->stride; | |
765 | ctype = base_cand->cand_type; | |
766 | if (has_single_use (base_in)) | |
767 | savings = (base_cand->dead_savings | |
768 | + stmt_cost (base_cand->cand_stmt, speed)); | |
769 | } | |
770 | ||
771 | if (base_cand->next_interp) | |
772 | base_cand = lookup_cand (base_cand->next_interp); | |
773 | else | |
774 | base_cand = NULL; | |
775 | } | |
776 | ||
777 | if (!base) | |
778 | { | |
779 | /* No interpretations had anything useful to propagate, so | |
780 | produce X = Y + (c * 1). */ | |
781 | kind = CAND_ADD; | |
782 | base = base_in; | |
783 | index = index_in; | |
784 | stride = integer_one_node; | |
785 | ctype = TREE_TYPE (SSA_NAME_VAR (base_in)); | |
786 | } | |
787 | ||
788 | c = alloc_cand_and_find_basis (kind, gs, base, index, stride, | |
789 | ctype, savings); | |
790 | return c; | |
791 | } | |
792 | ||
793 | /* Given GS which is an add or subtract of scalar integers or pointers, | |
794 | make at least one appropriate entry in the candidate table. */ | |
795 | ||
796 | static void | |
797 | slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed) | |
798 | { | |
799 | bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR; | |
800 | slsr_cand_t c = NULL, c2; | |
801 | ||
802 | if (TREE_CODE (rhs2) == SSA_NAME) | |
803 | { | |
804 | /* First record an interpretation assuming RHS1 is the base name | |
805 | and RHS2 is the stride. But it doesn't make sense for the | |
806 | stride to be a pointer, so don't record a candidate in that case. */ | |
807 | if (!POINTER_TYPE_P (TREE_TYPE (SSA_NAME_VAR (rhs2)))) | |
808 | { | |
809 | c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed); | |
810 | ||
811 | /* Add the first interpretation to the statement-candidate | |
812 | mapping. */ | |
813 | add_cand_for_stmt (gs, c); | |
814 | } | |
815 | ||
816 | /* If the two RHS operands are identical, or this is a subtract, | |
817 | we're done. */ | |
818 | if (operand_equal_p (rhs1, rhs2, 0) || subtract_p) | |
819 | return; | |
820 | ||
821 | /* Otherwise, record another interpretation assuming RHS2 is the | |
822 | base name and RHS1 is the stride, again provided that the | |
823 | stride is not a pointer. */ | |
824 | if (!POINTER_TYPE_P (TREE_TYPE (SSA_NAME_VAR (rhs1)))) | |
825 | { | |
826 | c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed); | |
827 | if (c) | |
828 | c->next_interp = c2->cand_num; | |
829 | else | |
830 | add_cand_for_stmt (gs, c2); | |
831 | } | |
832 | } | |
833 | else | |
834 | { | |
835 | double_int index; | |
836 | ||
837 | /* Record an interpretation for the add-immediate. */ | |
838 | index = tree_to_double_int (rhs2); | |
839 | if (subtract_p) | |
840 | index = double_int_neg (index); | |
841 | ||
842 | c = create_add_imm_cand (gs, rhs1, index, speed); | |
843 | ||
844 | /* Add the interpretation to the statement-candidate mapping. */ | |
845 | add_cand_for_stmt (gs, c); | |
846 | } | |
847 | } | |
848 | ||
849 | /* Given GS which is a negate of a scalar integer, make an appropriate | |
850 | entry in the candidate table. A negate is equivalent to a multiply | |
851 | by -1. */ | |
852 | ||
853 | static void | |
854 | slsr_process_neg (gimple gs, tree rhs1, bool speed) | |
855 | { | |
856 | /* Record a CAND_MULT interpretation for the multiply by -1. */ | |
857 | slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed); | |
858 | ||
859 | /* Add the interpretation to the statement-candidate mapping. */ | |
860 | add_cand_for_stmt (gs, c); | |
861 | } | |
862 | ||
863 | /* Return TRUE if GS is a statement that defines an SSA name from | |
864 | a conversion and is legal for us to combine with an add and multiply | |
865 | in the candidate table. For example, suppose we have: | |
866 | ||
867 | A = B + i; | |
868 | C = (type) A; | |
869 | D = C * S; | |
870 | ||
871 | Without the type-cast, we would create a CAND_MULT for D with base B, | |
872 | index i, and stride S. We want to record this candidate only if it | |
873 | is equivalent to apply the type cast following the multiply: | |
874 | ||
875 | A = B + i; | |
876 | E = A * S; | |
877 | D = (type) E; | |
878 | ||
879 | We will record the type with the candidate for D. This allows us | |
880 | to use a similar previous candidate as a basis. If we have earlier seen | |
881 | ||
882 | A' = B + i'; | |
883 | C' = (type) A'; | |
884 | D' = C' * S; | |
885 | ||
886 | we can replace D with | |
887 | ||
888 | D = D' + (i - i') * S; | |
889 | ||
890 | But if moving the type-cast would change semantics, we mustn't do this. | |
891 | ||
892 | This is legitimate for casts from a non-wrapping integral type to | |
893 | any integral type of the same or larger size. It is not legitimate | |
894 | to convert a wrapping type to a non-wrapping type, or to a wrapping | |
895 | type of a different size. I.e., with a wrapping type, we must | |
896 | assume that the addition B + i could wrap, in which case performing | |
897 | the multiply before or after one of the "illegal" type casts will | |
898 | have different semantics. */ | |
899 | ||
900 | static bool | |
901 | legal_cast_p (gimple gs, tree rhs) | |
902 | { | |
903 | tree lhs, lhs_type, rhs_type; | |
904 | unsigned lhs_size, rhs_size; | |
905 | bool lhs_wraps, rhs_wraps; | |
906 | ||
907 | if (!is_gimple_assign (gs) | |
908 | || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs))) | |
909 | return false; | |
910 | ||
911 | lhs = gimple_assign_lhs (gs); | |
912 | lhs_type = TREE_TYPE (lhs); | |
913 | rhs_type = TREE_TYPE (rhs); | |
914 | lhs_size = TYPE_PRECISION (lhs_type); | |
915 | rhs_size = TYPE_PRECISION (rhs_type); | |
916 | lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type); | |
917 | rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type); | |
918 | ||
919 | if (lhs_size < rhs_size | |
920 | || (rhs_wraps && !lhs_wraps) | |
921 | || (rhs_wraps && lhs_wraps && rhs_size != lhs_size)) | |
922 | return false; | |
923 | ||
924 | return true; | |
925 | } | |
926 | ||
927 | /* Given GS which is a cast to a scalar integer type, determine whether | |
928 | the cast is legal for strength reduction. If so, make at least one | |
929 | appropriate entry in the candidate table. */ | |
930 | ||
931 | static void | |
932 | slsr_process_cast (gimple gs, tree rhs1, bool speed) | |
933 | { | |
934 | tree lhs, ctype; | |
935 | slsr_cand_t base_cand, c, c2; | |
936 | unsigned savings = 0; | |
937 | ||
938 | if (!legal_cast_p (gs, rhs1)) | |
939 | return; | |
940 | ||
941 | lhs = gimple_assign_lhs (gs); | |
942 | base_cand = base_cand_from_table (rhs1); | |
943 | ctype = TREE_TYPE (lhs); | |
944 | ||
945 | if (base_cand) | |
946 | { | |
947 | while (base_cand) | |
948 | { | |
949 | /* Propagate all data from the base candidate except the type, | |
950 | which comes from the cast, and the base candidate's cast, | |
951 | which is no longer applicable. */ | |
952 | if (has_single_use (rhs1)) | |
953 | savings = (base_cand->dead_savings | |
954 | + stmt_cost (base_cand->cand_stmt, speed)); | |
955 | ||
956 | c = alloc_cand_and_find_basis (base_cand->kind, gs, | |
957 | base_cand->base_name, | |
958 | base_cand->index, base_cand->stride, | |
959 | ctype, savings); | |
960 | if (base_cand->next_interp) | |
961 | base_cand = lookup_cand (base_cand->next_interp); | |
962 | else | |
963 | base_cand = NULL; | |
964 | } | |
965 | } | |
966 | else | |
967 | { | |
968 | /* If nothing is known about the RHS, create fresh CAND_ADD and | |
969 | CAND_MULT interpretations: | |
970 | ||
971 | X = Y + (0 * 1) | |
972 | X = (Y + 0) * 1 | |
973 | ||
974 | The first of these is somewhat arbitrary, but the choice of | |
975 | 1 for the stride simplifies the logic for propagating casts | |
976 | into their uses. */ | |
977 | c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero, | |
978 | integer_one_node, ctype, 0); | |
979 | c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero, | |
980 | integer_one_node, ctype, 0); | |
981 | c->next_interp = c2->cand_num; | |
982 | } | |
983 | ||
984 | /* Add the first (or only) interpretation to the statement-candidate | |
985 | mapping. */ | |
986 | add_cand_for_stmt (gs, c); | |
987 | } | |
988 | ||
989 | /* Given GS which is a copy of a scalar integer type, make at least one | |
990 | appropriate entry in the candidate table. | |
991 | ||
992 | This interface is included for completeness, but is unnecessary | |
993 | if this pass immediately follows a pass that performs copy | |
994 | propagation, such as DOM. */ | |
995 | ||
996 | static void | |
997 | slsr_process_copy (gimple gs, tree rhs1, bool speed) | |
998 | { | |
999 | slsr_cand_t base_cand, c, c2; | |
1000 | unsigned savings = 0; | |
1001 | ||
1002 | base_cand = base_cand_from_table (rhs1); | |
1003 | ||
1004 | if (base_cand) | |
1005 | { | |
1006 | while (base_cand) | |
1007 | { | |
1008 | /* Propagate all data from the base candidate. */ | |
1009 | if (has_single_use (rhs1)) | |
1010 | savings = (base_cand->dead_savings | |
1011 | + stmt_cost (base_cand->cand_stmt, speed)); | |
1012 | ||
1013 | c = alloc_cand_and_find_basis (base_cand->kind, gs, | |
1014 | base_cand->base_name, | |
1015 | base_cand->index, base_cand->stride, | |
1016 | base_cand->cand_type, savings); | |
1017 | if (base_cand->next_interp) | |
1018 | base_cand = lookup_cand (base_cand->next_interp); | |
1019 | else | |
1020 | base_cand = NULL; | |
1021 | } | |
1022 | } | |
1023 | else | |
1024 | { | |
1025 | /* If nothing is known about the RHS, create fresh CAND_ADD and | |
1026 | CAND_MULT interpretations: | |
1027 | ||
1028 | X = Y + (0 * 1) | |
1029 | X = (Y + 0) * 1 | |
1030 | ||
1031 | The first of these is somewhat arbitrary, but the choice of | |
1032 | 1 for the stride simplifies the logic for propagating casts | |
1033 | into their uses. */ | |
1034 | c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero, | |
1035 | integer_one_node, TREE_TYPE (rhs1), 0); | |
1036 | c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero, | |
1037 | integer_one_node, TREE_TYPE (rhs1), 0); | |
1038 | c->next_interp = c2->cand_num; | |
1039 | } | |
1040 | ||
1041 | /* Add the first (or only) interpretation to the statement-candidate | |
1042 | mapping. */ | |
1043 | add_cand_for_stmt (gs, c); | |
1044 | } | |
1045 | \f | |
1046 | /* Find strength-reduction candidates in block BB. */ | |
1047 | ||
1048 | static void | |
1049 | find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED, | |
1050 | basic_block bb) | |
1051 | { | |
1052 | bool speed = optimize_bb_for_speed_p (bb); | |
1053 | gimple_stmt_iterator gsi; | |
1054 | ||
1055 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1056 | { | |
1057 | gimple gs = gsi_stmt (gsi); | |
1058 | ||
1059 | if (is_gimple_assign (gs) | |
1060 | && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs))))) | |
1061 | { | |
1062 | tree rhs1 = NULL_TREE, rhs2 = NULL_TREE; | |
1063 | ||
1064 | switch (gimple_assign_rhs_code (gs)) | |
1065 | { | |
1066 | case MULT_EXPR: | |
1067 | case PLUS_EXPR: | |
1068 | rhs1 = gimple_assign_rhs1 (gs); | |
1069 | rhs2 = gimple_assign_rhs2 (gs); | |
1070 | /* Should never happen, but currently some buggy situations | |
1071 | in earlier phases put constants in rhs1. */ | |
1072 | if (TREE_CODE (rhs1) != SSA_NAME) | |
1073 | continue; | |
1074 | break; | |
1075 | ||
1076 | /* Possible future opportunity: rhs1 of a ptr+ can be | |
1077 | an ADDR_EXPR. */ | |
1078 | case POINTER_PLUS_EXPR: | |
1079 | case MINUS_EXPR: | |
1080 | rhs2 = gimple_assign_rhs2 (gs); | |
1081 | /* Fall-through. */ | |
1082 | ||
1083 | case NOP_EXPR: | |
1084 | case MODIFY_EXPR: | |
1085 | case NEGATE_EXPR: | |
1086 | rhs1 = gimple_assign_rhs1 (gs); | |
1087 | if (TREE_CODE (rhs1) != SSA_NAME) | |
1088 | continue; | |
1089 | break; | |
1090 | ||
1091 | default: | |
1092 | ; | |
1093 | } | |
1094 | ||
1095 | switch (gimple_assign_rhs_code (gs)) | |
1096 | { | |
1097 | case MULT_EXPR: | |
1098 | slsr_process_mul (gs, rhs1, rhs2, speed); | |
1099 | break; | |
1100 | ||
1101 | case PLUS_EXPR: | |
1102 | case POINTER_PLUS_EXPR: | |
1103 | case MINUS_EXPR: | |
1104 | slsr_process_add (gs, rhs1, rhs2, speed); | |
1105 | break; | |
1106 | ||
1107 | case NEGATE_EXPR: | |
1108 | slsr_process_neg (gs, rhs1, speed); | |
1109 | break; | |
1110 | ||
1111 | case NOP_EXPR: | |
1112 | slsr_process_cast (gs, rhs1, speed); | |
1113 | break; | |
1114 | ||
1115 | case MODIFY_EXPR: | |
1116 | slsr_process_copy (gs, rhs1, speed); | |
1117 | break; | |
1118 | ||
1119 | default: | |
1120 | ; | |
1121 | } | |
1122 | } | |
1123 | } | |
1124 | } | |
1125 | \f | |
1126 | /* Dump a candidate for debug. */ | |
1127 | ||
1128 | static void | |
1129 | dump_candidate (slsr_cand_t c) | |
1130 | { | |
1131 | fprintf (dump_file, "%3d [%d] ", c->cand_num, | |
1132 | gimple_bb (c->cand_stmt)->index); | |
1133 | print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); | |
1134 | switch (c->kind) | |
1135 | { | |
1136 | case CAND_MULT: | |
1137 | fputs (" MULT : (", dump_file); | |
1138 | print_generic_expr (dump_file, c->base_name, 0); | |
1139 | fputs (" + ", dump_file); | |
1140 | dump_double_int (dump_file, c->index, false); | |
1141 | fputs (") * ", dump_file); | |
1142 | print_generic_expr (dump_file, c->stride, 0); | |
1143 | fputs (" : ", dump_file); | |
1144 | break; | |
1145 | case CAND_ADD: | |
1146 | fputs (" ADD : ", dump_file); | |
1147 | print_generic_expr (dump_file, c->base_name, 0); | |
1148 | fputs (" + (", dump_file); | |
1149 | dump_double_int (dump_file, c->index, false); | |
1150 | fputs (" * ", dump_file); | |
1151 | print_generic_expr (dump_file, c->stride, 0); | |
1152 | fputs (") : ", dump_file); | |
1153 | break; | |
1154 | default: | |
1155 | gcc_unreachable (); | |
1156 | } | |
1157 | print_generic_expr (dump_file, c->cand_type, 0); | |
1158 | fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n", | |
1159 | c->basis, c->dependent, c->sibling); | |
1160 | fprintf (dump_file, " next-interp: %d dead-savings: %d\n", | |
1161 | c->next_interp, c->dead_savings); | |
1162 | if (c->def_phi) | |
1163 | { | |
1164 | fputs (" phi: ", dump_file); | |
1165 | print_gimple_stmt (dump_file, c->def_phi, 0, 0); | |
1166 | } | |
1167 | fputs ("\n", dump_file); | |
1168 | } | |
1169 | ||
1170 | /* Dump the candidate vector for debug. */ | |
1171 | ||
1172 | static void | |
1173 | dump_cand_vec (void) | |
1174 | { | |
1175 | unsigned i; | |
1176 | slsr_cand_t c; | |
1177 | ||
1178 | fprintf (dump_file, "\nStrength reduction candidate vector:\n\n"); | |
1179 | ||
1180 | FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c) | |
1181 | dump_candidate (c); | |
1182 | } | |
1183 | ||
1184 | /* Dump the candidate chains. */ | |
1185 | ||
1186 | static void | |
1187 | dump_cand_chains (void) | |
1188 | { | |
1189 | unsigned i; | |
1190 | ||
1191 | fprintf (dump_file, "\nStrength reduction candidate chains:\n\n"); | |
1192 | ||
1193 | for (i = 0; i < num_ssa_names; i++) | |
1194 | { | |
1195 | const_cand_chain_t chain = base_cand_map[i]; | |
1196 | ||
1197 | if (chain) | |
1198 | { | |
1199 | cand_chain_t p; | |
1200 | ||
1201 | print_generic_expr (dump_file, chain->base_name, 0); | |
1202 | fprintf (dump_file, " -> %d", chain->cand->cand_num); | |
1203 | ||
1204 | for (p = chain->next; p; p = p->next) | |
1205 | fprintf (dump_file, " -> %d", p->cand->cand_num); | |
1206 | ||
1207 | fputs ("\n", dump_file); | |
1208 | } | |
1209 | } | |
1210 | ||
1211 | fputs ("\n", dump_file); | |
1212 | } | |
1213 | ||
1214 | \f | |
1215 | /* Recursive helper for unconditional_cands_with_known_stride_p. | |
1216 | Returns TRUE iff C, its siblings, and its dependents are all | |
1217 | unconditional candidates. */ | |
1218 | ||
1219 | static bool | |
1220 | unconditional_cands (slsr_cand_t c) | |
1221 | { | |
1222 | if (c->def_phi) | |
1223 | return false; | |
1224 | ||
1225 | if (c->sibling && !unconditional_cands (lookup_cand (c->sibling))) | |
1226 | return false; | |
1227 | ||
1228 | if (c->dependent && !unconditional_cands (lookup_cand (c->dependent))) | |
1229 | return false; | |
1230 | ||
1231 | return true; | |
1232 | } | |
1233 | ||
1234 | /* Determine whether or not the tree of candidates rooted at | |
1235 | ROOT consists entirely of unconditional increments with | |
1236 | an INTEGER_CST stride. */ | |
1237 | ||
1238 | static bool | |
1239 | unconditional_cands_with_known_stride_p (slsr_cand_t root) | |
1240 | { | |
1241 | /* The stride is identical for all related candidates, so | |
1242 | check it once. */ | |
1243 | if (TREE_CODE (root->stride) != INTEGER_CST) | |
1244 | return false; | |
1245 | ||
1246 | return unconditional_cands (lookup_cand (root->dependent)); | |
1247 | } | |
1248 | ||
1249 | /* Calculate the increment required for candidate C relative to | |
1250 | its basis. */ | |
1251 | ||
1252 | static double_int | |
1253 | cand_increment (slsr_cand_t c) | |
1254 | { | |
1255 | slsr_cand_t basis; | |
1256 | ||
1257 | /* If the candidate doesn't have a basis, just return its own | |
1258 | index. This is useful in record_increments to help us find | |
1259 | an existing initializer. */ | |
1260 | if (!c->basis) | |
1261 | return c->index; | |
1262 | ||
1263 | basis = lookup_cand (c->basis); | |
1264 | gcc_assert (operand_equal_p (c->base_name, basis->base_name, 0)); | |
1265 | return double_int_sub (c->index, basis->index); | |
1266 | } | |
1267 | ||
1268 | /* Return TRUE iff candidate C has already been replaced under | |
1269 | another interpretation. */ | |
1270 | ||
1271 | static inline bool | |
1272 | cand_already_replaced (slsr_cand_t c) | |
1273 | { | |
1274 | return (gimple_bb (c->cand_stmt) == 0); | |
1275 | } | |
1276 | ||
1277 | /* Helper routine for replace_dependents, doing the work for a | |
1278 | single candidate C. */ | |
1279 | ||
1280 | static void | |
1281 | replace_dependent (slsr_cand_t c, enum tree_code cand_code) | |
1282 | { | |
1283 | double_int stride = tree_to_double_int (c->stride); | |
1284 | double_int bump = double_int_mul (cand_increment (c), stride); | |
1285 | gimple stmt_to_print = NULL; | |
1286 | slsr_cand_t basis; | |
1287 | tree basis_name, incr_type, bump_tree; | |
1288 | enum tree_code code; | |
1289 | ||
1290 | /* It is highly unlikely, but possible, that the resulting | |
1291 | bump doesn't fit in a HWI. Abandon the replacement | |
1292 | in this case. Restriction to signed HWI is conservative | |
1293 | for unsigned types but allows for safe negation without | |
1294 | twisted logic. */ | |
1295 | if (!double_int_fits_in_shwi_p (bump)) | |
1296 | return; | |
1297 | ||
1298 | basis = lookup_cand (c->basis); | |
1299 | basis_name = gimple_assign_lhs (basis->cand_stmt); | |
1300 | incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt)); | |
1301 | code = PLUS_EXPR; | |
1302 | ||
1303 | if (double_int_negative_p (bump)) | |
1304 | { | |
1305 | code = MINUS_EXPR; | |
1306 | bump = double_int_neg (bump); | |
1307 | } | |
1308 | ||
1309 | bump_tree = double_int_to_tree (incr_type, bump); | |
1310 | ||
1311 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1312 | { | |
1313 | fputs ("Replacing: ", dump_file); | |
1314 | print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); | |
1315 | } | |
1316 | ||
1317 | if (double_int_zero_p (bump)) | |
1318 | { | |
1319 | tree lhs = gimple_assign_lhs (c->cand_stmt); | |
1320 | gimple copy_stmt = gimple_build_assign (lhs, basis_name); | |
1321 | gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); | |
1322 | gimple_set_location (copy_stmt, gimple_location (c->cand_stmt)); | |
1323 | gsi_replace (&gsi, copy_stmt, false); | |
1324 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1325 | stmt_to_print = copy_stmt; | |
1326 | } | |
1327 | else | |
1328 | { | |
1329 | tree rhs1 = gimple_assign_rhs1 (c->cand_stmt); | |
1330 | tree rhs2 = gimple_assign_rhs2 (c->cand_stmt); | |
1331 | if (cand_code != NEGATE_EXPR | |
1332 | && ((operand_equal_p (rhs1, basis_name, 0) | |
1333 | && operand_equal_p (rhs2, bump_tree, 0)) | |
1334 | || (operand_equal_p (rhs1, bump_tree, 0) | |
1335 | && operand_equal_p (rhs2, basis_name, 0)))) | |
1336 | { | |
1337 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1338 | { | |
1339 | fputs ("(duplicate, not actually replacing)", dump_file); | |
1340 | stmt_to_print = c->cand_stmt; | |
1341 | } | |
1342 | } | |
1343 | else | |
1344 | { | |
1345 | gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); | |
1346 | gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree); | |
1347 | update_stmt (gsi_stmt (gsi)); | |
1348 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1349 | stmt_to_print = gsi_stmt (gsi); | |
1350 | } | |
1351 | } | |
1352 | ||
1353 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1354 | { | |
1355 | fputs ("With: ", dump_file); | |
1356 | print_gimple_stmt (dump_file, stmt_to_print, 0, 0); | |
1357 | fputs ("\n", dump_file); | |
1358 | } | |
1359 | } | |
1360 | ||
1361 | /* Replace candidate C, each sibling of candidate C, and each | |
1362 | dependent of candidate C with an add or subtract. Note that we | |
1363 | only operate on CAND_MULTs with known strides, so we will never | |
1364 | generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is | |
1365 | replaced by X = Y + ((i - i') * S), as described in the module | |
1366 | commentary. The folded value ((i - i') * S) is referred to here | |
1367 | as the "bump." */ | |
1368 | ||
1369 | static void | |
1370 | replace_dependents (slsr_cand_t c) | |
1371 | { | |
1372 | enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt); | |
1373 | ||
1374 | /* It is not useful to replace casts, copies, or adds of an SSA name | |
1375 | and a constant. Also skip candidates that have already been | |
1376 | replaced under another interpretation. */ | |
1377 | if (cand_code != MODIFY_EXPR | |
1378 | && cand_code != NOP_EXPR | |
1379 | && c->kind == CAND_MULT | |
1380 | && !cand_already_replaced (c)) | |
1381 | replace_dependent (c, cand_code); | |
1382 | ||
1383 | if (c->sibling) | |
1384 | replace_dependents (lookup_cand (c->sibling)); | |
1385 | ||
1386 | if (c->dependent) | |
1387 | replace_dependents (lookup_cand (c->dependent)); | |
1388 | } | |
1389 | \f | |
1390 | /* Analyze costs of related candidates in the candidate vector, | |
1391 | and make beneficial replacements. */ | |
1392 | ||
1393 | static void | |
1394 | analyze_candidates_and_replace (void) | |
1395 | { | |
1396 | unsigned i; | |
1397 | slsr_cand_t c; | |
1398 | ||
1399 | /* Each candidate that has a null basis and a non-null | |
1400 | dependent is the root of a tree of related statements. | |
1401 | Analyze each tree to determine a subset of those | |
1402 | statements that can be replaced with maximum benefit. */ | |
1403 | FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c) | |
1404 | { | |
1405 | slsr_cand_t first_dep; | |
1406 | ||
1407 | if (c->basis != 0 || c->dependent == 0) | |
1408 | continue; | |
1409 | ||
1410 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1411 | fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n", | |
1412 | c->cand_num); | |
1413 | ||
1414 | first_dep = lookup_cand (c->dependent); | |
1415 | ||
1416 | /* If the common stride of all related candidates is a | |
1417 | known constant, and none of these has a phi-dependence, | |
1418 | then all replacements are considered profitable. | |
1419 | Each replaces a multiply by a single add, with the | |
1420 | possibility that a feeding add also goes dead as a | |
1421 | result. */ | |
1422 | if (unconditional_cands_with_known_stride_p (c)) | |
1423 | replace_dependents (first_dep); | |
1424 | ||
1425 | /* TODO: When the stride is an SSA name, it may still be | |
1426 | profitable to replace some or all of the dependent | |
1427 | candidates, depending on whether the introduced increments | |
1428 | can be reused, or are less expensive to calculate than | |
1429 | the replaced statements. */ | |
1430 | ||
1431 | /* TODO: Strength-reduce data references with implicit | |
1432 | multiplication in their addressing expressions. */ | |
1433 | ||
1434 | /* TODO: When conditional increments occur so that a | |
1435 | candidate is dependent upon a phi-basis, the cost of | |
1436 | introducing a temporary must be accounted for. */ | |
1437 | } | |
1438 | } | |
1439 | ||
1440 | static unsigned | |
1441 | execute_strength_reduction (void) | |
1442 | { | |
1443 | struct dom_walk_data walk_data; | |
1444 | ||
1445 | /* Create the obstack where candidates will reside. */ | |
1446 | gcc_obstack_init (&cand_obstack); | |
1447 | ||
1448 | /* Allocate the candidate vector. */ | |
1449 | cand_vec = VEC_alloc (slsr_cand_t, heap, 128); | |
1450 | ||
1451 | /* Allocate the mapping from statements to candidate indices. */ | |
1452 | stmt_cand_map = pointer_map_create (); | |
1453 | ||
1454 | /* Create the obstack where candidate chains will reside. */ | |
1455 | gcc_obstack_init (&chain_obstack); | |
1456 | ||
1457 | /* Allocate the mapping from base names to candidate chains. */ | |
1458 | base_cand_map = XNEWVEC (cand_chain_t, num_ssa_names); | |
1459 | memset (base_cand_map, 0, num_ssa_names * sizeof (cand_chain_t)); | |
1460 | ||
1461 | /* Initialize the loop optimizer. We need to detect flow across | |
1462 | back edges, and this gives us dominator information as well. */ | |
1463 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); | |
1464 | ||
1465 | /* Initialize costs tables in IVOPTS. */ | |
1466 | initialize_costs (); | |
1467 | ||
1468 | /* Set up callbacks for the generic dominator tree walker. */ | |
1469 | walk_data.dom_direction = CDI_DOMINATORS; | |
1470 | walk_data.initialize_block_local_data = NULL; | |
1471 | walk_data.before_dom_children = find_candidates_in_block; | |
1472 | walk_data.after_dom_children = NULL; | |
1473 | walk_data.global_data = NULL; | |
1474 | walk_data.block_local_data_size = 0; | |
1475 | init_walk_dominator_tree (&walk_data); | |
1476 | ||
1477 | /* Walk the CFG in predominator order looking for strength reduction | |
1478 | candidates. */ | |
1479 | walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); | |
1480 | ||
1481 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1482 | { | |
1483 | dump_cand_vec (); | |
1484 | dump_cand_chains (); | |
1485 | } | |
1486 | ||
1487 | /* Analyze costs and make appropriate replacements. */ | |
1488 | analyze_candidates_and_replace (); | |
1489 | ||
1490 | /* Free resources. */ | |
1491 | fini_walk_dominator_tree (&walk_data); | |
1492 | loop_optimizer_finalize (); | |
1493 | free (base_cand_map); | |
1494 | obstack_free (&chain_obstack, NULL); | |
1495 | pointer_map_destroy (stmt_cand_map); | |
1496 | VEC_free (slsr_cand_t, heap, cand_vec); | |
1497 | obstack_free (&cand_obstack, NULL); | |
1498 | finalize_costs (); | |
1499 | ||
1500 | return 0; | |
1501 | } | |
1502 | ||
1503 | static bool | |
1504 | gate_strength_reduction (void) | |
1505 | { | |
1506 | return optimize > 0; | |
1507 | } | |
1508 | ||
1509 | struct gimple_opt_pass pass_strength_reduction = | |
1510 | { | |
1511 | { | |
1512 | GIMPLE_PASS, | |
1513 | "slsr", /* name */ | |
1514 | gate_strength_reduction, /* gate */ | |
1515 | execute_strength_reduction, /* execute */ | |
1516 | NULL, /* sub */ | |
1517 | NULL, /* next */ | |
1518 | 0, /* static_pass_number */ | |
1519 | TV_GIMPLE_SLSR, /* tv_id */ | |
1520 | PROP_cfg | PROP_ssa, /* properties_required */ | |
1521 | 0, /* properties_provided */ | |
1522 | 0, /* properties_destroyed */ | |
1523 | 0, /* todo_flags_start */ | |
1524 | TODO_verify_ssa /* todo_flags_finish */ | |
1525 | } | |
1526 | }; |