]>
Commit | Line | Data |
---|---|---|
4ee9c684 | 1 | /* Scalar Replacement of Aggregates (SRA) converts some structure |
2 | references into scalar references, exposing them to the scalar | |
3 | optimizers. | |
005b6665 | 4 | Copyright (C) 2003, 2004, 2005, 2006, 2007 |
5 | Free Software Foundation, Inc. | |
4ee9c684 | 6 | Contributed by Diego Novillo <dnovillo@redhat.com> |
7 | ||
8 | This file is part of GCC. | |
ac13e8d9 | 9 | |
4ee9c684 | 10 | GCC is free software; you can redistribute it and/or modify it |
11 | under the terms of the GNU General Public License as published by the | |
8c4c00c1 | 12 | Free Software Foundation; either version 3, or (at your option) any |
4ee9c684 | 13 | later version. |
ac13e8d9 | 14 | |
4ee9c684 | 15 | GCC is distributed in the hope that it will be useful, but WITHOUT |
16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
17 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
18 | for more details. | |
ac13e8d9 | 19 | |
4ee9c684 | 20 | You should have received a copy of the GNU General Public License |
8c4c00c1 | 21 | along with GCC; see the file COPYING3. If not see |
22 | <http://www.gnu.org/licenses/>. */ | |
4ee9c684 | 23 | |
24 | #include "config.h" | |
25 | #include "system.h" | |
26 | #include "coretypes.h" | |
27 | #include "tm.h" | |
4ee9c684 | 28 | #include "ggc.h" |
29 | #include "tree.h" | |
30 | ||
31 | /* These RTL headers are needed for basic-block.h. */ | |
32 | #include "rtl.h" | |
33 | #include "tm_p.h" | |
34 | #include "hard-reg-set.h" | |
35 | #include "basic-block.h" | |
36 | #include "diagnostic.h" | |
37 | #include "langhooks.h" | |
38 | #include "tree-inline.h" | |
39 | #include "tree-flow.h" | |
88bce636 | 40 | #include "tree-gimple.h" |
4ee9c684 | 41 | #include "tree-dump.h" |
42 | #include "tree-pass.h" | |
43 | #include "timevar.h" | |
44 | #include "flags.h" | |
45 | #include "bitmap.h" | |
f50f6fc3 | 46 | #include "obstack.h" |
47 | #include "target.h" | |
c2eaba28 | 48 | /* expr.h is needed for MOVE_RATIO. */ |
49 | #include "expr.h" | |
152a1c0a | 50 | #include "params.h" |
4ee9c684 | 51 | |
52 | ||
f50f6fc3 | 53 | /* This object of this pass is to replace a non-addressable aggregate with a |
54 | set of independent variables. Most of the time, all of these variables | |
ac13e8d9 | 55 | will be scalars. But a secondary objective is to break up larger |
f50f6fc3 | 56 | aggregates into smaller aggregates. In the process we may find that some |
57 | bits of the larger aggregate can be deleted as unreferenced. | |
4ee9c684 | 58 | |
f50f6fc3 | 59 | This substitution is done globally. More localized substitutions would |
60 | be the purvey of a load-store motion pass. | |
61 | ||
62 | The optimization proceeds in phases: | |
63 | ||
64 | (1) Identify variables that have types that are candidates for | |
65 | decomposition. | |
66 | ||
67 | (2) Scan the function looking for the ways these variables are used. | |
68 | In particular we're interested in the number of times a variable | |
69 | (or member) is needed as a complete unit, and the number of times | |
70 | a variable (or member) is copied. | |
71 | ||
72 | (3) Based on the usage profile, instantiate substitution variables. | |
73 | ||
74 | (4) Scan the function making replacements. | |
75 | */ | |
4ee9c684 | 76 | |
4ee9c684 | 77 | |
1f0a4df8 | 78 | /* True if this is the "early" pass, before inlining. */ |
79 | static bool early_sra; | |
80 | ||
c96420f8 | 81 | /* The set of todo flags to return from tree_sra. */ |
82 | static unsigned int todoflags; | |
83 | ||
4ee9c684 | 84 | /* The set of aggregate variables that are candidates for scalarization. */ |
85 | static bitmap sra_candidates; | |
86 | ||
87 | /* Set of scalarizable PARM_DECLs that need copy-in operations at the | |
88 | beginning of the function. */ | |
89 | static bitmap needs_copy_in; | |
90 | ||
f50f6fc3 | 91 | /* Sets of bit pairs that cache type decomposition and instantiation. */ |
92 | static bitmap sra_type_decomp_cache; | |
93 | static bitmap sra_type_inst_cache; | |
94 | ||
2100c228 | 95 | /* One of these structures is created for each candidate aggregate and |
96 | each (accessed) member or group of members of such an aggregate. */ | |
4ee9c684 | 97 | struct sra_elt |
98 | { | |
f50f6fc3 | 99 | /* A tree of the elements. Used when we want to traverse everything. */ |
100 | struct sra_elt *parent; | |
2100c228 | 101 | struct sra_elt *groups; |
f50f6fc3 | 102 | struct sra_elt *children; |
103 | struct sra_elt *sibling; | |
4ee9c684 | 104 | |
f50f6fc3 | 105 | /* If this element is a root, then this is the VAR_DECL. If this is |
106 | a sub-element, this is some token used to identify the reference. | |
107 | In the case of COMPONENT_REF, this is the FIELD_DECL. In the case | |
2100c228 | 108 | of an ARRAY_REF, this is the (constant) index. In the case of an |
109 | ARRAY_RANGE_REF, this is the (constant) RANGE_EXPR. In the case | |
110 | of a complex number, this is a zero or one. */ | |
f50f6fc3 | 111 | tree element; |
4ee9c684 | 112 | |
f50f6fc3 | 113 | /* The type of the element. */ |
114 | tree type; | |
4ee9c684 | 115 | |
f50f6fc3 | 116 | /* A VAR_DECL, for any sub-element we've decided to replace. */ |
117 | tree replacement; | |
4ee9c684 | 118 | |
f50f6fc3 | 119 | /* The number of times the element is referenced as a whole. I.e. |
120 | given "a.b.c", this would be incremented for C, but not for A or B. */ | |
121 | unsigned int n_uses; | |
4ee9c684 | 122 | |
f50f6fc3 | 123 | /* The number of times the element is copied to or from another |
124 | scalarizable element. */ | |
125 | unsigned int n_copies; | |
4ee9c684 | 126 | |
f50f6fc3 | 127 | /* True if TYPE is scalar. */ |
128 | bool is_scalar; | |
4ee9c684 | 129 | |
2100c228 | 130 | /* True if this element is a group of members of its parent. */ |
131 | bool is_group; | |
132 | ||
f50f6fc3 | 133 | /* True if we saw something about this element that prevents scalarization, |
134 | such as non-constant indexing. */ | |
135 | bool cannot_scalarize; | |
4ee9c684 | 136 | |
f50f6fc3 | 137 | /* True if we've decided that structure-to-structure assignment |
138 | should happen via memcpy and not per-element. */ | |
139 | bool use_block_copy; | |
2cf24776 | 140 | |
fdea8514 | 141 | /* True if everything under this element has been marked TREE_NO_WARNING. */ |
142 | bool all_no_warning; | |
143 | ||
f50f6fc3 | 144 | /* A flag for use with/after random access traversals. */ |
145 | bool visited; | |
8ea8de24 | 146 | |
147 | /* True if there is BIT_FIELD_REF on the lhs with a vector. */ | |
148 | bool is_vector_lhs; | |
f50f6fc3 | 149 | }; |
2cf24776 | 150 | |
2100c228 | 151 | #define IS_ELEMENT_FOR_GROUP(ELEMENT) (TREE_CODE (ELEMENT) == RANGE_EXPR) |
152 | ||
153 | #define FOR_EACH_ACTUAL_CHILD(CHILD, ELT) \ | |
154 | for ((CHILD) = (ELT)->is_group \ | |
155 | ? next_child_for_group (NULL, (ELT)) \ | |
156 | : (ELT)->children; \ | |
157 | (CHILD); \ | |
158 | (CHILD) = (ELT)->is_group \ | |
159 | ? next_child_for_group ((CHILD), (ELT)) \ | |
160 | : (CHILD)->sibling) | |
161 | ||
162 | /* Helper function for above macro. Return next child in group. */ | |
163 | static struct sra_elt * | |
164 | next_child_for_group (struct sra_elt *child, struct sra_elt *group) | |
165 | { | |
166 | gcc_assert (group->is_group); | |
167 | ||
168 | /* Find the next child in the parent. */ | |
169 | if (child) | |
170 | child = child->sibling; | |
171 | else | |
172 | child = group->parent->children; | |
173 | ||
174 | /* Skip siblings that do not belong to the group. */ | |
175 | while (child) | |
176 | { | |
177 | tree g_elt = group->element; | |
178 | if (TREE_CODE (g_elt) == RANGE_EXPR) | |
179 | { | |
180 | if (!tree_int_cst_lt (child->element, TREE_OPERAND (g_elt, 0)) | |
181 | && !tree_int_cst_lt (TREE_OPERAND (g_elt, 1), child->element)) | |
182 | break; | |
183 | } | |
184 | else | |
185 | gcc_unreachable (); | |
186 | ||
187 | child = child->sibling; | |
188 | } | |
189 | ||
190 | return child; | |
191 | } | |
192 | ||
f50f6fc3 | 193 | /* Random access to the child of a parent is performed by hashing. |
c26a6416 | 194 | This prevents quadratic behavior, and allows SRA to function |
f50f6fc3 | 195 | reasonably on larger records. */ |
196 | static htab_t sra_map; | |
2cf24776 | 197 | |
f50f6fc3 | 198 | /* All structures are allocated out of the following obstack. */ |
199 | static struct obstack sra_obstack; | |
2cf24776 | 200 | |
f50f6fc3 | 201 | /* Debugging functions. */ |
202 | static void dump_sra_elt_name (FILE *, struct sra_elt *); | |
203 | extern void debug_sra_elt_name (struct sra_elt *); | |
4ee9c684 | 204 | |
9366434c | 205 | /* Forward declarations. */ |
206 | static tree generate_element_ref (struct sra_elt *); | |
f50f6fc3 | 207 | \f |
4ee9c684 | 208 | /* Return true if DECL is an SRA candidate. */ |
209 | ||
210 | static bool | |
211 | is_sra_candidate_decl (tree decl) | |
212 | { | |
a55dc2cd | 213 | return DECL_P (decl) && bitmap_bit_p (sra_candidates, DECL_UID (decl)); |
4ee9c684 | 214 | } |
215 | ||
f50f6fc3 | 216 | /* Return true if TYPE is a scalar type. */ |
4ee9c684 | 217 | |
218 | static bool | |
f50f6fc3 | 219 | is_sra_scalar_type (tree type) |
4ee9c684 | 220 | { |
f50f6fc3 | 221 | enum tree_code code = TREE_CODE (type); |
222 | return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE | |
06f0b99c | 223 | || code == FIXED_POINT_TYPE |
f50f6fc3 | 224 | || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE |
63bf54cf | 225 | || code == POINTER_TYPE || code == OFFSET_TYPE |
f50f6fc3 | 226 | || code == REFERENCE_TYPE); |
4ee9c684 | 227 | } |
228 | ||
f50f6fc3 | 229 | /* Return true if TYPE can be decomposed into a set of independent variables. |
230 | ||
231 | Note that this doesn't imply that all elements of TYPE can be | |
232 | instantiated, just that if we decide to break up the type into | |
233 | separate pieces that it can be done. */ | |
4f264c8b | 234 | |
f7d118a9 | 235 | bool |
236 | sra_type_can_be_decomposed_p (tree type) | |
4f264c8b | 237 | { |
f50f6fc3 | 238 | unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; |
239 | tree t; | |
4f264c8b | 240 | |
f50f6fc3 | 241 | /* Avoid searching the same type twice. */ |
242 | if (bitmap_bit_p (sra_type_decomp_cache, cache+0)) | |
243 | return true; | |
244 | if (bitmap_bit_p (sra_type_decomp_cache, cache+1)) | |
245 | return false; | |
4ee9c684 | 246 | |
9ee236f3 | 247 | /* The type must have a definite nonzero size. */ |
dcce4b90 | 248 | if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST |
249 | || integer_zerop (TYPE_SIZE (type))) | |
f50f6fc3 | 250 | goto fail; |
4ee9c684 | 251 | |
f50f6fc3 | 252 | /* The type must be a non-union aggregate. */ |
253 | switch (TREE_CODE (type)) | |
4ee9c684 | 254 | { |
f50f6fc3 | 255 | case RECORD_TYPE: |
256 | { | |
257 | bool saw_one_field = false; | |
4ee9c684 | 258 | |
f50f6fc3 | 259 | for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t)) |
260 | if (TREE_CODE (t) == FIELD_DECL) | |
4ee9c684 | 261 | { |
f50f6fc3 | 262 | /* Reject incorrectly represented bit fields. */ |
263 | if (DECL_BIT_FIELD (t) | |
264 | && (tree_low_cst (DECL_SIZE (t), 1) | |
265 | != TYPE_PRECISION (TREE_TYPE (t)))) | |
266 | goto fail; | |
4ee9c684 | 267 | |
f50f6fc3 | 268 | saw_one_field = true; |
269 | } | |
4ee9c684 | 270 | |
f50f6fc3 | 271 | /* Record types must have at least one field. */ |
272 | if (!saw_one_field) | |
273 | goto fail; | |
274 | } | |
275 | break; | |
4ee9c684 | 276 | |
f50f6fc3 | 277 | case ARRAY_TYPE: |
278 | /* Array types must have a fixed lower and upper bound. */ | |
279 | t = TYPE_DOMAIN (type); | |
280 | if (t == NULL) | |
281 | goto fail; | |
282 | if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t))) | |
283 | goto fail; | |
284 | if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t))) | |
285 | goto fail; | |
286 | break; | |
4ee9c684 | 287 | |
f50f6fc3 | 288 | case COMPLEX_TYPE: |
289 | break; | |
4ee9c684 | 290 | |
f50f6fc3 | 291 | default: |
292 | goto fail; | |
293 | } | |
4ee9c684 | 294 | |
f50f6fc3 | 295 | bitmap_set_bit (sra_type_decomp_cache, cache+0); |
296 | return true; | |
4ee9c684 | 297 | |
f50f6fc3 | 298 | fail: |
299 | bitmap_set_bit (sra_type_decomp_cache, cache+1); | |
300 | return false; | |
4ee9c684 | 301 | } |
302 | ||
f50f6fc3 | 303 | /* Return true if DECL can be decomposed into a set of independent |
304 | (though not necessarily scalar) variables. */ | |
4ee9c684 | 305 | |
f50f6fc3 | 306 | static bool |
307 | decl_can_be_decomposed_p (tree var) | |
4ee9c684 | 308 | { |
f50f6fc3 | 309 | /* Early out for scalars. */ |
310 | if (is_sra_scalar_type (TREE_TYPE (var))) | |
311 | return false; | |
4ee9c684 | 312 | |
f50f6fc3 | 313 | /* The variable must not be aliased. */ |
4ee9c684 | 314 | if (!is_gimple_non_addressable (var)) |
315 | { | |
316 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
317 | { | |
318 | fprintf (dump_file, "Cannot scalarize variable "); | |
319 | print_generic_expr (dump_file, var, dump_flags); | |
320 | fprintf (dump_file, " because it must live in memory\n"); | |
321 | } | |
322 | return false; | |
323 | } | |
324 | ||
f50f6fc3 | 325 | /* The variable must not be volatile. */ |
4ee9c684 | 326 | if (TREE_THIS_VOLATILE (var)) |
327 | { | |
328 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
329 | { | |
330 | fprintf (dump_file, "Cannot scalarize variable "); | |
331 | print_generic_expr (dump_file, var, dump_flags); | |
332 | fprintf (dump_file, " because it is declared volatile\n"); | |
333 | } | |
334 | return false; | |
335 | } | |
336 | ||
f50f6fc3 | 337 | /* We must be able to decompose the variable's type. */ |
f7d118a9 | 338 | if (!sra_type_can_be_decomposed_p (TREE_TYPE (var))) |
f50f6fc3 | 339 | { |
340 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
341 | { | |
342 | fprintf (dump_file, "Cannot scalarize variable "); | |
343 | print_generic_expr (dump_file, var, dump_flags); | |
344 | fprintf (dump_file, " because its type cannot be decomposed\n"); | |
345 | } | |
346 | return false; | |
347 | } | |
348 | ||
1f0a4df8 | 349 | /* HACK: if we decompose a va_list_type_node before inlining, then we'll |
350 | confuse tree-stdarg.c, and we won't be able to figure out which and | |
351 | how many arguments are accessed. This really should be improved in | |
352 | tree-stdarg.c, as the decomposition is truely a win. This could also | |
353 | be fixed if the stdarg pass ran early, but this can't be done until | |
354 | we've aliasing information early too. See PR 30791. */ | |
355 | if (early_sra | |
356 | && TYPE_MAIN_VARIANT (TREE_TYPE (var)) | |
357 | == TYPE_MAIN_VARIANT (va_list_type_node)) | |
358 | return false; | |
359 | ||
f50f6fc3 | 360 | return true; |
361 | } | |
362 | ||
363 | /* Return true if TYPE can be *completely* decomposed into scalars. */ | |
364 | ||
365 | static bool | |
366 | type_can_instantiate_all_elements (tree type) | |
367 | { | |
368 | if (is_sra_scalar_type (type)) | |
4ee9c684 | 369 | return true; |
f7d118a9 | 370 | if (!sra_type_can_be_decomposed_p (type)) |
f50f6fc3 | 371 | return false; |
4ee9c684 | 372 | |
f50f6fc3 | 373 | switch (TREE_CODE (type)) |
4ee9c684 | 374 | { |
f50f6fc3 | 375 | case RECORD_TYPE: |
376 | { | |
377 | unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; | |
378 | tree f; | |
4ee9c684 | 379 | |
f50f6fc3 | 380 | if (bitmap_bit_p (sra_type_inst_cache, cache+0)) |
381 | return true; | |
382 | if (bitmap_bit_p (sra_type_inst_cache, cache+1)) | |
4ee9c684 | 383 | return false; |
4ee9c684 | 384 | |
f50f6fc3 | 385 | for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) |
386 | if (TREE_CODE (f) == FIELD_DECL) | |
4ee9c684 | 387 | { |
f50f6fc3 | 388 | if (!type_can_instantiate_all_elements (TREE_TYPE (f))) |
389 | { | |
390 | bitmap_set_bit (sra_type_inst_cache, cache+1); | |
391 | return false; | |
392 | } | |
4ee9c684 | 393 | } |
4ee9c684 | 394 | |
f50f6fc3 | 395 | bitmap_set_bit (sra_type_inst_cache, cache+0); |
396 | return true; | |
397 | } | |
4ee9c684 | 398 | |
f50f6fc3 | 399 | case ARRAY_TYPE: |
400 | return type_can_instantiate_all_elements (TREE_TYPE (type)); | |
4ee9c684 | 401 | |
f50f6fc3 | 402 | case COMPLEX_TYPE: |
403 | return true; | |
4ee9c684 | 404 | |
f50f6fc3 | 405 | default: |
8c0963c4 | 406 | gcc_unreachable (); |
f50f6fc3 | 407 | } |
408 | } | |
4ee9c684 | 409 | |
f50f6fc3 | 410 | /* Test whether ELT or some sub-element cannot be scalarized. */ |
4ee9c684 | 411 | |
f50f6fc3 | 412 | static bool |
413 | can_completely_scalarize_p (struct sra_elt *elt) | |
4ee9c684 | 414 | { |
f50f6fc3 | 415 | struct sra_elt *c; |
416 | ||
417 | if (elt->cannot_scalarize) | |
418 | return false; | |
419 | ||
2100c228 | 420 | for (c = elt->children; c; c = c->sibling) |
421 | if (!can_completely_scalarize_p (c)) | |
422 | return false; | |
423 | ||
424 | for (c = elt->groups; c; c = c->sibling) | |
f50f6fc3 | 425 | if (!can_completely_scalarize_p (c)) |
426 | return false; | |
427 | ||
428 | return true; | |
429 | } | |
4ee9c684 | 430 | |
f50f6fc3 | 431 | \f |
432 | /* A simplified tree hashing algorithm that only handles the types of | |
433 | trees we expect to find in sra_elt->element. */ | |
4ee9c684 | 434 | |
f50f6fc3 | 435 | static hashval_t |
436 | sra_hash_tree (tree t) | |
437 | { | |
504d3463 | 438 | hashval_t h; |
439 | ||
4ee9c684 | 440 | switch (TREE_CODE (t)) |
441 | { | |
f50f6fc3 | 442 | case VAR_DECL: |
443 | case PARM_DECL: | |
444 | case RESULT_DECL: | |
504d3463 | 445 | h = DECL_UID (t); |
446 | break; | |
447 | ||
f50f6fc3 | 448 | case INTEGER_CST: |
504d3463 | 449 | h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t); |
450 | break; | |
451 | ||
2100c228 | 452 | case RANGE_EXPR: |
453 | h = iterative_hash_expr (TREE_OPERAND (t, 0), 0); | |
454 | h = iterative_hash_expr (TREE_OPERAND (t, 1), h); | |
455 | break; | |
456 | ||
504d3463 | 457 | case FIELD_DECL: |
458 | /* We can have types that are compatible, but have different member | |
459 | lists, so we can't hash fields by ID. Use offsets instead. */ | |
460 | h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0); | |
461 | h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h); | |
462 | break; | |
463 | ||
4ee9c684 | 464 | default: |
8c0963c4 | 465 | gcc_unreachable (); |
4ee9c684 | 466 | } |
504d3463 | 467 | |
468 | return h; | |
4ee9c684 | 469 | } |
470 | ||
f50f6fc3 | 471 | /* Hash function for type SRA_PAIR. */ |
4ee9c684 | 472 | |
f50f6fc3 | 473 | static hashval_t |
474 | sra_elt_hash (const void *x) | |
4ee9c684 | 475 | { |
f50f6fc3 | 476 | const struct sra_elt *e = x; |
477 | const struct sra_elt *p; | |
478 | hashval_t h; | |
4ee9c684 | 479 | |
f50f6fc3 | 480 | h = sra_hash_tree (e->element); |
4ee9c684 | 481 | |
6084da09 | 482 | /* Take into account everything back up the chain. Given that chain |
483 | lengths are rarely very long, this should be acceptable. If we | |
484 | truly identify this as a performance problem, it should work to | |
485 | hash the pointer value "e->parent". */ | |
f50f6fc3 | 486 | for (p = e->parent; p ; p = p->parent) |
6084da09 | 487 | h = (h * 65521) ^ sra_hash_tree (p->element); |
4ee9c684 | 488 | |
f50f6fc3 | 489 | return h; |
490 | } | |
ac13e8d9 | 491 | |
f50f6fc3 | 492 | /* Equality function for type SRA_PAIR. */ |
4ee9c684 | 493 | |
f50f6fc3 | 494 | static int |
495 | sra_elt_eq (const void *x, const void *y) | |
496 | { | |
497 | const struct sra_elt *a = x; | |
498 | const struct sra_elt *b = y; | |
504d3463 | 499 | tree ae, be; |
4ee9c684 | 500 | |
6084da09 | 501 | if (a->parent != b->parent) |
f50f6fc3 | 502 | return false; |
4ee9c684 | 503 | |
504d3463 | 504 | ae = a->element; |
505 | be = b->element; | |
4ee9c684 | 506 | |
504d3463 | 507 | if (ae == be) |
508 | return true; | |
509 | if (TREE_CODE (ae) != TREE_CODE (be)) | |
f50f6fc3 | 510 | return false; |
504d3463 | 511 | |
512 | switch (TREE_CODE (ae)) | |
513 | { | |
514 | case VAR_DECL: | |
515 | case PARM_DECL: | |
516 | case RESULT_DECL: | |
517 | /* These are all pointer unique. */ | |
518 | return false; | |
519 | ||
520 | case INTEGER_CST: | |
521 | /* Integers are not pointer unique, so compare their values. */ | |
522 | return tree_int_cst_equal (ae, be); | |
523 | ||
2100c228 | 524 | case RANGE_EXPR: |
525 | return | |
526 | tree_int_cst_equal (TREE_OPERAND (ae, 0), TREE_OPERAND (be, 0)) | |
527 | && tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1)); | |
528 | ||
504d3463 | 529 | case FIELD_DECL: |
530 | /* Fields are unique within a record, but not between | |
531 | compatible records. */ | |
532 | if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be)) | |
533 | return false; | |
534 | return fields_compatible_p (ae, be); | |
535 | ||
536 | default: | |
8c0963c4 | 537 | gcc_unreachable (); |
504d3463 | 538 | } |
4ee9c684 | 539 | } |
540 | ||
f50f6fc3 | 541 | /* Create or return the SRA_ELT structure for CHILD in PARENT. PARENT |
542 | may be null, in which case CHILD must be a DECL. */ | |
4ee9c684 | 543 | |
f50f6fc3 | 544 | static struct sra_elt * |
545 | lookup_element (struct sra_elt *parent, tree child, tree type, | |
546 | enum insert_option insert) | |
4ee9c684 | 547 | { |
f50f6fc3 | 548 | struct sra_elt dummy; |
549 | struct sra_elt **slot; | |
550 | struct sra_elt *elt; | |
4ee9c684 | 551 | |
2100c228 | 552 | if (parent) |
553 | dummy.parent = parent->is_group ? parent->parent : parent; | |
554 | else | |
555 | dummy.parent = NULL; | |
f50f6fc3 | 556 | dummy.element = child; |
557 | ||
558 | slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert); | |
559 | if (!slot && insert == NO_INSERT) | |
560 | return NULL; | |
561 | ||
562 | elt = *slot; | |
563 | if (!elt && insert == INSERT) | |
4ee9c684 | 564 | { |
f50f6fc3 | 565 | *slot = elt = obstack_alloc (&sra_obstack, sizeof (*elt)); |
566 | memset (elt, 0, sizeof (*elt)); | |
567 | ||
568 | elt->parent = parent; | |
569 | elt->element = child; | |
570 | elt->type = type; | |
571 | elt->is_scalar = is_sra_scalar_type (type); | |
572 | ||
573 | if (parent) | |
574 | { | |
2100c228 | 575 | if (IS_ELEMENT_FOR_GROUP (elt->element)) |
576 | { | |
577 | elt->is_group = true; | |
578 | elt->sibling = parent->groups; | |
579 | parent->groups = elt; | |
580 | } | |
581 | else | |
582 | { | |
583 | elt->sibling = parent->children; | |
584 | parent->children = elt; | |
585 | } | |
f50f6fc3 | 586 | } |
4ee9c684 | 587 | |
f50f6fc3 | 588 | /* If this is a parameter, then if we want to scalarize, we have |
589 | one copy from the true function parameter. Count it now. */ | |
590 | if (TREE_CODE (child) == PARM_DECL) | |
4ee9c684 | 591 | { |
f50f6fc3 | 592 | elt->n_copies = 1; |
a55dc2cd | 593 | bitmap_set_bit (needs_copy_in, DECL_UID (child)); |
4ee9c684 | 594 | } |
595 | } | |
596 | ||
f50f6fc3 | 597 | return elt; |
4ee9c684 | 598 | } |
599 | ||
ac13e8d9 | 600 | /* Create or return the SRA_ELT structure for EXPR if the expression |
f50f6fc3 | 601 | refers to a scalarizable variable. */ |
602 | ||
603 | static struct sra_elt * | |
604 | maybe_lookup_element_for_expr (tree expr) | |
4ee9c684 | 605 | { |
f50f6fc3 | 606 | struct sra_elt *elt; |
607 | tree child; | |
608 | ||
609 | switch (TREE_CODE (expr)) | |
610 | { | |
611 | case VAR_DECL: | |
612 | case PARM_DECL: | |
613 | case RESULT_DECL: | |
614 | if (is_sra_candidate_decl (expr)) | |
615 | return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT); | |
616 | return NULL; | |
617 | ||
618 | case ARRAY_REF: | |
2100c228 | 619 | /* We can't scalarize variable array indices. */ |
620 | if (in_array_bounds_p (expr)) | |
f50f6fc3 | 621 | child = TREE_OPERAND (expr, 1); |
622 | else | |
623 | return NULL; | |
624 | break; | |
625 | ||
2100c228 | 626 | case ARRAY_RANGE_REF: |
627 | /* We can't scalarize variable array indices. */ | |
628 | if (range_in_array_bounds_p (expr)) | |
629 | { | |
630 | tree domain = TYPE_DOMAIN (TREE_TYPE (expr)); | |
631 | child = build2 (RANGE_EXPR, integer_type_node, | |
632 | TYPE_MIN_VALUE (domain), TYPE_MAX_VALUE (domain)); | |
633 | } | |
634 | else | |
635 | return NULL; | |
636 | break; | |
637 | ||
f50f6fc3 | 638 | case COMPONENT_REF: |
639 | /* Don't look through unions. */ | |
640 | if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE) | |
641 | return NULL; | |
642 | child = TREE_OPERAND (expr, 1); | |
643 | break; | |
644 | ||
645 | case REALPART_EXPR: | |
646 | child = integer_zero_node; | |
647 | break; | |
648 | case IMAGPART_EXPR: | |
649 | child = integer_one_node; | |
650 | break; | |
651 | ||
652 | default: | |
653 | return NULL; | |
654 | } | |
655 | ||
656 | elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0)); | |
657 | if (elt) | |
658 | return lookup_element (elt, child, TREE_TYPE (expr), INSERT); | |
659 | return NULL; | |
660 | } | |
661 | ||
662 | \f | |
663 | /* Functions to walk just enough of the tree to see all scalarizable | |
664 | references, and categorize them. */ | |
665 | ||
666 | /* A set of callbacks for phases 2 and 4. They'll be invoked for the | |
667 | various kinds of references seen. In all cases, *BSI is an iterator | |
668 | pointing to the statement being processed. */ | |
669 | struct sra_walk_fns | |
670 | { | |
671 | /* Invoked when ELT is required as a unit. Note that ELT might refer to | |
672 | a leaf node, in which case this is a simple scalar reference. *EXPR_P | |
673 | points to the location of the expression. IS_OUTPUT is true if this | |
6084da09 | 674 | is a left-hand-side reference. USE_ALL is true if we saw something we |
675 | couldn't quite identify and had to force the use of the entire object. */ | |
f50f6fc3 | 676 | void (*use) (struct sra_elt *elt, tree *expr_p, |
6084da09 | 677 | block_stmt_iterator *bsi, bool is_output, bool use_all); |
f50f6fc3 | 678 | |
679 | /* Invoked when we have a copy between two scalarizable references. */ | |
680 | void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, | |
681 | block_stmt_iterator *bsi); | |
682 | ||
683 | /* Invoked when ELT is initialized from a constant. VALUE may be NULL, | |
fa6d6d27 | 684 | in which case it should be treated as an empty CONSTRUCTOR. */ |
685 | void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi); | |
f50f6fc3 | 686 | |
687 | /* Invoked when we have a copy between one scalarizable reference ELT | |
723e4ab0 | 688 | and one non-scalarizable reference OTHER without side-effects. |
689 | IS_OUTPUT is true if ELT is on the left-hand side. */ | |
f50f6fc3 | 690 | void (*ldst) (struct sra_elt *elt, tree other, |
691 | block_stmt_iterator *bsi, bool is_output); | |
692 | ||
693 | /* True during phase 2, false during phase 4. */ | |
694 | /* ??? This is a hack. */ | |
695 | bool initial_scan; | |
696 | }; | |
697 | ||
698 | #ifdef ENABLE_CHECKING | |
86481e89 | 699 | /* Invoked via walk_tree, if *TP contains a candidate decl, return it. */ |
f50f6fc3 | 700 | |
701 | static tree | |
702 | sra_find_candidate_decl (tree *tp, int *walk_subtrees, | |
703 | void *data ATTRIBUTE_UNUSED) | |
704 | { | |
705 | tree t = *tp; | |
706 | enum tree_code code = TREE_CODE (t); | |
707 | ||
708 | if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL) | |
709 | { | |
710 | *walk_subtrees = 0; | |
711 | if (is_sra_candidate_decl (t)) | |
712 | return t; | |
713 | } | |
714 | else if (TYPE_P (t)) | |
715 | *walk_subtrees = 0; | |
716 | ||
717 | return NULL; | |
718 | } | |
719 | #endif | |
720 | ||
721 | /* Walk most expressions looking for a scalarizable aggregate. | |
722 | If we find one, invoke FNS->USE. */ | |
723 | ||
724 | static void | |
725 | sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output, | |
726 | const struct sra_walk_fns *fns) | |
727 | { | |
728 | tree expr = *expr_p; | |
729 | tree inner = expr; | |
16248b8e | 730 | bool disable_scalarization = false; |
6084da09 | 731 | bool use_all_p = false; |
f50f6fc3 | 732 | |
733 | /* We're looking to collect a reference expression between EXPR and INNER, | |
734 | such that INNER is a scalarizable decl and all other nodes through EXPR | |
735 | are references that we can scalarize. If we come across something that | |
736 | we can't scalarize, we reset EXPR. This has the effect of making it | |
737 | appear that we're referring to the larger expression as a whole. */ | |
738 | ||
739 | while (1) | |
740 | switch (TREE_CODE (inner)) | |
741 | { | |
742 | case VAR_DECL: | |
743 | case PARM_DECL: | |
744 | case RESULT_DECL: | |
745 | /* If there is a scalarizable decl at the bottom, then process it. */ | |
746 | if (is_sra_candidate_decl (inner)) | |
747 | { | |
748 | struct sra_elt *elt = maybe_lookup_element_for_expr (expr); | |
16248b8e | 749 | if (disable_scalarization) |
750 | elt->cannot_scalarize = true; | |
751 | else | |
6084da09 | 752 | fns->use (elt, expr_p, bsi, is_output, use_all_p); |
f50f6fc3 | 753 | } |
754 | return; | |
755 | ||
756 | case ARRAY_REF: | |
757 | /* Non-constant index means any member may be accessed. Prevent the | |
758 | expression from being scalarized. If we were to treat this as a | |
759 | reference to the whole array, we can wind up with a single dynamic | |
760 | index reference inside a loop being overridden by several constant | |
761 | index references during loop setup. It's possible that this could | |
762 | be avoided by using dynamic usage counts based on BB trip counts | |
ac13e8d9 | 763 | (based on loop analysis or profiling), but that hardly seems worth |
f50f6fc3 | 764 | the effort. */ |
765 | /* ??? Hack. Figure out how to push this into the scan routines | |
766 | without duplicating too much code. */ | |
2100c228 | 767 | if (!in_array_bounds_p (inner)) |
f50f6fc3 | 768 | { |
16248b8e | 769 | disable_scalarization = true; |
770 | goto use_all; | |
f50f6fc3 | 771 | } |
772 | /* ??? Are we assured that non-constant bounds and stride will have | |
773 | the same value everywhere? I don't think Fortran will... */ | |
774 | if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3)) | |
775 | goto use_all; | |
776 | inner = TREE_OPERAND (inner, 0); | |
777 | break; | |
778 | ||
2100c228 | 779 | case ARRAY_RANGE_REF: |
780 | if (!range_in_array_bounds_p (inner)) | |
781 | { | |
782 | disable_scalarization = true; | |
783 | goto use_all; | |
784 | } | |
785 | /* ??? See above non-constant bounds and stride . */ | |
786 | if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3)) | |
787 | goto use_all; | |
788 | inner = TREE_OPERAND (inner, 0); | |
789 | break; | |
790 | ||
f50f6fc3 | 791 | case COMPONENT_REF: |
792 | /* A reference to a union member constitutes a reference to the | |
793 | entire union. */ | |
794 | if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE) | |
795 | goto use_all; | |
796 | /* ??? See above re non-constant stride. */ | |
797 | if (TREE_OPERAND (inner, 2)) | |
798 | goto use_all; | |
799 | inner = TREE_OPERAND (inner, 0); | |
800 | break; | |
801 | ||
802 | case REALPART_EXPR: | |
803 | case IMAGPART_EXPR: | |
804 | inner = TREE_OPERAND (inner, 0); | |
805 | break; | |
806 | ||
807 | case BIT_FIELD_REF: | |
8ea8de24 | 808 | /* A bit field reference to a specific vector is scalarized but for |
809 | ones for inputs need to be marked as used on the left hand size so | |
810 | when we scalarize it, we can mark that variable as non renamable. */ | |
08e5bc43 | 811 | if (is_output |
812 | && TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) == VECTOR_TYPE) | |
8ea8de24 | 813 | { |
08e5bc43 | 814 | struct sra_elt *elt |
815 | = maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0)); | |
816 | if (elt) | |
817 | elt->is_vector_lhs = true; | |
8ea8de24 | 818 | } |
f50f6fc3 | 819 | /* A bit field reference (access to *multiple* fields simultaneously) |
ac13e8d9 | 820 | is not currently scalarized. Consider this an access to the |
f50f6fc3 | 821 | complete outer element, to which walk_tree will bring us next. */ |
8ea8de24 | 822 | |
f50f6fc3 | 823 | goto use_all; |
824 | ||
f50f6fc3 | 825 | case VIEW_CONVERT_EXPR: |
826 | case NOP_EXPR: | |
827 | /* Similarly, a view/nop explicitly wants to look at an object in a | |
828 | type other than the one we've scalarized. */ | |
829 | goto use_all; | |
830 | ||
80f06481 | 831 | case WITH_SIZE_EXPR: |
832 | /* This is a transparent wrapper. The entire inner expression really | |
833 | is being used. */ | |
834 | goto use_all; | |
835 | ||
f50f6fc3 | 836 | use_all: |
837 | expr_p = &TREE_OPERAND (inner, 0); | |
838 | inner = expr = *expr_p; | |
6084da09 | 839 | use_all_p = true; |
f50f6fc3 | 840 | break; |
841 | ||
842 | default: | |
843 | #ifdef ENABLE_CHECKING | |
844 | /* Validate that we're not missing any references. */ | |
8c0963c4 | 845 | gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL)); |
f50f6fc3 | 846 | #endif |
847 | return; | |
848 | } | |
849 | } | |
850 | ||
851 | /* Walk a TREE_LIST of values looking for scalarizable aggregates. | |
852 | If we find one, invoke FNS->USE. */ | |
853 | ||
854 | static void | |
855 | sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output, | |
856 | const struct sra_walk_fns *fns) | |
857 | { | |
858 | tree op; | |
859 | for (op = list; op ; op = TREE_CHAIN (op)) | |
860 | sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns); | |
861 | } | |
862 | ||
863 | /* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates. | |
864 | If we find one, invoke FNS->USE. */ | |
865 | ||
866 | static void | |
867 | sra_walk_call_expr (tree expr, block_stmt_iterator *bsi, | |
868 | const struct sra_walk_fns *fns) | |
869 | { | |
c2f47e15 | 870 | int i; |
871 | int nargs = call_expr_nargs (expr); | |
872 | for (i = 0; i < nargs; i++) | |
873 | sra_walk_expr (&CALL_EXPR_ARG (expr, i), bsi, false, fns); | |
f50f6fc3 | 874 | } |
875 | ||
876 | /* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable | |
877 | aggregates. If we find one, invoke FNS->USE. */ | |
878 | ||
879 | static void | |
880 | sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi, | |
881 | const struct sra_walk_fns *fns) | |
882 | { | |
883 | sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns); | |
884 | sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns); | |
885 | } | |
886 | ||
35cc02b5 | 887 | /* Walk a GIMPLE_MODIFY_STMT and categorize the assignment appropriately. */ |
f50f6fc3 | 888 | |
889 | static void | |
35cc02b5 | 890 | sra_walk_gimple_modify_stmt (tree expr, block_stmt_iterator *bsi, |
6084da09 | 891 | const struct sra_walk_fns *fns) |
f50f6fc3 | 892 | { |
893 | struct sra_elt *lhs_elt, *rhs_elt; | |
894 | tree lhs, rhs; | |
895 | ||
35cc02b5 | 896 | lhs = GIMPLE_STMT_OPERAND (expr, 0); |
897 | rhs = GIMPLE_STMT_OPERAND (expr, 1); | |
f50f6fc3 | 898 | lhs_elt = maybe_lookup_element_for_expr (lhs); |
899 | rhs_elt = maybe_lookup_element_for_expr (rhs); | |
900 | ||
901 | /* If both sides are scalarizable, this is a COPY operation. */ | |
902 | if (lhs_elt && rhs_elt) | |
903 | { | |
904 | fns->copy (lhs_elt, rhs_elt, bsi); | |
905 | return; | |
906 | } | |
907 | ||
de0e549b | 908 | /* If the RHS is scalarizable, handle it. There are only two cases. */ |
909 | if (rhs_elt) | |
910 | { | |
723e4ab0 | 911 | if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs)) |
de0e549b | 912 | fns->ldst (rhs_elt, lhs, bsi, false); |
913 | else | |
6084da09 | 914 | fns->use (rhs_elt, &GIMPLE_STMT_OPERAND (expr, 1), bsi, false, false); |
de0e549b | 915 | } |
916 | ||
917 | /* If it isn't scalarizable, there may be scalarizable variables within, so | |
918 | check for a call or else walk the RHS to see if we need to do any | |
919 | copy-in operations. We need to do it before the LHS is scalarized so | |
920 | that the statements get inserted in the proper place, before any | |
921 | copy-out operations. */ | |
922 | else | |
923 | { | |
924 | tree call = get_call_expr_in (rhs); | |
925 | if (call) | |
926 | sra_walk_call_expr (call, bsi, fns); | |
927 | else | |
35cc02b5 | 928 | sra_walk_expr (&GIMPLE_STMT_OPERAND (expr, 1), bsi, false, fns); |
de0e549b | 929 | } |
930 | ||
931 | /* Likewise, handle the LHS being scalarizable. We have cases similar | |
932 | to those above, but also want to handle RHS being constant. */ | |
f50f6fc3 | 933 | if (lhs_elt) |
934 | { | |
935 | /* If this is an assignment from a constant, or constructor, then | |
936 | we have access to all of the elements individually. Invoke INIT. */ | |
fa6d6d27 | 937 | if (TREE_CODE (rhs) == COMPLEX_EXPR |
938 | || TREE_CODE (rhs) == COMPLEX_CST | |
939 | || TREE_CODE (rhs) == CONSTRUCTOR) | |
940 | fns->init (lhs_elt, rhs, bsi); | |
f50f6fc3 | 941 | |
942 | /* If this is an assignment from read-only memory, treat this as if | |
943 | we'd been passed the constructor directly. Invoke INIT. */ | |
944 | else if (TREE_CODE (rhs) == VAR_DECL | |
945 | && TREE_STATIC (rhs) | |
946 | && TREE_READONLY (rhs) | |
fa6d6d27 | 947 | && targetm.binds_local_p (rhs)) |
948 | fns->init (lhs_elt, DECL_INITIAL (rhs), bsi); | |
f50f6fc3 | 949 | |
950 | /* If this is a copy from a non-scalarizable lvalue, invoke LDST. | |
951 | The lvalue requirement prevents us from trying to directly scalarize | |
952 | the result of a function call. Which would result in trying to call | |
953 | the function multiple times, and other evil things. */ | |
723e4ab0 | 954 | else if (!lhs_elt->is_scalar |
955 | && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs)) | |
f50f6fc3 | 956 | fns->ldst (lhs_elt, rhs, bsi, true); |
ac13e8d9 | 957 | |
f50f6fc3 | 958 | /* Otherwise we're being used in some context that requires the |
959 | aggregate to be seen as a whole. Invoke USE. */ | |
960 | else | |
6084da09 | 961 | fns->use (lhs_elt, &GIMPLE_STMT_OPERAND (expr, 0), bsi, true, false); |
f50f6fc3 | 962 | } |
f50f6fc3 | 963 | |
de0e549b | 964 | /* Similarly to above, LHS_ELT being null only means that the LHS as a |
965 | whole is not a scalarizable reference. There may be occurrences of | |
966 | scalarizable variables within, which implies a USE. */ | |
f50f6fc3 | 967 | else |
35cc02b5 | 968 | sra_walk_expr (&GIMPLE_STMT_OPERAND (expr, 0), bsi, true, fns); |
f50f6fc3 | 969 | } |
970 | ||
971 | /* Entry point to the walk functions. Search the entire function, | |
972 | invoking the callbacks in FNS on each of the references to | |
973 | scalarizable variables. */ | |
974 | ||
975 | static void | |
976 | sra_walk_function (const struct sra_walk_fns *fns) | |
977 | { | |
978 | basic_block bb; | |
f105beb6 | 979 | block_stmt_iterator si, ni; |
f50f6fc3 | 980 | |
981 | /* ??? Phase 4 could derive some benefit to walking the function in | |
982 | dominator tree order. */ | |
983 | ||
984 | FOR_EACH_BB (bb) | |
f105beb6 | 985 | for (si = bsi_start (bb); !bsi_end_p (si); si = ni) |
f50f6fc3 | 986 | { |
987 | tree stmt, t; | |
988 | stmt_ann_t ann; | |
989 | ||
990 | stmt = bsi_stmt (si); | |
991 | ann = stmt_ann (stmt); | |
992 | ||
f105beb6 | 993 | ni = si; |
994 | bsi_next (&ni); | |
995 | ||
f50f6fc3 | 996 | /* If the statement has no virtual operands, then it doesn't |
997 | make any structure references that we care about. */ | |
5620e045 | 998 | if (gimple_aliases_computed_p (cfun) |
999 | && ZERO_SSA_OPERANDS (stmt, (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE))) | |
1000 | continue; | |
f50f6fc3 | 1001 | |
1002 | switch (TREE_CODE (stmt)) | |
1003 | { | |
1004 | case RETURN_EXPR: | |
1005 | /* If we have "return <retval>" then the return value is | |
1006 | already exposed for our pleasure. Walk it as a USE to | |
1007 | force all the components back in place for the return. | |
1008 | ||
1009 | If we have an embedded assignment, then <retval> is of | |
1010 | a type that gets returned in registers in this ABI, and | |
1011 | we do not wish to extend their lifetimes. Treat this | |
1012 | as a USE of the variable on the RHS of this assignment. */ | |
1013 | ||
1014 | t = TREE_OPERAND (stmt, 0); | |
5620e045 | 1015 | if (t == NULL_TREE) |
1016 | ; | |
1017 | else if (TREE_CODE (t) == GIMPLE_MODIFY_STMT) | |
35cc02b5 | 1018 | sra_walk_expr (&GIMPLE_STMT_OPERAND (t, 1), &si, false, fns); |
f50f6fc3 | 1019 | else |
1020 | sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns); | |
1021 | break; | |
1022 | ||
35cc02b5 | 1023 | case GIMPLE_MODIFY_STMT: |
1024 | sra_walk_gimple_modify_stmt (stmt, &si, fns); | |
f50f6fc3 | 1025 | break; |
1026 | case CALL_EXPR: | |
1027 | sra_walk_call_expr (stmt, &si, fns); | |
1028 | break; | |
1029 | case ASM_EXPR: | |
1030 | sra_walk_asm_expr (stmt, &si, fns); | |
1031 | break; | |
1032 | ||
1033 | default: | |
1034 | break; | |
1035 | } | |
1036 | } | |
1037 | } | |
1038 | \f | |
1039 | /* Phase One: Scan all referenced variables in the program looking for | |
1040 | structures that could be decomposed. */ | |
1041 | ||
1042 | static bool | |
1043 | find_candidates_for_sra (void) | |
1044 | { | |
f50f6fc3 | 1045 | bool any_set = false; |
a55dc2cd | 1046 | tree var; |
1047 | referenced_var_iterator rvi; | |
f50f6fc3 | 1048 | |
a55dc2cd | 1049 | FOR_EACH_REFERENCED_VAR (var, rvi) |
f50f6fc3 | 1050 | { |
f50f6fc3 | 1051 | if (decl_can_be_decomposed_p (var)) |
1052 | { | |
a55dc2cd | 1053 | bitmap_set_bit (sra_candidates, DECL_UID (var)); |
f50f6fc3 | 1054 | any_set = true; |
1055 | } | |
1056 | } | |
ac13e8d9 | 1057 | |
f50f6fc3 | 1058 | return any_set; |
1059 | } | |
1060 | ||
1061 | \f | |
1062 | /* Phase Two: Scan all references to scalarizable variables. Count the | |
1063 | number of times they are used or copied respectively. */ | |
1064 | ||
1065 | /* Callbacks to fill in SRA_WALK_FNS. Everything but USE is | |
1066 | considered a copy, because we can decompose the reference such that | |
1067 | the sub-elements needn't be contiguous. */ | |
1068 | ||
1069 | static void | |
1070 | scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED, | |
1071 | block_stmt_iterator *bsi ATTRIBUTE_UNUSED, | |
6084da09 | 1072 | bool is_output ATTRIBUTE_UNUSED, bool use_all ATTRIBUTE_UNUSED) |
f50f6fc3 | 1073 | { |
1074 | elt->n_uses += 1; | |
1075 | } | |
1076 | ||
1077 | static void | |
1078 | scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, | |
1079 | block_stmt_iterator *bsi ATTRIBUTE_UNUSED) | |
1080 | { | |
1081 | lhs_elt->n_copies += 1; | |
1082 | rhs_elt->n_copies += 1; | |
1083 | } | |
1084 | ||
fa6d6d27 | 1085 | static void |
f50f6fc3 | 1086 | scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED, |
1087 | block_stmt_iterator *bsi ATTRIBUTE_UNUSED) | |
1088 | { | |
1089 | lhs_elt->n_copies += 1; | |
1090 | } | |
1091 | ||
1092 | static void | |
1093 | scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED, | |
1094 | block_stmt_iterator *bsi ATTRIBUTE_UNUSED, | |
1095 | bool is_output ATTRIBUTE_UNUSED) | |
1096 | { | |
1097 | elt->n_copies += 1; | |
1098 | } | |
1099 | ||
1100 | /* Dump the values we collected during the scanning phase. */ | |
1101 | ||
1102 | static void | |
1103 | scan_dump (struct sra_elt *elt) | |
1104 | { | |
1105 | struct sra_elt *c; | |
1106 | ||
1107 | dump_sra_elt_name (dump_file, elt); | |
1108 | fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies); | |
1109 | ||
1110 | for (c = elt->children; c ; c = c->sibling) | |
1111 | scan_dump (c); | |
2100c228 | 1112 | |
1113 | for (c = elt->groups; c ; c = c->sibling) | |
1114 | scan_dump (c); | |
f50f6fc3 | 1115 | } |
1116 | ||
1117 | /* Entry point to phase 2. Scan the entire function, building up | |
1118 | scalarization data structures, recording copies and uses. */ | |
1119 | ||
1120 | static void | |
1121 | scan_function (void) | |
1122 | { | |
1123 | static const struct sra_walk_fns fns = { | |
1124 | scan_use, scan_copy, scan_init, scan_ldst, true | |
1125 | }; | |
0cc4271a | 1126 | bitmap_iterator bi; |
f50f6fc3 | 1127 | |
1128 | sra_walk_function (&fns); | |
1129 | ||
1130 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1131 | { | |
4f917ffe | 1132 | unsigned i; |
f50f6fc3 | 1133 | |
1134 | fputs ("\nScan results:\n", dump_file); | |
0cc4271a | 1135 | EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi) |
f50f6fc3 | 1136 | { |
1137 | tree var = referenced_var (i); | |
1138 | struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); | |
1139 | if (elt) | |
1140 | scan_dump (elt); | |
0cc4271a | 1141 | } |
f50f6fc3 | 1142 | fputc ('\n', dump_file); |
1143 | } | |
1144 | } | |
1145 | \f | |
1146 | /* Phase Three: Make decisions about which variables to scalarize, if any. | |
1147 | All elements to be scalarized have replacement variables made for them. */ | |
1148 | ||
1149 | /* A subroutine of build_element_name. Recursively build the element | |
1150 | name on the obstack. */ | |
1151 | ||
1152 | static void | |
1153 | build_element_name_1 (struct sra_elt *elt) | |
1154 | { | |
1155 | tree t; | |
1156 | char buffer[32]; | |
1157 | ||
1158 | if (elt->parent) | |
1159 | { | |
1160 | build_element_name_1 (elt->parent); | |
1161 | obstack_1grow (&sra_obstack, '$'); | |
1162 | ||
1163 | if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE) | |
1164 | { | |
1165 | if (elt->element == integer_zero_node) | |
1166 | obstack_grow (&sra_obstack, "real", 4); | |
1167 | else | |
1168 | obstack_grow (&sra_obstack, "imag", 4); | |
1169 | return; | |
1170 | } | |
1171 | } | |
1172 | ||
1173 | t = elt->element; | |
1174 | if (TREE_CODE (t) == INTEGER_CST) | |
1175 | { | |
1176 | /* ??? Eh. Don't bother doing double-wide printing. */ | |
1177 | sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t)); | |
1178 | obstack_grow (&sra_obstack, buffer, strlen (buffer)); | |
1179 | } | |
1180 | else | |
1181 | { | |
1182 | tree name = DECL_NAME (t); | |
1183 | if (name) | |
1184 | obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name), | |
1185 | IDENTIFIER_LENGTH (name)); | |
1186 | else | |
1187 | { | |
1188 | sprintf (buffer, "D%u", DECL_UID (t)); | |
1189 | obstack_grow (&sra_obstack, buffer, strlen (buffer)); | |
1190 | } | |
1191 | } | |
1192 | } | |
1193 | ||
1194 | /* Construct a pretty variable name for an element's replacement variable. | |
1195 | The name is built on the obstack. */ | |
1196 | ||
1197 | static char * | |
1198 | build_element_name (struct sra_elt *elt) | |
1199 | { | |
1200 | build_element_name_1 (elt); | |
1201 | obstack_1grow (&sra_obstack, '\0'); | |
4fac984f | 1202 | return XOBFINISH (&sra_obstack, char *); |
f50f6fc3 | 1203 | } |
1204 | ||
1205 | /* Instantiate an element as an independent variable. */ | |
1206 | ||
1207 | static void | |
1208 | instantiate_element (struct sra_elt *elt) | |
1209 | { | |
1210 | struct sra_elt *base_elt; | |
1211 | tree var, base; | |
1212 | ||
1213 | for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent) | |
6084da09 | 1214 | continue; |
f50f6fc3 | 1215 | base = base_elt->element; |
1216 | ||
1217 | elt->replacement = var = make_rename_temp (elt->type, "SR"); | |
8ea8de24 | 1218 | |
1219 | /* For vectors, if used on the left hand side with BIT_FIELD_REF, | |
1220 | they are not a gimple register. */ | |
1221 | if (TREE_CODE (TREE_TYPE (var)) == VECTOR_TYPE && elt->is_vector_lhs) | |
1222 | DECL_GIMPLE_REG_P (var) = 0; | |
1223 | ||
f50f6fc3 | 1224 | DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base); |
9366434c | 1225 | DECL_ARTIFICIAL (var) = 1; |
f50f6fc3 | 1226 | |
e557e585 | 1227 | if (TREE_THIS_VOLATILE (elt->type)) |
1228 | { | |
1229 | TREE_THIS_VOLATILE (var) = 1; | |
1230 | TREE_SIDE_EFFECTS (var) = 1; | |
1231 | } | |
1232 | ||
f50f6fc3 | 1233 | if (DECL_NAME (base) && !DECL_IGNORED_P (base)) |
1234 | { | |
1235 | char *pretty_name = build_element_name (elt); | |
1236 | DECL_NAME (var) = get_identifier (pretty_name); | |
1237 | obstack_free (&sra_obstack, pretty_name); | |
9366434c | 1238 | |
8bc1e6ff | 1239 | SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt)); |
9366434c | 1240 | DECL_DEBUG_EXPR_IS_FROM (var) = 1; |
8bc1e6ff | 1241 | |
9366434c | 1242 | DECL_IGNORED_P (var) = 0; |
6084da09 | 1243 | TREE_NO_WARNING (var) = TREE_NO_WARNING (base); |
1244 | if (elt->element && TREE_NO_WARNING (elt->element)) | |
1245 | TREE_NO_WARNING (var) = 1; | |
9366434c | 1246 | } |
1247 | else | |
1248 | { | |
1249 | DECL_IGNORED_P (var) = 1; | |
1250 | /* ??? We can't generate any warning that would be meaningful. */ | |
1251 | TREE_NO_WARNING (var) = 1; | |
f50f6fc3 | 1252 | } |
1253 | ||
1254 | if (dump_file) | |
1255 | { | |
1256 | fputs (" ", dump_file); | |
1257 | dump_sra_elt_name (dump_file, elt); | |
1258 | fputs (" -> ", dump_file); | |
1259 | print_generic_expr (dump_file, var, dump_flags); | |
1260 | fputc ('\n', dump_file); | |
1261 | } | |
1262 | } | |
1263 | ||
1264 | /* Make one pass across an element tree deciding whether or not it's | |
1265 | profitable to instantiate individual leaf scalars. | |
1266 | ||
1267 | PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES | |
1268 | fields all the way up the tree. */ | |
1269 | ||
1270 | static void | |
1271 | decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses, | |
1272 | unsigned int parent_copies) | |
1273 | { | |
1274 | if (dump_file && !elt->parent) | |
1275 | { | |
1276 | fputs ("Initial instantiation for ", dump_file); | |
1277 | dump_sra_elt_name (dump_file, elt); | |
1278 | fputc ('\n', dump_file); | |
1279 | } | |
1280 | ||
1281 | if (elt->cannot_scalarize) | |
1282 | return; | |
1283 | ||
1284 | if (elt->is_scalar) | |
1285 | { | |
1286 | /* The decision is simple: instantiate if we're used more frequently | |
1287 | than the parent needs to be seen as a complete unit. */ | |
1288 | if (elt->n_uses + elt->n_copies + parent_copies > parent_uses) | |
1289 | instantiate_element (elt); | |
1290 | } | |
1291 | else | |
1292 | { | |
2100c228 | 1293 | struct sra_elt *c, *group; |
f50f6fc3 | 1294 | unsigned int this_uses = elt->n_uses + parent_uses; |
1295 | unsigned int this_copies = elt->n_copies + parent_copies; | |
1296 | ||
2100c228 | 1297 | /* Consider groups of sub-elements as weighing in favour of |
1298 | instantiation whatever their size. */ | |
1299 | for (group = elt->groups; group ; group = group->sibling) | |
1300 | FOR_EACH_ACTUAL_CHILD (c, group) | |
1301 | { | |
1302 | c->n_uses += group->n_uses; | |
1303 | c->n_copies += group->n_copies; | |
1304 | } | |
1305 | ||
f50f6fc3 | 1306 | for (c = elt->children; c ; c = c->sibling) |
1307 | decide_instantiation_1 (c, this_uses, this_copies); | |
1308 | } | |
1309 | } | |
1310 | ||
1311 | /* Compute the size and number of all instantiated elements below ELT. | |
1312 | We will only care about this if the size of the complete structure | |
1313 | fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */ | |
1314 | ||
1315 | static unsigned int | |
1316 | sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep) | |
1317 | { | |
1318 | if (elt->replacement) | |
1319 | { | |
1320 | *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type)); | |
1321 | return 1; | |
1322 | } | |
1323 | else | |
1324 | { | |
1325 | struct sra_elt *c; | |
1326 | unsigned int count = 0; | |
1327 | ||
1328 | for (c = elt->children; c ; c = c->sibling) | |
1329 | count += sum_instantiated_sizes (c, sizep); | |
1330 | ||
1331 | return count; | |
1332 | } | |
1333 | } | |
1334 | ||
1335 | /* Instantiate fields in ELT->TYPE that are not currently present as | |
1336 | children of ELT. */ | |
1337 | ||
1338 | static void instantiate_missing_elements (struct sra_elt *elt); | |
1339 | ||
6084da09 | 1340 | static void |
f50f6fc3 | 1341 | instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type) |
1342 | { | |
1343 | struct sra_elt *sub = lookup_element (elt, child, type, INSERT); | |
1344 | if (sub->is_scalar) | |
1345 | { | |
1346 | if (sub->replacement == NULL) | |
1347 | instantiate_element (sub); | |
1348 | } | |
1349 | else | |
1350 | instantiate_missing_elements (sub); | |
1351 | } | |
1352 | ||
1353 | static void | |
1354 | instantiate_missing_elements (struct sra_elt *elt) | |
1355 | { | |
1356 | tree type = elt->type; | |
1357 | ||
1358 | switch (TREE_CODE (type)) | |
1359 | { | |
1360 | case RECORD_TYPE: | |
1361 | { | |
1362 | tree f; | |
1363 | for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) | |
1364 | if (TREE_CODE (f) == FIELD_DECL) | |
005b6665 | 1365 | { |
6084da09 | 1366 | tree field_type = TREE_TYPE (f); |
1367 | ||
1368 | /* canonicalize_component_ref() unwidens some bit-field | |
1369 | types (not marked as DECL_BIT_FIELD in C++), so we | |
1370 | must do the same, lest we may introduce type | |
1371 | mismatches. */ | |
1372 | if (INTEGRAL_TYPE_P (field_type) | |
1373 | && DECL_MODE (f) != TYPE_MODE (field_type)) | |
1374 | field_type = TREE_TYPE (get_unwidened (build3 (COMPONENT_REF, | |
1375 | field_type, | |
1376 | elt->element, | |
1377 | f, NULL_TREE), | |
1378 | NULL_TREE)); | |
1379 | ||
1380 | instantiate_missing_elements_1 (elt, f, field_type); | |
005b6665 | 1381 | } |
f50f6fc3 | 1382 | break; |
1383 | } | |
1384 | ||
1385 | case ARRAY_TYPE: | |
1386 | { | |
1387 | tree i, max, subtype; | |
1388 | ||
1389 | i = TYPE_MIN_VALUE (TYPE_DOMAIN (type)); | |
1390 | max = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); | |
1391 | subtype = TREE_TYPE (type); | |
1392 | ||
1393 | while (1) | |
1394 | { | |
1395 | instantiate_missing_elements_1 (elt, i, subtype); | |
1396 | if (tree_int_cst_equal (i, max)) | |
1397 | break; | |
1398 | i = int_const_binop (PLUS_EXPR, i, integer_one_node, true); | |
1399 | } | |
1400 | ||
1401 | break; | |
1402 | } | |
1403 | ||
1404 | case COMPLEX_TYPE: | |
1405 | type = TREE_TYPE (type); | |
1406 | instantiate_missing_elements_1 (elt, integer_zero_node, type); | |
1407 | instantiate_missing_elements_1 (elt, integer_one_node, type); | |
1408 | break; | |
1409 | ||
1410 | default: | |
8c0963c4 | 1411 | gcc_unreachable (); |
f50f6fc3 | 1412 | } |
1413 | } | |
1414 | ||
192903eb | 1415 | /* Return true if there is only one non aggregate field in the record, TYPE. |
1416 | Return false otherwise. */ | |
1417 | ||
1418 | static bool | |
1419 | single_scalar_field_in_record_p (tree type) | |
1420 | { | |
1421 | int num_fields = 0; | |
1422 | tree field; | |
1423 | if (TREE_CODE (type) != RECORD_TYPE) | |
1424 | return false; | |
1425 | ||
1426 | for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | |
1427 | if (TREE_CODE (field) == FIELD_DECL) | |
1428 | { | |
1429 | num_fields++; | |
1430 | ||
1431 | if (num_fields == 2) | |
1432 | return false; | |
1433 | ||
1434 | if (AGGREGATE_TYPE_P (TREE_TYPE (field))) | |
1435 | return false; | |
1436 | } | |
1437 | ||
1438 | return true; | |
1439 | } | |
1440 | ||
f50f6fc3 | 1441 | /* Make one pass across an element tree deciding whether to perform block |
1442 | or element copies. If we decide on element copies, instantiate all | |
1443 | elements. Return true if there are any instantiated sub-elements. */ | |
1444 | ||
1445 | static bool | |
1446 | decide_block_copy (struct sra_elt *elt) | |
1447 | { | |
1448 | struct sra_elt *c; | |
1449 | bool any_inst; | |
1450 | ||
2100c228 | 1451 | /* We shouldn't be invoked on groups of sub-elements as they must |
1452 | behave like their parent as far as block copy is concerned. */ | |
1453 | gcc_assert (!elt->is_group); | |
1454 | ||
f50f6fc3 | 1455 | /* If scalarization is disabled, respect it. */ |
1456 | if (elt->cannot_scalarize) | |
1457 | { | |
1458 | elt->use_block_copy = 1; | |
1459 | ||
1460 | if (dump_file) | |
1461 | { | |
1462 | fputs ("Scalarization disabled for ", dump_file); | |
1463 | dump_sra_elt_name (dump_file, elt); | |
1464 | fputc ('\n', dump_file); | |
1465 | } | |
1466 | ||
056b102d | 1467 | /* Disable scalarization of sub-elements */ |
1468 | for (c = elt->children; c; c = c->sibling) | |
1469 | { | |
1470 | c->cannot_scalarize = 1; | |
1471 | decide_block_copy (c); | |
1472 | } | |
2100c228 | 1473 | |
1474 | /* Groups behave like their parent. */ | |
1475 | for (c = elt->groups; c; c = c->sibling) | |
1476 | { | |
1477 | c->cannot_scalarize = 1; | |
1478 | c->use_block_copy = 1; | |
1479 | } | |
1480 | ||
f50f6fc3 | 1481 | return false; |
1482 | } | |
1483 | ||
1484 | /* Don't decide if we've no uses. */ | |
1485 | if (elt->n_uses == 0 && elt->n_copies == 0) | |
1486 | ; | |
1487 | ||
1488 | else if (!elt->is_scalar) | |
1489 | { | |
1490 | tree size_tree = TYPE_SIZE_UNIT (elt->type); | |
1491 | bool use_block_copy = true; | |
1492 | ||
c9dee805 | 1493 | /* Tradeoffs for COMPLEX types pretty much always make it better |
1494 | to go ahead and split the components. */ | |
1495 | if (TREE_CODE (elt->type) == COMPLEX_TYPE) | |
1496 | use_block_copy = false; | |
1497 | ||
f50f6fc3 | 1498 | /* Don't bother trying to figure out the rest if the structure is |
1499 | so large we can't do easy arithmetic. This also forces block | |
1500 | copies for variable sized structures. */ | |
c9dee805 | 1501 | else if (host_integerp (size_tree, 1)) |
f50f6fc3 | 1502 | { |
1503 | unsigned HOST_WIDE_INT full_size, inst_size = 0; | |
19e1a5cc | 1504 | unsigned int max_size, max_count, inst_count, full_count; |
152a1c0a | 1505 | |
1506 | /* If the sra-max-structure-size parameter is 0, then the | |
1507 | user has not overridden the parameter and we can choose a | |
1508 | sensible default. */ | |
1509 | max_size = SRA_MAX_STRUCTURE_SIZE | |
1510 | ? SRA_MAX_STRUCTURE_SIZE | |
1511 | : MOVE_RATIO * UNITS_PER_WORD; | |
19e1a5cc | 1512 | max_count = SRA_MAX_STRUCTURE_COUNT |
1513 | ? SRA_MAX_STRUCTURE_COUNT | |
1514 | : MOVE_RATIO; | |
f50f6fc3 | 1515 | |
1516 | full_size = tree_low_cst (size_tree, 1); | |
b7f95d09 | 1517 | full_count = count_type_elements (elt->type, false); |
19e1a5cc | 1518 | inst_count = sum_instantiated_sizes (elt, &inst_size); |
f50f6fc3 | 1519 | |
192903eb | 1520 | /* If there is only one scalar field in the record, don't block copy. */ |
1521 | if (single_scalar_field_in_record_p (elt->type)) | |
1522 | use_block_copy = false; | |
1523 | ||
ac13e8d9 | 1524 | /* ??? What to do here. If there are two fields, and we've only |
f50f6fc3 | 1525 | instantiated one, then instantiating the other is clearly a win. |
1526 | If there are a large number of fields then the size of the copy | |
1527 | is much more of a factor. */ | |
1528 | ||
1529 | /* If the structure is small, and we've made copies, go ahead | |
1530 | and instantiate, hoping that the copies will go away. */ | |
152a1c0a | 1531 | if (full_size <= max_size |
19e1a5cc | 1532 | && (full_count - inst_count) <= max_count |
f50f6fc3 | 1533 | && elt->n_copies > elt->n_uses) |
1534 | use_block_copy = false; | |
19e1a5cc | 1535 | else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO |
1536 | && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO) | |
1537 | use_block_copy = false; | |
f50f6fc3 | 1538 | |
1539 | /* In order to avoid block copy, we have to be able to instantiate | |
1540 | all elements of the type. See if this is possible. */ | |
1541 | if (!use_block_copy | |
1542 | && (!can_completely_scalarize_p (elt) | |
1543 | || !type_can_instantiate_all_elements (elt->type))) | |
1544 | use_block_copy = true; | |
1545 | } | |
2100c228 | 1546 | |
f50f6fc3 | 1547 | elt->use_block_copy = use_block_copy; |
1548 | ||
2100c228 | 1549 | /* Groups behave like their parent. */ |
1550 | for (c = elt->groups; c; c = c->sibling) | |
1551 | c->use_block_copy = use_block_copy; | |
1552 | ||
f50f6fc3 | 1553 | if (dump_file) |
1554 | { | |
1555 | fprintf (dump_file, "Using %s for ", | |
1556 | use_block_copy ? "block-copy" : "element-copy"); | |
1557 | dump_sra_elt_name (dump_file, elt); | |
1558 | fputc ('\n', dump_file); | |
1559 | } | |
1560 | ||
1561 | if (!use_block_copy) | |
1562 | { | |
1563 | instantiate_missing_elements (elt); | |
1564 | return true; | |
1565 | } | |
1566 | } | |
1567 | ||
1568 | any_inst = elt->replacement != NULL; | |
1569 | ||
1570 | for (c = elt->children; c ; c = c->sibling) | |
1571 | any_inst |= decide_block_copy (c); | |
1572 | ||
1573 | return any_inst; | |
1574 | } | |
1575 | ||
1576 | /* Entry point to phase 3. Instantiate scalar replacement variables. */ | |
1577 | ||
1578 | static void | |
1579 | decide_instantiations (void) | |
1580 | { | |
1581 | unsigned int i; | |
1582 | bool cleared_any; | |
42fe97ed | 1583 | bitmap_head done_head; |
0cc4271a | 1584 | bitmap_iterator bi; |
f50f6fc3 | 1585 | |
1586 | /* We cannot clear bits from a bitmap we're iterating over, | |
1587 | so save up all the bits to clear until the end. */ | |
42fe97ed | 1588 | bitmap_initialize (&done_head, &bitmap_default_obstack); |
f50f6fc3 | 1589 | cleared_any = false; |
1590 | ||
0cc4271a | 1591 | EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi) |
f50f6fc3 | 1592 | { |
1593 | tree var = referenced_var (i); | |
1594 | struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); | |
1595 | if (elt) | |
1596 | { | |
1597 | decide_instantiation_1 (elt, 0, 0); | |
1598 | if (!decide_block_copy (elt)) | |
1599 | elt = NULL; | |
1600 | } | |
1601 | if (!elt) | |
1602 | { | |
1603 | bitmap_set_bit (&done_head, i); | |
1604 | cleared_any = true; | |
1605 | } | |
0cc4271a | 1606 | } |
f50f6fc3 | 1607 | |
1608 | if (cleared_any) | |
1609 | { | |
604efc01 | 1610 | bitmap_and_compl_into (sra_candidates, &done_head); |
1611 | bitmap_and_compl_into (needs_copy_in, &done_head); | |
f50f6fc3 | 1612 | } |
1613 | bitmap_clear (&done_head); | |
c96420f8 | 1614 | |
88dbf20f | 1615 | mark_set_for_renaming (sra_candidates); |
1616 | ||
f50f6fc3 | 1617 | if (dump_file) |
1618 | fputc ('\n', dump_file); | |
1619 | } | |
1620 | ||
1621 | \f | |
1622 | /* Phase Four: Update the function to match the replacements created. */ | |
1623 | ||
4fb5e5ca | 1624 | /* Mark all the variables in VDEF/VUSE operators for STMT for |
1625 | renaming. This becomes necessary when we modify all of a | |
1626 | non-scalar. */ | |
f50f6fc3 | 1627 | |
1628 | static void | |
88dbf20f | 1629 | mark_all_v_defs_1 (tree stmt) |
f50f6fc3 | 1630 | { |
43daa21e | 1631 | tree sym; |
1632 | ssa_op_iter iter; | |
f50f6fc3 | 1633 | |
22aa74c4 | 1634 | update_stmt_if_modified (stmt); |
f50f6fc3 | 1635 | |
6cf80e5b | 1636 | FOR_EACH_SSA_TREE_OPERAND (sym, stmt, iter, SSA_OP_ALL_VIRTUALS) |
f50f6fc3 | 1637 | { |
f50f6fc3 | 1638 | if (TREE_CODE (sym) == SSA_NAME) |
1639 | sym = SSA_NAME_VAR (sym); | |
88dbf20f | 1640 | mark_sym_for_renaming (sym); |
1641 | } | |
1642 | } | |
1643 | ||
1644 | ||
1645 | /* Mark all the variables in virtual operands in all the statements in | |
1646 | LIST for renaming. */ | |
1647 | ||
1648 | static void | |
1649 | mark_all_v_defs (tree list) | |
1650 | { | |
1651 | if (TREE_CODE (list) != STATEMENT_LIST) | |
1652 | mark_all_v_defs_1 (list); | |
1653 | else | |
1654 | { | |
1655 | tree_stmt_iterator i; | |
1656 | for (i = tsi_start (list); !tsi_end_p (i); tsi_next (&i)) | |
1657 | mark_all_v_defs_1 (tsi_stmt (i)); | |
f50f6fc3 | 1658 | } |
4ee9c684 | 1659 | } |
1660 | ||
4fb5e5ca | 1661 | |
fdea8514 | 1662 | /* Mark every replacement under ELT with TREE_NO_WARNING. */ |
1663 | ||
1664 | static void | |
1665 | mark_no_warning (struct sra_elt *elt) | |
1666 | { | |
1667 | if (!elt->all_no_warning) | |
1668 | { | |
1669 | if (elt->replacement) | |
1670 | TREE_NO_WARNING (elt->replacement) = 1; | |
1671 | else | |
1672 | { | |
1673 | struct sra_elt *c; | |
2100c228 | 1674 | FOR_EACH_ACTUAL_CHILD (c, elt) |
fdea8514 | 1675 | mark_no_warning (c); |
1676 | } | |
2100c228 | 1677 | elt->all_no_warning = true; |
fdea8514 | 1678 | } |
1679 | } | |
88dbf20f | 1680 | |
f50f6fc3 | 1681 | /* Build a single level component reference to ELT rooted at BASE. */ |
4ee9c684 | 1682 | |
1683 | static tree | |
f50f6fc3 | 1684 | generate_one_element_ref (struct sra_elt *elt, tree base) |
4ee9c684 | 1685 | { |
f50f6fc3 | 1686 | switch (TREE_CODE (TREE_TYPE (base))) |
4ee9c684 | 1687 | { |
f50f6fc3 | 1688 | case RECORD_TYPE: |
504d3463 | 1689 | { |
1690 | tree field = elt->element; | |
1691 | ||
1692 | /* Watch out for compatible records with differing field lists. */ | |
1693 | if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base))) | |
1694 | field = find_compatible_field (TREE_TYPE (base), field); | |
1695 | ||
5a12b939 | 1696 | return build3 (COMPONENT_REF, elt->type, base, field, NULL); |
504d3463 | 1697 | } |
4ee9c684 | 1698 | |
f50f6fc3 | 1699 | case ARRAY_TYPE: |
2100c228 | 1700 | if (TREE_CODE (elt->element) == RANGE_EXPR) |
1701 | return build4 (ARRAY_RANGE_REF, elt->type, base, | |
1702 | TREE_OPERAND (elt->element, 0), NULL, NULL); | |
1703 | else | |
1704 | return build4 (ARRAY_REF, elt->type, base, elt->element, NULL, NULL); | |
4ee9c684 | 1705 | |
f50f6fc3 | 1706 | case COMPLEX_TYPE: |
1707 | if (elt->element == integer_zero_node) | |
5a12b939 | 1708 | return build1 (REALPART_EXPR, elt->type, base); |
f50f6fc3 | 1709 | else |
5a12b939 | 1710 | return build1 (IMAGPART_EXPR, elt->type, base); |
4ee9c684 | 1711 | |
f50f6fc3 | 1712 | default: |
8c0963c4 | 1713 | gcc_unreachable (); |
f50f6fc3 | 1714 | } |
4ee9c684 | 1715 | } |
1716 | ||
f50f6fc3 | 1717 | /* Build a full component reference to ELT rooted at its native variable. */ |
4ee9c684 | 1718 | |
1719 | static tree | |
f50f6fc3 | 1720 | generate_element_ref (struct sra_elt *elt) |
1721 | { | |
1722 | if (elt->parent) | |
1723 | return generate_one_element_ref (elt, generate_element_ref (elt->parent)); | |
1724 | else | |
1725 | return elt->element; | |
1726 | } | |
1727 | ||
a42c3d85 | 1728 | /* Create an assignment statement from SRC to DST. */ |
1729 | ||
005b6665 | 1730 | static tree |
1731 | sra_build_assignment (tree dst, tree src) | |
1732 | { | |
33536fd1 | 1733 | /* It was hoped that we could perform some type sanity checking |
1734 | here, but since front-ends can emit accesses of fields in types | |
1735 | different from their nominal types and copy structures containing | |
1736 | them as a whole, we'd have to handle such differences here. | |
1737 | Since such accesses under different types require compatibility | |
1738 | anyway, there's little point in making tests and/or adding | |
1739 | conversions to ensure the types of src and dst are the same. | |
1740 | So we just assume type differences at this point are ok. */ | |
46a0e9e8 | 1741 | return build_gimple_modify_stmt (dst, src); |
005b6665 | 1742 | } |
1743 | ||
f50f6fc3 | 1744 | /* Generate a set of assignment statements in *LIST_P to copy all |
1745 | instantiated elements under ELT to or from the equivalent structure | |
1746 | rooted at EXPR. COPY_OUT controls the direction of the copy, with | |
1747 | true meaning to copy out of EXPR into ELT. */ | |
1748 | ||
1749 | static void | |
1750 | generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr, | |
1751 | tree *list_p) | |
4ee9c684 | 1752 | { |
f50f6fc3 | 1753 | struct sra_elt *c; |
1754 | tree t; | |
1755 | ||
7bdf9401 | 1756 | if (!copy_out && TREE_CODE (expr) == SSA_NAME |
1757 | && TREE_CODE (TREE_TYPE (expr)) == COMPLEX_TYPE) | |
1758 | { | |
1759 | tree r, i; | |
1760 | ||
1761 | c = lookup_element (elt, integer_zero_node, NULL, NO_INSERT); | |
1762 | r = c->replacement; | |
1763 | c = lookup_element (elt, integer_one_node, NULL, NO_INSERT); | |
1764 | i = c->replacement; | |
1765 | ||
5a12b939 | 1766 | t = build2 (COMPLEX_EXPR, elt->type, r, i); |
005b6665 | 1767 | t = sra_build_assignment (expr, t); |
7bdf9401 | 1768 | SSA_NAME_DEF_STMT (expr) = t; |
1769 | append_to_statement_list (t, list_p); | |
1770 | } | |
1771 | else if (elt->replacement) | |
4ee9c684 | 1772 | { |
f50f6fc3 | 1773 | if (copy_out) |
6084da09 | 1774 | t = sra_build_assignment (elt->replacement, expr); |
4ee9c684 | 1775 | else |
6084da09 | 1776 | t = sra_build_assignment (expr, elt->replacement); |
f50f6fc3 | 1777 | append_to_statement_list (t, list_p); |
1778 | } | |
1779 | else | |
1780 | { | |
2100c228 | 1781 | FOR_EACH_ACTUAL_CHILD (c, elt) |
f50f6fc3 | 1782 | { |
1783 | t = generate_one_element_ref (c, unshare_expr (expr)); | |
1784 | generate_copy_inout (c, copy_out, t, list_p); | |
1785 | } | |
1786 | } | |
1787 | } | |
4ee9c684 | 1788 | |
f50f6fc3 | 1789 | /* Generate a set of assignment statements in *LIST_P to copy all instantiated |
1790 | elements under SRC to their counterparts under DST. There must be a 1-1 | |
1791 | correspondence of instantiated elements. */ | |
4ee9c684 | 1792 | |
f50f6fc3 | 1793 | static void |
1794 | generate_element_copy (struct sra_elt *dst, struct sra_elt *src, tree *list_p) | |
1795 | { | |
1796 | struct sra_elt *dc, *sc; | |
4ee9c684 | 1797 | |
2100c228 | 1798 | FOR_EACH_ACTUAL_CHILD (dc, dst) |
f50f6fc3 | 1799 | { |
1800 | sc = lookup_element (src, dc->element, NULL, NO_INSERT); | |
8c0963c4 | 1801 | gcc_assert (sc); |
f50f6fc3 | 1802 | generate_element_copy (dc, sc, list_p); |
4ee9c684 | 1803 | } |
1804 | ||
f50f6fc3 | 1805 | if (dst->replacement) |
1806 | { | |
1807 | tree t; | |
4ee9c684 | 1808 | |
8c0963c4 | 1809 | gcc_assert (src->replacement); |
4ee9c684 | 1810 | |
6084da09 | 1811 | t = sra_build_assignment (dst->replacement, src->replacement); |
f50f6fc3 | 1812 | append_to_statement_list (t, list_p); |
1813 | } | |
1814 | } | |
4ee9c684 | 1815 | |
f50f6fc3 | 1816 | /* Generate a set of assignment statements in *LIST_P to zero all instantiated |
1817 | elements under ELT. In addition, do not assign to elements that have been | |
1818 | marked VISITED but do reset the visited flag; this allows easy coordination | |
1819 | with generate_element_init. */ | |
4ee9c684 | 1820 | |
f50f6fc3 | 1821 | static void |
1822 | generate_element_zero (struct sra_elt *elt, tree *list_p) | |
4ee9c684 | 1823 | { |
f50f6fc3 | 1824 | struct sra_elt *c; |
4ee9c684 | 1825 | |
fa6d6d27 | 1826 | if (elt->visited) |
1827 | { | |
1828 | elt->visited = false; | |
1829 | return; | |
1830 | } | |
1831 | ||
6084da09 | 1832 | FOR_EACH_ACTUAL_CHILD (c, elt) |
1833 | generate_element_zero (c, list_p); | |
4ee9c684 | 1834 | |
fa6d6d27 | 1835 | if (elt->replacement) |
4ee9c684 | 1836 | { |
f50f6fc3 | 1837 | tree t; |
4ee9c684 | 1838 | |
8c0963c4 | 1839 | gcc_assert (elt->is_scalar); |
df770747 | 1840 | t = fold_convert (elt->type, integer_zero_node); |
4ee9c684 | 1841 | |
6084da09 | 1842 | t = sra_build_assignment (elt->replacement, t); |
f50f6fc3 | 1843 | append_to_statement_list (t, list_p); |
4ee9c684 | 1844 | } |
f50f6fc3 | 1845 | } |
4ee9c684 | 1846 | |
eece3694 | 1847 | /* Generate an assignment VAR = INIT, where INIT may need gimplification. |
1848 | Add the result to *LIST_P. */ | |
1849 | ||
1850 | static void | |
6084da09 | 1851 | generate_one_element_init (tree var, tree init, tree *list_p) |
eece3694 | 1852 | { |
eece3694 | 1853 | /* The replacement can be almost arbitrarily complex. Gimplify. */ |
6084da09 | 1854 | tree stmt = sra_build_assignment (var, init); |
b94b916c | 1855 | gimplify_and_add (stmt, list_p); |
eece3694 | 1856 | } |
1857 | ||
f50f6fc3 | 1858 | /* Generate a set of assignment statements in *LIST_P to set all instantiated |
1859 | elements under ELT with the contents of the initializer INIT. In addition, | |
1860 | mark all assigned elements VISITED; this allows easy coordination with | |
c580338e | 1861 | generate_element_zero. Return false if we found a case we couldn't |
1862 | handle. */ | |
4ee9c684 | 1863 | |
c580338e | 1864 | static bool |
b94b916c | 1865 | generate_element_init_1 (struct sra_elt *elt, tree init, tree *list_p) |
f50f6fc3 | 1866 | { |
c580338e | 1867 | bool result = true; |
2e7e1fb3 | 1868 | enum tree_code init_code; |
f50f6fc3 | 1869 | struct sra_elt *sub; |
1870 | tree t; | |
c75b4594 | 1871 | unsigned HOST_WIDE_INT idx; |
1872 | tree value, purpose; | |
4ee9c684 | 1873 | |
c580338e | 1874 | /* We can be passed DECL_INITIAL of a static variable. It might have a |
1875 | conversion, which we strip off here. */ | |
2e7e1fb3 | 1876 | STRIP_USELESS_TYPE_CONVERSION (init); |
1877 | init_code = TREE_CODE (init); | |
1878 | ||
f50f6fc3 | 1879 | if (elt->is_scalar) |
1880 | { | |
1881 | if (elt->replacement) | |
4ee9c684 | 1882 | { |
6084da09 | 1883 | generate_one_element_init (elt->replacement, init, list_p); |
f50f6fc3 | 1884 | elt->visited = true; |
4ee9c684 | 1885 | } |
c580338e | 1886 | return result; |
f50f6fc3 | 1887 | } |
4ee9c684 | 1888 | |
f50f6fc3 | 1889 | switch (init_code) |
1890 | { | |
1891 | case COMPLEX_CST: | |
1892 | case COMPLEX_EXPR: | |
2100c228 | 1893 | FOR_EACH_ACTUAL_CHILD (sub, elt) |
4ee9c684 | 1894 | { |
f50f6fc3 | 1895 | if (sub->element == integer_zero_node) |
1896 | t = (init_code == COMPLEX_EXPR | |
1897 | ? TREE_OPERAND (init, 0) : TREE_REALPART (init)); | |
4ee9c684 | 1898 | else |
f50f6fc3 | 1899 | t = (init_code == COMPLEX_EXPR |
1900 | ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init)); | |
b94b916c | 1901 | result &= generate_element_init_1 (sub, t, list_p); |
4ee9c684 | 1902 | } |
f50f6fc3 | 1903 | break; |
4ee9c684 | 1904 | |
f50f6fc3 | 1905 | case CONSTRUCTOR: |
c75b4594 | 1906 | FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, purpose, value) |
4ee9c684 | 1907 | { |
36fff22e | 1908 | if (TREE_CODE (purpose) == RANGE_EXPR) |
1909 | { | |
1910 | tree lower = TREE_OPERAND (purpose, 0); | |
1911 | tree upper = TREE_OPERAND (purpose, 1); | |
1912 | ||
1913 | while (1) | |
1914 | { | |
1915 | sub = lookup_element (elt, lower, NULL, NO_INSERT); | |
1916 | if (sub != NULL) | |
1917 | result &= generate_element_init_1 (sub, value, list_p); | |
1918 | if (tree_int_cst_equal (lower, upper)) | |
1919 | break; | |
1920 | lower = int_const_binop (PLUS_EXPR, lower, | |
1921 | integer_one_node, true); | |
1922 | } | |
1923 | } | |
1924 | else | |
1925 | { | |
1926 | sub = lookup_element (elt, purpose, NULL, NO_INSERT); | |
1927 | if (sub != NULL) | |
1928 | result &= generate_element_init_1 (sub, value, list_p); | |
1929 | } | |
f50f6fc3 | 1930 | } |
1931 | break; | |
4ee9c684 | 1932 | |
f50f6fc3 | 1933 | default: |
fa6d6d27 | 1934 | elt->visited = true; |
c580338e | 1935 | result = false; |
f50f6fc3 | 1936 | } |
c580338e | 1937 | |
1938 | return result; | |
f50f6fc3 | 1939 | } |
4ee9c684 | 1940 | |
b94b916c | 1941 | /* A wrapper function for generate_element_init_1 that handles cleanup after |
1942 | gimplification. */ | |
1943 | ||
1944 | static bool | |
1945 | generate_element_init (struct sra_elt *elt, tree init, tree *list_p) | |
1946 | { | |
1947 | bool ret; | |
1948 | ||
1949 | push_gimplify_context (); | |
1950 | ret = generate_element_init_1 (elt, init, list_p); | |
1951 | pop_gimplify_context (NULL); | |
1952 | ||
1953 | /* The replacement can expose previously unreferenced variables. */ | |
1954 | if (ret && *list_p) | |
1955 | { | |
1956 | tree_stmt_iterator i; | |
b94b916c | 1957 | |
1958 | for (i = tsi_start (*list_p); !tsi_end_p (i); tsi_next (&i)) | |
1959 | find_new_referenced_vars (tsi_stmt_ptr (i)); | |
b94b916c | 1960 | } |
1961 | ||
1962 | return ret; | |
1963 | } | |
1964 | ||
f50f6fc3 | 1965 | /* Insert STMT on all the outgoing edges out of BB. Note that if BB |
1966 | has more than one edge, STMT will be replicated for each edge. Also, | |
1967 | abnormal edges will be ignored. */ | |
4ee9c684 | 1968 | |
f50f6fc3 | 1969 | void |
1970 | insert_edge_copies (tree stmt, basic_block bb) | |
1971 | { | |
1972 | edge e; | |
cd665a06 | 1973 | edge_iterator ei; |
f50f6fc3 | 1974 | bool first_copy; |
4ee9c684 | 1975 | |
f50f6fc3 | 1976 | first_copy = true; |
cd665a06 | 1977 | FOR_EACH_EDGE (e, ei, bb->succs) |
4ee9c684 | 1978 | { |
f50f6fc3 | 1979 | /* We don't need to insert copies on abnormal edges. The |
1980 | value of the scalar replacement is not guaranteed to | |
1981 | be valid through an abnormal edge. */ | |
1982 | if (!(e->flags & EDGE_ABNORMAL)) | |
4ee9c684 | 1983 | { |
f50f6fc3 | 1984 | if (first_copy) |
1985 | { | |
1986 | bsi_insert_on_edge (e, stmt); | |
1987 | first_copy = false; | |
1988 | } | |
1989 | else | |
ac13e8d9 | 1990 | bsi_insert_on_edge (e, unsave_expr_now (stmt)); |
4ee9c684 | 1991 | } |
4ee9c684 | 1992 | } |
4ee9c684 | 1993 | } |
1994 | ||
f50f6fc3 | 1995 | /* Helper function to insert LIST before BSI, and set up line number info. */ |
4ee9c684 | 1996 | |
f7d118a9 | 1997 | void |
f50f6fc3 | 1998 | sra_insert_before (block_stmt_iterator *bsi, tree list) |
4ee9c684 | 1999 | { |
4ee9c684 | 2000 | tree stmt = bsi_stmt (*bsi); |
2001 | ||
2002 | if (EXPR_HAS_LOCATION (stmt)) | |
2003 | annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); | |
4ee9c684 | 2004 | bsi_insert_before (bsi, list, BSI_SAME_STMT); |
2005 | } | |
2006 | ||
f50f6fc3 | 2007 | /* Similarly, but insert after BSI. Handles insertion onto edges as well. */ |
4ee9c684 | 2008 | |
f7d118a9 | 2009 | void |
f50f6fc3 | 2010 | sra_insert_after (block_stmt_iterator *bsi, tree list) |
4ee9c684 | 2011 | { |
f50f6fc3 | 2012 | tree stmt = bsi_stmt (*bsi); |
4ee9c684 | 2013 | |
f50f6fc3 | 2014 | if (EXPR_HAS_LOCATION (stmt)) |
2015 | annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); | |
4ee9c684 | 2016 | |
f50f6fc3 | 2017 | if (stmt_ends_bb_p (stmt)) |
2018 | insert_edge_copies (list, bsi->bb); | |
2019 | else | |
f105beb6 | 2020 | bsi_insert_after (bsi, list, BSI_SAME_STMT); |
4ee9c684 | 2021 | } |
2022 | ||
f50f6fc3 | 2023 | /* Similarly, but replace the statement at BSI. */ |
4ee9c684 | 2024 | |
2025 | static void | |
f50f6fc3 | 2026 | sra_replace (block_stmt_iterator *bsi, tree list) |
4ee9c684 | 2027 | { |
f50f6fc3 | 2028 | sra_insert_before (bsi, list); |
f2428b62 | 2029 | bsi_remove (bsi, false); |
f50f6fc3 | 2030 | if (bsi_end_p (*bsi)) |
2031 | *bsi = bsi_last (bsi->bb); | |
2032 | else | |
2033 | bsi_prev (bsi); | |
4ee9c684 | 2034 | } |
2035 | ||
f50f6fc3 | 2036 | /* Scalarize a USE. To recap, this is either a simple reference to ELT, |
0870fd6e | 2037 | if elt is scalar, or some occurrence of ELT that requires a complete |
f50f6fc3 | 2038 | aggregate. IS_OUTPUT is true if ELT is being modified. */ |
4ee9c684 | 2039 | |
2040 | static void | |
f50f6fc3 | 2041 | scalarize_use (struct sra_elt *elt, tree *expr_p, block_stmt_iterator *bsi, |
6084da09 | 2042 | bool is_output, bool use_all) |
4ee9c684 | 2043 | { |
f50f6fc3 | 2044 | tree list = NULL, stmt = bsi_stmt (*bsi); |
4ee9c684 | 2045 | |
f50f6fc3 | 2046 | if (elt->replacement) |
4ee9c684 | 2047 | { |
f50f6fc3 | 2048 | /* If we have a replacement, then updating the reference is as |
2049 | simple as modifying the existing statement in place. */ | |
2050 | if (is_output) | |
6084da09 | 2051 | mark_all_v_defs (stmt); |
2052 | *expr_p = elt->replacement; | |
22aa74c4 | 2053 | update_stmt (stmt); |
4ee9c684 | 2054 | } |
f50f6fc3 | 2055 | else |
4ee9c684 | 2056 | { |
f50f6fc3 | 2057 | /* Otherwise we need some copies. If ELT is being read, then we want |
2058 | to store all (modified) sub-elements back into the structure before | |
2059 | the reference takes place. If ELT is being written, then we want to | |
2060 | load the changed values back into our shadow variables. */ | |
2061 | /* ??? We don't check modified for reads, we just always write all of | |
2062 | the values. We should be able to record the SSA number of the VOP | |
2063 | for which the values were last read. If that number matches the | |
2064 | SSA number of the VOP in the current statement, then we needn't | |
2065 | emit an assignment. This would also eliminate double writes when | |
2066 | a structure is passed as more than one argument to a function call. | |
2067 | This optimization would be most effective if sra_walk_function | |
2068 | processed the blocks in dominator order. */ | |
2069 | ||
6084da09 | 2070 | generate_copy_inout (elt, is_output, generate_element_ref (elt), &list); |
2071 | if (list == NULL) | |
2072 | return; | |
2073 | mark_all_v_defs (list); | |
2e4b8bcd | 2074 | if (is_output) |
6084da09 | 2075 | sra_insert_after (bsi, list); |
2076 | else | |
2e4b8bcd | 2077 | { |
6084da09 | 2078 | sra_insert_before (bsi, list); |
2079 | if (use_all) | |
2080 | mark_no_warning (elt); | |
fdea8514 | 2081 | } |
4ee9c684 | 2082 | } |
4ee9c684 | 2083 | } |
2084 | ||
f50f6fc3 | 2085 | /* Scalarize a COPY. To recap, this is an assignment statement between |
2086 | two scalarizable references, LHS_ELT and RHS_ELT. */ | |
4ee9c684 | 2087 | |
f50f6fc3 | 2088 | static void |
2089 | scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, | |
2090 | block_stmt_iterator *bsi) | |
4ee9c684 | 2091 | { |
f50f6fc3 | 2092 | tree list, stmt; |
4ee9c684 | 2093 | |
f50f6fc3 | 2094 | if (lhs_elt->replacement && rhs_elt->replacement) |
4ee9c684 | 2095 | { |
f50f6fc3 | 2096 | /* If we have two scalar operands, modify the existing statement. */ |
2097 | stmt = bsi_stmt (*bsi); | |
4ee9c684 | 2098 | |
f50f6fc3 | 2099 | /* See the commentary in sra_walk_function concerning |
2100 | RETURN_EXPR, and why we should never see one here. */ | |
35cc02b5 | 2101 | gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT); |
f50f6fc3 | 2102 | |
35cc02b5 | 2103 | GIMPLE_STMT_OPERAND (stmt, 0) = lhs_elt->replacement; |
6084da09 | 2104 | GIMPLE_STMT_OPERAND (stmt, 1) = rhs_elt->replacement; |
22aa74c4 | 2105 | update_stmt (stmt); |
f50f6fc3 | 2106 | } |
2107 | else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy) | |
2108 | { | |
2109 | /* If either side requires a block copy, then sync the RHS back | |
ac13e8d9 | 2110 | to the original structure, leave the original assignment |
f50f6fc3 | 2111 | statement (which will perform the block copy), then load the |
2112 | LHS values out of its now-updated original structure. */ | |
2113 | /* ??? Could perform a modified pair-wise element copy. That | |
2114 | would at least allow those elements that are instantiated in | |
2115 | both structures to be optimized well. */ | |
2116 | ||
2117 | list = NULL; | |
2118 | generate_copy_inout (rhs_elt, false, | |
2119 | generate_element_ref (rhs_elt), &list); | |
2120 | if (list) | |
4ee9c684 | 2121 | { |
88dbf20f | 2122 | mark_all_v_defs (list); |
f50f6fc3 | 2123 | sra_insert_before (bsi, list); |
4ee9c684 | 2124 | } |
f50f6fc3 | 2125 | |
2126 | list = NULL; | |
2127 | generate_copy_inout (lhs_elt, true, | |
2128 | generate_element_ref (lhs_elt), &list); | |
2129 | if (list) | |
88dbf20f | 2130 | { |
2131 | mark_all_v_defs (list); | |
2132 | sra_insert_after (bsi, list); | |
2133 | } | |
4ee9c684 | 2134 | } |
f50f6fc3 | 2135 | else |
2136 | { | |
2137 | /* Otherwise both sides must be fully instantiated. In which | |
2138 | case perform pair-wise element assignments and replace the | |
2139 | original block copy statement. */ | |
2140 | ||
2141 | stmt = bsi_stmt (*bsi); | |
2142 | mark_all_v_defs (stmt); | |
4ee9c684 | 2143 | |
f50f6fc3 | 2144 | list = NULL; |
2145 | generate_element_copy (lhs_elt, rhs_elt, &list); | |
8c0963c4 | 2146 | gcc_assert (list); |
88dbf20f | 2147 | mark_all_v_defs (list); |
f50f6fc3 | 2148 | sra_replace (bsi, list); |
2149 | } | |
2150 | } | |
4ee9c684 | 2151 | |
f50f6fc3 | 2152 | /* Scalarize an INIT. To recap, this is an assignment to a scalarizable |
2153 | reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or | |
2154 | COMPLEX_EXPR. If RHS is NULL, it should be treated as an empty | |
fa6d6d27 | 2155 | CONSTRUCTOR. */ |
4ee9c684 | 2156 | |
fa6d6d27 | 2157 | static void |
f50f6fc3 | 2158 | scalarize_init (struct sra_elt *lhs_elt, tree rhs, block_stmt_iterator *bsi) |
4ee9c684 | 2159 | { |
c580338e | 2160 | bool result = true; |
f50f6fc3 | 2161 | tree list = NULL; |
4ee9c684 | 2162 | |
f50f6fc3 | 2163 | /* Generate initialization statements for all members extant in the RHS. */ |
2164 | if (rhs) | |
eece3694 | 2165 | { |
54a9357d | 2166 | /* Unshare the expression just in case this is from a decl's initial. */ |
2167 | rhs = unshare_expr (rhs); | |
eece3694 | 2168 | result = generate_element_init (lhs_elt, rhs, &list); |
eece3694 | 2169 | } |
4ee9c684 | 2170 | |
f50f6fc3 | 2171 | /* CONSTRUCTOR is defined such that any member not mentioned is assigned |
2172 | a zero value. Initialize the rest of the instantiated elements. */ | |
2173 | generate_element_zero (lhs_elt, &list); | |
c580338e | 2174 | |
fa6d6d27 | 2175 | if (!result) |
2176 | { | |
2177 | /* If we failed to convert the entire initializer, then we must | |
2178 | leave the structure assignment in place and must load values | |
2179 | from the structure into the slots for which we did not find | |
2180 | constants. The easiest way to do this is to generate a complete | |
2181 | copy-out, and then follow that with the constant assignments | |
2182 | that we were able to build. DCE will clean things up. */ | |
2183 | tree list0 = NULL; | |
2184 | generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt), | |
2185 | &list0); | |
2186 | append_to_statement_list (list, &list0); | |
2187 | list = list0; | |
2188 | } | |
4ee9c684 | 2189 | |
fa6d6d27 | 2190 | if (lhs_elt->use_block_copy || !result) |
f50f6fc3 | 2191 | { |
2192 | /* Since LHS is not fully instantiated, we must leave the structure | |
2193 | assignment in place. Treating this case differently from a USE | |
2194 | exposes constants to later optimizations. */ | |
fa6d6d27 | 2195 | if (list) |
2196 | { | |
88dbf20f | 2197 | mark_all_v_defs (list); |
fa6d6d27 | 2198 | sra_insert_after (bsi, list); |
2199 | } | |
f50f6fc3 | 2200 | } |
2201 | else | |
2202 | { | |
2203 | /* The LHS is fully instantiated. The list of initializations | |
2204 | replaces the original structure assignment. */ | |
8c0963c4 | 2205 | gcc_assert (list); |
f50f6fc3 | 2206 | mark_all_v_defs (bsi_stmt (*bsi)); |
88dbf20f | 2207 | mark_all_v_defs (list); |
f50f6fc3 | 2208 | sra_replace (bsi, list); |
4ee9c684 | 2209 | } |
2210 | } | |
2211 | ||
f50f6fc3 | 2212 | /* A subroutine of scalarize_ldst called via walk_tree. Set TREE_NO_TRAP |
2213 | on all INDIRECT_REFs. */ | |
4ee9c684 | 2214 | |
f50f6fc3 | 2215 | static tree |
2216 | mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) | |
4ee9c684 | 2217 | { |
f50f6fc3 | 2218 | tree t = *tp; |
4ee9c684 | 2219 | |
f50f6fc3 | 2220 | if (TREE_CODE (t) == INDIRECT_REF) |
2221 | { | |
2222 | TREE_THIS_NOTRAP (t) = 1; | |
2223 | *walk_subtrees = 0; | |
2224 | } | |
ce45a448 | 2225 | else if (IS_TYPE_OR_DECL_P (t)) |
f50f6fc3 | 2226 | *walk_subtrees = 0; |
4ee9c684 | 2227 | |
f50f6fc3 | 2228 | return NULL; |
4ee9c684 | 2229 | } |
2230 | ||
f50f6fc3 | 2231 | /* Scalarize a LDST. To recap, this is an assignment between one scalarizable |
2232 | reference ELT and one non-scalarizable reference OTHER. IS_OUTPUT is true | |
2233 | if ELT is on the left-hand side. */ | |
4ee9c684 | 2234 | |
2235 | static void | |
f50f6fc3 | 2236 | scalarize_ldst (struct sra_elt *elt, tree other, |
2237 | block_stmt_iterator *bsi, bool is_output) | |
4ee9c684 | 2238 | { |
f50f6fc3 | 2239 | /* Shouldn't have gotten called for a scalar. */ |
8c0963c4 | 2240 | gcc_assert (!elt->replacement); |
4ee9c684 | 2241 | |
f50f6fc3 | 2242 | if (elt->use_block_copy) |
2243 | { | |
2244 | /* Since ELT is not fully instantiated, we have to leave the | |
2245 | block copy in place. Treat this as a USE. */ | |
6084da09 | 2246 | scalarize_use (elt, NULL, bsi, is_output, false); |
f50f6fc3 | 2247 | } |
2248 | else | |
4ee9c684 | 2249 | { |
f50f6fc3 | 2250 | /* The interesting case is when ELT is fully instantiated. In this |
2251 | case we can have each element stored/loaded directly to/from the | |
2252 | corresponding slot in OTHER. This avoids a block copy. */ | |
4ee9c684 | 2253 | |
f50f6fc3 | 2254 | tree list = NULL, stmt = bsi_stmt (*bsi); |
4ee9c684 | 2255 | |
f50f6fc3 | 2256 | mark_all_v_defs (stmt); |
2257 | generate_copy_inout (elt, is_output, other, &list); | |
2e4b8bcd | 2258 | mark_all_v_defs (list); |
6084da09 | 2259 | gcc_assert (list); |
4ee9c684 | 2260 | |
f50f6fc3 | 2261 | /* Preserve EH semantics. */ |
2262 | if (stmt_ends_bb_p (stmt)) | |
4ee9c684 | 2263 | { |
f50f6fc3 | 2264 | tree_stmt_iterator tsi; |
2265 | tree first; | |
2266 | ||
2267 | /* Extract the first statement from LIST. */ | |
2268 | tsi = tsi_start (list); | |
2269 | first = tsi_stmt (tsi); | |
2270 | tsi_delink (&tsi); | |
2271 | ||
2272 | /* Replace the old statement with this new representative. */ | |
2273 | bsi_replace (bsi, first, true); | |
ac13e8d9 | 2274 | |
f50f6fc3 | 2275 | if (!tsi_end_p (tsi)) |
2276 | { | |
2277 | /* If any reference would trap, then they all would. And more | |
2278 | to the point, the first would. Therefore none of the rest | |
2279 | will trap since the first didn't. Indicate this by | |
2280 | iterating over the remaining statements and set | |
2281 | TREE_THIS_NOTRAP in all INDIRECT_REFs. */ | |
2282 | do | |
2283 | { | |
2284 | walk_tree (tsi_stmt_ptr (tsi), mark_notrap, NULL, NULL); | |
2285 | tsi_next (&tsi); | |
2286 | } | |
2287 | while (!tsi_end_p (tsi)); | |
2288 | ||
2289 | insert_edge_copies (list, bsi->bb); | |
2290 | } | |
4ee9c684 | 2291 | } |
f50f6fc3 | 2292 | else |
2293 | sra_replace (bsi, list); | |
4ee9c684 | 2294 | } |
2295 | } | |
2296 | ||
f50f6fc3 | 2297 | /* Generate initializations for all scalarizable parameters. */ |
4ee9c684 | 2298 | |
f50f6fc3 | 2299 | static void |
2300 | scalarize_parms (void) | |
4ee9c684 | 2301 | { |
f50f6fc3 | 2302 | tree list = NULL; |
4f917ffe | 2303 | unsigned i; |
0cc4271a | 2304 | bitmap_iterator bi; |
4ee9c684 | 2305 | |
0cc4271a | 2306 | EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi) |
ac13e8d9 | 2307 | { |
f50f6fc3 | 2308 | tree var = referenced_var (i); |
2309 | struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); | |
2310 | generate_copy_inout (elt, true, var, &list); | |
0cc4271a | 2311 | } |
4ee9c684 | 2312 | |
f50f6fc3 | 2313 | if (list) |
88dbf20f | 2314 | { |
2315 | insert_edge_copies (list, ENTRY_BLOCK_PTR); | |
2316 | mark_all_v_defs (list); | |
2317 | } | |
4ee9c684 | 2318 | } |
2319 | ||
f50f6fc3 | 2320 | /* Entry point to phase 4. Update the function to match replacements. */ |
2321 | ||
4ee9c684 | 2322 | static void |
f50f6fc3 | 2323 | scalarize_function (void) |
4ee9c684 | 2324 | { |
f50f6fc3 | 2325 | static const struct sra_walk_fns fns = { |
2326 | scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false | |
2327 | }; | |
2328 | ||
2329 | sra_walk_function (&fns); | |
2330 | scalarize_parms (); | |
54f658bd | 2331 | bsi_commit_edge_inserts (); |
4ee9c684 | 2332 | } |
2333 | ||
f50f6fc3 | 2334 | \f |
2335 | /* Debug helper function. Print ELT in a nice human-readable format. */ | |
4ee9c684 | 2336 | |
f50f6fc3 | 2337 | static void |
2338 | dump_sra_elt_name (FILE *f, struct sra_elt *elt) | |
2339 | { | |
2340 | if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE) | |
2341 | { | |
2342 | fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f); | |
2343 | dump_sra_elt_name (f, elt->parent); | |
2344 | } | |
2345 | else | |
2346 | { | |
2347 | if (elt->parent) | |
2348 | dump_sra_elt_name (f, elt->parent); | |
2349 | if (DECL_P (elt->element)) | |
2350 | { | |
2351 | if (TREE_CODE (elt->element) == FIELD_DECL) | |
2352 | fputc ('.', f); | |
2353 | print_generic_expr (f, elt->element, dump_flags); | |
2354 | } | |
2100c228 | 2355 | else if (TREE_CODE (elt->element) == RANGE_EXPR) |
2356 | fprintf (f, "["HOST_WIDE_INT_PRINT_DEC".."HOST_WIDE_INT_PRINT_DEC"]", | |
2357 | TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 0)), | |
2358 | TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 1))); | |
f50f6fc3 | 2359 | else |
2360 | fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]", | |
2361 | TREE_INT_CST_LOW (elt->element)); | |
2362 | } | |
2363 | } | |
4ee9c684 | 2364 | |
f50f6fc3 | 2365 | /* Likewise, but callable from the debugger. */ |
2366 | ||
2367 | void | |
2368 | debug_sra_elt_name (struct sra_elt *elt) | |
2369 | { | |
2370 | dump_sra_elt_name (stderr, elt); | |
2371 | fputc ('\n', stderr); | |
2372 | } | |
4ee9c684 | 2373 | |
f7d118a9 | 2374 | void |
2375 | sra_init_cache (void) | |
2376 | { | |
2377 | if (sra_type_decomp_cache) | |
2378 | return; | |
2379 | ||
2380 | sra_type_decomp_cache = BITMAP_ALLOC (NULL); | |
2381 | sra_type_inst_cache = BITMAP_ALLOC (NULL); | |
2382 | } | |
2383 | ||
f50f6fc3 | 2384 | /* Main entry point. */ |
4ee9c684 | 2385 | |
2a1990e9 | 2386 | static unsigned int |
4ee9c684 | 2387 | tree_sra (void) |
2388 | { | |
2389 | /* Initialize local variables. */ | |
c96420f8 | 2390 | todoflags = 0; |
f50f6fc3 | 2391 | gcc_obstack_init (&sra_obstack); |
27335ffd | 2392 | sra_candidates = BITMAP_ALLOC (NULL); |
2393 | needs_copy_in = BITMAP_ALLOC (NULL); | |
f7d118a9 | 2394 | sra_init_cache (); |
f50f6fc3 | 2395 | sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL); |
4ee9c684 | 2396 | |
f50f6fc3 | 2397 | /* Scan. If we find anything, instantiate and scalarize. */ |
2398 | if (find_candidates_for_sra ()) | |
4ee9c684 | 2399 | { |
f50f6fc3 | 2400 | scan_function (); |
2401 | decide_instantiations (); | |
2402 | scalarize_function (); | |
4ee9c684 | 2403 | } |
2404 | ||
4ee9c684 | 2405 | /* Free allocated memory. */ |
2406 | htab_delete (sra_map); | |
2407 | sra_map = NULL; | |
27335ffd | 2408 | BITMAP_FREE (sra_candidates); |
2409 | BITMAP_FREE (needs_copy_in); | |
2410 | BITMAP_FREE (sra_type_decomp_cache); | |
2411 | BITMAP_FREE (sra_type_inst_cache); | |
f50f6fc3 | 2412 | obstack_free (&sra_obstack, NULL); |
c96420f8 | 2413 | return todoflags; |
4ee9c684 | 2414 | } |
2415 | ||
1f0a4df8 | 2416 | static unsigned int |
2417 | tree_sra_early (void) | |
2418 | { | |
2419 | unsigned int ret; | |
2420 | ||
2421 | early_sra = true; | |
2422 | ret = tree_sra (); | |
2423 | early_sra = false; | |
2424 | ||
2425 | return ret; | |
2426 | } | |
2427 | ||
4ee9c684 | 2428 | static bool |
2429 | gate_sra (void) | |
2430 | { | |
2431 | return flag_tree_sra != 0; | |
2432 | } | |
2433 | ||
1f0a4df8 | 2434 | struct tree_opt_pass pass_sra_early = |
2435 | { | |
2436 | "esra", /* name */ | |
2437 | gate_sra, /* gate */ | |
2438 | tree_sra_early, /* execute */ | |
2439 | NULL, /* sub */ | |
2440 | NULL, /* next */ | |
2441 | 0, /* static_pass_number */ | |
2442 | TV_TREE_SRA, /* tv_id */ | |
2443 | PROP_cfg | PROP_ssa, /* properties_required */ | |
2444 | 0, /* properties_provided */ | |
2445 | 0, /* properties_destroyed */ | |
2446 | 0, /* todo_flags_start */ | |
2447 | TODO_dump_func | |
2448 | | TODO_update_ssa | |
2449 | | TODO_ggc_collect | |
2450 | | TODO_verify_ssa, /* todo_flags_finish */ | |
2451 | 0 /* letter */ | |
2452 | }; | |
2453 | ||
ac13e8d9 | 2454 | struct tree_opt_pass pass_sra = |
4ee9c684 | 2455 | { |
2456 | "sra", /* name */ | |
2457 | gate_sra, /* gate */ | |
2458 | tree_sra, /* execute */ | |
2459 | NULL, /* sub */ | |
2460 | NULL, /* next */ | |
2461 | 0, /* static_pass_number */ | |
2462 | TV_TREE_SRA, /* tv_id */ | |
5620e045 | 2463 | PROP_cfg | PROP_ssa, /* properties_required */ |
4ee9c684 | 2464 | 0, /* properties_provided */ |
b6246c40 | 2465 | 0, /* properties_destroyed */ |
4ee9c684 | 2466 | 0, /* todo_flags_start */ |
4fb5e5ca | 2467 | TODO_dump_func |
abd433a7 | 2468 | | TODO_update_ssa |
4fb5e5ca | 2469 | | TODO_ggc_collect |
2470 | | TODO_verify_ssa, /* todo_flags_finish */ | |
0f9005dd | 2471 | 0 /* letter */ |
4ee9c684 | 2472 | }; |