/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2008-2020 Free Software Foundation, Inc.

   Contributed by Martin Jambor <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about functions
   and each call are generated.  Function summaries (often called call graph
   node summaries) contain mainly information about which parameters are
   potential transformation candidates and which bits of candidates are
   accessed.  We differentiate between accesses done as a part of a call
   statement (which might not be necessary if the callee is also transformed)
   and others (which are mandatory).  Call summaries (often called call graph
   edge summaries) contain information about which function formal parameters
   feed into which actual call arguments so that if two parameters are only
   used in a sum which is then passed to another function which does not use
   this parameter, all three parameters of the two functions can be
   eliminated.  Edge summaries also have flags about whether the return value
   is used or if it is only returned in the caller too.  In LTO mode these
   summaries are then streamed to the object file in the compilation phase and
   streamed back in during the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The pass from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this pass but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */
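
/* A hedged illustration only; this example is not part of the original
   sources.  Given something like

     struct pair { int a; int b; };

     static int
     get_a (struct pair *p)
     {
       return p->a;
     }

   IPA-SRA can, assuming all call sites are visible and the pointed-to data
   is not modified through aliases, turn get_a into a function taking a
   single int by value, with callers loading p->a and passing it directly.  */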



#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"
#include "internal-fn.h"

/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into a call actual argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7
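
/* For illustration (not part of the original sources): with
   ISRA_ARG_SIZE_LIMIT_BITS set to 16, aggregates of 64 KiB or more cannot
   be described by the summaries and are not candidates, and an actual
   argument computed from more than IPA_SRA_MAX_PARAM_FLOW_LEN (7) formal
   parameters stops being tracked for transitive removal.  */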

/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Any smaller accesses that
   occur within a function and fall within another access form a tree.  The
   pass cannot analyze parameters with only partially overlapping accesses.  */

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal parameters
     that are only used as actual function arguments passed to callees.  */
  unsigned certain : 1;
  /* Set if the access has a reversed scalar storage order.  */
  unsigned reverse : 1;
};
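
/* A hedged illustration of the access-tree invariant (not from the original
   sources): for a parameter of type struct { int a; int b; } s, passing the
   whole of s to a callee (bit offset 0, size 64, a call-argument access) and
   separately loading s.a (offset 0, size 32) can form a tree with the load
   as a child of the argument access, whereas accesses at bit ranges [0, 48)
   and [32, 64) would partially overlap and disqualify the parameter.  */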

/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Set if there have been writes to or reads from this exact location
     except for as arguments to a function call that can be tracked.  */
  bool nonarg;

  /* Set if the access has a reversed scalar storage order.  */
  bool reverse;
};

/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameter, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing data by reference?  */
  unsigned by_ref : 1;
};

/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in field accesses.  */
  unsigned access_count;

  /* If the below is non-zero, this is the number of uses of this parameter
     as actual call arguments.  */
  int call_uses;
  /* Number of times this parameter has been directly passed to a callee as
     an actual argument (pointer pass-throughs).  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing
     data by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing data by reference?  */
  bool by_ref;

  /* The index of this parameter in the order of parameters of the function
     declaration.  */
  int param_number;
  /* For parameters passing data by reference, this is the index used to
     compute offsets into bb_dereferences.  */
  int deref_index;
};

/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   not in GC memory, this is not necessary and we can consider removing the
   function.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}

/* Class used to convey information about functions from the
   intra-procedural analysis stage to inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if all call statements do not actually use the returned
     value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in the IPA-SRA stack during
     processing of call graph SCCs.  */

  unsigned m_queued : 1;
};

/* Clean up and deallocate what an isra_func_summary points to.  TODO: Since
   this data structure is not in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}


/* Mark the function as not a candidate for any IPA-SRA transformation.
   Return true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}

/* Structure to describe which formal parameters feed into a particular
   actual argument.  */

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual
     argument.  If aggregate_pass_through or pointer_pass_through below are
     true, it must contain exactly one element which is passed through from a
     formal parameter of the given number.  Otherwise, the array contains
     indices of the caller's formal parameters which are used to calculate
     the value of this actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* Size of the portion of the formal parameter that is being passed.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual argument is a verbatim pass through
     of an obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee,
     which would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
};
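
/* A hedged illustration (not from the original sources): for a call
   bar (a + b) in a function foo (int a, int b), the isra_param_flow of the
   only actual argument would have length 2 with inputs holding the indices
   of a and b and no pass-through flags set, which allows the IPA analysis
   to mark all of a, b and bar's parameter as removable if bar never uses
   its parameter.  */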

/* Structure used to convey information about calls from the intra-procedural
   analysis stage to inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to
     compute individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of call statement is only used to construct the
     return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments is not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;
};

/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  virtual void duplicate (cgraph_node *, cgraph_node *,
                          isra_func_summary *old_sum,
                          isra_func_summary *new_sum);
};

/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
                                       isra_func_summary *old_sum,
                                       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning; it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
        {
          param_access *from = (*s->accesses)[j];
          param_access *to = ggc_cleared_alloc<param_access> ();
          to->type = from->type;
          to->alias_ptr_type = from->alias_ptr_type;
          to->unit_offset = from->unit_offset;
          to->unit_size = from->unit_size;
          to->certain = from->certain;
          d->accesses->quick_push (to);
        }
    }
}

/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;

/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  virtual void duplicate (cgraph_edge *, cgraph_edge *,
                          isra_call_summary *old_sum,
                          isra_call_summary *new_sum);
};

static ipa_sra_call_summaries *call_sums;


/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}

/* Dump all information in the call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
        {
          bool first = true;
          fprintf (f, "      Scalar param sources: ");
          for (int j = 0; j < ipf->length; j++)
            {
              if (!first)
                fprintf (f, ", ");
              else
                first = false;
              fprintf (f, "%i", (int) ipf->inputs[j]);
            }
          fprintf (f, "\n");
        }
      if (ipf->aggregate_pass_through)
        fprintf (f, "      Aggregate pass through from the param given above, "
                 "unit offset: %u, unit size: %u\n",
                 ipf->unit_offset, ipf->unit_size);
      if (ipf->pointer_pass_through)
        fprintf (f, "      Pointer pass through from the param given above, "
                 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
    }
}

/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
                                   isra_call_summary *old_sum,
                                   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
}


/* With all GTY stuff done, we can move to anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* Countdown of allowed alias analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which for each basic block and parameter there is a
   distance (offset + size) in that parameter which is dereferenced and
   accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
int by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;

/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
        fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
        fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
                 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
        fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
    {
      if (dump_file)
        fprintf (dump_file, "Function type has attributes.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Always inline function will be inlined "
                 "anyway.\n");
      return false;
    }

  return true;
}

/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "  * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
           access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}


/* Print access tree starting at ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not-certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}

/* Dump access tree starting at ACCESS to stderr.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}

/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    by_ref with %u pass throughs\n", desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}

/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
   describe FNDECL, to F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
                               vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}


/* Dump DESC to F.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s\n",
           desc->param_size_limit, desc->size_reached,
           desc->by_ref ? ", by_ref" : "");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}

/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
   F.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl,
                             isra_func_summary *ifs)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "    parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
    }
}

/* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage.  If the
   function fails return false, otherwise return true.  SRC must fit into an
   unsigned char.  Used for purposes of transitive unused parameter
   removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}

/* Add SRC to the inputs of PARAM_FLOW unless it is already there; assert
   that it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}

/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}

/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in FUN, represented with NODE, and return a negative number if any of them
   is used for anything other than an actual call argument, a simple
   arithmetic operation or a debug statement.  If there are no such uses,
   return the number of actual arguments that this parameter eventually feeds
   to (or zero if there is none).  For any such parameter, mark PARM_NUM as
   one of its sources.  ANALYZED is a bitmap that tracks which SSA names we
   have already started investigating.  */

static int
isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
                              int parm_num, bitmap analyzed)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
        continue;

      /* TODO: We could handle at least const builtin functions like
         arithmetic operations below.  */
      if (is_gimple_call (stmt))
        {
          int all_uses = 0;
          use_operand_p use_p;
          FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
            all_uses++;

          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              res = -1;
              BREAK_FROM_IMM_USE_STMT (imm_iter);
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          int simple_uses = 0;
          for (unsigned i = 0; i < arg_count; i++)
            if (gimple_call_arg (call, i) == name)
              {
                if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
                  {
                    simple_uses = -1;
                    break;
                  }
                simple_uses++;
              }

          if (simple_uses < 0
              || all_uses != simple_uses)
            {
              res = -1;
              BREAK_FROM_IMM_USE_STMT (imm_iter);
            }
          res += all_uses;
        }
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
               && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
                   || gimple_code (stmt) == GIMPLE_PHI))
        {
          tree lhs;
          if (gimple_code (stmt) == GIMPLE_PHI)
            lhs = gimple_phi_result (stmt);
          else
            lhs = gimple_assign_lhs (stmt);

          if (TREE_CODE (lhs) != SSA_NAME)
            {
              res = -1;
              BREAK_FROM_IMM_USE_STMT (imm_iter);
            }
          gcc_assert (!gimple_vdef (stmt));
          if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
            {
              int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
                                                      analyzed);
              if (tmp < 0)
                {
                  res = tmp;
                  BREAK_FROM_IMM_USE_STMT (imm_iter);
                }
              res += tmp;
            }
        }
      else
        {
          res = -1;
          BREAK_FROM_IMM_USE_STMT (imm_iter);
        }
    }
  return res;
}

/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for anything other than an actual
   call argument, a simple arithmetic operation or a debug statement.  If
   there are no such uses, return false and store the number of actual
   arguments that this parameter eventually feeds to (or zero if there is
   none) to *CALL_USES_P.  For any such parameter, mark PARM_NUM as one of its
   sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to be passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
                                    int parm_num, int *call_uses_p)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      *call_uses_p = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
                                                analyzed);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  *call_uses_p = call_uses;
  return false;
}
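
/* A hedged illustration (not from the original sources): in

     static int wrapper (int a)
     {
       return callee (a + 1);
     }

   the default definition of a feeds, through one addition, only into the
   actual argument of the call, so the function above is expected to report
   one call use and no disqualifying uses, making a locally unused.  */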

/* Scan immediate uses of a default definition SSA name of a parameter PARM
   and examine whether there are any nonarg uses that are not actual
   arguments or otherwise infeasible uses.  If so, return true, otherwise
   return false.  Create pass-through IPA flow records for any direct uses as
   call arguments and, if returning false, store their number into
   *PT_COUNT_P.  NODE and FUN must represent the function that is currently
   analyzed, PARM_NUM must be the index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or
   turning them into bits passed by value, as opposed to generic unused
   parameter removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
                          int parm_num, unsigned *pt_count_p)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
        continue;

      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          while (handled_component_p (rhs))
            rhs = TREE_OPERAND (rhs, 0);
          if (TREE_CODE (rhs) == MEM_REF
              && TREE_OPERAND (rhs, 0) == name
              && integer_zerop (TREE_OPERAND (rhs, 1))
              && types_compatible_p (TREE_TYPE (rhs),
                                     TREE_TYPE (TREE_TYPE (name)))
              && !TREE_THIS_VOLATILE (rhs))
            uses_ok++;
        }
      else if (is_gimple_call (stmt))
        {
          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              ret = true;
              BREAK_FROM_IMM_USE_STMT (ui);
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          for (unsigned i = 0; i < arg_count; ++i)
            {
              tree arg = gimple_call_arg (stmt, i);

              if (arg == name)
                {
                  /* TODO: Allow &MEM_REF[name + offset] here,
                     ipa_param_body_adjustments::modify_call_stmt has to be
                     adjusted too.  */
                  csum->m_arg_flow[i].pointer_pass_through = true;
                  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
                  pt_count++;
                  uses_ok++;
                  continue;
                }

              while (handled_component_p (arg))
                arg = TREE_OPERAND (arg, 0);
              if (TREE_CODE (arg) == MEM_REF
                  && TREE_OPERAND (arg, 0) == name
                  && integer_zerop (TREE_OPERAND (arg, 1))
                  && types_compatible_p (TREE_TYPE (arg),
                                         TREE_TYPE (TREE_TYPE (name)))
                  && !TREE_THIS_VOLATILE (arg))
                uses_ok++;
            }
        }

      /* If the number of valid uses does not match the number of
         uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
        all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
        {
          ret = true;
          BREAK_FROM_IMM_USE_STMT (ui);
        }
    }

  *pt_count_p = pt_count;
  return ret;
}
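
/* A hedged illustration (not from the original sources): in

     static int use_ptr (int *p)
     {
       consume (p);
       return *p;
     }

   the pass-through of p to consume and the dereference at offset zero are
   both acceptable, whereas storing p into global memory or computing p + 1
   would be disqualifying nonarg uses and make the function above return
   true.  */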

/* Initialize vector of parameter descriptors of NODE.  Return true if there
   are any candidates for splitting or unused aggregate parameter removal, in
   which case the function body must be scanned (the function may return false
   even when there are candidates for removal of register parameters).  */

static bool
create_parameter_descriptors (cgraph_node *node,
                              vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
        print_generic_expr (dump_file, parm, TDF_UID);

      int scalar_call_uses;
      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is volatile\n");
          continue;
        }
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is a va_list type\n");
          continue;
        }
      if (TREE_ADDRESSABLE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is addressable\n");
          continue;
        }
      if (TREE_ADDRESSABLE (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, type cannot be split\n");
          continue;
        }

      if (is_gimple_reg (parm)
          && !isra_track_scalar_param_local_uses (fun, node, parm, num,
                                                  &scalar_call_uses))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " is a scalar with only %i call uses\n",
                     scalar_call_uses);

          desc->locally_unused = true;
          desc->call_uses = scalar_call_uses;
        }

      if (POINTER_TYPE_P (type))
        {
          type = TREE_TYPE (type);

          if (TREE_CODE (type) == FUNCTION_TYPE
              || TREE_CODE (type) == METHOD_TYPE)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a function\n");
              continue;
            }
          if (TYPE_VOLATILE (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a volatile type\n");
              continue;
            }
          if (TREE_CODE (type) == ARRAY_TYPE
              && TYPE_NONALIASED_COMPONENT (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a nonaliased component array\n");
              continue;
            }
          if (!is_gimple_reg (parm))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, a reference which is "
                         "not a gimple register (probably addressable)\n");
              continue;
            }
          if (is_va_list_type (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a va list\n");
              continue;
            }
          if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
                                        &desc->ptr_pt_count))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference has "
                         "nonarg uses\n");
              continue;
            }
          desc->by_ref = true;
        }
      else if (!AGGREGATE_TYPE_P (type))
        {
          /* This is in an else branch because scalars passed by reference are
             still candidates to be passed by value.  */
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not an aggregate\n");
          continue;
        }

      if (!COMPLETE_TYPE_P (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not a complete type\n");
          continue;
        }
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, size not representable\n");
          continue;
        }
      unsigned HOST_WIDE_INT type_size
        = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
          || type_size >= ISRA_ARG_SIZE_LIMIT)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, has zero or huge size\n");
          continue;
        }
      if (type_internals_preclude_sra_p (type, &msg))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, %s\n", msg);
          continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref)
        desc->deref_index = by_ref_count++;
    }
  return ret;
}

/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}


/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
   write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
             desc->param_number, reason);

  desc->split_candidate = false;
}

/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
   there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}

/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
                 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
                                             sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  return access;
}

/* In what context scan_expr_access has been called, whether it deals with a
   load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear if the access is a load or store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer to the linked list
   at a certain tree level FIRST.  Attempt to create it and put it into the
   appropriate place in the access tree if it does not exist, but fail and
   return NULL if there are already too many accesses, if it would create a
   partially overlapping access or if an access would end up within a
   pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
              HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
        return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
         it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
         && access->next_sibling
         && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
         something below access.  But we only allow nesting in call
         arguments.  */
      if (access->nonarg)
        return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
         what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
        return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->first_child = access;

      while (access->next_sibling
             && access->next_sibling->offset < offset + size)
        access = access->next_sibling;
      if (access->offset + access->size > offset + size)
        {
          /* This must be a different access, which are sorted, so the
             following must be true and this signals a partial overlap.  */
          gcc_assert (access->offset > offset);
          return NULL;
        }

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
         super-access.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}
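
/* A hedged walk-through of the cases above (not from the original sources):
   with existing sibling accesses at bit ranges [0, 32) and [64, 96), a new
   request for [64, 96) returns the existing node, [32, 64) becomes a new
   sibling between them, [0, 96) in ISRA_CTX_ARG context becomes a parent of
   both, and [16, 48) fails because it only partially overlaps [0, 32).  */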

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, mark it as used in context CTX.  Attempt
   to create it if it does not exist, but fail and return NULL if there are
   already too many accesses, if it would create a partially overlapping access
   or if an access would end up in a non-call access.  */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
            isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
                                              size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
                                  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}

/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of the parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
                      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size > 0);

      if (parent_size != 0)
        {
          if (access->offset < parent_offset)
            {
              error ("Access offset before parent offset");
              return true;
            }
          if (access->size >= parent_size)
            {
              error ("Access size greater or equal to its parent size");
              return true;
            }
          if (access->offset + access->size > parent_offset + parent_size)
            {
              error ("Access terminates outside of its parent");
              return true;
            }
        }

      if (verify_access_tree_1 (access->first_child, access->offset,
                                access->size))
        return true;

      if (access->next_sibling
          && (access->next_sibling->offset < access->offset + access->size))
        {
          error ("Access overlaps with its sibling");
          return true;
        }

      access = access->next_sibling;
    }
  return false;
}

/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
        dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}


/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}

/* Mark a dereference of parameter identified by DESC of distance DIST in a
   basic block BB, unless the BB has already been marked as potentially
   final.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
                        basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (bitmap_bit_p (final_bbs, bb->index))
    return;

  int idx = bb->index * by_ref_count + desc->deref_index;
  if (bb_dereferences[idx] < dist)
    bb_dereferences[idx] = dist;
}
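
/* For illustration (not part of the original sources): bb_dereferences is
   laid out as a row per basic block with one slot per by-reference
   candidate, so with by_ref_count == 3, the dereference distance of the
   candidate with deref_index 1 in the block with index 4 lives at
   bb_dereferences[4 * 3 + 1].  */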

/* Return true if any potential replacements should use NEW_TYPE as opposed to
   previously recorded OLD_TYPE.  */

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
          || TREE_CODE (new_type) == VECTOR_TYPE))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || TREE_CODE (old_type) == VECTOR_TYPE)
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
          != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
          != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}
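
/* A hedged illustration of the ordering above (not from the original
   sources): when the same location is read both as a single-member struct
   and as an int, the int prevails because it is a register type; between
   int and long, the wider precision wins; and a bool, whose TYPE_PRECISION
   does not fill its TYPE_SIZE, loses to a full-precision integer.  */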

/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};

/* Record a use of a portion (given by UNIT_OFFSET and UNIT_SIZE) of a
   parameter described by DESC in a call argument described by CALL_INFO.  */

static void
record_nonregister_call_use (gensum_param_desc *desc,
                             scan_call_info *call_info,
                             unsigned unit_offset, unsigned unit_size)
{
  isra_call_summary *csum = call_sums->get_create (call_info->cs);
  csum->init_inputs (call_info->argument_count);

  isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
  param_flow->aggregate_pass_through = true;
  set_single_param_flow_source (param_flow, desc->param_number);
  param_flow->unit_offset = unit_offset;
  param_flow->unit_size = unit_size;
  desc->call_uses++;
}

/* Callback of walk_aliased_vdefs, just mark that there was a possible
   modification.  */

static bool
mark_maybe_modified (ao_ref *, tree, void *data)
{
  bool *maybe_modified = (bool *) data;
  *maybe_modified = true;
  return true;
}
1638
1639 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1640 specifies whether EXPR is used in a load, store or as an argument call. BB
1641 must be the basic block in which expr resides. If CTX specifies call
1642 argument context, CALL_INFO must describe that call and argument position,
1643 otherwise it is ignored. */
1644
1645 static void
1646 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1647 basic_block bb, scan_call_info *call_info = NULL)
1648 {
1649 poly_int64 poffset, psize, pmax_size;
1650 HOST_WIDE_INT offset, size, max_size;
1651 tree base;
1652 bool deref = false;
1653 bool reverse;
1654
1655 if (TREE_CODE (expr) == BIT_FIELD_REF
1656 || TREE_CODE (expr) == IMAGPART_EXPR
1657 || TREE_CODE (expr) == REALPART_EXPR)
1658 expr = TREE_OPERAND (expr, 0);
1659
1660 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1661
1662 if (TREE_CODE (base) == MEM_REF)
1663 {
1664 tree op = TREE_OPERAND (base, 0);
1665 if (TREE_CODE (op) != SSA_NAME
1666 || !SSA_NAME_IS_DEFAULT_DEF (op))
1667 return;
1668 base = SSA_NAME_VAR (op);
1669 if (!base)
1670 return;
1671 deref = true;
1672 }
1673 if (TREE_CODE (base) != PARM_DECL)
1674 return;
1675
1676 gensum_param_desc *desc = get_gensum_param_desc (base);
1677 if (!desc || !desc->split_candidate)
1678 return;
1679
1680 if (!poffset.is_constant (&offset)
1681 || !psize.is_constant (&size)
1682 || !pmax_size.is_constant (&max_size))
1683 {
1684 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1685 "access.");
1686 return;
1687 }
1688 if (size < 0 || size != max_size)
1689 {
1690 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1691 return;
1692 }
1693 if (TREE_CODE (expr) == COMPONENT_REF
1694 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1695 {
1696 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1697 return;
1698 }
1699 if (offset < 0)
1700 {
1701 disqualify_split_candidate (desc, "Encountered an access at a "
1702 "negative offset.");
1703 return;
1704 }
1705 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1706 gcc_assert ((size % BITS_PER_UNIT) == 0);
1707 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1708 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1709 {
1710 disqualify_split_candidate (desc, "Encountered an access with too big "
1711 "offset or size");
1712 return;
1713 }
1714
1715 tree type = TREE_TYPE (expr);
1716 unsigned int exp_align = get_object_alignment (expr);
1717
1718 if (exp_align < TYPE_ALIGN (type))
1719 {
1720 disqualify_split_candidate (desc, "Underaligned access.");
1721 return;
1722 }
1723
1724 if (deref)
1725 {
1726 if (!desc->by_ref)
1727 {
1728 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1729 return;
1730 }
1731 else if (ctx == ISRA_CTX_STORE)
1732 {
1733 disqualify_split_candidate (desc, "Storing to data passed by "
1734 "reference.");
1735 return;
1736 }
1737
1738 if (!aa_walking_limit)
1739 {
1740 disqualify_split_candidate (desc, "Out of alias analysis step "
1741 "limit.");
1742 return;
1743 }
1744
1745 gcc_checking_assert (gimple_vuse (stmt));
1746 bool maybe_modified = false;
1747 ao_ref ar;
1748
1749 ao_ref_init (&ar, expr);
1750 bitmap visited = BITMAP_ALLOC (NULL);
1751 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1752 mark_maybe_modified, &maybe_modified,
1753 &visited, NULL, aa_walking_limit);
1754 BITMAP_FREE (visited);
1755 if (walked > 0)
1756 {
1757 gcc_assert (aa_walking_limit > walked);
1758 aa_walking_limit = aa_walking_limit - walked;
1759 }
1760 if (walked < 0)
1761 aa_walking_limit = 0;
1762 if (maybe_modified || walked < 0)
1763 {
1764 disqualify_split_candidate (desc, "Data passed by reference possibly "
1765 "modified through an alias.");
1766 return;
1767 }
1768 else
1769 mark_param_dereference (desc, offset + size, bb);
1770 }
1771 else
1772 /* Pointer parameters with direct uses should have been ruled out by
1773 analyzing SSA default def when looking at the parameters. */
1774 gcc_assert (!desc->by_ref);
1775
1776 gensum_param_access *access = get_access (desc, offset, size, ctx);
1777 if (!access)
1778 return;
1779
1780 if (ctx == ISRA_CTX_ARG)
1781 {
1782 gcc_checking_assert (call_info);
1783
1784 if (!deref)
1785 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1786 size / BITS_PER_UNIT);
1787 else
1788 /* This is not a pass-through of a pointer; this is a use like any
1789 other. */
1790 access->nonarg = true;
1791 }
1792
1793 if (!access->type)
1794 {
1795 access->type = type;
1796 access->alias_ptr_type = reference_alias_ptr_type (expr);
1797 access->reverse = reverse;
1798 }
1799 else
1800 {
1801 if (exp_align < TYPE_ALIGN (access->type))
1802 {
1803 disqualify_split_candidate (desc, "Reference has lower alignment "
1804 "than a previous one.");
1805 return;
1806 }
1807 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1808 {
1809 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1810 return;
1811 }
1812 if (access->reverse != reverse)
1813 {
1814 disqualify_split_candidate (desc, "Both normal and reverse "
1815 "scalar storage order.");
1816 return;
1817 }
1818 if (!deref
1819 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1820 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1821 {
1822 /* We need the same aggregate type on all accesses to be able to
1823 distinguish transformation spots from pass-through arguments in
1824 the transformation phase. */
1825 disqualify_split_candidate (desc, "We do not support aggregate "
1826 "type punning.");
1827 return;
1828 }
1829
1830 if (type_prevails_p (access->type, type))
1831 access->type = type;
1832 }
1833 }
1834
1835 /* Scan the body of the function described by NODE and FUN and create access
1836 trees for its parameters. */
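/* A brief overview, derived from the code below: the walk handles return
statements, plain assignments, call arguments and LHSs, and asm operands;
all other statements are ignored. Statements that may throw externally
and calls that are neither const nor pure additionally mark their BB in
final_bbs, which constrains the dereference distance propagation. */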
1837
1838 static void
1839 scan_function (cgraph_node *node, struct function *fun)
1840 {
1841 basic_block bb;
1842
1843 FOR_EACH_BB_FN (bb, fun)
1844 {
1845 gimple_stmt_iterator gsi;
1846 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1847 {
1848 gimple *stmt = gsi_stmt (gsi);
1849
1850 if (stmt_can_throw_external (fun, stmt))
1851 bitmap_set_bit (final_bbs, bb->index);
1852 switch (gimple_code (stmt))
1853 {
1854 case GIMPLE_RETURN:
1855 {
1856 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1857 if (t != NULL_TREE)
1858 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1859 bitmap_set_bit (final_bbs, bb->index);
1860 }
1861 break;
1862
1863 case GIMPLE_ASSIGN:
1864 if (gimple_assign_single_p (stmt)
1865 && !gimple_clobber_p (stmt))
1866 {
1867 tree rhs = gimple_assign_rhs1 (stmt);
1868 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1869 tree lhs = gimple_assign_lhs (stmt);
1870 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1871 }
1872 break;
1873
1874 case GIMPLE_CALL:
1875 {
1876 unsigned argument_count = gimple_call_num_args (stmt);
1877 isra_scan_context ctx = ISRA_CTX_ARG;
1878 scan_call_info call_info, *call_info_p = &call_info;
1879 if (gimple_call_internal_p (stmt))
1880 {
1881 call_info_p = NULL;
1882 ctx = ISRA_CTX_LOAD;
1883 internal_fn ifn = gimple_call_internal_fn (stmt);
1884 if (internal_store_fn_p (ifn))
1885 ctx = ISRA_CTX_STORE;
1886 }
1887 else
1888 {
1889 call_info.cs = node->get_edge (stmt);
1890 call_info.argument_count = argument_count;
1891 }
1892
1893 for (unsigned i = 0; i < argument_count; i++)
1894 {
1895 call_info.arg_idx = i;
1896 scan_expr_access (gimple_call_arg (stmt, i), stmt,
1897 ctx, bb, call_info_p);
1898 }
1899
1900 tree lhs = gimple_call_lhs (stmt);
1901 if (lhs)
1902 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1903 int flags = gimple_call_flags (stmt);
1904 if ((flags & (ECF_CONST | ECF_PURE)) == 0)
1905 bitmap_set_bit (final_bbs, bb->index);
1906 }
1907 break;
1908
1909 case GIMPLE_ASM:
1910 {
1911 gasm *asm_stmt = as_a <gasm *> (stmt);
1912 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1913 asm_visit_addr);
1914 bitmap_set_bit (final_bbs, bb->index);
1915
1916 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1917 {
1918 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1919 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1920 }
1921 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1922 {
1923 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1924 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1925 }
1926 }
1927 break;
1928
1929 default:
1930 break;
1931 }
1932 }
1933 }
1934 }
1935
1936 /* Return true if SSA_NAME NAME is only used in return statements, or if
1937 results of any operations it is involved in are only used in return
1938 statements. ANALYZED is a bitmap that tracks which SSA names we have
1939 already started investigating. */
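/* An illustrative sketch (hypothetical code, not from this unit): in

int f (int i)
{
int tmp = i + 1;
return tmp;
}

the SSA name holding the result of I + 1 flows only into the return
statement, so the predicate returns true for the default definition of I,
while any use that escapes into memory or a call argument would make it
return false. */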
1940
1941 static bool
1942 ssa_name_only_returned_p (tree name, bitmap analyzed)
1943 {
1944 bool res = true;
1945 imm_use_iterator imm_iter;
1946 gimple *stmt;
1947
1948 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1949 {
1950 if (is_gimple_debug (stmt))
1951 continue;
1952
1953 if (gimple_code (stmt) == GIMPLE_RETURN)
1954 {
1955 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1956 if (t != name)
1957 {
1958 res = false;
1959 BREAK_FROM_IMM_USE_STMT (imm_iter);
1960 }
1961 }
1962 else if ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1963 || gimple_code (stmt) == GIMPLE_PHI)
1964 {
1965 /* TODO: And perhaps for const function calls too? */
1966 tree lhs;
1967 if (gimple_code (stmt) == GIMPLE_PHI)
1968 lhs = gimple_phi_result (stmt);
1969 else
1970 lhs = gimple_assign_lhs (stmt);
1971
1972 if (TREE_CODE (lhs) != SSA_NAME)
1973 {
1974 res = false;
1975 BREAK_FROM_IMM_USE_STMT (imm_iter);
1976 }
1977 gcc_assert (!gimple_vdef (stmt));
1978 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
1979 && !ssa_name_only_returned_p (lhs, analyzed))
1980 {
1981 res = false;
1982 BREAK_FROM_IMM_USE_STMT (imm_iter);
1983 }
1984 }
1985 else
1986 {
1987 res = false;
1988 BREAK_FROM_IMM_USE_STMT (imm_iter);
1989 }
1990 }
1991 return res;
1992 }
1993
1994 /* Inspect the uses of the return value of the call associated with CS, and if
1995 it is not used or if it is only used to construct the return value of the
1996 caller, mark it as such in call or caller summary. Also check for
1997 misaligned arguments. */
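/* For instance (an illustrative case only), passing s.f as a non-register
aggregate argument where f does not start on a byte boundary makes
get_inner_reference return a BITPOS that is not a multiple of
BITS_PER_UNIT, and m_bit_aligned_arg is then set in the call summary. */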
1998
1999 static void
2000 isra_analyze_call (cgraph_edge *cs)
2001 {
2002 gcall *call_stmt = cs->call_stmt;
2003 unsigned count = gimple_call_num_args (call_stmt);
2004 isra_call_summary *csum = call_sums->get_create (cs);
2005
2006 for (unsigned i = 0; i < count; i++)
2007 {
2008 tree arg = gimple_call_arg (call_stmt, i);
2009 if (is_gimple_reg (arg))
2010 continue;
2011
2012 tree offset;
2013 poly_int64 bitsize, bitpos;
2014 machine_mode mode;
2015 int unsignedp, reversep, volatilep = 0;
2016 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2017 &unsignedp, &reversep, &volatilep);
2018 if (!multiple_p (bitpos, BITS_PER_UNIT))
2019 {
2020 csum->m_bit_aligned_arg = true;
2021 break;
2022 }
2023 }
2024
2025 tree lhs = gimple_call_lhs (call_stmt);
2026 if (lhs)
2027 {
2028 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2029 from this function (without being read anywhere). */
2030 if (TREE_CODE (lhs) == SSA_NAME)
2031 {
2032 bitmap analyzed = BITMAP_ALLOC (NULL);
2033 if (ssa_name_only_returned_p (lhs, analyzed))
2034 csum->m_return_returned = true;
2035 BITMAP_FREE (analyzed);
2036 }
2037 }
2038 else
2039 csum->m_return_ignored = true;
2040 }
2041
2042 /* Look at all calls going out of NODE and perform all analyses necessary for
2043 IPA-SRA that are not done at body scan time, or that must be done even
2044 when the body is not scanned because the function is not a candidate. */
2045
2046 static void
2047 isra_analyze_all_outgoing_calls (cgraph_node *node)
2048 {
2049 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2050 isra_analyze_call (cs);
2051 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2052 isra_analyze_call (cs);
2053 }
2054
2055 /* Dump a dereferences table with heading STR to file F. */
2056
2057 static void
2058 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2059 {
2060 basic_block bb;
2061
2062 fprintf (dump_file, "%s", str);
2063 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2064 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2065 {
2066 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2067 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2068 {
2069 int i;
2070 for (i = 0; i < by_ref_count; i++)
2071 {
2072 int idx = bb->index * by_ref_count + i;
2073 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2074 }
2075 }
2076 fprintf (f, "\n");
2077 }
2078 fprintf (dump_file, "\n");
2079 }
2080
2081 /* Propagate distances in bb_dereferences in the direction opposite to the
2082 control flow edges, in each step storing the maximum of the current value
2083 and the minimum of all successors. These steps are repeated until the table
2084 stabilizes. Note that BBs which might terminate the function (according to
2085 the final_bbs bitmap) are never updated in this way. */
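/* An illustrative sketch (hypothetical CFG): if BB1 branches to BB2 and
BB3, which both join in BB4, and a by-reference parameter is known to be
dereferenced up to unit 8 in BB2 but only up to unit 4 in BB3, then BB1
receives max (0, min (8, 4)) = 4, so only a four-unit dereference may be
assumed safe at function entry. */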
2086
2087 static void
2088 propagate_dereference_distances (struct function *fun)
2089 {
2090 basic_block bb;
2091
2092 if (dump_file && (dump_flags & TDF_DETAILS))
2093 dump_dereferences_table (dump_file, fun,
2094 "Dereference table before propagation:\n");
2095
2096 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2097 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2098 FOR_EACH_BB_FN (bb, fun)
2099 {
2100 queue.quick_push (bb);
2101 bb->aux = bb;
2102 }
2103
2104 while (!queue.is_empty ())
2105 {
2106 edge_iterator ei;
2107 edge e;
2108 bool change = false;
2109 int i;
2110
2111 bb = queue.pop ();
2112 bb->aux = NULL;
2113
2114 if (bitmap_bit_p (final_bbs, bb->index))
2115 continue;
2116
2117 for (i = 0; i < by_ref_count; i++)
2118 {
2119 int idx = bb->index * by_ref_count + i;
2120 bool first = true;
2121 HOST_WIDE_INT inh = 0;
2122
2123 FOR_EACH_EDGE (e, ei, bb->succs)
2124 {
2125 int succ_idx = e->dest->index * by_ref_count + i;
2126
2127 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2128 continue;
2129
2130 if (first)
2131 {
2132 first = false;
2133 inh = bb_dereferences [succ_idx];
2134 }
2135 else if (bb_dereferences [succ_idx] < inh)
2136 inh = bb_dereferences [succ_idx];
2137 }
2138
2139 if (!first && bb_dereferences[idx] < inh)
2140 {
2141 bb_dereferences[idx] = inh;
2142 change = true;
2143 }
2144 }
2145
2146 if (change)
2147 FOR_EACH_EDGE (e, ei, bb->preds)
2148 {
2149 if (e->src->aux)
2150 continue;
2151
2152 e->src->aux = e->src;
2153 queue.quick_push (e->src);
2154 }
2155 }
2156
2157 if (dump_file && (dump_flags & TDF_DETAILS))
2158 dump_dereferences_table (dump_file, fun,
2159 "Dereference table after propagation:\n");
2160 }
2161
2162 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2163 children, return true if the parameter cannot be split, otherwise return
2164 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be
2165 the index of the entry BB in the function of PARM. */
2166
2167 static bool
2168 check_gensum_access (tree parm, gensum_param_desc *desc,
2169 gensum_param_access *access,
2170 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2171 int entry_bb_index)
2172 {
2173 if (access->nonarg)
2174 {
2175 *only_calls = false;
2176 *nonarg_acc_size += access->size;
2177
2178 if (access->first_child)
2179 {
2180 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2181 return true;
2182 }
2183 }
2184 /* Do not decompose a non-BLKmode param in a way that would create
2185 BLKmode params. Especially for by-reference passing (thus,
2186 pointer-type param) this is hardly worthwhile. */
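/* (An illustrative case, not from this unit: the parameter itself may
have a scalar mode such as DImode while a piece of the accessed
aggregate, e.g. a contained array, has BLKmode; the replacement would
then have to be passed in memory.) */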
2187 if (DECL_MODE (parm) != BLKmode
2188 && TYPE_MODE (access->type) == BLKmode)
2189 {
2190 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2191 return true;
2192 }
2193
2194 if (desc->by_ref)
2195 {
2196 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2197 if ((access->offset + access->size) > bb_dereferences[idx])
2198 {
2199 disqualify_split_candidate (desc, "Would create a possibly "
2200 "illegal dereference in a caller.");
2201 return true;
2202 }
2203 }
2204
2205 for (gensum_param_access *ch = access->first_child;
2206 ch;
2207 ch = ch->next_sibling)
2208 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2209 entry_bb_index))
2210 return true;
2211
2212 return false;
2213 }
2214
2215 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2216 descriptor DESC. */
2217
2218 static void
2219 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2220 {
2221 param_access *to = ggc_cleared_alloc<param_access> ();
2222 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2223 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2224 to->unit_offset = from->offset / BITS_PER_UNIT;
2225 to->unit_size = from->size / BITS_PER_UNIT;
2226 to->type = from->type;
2227 to->alias_ptr_type = from->alias_ptr_type;
2228 to->certain = from->nonarg;
2229 to->reverse = from->reverse;
2230 vec_safe_push (desc->accesses, to);
2231
2232 for (gensum_param_access *ch = from->first_child;
2233 ch;
2234 ch = ch->next_sibling)
2235 copy_accesses_to_ipa_desc (ch, desc);
2236 }
2237
2238 /* Analyze function body scan results stored in PARAM_DESCRIPTIONS, detect
2239 possible transformations and store information about them in the function
2240 summary. NODE, FUN and IFS are all various structures describing the
2241 currently analyzed function. */
2242
2243 static void
2244 process_scan_results (cgraph_node *node, struct function *fun,
2245 isra_func_summary *ifs,
2246 vec<gensum_param_desc> *param_descriptions)
2247 {
2248 bool check_pass_throughs = false;
2249 bool dereferences_propagated = false;
2250 tree parm = DECL_ARGUMENTS (node->decl);
2251 unsigned param_count = param_descriptions->length();
2252
2253 for (unsigned desc_index = 0;
2254 desc_index < param_count;
2255 desc_index++, parm = DECL_CHAIN (parm))
2256 {
2257 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2258 if (!desc->split_candidate)
2259 continue;
2260
2261 if (flag_checking)
2262 isra_verify_access_tree (desc->accesses);
2263
2264 if (!dereferences_propagated
2265 && desc->by_ref
2266 && desc->accesses)
2267 {
2268 propagate_dereference_distances (fun);
2269 dereferences_propagated = true;
2270 }
2271
2272 HOST_WIDE_INT nonarg_acc_size = 0;
2273 bool only_calls = true;
2274 bool check_failed = false;
2275
2276 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2277 for (gensum_param_access *acc = desc->accesses;
2278 acc;
2279 acc = acc->next_sibling)
2280 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2281 entry_bb_index))
2282 {
2283 check_failed = true;
2284 break;
2285 }
2286 if (check_failed)
2287 continue;
2288
2289 if (only_calls)
2290 desc->locally_unused = true;
2291
2292 HOST_WIDE_INT cur_param_size
2293 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2294 HOST_WIDE_INT param_size_limit;
2295 if (!desc->by_ref || optimize_function_for_size_p (fun))
2296 param_size_limit = cur_param_size;
2297 else
2298 param_size_limit
2299 = opt_for_fn (node->decl,
2300 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2301 if (nonarg_acc_size > param_size_limit
2302 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2303 {
2304 disqualify_split_candidate (desc, "Would result into a too big set "
2305 "of replacements.");
2306 }
2307 else
2308 {
2309 /* create_parameter_descriptors makes sure unit sizes of all
2310 candidate parameters fit unsigned integers restricted to
2311 ISRA_ARG_SIZE_LIMIT. */
2312 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2313 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2314 if (desc->split_candidate && desc->ptr_pt_count)
2315 {
2316 gcc_assert (desc->by_ref);
2317 check_pass_throughs = true;
2318 }
2319 }
2320 }
2321
2322 /* When a pointer parameter is passed through to a callee which uses it only
2323 to read one or a few items, we can attempt to transform it to obtaining
2324 and passing through the items instead of the pointer. But we must take
2325 extra care that 1) we do not introduce any segmentation fault by moving
2326 dereferences above control flow and that 2) the data is not modified
2327 through an alias in this function. The IPA analysis must not introduce
2328 any access candidates unless it can prove both.
2329
2330 The current solution is very crude, as it consists of ensuring that the
2331 call post-dominates the entry BB and that the definition of the VUSE of
2332 the call is the default definition. TODO: For non-recursive callees in
2333 the same compilation unit we could do better by doing the analysis in
2334 topological order and looking into access candidates of callees, using
2335 their alias_ptr_types to attempt real AA. We could also use the maximum
2336 known dereferenced offset in this function at the IPA level.
2337
2338 TODO: Measure the overhead and the effect of just being pessimistic.
2339 Maybe this is only -O3 material?
2340 */
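/* A minimal illustrative sketch (hypothetical code, use () is a made-up
function) of the hazard being avoided:

void callee (int *p) { use (*p); }

void caller (int *p, int flag)
{
if (flag)
callee (p);
}

Passing *P instead of P would hoist the dereference above the FLAG check
and could fault when CALLER is invoked with FLAG == 0 and an invalid
pointer. The post-dominance test below rejects exactly such call
sites. */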
2341 bool pdoms_calculated = false;
2342 if (check_pass_throughs)
2343 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2344 {
2345 gcall *call_stmt = cs->call_stmt;
2346 tree vuse = gimple_vuse (call_stmt);
2347
2348 /* If the callee is a const function, we don't get a VUSE. In such a
2349 case there will be no memory accesses in the called function (or the
2350 const attribute is wrong) and then we just don't care. */
2351 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2352
2353 unsigned count = gimple_call_num_args (call_stmt);
2354 isra_call_summary *csum = call_sums->get_create (cs);
2355 csum->init_inputs (count);
2356 for (unsigned argidx = 0; argidx < count; argidx++)
2357 {
2358 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2359 continue;
2360 unsigned pidx
2361 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2362 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2363 if (!desc->split_candidate)
2364 {
2365 csum->m_arg_flow[argidx].pointer_pass_through = false;
2366 continue;
2367 }
2368 if (!uses_memory_as_obtained)
2369 continue;
2370
2371 /* Post-dominator check placed last, hoping that it usually won't
2372 be needed. */
2373 if (!pdoms_calculated)
2374 {
2375 gcc_checking_assert (cfun);
2376 add_noreturn_fake_exit_edges ();
2377 connect_infinite_loops_to_exit ();
2378 calculate_dominance_info (CDI_POST_DOMINATORS);
2379 pdoms_calculated = true;
2380 }
2381 if (dominated_by_p (CDI_POST_DOMINATORS,
2382 gimple_bb (call_stmt),
2383 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2384 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2385 }
2386
2387 }
2388 if (pdoms_calculated)
2389 {
2390 free_dominance_info (CDI_POST_DOMINATORS);
2391 remove_fake_exit_edges ();
2392 }
2393
2394 /* TODO: Add early exit if we disqualified everything. This also requires
2395 that we either relax the restriction that
2396 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2397 or store the number of parameters to IPA-SRA function summary and use that
2398 when just removing params. */
2399
2400 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2401 ifs->m_parameters->quick_grow_cleared (param_count);
2402 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2403 {
2404 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2405 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2406
2407 d->param_size_limit = s->param_size_limit;
2408 d->size_reached = s->nonarg_acc_size;
2409 d->locally_unused = s->locally_unused;
2410 d->split_candidate = s->split_candidate;
2411 d->by_ref = s->by_ref;
2412
2413 for (gensum_param_access *acc = s->accesses;
2414 acc;
2415 acc = acc->next_sibling)
2416 copy_accesses_to_ipa_desc (acc, d);
2417 }
2418
2419 if (dump_file)
2420 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2421 }
2422
2423 /* Return true if there are any overlaps among certain accesses of DESC. If
2424 CERTAIN_ACCESS_PRESENT_P is non-NULL, also set it upon encountering a
2425 certain access. DESC is assumed to be a split candidate that is not
2426 locally unused. */
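/* For example (illustrative numbers only), certain accesses at unit
offset 0 with unit size 8 and at unit offset 4 with unit size 4 overlap,
because 0 < 4 + 4 and 0 + 8 > 4. */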
2427
2428 static bool
2429 overlapping_certain_accesses_p (isra_param_desc *desc,
2430 bool *certain_access_present_p)
2431 {
2432 unsigned pclen = vec_safe_length (desc->accesses);
2433 for (unsigned i = 0; i < pclen; i++)
2434 {
2435 param_access *a1 = (*desc->accesses)[i];
2436
2437 if (!a1->certain)
2438 continue;
2439 if (certain_access_present_p)
2440 *certain_access_present_p = true;
2441 for (unsigned j = i + 1; j < pclen; j++)
2442 {
2443 param_access *a2 = (*desc->accesses)[j];
2444 if (a2->certain
2445 && a1->unit_offset < a2->unit_offset + a2->unit_size
2446 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2447 return true;
2448 }
2449 }
2450 return false;
2451 }
2452
2453 /* Check for any overlaps of certain param accesses among splitting candidates
2454 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2455 check that used splitting candidates have at least one certain access. */
2456
2457 static void
2458 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2459 {
2460 isra_func_summary *ifs = func_sums->get (node);
2461 if (!ifs || !ifs->m_candidate)
2462 return;
2463 unsigned param_count = vec_safe_length (ifs->m_parameters);
2464 for (unsigned pidx = 0; pidx < param_count; pidx++)
2465 {
2466 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2467 if (!desc->split_candidate || desc->locally_unused)
2468 continue;
2469
2470 bool certain_access_present = !certain_must_exist;
2471 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2472 internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
2473 "which overlap", node->dump_name (), pidx);
2474 if (!certain_access_present)
2475 internal_error ("Function %s, parameter %u, is used but does not "
2476 "have any certain IPA-SRA access",
2477 node->dump_name (), pidx);
2478 }
2479 }
2480
2481 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
2482 create a summary structure describing IPA-SRA opportunities and constraints
2483 in it. */
2484
2485 static void
2486 ipa_sra_summarize_function (cgraph_node *node)
2487 {
2488 if (dump_file)
2489 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
2490 node->order);
2491 if (!ipa_sra_preliminary_function_checks (node))
2492 return;
2493 gcc_obstack_init (&gensum_obstack);
2494 isra_func_summary *ifs = func_sums->get_create (node);
2495 ifs->m_candidate = true;
2496 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
2497 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
2498
2499 decl2desc = new hash_map<tree, gensum_param_desc *>;
2500 unsigned count = 0;
2501 for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
2502 count++;
2503
2504 if (count > 0)
2505 {
2506 auto_vec<gensum_param_desc, 16> param_descriptions (count);
2507 param_descriptions.reserve_exact (count);
2508 param_descriptions.quick_grow_cleared (count);
2509
2510 bool cfun_pushed = false;
2511 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
2512 if (create_parameter_descriptors (node, &param_descriptions))
2513 {
2514 push_cfun (fun);
2515 cfun_pushed = true;
2516 final_bbs = BITMAP_ALLOC (NULL);
2517 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
2518 by_ref_count
2519 * last_basic_block_for_fn (fun));
2520 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2521 scan_function (node, fun);
2522
2523 if (dump_file)
2524 {
2525 dump_gensum_param_descriptors (dump_file, node->decl,
2526 &param_descriptions);
2527 fprintf (dump_file, "----------------------------------------\n");
2528 }
2529 }
2530 process_scan_results (node, fun, ifs, &param_descriptions);
2531
2532 if (cfun_pushed)
2533 pop_cfun ();
2534 if (bb_dereferences)
2535 {
2536 free (bb_dereferences);
2537 bb_dereferences = NULL;
2538 BITMAP_FREE (final_bbs);
2539 final_bbs = NULL;
2540 }
2541 }
2542 isra_analyze_all_outgoing_calls (node);
2543
2544 delete decl2desc;
2545 decl2desc = NULL;
2546 obstack_free (&gensum_obstack, NULL);
2547 if (dump_file)
2548 fprintf (dump_file, "\n\n");
2549 if (flag_checking)
2550 verify_splitting_accesses (node, false);
2551 return;
2552 }
2553
2554 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2555 this compilation unit and create summary structures describing IPA-SRA
2556 opportunities and constraints in them. */
2557
2558 static void
2559 ipa_sra_generate_summary (void)
2560 {
2561 struct cgraph_node *node;
2562
2563 gcc_checking_assert (!func_sums);
2564 gcc_checking_assert (!call_sums);
2565 func_sums
2566 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2567 ipa_sra_function_summaries (symtab, true));
2568 call_sums = new ipa_sra_call_summaries (symtab);
2569
2570 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2571 ipa_sra_summarize_function (node);
2572 return;
2573 }
2574
2575 /* Write intraprocedural analysis information about call graph edge E into a
2576 stream for LTO WPA. */
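/* The record layout written below (and mirrored by isra_read_edge_summary)
is: the input count, then for each input its length, one bit-pack holding
the input indices and the pass-through and safety flags, and its unit
offset and size, followed by a final bit-pack with the two return-value
flags and the bit-aligned-argument flag. */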
2577
2578 static void
2579 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2580 {
2581 isra_call_summary *csum = call_sums->get (e);
2582 unsigned input_count = csum->m_arg_flow.length ();
2583 streamer_write_uhwi (ob, input_count);
2584 for (unsigned i = 0; i < input_count; i++)
2585 {
2586 isra_param_flow *ipf = &csum->m_arg_flow[i];
2587 streamer_write_hwi (ob, ipf->length);
2588 bitpack_d bp = bitpack_create (ob->main_stream);
2589 for (int j = 0; j < ipf->length; j++)
2590 bp_pack_value (&bp, ipf->inputs[j], 8);
2591 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2592 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2593 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2594 streamer_write_bitpack (&bp);
2595 streamer_write_uhwi (ob, ipf->unit_offset);
2596 streamer_write_uhwi (ob, ipf->unit_size);
2597 }
2598 bitpack_d bp = bitpack_create (ob->main_stream);
2599 bp_pack_value (&bp, csum->m_return_ignored, 1);
2600 bp_pack_value (&bp, csum->m_return_returned, 1);
2601 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2602 streamer_write_bitpack (&bp);
2603 }
2604
2605 /* Write intraprocedural analysis information about NODE and all of its outgoing
2606 edges into a stream for LTO WPA. */
2607
2608 static void
2609 isra_write_node_summary (output_block *ob, cgraph_node *node)
2610 {
2611 isra_func_summary *ifs = func_sums->get (node);
2612 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2613 int node_ref = lto_symtab_encoder_encode (encoder, node);
2614 streamer_write_uhwi (ob, node_ref);
2615
2616 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2617 streamer_write_uhwi (ob, param_desc_count);
2618 for (unsigned i = 0; i < param_desc_count; i++)
2619 {
2620 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2621 unsigned access_count = vec_safe_length (desc->accesses);
2622 streamer_write_uhwi (ob, access_count);
2623 for (unsigned j = 0; j < access_count; j++)
2624 {
2625 param_access *acc = (*desc->accesses)[j];
2626 stream_write_tree (ob, acc->type, true);
2627 stream_write_tree (ob, acc->alias_ptr_type, true);
2628 streamer_write_uhwi (ob, acc->unit_offset);
2629 streamer_write_uhwi (ob, acc->unit_size);
2630 bitpack_d bp = bitpack_create (ob->main_stream);
2631 bp_pack_value (&bp, acc->certain, 1);
2632 streamer_write_bitpack (&bp);
2633 }
2634 streamer_write_uhwi (ob, desc->param_size_limit);
2635 streamer_write_uhwi (ob, desc->size_reached);
2636 bitpack_d bp = bitpack_create (ob->main_stream);
2637 bp_pack_value (&bp, desc->locally_unused, 1);
2638 bp_pack_value (&bp, desc->split_candidate, 1);
2639 bp_pack_value (&bp, desc->by_ref, 1);
2640 streamer_write_bitpack (&bp);
2641 }
2642 bitpack_d bp = bitpack_create (ob->main_stream);
2643 bp_pack_value (&bp, ifs->m_candidate, 1);
2644 bp_pack_value (&bp, ifs->m_returns_value, 1);
2645 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2646 gcc_assert (!ifs->m_queued);
2647 streamer_write_bitpack (&bp);
2648
2649 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2650 isra_write_edge_summary (ob, e);
2651 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2652 isra_write_edge_summary (ob, e);
2653 }
2654
2655 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2656
2657 static void
2658 ipa_sra_write_summary (void)
2659 {
2660 if (!func_sums || !call_sums)
2661 return;
2662
2663 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2664 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2665 ob->symbol = NULL;
2666
2667 unsigned int count = 0;
2668 lto_symtab_encoder_iterator lsei;
2669 for (lsei = lsei_start_function_in_partition (encoder);
2670 !lsei_end_p (lsei);
2671 lsei_next_function_in_partition (&lsei))
2672 {
2673 cgraph_node *node = lsei_cgraph_node (lsei);
2674 if (node->has_gimple_body_p ()
2675 && func_sums->get (node) != NULL)
2676 count++;
2677 }
2678 streamer_write_uhwi (ob, count);
2679
2680 /* Process all of the functions. */
2681 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2682 lsei_next_function_in_partition (&lsei))
2683 {
2684 cgraph_node *node = lsei_cgraph_node (lsei);
2685 if (node->has_gimple_body_p ()
2686 && func_sums->get (node) != NULL)
2687 isra_write_node_summary (ob, node);
2688 }
2689 streamer_write_char_stream (ob->main_stream, 0);
2690 produce_asm (ob, NULL);
2691 destroy_output_block (ob);
2692 }
2693
2694 /* Read intraprocedural analysis information about call graph edge CS from a
2695 stream written for LTO WPA. */
2696
2697 static void
2698 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2699 {
2700 isra_call_summary *csum = call_sums->get_create (cs);
2701 unsigned input_count = streamer_read_uhwi (ib);
2702 csum->init_inputs (input_count);
2703 for (unsigned i = 0; i < input_count; i++)
2704 {
2705 isra_param_flow *ipf = &csum->m_arg_flow[i];
2706 ipf->length = streamer_read_hwi (ib);
2707 bitpack_d bp = streamer_read_bitpack (ib);
2708 for (int j = 0; j < ipf->length; j++)
2709 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2710 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2711 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2712 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2713 ipf->unit_offset = streamer_read_uhwi (ib);
2714 ipf->unit_size = streamer_read_uhwi (ib);
2715 }
2716 bitpack_d bp = streamer_read_bitpack (ib);
2717 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2718 csum->m_return_returned = bp_unpack_value (&bp, 1);
2719 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2720 }
2721
2722 /* Read intraprocedural analysis information about NODE and all of its
2723 outgoing edges from a stream written for LTO WPA. */
2724
2725 static void
2726 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2727 struct data_in *data_in)
2728 {
2729 isra_func_summary *ifs = func_sums->get_create (node);
2730 unsigned param_desc_count = streamer_read_uhwi (ib);
2731 if (param_desc_count > 0)
2732 {
2733 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2734 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2735 }
2736 for (unsigned i = 0; i < param_desc_count; i++)
2737 {
2738 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2739 unsigned access_count = streamer_read_uhwi (ib);
2740 for (unsigned j = 0; j < access_count; j++)
2741 {
2742 param_access *acc = ggc_cleared_alloc<param_access> ();
2743 acc->type = stream_read_tree (ib, data_in);
2744 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2745 acc->unit_offset = streamer_read_uhwi (ib);
2746 acc->unit_size = streamer_read_uhwi (ib);
2747 bitpack_d bp = streamer_read_bitpack (ib);
2748 acc->certain = bp_unpack_value (&bp, 1);
2749 vec_safe_push (desc->accesses, acc);
2750 }
2751 desc->param_size_limit = streamer_read_uhwi (ib);
2752 desc->size_reached = streamer_read_uhwi (ib);
2753 bitpack_d bp = streamer_read_bitpack (ib);
2754 desc->locally_unused = bp_unpack_value (&bp, 1);
2755 desc->split_candidate = bp_unpack_value (&bp, 1);
2756 desc->by_ref = bp_unpack_value (&bp, 1);
2757 }
2758 bitpack_d bp = streamer_read_bitpack (ib);
2759 ifs->m_candidate = bp_unpack_value (&bp, 1);
2760 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2761 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2762 ifs->m_queued = 0;
2763
2764 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2765 isra_read_edge_summary (ib, e);
2766 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2767 isra_read_edge_summary (ib, e);
2768 }
2769
2770 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2771 data DATA. TODO: This function was copied almost verbatim from ipa-prop.c;
2772 it should be possible to unify them somehow. */
2773
2774 static void
2775 isra_read_summary_section (struct lto_file_decl_data *file_data,
2776 const char *data, size_t len)
2777 {
2778 const struct lto_function_header *header =
2779 (const struct lto_function_header *) data;
2780 const int cfg_offset = sizeof (struct lto_function_header);
2781 const int main_offset = cfg_offset + header->cfg_size;
2782 const int string_offset = main_offset + header->main_size;
2783 struct data_in *data_in;
2784 unsigned int i;
2785 unsigned int count;
2786
2787 lto_input_block ib_main ((const char *) data + main_offset,
2788 header->main_size, file_data->mode_table);
2789
2790 data_in =
2791 lto_data_in_create (file_data, (const char *) data + string_offset,
2792 header->string_size, vNULL);
2793 count = streamer_read_uhwi (&ib_main);
2794
2795 for (i = 0; i < count; i++)
2796 {
2797 unsigned int index;
2798 struct cgraph_node *node;
2799 lto_symtab_encoder_t encoder;
2800
2801 index = streamer_read_uhwi (&ib_main);
2802 encoder = file_data->symtab_node_encoder;
2803 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2804 index));
2805 gcc_assert (node->definition);
2806 isra_read_node_info (&ib_main, node, data_in);
2807 }
2808 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2809 len);
2810 lto_data_in_delete (data_in);
2811 }
2812
2813 /* Read intraprocedural analysis information from a stream for LTO WPA. */
2814
2815 static void
2816 ipa_sra_read_summary (void)
2817 {
2818 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2819 struct lto_file_decl_data *file_data;
2820 unsigned int j = 0;
2821
2822 gcc_checking_assert (!func_sums);
2823 gcc_checking_assert (!call_sums);
2824 func_sums
2825 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2826 ipa_sra_function_summaries (symtab, true));
2827 call_sums = new ipa_sra_call_summaries (symtab);
2828
2829 while ((file_data = file_data_vec[j++]))
2830 {
2831 size_t len;
2832 const char *data
2833 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2834 if (data)
2835 isra_read_summary_section (file_data, data, len);
2836 }
2837 }
2838
2839 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2840
2841 static void
2842 ipa_sra_dump_all_summaries (FILE *f)
2843 {
2844 cgraph_node *node;
2845 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2846 {
2847 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2848
2849 isra_func_summary *ifs = func_sums->get (node);
2850 if (!ifs)
2851 {
2852 fprintf (f, " Function does not have any associated IPA-SRA "
2853 "summary\n");
2854 continue;
2855 }
2856 if (!ifs->m_candidate)
2857 {
2858 fprintf (f, " Not a candidate function\n");
2859 continue;
2860 }
2861 if (ifs->m_returns_value)
2862 fprintf (f, " Returns value\n");
2863 if (vec_safe_is_empty (ifs->m_parameters))
2864 fprintf (f, " No parameter information.\n");
2865 else
2866 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2867 {
2868 fprintf (f, " Descriptor for parameter %i:\n", i);
2869 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2870 }
2871 fprintf (f, "\n");
2872
2873 struct cgraph_edge *cs;
2874 for (cs = node->callees; cs; cs = cs->next_callee)
2875 {
2876 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2877 cs->callee->dump_name ());
2878 isra_call_summary *csum = call_sums->get (cs);
2879 if (csum)
2880 csum->dump (f);
2881 else
2882 fprintf (f, " Call summary is MISSING!\n");
2883 }
2884
2885 }
2886 fprintf (f, "\n\n");
2887 }
2888
2889 /* Perform function-scope viability tests that can only be made at the IPA
2890 level and return false if the function is deemed unsuitable for IPA-SRA. */
2891
2892 static bool
2893 ipa_sra_ipa_function_checks (cgraph_node *node)
2894 {
2895 if (!node->can_be_local_p ())
2896 {
2897 if (dump_file)
2898 fprintf (dump_file, "Function %s disqualified because it cannot be "
2899 "made local.\n", node->dump_name ());
2900 return false;
2901 }
2902 if (!node->can_change_signature)
2903 {
2904 if (dump_file)
2905 fprintf (dump_file, "Function can not change signature.\n");
2906 return false;
2907 }
2908
2909 return true;
2910 }
2911
2912 /* Issues found out by check_callers_for_issues. */
2913
2914 struct caller_issues
2915 {
2916 /* The candidate being considered. */
2917 cgraph_node *candidate;
2918 /* There is a thunk among callers. */
2919 bool thunk;
2920 /* Call site with no available information. */
2921 bool unknown_callsite;
2922 /* Call from outside the candidate's comdat group. */
2923 bool call_from_outside_comdat;
2924 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
2925 bool bit_aligned_aggregate_argument;
2926 };
2927
2928 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2929 that apply. */
2930
2931 static bool
2932 check_for_caller_issues (struct cgraph_node *node, void *data)
2933 {
2934 struct caller_issues *issues = (struct caller_issues *) data;
2935
2936 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2937 {
2938 if (cs->caller->thunk.thunk_p)
2939 {
2940 issues->thunk = true;
2941 /* TODO: We should be able to process at least some types of
2942 thunks. */
2943 return true;
2944 }
2945 if (issues->candidate->calls_comdat_local
2946 && issues->candidate->same_comdat_group
2947 && !issues->candidate->in_same_comdat_group_p (cs->caller))
2948 {
2949 issues->call_from_outside_comdat = true;
2950 return true;
2951 }
2952
2953 isra_call_summary *csum = call_sums->get (cs);
2954 if (!csum)
2955 {
2956 issues->unknown_callsite = true;
2957 return true;
2958 }
2959
2960 if (csum->m_bit_aligned_arg)
2961 issues->bit_aligned_aggregate_argument = true;
2962 }
2963 return false;
2964 }
2965
2966 /* Look at all incoming edges to NODE, including aliases and thunks, and look
2967 for problems. Return true if NODE's type should not be modified at all. */
2968
2969 static bool
2970 check_all_callers_for_issues (cgraph_node *node)
2971 {
2972 struct caller_issues issues;
2973 memset (&issues, 0, sizeof (issues));
2974 issues.candidate = node;
2975
2976 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2977 if (issues.unknown_callsite)
2978 {
2979 if (dump_file && (dump_flags & TDF_DETAILS))
2980 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2981 "all modifications.\n", node->dump_name ());
2982 return true;
2983 }
2984 /* TODO: We should be able to process at least some types of thunks. */
2985 if (issues.thunk)
2986 {
2987 if (dump_file && (dump_flags & TDF_DETAILS))
2988 fprintf (dump_file, "A call of %s is through thunk, which are not"
2989 " handled yet. Disabling all modifications.\n",
2990 node->dump_name ());
2991 return true;
2992 }
2993 if (issues.call_from_outside_comdat)
2994 {
2995 if (dump_file)
2996 fprintf (dump_file, "Function would become private comdat called "
2997 "outside of its comdat group.\n");
2998 return true;
2999 }
3000
3001 if (issues.bit_aligned_aggregate_argument)
3002 {
3003 /* Let's only remove parameters/return values from such functions.
3004 TODO: We could only prevent splitting the problematic parameters if
3005 anybody thinks it is worth it. */
3006 if (dump_file && (dump_flags & TDF_DETAILS))
3007 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
3008 " disabling parameter splitting.\n", node->dump_name ());
3009
3010 isra_func_summary *ifs = func_sums->get (node);
3011 gcc_checking_assert (ifs);
3012 unsigned param_count = vec_safe_length (ifs->m_parameters);
3013 for (unsigned i = 0; i < param_count; i++)
3014 (*ifs->m_parameters)[i].split_candidate = false;
3015 }
3016 return false;
3017 }
3018
3019 /* Find the access with corresponding OFFSET and SIZE among accesses in
3020 PARAM_DESC and return it or NULL if such an access is not there. */
3021
3022 static param_access *
3023 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
3024 {
3025 unsigned pclen = vec_safe_length (param_desc->accesses);
3026
3027 /* The search is linear but the number of stored accesses is bound by
3028 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
3029
3030 for (unsigned i = 0; i < pclen; i++)
3031 if ((*param_desc->accesses)[i]->unit_offset == offset
3032 && (*param_desc->accesses)[i]->unit_size == size)
3033 return (*param_desc->accesses)[i];
3034
3035 return NULL;
3036 }
3037
3038 /* Return true iff the total size SIZE of definite replacements would violate
3039 the limit set for it in DESC. */
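/* E.g. (illustrative values): with a param_size_limit of 8 units, a total
replacement size of 8 violates the limit for a by-value parameter but is
still acceptable for one passed by reference. */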
3040
3041 static bool
3042 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3043 {
3044 unsigned limit = desc->param_size_limit;
3045 if (size > limit
3046 || (!desc->by_ref && size == limit))
3047 return true;
3048 return false;
3049 }
3050
3051 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3052 the set limit. IDX is the parameter number which is dumped when
3053 disqualifying. */
3054
3055 static void
3056 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3057 {
3058 unsigned after = desc->size_reached + size;
3059 if (size_would_violate_limit_p (desc, after))
3060 {
3061 if (dump_file && (dump_flags & TDF_DETAILS))
3062 fprintf (dump_file, " ...size limit reached, disqualifying "
3063 "candidate parameter %u\n", idx);
3064 desc->split_candidate = false;
3065 return;
3066 }
3067 desc->size_reached = after;
3068 }
3069
3070 /* Take all actions required to deal with an edge CS that represents a call to
3071 an unknown or un-analyzed function, for both parameter removal and
3072 splitting. */
3073
3074 static void
3075 process_edge_to_unknown_caller (cgraph_edge *cs)
3076 {
3077 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3078 gcc_checking_assert (from_ifs);
3079 isra_call_summary *csum = call_sums->get (cs);
3080
3081 if (dump_file && (dump_flags & TDF_DETAILS))
3082 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3083 cs->caller->dump_name ());
3084
3085 unsigned args_count = csum->m_arg_flow.length ();
3086 for (unsigned i = 0; i < args_count; i++)
3087 {
3088 isra_param_flow *ipf = &csum->m_arg_flow[i];
3089
3090 if (ipf->pointer_pass_through)
3091 {
3092 isra_param_desc *param_desc
3093 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3094 param_desc->locally_unused = false;
3095 param_desc->split_candidate = false;
3096 continue;
3097 }
3098 if (ipf->aggregate_pass_through)
3099 {
3100 unsigned idx = get_single_param_flow_source (ipf);
3101 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3102
3103 param_desc->locally_unused = false;
3104 if (!param_desc->split_candidate)
3105 continue;
3106 gcc_assert (!param_desc->by_ref);
3107 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3108 ipf->unit_size);
3109 gcc_checking_assert (pacc);
3110 pacc->certain = true;
3111 if (overlapping_certain_accesses_p (param_desc, NULL))
3112 {
3113 if (dump_file && (dump_flags & TDF_DETAILS))
3114 fprintf (dump_file, " ...leading to overlap, "
3115 " disqualifying candidate parameter %u\n",
3116 idx);
3117 param_desc->split_candidate = false;
3118 }
3119 else
3120 bump_reached_size (param_desc, pacc->unit_size, idx);
3121 ipf->aggregate_pass_through = false;
3122 continue;
3123 }
3124
3125 for (int j = 0; j < ipf->length; j++)
3126 {
3127 int input_idx = ipf->inputs[j];
3128 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3129 }
3130 }
3131 }
3132
3133 /* Propagate parameter removal information through cross-SCC edge CS, i.e.
3134 mark as locally used every caller parameter feeding an argument whose
3135 corresponding callee parameter is not known to be locally unused. */
3136
3137 static void
3138 param_removal_cross_scc_edge (cgraph_edge *cs)
3139 {
3140 enum availability availability;
3141 cgraph_node *callee = cs->callee->function_symbol (&availability);
3142 isra_func_summary *to_ifs = func_sums->get (callee);
3143 if (!to_ifs || !to_ifs->m_candidate
3144 || (availability < AVAIL_AVAILABLE)
3145 || vec_safe_is_empty (to_ifs->m_parameters))
3146 {
3147 process_edge_to_unknown_caller (cs);
3148 return;
3149 }
3150 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3151 gcc_checking_assert (from_ifs);
3152
3153 isra_call_summary *csum = call_sums->get (cs);
3154 unsigned args_count = csum->m_arg_flow.length ();
3155 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3156
3157 for (unsigned i = 0; i < args_count; i++)
3158 {
3159 bool unused_in_callee;
3160 if (i < param_count)
3161 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3162 else
3163 unused_in_callee = false;
3164
3165 if (!unused_in_callee)
3166 {
3167 isra_param_flow *ipf = &csum->m_arg_flow[i];
3168 for (int j = 0; j < ipf->length; j++)
3169 {
3170 int input_idx = ipf->inputs[j];
3171 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3172 }
3173 }
3174 }
3175 }
3176
3177 /* Unless it is already there, push NODE which is also described by IFS to
3178 STACK. */
3179
3180 static void
3181 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3182 vec<cgraph_node *> *stack)
3183 {
3184 if (!ifs->m_queued)
3185 {
3186 ifs->m_queued = true;
3187 stack->safe_push (node);
3188 }
3189 }
3190
3191 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3192 used and push CALLER on STACK. */
3193
3194 static void
3195 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3196 cgraph_node *caller, vec<cgraph_node *> *stack)
3197 {
3198 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3199 {
3200 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3201 isra_push_node_to_stack (caller, from_ifs, stack);
3202 }
3203 }
3204
3205
3206 /* Propagate information that any parameter is not used only locally within a
3207 SCC across CS to the caller, which must be in the same SCC as the
3208 callee. Push any callers that need to be re-processed to STACK. */
3209
3210 static void
3211 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3212 {
3213 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3214 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3215 return;
3216
3217 isra_call_summary *csum = call_sums->get (cs);
3218 gcc_checking_assert (csum);
3219 unsigned args_count = csum->m_arg_flow.length ();
3220 enum availability availability;
3221 cgraph_node *callee = cs->callee->function_symbol (&availability);
3222 isra_func_summary *to_ifs = func_sums->get (callee);
3223
3224 unsigned param_count
3225 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3226 ? vec_safe_length (to_ifs->m_parameters) : 0;
3227 for (unsigned i = 0; i < args_count; i++)
3228 {
3229 if (i < param_count
3230 && (*to_ifs->m_parameters)[i].locally_unused)
3231 continue;
3232
3233 /* The argument is needed in the callee, so we must mark the parameter as
3234 used also in the caller and its callers within this SCC. */
3235 isra_param_flow *ipf = &csum->m_arg_flow[i];
3236 for (int j = 0; j < ipf->length; j++)
3237 {
3238 int input_idx = ipf->inputs[j];
3239 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3240 }
3241 }
3242 }
3243
3244 /* Propagate information that any parameter is not used only locally within a
3245 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3246 same SCC. Push any callers that need to be re-processed to STACK. */
3247
3248 static bool
3249 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3250 {
3251 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3252 cgraph_edge *cs;
3253 for (cs = node->callers; cs; cs = cs->next_caller)
3254 if (ipa_edge_within_scc (cs))
3255 propagate_used_across_scc_edge (cs, stack);
3256 return false;
3257 }
3258
3259 /* Return true iff all certain accesses in ARG_DESC are also present as
3260 certain accesses in PARAM_DESC. */
3261
3262 static bool
3263 all_callee_accesses_present_p (isra_param_desc *param_desc,
3264 isra_param_desc *arg_desc)
3265 {
3266 unsigned aclen = vec_safe_length (arg_desc->accesses);
3267 for (unsigned j = 0; j < aclen; j++)
3268 {
3269 param_access *argacc = (*arg_desc->accesses)[j];
3270 if (!argacc->certain)
3271 continue;
3272 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3273 argacc->unit_size);
3274 if (!pacc || !pacc->certain)
3275 return false;
3276 }
3277 return true;
3278 }
3279
3280 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3281 does not allow instantiating an auto_vec with a type defined within a
3282 function so it is a global type. */
3283 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3284
3285
3286 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC
3287 (which belongs to CALLER) if they would not violate some constraint there.
3288 If successful, return NULL, otherwise return the string reason for failure
3289 (which can be written to the dump file). DELTA_OFFSET is the known offset
3290 of the actual argument within the formal parameter (and so of the accesses
3291 in ARG_DESC within those in PARAM_DESC), ARG_SIZE is the size of the
3292 actual argument, or zero if not known. In case of success, set *CHANGE_P
3293 to true if propagation actually changed anything. */
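/* Illustrative example (hypothetical values): if the caller passes the
sub-aggregate at unit offset 16 of its own parameter as the actual
argument, DELTA_OFFSET is 16 and a callee access at unit offset 4 is
recorded in the caller at unit offset 20. */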
3294
3295 static const char *
3296 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3297 isra_param_desc *arg_desc,
3298 unsigned delta_offset, unsigned arg_size,
3299 bool *change_p)
3300 {
3301 unsigned pclen = vec_safe_length (param_desc->accesses);
3302 unsigned aclen = vec_safe_length (arg_desc->accesses);
3303 unsigned prop_count = 0;
3304 unsigned prop_size = 0;
3305 bool change = false;
3306
3307 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3308 for (unsigned j = 0; j < aclen; j++)
3309 {
3310 param_access *argacc = (*arg_desc->accesses)[j];
3311 prop_kinds.safe_push (ACC_PROP_DONT);
3312
3313 if (arg_size > 0
3314 && argacc->unit_offset + argacc->unit_size > arg_size)
3315 return "callee access outsize size boundary";
3316
3317 if (!argacc->certain)
3318 continue;
3319
3320 unsigned offset = argacc->unit_offset + delta_offset;
3321 /* Given that accesses are initially stored according to increasing
3322 offset and decreasing size in case of equal offsets, the following
3323 searches could be written more efficiently if we kept the ordering
3324 when copying. But the number of accesses is capped at
3325 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3326 messy quickly, so let's improve on that only if necessary. */
3327
3328 bool exact_match = false;
3329 for (unsigned i = 0; i < pclen; i++)
3330 {
3331 /* Check for overlaps. */
3332 param_access *pacc = (*param_desc->accesses)[i];
3333 if (pacc->unit_offset == offset
3334 && pacc->unit_size == argacc->unit_size)
3335 {
3336 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3337 || !types_compatible_p (argacc->type, pacc->type))
3338 return "propagated access types would not match existing ones";
3339
3340 exact_match = true;
3341 if (!pacc->certain)
3342 {
3343 prop_kinds[j] = ACC_PROP_CERTAIN;
3344 prop_size += argacc->unit_size;
3345 change = true;
3346 }
3347 continue;
3348 }
3349
3350 if (offset < pacc->unit_offset + pacc->unit_size
3351 && offset + argacc->unit_size > pacc->unit_offset)
3352 {
3353 /* No overlap is permissible with a certain (load) access; otherwise the
3354 propagated access must fit entirely within the existing one. */
3355 if (pacc->certain
3356 || offset < pacc->unit_offset
3357 || (offset + argacc->unit_size
3358 > pacc->unit_offset + pacc->unit_size))
3359 return "a propagated access would conflict in caller";
3360 }
3361 }
3362
3363 if (!exact_match)
3364 {
3365 prop_kinds[j] = ACC_PROP_COPY;
3366 prop_count++;
3367 prop_size += argacc->unit_size;
3368 change = true;
3369 }
3370 }
3371
3372 if (!change)
3373 return NULL;
3374
3375 if ((prop_count + pclen
3376 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3377 || size_would_violate_limit_p (param_desc,
3378 param_desc->size_reached + prop_size))
3379 return "propagating accesses would violate the count or size limit";
3380
3381 *change_p = true;
3382 for (unsigned j = 0; j < aclen; j++)
3383 {
3384 if (prop_kinds[j] == ACC_PROP_COPY)
3385 {
3386 param_access *argacc = (*arg_desc->accesses)[j];
3387
3388 param_access *copy = ggc_cleared_alloc<param_access> ();
3389 copy->unit_offset = argacc->unit_offset + delta_offset;
3390 copy->unit_size = argacc->unit_size;
3391 copy->type = argacc->type;
3392 copy->alias_ptr_type = argacc->alias_ptr_type;
3393 copy->certain = true;
3394 vec_safe_push (param_desc->accesses, copy);
3395 }
3396 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3397 {
3398 param_access *argacc = (*arg_desc->accesses)[j];
3399 param_access *csp
3400 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3401 argacc->unit_size);
3402 csp->certain = true;
3403 }
3404 }
3405
3406 param_desc->size_reached += prop_size;
3407
3408 return NULL;
3409 }
3410
3411 /* Propagate parameter splitting information through call graph edge CS.
3412 Return true if any changes that might need to be propagated within SCCs have
3413 been made. The function also clears the aggregate_pass_through and
3414 pointer_pass_through in call summaries which do not need to be processed
3415 again if this CS is revisited when iterating while changes are propagated
3416 within an SCC. */
3417
3418 static bool
3419 param_splitting_across_edge (cgraph_edge *cs)
3420 {
3421 bool res = false;
3422 bool cross_scc = !ipa_edge_within_scc (cs);
3423 enum availability availability;
3424 cgraph_node *callee = cs->callee->function_symbol (&availability);
3425 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3426 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3427
3428 isra_call_summary *csum = call_sums->get (cs);
3429 gcc_checking_assert (csum);
3430 unsigned args_count = csum->m_arg_flow.length ();
3431 isra_func_summary *to_ifs = func_sums->get (callee);
3432 unsigned param_count
3433 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3434 ? vec_safe_length (to_ifs->m_parameters)
3435 : 0);
3436
3437 if (dump_file && (dump_flags & TDF_DETAILS))
3438 fprintf (dump_file, "Splitting across %s->%s:\n",
3439 cs->caller->dump_name (), callee->dump_name ());
3440
3441 unsigned i;
3442 for (i = 0; (i < args_count) && (i < param_count); i++)
3443 {
3444 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3445 isra_param_flow *ipf = &csum->m_arg_flow[i];
3446
3447 if (arg_desc->locally_unused)
3448 {
3449 if (dump_file && (dump_flags & TDF_DETAILS))
3450 fprintf (dump_file, " ->%u: unused in callee\n", i);
3451 ipf->pointer_pass_through = false;
3452 continue;
3453 }
3454
3455 if (ipf->pointer_pass_through)
3456 {
3457 int idx = get_single_param_flow_source (ipf);
3458 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3459 if (!param_desc->split_candidate)
3460 continue;
3461 gcc_assert (param_desc->by_ref);
3462
3463 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3464 {
3465 if (dump_file && (dump_flags & TDF_DETAILS))
3466 fprintf (dump_file, " %u->%u: not candidate or not by "
3467 "reference in callee\n", idx, i);
3468 param_desc->split_candidate = false;
3469 ipf->pointer_pass_through = false;
3470 res = true;
3471 }
3472 else if (!ipf->safe_to_import_accesses)
3473 {
3474 if (!all_callee_accesses_present_p (param_desc, arg_desc))
3475 {
3476 if (dump_file && (dump_flags & TDF_DETAILS))
3477 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3478 idx, i);
3479 param_desc->split_candidate = false;
3480 ipf->pointer_pass_through = false;
3481 res = true;
3482
3483 }
3484 else
3485 {
3486 if (dump_file && (dump_flags & TDF_DETAILS))
3487 fprintf (dump_file, " %u->%u: verified callee accesses "
3488 "present.\n", idx, i);
3489 if (cross_scc)
3490 ipf->pointer_pass_through = false;
3491 }
3492 }
3493 else
3494 {
3495 const char *pull_failure
3496 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3497 0, 0, &res);
3498 if (pull_failure)
3499 {
3500 if (dump_file && (dump_flags & TDF_DETAILS))
3501 fprintf (dump_file, " %u->%u: by_ref access pull "
3502 "failed: %s.\n", idx, i, pull_failure);
3503 param_desc->split_candidate = false;
3504 ipf->pointer_pass_through = false;
3505 res = true;
3506 }
3507 else
3508 {
3509 if (dump_file && (dump_flags & TDF_DETAILS))
3510 fprintf (dump_file, " %u->%u: by_ref access pull "
3511 "succeeded.\n", idx, i);
3512 if (cross_scc)
3513 ipf->pointer_pass_through = false;
3514 }
3515 }
3516 }
3517 else if (ipf->aggregate_pass_through)
3518 {
3519 int idx = get_single_param_flow_source (ipf);
3520 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3521 if (!param_desc->split_candidate)
3522 continue;
3523 gcc_assert (!param_desc->by_ref);
3524 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3525 ipf->unit_size);
3526 gcc_checking_assert (pacc);
3527
3528 if (pacc->certain)
3529 {
3530 if (dump_file && (dump_flags & TDF_DETAILS))
3531 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3532 ipf->aggregate_pass_through = false;
3533 }
3534 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3535 {
3536 if (dump_file && (dump_flags & TDF_DETAILS))
3537 fprintf (dump_file, " %u->%u: not candidate or by "
3538 "reference in callee\n", idx, i);
3539
3540 pacc->certain = true;
3541 if (overlapping_certain_accesses_p (param_desc, NULL))
3542 {
3543 if (dump_file && (dump_flags & TDF_DETAILS))
3544 fprintf (dump_file, " ...leading to overlap, "
3545 " disqualifying candidate parameter %u\n",
3546 idx);
3547 param_desc->split_candidate = false;
3548 }
3549 else
3550 bump_reached_size (param_desc, pacc->unit_size, idx);
3551
3552 ipf->aggregate_pass_through = false;
3553 res = true;
3554 }
3555 else
3556 {
3557 const char *pull_failure
3558 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3559 ipf->unit_offset,
3560 ipf->unit_size, &res);
3561 if (pull_failure)
3562 {
3563 if (dump_file && (dump_flags & TDF_DETAILS))
3564 fprintf (dump_file, " %u->%u: arg access pull "
3565 "failed: %s.\n", idx, i, pull_failure);
3566
3567 ipf->aggregate_pass_through = false;
3568 pacc->certain = true;
3569
3570 if (overlapping_certain_accesses_p (param_desc, NULL))
3571 {
3572 if (dump_file && (dump_flags & TDF_DETAILS))
3573 fprintf (dump_file, " ...leading to overlap, "
3574 " disqualifying candidate parameter %u\n",
3575 idx);
3576 param_desc->split_candidate = false;
3577 }
3578 else
3579 bump_reached_size (param_desc, pacc->unit_size, idx);
3580
3581 res = true;
3582 }
3583 else
3584 {
3585 if (dump_file && (dump_flags & TDF_DETAILS))
3586 fprintf (dump_file, " %u->%u: arg access pull "
3587 "succeeded.\n", idx, i);
3588 if (cross_scc)
3589 ipf->aggregate_pass_through = false;
3590 }
3591 }
3592 }
3593 }
3594
3595 /* Handle argument-parameter count mismatches. */
3596 for (; (i < args_count); i++)
3597 {
3598 isra_param_flow *ipf = &csum->m_arg_flow[i];
3599
3600 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3601 {
3602 int idx = get_single_param_flow_source (ipf);
3603 ipf->pointer_pass_through = false;
3604 ipf->aggregate_pass_through = false;
3605 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3606 if (!param_desc->split_candidate)
3607 continue;
3608
3609 if (dump_file && (dump_flags & TDF_DETAILS))
3610 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3611 idx, i);
3612 param_desc->split_candidate = false;
3613 res = true;
3614 }
3615 }
3616 return res;
3617 }
3618
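/* Illustration only, with hypothetical types: the flag-clearing discipline
   implemented above, reduced to a standalone fixed-point loop.  Edges
   leaving the SCC can be marked done after a single visit because the
   callee summaries they read are already final in topological order;
   edges inside the SCC must stay live until a whole sweep over the SCC
   makes no further change.

     #include <vector>

     struct edge { bool within_scc; bool needs_work; };

     // PROPAGATE stands in for param_splitting_across_edge: it returns
     // true when it changed something that may enable more propagation.
     template <typename F>
     static void
     iterate_scc (std::vector<edge> &edges, F propagate)
     {
       bool changed;
       do
         {
           changed = false;
           for (edge &e : edges)
             if (e.needs_work)
               {
                 changed |= propagate (e);
                 if (!e.within_scc)
                   e.needs_work = false;  // Cross-SCC edges: never revisit.
               }
         }
       while (changed);
     }
*/
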
3619 /* Worker for call_for_symbol_and_aliases. Return false if every caller of
3620 NODE either ignores the return value, or comes from the same SCC and uses
3621 the return value only to compute its own return value; otherwise return
3622 true. */
3623
3624 static bool
3625 retval_used_p (cgraph_node *node, void *)
3626 {
3627 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3628 {
3629 isra_call_summary *csum = call_sums->get (cs);
3630 gcc_checking_assert (csum);
3631 if (csum->m_return_ignored)
3632 continue;
3633 if (!csum->m_return_returned)
3634 return true;
3635
3636 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3637 if (!from_ifs || !from_ifs->m_candidate)
3638 return true;
3639
3640 if (!ipa_edge_within_scc (cs)
3641 && !from_ifs->m_return_ignored)
3642 return true;
3643 }
3644
3645 return false;
3646 }
3647
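/* For intuition only, a hypothetical C fragment for which the predicate
   above would come out false: no caller inspects the value returned by F,
   so the IPA phase may mark its return as ignored and a clone of F can be
   made to return void.  The recursive variant, where callers use the value
   solely to compute their own return value, is covered by the same-SCC
   clause of the predicate.

     static int __attribute__ ((noinline))
     f (int x)
     {
       return x * 2;
     }

     void
     g (int x)
     {
       f (x);   // Return value dropped in every caller.
     }
*/
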
3648 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3649 modify the parameter which originally had index BASE_INDEX, which had index
3650 PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any) and
3651 was described by PREV_ADJUSTMENT. If the parent clone is the original
3652 function, PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to BASE_INDEX. */
3653
3654
3655 static void
3656 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3657 unsigned prev_clone_index,
3658 ipa_adjusted_param *prev_adjustment,
3659 vec<ipa_adjusted_param, va_gc> **new_params)
3660 {
3661 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3662 if (desc->locally_unused)
3663 {
3664 if (dump_file)
3665 fprintf (dump_file, " Will remove parameter %u\n", base_index);
3666 return;
3667 }
3668
3669 if (!desc->split_candidate)
3670 {
3671 ipa_adjusted_param adj;
3672 if (prev_adjustment)
3673 {
3674 adj = *prev_adjustment;
3675 adj.prev_clone_adjustment = true;
3676 adj.prev_clone_index = prev_clone_index;
3677 }
3678 else
3679 {
3680 memset (&adj, 0, sizeof (adj));
3681 adj.op = IPA_PARAM_OP_COPY;
3682 adj.base_index = base_index;
3683 adj.prev_clone_index = prev_clone_index;
3684 }
3685 vec_safe_push ((*new_params), adj);
3686 return;
3687 }
3688
3689 if (dump_file)
3690 fprintf (dump_file, " Will split parameter %u\n", base_index);
3691
3692 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3693 unsigned aclen = vec_safe_length (desc->accesses);
3694 for (unsigned j = 0; j < aclen; j++)
3695 {
3696 param_access *pa = (*desc->accesses)[j];
3697 if (!pa->certain)
3698 continue;
3699 if (dump_file)
3700 fprintf (dump_file, " - component at byte offset %u, "
3701 "size %u\n", pa->unit_offset, pa->unit_size);
3702
3703 ipa_adjusted_param adj;
3704 memset (&adj, 0, sizeof (adj));
3705 adj.op = IPA_PARAM_OP_SPLIT;
3706 adj.base_index = base_index;
3707 adj.prev_clone_index = prev_clone_index;
3708 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3709 adj.reverse = pa->reverse;
3710 adj.type = pa->type;
3711 adj.alias_ptr_type = pa->alias_ptr_type;
3712 adj.unit_offset = pa->unit_offset;
3713 vec_safe_push ((*new_params), adj);
3714 }
3715 }
3716
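/* A worked example (hypothetical, for illustration): assume the original
   function is

     struct S { int x, y, z; };
     int foo (int a, struct S s);

   where parameter A is used as a whole and, of S, only S.X (unit offset 0,
   size 4) and S.Z (unit offset 8, size 4) have certain accesses.  With no
   parent clone, the function above would push three entries:

     { op = IPA_PARAM_OP_COPY,  base_index = 0, prev_clone_index = 0 }
     { op = IPA_PARAM_OP_SPLIT, base_index = 1, unit_offset = 0, ... }
     { op = IPA_PARAM_OP_SPLIT, base_index = 1, unit_offset = 8, ... }

   describing a clone whose signature corresponds roughly to
   int foo.isra (int a, int x, int z).  */
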
3717 /* Worker for call_for_symbol_thunks_and_aliases. Set the calls_comdat_local
3718 flag of all callers of NODE. */
3719
3720 static bool
3721 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
3722 {
3723 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3724 cs->caller->calls_comdat_local = true;
3725 return false;
3726 }
3727
3728
3729 /* Do final processing of the results of IPA propagation regarding NODE;
3730 clone it if appropriate. */
3731
3732 static void
3733 process_isra_node_results (cgraph_node *node,
3734 hash_map<const char *, unsigned> *clone_num_suffixes)
3735 {
3736 isra_func_summary *ifs = func_sums->get (node);
3737 if (!ifs || !ifs->m_candidate)
3738 return;
3739
3740 auto_vec<bool, 16> surviving_params;
3741 bool check_surviving = false;
3742 if (node->clone.param_adjustments)
3743 {
3744 check_surviving = true;
3745 node->clone.param_adjustments->get_surviving_params (&surviving_params);
3746 }
3747
3748 unsigned param_count = vec_safe_length (ifs->m_parameters);
3749 bool will_change_function = false;
3750 if (ifs->m_returns_value && ifs->m_return_ignored)
3751 will_change_function = true;
3752 else
3753 for (unsigned i = 0; i < param_count; i++)
3754 {
3755 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3756 if ((desc->locally_unused || desc->split_candidate)
3757 /* Make sure we do not clone just to attempt to remove an already
3758 removed unused argument. */
3759 && (!check_surviving
3760 || (i < surviving_params.length ()
3761 && surviving_params[i])))
3762 {
3763 will_change_function = true;
3764 break;
3765 }
3766 }
3767 if (!will_change_function)
3768 return;
3769
3770 if (dump_file)
3771 {
3772 fprintf (dump_file, "\nEvaluating analysis results for %s\n",
3773 node->dump_name ());
3774 if (ifs->m_returns_value && ifs->m_return_ignored)
3775 fprintf (dump_file, " Will remove return value.\n");
3776 }
3777
3778 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
3779 if (ipa_param_adjustments *old_adjustments = node->clone.param_adjustments)
3780 {
3781 unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
3782 for (unsigned i = 0; i < old_adj_len; i++)
3783 {
3784 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
3785 push_param_adjustments_for_index (ifs, old_adj->base_index, i,
3786 old_adj, &new_params);
3787 }
3788 }
3789 else
3790 for (unsigned i = 0; i < param_count; i++)
3791 push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);
3792
3793 ipa_param_adjustments *new_adjustments
3794 = (new (ggc_alloc <ipa_param_adjustments> ())
3795 ipa_param_adjustments (new_params, param_count,
3796 ifs->m_returns_value && ifs->m_return_ignored));
3797
3798 if (dump_file && (dump_flags & TDF_DETAILS))
3799 {
3800 fprintf (dump_file, "\n Created adjustments:\n");
3801 new_adjustments->dump (dump_file);
3802 }
3803
3804 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
3805 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
3806 node->decl)));
3807 vec<cgraph_edge *> callers = node->collect_callers ();
3808 cgraph_node *new_node
3809 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
3810 suffix_counter);
3811 suffix_counter++;
3812 if (node->calls_comdat_local && node->same_comdat_group)
3813 {
3814 new_node->add_to_same_comdat_group (node);
3815 new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
3816 NULL, true);
3817 }
3818 new_node->calls_comdat_local = node->calls_comdat_local;
3819
3820 if (dump_file)
3821 fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
3822 callers.release ();
3823 }
3824
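/* Illustration only: the per-name suffix counting used above, rebuilt as a
   self-contained sketch with standard containers instead of GCC's hash_map
   keyed on assembler names.  Repeated clones of one function become
   "foo.isra.0", "foo.isra.1" and so on, while clones of another function
   start counting from 0 again.

     #include <string>
     #include <unordered_map>

     static std::string
     make_clone_name (std::unordered_map<std::string, unsigned> &counters,
                      const std::string &asm_name)
     {
       unsigned &n = counters[asm_name];   // Value-initialized to 0.
       return asm_name + ".isra." + std::to_string (n++);
     }
*/
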
3825 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
3826 and disable transformations for those which have not, or which should not be
3827 transformed because the associated debug counter reached its limit. Return
3828 true if none survived or if there were no candidates to begin with. */
3829
3830 static bool
3831 disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
3832 {
3833 bool ret = true;
3834 unsigned len = vec_safe_length (ifs->m_parameters);
3835 if (!len)
3836 return true;
3837
3838 auto_vec<bool, 16> surviving_params;
3839 bool check_surviving = false;
3840 if (node->clone.param_adjustments)
3841 {
3842 check_surviving = true;
3843 node->clone.param_adjustments->get_surviving_params (&surviving_params);
3844 }
3845 bool dumped_first = false;
3846 for (unsigned i = 0; i < len; i++)
3847 {
3848 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3849 if (!dbg_cnt (ipa_sra_params))
3850 {
3851 desc->locally_unused = false;
3852 desc->split_candidate = false;
3853 }
3854 else if (check_surviving
3855 && (i >= surviving_params.length ()
3856 || !surviving_params[i]))
3857 {
3858 /* Even if the parameter was removed by a previous IPA pass, we do
3859 not clear locally_unused because if it really is unused, this
3860 information might be useful in callers. */
3861 desc->split_candidate = false;
3862
3863 if (dump_file && (dump_flags & TDF_DETAILS))
3864 {
3865 if (!dumped_first)
3866 {
3867 fprintf (dump_file,
3868 "The following parameters of %s are dead on "
3869 "arrival:", node->dump_name ());
3870 dumped_first = true;
3871 }
3872 fprintf (dump_file, " %u", i);
3873 }
3874 }
3875 else if (desc->locally_unused || desc->split_candidate)
3876 ret = false;
3877 }
3878
3879 if (dumped_first)
3880 fprintf (dump_file, "\n");
3881
3882 return ret;
3883 }
3884
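/* Usage note, not from the sources: the dbg_cnt (ipa_sra_params) check
   above (and dbg_cnt (ipa_sra_retvalues) further below) hooks into GCC's
   generic debug counter machinery, which is handy for bisecting
   miscompilations, e.g. with something along the lines of

     gcc -O2 -fdbg-cnt=ipa_sra_params:1-4 ...

   (syntax as documented for the release at hand) so that only the first
   few queried parameters keep their candidate flags.  See dbgcnt.def for
   the counter definitions.  */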
3885
3886 /* Run the interprocedural part of IPA-SRA. */
3887
3888 static unsigned int
3889 ipa_sra_analysis (void)
3890 {
3891 if (dump_file)
3892 {
3893 fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
3894 ipa_sra_dump_all_summaries (dump_file);
3895 }
3896
3897 gcc_checking_assert (func_sums);
3898 gcc_checking_assert (call_sums);
3899 cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
3900 auto_vec <cgraph_node *, 16> stack;
3901 int node_scc_count = ipa_reduced_postorder (order, true, NULL);
3902
3903 /* One sweep from callees to callers for parameter removal and splitting. */
3904 for (int i = 0; i < node_scc_count; i++)
3905 {
3906 cgraph_node *scc_rep = order[i];
3907 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3908 unsigned j;
3909
3910 /* Preliminary IPA function-level checks and first step of parameter
3911 removal. */
3912 cgraph_node *v;
3913 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3914 {
3915 isra_func_summary *ifs = func_sums->get (v);
3916 if (!ifs || !ifs->m_candidate)
3917 continue;
3918 if (!ipa_sra_ipa_function_checks (v)
3919 || check_all_callers_for_issues (v))
3920 {
3921 ifs->zap ();
3922 continue;
3923 }
3924 if (disable_unavailable_parameters (v, ifs))
3925 continue;
3926 for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
3927 process_edge_to_unknown_caller (cs);
3928 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3929 if (!ipa_edge_within_scc (cs))
3930 param_removal_cross_scc_edge (cs);
3931 }
3932
3933 /* Look at edges within the current SCC and propagate used-ness across
3934 them, pushing onto the stack all nodes which might need to be
3935 revisited. */
3936 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3937 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3938 &stack, true);
3939
3940 /* Keep revisiting and pushing until nothing changes. */
3941 while (!stack.is_empty ())
3942 {
3943 cgraph_node *v = stack.pop ();
3944 isra_func_summary *ifs = func_sums->get (v);
3945 gcc_checking_assert (ifs && ifs->m_queued);
3946 ifs->m_queued = false;
3947
3948 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3949 &stack, true);
3950 }
3951
3952 /* Parameter splitting. */
3953 bool repeat_scc_access_propagation;
3954 do
3955 {
3956 repeat_scc_access_propagation = false;
3957 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3958 {
3959 isra_func_summary *ifs = func_sums->get (v);
3960 if (!ifs
3961 || !ifs->m_candidate
3962 || vec_safe_is_empty (ifs->m_parameters))
3963 continue;
3964 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3965 if (param_splitting_across_edge (cs))
3966 repeat_scc_access_propagation = true;
3967 }
3968 }
3969 while (repeat_scc_access_propagation);
3970
3971 if (flag_checking)
3972 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3973 verify_splitting_accesses (v, true);
3974
3975 cycle_nodes.release ();
3976 }
3977
3978 /* One sweep from callers to callees for result removal. */
3979 for (int i = node_scc_count - 1; i >= 0; i--)
3980 {
3981 cgraph_node *scc_rep = order[i];
3982 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3983 unsigned j;
3984
3985 cgraph_node *v;
3986 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3987 {
3988 isra_func_summary *ifs = func_sums->get (v);
3989 if (!ifs || !ifs->m_candidate)
3990 continue;
3991
3992 bool return_needed
3993 = (ifs->m_returns_value
3994 && (!dbg_cnt (ipa_sra_retvalues)
3995 || v->call_for_symbol_and_aliases (retval_used_p,
3996 NULL, true)));
3997 ifs->m_return_ignored = !return_needed;
3998 if (return_needed)
3999 isra_push_node_to_stack (v, ifs, &stack);
4000 }
4001
4002 while (!stack.is_empty ())
4003 {
4004 cgraph_node *node = stack.pop ();
4005 isra_func_summary *ifs = func_sums->get (node);
4006 gcc_checking_assert (ifs && ifs->m_queued);
4007 ifs->m_queued = false;
4008
4009 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4010 if (ipa_edge_within_scc (cs)
4011 && call_sums->get (cs)->m_return_returned)
4012 {
4013 enum availability av;
4014 cgraph_node *callee = cs->callee->function_symbol (&av);
4015 isra_func_summary *to_ifs = func_sums->get (callee);
4016 if (to_ifs && to_ifs->m_return_ignored)
4017 {
4018 to_ifs->m_return_ignored = false;
4019 isra_push_node_to_stack (callee, to_ifs, &stack);
4020 }
4021 }
4022 }
4023 cycle_nodes.release ();
4024 }
4025
4026 ipa_free_postorder_info ();
4027 free (order);
4028
4029 if (dump_file)
4030 {
4031 if (dump_flags & TDF_DETAILS)
4032 {
4033 fprintf (dump_file, "\n========== IPA-SRA propagation final state "
4034 " ==========\n");
4035 ipa_sra_dump_all_summaries (dump_file);
4036 }
4037 fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4038 }
4039
4040 hash_map<const char *, unsigned> *clone_num_suffixes
4041 = new hash_map<const char *, unsigned>;
4042
4043 cgraph_node *node;
4044 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4045 process_isra_node_results (node, clone_num_suffixes);
4046
4047 delete clone_num_suffixes;
4048 ggc_delete (func_sums);
4049 func_sums = NULL;
4050 delete call_sums;
4051 call_sums = NULL;
4052
4053 if (dump_file)
4054 fprintf (dump_file, "\n========== IPA-SRA IPA analysis done "
4055 "==========\n\n");
4056 return 0;
4057 }
4058
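/* An end-to-end illustration (hypothetical, assuming all candidates pass
   the checks above): given

     struct S { int a, b, c; };

     static int __attribute__ ((noinline))
     foo (struct S *p, int unused)
     {
       return p->a + p->c;
     }

     int
     bar (struct S *p)
     {
       return foo (p, 7);
     }

   the analysis would typically decide to drop UNUSED, split the
   by-reference parameter P into two scalars corresponding to P->A and
   P->C, and keep the return value (BAR, which is exported, uses it),
   yielding a clone along the lines of

     static int foo.isra (int a, int c) { return a + c; }

   with the call in BAR redirected accordingly when clones materialize.  */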
4059
4060 const pass_data pass_data_ipa_sra =
4061 {
4062 IPA_PASS, /* type */
4063 "sra", /* name */
4064 OPTGROUP_NONE, /* optinfo_flags */
4065 TV_IPA_SRA, /* tv_id */
4066 0, /* properties_required */
4067 0, /* properties_provided */
4068 0, /* properties_destroyed */
4069 0, /* todo_flags_start */
4070 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4071 };
4072
4073 class pass_ipa_sra : public ipa_opt_pass_d
4074 {
4075 public:
4076 pass_ipa_sra (gcc::context *ctxt)
4077 : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4078 ipa_sra_generate_summary, /* generate_summary */
4079 ipa_sra_write_summary, /* write_summary */
4080 ipa_sra_read_summary, /* read_summary */
4081 NULL, /* write_optimization_summary */
4082 NULL, /* read_optimization_summary */
4083 NULL, /* stmt_fixup */
4084 0, /* function_transform_todo_flags_start */
4085 NULL, /* function_transform */
4086 NULL) /* variable_transform */
4087 {}
4088
4089 /* opt_pass methods: */
4090 virtual bool gate (function *)
4091 {
4092 /* TODO: We should remove the optimize check after we ensure we never run
4093 IPA passes when not optimizing. */
4094 return (flag_ipa_sra && optimize);
4095 }
4096
4097 virtual unsigned int execute (function *) { return ipa_sra_analysis (); }
4098
4099 }; // class pass_ipa_sra
4100
4101 } // anon namespace
4102
4103 ipa_opt_pass_d *
4104 make_pass_ipa_sra (gcc::context *ctxt)
4105 {
4106 return new pass_ipa_sra (ctxt);
4107 }
4108
4109
4110 #include "gt-ipa-sra.h"