/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2008-2020 Free Software Foundation, Inc.

   Contributed by Martin Jambor <mjambor@suse.cz>

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about functions
   and each call are generated.  Function summaries (often called call graph
   node summaries) contain mainly information about which parameters are
   potential transformation candidates and which bits of candidates are
   accessed.  We differentiate between accesses done as a part of a call
   statement (which might not be necessary if the callee is also transformed)
   and others (which are mandatory).  Call summaries (often called call graph
   edge summaries) contain information about which function formal parameters
   feed into which actual call arguments, so that if two parameters are only
   used in a sum which is then passed to another function which, however, does
   not use this parameter, all three parameters of the two functions can be
   eliminated.  Edge summaries also carry flags saying whether the return value
   is used or whether it is only returned in the caller too.  In LTO mode these
   summaries are then streamed to the object file in the compilation phase and
   streamed back in during the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The pass from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this pass but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */
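
/* As an illustrative sketch (hypothetical user code, not taken from the
   sources), the parameter-splitting part of the pass can turn

     struct pair { int a; int b; };

     static int foo (struct pair *p) { return p->a; }
     int bar (struct pair *p) { return foo (p); }

   into the equivalent of

     static int foo_isra (int a) { return a; }
     int bar (struct pair *p) { return foo_isra (p->a); }

   because only the A portion of *P is ever used in FOO: the by-reference
   aggregate parameter is replaced by one scalar passed by value and the
   single caller is adjusted to pass P->A directly.  */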

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"
#include "internal-fn.h"

/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into an actual call argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7

/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Any smaller accesses that occur
   within a function and fall within another access form a tree.  The pass
   cannot analyze parameters with only partially overlapping accesses.  */

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal parameters
     that are only used as actual function arguments passed to callees.  */
  unsigned certain : 1;
  /* Set if the access has a reversed scalar storage order.  */
  unsigned reverse : 1;
};
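
/* For instance (a made-up illustration, not a real dump), accesses to a
   hypothetical parameter of type

     struct S { int i; double d; };

   in a function reading both fields would be described by two param_access
   records: one with unit_offset 0, unit_size 4 and type int, and one with
   unit_offset 8, unit_size 8 and type double, assuming a typical 64-bit ABI
   where the double member is aligned to 8 bytes.  */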

/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* True if there have been writes to or reads from this exact location
     except for uses as arguments to function calls that can be tracked.  */
  bool nonarg;

  /* Set if the access has a reversed scalar storage order.  */
  bool reverse;
};

/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives of the parameter, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing data by reference?  */
  unsigned by_ref : 1;
};

/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in field accesses.  */
  unsigned access_count;

  /* If locally_unused below is set, this is the number of uses as actual
     arguments.  */
  int call_uses;
  /* Number of times this parameter has been directly passed to a callee as
     an actual argument.  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing data
     by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing data by reference?  */
  bool by_ref;

  /* The index of this parameter as it is ordered in the function decl.  */
  int param_number;
  /* For parameters passing data by reference, this is the parameter index
     used to compute indices into bb_dereferences.  */
  int deref_index;
};

/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   not in GC memory, this is not necessary and we can consider removing the
   function.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}

/* Class used to convey information about functions from the
   intra-procedural analysis stage to the inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if no call statement actually uses the returned value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued on the IPA-SRA stack during processing
     of call graph SCCs.  */

  unsigned m_queued : 1;
};

/* Clean up and deallocate what an isra_func_summary points to.  TODO: Since
   this data structure is not in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}


/* Mark the function as not a candidate for any IPA-SRA transformation.  Return
   true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}

/* Structure to describe which formal parameters feed into a particular actual
   argument.  */

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual argument.
     If aggregate_pass_through or pointer_pass_through below are true, it must
     contain exactly one element which is passed through from a formal
     parameter of the given number.  Otherwise, the array contains indices of
     the caller's formal parameters which are used to calculate the value of
     this actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* Size of the portion of the formal parameter that is being passed.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual argument is a verbatim pass through of
     an obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee, which
     would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
};
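
/* A minimal sketch of how this gets filled in (hypothetical code): given

     static int baz (int sum);

     static int
     qux (int a, int b)
     {
       return baz (a + b);
     }

   the isra_param_flow of the only actual argument of the call to BAZ has
   length 2 and inputs {0, 1}, recording that formal parameters number 0 and 1
   of QUX feed into it, with neither pass-through flag set.  */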

/* Structure used to convey information about calls from the intra-procedural
   analysis stage to the inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to compute
     individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have an LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of the call statement is only used to construct
     the return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments are not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;
};

/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  virtual void duplicate (cgraph_node *, cgraph_node *,
                          isra_func_summary *old_sum,
                          isra_func_summary *new_sum);
};

/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
                                       isra_func_summary *old_sum,
                                       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
        {
          param_access *from = (*s->accesses)[j];
          param_access *to = ggc_cleared_alloc<param_access> ();
          to->type = from->type;
          to->alias_ptr_type = from->alias_ptr_type;
          to->unit_offset = from->unit_offset;
          to->unit_size = from->unit_size;
          to->certain = from->certain;
          d->accesses->quick_push (to);
        }
    }
}

/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;

/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  virtual void duplicate (cgraph_edge *, cgraph_edge *,
                          isra_call_summary *old_sum,
                          isra_call_summary *new_sum);
};

static ipa_sra_call_summaries *call_sums;


/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}

/* Dump all information in this call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
        {
          bool first = true;
          fprintf (f, "      Scalar param sources: ");
          for (int j = 0; j < ipf->length; j++)
            {
              if (!first)
                fprintf (f, ", ");
              else
                first = false;
              fprintf (f, "%i", (int) ipf->inputs[j]);
            }
          fprintf (f, "\n");
        }
      if (ipf->aggregate_pass_through)
        fprintf (f, "      Aggregate pass through from the param given above, "
                 "unit offset: %u, unit size: %u\n",
                 ipf->unit_offset, ipf->unit_size);
      if (ipf->pointer_pass_through)
        fprintf (f, "      Pointer pass through from the param given above, "
                 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
    }
}

/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
                                   isra_call_summary *old_sum,
                                   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
}

/* With all GTY stuff done, we can move to the anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* Countdown of allowed alias analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which for each basic block and parameter there is a
   distance (offset + size) in that parameter which is dereferenced and
   accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
int by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;

/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
        fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
        fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
                 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
        fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
    {
      if (dump_file)
        fprintf (dump_file, "Function type has attributes.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Always inline function will be inlined "
                 "anyway.\n");
      return false;
    }

  return true;
}

/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "* Access to offset: " HOST_WIDE_INT_PRINT_DEC,
           access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}


/* Print the access ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not-certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}

/* Dump the access ACCESS to stderr.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}

/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    by_ref with %u pass throughs\n", desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}

/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming the vector
   describes FNDECL, to F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
                               vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "  Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}


/* Dump DESC to F.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s\n",
           desc->param_size_limit, desc->size_reached,
           desc->by_ref ? ", by_ref" : "");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}

/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
   F.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl,
                             isra_func_summary *ifs)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "  parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "  Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
    }
}

/* Add SRC to the inputs of PARAM_FLOW, unless it would exceed storage.  If
   the storage is exhausted, return false, otherwise return true.  SRC must
   fit into an unsigned char.  Used for purposes of transitive unused
   parameter removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}

/* Add SRC to the inputs of PARAM_FLOW unless it is already there, and assert
   that it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}

/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}

/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in NODE and return a negative number if any of them is used for something
   other than either an actual call argument, simple arithmetic operation or
   debug statement.  If there are no such uses, return the number of actual
   arguments that this parameter eventually feeds to (or zero if there are
   none).  For each such actual argument, mark PARM_NUM as one of its sources.
   ANALYZED is a bitmap that tracks which SSA names we have already started
   investigating.  */

static int
isra_track_scalar_value_uses (cgraph_node *node, tree name, int parm_num,
                              bitmap analyzed)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
        continue;

      /* TODO: We could handle at least const builtin functions like arithmetic
         operations below.  */
      if (is_gimple_call (stmt))
        {
          int all_uses = 0;
          use_operand_p use_p;
          FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
            all_uses++;

          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              res = -1;
              BREAK_FROM_IMM_USE_STMT (imm_iter);
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          int simple_uses = 0;
          for (unsigned i = 0; i < arg_count; i++)
            if (gimple_call_arg (call, i) == name)
              {
                if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
                  {
                    simple_uses = -1;
                    break;
                  }
                simple_uses++;
              }

          if (simple_uses < 0
              || all_uses != simple_uses)
            {
              res = -1;
              BREAK_FROM_IMM_USE_STMT (imm_iter);
            }
          res += all_uses;
        }
      else if ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
               || gimple_code (stmt) == GIMPLE_PHI)
        {
          tree lhs;
          if (gimple_code (stmt) == GIMPLE_PHI)
            lhs = gimple_phi_result (stmt);
          else
            lhs = gimple_assign_lhs (stmt);

          if (TREE_CODE (lhs) != SSA_NAME)
            {
              res = -1;
              BREAK_FROM_IMM_USE_STMT (imm_iter);
            }
          gcc_assert (!gimple_vdef (stmt));
          if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
            {
              int tmp = isra_track_scalar_value_uses (node, lhs, parm_num,
                                                      analyzed);
              if (tmp < 0)
                {
                  res = tmp;
                  BREAK_FROM_IMM_USE_STMT (imm_iter);
                }
              res += tmp;
            }
        }
      else
        {
          res = -1;
          BREAK_FROM_IMM_USE_STMT (imm_iter);
        }
    }
  return res;
}
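
/* As a hypothetical example of the above: in

     static int
     f (int x)
     {
       int tmp = x + 1;
       return g (tmp) + g (x);
     }

   the default definition of X flows through one trackable arithmetic
   operation and into two actual call arguments (TMP feeding the first call
   and X itself the second), so the function returns 2 and records X's
   PARM_NUM as a source of the first argument of both calls.  Storing X to
   memory or using it in a volatile assignment would instead make the
   function return a negative number.  */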

/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for something other than either an
   actual call argument, simple arithmetic operation or debug statement.  If
   there are no such uses, return false and store the number of actual
   arguments that this parameter eventually feeds to (or zero if there are
   none) to *CALL_USES_P.  For each such actual argument, mark PARM_NUM as one
   of its sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
                                    int parm_num, int *call_uses_p)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      *call_uses_p = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (node, name, parm_num, analyzed);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  *call_uses_p = call_uses;
  return false;
}

/* Scan immediate uses of a default definition SSA name of a parameter PARM and
   examine whether there are any uses that are not dereferences of the expected
   form, direct uses as actual call arguments, or other feasible uses.  If so,
   return true, otherwise return false.  Create pass-through IPA flow records
   for any direct uses as call arguments and, if returning false, store their
   number into *PT_COUNT_P.  NODE and FUN must represent the function that is
   currently analyzed, PARM_NUM must be the index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or turning
   them into bits passed by value, as opposed to generic unused parameter
   removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
                          int parm_num, unsigned *pt_count_p)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
        continue;

      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          while (handled_component_p (rhs))
            rhs = TREE_OPERAND (rhs, 0);
          if (TREE_CODE (rhs) == MEM_REF
              && TREE_OPERAND (rhs, 0) == name
              && integer_zerop (TREE_OPERAND (rhs, 1))
              && types_compatible_p (TREE_TYPE (rhs),
                                     TREE_TYPE (TREE_TYPE (name)))
              && !TREE_THIS_VOLATILE (rhs))
            uses_ok++;
        }
      else if (is_gimple_call (stmt))
        {
          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              ret = true;
              BREAK_FROM_IMM_USE_STMT (ui);
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          for (unsigned i = 0; i < arg_count; ++i)
            {
              tree arg = gimple_call_arg (stmt, i);

              if (arg == name)
                {
                  /* TODO: Allow &MEM_REF[name + offset] here,
                     ipa_param_body_adjustments::modify_call_stmt has to be
                     adjusted too.  */
                  csum->m_arg_flow[i].pointer_pass_through = true;
                  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
                  pt_count++;
                  uses_ok++;
                  continue;
                }

              while (handled_component_p (arg))
                arg = TREE_OPERAND (arg, 0);
              if (TREE_CODE (arg) == MEM_REF
                  && TREE_OPERAND (arg, 0) == name
                  && integer_zerop (TREE_OPERAND (arg, 1))
                  && types_compatible_p (TREE_TYPE (arg),
                                         TREE_TYPE (TREE_TYPE (name)))
                  && !TREE_THIS_VOLATILE (arg))
                uses_ok++;
            }
        }

      /* If the number of valid uses does not match the number of
         uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
        all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
        {
          ret = true;
          BREAK_FROM_IMM_USE_STMT (ui);
        }
    }

  *pt_count_p = pt_count;
  return ret;
}
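
/* For illustration (hypothetical code and callees), in

     static void
     h (struct S *p)
     {
       use_int (p->i);    <-- dereference at offset zero, an acceptable use
       callee (p);        <-- verbatim pass-through, counted in *PT_COUNT_P
       callee2 (p);       <-- another pass-through, likewise acceptable
     }

   the function returns false with *PT_COUNT_P set to 2, whereas an additional
   statement such as "global_ptr = p;" would be an infeasible nonarg use and
   make it return true.  */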

/* Initialize the vector of parameter descriptors of NODE.  Return true if
   there are any candidates for splitting or unused aggregate parameter
   removal and the function body therefore must be scanned; note that the
   function may return false even when there are candidates for removal of
   register parameters.  */
static bool
create_parameter_descriptors (cgraph_node *node,
                              vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
        print_generic_expr (dump_file, parm, TDF_UID);

      int scalar_call_uses;
      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is volatile\n");
          continue;
        }
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is a va_list type\n");
          continue;
        }
      if (TREE_ADDRESSABLE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is addressable\n");
          continue;
        }
      if (TREE_ADDRESSABLE (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, type cannot be split\n");
          continue;
        }

      if (is_gimple_reg (parm)
          && !isra_track_scalar_param_local_uses (fun, node, parm, num,
                                                  &scalar_call_uses))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " is a scalar with only %i call uses\n",
                     scalar_call_uses);

          desc->locally_unused = true;
          desc->call_uses = scalar_call_uses;
        }

      if (POINTER_TYPE_P (type))
        {
          type = TREE_TYPE (type);

          if (TREE_CODE (type) == FUNCTION_TYPE
              || TREE_CODE (type) == METHOD_TYPE)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a function\n");
              continue;
            }
          if (TYPE_VOLATILE (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a volatile type\n");
              continue;
            }
          if (TREE_CODE (type) == ARRAY_TYPE
              && TYPE_NONALIASED_COMPONENT (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a nonaliased component array\n");
              continue;
            }
          if (!is_gimple_reg (parm))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, a reference which is "
                         "not a gimple register (probably addressable)\n");
              continue;
            }
          if (is_va_list_type (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a va list\n");
              continue;
            }
          if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
                                        &desc->ptr_pt_count))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference has "
                         "nonarg uses\n");
              continue;
            }
          desc->by_ref = true;
        }
      else if (!AGGREGATE_TYPE_P (type))
        {
          /* This is in an else branch because scalars passed by reference are
             still candidates to be passed by value.  */
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not an aggregate\n");
          continue;
        }

      if (!COMPLETE_TYPE_P (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not a complete type\n");
          continue;
        }
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, size not representable\n");
          continue;
        }
      unsigned HOST_WIDE_INT type_size
        = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
          || type_size >= ISRA_ARG_SIZE_LIMIT)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, has zero or huge size\n");
          continue;
        }
      if (type_internals_preclude_sra_p (type, &msg))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, %s\n", msg);
          continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref)
        desc->deref_index = by_ref_count++;
    }
  return ret;
}

/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}


/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
   write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
             desc->param_number, reason);

  desc->split_candidate = false;
}

/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
   there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}

/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
                 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
                                             sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  return access;
}

/* The context in which scan_expr_access has been called, i.e. whether it
   deals with a load, a function argument, or a store.  Please note that in
   rare circumstances when it is not clear if the access is a load or a store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
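
/* For instance (hypothetical statements): when scanning "x = p->f" the
   expression P->F is visited in ISRA_CTX_LOAD, when scanning "p->f = x" it is
   visited in ISRA_CTX_STORE, and when scanning "g (p->f)" it is visited in
   ISRA_CTX_ARG.  */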

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer to the linked list
   at a certain tree level FIRST.  Attempt to create it and put it into the
   appropriate place in the access tree if it does not exist, but fail and
   return NULL if there are already too many accesses, if it would create a
   partially overlapping access or if an access would end up within a
   pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
              HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
        return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
         it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
         && access->next_sibling
         && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
         something below access.  But we only allow nesting in call
         arguments.  */
      if (access->nonarg)
        return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
         what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
        return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->first_child = access;

      while (access->next_sibling
             && access->next_sibling->offset < offset + size)
        access = access->next_sibling;
      if (access->offset + access->size > offset + size)
        {
          /* This must be a different access, which are sorted, so the
             following must be true and this signals a partial overlap.  */
          gcc_assert (access->offset > offset);
          return NULL;
        }

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
         super-access.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, and mark it as used in context CTX.
   Attempt to create it if it does not exist, but fail and return NULL if there
   are already too many accesses, if it would create a partially overlapping
   access or if an access would end up in a non-call access.  */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
            isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
                                              size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
                                  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}

/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of the parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
                      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size > 0);

      if (parent_size != 0)
        {
          if (access->offset < parent_offset)
            {
              error ("Access offset before parent offset");
              return true;
            }
          if (access->size >= parent_size)
            {
              error ("Access size greater or equal to its parent size");
              return true;
            }
          if (access->offset + access->size > parent_offset + parent_size)
            {
              error ("Access terminates outside of its parent");
              return true;
            }
        }

      if (verify_access_tree_1 (access->first_child, access->offset,
                                access->size))
        return true;

      if (access->next_sibling
          && (access->next_sibling->offset < access->offset + access->size))
        {
          error ("Access overlaps with its sibling");
          return true;
        }

      access = access->next_sibling;
    }
  return false;
}

/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
        dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}


/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}

/* Mark a dereference of parameter identified by DESC of distance DIST in a
   basic block BB, unless the BB has already been marked as potentially
   final.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
                        basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (bitmap_bit_p (final_bbs, bb->index))
    return;

  int idx = bb->index * by_ref_count + desc->deref_index;
  if (bb_dereferences[idx] < dist)
    bb_dereferences[idx] = dist;
}
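
/* For example (a made-up situation), if the current function has two
   by-reference split candidates (by_ref_count == 2), a dereference reaching
   offset + size == 16 of the parameter with deref_index 1 in basic block 3 is
   recorded by setting bb_dereferences[3 * 2 + 1] to 16, unless a larger
   distance has already been recorded there.  */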

/* Return true if any potential replacements should use NEW_TYPE as opposed to
   previously recorded OLD_TYPE.  */

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
          || TREE_CODE (new_type) == VECTOR_TYPE))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || TREE_CODE (old_type) == VECTOR_TYPE)
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
          != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
          != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}
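
/* As a worked example of the rules above (with hypothetical types): a
   previously recorded short int is superseded by int because of its bigger
   precision, and any other scalar type is superseded by a complex or vector
   type.  Between two otherwise equally ranked types the one with the larger
   TYPE_UID prevails, merely to make the choice stable.  */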

/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};

/* Record a use of a portion (given by UNIT_OFFSET and UNIT_SIZE) of the
   parameter described by DESC in a call argument described by CALL_INFO.  */

static void
record_nonregister_call_use (gensum_param_desc *desc,
                             scan_call_info *call_info,
                             unsigned unit_offset, unsigned unit_size)
{
  isra_call_summary *csum = call_sums->get_create (call_info->cs);
  csum->init_inputs (call_info->argument_count);

  isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
  param_flow->aggregate_pass_through = true;
  set_single_param_flow_source (param_flow, desc->param_number);
  param_flow->unit_offset = unit_offset;
  param_flow->unit_size = unit_size;
  desc->call_uses++;
}

/* Callback of walk_aliased_vdefs, just mark that there was a possible
   modification.  */

static bool
mark_maybe_modified (ao_ref *, tree, void *data)
{
  bool *maybe_modified = (bool *) data;
  *maybe_modified = true;
  return true;
}

/* Analyze expression EXPR from GIMPLE statement STMT for accesses to
   parameters.  CTX specifies whether EXPR is used in a load, a store or as a
   call argument.  BB must be the basic block in which EXPR resides.  If CTX
   specifies call argument context, CALL_INFO must describe that call and
   argument position, otherwise it is ignored.  */
1642
1643 static void
1644 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1645 basic_block bb, scan_call_info *call_info = NULL)
1646 {
1647 poly_int64 poffset, psize, pmax_size;
1648 HOST_WIDE_INT offset, size, max_size;
1649 tree base;
1650 bool deref = false;
1651 bool reverse;
1652
1653 if (TREE_CODE (expr) == BIT_FIELD_REF
1654 || TREE_CODE (expr) == IMAGPART_EXPR
1655 || TREE_CODE (expr) == REALPART_EXPR)
1656 expr = TREE_OPERAND (expr, 0);
1657
1658 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1659
1660 if (TREE_CODE (base) == MEM_REF)
1661 {
1662 tree op = TREE_OPERAND (base, 0);
1663 if (TREE_CODE (op) != SSA_NAME
1664 || !SSA_NAME_IS_DEFAULT_DEF (op))
1665 return;
1666 base = SSA_NAME_VAR (op);
1667 if (!base)
1668 return;
1669 deref = true;
1670 }
1671 if (TREE_CODE (base) != PARM_DECL)
1672 return;
1673
1674 gensum_param_desc *desc = get_gensum_param_desc (base);
1675 if (!desc || !desc->split_candidate)
1676 return;
1677
1678 if (!poffset.is_constant (&offset)
1679 || !psize.is_constant (&size)
1680 || !pmax_size.is_constant (&max_size))
1681 {
1682 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1683 "access.");
1684 return;
1685 }
1686 if (size < 0 || size != max_size)
1687 {
1688 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1689 return;
1690 }
1691 if (TREE_CODE (expr) == COMPONENT_REF
1692 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1693 {
1694 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1695 return;
1696 }
1697 if (offset < 0)
1698 {
1699 disqualify_split_candidate (desc, "Encountered an access at a "
1700 "negative offset.");
1701 return;
1702 }
1703 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1704 gcc_assert ((size % BITS_PER_UNIT) == 0);
1705 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1706 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1707 {
1708 disqualify_split_candidate (desc, "Encountered an access with too big "
1709 "offset or size");
1710 return;
1711 }
1712
1713 tree type = TREE_TYPE (expr);
1714 unsigned int exp_align = get_object_alignment (expr);
1715
1716 if (exp_align < TYPE_ALIGN (type))
1717 {
1718 disqualify_split_candidate (desc, "Underaligned access.");
1719 return;
1720 }
1721
1722 if (deref)
1723 {
1724 if (!desc->by_ref)
1725 {
1726 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1727 return;
1728 }
1729 else if (ctx == ISRA_CTX_STORE)
1730 {
1731 disqualify_split_candidate (desc, "Storing to data passed by "
1732 "reference.");
1733 return;
1734 }
1735
1736 if (!aa_walking_limit)
1737 {
1738 disqualify_split_candidate (desc, "Out of alias analysis step "
1739 "limit.");
1740 return;
1741 }
1742
1743 gcc_checking_assert (gimple_vuse (stmt));
1744 bool maybe_modified = false;
1745 ao_ref ar;
1746
1747 ao_ref_init (&ar, expr);
1748 bitmap visited = BITMAP_ALLOC (NULL);
1749 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1750 mark_maybe_modified, &maybe_modified,
1751 &visited, NULL, aa_walking_limit);
1752 BITMAP_FREE (visited);
1753 if (walked > 0)
1754 {
1755 gcc_assert (aa_walking_limit > walked);
1756 aa_walking_limit = aa_walking_limit - walked;
1757 }
1758 if (walked < 0)
1759 aa_walking_limit = 0;
1760 if (maybe_modified || walked < 0)
1761 {
1762 disqualify_split_candidate (desc, "Data passed by reference possibly "
1763 "modified through an alias.");
1764 return;
1765 }
1766 else
1767 mark_param_dereference (desc, offset + size, bb);
1768 }
1769 else
1770 /* Pointer parameters with direct uses should have been ruled out by
1771 analyzing SSA default def when looking at the parameters. */
1772 gcc_assert (!desc->by_ref);
1773
1774 gensum_param_access *access = get_access (desc, offset, size, ctx);
1775 if (!access)
1776 return;
1777
1778 if (ctx == ISRA_CTX_ARG)
1779 {
1780 gcc_checking_assert (call_info);
1781
1782 if (!deref)
1783 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1784 size / BITS_PER_UNIT);
1785 else
1786 /* This is not a pass-through of a pointer, this is a use like any
1787 other. */
1788 access->nonarg = true;
1789 }
1790
1791 if (!access->type)
1792 {
1793 access->type = type;
1794 access->alias_ptr_type = reference_alias_ptr_type (expr);
1795 access->reverse = reverse;
1796 }
1797 else
1798 {
1799 if (exp_align < TYPE_ALIGN (access->type))
1800 {
1801 disqualify_split_candidate (desc, "Reference has lower alignment "
1802 "than a previous one.");
1803 return;
1804 }
1805 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1806 {
1807 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1808 return;
1809 }
1810 if (access->reverse != reverse)
1811 {
1812 disqualify_split_candidate (desc, "Both normal and reverse "
1813 "scalar storage order.");
1814 return;
1815 }
1816 if (!deref
1817 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1818 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1819 {
1820 /* We need the same aggregate type on all accesses to be able to
1821 distinguish transformation spots from pass-through arguments in
1822 the transformation phase. */
1823 disqualify_split_candidate (desc, "We do not support aggregate "
1824 "type punning.");
1825 return;
1826 }
1827
1828 if (type_prevails_p (access->type, type))
1829 access->type = type;
1830 }
1831 }
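
/* For illustration only (a hypothetical example, not from any testcase): given

     struct S { int a; int b; };
     int foo (struct S *p) { return p->b; }

   the load from p->b reaches scan_expr_access as a MEM_REF based on the
   default definition of p, so DEREF is set and, with 32-bit ints, an access
   at bit offset 32 of size 32 is recorded for p; mark_param_dereference then
   notes that 64 bits of *p are known to be dereferenced in this BB.  A store
   through p, a bit-field access or a variable-sized access would instead
   disqualify p via disqualify_split_candidate above.  */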
1832
1833 /* Scan the body of the function described by NODE and FUN and create access
1834 trees for its parameters. */
1835
1836 static void
1837 scan_function (cgraph_node *node, struct function *fun)
1838 {
1839 basic_block bb;
1840
1841 FOR_EACH_BB_FN (bb, fun)
1842 {
1843 gimple_stmt_iterator gsi;
1844 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1845 {
1846 gimple *stmt = gsi_stmt (gsi);
1847
1848 if (stmt_can_throw_external (fun, stmt))
1849 bitmap_set_bit (final_bbs, bb->index);
1850 switch (gimple_code (stmt))
1851 {
1852 case GIMPLE_RETURN:
1853 {
1854 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1855 if (t != NULL_TREE)
1856 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1857 bitmap_set_bit (final_bbs, bb->index);
1858 }
1859 break;
1860
1861 case GIMPLE_ASSIGN:
1862 if (gimple_assign_single_p (stmt)
1863 && !gimple_clobber_p (stmt))
1864 {
1865 tree rhs = gimple_assign_rhs1 (stmt);
1866 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1867 tree lhs = gimple_assign_lhs (stmt);
1868 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1869 }
1870 break;
1871
1872 case GIMPLE_CALL:
1873 {
1874 unsigned argument_count = gimple_call_num_args (stmt);
1875 isra_scan_context ctx = ISRA_CTX_ARG;
1876 scan_call_info call_info, *call_info_p = &call_info;
1877 if (gimple_call_internal_p (stmt))
1878 {
1879 call_info_p = NULL;
1880 ctx = ISRA_CTX_LOAD;
1881 internal_fn ifn = gimple_call_internal_fn (stmt);
1882 if (internal_store_fn_p (ifn))
1883 ctx = ISRA_CTX_STORE;
1884 }
1885 else
1886 {
1887 call_info.cs = node->get_edge (stmt);
1888 call_info.argument_count = argument_count;
1889 }
1890
1891 for (unsigned i = 0; i < argument_count; i++)
1892 {
1893 call_info.arg_idx = i;
1894 scan_expr_access (gimple_call_arg (stmt, i), stmt,
1895 ctx, bb, call_info_p);
1896 }
1897
1898 tree lhs = gimple_call_lhs (stmt);
1899 if (lhs)
1900 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1901 int flags = gimple_call_flags (stmt);
1902 if ((flags & (ECF_CONST | ECF_PURE)) == 0)
1903 bitmap_set_bit (final_bbs, bb->index);
1904 }
1905 break;
1906
1907 case GIMPLE_ASM:
1908 {
1909 gasm *asm_stmt = as_a <gasm *> (stmt);
1910 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1911 asm_visit_addr);
1912 bitmap_set_bit (final_bbs, bb->index);
1913
1914 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1915 {
1916 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1917 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1918 }
1919 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1920 {
1921 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1922 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1923 }
1924 }
1925 break;
1926
1927 default:
1928 break;
1929 }
1930 }
1931 }
1932 }
1933
1934 /* Return true if SSA_NAME NAME is only used in return statements, or if
1935 results of any operations it is involved in are only used in return
1936 statements. ANALYZED is a bitmap that tracks which SSA names we have
1937 already started investigating. */
1938
1939 static bool
1940 ssa_name_only_returned_p (tree name, bitmap analyzed)
1941 {
1942 bool res = true;
1943 imm_use_iterator imm_iter;
1944 gimple *stmt;
1945
1946 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1947 {
1948 if (is_gimple_debug (stmt))
1949 continue;
1950
1951 if (gimple_code (stmt) == GIMPLE_RETURN)
1952 {
1953 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1954 if (t != name)
1955 {
1956 res = false;
1957 BREAK_FROM_IMM_USE_STMT (imm_iter);
1958 }
1959 }
1960 else if ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1961 || gimple_code (stmt) == GIMPLE_PHI)
1962 {
1963 /* TODO: And perhaps for const function calls too? */
1964 tree lhs;
1965 if (gimple_code (stmt) == GIMPLE_PHI)
1966 lhs = gimple_phi_result (stmt);
1967 else
1968 lhs = gimple_assign_lhs (stmt);
1969
1970 if (TREE_CODE (lhs) != SSA_NAME)
1971 {
1972 res = false;
1973 BREAK_FROM_IMM_USE_STMT (imm_iter);
1974 }
1975 gcc_assert (!gimple_vdef (stmt));
1976 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
1977 && !ssa_name_only_returned_p (lhs, analyzed))
1978 {
1979 res = false;
1980 BREAK_FROM_IMM_USE_STMT (imm_iter);
1981 }
1982 }
1983 else
1984 {
1985 res = false;
1986 BREAK_FROM_IMM_USE_STMT (imm_iter);
1987 }
1988 }
1989 return res;
1990 }
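
/* An illustrative (hypothetical) example: in a caller containing

     t_1 = bar ();
     s_2 = t_1 + 1;
     return s_2;

   ssa_name_only_returned_p is true for t_1, because its only use feeds s_2,
   which is itself only returned, whereas an additional use such as
   baz (t_1) would make the predicate false.  */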
1991
1992 /* Inspect the uses of the return value of the call associated with CS, and if
1993 it is not used or if it is only used to construct the return value of the
1994 caller, mark it as such in the call summary. Also check for
1995 misaligned arguments. */
1996
1997 static void
1998 isra_analyze_call (cgraph_edge *cs)
1999 {
2000 gcall *call_stmt = cs->call_stmt;
2001 unsigned count = gimple_call_num_args (call_stmt);
2002 isra_call_summary *csum = call_sums->get_create (cs);
2003
2004 for (unsigned i = 0; i < count; i++)
2005 {
2006 tree arg = gimple_call_arg (call_stmt, i);
2007 if (is_gimple_reg (arg))
2008 continue;
2009
2010 tree offset;
2011 poly_int64 bitsize, bitpos;
2012 machine_mode mode;
2013 int unsignedp, reversep, volatilep = 0;
2014 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2015 &unsignedp, &reversep, &volatilep);
2016 if (!multiple_p (bitpos, BITS_PER_UNIT))
2017 {
2018 csum->m_bit_aligned_arg = true;
2019 break;
2020 }
2021 }
2022
2023 tree lhs = gimple_call_lhs (call_stmt);
2024 if (lhs)
2025 {
2026 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2027 from this function (without being read anywhere). */
2028 if (TREE_CODE (lhs) == SSA_NAME)
2029 {
2030 bitmap analyzed = BITMAP_ALLOC (NULL);
2031 if (ssa_name_only_returned_p (lhs, analyzed))
2032 csum->m_return_returned = true;
2033 BITMAP_FREE (analyzed);
2034 }
2035 }
2036 else
2037 csum->m_return_ignored = true;
2038 }
2039
2040 /* Look at all calls going out of NODE and perform all analyses necessary for
2041 IPA-SRA that are not done at body scan time, or that must be done even when
2042 the body is not scanned because the function is not a candidate. */
2043
2044 static void
2045 isra_analyze_all_outgoing_calls (cgraph_node *node)
2046 {
2047 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2048 isra_analyze_call (cs);
2049 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2050 isra_analyze_call (cs);
2051 }
2052
2053 /* Dump a dereferences table with heading STR to file F. */
2054
2055 static void
2056 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2057 {
2058 basic_block bb;
2059
2060 fprintf (f, "%s", str);
2061 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2062 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2063 {
2064 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2065 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2066 {
2067 int i;
2068 for (i = 0; i < by_ref_count; i++)
2069 {
2070 int idx = bb->index * by_ref_count + i;
2071 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2072 }
2073 }
2074 fprintf (f, "\n");
2075 }
2076 fprintf (f, "\n");
2077 }
2078
2079 /* Propagate distances in bb_dereferences in the opposite direction to the
2080 control flow edges, in each step storing the maximum of the current value
2081 and the minimum of all successors. These steps are repeated until the table
2082 stabilizes. Note that BBs which might terminate the function (according to
2083 the final_bbs bitmap) are never updated in this way. */
2084
2085 static void
2086 propagate_dereference_distances (struct function *fun)
2087 {
2088 basic_block bb;
2089
2090 if (dump_file && (dump_flags & TDF_DETAILS))
2091 dump_dereferences_table (dump_file, fun,
2092 "Dereference table before propagation:\n");
2093
2094 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2095 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2096 FOR_EACH_BB_FN (bb, fun)
2097 {
2098 queue.quick_push (bb);
2099 bb->aux = bb;
2100 }
2101
2102 while (!queue.is_empty ())
2103 {
2104 edge_iterator ei;
2105 edge e;
2106 bool change = false;
2107 int i;
2108
2109 bb = queue.pop ();
2110 bb->aux = NULL;
2111
2112 if (bitmap_bit_p (final_bbs, bb->index))
2113 continue;
2114
2115 for (i = 0; i < by_ref_count; i++)
2116 {
2117 int idx = bb->index * by_ref_count + i;
2118 bool first = true;
2119 HOST_WIDE_INT inh = 0;
2120
2121 FOR_EACH_EDGE (e, ei, bb->succs)
2122 {
2123 int succ_idx = e->dest->index * by_ref_count + i;
2124
2125 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2126 continue;
2127
2128 if (first)
2129 {
2130 first = false;
2131 inh = bb_dereferences [succ_idx];
2132 }
2133 else if (bb_dereferences [succ_idx] < inh)
2134 inh = bb_dereferences [succ_idx];
2135 }
2136
2137 if (!first && bb_dereferences[idx] < inh)
2138 {
2139 bb_dereferences[idx] = inh;
2140 change = true;
2141 }
2142 }
2143
2144 if (change)
2145 FOR_EACH_EDGE (e, ei, bb->preds)
2146 {
2147 if (e->src->aux)
2148 continue;
2149
2150 e->src->aux = e->src;
2151 queue.quick_push (e->src);
2152 }
2153 }
2154
2155 if (dump_file && (dump_flags & TDF_DETAILS))
2156 dump_dereferences_table (dump_file, fun,
2157 "Dereference table after propagation:\n");
2158 }
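
/* A small illustrative example (not taken from any testcase): in a diamond

        BB2
       /   \
     BB3   BB4
       \   /
        BB5

   where the recorded dereference distance of a parameter is 8 in BB3 and 16
   in BB4, BB2 receives max (its own value, min (8, 16)), i.e. a distance of
   at least 8 is guaranteed to be dereferenced on every path from BB2.  BBs
   in final_bbs are skipped because execution might end in them and never
   reach the dereferences in their successors.  */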
2159
2160 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2161 children, return true if the parameter cannot be split, otherwise return
2162 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be
2163 the index of the entry BB in the function of PARM. */
2164
2165 static bool
2166 check_gensum_access (tree parm, gensum_param_desc *desc,
2167 gensum_param_access *access,
2168 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2169 int entry_bb_index)
2170 {
2171 if (access->nonarg)
2172 {
2173 *only_calls = false;
2174 *nonarg_acc_size += access->size;
2175
2176 if (access->first_child)
2177 {
2178 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2179 return true;
2180 }
2181 }
2182 /* Do not decompose a non-BLKmode param in a way that would create
2183 BLKmode params. Especially for by-reference passing (thus,
2184 pointer-type param) this is hardly worthwhile. */
2185 if (DECL_MODE (parm) != BLKmode
2186 && TYPE_MODE (access->type) == BLKmode)
2187 {
2188 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2189 return true;
2190 }
2191
2192 if (desc->by_ref)
2193 {
2194 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2195 if ((access->offset + access->size) > bb_dereferences[idx])
2196 {
2197 disqualify_split_candidate (desc, "Would create a possibly "
2198 "illegal dereference in a caller.");
2199 return true;
2200 }
2201 }
2202
2203 for (gensum_param_access *ch = access->first_child;
2204 ch;
2205 ch = ch->next_sibling)
2206 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2207 entry_bb_index))
2208 return true;
2209
2210 return false;
2211 }
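
/* Note on the by-reference check above, with hypothetical numbers: if the
   deepest non-call access to *PARM ends at offset + size == 96 bits but the
   dereference distance recorded for the entry BB is only 64, some path
   through the function may never dereference that far, and performing the
   load in every caller could introduce a trap the original program did not
   have, hence the disqualification.  */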
2212
2213 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2214 descriptor DESC. */
2215
2216 static void
2217 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2218 {
2219 param_access *to = ggc_cleared_alloc<param_access> ();
2220 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2221 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2222 to->unit_offset = from->offset / BITS_PER_UNIT;
2223 to->unit_size = from->size / BITS_PER_UNIT;
2224 to->type = from->type;
2225 to->alias_ptr_type = from->alias_ptr_type;
2226 to->certain = from->nonarg;
2227 to->reverse = from->reverse;
2228 vec_safe_push (desc->accesses, to);
2229
2230 for (gensum_param_access *ch = from->first_child;
2231 ch;
2232 ch = ch->next_sibling)
2233 copy_accesses_to_ipa_desc (ch, desc);
2234 }
2235
2236 /* Analyze function body scan results stored in PARAM_DESCRIPTIONS, detect
2237 possible transformations and store information about them in the function
2238 summary. NODE, FUN and IFS are all structures describing the currently
2239 analyzed function. */
2240
2241 static void
2242 process_scan_results (cgraph_node *node, struct function *fun,
2243 isra_func_summary *ifs,
2244 vec<gensum_param_desc> *param_descriptions)
2245 {
2246 bool check_pass_throughs = false;
2247 bool dereferences_propagated = false;
2248 tree parm = DECL_ARGUMENTS (node->decl);
2249 unsigned param_count = param_descriptions->length();
2250
2251 for (unsigned desc_index = 0;
2252 desc_index < param_count;
2253 desc_index++, parm = DECL_CHAIN (parm))
2254 {
2255 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2256 if (!desc->split_candidate)
2257 continue;
2258
2259 if (flag_checking)
2260 isra_verify_access_tree (desc->accesses);
2261
2262 if (!dereferences_propagated
2263 && desc->by_ref
2264 && desc->accesses)
2265 {
2266 propagate_dereference_distances (fun);
2267 dereferences_propagated = true;
2268 }
2269
2270 HOST_WIDE_INT nonarg_acc_size = 0;
2271 bool only_calls = true;
2272 bool check_failed = false;
2273
2274 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2275 for (gensum_param_access *acc = desc->accesses;
2276 acc;
2277 acc = acc->next_sibling)
2278 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2279 entry_bb_index))
2280 {
2281 check_failed = true;
2282 break;
2283 }
2284 if (check_failed)
2285 continue;
2286
2287 if (only_calls)
2288 desc->locally_unused = true;
2289
2290 HOST_WIDE_INT cur_param_size
2291 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2292 HOST_WIDE_INT param_size_limit;
2293 if (!desc->by_ref || optimize_function_for_size_p (fun))
2294 param_size_limit = cur_param_size;
2295 else
2296 param_size_limit
2297 = opt_for_fn (node->decl,
2298 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2299 if (nonarg_acc_size > param_size_limit
2300 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2301 {
2302 disqualify_split_candidate (desc, "Would result in too large a set "
2303 "of replacements.");
2304 }
2305 else
2306 {
2307 /* create_parameter_descriptors makes sure unit sizes of all
2308 candidate parameters fit unsigned integers restricted to
2309 ISRA_ARG_SIZE_LIMIT. */
2310 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2311 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2312 if (desc->split_candidate && desc->ptr_pt_count)
2313 {
2314 gcc_assert (desc->by_ref);
2315 check_pass_throughs = true;
2316 }
2317 }
2318 }
2319
2320 /* When a pointer parameter is passed through to a callee, where it is only
2321 used to read one or a few items, we can attempt to transform it into
2322 obtaining and passing through the items instead of the pointer. But we
2323 must take extra care that 1) we do not introduce any segfault by moving
2324 dereferences above control flow and that 2) the data is not modified
2325 through an alias in this function. The IPA analysis must not introduce
2326 any access candidates unless it can prove both.
2327
2328 The current solution is very crude as it consists of ensuring that the
2329 call postdominates the entry BB and that the VUSE of the call is a
2330 default definition. TODO: For non-recursive callees in the same
2331 compilation unit we could do better by doing analysis in topological order
2332 and looking into access candidates of callees, using their alias_ptr_types
2333 to attempt real AA. We could also use the maximum known dereferenced
2334 offset in this function at IPA level.
2335
2336 TODO: Measure the overhead and the effect of just being pessimistic.
2337 Maybe this is only -O3 material?
2338 */
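
     /* For illustration (hypothetical): in

          int foo (int *p)
          {
            if (cond)
              return bar (p);
            return 0;
          }

        the call to bar does not postdominate the entry BB, so even if bar
        only reads *p, hoisting the load of *p to foo's callers could trap
        on the path where cond is false; safe_to_import_accesses therefore
        remains false for this argument.  */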
2339 bool pdoms_calculated = false;
2340 if (check_pass_throughs)
2341 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2342 {
2343 gcall *call_stmt = cs->call_stmt;
2344 tree vuse = gimple_vuse (call_stmt);
2345
2346 /* If the callee is a const function, we don't get a VUSE. In such a
2347 case there will be no memory accesses in the called function (or the
2348 const attribute is wrong) and then we just don't care. */
2349 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2350
2351 unsigned count = gimple_call_num_args (call_stmt);
2352 isra_call_summary *csum = call_sums->get_create (cs);
2353 csum->init_inputs (count);
2354 for (unsigned argidx = 0; argidx < count; argidx++)
2355 {
2356 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2357 continue;
2358 unsigned pidx
2359 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2360 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2361 if (!desc->split_candidate)
2362 {
2363 csum->m_arg_flow[argidx].pointer_pass_through = false;
2364 continue;
2365 }
2366 if (!uses_memory_as_obtained)
2367 continue;
2368
2369 /* Post-dominator check placed last, hoping that it usually won't
2370 be needed. */
2371 if (!pdoms_calculated)
2372 {
2373 gcc_checking_assert (cfun);
2374 add_noreturn_fake_exit_edges ();
2375 connect_infinite_loops_to_exit ();
2376 calculate_dominance_info (CDI_POST_DOMINATORS);
2377 pdoms_calculated = true;
2378 }
2379 if (dominated_by_p (CDI_POST_DOMINATORS,
2380 gimple_bb (call_stmt),
2381 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2382 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2383 }
2384
2385 }
2386 if (pdoms_calculated)
2387 {
2388 free_dominance_info (CDI_POST_DOMINATORS);
2389 remove_fake_exit_edges ();
2390 }
2391
2392 /* TODO: Add early exit if we disqualified everything. This also requires
2393 that we either relax the restriction that
2394 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2395 or store the number of parameters to IPA-SRA function summary and use that
2396 when just removing params. */
2397
2398 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2399 ifs->m_parameters->quick_grow_cleared (param_count);
2400 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2401 {
2402 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2403 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2404
2405 d->param_size_limit = s->param_size_limit;
2406 d->size_reached = s->nonarg_acc_size;
2407 d->locally_unused = s->locally_unused;
2408 d->split_candidate = s->split_candidate;
2409 d->by_ref = s->by_ref;
2410
2411 for (gensum_param_access *acc = s->accesses;
2412 acc;
2413 acc = acc->next_sibling)
2414 copy_accesses_to_ipa_desc (acc, d);
2415 }
2416
2417 if (dump_file)
2418 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2419 }
2420
2421 /* Return true if there are any overlaps among certain accesses of DESC. If
2422 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2423 too. DESC is assumed to be a split candidate that is not locally
2424 unused. */
2425
2426 static bool
2427 overlapping_certain_accesses_p (isra_param_desc *desc,
2428 bool *certain_access_present_p)
2429 {
2430 unsigned pclen = vec_safe_length (desc->accesses);
2431 for (unsigned i = 0; i < pclen; i++)
2432 {
2433 param_access *a1 = (*desc->accesses)[i];
2434
2435 if (!a1->certain)
2436 continue;
2437 if (certain_access_present_p)
2438 *certain_access_present_p = true;
2439 for (unsigned j = i + 1; j < pclen; j++)
2440 {
2441 param_access *a2 = (*desc->accesses)[j];
2442 if (a2->certain
2443 && a1->unit_offset < a2->unit_offset + a2->unit_size
2444 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2445 return true;
2446 }
2447 }
2448 return false;
2449 }
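
/* For example (illustrative numbers): certain accesses with (unit_offset,
   unit_size) of (0, 8) and (4, 4) overlap, since 0 < 4 + 4 and 0 + 8 > 4,
   while (0, 4) and (4, 4) merely abut and are acceptable.  */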
2450
2451 /* Check for any overlaps of certain param accesses among splitting candidates
2452 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2453 check that used splitting candidates have at least one certain access. */
2454
2455 static void
2456 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2457 {
2458 isra_func_summary *ifs = func_sums->get (node);
2459 if (!ifs || !ifs->m_candidate)
2460 return;
2461 unsigned param_count = vec_safe_length (ifs->m_parameters);
2462 for (unsigned pidx = 0; pidx < param_count; pidx++)
2463 {
2464 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2465 if (!desc->split_candidate || desc->locally_unused)
2466 continue;
2467
2468 bool certain_access_present = !certain_must_exist;
2469 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2470 internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
2471 "which overlap", node->dump_name (), pidx);
2472 if (!certain_access_present)
2473 internal_error ("Function %qs, parameter %u, is used but does not "
2474 "have any certain IPA-SRA access",
2475 node->dump_name (), pidx);
2476 }
2477 }
2478
2479 /* Intraprocedural part of IPA-SRA analysis. Scan the function body of NODE and
2480 create a summary structure describing IPA-SRA opportunities and constraints
2481 in it. */
2482
2483 static void
2484 ipa_sra_summarize_function (cgraph_node *node)
2485 {
2486 if (dump_file)
2487 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
2488 node->order);
2489 if (!ipa_sra_preliminary_function_checks (node))
2490 return;
2491 gcc_obstack_init (&gensum_obstack);
2492 isra_func_summary *ifs = func_sums->get_create (node);
2493 ifs->m_candidate = true;
2494 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
2495 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
2496
2497 decl2desc = new hash_map<tree, gensum_param_desc *>;
2498 unsigned count = 0;
2499 for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
2500 count++;
2501
2502 if (count > 0)
2503 {
2504 auto_vec<gensum_param_desc, 16> param_descriptions (count);
2505 param_descriptions.reserve_exact (count);
2506 param_descriptions.quick_grow_cleared (count);
2507
2508 bool cfun_pushed = false;
2509 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
2510 if (create_parameter_descriptors (node, &param_descriptions))
2511 {
2512 push_cfun (fun);
2513 cfun_pushed = true;
2514 final_bbs = BITMAP_ALLOC (NULL);
2515 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
2516 by_ref_count
2517 * last_basic_block_for_fn (fun));
2518 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2519 scan_function (node, fun);
2520
2521 if (dump_file)
2522 {
2523 dump_gensum_param_descriptors (dump_file, node->decl,
2524 &param_descriptions);
2525 fprintf (dump_file, "----------------------------------------\n");
2526 }
2527 }
2528 process_scan_results (node, fun, ifs, &param_descriptions);
2529
2530 if (cfun_pushed)
2531 pop_cfun ();
2532 if (bb_dereferences)
2533 {
2534 free (bb_dereferences);
2535 bb_dereferences = NULL;
2536 BITMAP_FREE (final_bbs);
2537 final_bbs = NULL;
2538 }
2539 }
2540 isra_analyze_all_outgoing_calls (node);
2541
2542 delete decl2desc;
2543 decl2desc = NULL;
2544 obstack_free (&gensum_obstack, NULL);
2545 if (dump_file)
2546 fprintf (dump_file, "\n\n");
2547 if (flag_checking)
2548 verify_splitting_accesses (node, false);
2549 return;
2550 }
2551
2552 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2553 this compilation unit and create summary structures describing IPA-SRA
2554 opportunities and constraints in them. */
2555
2556 static void
2557 ipa_sra_generate_summary (void)
2558 {
2559 struct cgraph_node *node;
2560
2561 gcc_checking_assert (!func_sums);
2562 gcc_checking_assert (!call_sums);
2563 func_sums
2564 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2565 ipa_sra_function_summaries (symtab, true));
2566 call_sums = new ipa_sra_call_summaries (symtab);
2567
2568 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2569 ipa_sra_summarize_function (node);
2570 return;
2571 }
2572
2573 /* Write information about the call described by edge E, obtained by
2574 intraprocedural analysis, into a stream for LTO WPA. */
2575
2576 static void
2577 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2578 {
2579 isra_call_summary *csum = call_sums->get (e);
2580 unsigned input_count = csum->m_arg_flow.length ();
2581 streamer_write_uhwi (ob, input_count);
2582 for (unsigned i = 0; i < input_count; i++)
2583 {
2584 isra_param_flow *ipf = &csum->m_arg_flow[i];
2585 streamer_write_hwi (ob, ipf->length);
2586 bitpack_d bp = bitpack_create (ob->main_stream);
2587 for (int j = 0; j < ipf->length; j++)
2588 bp_pack_value (&bp, ipf->inputs[j], 8);
2589 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2590 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2591 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2592 streamer_write_bitpack (&bp);
2593 streamer_write_uhwi (ob, ipf->unit_offset);
2594 streamer_write_uhwi (ob, ipf->unit_size);
2595 }
2596 bitpack_d bp = bitpack_create (ob->main_stream);
2597 bp_pack_value (&bp, csum->m_return_ignored, 1);
2598 bp_pack_value (&bp, csum->m_return_returned, 1);
2599 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2600 streamer_write_bitpack (&bp);
2601 }
2602
2603 /* Write intraprocedural analysis information about NODE and all of its outgoing
2604 edges into a stream for LTO WPA. */
2605
2606 static void
2607 isra_write_node_summary (output_block *ob, cgraph_node *node)
2608 {
2609 isra_func_summary *ifs = func_sums->get (node);
2610 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2611 int node_ref = lto_symtab_encoder_encode (encoder, node);
2612 streamer_write_uhwi (ob, node_ref);
2613
2614 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2615 streamer_write_uhwi (ob, param_desc_count);
2616 for (unsigned i = 0; i < param_desc_count; i++)
2617 {
2618 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2619 unsigned access_count = vec_safe_length (desc->accesses);
2620 streamer_write_uhwi (ob, access_count);
2621 for (unsigned j = 0; j < access_count; j++)
2622 {
2623 param_access *acc = (*desc->accesses)[j];
2624 stream_write_tree (ob, acc->type, true);
2625 stream_write_tree (ob, acc->alias_ptr_type, true);
2626 streamer_write_uhwi (ob, acc->unit_offset);
2627 streamer_write_uhwi (ob, acc->unit_size);
2628 bitpack_d bp = bitpack_create (ob->main_stream);
2629 bp_pack_value (&bp, acc->certain, 1);
2630 streamer_write_bitpack (&bp);
2631 }
2632 streamer_write_uhwi (ob, desc->param_size_limit);
2633 streamer_write_uhwi (ob, desc->size_reached);
2634 bitpack_d bp = bitpack_create (ob->main_stream);
2635 bp_pack_value (&bp, desc->locally_unused, 1);
2636 bp_pack_value (&bp, desc->split_candidate, 1);
2637 bp_pack_value (&bp, desc->by_ref, 1);
2638 streamer_write_bitpack (&bp);
2639 }
2640 bitpack_d bp = bitpack_create (ob->main_stream);
2641 bp_pack_value (&bp, ifs->m_candidate, 1);
2642 bp_pack_value (&bp, ifs->m_returns_value, 1);
2643 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2644 gcc_assert (!ifs->m_queued);
2645 streamer_write_bitpack (&bp);
2646
2647 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2648 isra_write_edge_summary (ob, e);
2649 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2650 isra_write_edge_summary (ob, e);
2651 }
2652
2653 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2654
2655 static void
2656 ipa_sra_write_summary (void)
2657 {
2658 if (!func_sums || !call_sums)
2659 return;
2660
2661 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2662 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2663 ob->symbol = NULL;
2664
2665 unsigned int count = 0;
2666 lto_symtab_encoder_iterator lsei;
2667 for (lsei = lsei_start_function_in_partition (encoder);
2668 !lsei_end_p (lsei);
2669 lsei_next_function_in_partition (&lsei))
2670 {
2671 cgraph_node *node = lsei_cgraph_node (lsei);
2672 if (node->has_gimple_body_p ()
2673 && func_sums->get (node) != NULL)
2674 count++;
2675 }
2676 streamer_write_uhwi (ob, count);
2677
2678 /* Process all of the functions. */
2679 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2680 lsei_next_function_in_partition (&lsei))
2681 {
2682 cgraph_node *node = lsei_cgraph_node (lsei);
2683 if (node->has_gimple_body_p ()
2684 && func_sums->get (node) != NULL)
2685 isra_write_node_summary (ob, node);
2686 }
2687 streamer_write_char_stream (ob->main_stream, 0);
2688 produce_asm (ob, NULL);
2689 destroy_output_block (ob);
2690 }
2691
2692 /* Read intraprocedural analysis information about the call described by edge
2693 CS from a stream for LTO WPA. */
2694
2695 static void
2696 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2697 {
2698 isra_call_summary *csum = call_sums->get_create (cs);
2699 unsigned input_count = streamer_read_uhwi (ib);
2700 csum->init_inputs (input_count);
2701 for (unsigned i = 0; i < input_count; i++)
2702 {
2703 isra_param_flow *ipf = &csum->m_arg_flow[i];
2704 ipf->length = streamer_read_hwi (ib);
2705 bitpack_d bp = streamer_read_bitpack (ib);
2706 for (int j = 0; j < ipf->length; j++)
2707 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2708 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2709 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2710 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2711 ipf->unit_offset = streamer_read_uhwi (ib);
2712 ipf->unit_size = streamer_read_uhwi (ib);
2713 }
2714 bitpack_d bp = streamer_read_bitpack (ib);
2715 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2716 csum->m_return_returned = bp_unpack_value (&bp, 1);
2717 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2718 }
2719
2720 /* Read intraprocedural analysis information about NODE and all of its
2721 outgoing edges from a stream for LTO WPA. */
2722
2723 static void
2724 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2725 struct data_in *data_in)
2726 {
2727 isra_func_summary *ifs = func_sums->get_create (node);
2728 unsigned param_desc_count = streamer_read_uhwi (ib);
2729 if (param_desc_count > 0)
2730 {
2731 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2732 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2733 }
2734 for (unsigned i = 0; i < param_desc_count; i++)
2735 {
2736 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2737 unsigned access_count = streamer_read_uhwi (ib);
2738 for (unsigned j = 0; j < access_count; j++)
2739 {
2740 param_access *acc = ggc_cleared_alloc<param_access> ();
2741 acc->type = stream_read_tree (ib, data_in);
2742 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2743 acc->unit_offset = streamer_read_uhwi (ib);
2744 acc->unit_size = streamer_read_uhwi (ib);
2745 bitpack_d bp = streamer_read_bitpack (ib);
2746 acc->certain = bp_unpack_value (&bp, 1);
2747 vec_safe_push (desc->accesses, acc);
2748 }
2749 desc->param_size_limit = streamer_read_uhwi (ib);
2750 desc->size_reached = streamer_read_uhwi (ib);
2751 bitpack_d bp = streamer_read_bitpack (ib);
2752 desc->locally_unused = bp_unpack_value (&bp, 1);
2753 desc->split_candidate = bp_unpack_value (&bp, 1);
2754 desc->by_ref = bp_unpack_value (&bp, 1);
2755 }
2756 bitpack_d bp = streamer_read_bitpack (ib);
2757 ifs->m_candidate = bp_unpack_value (&bp, 1);
2758 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2759 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2760 ifs->m_queued = 0;
2761
2762 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2763 isra_read_edge_summary (ib, e);
2764 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2765 isra_read_edge_summary (ib, e);
2766 }
2767
2768 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2769 data DATA. TODO: This function was copied almost verbatim from ipa-prop.c,
2770 it should be possible to unify them somehow. */
2771
2772 static void
2773 isra_read_summary_section (struct lto_file_decl_data *file_data,
2774 const char *data, size_t len)
2775 {
2776 const struct lto_function_header *header =
2777 (const struct lto_function_header *) data;
2778 const int cfg_offset = sizeof (struct lto_function_header);
2779 const int main_offset = cfg_offset + header->cfg_size;
2780 const int string_offset = main_offset + header->main_size;
2781 struct data_in *data_in;
2782 unsigned int i;
2783 unsigned int count;
2784
2785 lto_input_block ib_main ((const char *) data + main_offset,
2786 header->main_size, file_data->mode_table);
2787
2788 data_in =
2789 lto_data_in_create (file_data, (const char *) data + string_offset,
2790 header->string_size, vNULL);
2791 count = streamer_read_uhwi (&ib_main);
2792
2793 for (i = 0; i < count; i++)
2794 {
2795 unsigned int index;
2796 struct cgraph_node *node;
2797 lto_symtab_encoder_t encoder;
2798
2799 index = streamer_read_uhwi (&ib_main);
2800 encoder = file_data->symtab_node_encoder;
2801 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2802 index));
2803 gcc_assert (node->definition);
2804 isra_read_node_info (&ib_main, node, data_in);
2805 }
2806 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2807 len);
2808 lto_data_in_delete (data_in);
2809 }
2810
2811 /* Read intraprocedural analysis information from a stream for LTO WPA. */
2812
2813 static void
2814 ipa_sra_read_summary (void)
2815 {
2816 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2817 struct lto_file_decl_data *file_data;
2818 unsigned int j = 0;
2819
2820 gcc_checking_assert (!func_sums);
2821 gcc_checking_assert (!call_sums);
2822 func_sums
2823 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2824 ipa_sra_function_summaries (symtab, true));
2825 call_sums = new ipa_sra_call_summaries (symtab);
2826
2827 while ((file_data = file_data_vec[j++]))
2828 {
2829 size_t len;
2830 const char *data
2831 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2832 if (data)
2833 isra_read_summary_section (file_data, data, len);
2834 }
2835 }
2836
2837 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2838
2839 static void
2840 ipa_sra_dump_all_summaries (FILE *f)
2841 {
2842 cgraph_node *node;
2843 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2844 {
2845 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2846
2847 isra_func_summary *ifs = func_sums->get (node);
2848 if (!ifs)
2849 {
2850 fprintf (f, " Function does not have any associated IPA-SRA "
2851 "summary\n");
2852 continue;
2853 }
2854 if (!ifs->m_candidate)
2855 {
2856 fprintf (f, " Not a candidate function\n");
2857 continue;
2858 }
2859 if (ifs->m_returns_value)
2860 fprintf (f, " Returns value\n");
2861 if (vec_safe_is_empty (ifs->m_parameters))
2862 fprintf (f, " No parameter information.\n");
2863 else
2864 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2865 {
2866 fprintf (f, " Descriptor for parameter %i:\n", i);
2867 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2868 }
2869 fprintf (f, "\n");
2870
2871 struct cgraph_edge *cs;
2872 for (cs = node->callees; cs; cs = cs->next_callee)
2873 {
2874 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2875 cs->callee->dump_name ());
2876 isra_call_summary *csum = call_sums->get (cs);
2877 if (csum)
2878 csum->dump (f);
2879 else
2880 fprintf (f, " Call summary is MISSING!\n");
2881 }
2882
2883 }
2884 fprintf (f, "\n\n");
2885 }
2886
2887 /* Perform function-scope viability tests that can only be made at IPA level
2888 and return false if the function is deemed unsuitable for IPA-SRA. */
2889
2890 static bool
2891 ipa_sra_ipa_function_checks (cgraph_node *node)
2892 {
2893 if (!node->can_be_local_p ())
2894 {
2895 if (dump_file)
2896 fprintf (dump_file, "Function %s disqualified because it cannot be "
2897 "made local.\n", node->dump_name ());
2898 return false;
2899 }
2900 if (!node->can_change_signature)
2901 {
2902 if (dump_file)
2903 fprintf (dump_file, "Function cannot change signature.\n");
2904 return false;
2905 }
2906
2907 return true;
2908 }
2909
2910 /* Issues found out by check_callers_for_issues. */
2911
2912 struct caller_issues
2913 {
2914 /* The candidate being considered. */
2915 cgraph_node *candidate;
2916 /* There is a thunk among callers. */
2917 bool thunk;
2918 /* Call site with no available information. */
2919 bool unknown_callsite;
2920 /* Call from outside the candidate's comdat group. */
2921 bool call_from_outside_comdat;
2922 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
2923 bool bit_aligned_aggregate_argument;
2924 };
2925
2926 /* Worker for call_for_symbol_and_aliases; set any flags in the passed
2927 caller_issues structure that apply. */
2928
2929 static bool
2930 check_for_caller_issues (struct cgraph_node *node, void *data)
2931 {
2932 struct caller_issues *issues = (struct caller_issues *) data;
2933
2934 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2935 {
2936 if (cs->caller->thunk.thunk_p)
2937 {
2938 issues->thunk = true;
2939 /* TODO: We should be able to process at least some types of
2940 thunks. */
2941 return true;
2942 }
2943 if (issues->candidate->calls_comdat_local
2944 && issues->candidate->same_comdat_group
2945 && !issues->candidate->in_same_comdat_group_p (cs->caller))
2946 {
2947 issues->call_from_outside_comdat = true;
2948 return true;
2949 }
2950
2951 isra_call_summary *csum = call_sums->get (cs);
2952 if (!csum)
2953 {
2954 issues->unknown_callsite = true;
2955 return true;
2956 }
2957
2958 if (csum->m_bit_aligned_arg)
2959 issues->bit_aligned_aggregate_argument = true;
2960 }
2961 return false;
2962 }
2963
2964 /* Look at all incoming edges to NODE, including aliases and thunks, and look
2965 for problems. Return true if NODE's type should not be modified at all. */
2966
2967 static bool
2968 check_all_callers_for_issues (cgraph_node *node)
2969 {
2970 struct caller_issues issues;
2971 memset (&issues, 0, sizeof (issues));
2972 issues.candidate = node;
2973
2974 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2975 if (issues.unknown_callsite)
2976 {
2977 if (dump_file && (dump_flags & TDF_DETAILS))
2978 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2979 "all modifications.\n", node->dump_name ());
2980 return true;
2981 }
2982 /* TODO: We should be able to process at least some types of thunks. */
2983 if (issues.thunk)
2984 {
2985 if (dump_file && (dump_flags & TDF_DETAILS))
2986 fprintf (dump_file, "A call of %s is through a thunk, which is not"
2987 " handled yet. Disabling all modifications.\n",
2988 node->dump_name ());
2989 return true;
2990 }
2991 if (issues.call_from_outside_comdat)
2992 {
2993 if (dump_file)
2994 fprintf (dump_file, "Function would become private comdat called "
2995 "outside of its comdat group.\n");
2996 return true;
2997 }
2998
2999 if (issues.bit_aligned_aggregate_argument)
3000 {
3001 /* Let's only remove parameters/return values from such functions.
3002 TODO: We could only prevent splitting the problematic parameters if
3003 anybody thinks it is worth it. */
3004 if (dump_file && (dump_flags & TDF_DETAILS))
3005 fprintf (dump_file, "A call of %s has a bit-aligned aggregate argument,"
3006 " disabling parameter splitting.\n", node->dump_name ());
3007
3008 isra_func_summary *ifs = func_sums->get (node);
3009 gcc_checking_assert (ifs);
3010 unsigned param_count = vec_safe_length (ifs->m_parameters);
3011 for (unsigned i = 0; i < param_count; i++)
3012 (*ifs->m_parameters)[i].split_candidate = false;
3013 }
3014 return false;
3015 }
3016
3017 /* Find the access with corresponding OFFSET and SIZE among accesses in
3018 PARAM_DESC and return it or NULL if such an access is not there. */
3019
3020 static param_access *
3021 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
3022 {
3023 unsigned pclen = vec_safe_length (param_desc->accesses);
3024
3025 /* The search is linear but the number of stored accesses is bounded by
3026 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
3027
3028 for (unsigned i = 0; i < pclen; i++)
3029 if ((*param_desc->accesses)[i]->unit_offset == offset
3030 && (*param_desc->accesses)[i]->unit_size == size)
3031 return (*param_desc->accesses)[i];
3032
3033 return NULL;
3034 }
3035
3036 /* Return true iff the total size SIZE of definite replacements would violate
3037 the limit set for it in DESC. */
3038
3039 static bool
3040 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3041 {
3042 unsigned limit = desc->param_size_limit;
3043 if (size > limit
3044 || (!desc->by_ref && size == limit))
3045 return true;
3046 return false;
3047 }
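
/* A hypothetical example: for a by-value aggregate parameter of 16 units,
   param_size_limit is 16 and replacements totalling exactly 16 units are
   rejected, because then passing the original aggregate is no worse.  For a
   by-reference parameter the limit may be bigger (scaled by
   param_ipa_sra_ptr_growth_factor) and only exceeding it is fatal.  */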
3048
3049 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3050 the set limit. IDX is the parameter number which is dumped when
3051 disqualifying. */
3052
3053 static void
3054 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3055 {
3056 unsigned after = desc->size_reached + size;
3057 if (size_would_violate_limit_p (desc, after))
3058 {
3059 if (dump_file && (dump_flags & TDF_DETAILS))
3060 fprintf (dump_file, " ...size limit reached, disqualifying "
3061 "candidate parameter %u\n", idx);
3062 desc->split_candidate = false;
3063 return;
3064 }
3065 desc->size_reached = after;
3066 }
3067
3068 /* Take all actions required to deal with an edge CS that represents a call to
3069 an unknown or un-analyzed function, for both parameter removal and
3070 splitting. */
3071
3072 static void
3073 process_edge_to_unknown_caller (cgraph_edge *cs)
3074 {
3075 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3076 gcc_checking_assert (from_ifs);
3077 isra_call_summary *csum = call_sums->get (cs);
3078
3079 if (dump_file && (dump_flags & TDF_DETAILS))
3080 fprintf (dump_file, "Processing an edge to an unknown callee from %s:\n",
3081 cs->caller->dump_name ());
3082
3083 unsigned args_count = csum->m_arg_flow.length ();
3084 for (unsigned i = 0; i < args_count; i++)
3085 {
3086 isra_param_flow *ipf = &csum->m_arg_flow[i];
3087
3088 if (ipf->pointer_pass_through)
3089 {
3090 isra_param_desc *param_desc
3091 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3092 param_desc->locally_unused = false;
3093 param_desc->split_candidate = false;
3094 continue;
3095 }
3096 if (ipf->aggregate_pass_through)
3097 {
3098 unsigned idx = get_single_param_flow_source (ipf);
3099 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3100
3101 param_desc->locally_unused = false;
3102 if (!param_desc->split_candidate)
3103 continue;
3104 gcc_assert (!param_desc->by_ref);
3105 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3106 ipf->unit_size);
3107 gcc_checking_assert (pacc);
3108 pacc->certain = true;
3109 if (overlapping_certain_accesses_p (param_desc, NULL))
3110 {
3111 if (dump_file && (dump_flags & TDF_DETAILS))
3112 fprintf (dump_file, " ...leading to overlap, "
3113 " disqualifying candidate parameter %u\n",
3114 idx);
3115 param_desc->split_candidate = false;
3116 }
3117 else
3118 bump_reached_size (param_desc, pacc->unit_size, idx);
3119 ipf->aggregate_pass_through = false;
3120 continue;
3121 }
3122
3123 for (int j = 0; j < ipf->length; j++)
3124 {
3125 int input_idx = ipf->inputs[j];
3126 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3127 }
3128 }
3129 }
3130
3131 /* Propagate parameter removal information through cross-SCC edge CS, i.e.
3132 mark the caller parameters feeding each argument as used, unless the
3133 corresponding callee parameter is known to be locally unused. */
3134
3135 static void
3136 param_removal_cross_scc_edge (cgraph_edge *cs)
3137 {
3138 enum availability availability;
3139 cgraph_node *callee = cs->callee->function_symbol (&availability);
3140 isra_func_summary *to_ifs = func_sums->get (callee);
3141 if (!to_ifs || !to_ifs->m_candidate
3142 || (availability < AVAIL_AVAILABLE)
3143 || vec_safe_is_empty (to_ifs->m_parameters))
3144 {
3145 process_edge_to_unknown_caller (cs);
3146 return;
3147 }
3148 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3149 gcc_checking_assert (from_ifs);
3150
3151 isra_call_summary *csum = call_sums->get (cs);
3152 unsigned args_count = csum->m_arg_flow.length ();
3153 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3154
3155 for (unsigned i = 0; i < args_count; i++)
3156 {
3157 bool unused_in_callee;
3158 if (i < param_count)
3159 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3160 else
3161 unused_in_callee = false;
3162
3163 if (!unused_in_callee)
3164 {
3165 isra_param_flow *ipf = &csum->m_arg_flow[i];
3166 for (int j = 0; j < ipf->length; j++)
3167 {
3168 int input_idx = ipf->inputs[j];
3169 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3170 }
3171 }
3172 }
3173 }
3174
3175 /* Unless it is already there, push NODE which is also described by IFS to
3176 STACK. */
3177
3178 static void
3179 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3180 vec<cgraph_node *> *stack)
3181 {
3182 if (!ifs->m_queued)
3183 {
3184 ifs->m_queued = true;
3185 stack->safe_push (node);
3186 }
3187 }
3188
3189 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3190 used and push CALLER on STACK. */
3191
3192 static void
3193 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3194 cgraph_node *caller, vec<cgraph_node *> *stack)
3195 {
3196 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3197 {
3198 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3199 isra_push_node_to_stack (caller, from_ifs, stack);
3200 }
3201 }
3202
3203
3204 /* Propagate the information that a parameter is not used only locally within
3205 an SCC across CS to the caller, which must be in the same SCC as the
3206 callee. Push any callers that need to be re-processed to STACK. */
3207
3208 static void
3209 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3210 {
3211 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3212 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3213 return;
3214
3215 isra_call_summary *csum = call_sums->get (cs);
3216 gcc_checking_assert (csum);
3217 unsigned args_count = csum->m_arg_flow.length ();
3218 enum availability availability;
3219 cgraph_node *callee = cs->callee->function_symbol (&availability);
3220 isra_func_summary *to_ifs = func_sums->get (callee);
3221
3222 unsigned param_count
3223 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3224 ? vec_safe_length (to_ifs->m_parameters) : 0;
3225 for (unsigned i = 0; i < args_count; i++)
3226 {
3227 if (i < param_count
3228 && (*to_ifs->m_parameters)[i].locally_unused)
3229 continue;
3230
3231 /* The argument is needed in the callee, so we must mark the parameter as
3232 used also in the caller and its callers within this SCC. */
3233 isra_param_flow *ipf = &csum->m_arg_flow[i];
3234 for (int j = 0; j < ipf->length; j++)
3235 {
3236 int input_idx = ipf->inputs[j];
3237 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3238 }
3239 }
3240 }
3241
3242 /* Propagate the information that a parameter is not used only locally within
3243 an SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3244 same SCC. Push any callers that need to be re-processed to STACK. */
3245
3246 static bool
3247 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3248 {
3249 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3250 cgraph_edge *cs;
3251 for (cs = node->callers; cs; cs = cs->next_caller)
3252 if (ipa_edge_within_scc (cs))
3253 propagate_used_across_scc_edge (cs, stack);
3254 return false;
3255 }
3256
3257 /* Return true iff all certain accesses in ARG_DESC are also present as
3258 certain accesses in PARAM_DESC. */
3259
3260 static bool
3261 all_callee_accesses_present_p (isra_param_desc *param_desc,
3262 isra_param_desc *arg_desc)
3263 {
3264 unsigned aclen = vec_safe_length (arg_desc->accesses);
3265 for (unsigned j = 0; j < aclen; j++)
3266 {
3267 param_access *argacc = (*arg_desc->accesses)[j];
3268 if (!argacc->certain)
3269 continue;
3270 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3271 argacc->unit_size);
3272 if (!pacc || !pacc->certain)
3273 return false;
3274 }
3275 return true;
3276 }
3277
3278 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3279 does not allow instantiating an auto_vec with a type defined within a
3280 function so it is a global type. */
3281 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3282
3283
3284 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3285 (which belongs to CALLER) if they would not violate some constraint there.
3286 If successful, return NULL, otherwise return the string reason for failure
3287 (which can be written to the dump file). DELTA_OFFSET is the known offset
3288 of the actual argument within the formal parameter (so of ARG_DESCS within
3289 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3290 known. In case of success, set *CHANGE_P to true if propagation actually
3291 changed anything. */
3292
3293 static const char *
3294 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3295 isra_param_desc *arg_desc,
3296 unsigned delta_offset, unsigned arg_size,
3297 bool *change_p)
3298 {
3299 unsigned pclen = vec_safe_length (param_desc->accesses);
3300 unsigned aclen = vec_safe_length (arg_desc->accesses);
3301 unsigned prop_count = 0;
3302 unsigned prop_size = 0;
3303 bool change = false;
3304
3305 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3306 for (unsigned j = 0; j < aclen; j++)
3307 {
3308 param_access *argacc = (*arg_desc->accesses)[j];
3309 prop_kinds.safe_push (ACC_PROP_DONT);
3310
3311 if (arg_size > 0
3312 && argacc->unit_offset + argacc->unit_size > arg_size)
3313 return "callee access outsize size boundary";
3314
3315 if (!argacc->certain)
3316 continue;
3317
3318 unsigned offset = argacc->unit_offset + delta_offset;
3319 /* Given that accesses are initially stored according to increasing
3320 offset and decreasing size in case of equal offsets, the following
3321 searches could be written more efficiently if we kept the ordering
3322 when copying. But the number of accesses is capped at
3323 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3324 messy quickly, so let's improve on that only if necessary. */
3325
3326 bool exact_match = false;
3327 for (unsigned i = 0; i < pclen; i++)
3328 {
3329 /* Check for overlaps. */
3330 param_access *pacc = (*param_desc->accesses)[i];
3331 if (pacc->unit_offset == offset
3332 && pacc->unit_size == argacc->unit_size)
3333 {
3334 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3335 || !types_compatible_p (argacc->type, pacc->type))
3336 return "propagated access types would not match existing ones";
3337
3338 exact_match = true;
3339 if (!pacc->certain)
3340 {
3341 prop_kinds[j] = ACC_PROP_CERTAIN;
3342 prop_size += argacc->unit_size;
3343 change = true;
3344 }
3345 continue;
3346 }
3347
3348 if (offset < pacc->unit_offset + pacc->unit_size
3349 && offset + argacc->unit_size > pacc->unit_offset)
3350 {
3351 /* An overlap is never acceptable with a certain access; with a
3352 non-certain one, the propagated access must fit entirely within it. */
3353 if (pacc->certain
3354 || offset < pacc->unit_offset
3355 || (offset + argacc->unit_size
3356 > pacc->unit_offset + pacc->unit_size))
3357 return "a propagated access would conflict in caller";
3358 }
3359 }
3360
3361 if (!exact_match)
3362 {
3363 prop_kinds[j] = ACC_PROP_COPY;
3364 prop_count++;
3365 prop_size += argacc->unit_size;
3366 change = true;
3367 }
3368 }
3369
3370 if (!change)
3371 return NULL;
3372
3373 if ((prop_count + pclen
3374 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3375 || size_would_violate_limit_p (param_desc,
3376 param_desc->size_reached + prop_size))
3377 return "propagating accesses would violate the count or size limit";
3378
3379 *change_p = true;
3380 for (unsigned j = 0; j < aclen; j++)
3381 {
3382 if (prop_kinds[j] == ACC_PROP_COPY)
3383 {
3384 param_access *argacc = (*arg_desc->accesses)[j];
3385
3386 param_access *copy = ggc_cleared_alloc<param_access> ();
3387 copy->unit_offset = argacc->unit_offset + delta_offset;
3388 copy->unit_size = argacc->unit_size;
3389 copy->type = argacc->type;
3390 copy->alias_ptr_type = argacc->alias_ptr_type;
3391 copy->certain = true;
3392 vec_safe_push (param_desc->accesses, copy);
3393 }
3394 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3395 {
3396 param_access *argacc = (*arg_desc->accesses)[j];
3397 param_access *csp
3398 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3399 argacc->unit_size);
3400 csp->certain = true;
3401 }
3402 }
3403
3404 param_desc->size_reached += prop_size;
3405
3406 return NULL;
3407 }
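
/* An illustrative (hypothetical) example: if the caller passes the piece of
   its aggregate parameter residing at unit offset 8 and the callee has a
   certain access at unit offset 4 of unit size 4, the access is considered
   at unit offset 8 + 4 = 12 in the caller, where it either turns an
   identical non-certain access certain (ACC_PROP_CERTAIN) or is copied in
   as a new certain access (ACC_PROP_COPY), provided the replacement count
   and size limits still hold.  */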
3408
3409 /* Propagate parameter splitting information through call graph edge CS.
3410 Return true if any changes that might need to be propagated within SCCs have
3411 been made. The function also clears the aggregate_pass_through and
3412 pointer_pass_through in call summaries which do not need to be processed
3413 again if this CS is revisited when iterating while changes are propagated
3414 within an SCC. */
3415
3416 static bool
3417 param_splitting_across_edge (cgraph_edge *cs)
3418 {
3419 bool res = false;
3420 bool cross_scc = !ipa_edge_within_scc (cs);
3421 enum availability availability;
3422 cgraph_node *callee = cs->callee->function_symbol (&availability);
3423 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3424 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3425
3426 isra_call_summary *csum = call_sums->get (cs);
3427 gcc_checking_assert (csum);
3428 unsigned args_count = csum->m_arg_flow.length ();
3429 isra_func_summary *to_ifs = func_sums->get (callee);
3430 unsigned param_count
3431 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3432 ? vec_safe_length (to_ifs->m_parameters)
3433 : 0);
3434
3435 if (dump_file && (dump_flags & TDF_DETAILS))
3436 fprintf (dump_file, "Splitting across %s->%s:\n",
3437 cs->caller->dump_name (), callee->dump_name ());
3438
3439 unsigned i;
3440 for (i = 0; (i < args_count) && (i < param_count); i++)
3441 {
3442 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3443 isra_param_flow *ipf = &csum->m_arg_flow[i];
3444
3445 if (arg_desc->locally_unused)
3446 {
3447 if (dump_file && (dump_flags & TDF_DETAILS))
3448 fprintf (dump_file, " ->%u: unused in callee\n", i);
3449 ipf->pointer_pass_through = false;
3450 continue;
3451 }
3452
      if (ipf->pointer_pass_through)
        {
          int idx = get_single_param_flow_source (ipf);
          isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
          if (!param_desc->split_candidate)
            continue;
          gcc_assert (param_desc->by_ref);

          if (!arg_desc->split_candidate || !arg_desc->by_ref)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " %u->%u: not candidate or not by "
                         "reference in callee\n", idx, i);
              param_desc->split_candidate = false;
              ipf->pointer_pass_through = false;
              res = true;
            }
          else if (!ipf->safe_to_import_accesses)
            {
              if (!all_callee_accesses_present_p (param_desc, arg_desc))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, " %u->%u: cannot import accesses.\n",
                             idx, i);
                  param_desc->split_candidate = false;
                  ipf->pointer_pass_through = false;
                  res = true;
                }
              else
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, " %u->%u: verified callee accesses "
                             "present.\n", idx, i);
                  if (cross_scc)
                    ipf->pointer_pass_through = false;
                }
            }
          else
            {
              const char *pull_failure
                = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
                                             0, 0, &res);
              if (pull_failure)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, " %u->%u: by_ref access pull "
                             "failed: %s.\n", idx, i, pull_failure);
                  param_desc->split_candidate = false;
                  ipf->pointer_pass_through = false;
                  res = true;
                }
              else
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, " %u->%u: by_ref access pull "
                             "succeeded.\n", idx, i);
                  if (cross_scc)
                    ipf->pointer_pass_through = false;
                }
            }
        }
      else if (ipf->aggregate_pass_through)
        {
          int idx = get_single_param_flow_source (ipf);
          isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
          if (!param_desc->split_candidate)
            continue;
          gcc_assert (!param_desc->by_ref);
          param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
                                                  ipf->unit_size);
          gcc_checking_assert (pacc);

          if (pacc->certain)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " %u->%u: already certain\n", idx, i);
              ipf->aggregate_pass_through = false;
            }
          else if (!arg_desc->split_candidate || arg_desc->by_ref)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " %u->%u: not candidate or by "
                         "reference in callee\n", idx, i);

              pacc->certain = true;
              if (overlapping_certain_accesses_p (param_desc, NULL))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, " ...leading to overlap, "
                             "disqualifying candidate parameter %u\n",
                             idx);
                  param_desc->split_candidate = false;
                }
              else
                bump_reached_size (param_desc, pacc->unit_size, idx);

              ipf->aggregate_pass_through = false;
              res = true;
            }
          else
            {
              const char *pull_failure
                = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
                                             ipf->unit_offset,
                                             ipf->unit_size, &res);
              if (pull_failure)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, " %u->%u: arg access pull "
                             "failed: %s.\n", idx, i, pull_failure);

                  ipf->aggregate_pass_through = false;
                  pacc->certain = true;

                  if (overlapping_certain_accesses_p (param_desc, NULL))
                    {
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        fprintf (dump_file, " ...leading to overlap, "
                                 "disqualifying candidate parameter %u\n",
                                 idx);
                      param_desc->split_candidate = false;
                    }
                  else
                    bump_reached_size (param_desc, pacc->unit_size, idx);

                  res = true;
                }
              else
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, " %u->%u: arg access pull "
                             "succeeded.\n", idx, i);
                  if (cross_scc)
                    ipf->aggregate_pass_through = false;
                }
            }
        }
    }

  /* Handle argument-parameter count mismatches, i.e. arguments for which the
     loop above found no corresponding formal parameter, either because the
     call passes more arguments than the callee has parameters or because the
     callee could not be analyzed.  */
  for (; (i < args_count); i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
        {
          int idx = get_single_param_flow_source (ipf);
          ipf->pointer_pass_through = false;
          ipf->aggregate_pass_through = false;
          isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
          if (!param_desc->split_candidate)
            continue;

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
                     idx, i);
          param_desc->split_candidate = false;
          res = true;
        }
    }
  return res;
}

/* Worker for call_for_symbol_and_aliases.  If all of NODE's callers either
   ignore its return value, or come from the same SCC and use the return
   value only to compute their own return value, return false; otherwise
   return true.  */

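/* For instance (a hypothetical sketch, not taken from the testsuite), given

     static int even (int);
     static int odd (int n) { return n ? even (n - 1) : 0; }
     static int even (int n) { return n ? odd (n - 1) : 1; }

   where all external callers discard the results, each function does use
   the other's return value, but only to compute its own; because both
   belong to the same SCC, this predicate, together with the SCC iteration
   in ipa_sra_analysis, can still conclude that neither return value is
   actually needed.  */
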
static bool
retval_used_p (cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    {
      isra_call_summary *csum = call_sums->get (cs);
      gcc_checking_assert (csum);
      if (csum->m_return_ignored)
        continue;
      if (!csum->m_return_returned)
        return true;

      isra_func_summary *from_ifs = func_sums->get (cs->caller);
      if (!from_ifs || !from_ifs->m_candidate)
        return true;

      if (!ipa_edge_within_scc (cs)
          && !from_ifs->m_return_ignored)
        return true;
    }

  return false;
}

/* Push into NEW_PARAMS all required parameter adjustment entries to copy or
   modify the parameter which originally had index BASE_INDEX, which had index
   PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any) and
   was described there by PREV_ADJUSTMENT.  If the parent clone is the
   original function, PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to
   BASE_INDEX.  */

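/* As a hypothetical illustration of the result: for a function with two
   parameters of which the first is copied unmodified and the second is
   split into two certain accesses, NEW_PARAMS would end up holding one
   IPA_PARAM_OP_COPY entry followed by two IPA_PARAM_OP_SPLIT entries, the
   latter carrying the unit offsets and types of the individual pieces.  */
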
static void
push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
                                  unsigned prev_clone_index,
                                  ipa_adjusted_param *prev_adjustment,
                                  vec<ipa_adjusted_param, va_gc> **new_params)
{
  isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
  if (desc->locally_unused)
    {
      if (dump_file)
        fprintf (dump_file, " Will remove parameter %u\n", base_index);
      return;
    }

  if (!desc->split_candidate)
    {
      ipa_adjusted_param adj;
      if (prev_adjustment)
        {
          adj = *prev_adjustment;
          adj.prev_clone_adjustment = true;
          adj.prev_clone_index = prev_clone_index;
        }
      else
        {
          memset (&adj, 0, sizeof (adj));
          adj.op = IPA_PARAM_OP_COPY;
          adj.base_index = base_index;
          adj.prev_clone_index = prev_clone_index;
        }
      vec_safe_push ((*new_params), adj);
      return;
    }

  if (dump_file)
    fprintf (dump_file, " Will split parameter %u\n", base_index);

  gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
  unsigned aclen = vec_safe_length (desc->accesses);
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *pa = (*desc->accesses)[j];
      if (!pa->certain)
        continue;
      if (dump_file)
        fprintf (dump_file, " - component at byte offset %u, "
                 "size %u\n", pa->unit_offset, pa->unit_size);

      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_SPLIT;
      adj.base_index = base_index;
      adj.prev_clone_index = prev_clone_index;
      adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
      adj.reverse = pa->reverse;
      adj.type = pa->type;
      adj.alias_ptr_type = pa->alias_ptr_type;
      adj.unit_offset = pa->unit_offset;
      vec_safe_push ((*new_params), adj);
    }
}

/* Worker for call_for_symbol_thunks_and_aliases.  Set the calls_comdat_local
   flag of all callers of NODE.  */

static bool
mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    cs->caller->calls_comdat_local = true;
  return false;
}

/* Do final processing of results of IPA propagation regarding NODE and clone
   it if appropriate.  */

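/* Note that clones produced here are named by appending the "isra" suffix
   and a per-symbol counter to the original assembler name, so a first clone
   of foo will typically show up as foo.isra.0 in dumps.  This is only a
   sketch of the naming scheme implemented by create_virtual_clone below,
   not a guaranteed interface.  */
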
static void
process_isra_node_results (cgraph_node *node,
                           hash_map<const char *, unsigned> *clone_num_suffixes)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  if (node->clone.param_adjustments)
    {
      check_surviving = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);
    }

  unsigned param_count = vec_safe_length (ifs->m_parameters);
  bool will_change_function = false;
  if (ifs->m_returns_value && ifs->m_return_ignored)
    will_change_function = true;
  else
    for (unsigned i = 0; i < param_count; i++)
      {
        isra_param_desc *desc = &(*ifs->m_parameters)[i];
        if ((desc->locally_unused || desc->split_candidate)
            /* Make sure we do not clone just to attempt to remove an already
               removed unused argument.  */
            && (!check_surviving
                || (i < surviving_params.length ()
                    && surviving_params[i])))
          {
            will_change_function = true;
            break;
          }
      }
  if (!will_change_function)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "\nEvaluating analysis results for %s\n",
               node->dump_name ());
      if (ifs->m_returns_value && ifs->m_return_ignored)
        fprintf (dump_file, " Will remove return value.\n");
    }

  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  if (ipa_param_adjustments *old_adjustments = node->clone.param_adjustments)
    {
      unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
      for (unsigned i = 0; i < old_adj_len; i++)
        {
          ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
          push_param_adjustments_for_index (ifs, old_adj->base_index, i,
                                            old_adj, &new_params);
        }
    }
  else
    for (unsigned i = 0; i < param_count; i++)
      push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);

  ipa_param_adjustments *new_adjustments
    = (new (ggc_alloc <ipa_param_adjustments> ())
       ipa_param_adjustments (new_params, param_count,
                              ifs->m_returns_value && ifs->m_return_ignored));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n Created adjustments:\n");
      new_adjustments->dump (dump_file);
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
    IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
  vec<cgraph_edge *> callers = node->collect_callers ();
  cgraph_node *new_node
    = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
                                  suffix_counter);
  suffix_counter++;
  if (node->calls_comdat_local && node->same_comdat_group)
    {
      new_node->add_to_same_comdat_group (node);
      new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
                                             NULL, true);
    }
  new_node->calls_comdat_local = node->calls_comdat_local;

  if (dump_file)
    fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
  callers.release ();
}

/* Check which parameters of NODE described by IFS have survived until IPA-SRA
   and disable transformations for those which have not or which should not be
   transformed because the associated debug counter reached its limit.  Return
   true if none survived or if there were no candidates to begin with.  */

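/* The debug counter can be used to bisect which individual transformation
   introduces a problem; e.g. compiling with -fdbg-cnt=ipa_sra_params:7
   should permit roughly the first seven parameter transformations only.
   This is a usage sketch; see dbg-cnt.def for the counter definitions and
   the exact semantics of the limit.  */
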
static bool
disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
{
  bool ret = true;
  unsigned len = vec_safe_length (ifs->m_parameters);
  if (!len)
    return true;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  if (node->clone.param_adjustments)
    {
      check_surviving = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);
    }
  bool dumped_first = false;
  for (unsigned i = 0; i < len; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      if (!dbg_cnt (ipa_sra_params))
        {
          desc->locally_unused = false;
          desc->split_candidate = false;
        }
      else if (check_surviving
               && (i >= surviving_params.length ()
                   || !surviving_params[i]))
        {
          /* Even if the parameter was removed by a previous IPA pass, we do
             not clear locally_unused because if it really is unused, this
             information might be useful in callers.  */
          desc->split_candidate = false;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              if (!dumped_first)
                {
                  fprintf (dump_file,
                           "The following parameters of %s are dead on "
                           "arrival:", node->dump_name ());
                  dumped_first = true;
                }
              fprintf (dump_file, " %u", i);
            }
        }
      else if (desc->locally_unused || desc->split_candidate)
        ret = false;
    }

  if (dumped_first)
    fprintf (dump_file, "\n");

  return ret;
}

/* Run the interprocedural part of IPA-SRA.  */

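/* A hypothetical example of the overall flow: for a call chain
   main -> f -> g with no cycles, parameter removal and splitting
   information is propagated in the order g, f, main by the first loop
   below, and return-value use is propagated in the opposite order by the
   second one; members of a strongly-connected component are instead
   revisited until a fixed point is reached.  */
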
static unsigned int
ipa_sra_analysis (void)
{
  if (dump_file)
    {
      fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
      ipa_sra_dump_all_summaries (dump_file);
    }

  gcc_checking_assert (func_sums);
  gcc_checking_assert (call_sums);
  cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  auto_vec <cgraph_node *, 16> stack;
  int node_scc_count = ipa_reduced_postorder (order, true, NULL);

  /* One sweep from callees to callers for parameter removal and splitting. */
  for (int i = 0; i < node_scc_count; i++)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      /* Preliminary IPA function level checks and first step of parameter
         removal.  */
      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        {
          isra_func_summary *ifs = func_sums->get (v);
          if (!ifs || !ifs->m_candidate)
            continue;
          if (!ipa_sra_ipa_function_checks (v)
              || check_all_callers_for_issues (v))
            {
              ifs->zap ();
              continue;
            }
          if (disable_unavailable_parameters (v, ifs))
            continue;
          for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
            process_edge_to_unknown_caller (cs);
          for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
            if (!ipa_edge_within_scc (cs))
              param_removal_cross_scc_edge (cs);
        }

      /* Look at edges within the current SCC and propagate used-ness across
         them, pushing onto the stack all nodes which might need to be
         revisited.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
                                               &stack, true);

      /* Keep revisiting and pushing until nothing changes.  */
      while (!stack.is_empty ())
        {
          cgraph_node *v = stack.pop ();
          isra_func_summary *ifs = func_sums->get (v);
          gcc_checking_assert (ifs && ifs->m_queued);
          ifs->m_queued = false;

          v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
                                                 &stack, true);
        }

      /* Parameter splitting.  */
      bool repeat_scc_access_propagation;
      do
        {
          repeat_scc_access_propagation = false;
          FOR_EACH_VEC_ELT (cycle_nodes, j, v)
            {
              isra_func_summary *ifs = func_sums->get (v);
              if (!ifs
                  || !ifs->m_candidate
                  || vec_safe_is_empty (ifs->m_parameters))
                continue;
              for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
                if (param_splitting_across_edge (cs))
                  repeat_scc_access_propagation = true;
            }
        }
      while (repeat_scc_access_propagation);

      if (flag_checking)
        FOR_EACH_VEC_ELT (cycle_nodes, j, v)
          verify_splitting_accesses (v, true);

      cycle_nodes.release ();
    }

  /* One sweep from callers to callees for result removal.  */
  for (int i = node_scc_count - 1; i >= 0; i--)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        {
          isra_func_summary *ifs = func_sums->get (v);
          if (!ifs || !ifs->m_candidate)
            continue;

          bool return_needed
            = (ifs->m_returns_value
               && (!dbg_cnt (ipa_sra_retvalues)
                   || v->call_for_symbol_and_aliases (retval_used_p,
                                                      NULL, true)));
          ifs->m_return_ignored = !return_needed;
          if (return_needed)
            isra_push_node_to_stack (v, ifs, &stack);
        }

      while (!stack.is_empty ())
        {
          cgraph_node *node = stack.pop ();
          isra_func_summary *ifs = func_sums->get (node);
          gcc_checking_assert (ifs && ifs->m_queued);
          ifs->m_queued = false;

          for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
            if (ipa_edge_within_scc (cs)
                && call_sums->get (cs)->m_return_returned)
              {
                enum availability av;
                cgraph_node *callee = cs->callee->function_symbol (&av);
                isra_func_summary *to_ifs = func_sums->get (callee);
                if (to_ifs && to_ifs->m_return_ignored)
                  {
                    to_ifs->m_return_ignored = false;
                    isra_push_node_to_stack (callee, to_ifs, &stack);
                  }
              }
        }
      cycle_nodes.release ();
    }

  ipa_free_postorder_info ();
  free (order);

  if (dump_file)
    {
      if (dump_flags & TDF_DETAILS)
        {
          fprintf (dump_file, "\n========== IPA-SRA propagation final state "
                   "==========\n");
          ipa_sra_dump_all_summaries (dump_file);
        }
      fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
    }

  hash_map<const char *, unsigned> *clone_num_suffixes
    = new hash_map<const char *, unsigned>;

  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    process_isra_node_results (node, clone_num_suffixes);

  delete clone_num_suffixes;
  ggc_delete (func_sums);
  func_sums = NULL;
  delete call_sums;
  call_sums = NULL;

  if (dump_file)
    fprintf (dump_file, "\n========== IPA-SRA IPA analysis done "
             "==========\n\n");
  return 0;
}

const pass_data pass_data_ipa_sra =
{
  IPA_PASS, /* type */
  "sra", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_SRA, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_sra : public ipa_opt_pass_d
{
public:
  pass_ipa_sra (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
                      ipa_sra_generate_summary, /* generate_summary */
                      ipa_sra_write_summary, /* write_summary */
                      ipa_sra_read_summary, /* read_summary */
                      NULL, /* write_optimization_summary */
                      NULL, /* read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      NULL, /* function_transform */
                      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* TODO: We should remove the optimize check after we ensure we never
         run IPA passes when not optimizing.  */
      return (flag_ipa_sra && optimize);
    }

  virtual unsigned int execute (function *) { return ipa_sra_analysis (); }

}; // class pass_ipa_sra

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_sra (gcc::context *ctxt)
{
  return new pass_ipa_sra (ctxt);
}


#include "gt-ipa-sra.h"