]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/ipa-sra.c
Correct a function pre/postcondition [PR102403].
[thirdparty/gcc.git] / gcc / ipa-sra.c
CommitLineData
ff6686d2 1/* Interprocedural scalar replacement of aggregates
99dee823 2 Copyright (C) 2008-2021 Free Software Foundation, Inc.
ff6686d2
MJ
3
4 Contributed by Martin Jambor <mjambor@suse.cz>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22/* IPA-SRA is an interprocedural pass that removes unused function return
23 values (turning functions returning a value which is never used into void
24 functions), removes unused function parameters. It can also replace an
25 aggregate parameter by a set of other parameters representing part of the
26 original, turning those passed by reference into new ones which pass the
27 value directly.
28
29 The pass is a true IPA one, which means that it works in three stages in
30 order to be able to take advantage of LTO. First, summaries about functions
31 and each calls are generated. Function summaries (often called call graph
32 node summaries) contain mainly information about which parameters are
33 potential transformation candidates and which bits of candidates are
34 accessed. We differentiate between accesses done as a part of a call
35 statement (which might be not necessary if the callee is also transformed)
36 and others (which are mandatory). Call summaries (often called call graph
37 edge summaries) contain information about which function formal parameters
38 feed into which actual call arguments so that if two parameters are only
39 used in a sum which is then passed to another function which then however
40 does not use this parameter, all three parameters of the two functions can
41 be eliminated. Edge summaries also have flags whether the return value is
42 used or if it is only returned in the caller too. In LTO mode these
43 summaries are then streamed to the object file in the compilation phase and
44 streamed back in in the WPA analysis stage.
45
46 The interprocedural analysis phase traverses the graph in topological order
47 in two sweeps, one in each direction. First, from callees to callers for
48 parameter removal and splitting. Each strongly-connected component is
49 processed iteratively until the situation in it stabilizes. The pass from
50 callers to callees is then carried out to remove unused return values in a
51 very similar fashion.
52
53 Because parameter manipulation has big implications for call redirection
54 which is done only after all call graph nodes materialize, the
55 transformation phase is not part of this patch but is carried out by the
56 clone materialization and edge redirection itself, see comments in
57 ipa-param-manipulation.h for more details. */
58
59
60
61#include "config.h"
62#include "system.h"
63#include "coretypes.h"
64#include "backend.h"
65#include "tree.h"
66#include "gimple.h"
67#include "predict.h"
68#include "tree-pass.h"
69#include "ssa.h"
70#include "cgraph.h"
71#include "gimple-pretty-print.h"
72#include "alias.h"
73#include "tree-eh.h"
74#include "gimple-iterator.h"
75#include "gimple-walk.h"
76#include "tree-dfa.h"
77#include "tree-sra.h"
a895e6d7 78#include "alloc-pool.h"
ff6686d2 79#include "symbol-summary.h"
ff6686d2
MJ
80#include "dbgcnt.h"
81#include "tree-inline.h"
82#include "ipa-utils.h"
83#include "builtins.h"
84#include "cfganal.h"
85#include "tree-streamer.h"
9707b593 86#include "internal-fn.h"
ae7a23a3 87#include "symtab-clones.h"
ff6686d2 88
40e67ab8
JH
89static void ipa_sra_summarize_function (cgraph_node *);
90
ff6686d2
MJ
91/* Bits used to track size of an aggregate in bytes interprocedurally. */
92#define ISRA_ARG_SIZE_LIMIT_BITS 16
93#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
94/* How many parameters can feed into a call actual argument and still be
95 tracked. */
96#define IPA_SRA_MAX_PARAM_FLOW_LEN 7
97
98/* Structure describing accesses to a specific portion of an aggregate
99 parameter, as given by the offset and size. Any smaller accesses that occur
100 within a function that fall within another access form a tree. The pass
101 cannot analyze parameters with only partially overlapping accesses. */
102
103struct GTY(()) param_access
104{
105 /* Type that a potential replacement should have. This field only has
106 meaning in the summary building and transformation phases, when it is
dfea3d6f 107 reconstructed from the body. Must not be touched in IPA analysis
ff6686d2
MJ
108 stage. */
109 tree type;
110
111 /* Alias reference type to be used in MEM_REFs when adjusting caller
112 arguments. */
113 tree alias_ptr_type;
114
115 /* Values returned by get_ref_base_and_extent but converted to bytes and
116 stored as unsigned ints. */
117 unsigned unit_offset;
118 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
119
120 /* Set once we are sure that the access will really end up in a potentially
121 transformed function - initially not set for portions of formal parameters
122 that are only used as actual function arguments passed to callees. */
123 unsigned certain : 1;
124 /* Set if the access has a reversed scalar storage order. */
125 unsigned reverse : 1;
126};
127
dfea3d6f 128/* This structure has the same purpose as the one above and additionally it
ff6686d2
MJ
129 contains some fields that are only necessary in the summary generation
130 phase. */
131
132struct gensum_param_access
133{
134 /* Values returned by get_ref_base_and_extent. */
135 HOST_WIDE_INT offset;
136 HOST_WIDE_INT size;
137
138 /* if this access has any children (in terms of the definition above), this
139 points to the first one. */
140 struct gensum_param_access *first_child;
141 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
142 described above. */
143 struct gensum_param_access *next_sibling;
144
145 /* Type that a potential replacement should have. This field only has
146 meaning in the summary building and transformation phases, when it is
dfea3d6f 147 reconstructed from the body. Must not be touched in IPA analysis
ff6686d2
MJ
148 stage. */
149 tree type;
dfea3d6f 150 /* Alias reference type to be used in MEM_REFs when adjusting caller
ff6686d2
MJ
151 arguments. */
152 tree alias_ptr_type;
153
154 /* Have there been writes to or reads from this exact location except for as
155 arguments to a function call that can be tracked. */
156 bool nonarg;
157
158 /* Set if the access has a reversed scalar storage order. */
159 bool reverse;
160};
161
162/* Summary describing a parameter in the IPA stages. */
163
164struct GTY(()) isra_param_desc
165{
166 /* List of access representatives to the parameters, sorted according to
167 their offset. */
168 vec <param_access *, va_gc> *accesses;
169
170 /* Unit size limit of total size of all replacements. */
171 unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
172 /* Sum of unit sizes of all certain replacements. */
173 unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
174
175 /* A parameter that is used only in call arguments and can be removed if all
176 concerned actual arguments are removed. */
177 unsigned locally_unused : 1;
178 /* An aggregate that is a candidate for breaking up or complete removal. */
179 unsigned split_candidate : 1;
180 /* Is this a parameter passing stuff by reference? */
181 unsigned by_ref : 1;
182};
183
184/* Structure used when generating summaries that describes a parameter. */
185
186struct gensum_param_desc
187{
188 /* Roots of param_accesses. */
189 gensum_param_access *accesses;
190 /* Number of accesses in the access tree rooted in field accesses. */
191 unsigned access_count;
192
dfea3d6f 193 /* If the below is non-zero, this is the number of uses as actual arguments. */
ff6686d2
MJ
194 int call_uses;
195 /* Number of times this parameter has been directly passed to. */
196 unsigned ptr_pt_count;
197
198 /* Size limit of total size of all replacements. */
199 unsigned param_size_limit;
200 /* Sum of sizes of nonarg accesses. */
201 unsigned nonarg_acc_size;
202
203 /* A parameter that is used only in call arguments and can be removed if all
204 concerned actual arguments are removed. */
205 bool locally_unused;
206 /* An aggregate that is a candidate for breaking up or a pointer passing data
207 by reference that is a candidate for being converted to a set of
dfea3d6f 208 parameters passing those data by value. */
ff6686d2
MJ
209 bool split_candidate;
210 /* Is this a parameter passing stuff by reference? */
211 bool by_ref;
212
213 /* The number of this parameter as they are ordered in function decl. */
214 int param_number;
215 /* For parameters passing data by reference, this is parameter index to
216 compute indices to bb_dereferences. */
217 int deref_index;
218};
219
dfea3d6f 220/* Properly deallocate accesses of DESC. TODO: Since this data structure is
ff6686d2
MJ
221 not in GC memory, this is not necessary and we can consider removing the
222 function. */
223
224static void
225free_param_decl_accesses (isra_param_desc *desc)
226{
227 unsigned len = vec_safe_length (desc->accesses);
228 for (unsigned i = 0; i < len; ++i)
229 ggc_free ((*desc->accesses)[i]);
230 vec_free (desc->accesses);
231}
232
233/* Class used to convey information about functions from the
dfea3d6f 234 intra-procedural analysis stage to inter-procedural one. */
ff6686d2
MJ
235
236class GTY((for_user)) isra_func_summary
237{
238public:
239 /* initialize the object. */
240
241 isra_func_summary ()
242 : m_parameters (NULL), m_candidate (false), m_returns_value (false),
243 m_return_ignored (false), m_queued (false)
244 {}
245
246 /* Destroy m_parameters. */
247
248 ~isra_func_summary ();
249
dfea3d6f 250 /* Mark the function as not a candidate for any IPA-SRA transformation.
ff6686d2
MJ
251 Return true if it was a candidate until now. */
252
253 bool zap ();
254
255 /* Vector of parameter descriptors corresponding to the function being
256 analyzed. */
257 vec<isra_param_desc, va_gc> *m_parameters;
258
259 /* Whether the node is even a candidate for any IPA-SRA transformation at
260 all. */
261 unsigned m_candidate : 1;
262
263 /* Whether the original function returns any value. */
264 unsigned m_returns_value : 1;
265
266 /* Set to true if all call statements do not actually use the returned
267 value. */
268
269 unsigned m_return_ignored : 1;
270
271 /* Whether the node is already queued in IPA SRA stack during processing of
272 call graphs SCCs. */
273
274 unsigned m_queued : 1;
275};
276
dfea3d6f
JJ
277/* Clean up and deallocate isra_func_summary points to. TODO: Since this data
278 structure is not in GC memory, this is not necessary and we can consider
ff6686d2
MJ
279 removing the destructor. */
280
281isra_func_summary::~isra_func_summary ()
282{
283 unsigned len = vec_safe_length (m_parameters);
284 for (unsigned i = 0; i < len; ++i)
285 free_param_decl_accesses (&(*m_parameters)[i]);
286 vec_free (m_parameters);
287}
288
289
dfea3d6f 290/* Mark the function as not a candidate for any IPA-SRA transformation. Return
ff6686d2
MJ
291 true if it was a candidate until now. */
292
293bool
294isra_func_summary::zap ()
295{
296 bool ret = m_candidate;
297 m_candidate = false;
298
299 unsigned len = vec_safe_length (m_parameters);
300 for (unsigned i = 0; i < len; ++i)
301 free_param_decl_accesses (&(*m_parameters)[i]);
302 vec_free (m_parameters);
303
304 return ret;
305}
306
307/* Structure to describe which formal parameters feed into a particular actual
308 arguments. */
309
310struct isra_param_flow
311{
312 /* Number of elements in array inputs that contain valid data. */
313 char length;
314 /* Indices of formal parameters that feed into the described actual argument.
315 If aggregate_pass_through or pointer_pass_through below are true, it must
316 contain exactly one element which is passed through from a formal
317 parameter if the given number. Otherwise, the array contains indices of
dfea3d6f 318 callee's formal parameters which are used to calculate value of this
ff6686d2
MJ
319 actual argument. */
320 unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];
321
322 /* Offset within the formal parameter. */
323 unsigned unit_offset;
324 /* Size of the portion of the formal parameter that is being passed. */
325 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
326
327 /* True when the value of this actual copy is a portion of a formal
328 parameter. */
329 unsigned aggregate_pass_through : 1;
330 /* True when the value of this actual copy is a verbatim pass through of an
331 obtained pointer. */
332 unsigned pointer_pass_through : 1;
333 /* True when it is safe to copy access candidates here from the callee, which
334 would mean introducing dereferences into callers of the caller. */
335 unsigned safe_to_import_accesses : 1;
336};
337
dfea3d6f 338/* Structure used to convey information about calls from the intra-procedural
ff6686d2
MJ
339 analysis stage to inter-procedural one. */
340
341class isra_call_summary
342{
343public:
344 isra_call_summary ()
345 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
763121cc 346 m_bit_aligned_arg (false), m_before_any_store (false)
ff6686d2
MJ
347 {}
348
349 void init_inputs (unsigned arg_count);
350 void dump (FILE *f);
351
352 /* Information about what formal parameters of the caller are used to compute
dfea3d6f 353 individual actual arguments of this call. */
ff6686d2
MJ
354 auto_vec <isra_param_flow> m_arg_flow;
355
356 /* Set to true if the call statement does not have a LHS. */
357 unsigned m_return_ignored : 1;
358
359 /* Set to true if the LHS of call statement is only used to construct the
360 return value of the caller. */
361 unsigned m_return_returned : 1;
362
363 /* Set when any of the call arguments are not byte-aligned. */
364 unsigned m_bit_aligned_arg : 1;
763121cc
MJ
365
366 /* Set to true if the call happend before any (other) store to memory in the
367 caller. */
368 unsigned m_before_any_store : 1;
ff6686d2
MJ
369};
370
371/* Class to manage function summaries. */
372
373class GTY((user)) ipa_sra_function_summaries
374 : public function_summary <isra_func_summary *>
375{
376public:
377 ipa_sra_function_summaries (symbol_table *table, bool ggc):
378 function_summary<isra_func_summary *> (table, ggc) { }
379
380 virtual void duplicate (cgraph_node *, cgraph_node *,
381 isra_func_summary *old_sum,
382 isra_func_summary *new_sum);
40e67ab8 383 virtual void insert (cgraph_node *, isra_func_summary *);
ff6686d2
MJ
384};
385
386/* Hook that is called by summary when a node is duplicated. */
387
388void
389ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
390 isra_func_summary *old_sum,
391 isra_func_summary *new_sum)
392{
393 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
394 useless. */
395 new_sum->m_candidate = old_sum->m_candidate;
396 new_sum->m_returns_value = old_sum->m_returns_value;
397 new_sum->m_return_ignored = old_sum->m_return_ignored;
398 gcc_assert (!old_sum->m_queued);
399 new_sum->m_queued = false;
400
401 unsigned param_count = vec_safe_length (old_sum->m_parameters);
402 if (!param_count)
403 return;
404 vec_safe_reserve_exact (new_sum->m_parameters, param_count);
405 new_sum->m_parameters->quick_grow_cleared (param_count);
406 for (unsigned i = 0; i < param_count; i++)
407 {
408 isra_param_desc *s = &(*old_sum->m_parameters)[i];
409 isra_param_desc *d = &(*new_sum->m_parameters)[i];
410
411 d->param_size_limit = s->param_size_limit;
412 d->size_reached = s->size_reached;
413 d->locally_unused = s->locally_unused;
414 d->split_candidate = s->split_candidate;
415 d->by_ref = s->by_ref;
416
417 unsigned acc_count = vec_safe_length (s->accesses);
418 vec_safe_reserve_exact (d->accesses, acc_count);
419 for (unsigned j = 0; j < acc_count; j++)
420 {
421 param_access *from = (*s->accesses)[j];
422 param_access *to = ggc_cleared_alloc<param_access> ();
423 to->type = from->type;
424 to->alias_ptr_type = from->alias_ptr_type;
425 to->unit_offset = from->unit_offset;
426 to->unit_size = from->unit_size;
427 to->certain = from->certain;
428 d->accesses->quick_push (to);
429 }
430 }
431}
432
433/* Pointer to the pass function summary holder. */
434
435static GTY(()) ipa_sra_function_summaries *func_sums;
436
40e67ab8
JH
437/* Hook that is called by summary when new node appears. */
438
439void
440ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
441{
442 if (opt_for_fn (node->decl, flag_ipa_sra))
443 {
444 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
445 ipa_sra_summarize_function (node);
446 pop_cfun ();
447 }
448 else
449 func_sums->remove (node);
450}
451
ff6686d2
MJ
452/* Class to manage call summaries. */
453
454class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
455{
456public:
457 ipa_sra_call_summaries (symbol_table *table):
458 call_summary<isra_call_summary *> (table) { }
459
460 /* Duplicate info when an edge is cloned. */
461 virtual void duplicate (cgraph_edge *, cgraph_edge *,
462 isra_call_summary *old_sum,
463 isra_call_summary *new_sum);
464};
465
466static ipa_sra_call_summaries *call_sums;
467
468
469/* Initialize m_arg_flow of a particular instance of isra_call_summary.
470 ARG_COUNT is the number of actual arguments passed. */
471
472void
473isra_call_summary::init_inputs (unsigned arg_count)
474{
475 if (arg_count == 0)
476 {
477 gcc_checking_assert (m_arg_flow.length () == 0);
478 return;
479 }
480 if (m_arg_flow.length () == 0)
481 {
482 m_arg_flow.reserve_exact (arg_count);
483 m_arg_flow.quick_grow_cleared (arg_count);
484 }
485 else
486 gcc_checking_assert (arg_count == m_arg_flow.length ());
487}
488
489/* Dump all information in call summary to F. */
490
491void
492isra_call_summary::dump (FILE *f)
493{
494 if (m_return_ignored)
495 fprintf (f, " return value ignored\n");
496 if (m_return_returned)
497 fprintf (f, " return value used only to compute caller return value\n");
763121cc
MJ
498 if (m_before_any_store)
499 fprintf (f, " happens before any store to memory\n");
ff6686d2
MJ
500 for (unsigned i = 0; i < m_arg_flow.length (); i++)
501 {
502 fprintf (f, " Parameter %u:\n", i);
503 isra_param_flow *ipf = &m_arg_flow[i];
504
505 if (ipf->length)
506 {
507 bool first = true;
508 fprintf (f, " Scalar param sources: ");
509 for (int j = 0; j < ipf->length; j++)
510 {
511 if (!first)
512 fprintf (f, ", ");
513 else
514 first = false;
515 fprintf (f, "%i", (int) ipf->inputs[j]);
516 }
517 fprintf (f, "\n");
518 }
519 if (ipf->aggregate_pass_through)
520 fprintf (f, " Aggregate pass through from the param given above, "
521 "unit offset: %u , unit size: %u\n",
522 ipf->unit_offset, ipf->unit_size);
523 if (ipf->pointer_pass_through)
524 fprintf (f, " Pointer pass through from the param given above, "
525 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
526 }
527}
528
dfea3d6f 529/* Duplicate edge summary when an edge is cloned. */
ff6686d2
MJ
530
531void
532ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
533 isra_call_summary *old_sum,
534 isra_call_summary *new_sum)
535{
536 unsigned arg_count = old_sum->m_arg_flow.length ();
537 new_sum->init_inputs (arg_count);
538 for (unsigned i = 0; i < arg_count; i++)
539 new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
540
541 new_sum->m_return_ignored = old_sum->m_return_ignored;
542 new_sum->m_return_returned = old_sum->m_return_returned;
543 new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
763121cc 544 new_sum->m_before_any_store = old_sum->m_before_any_store;
ff6686d2
MJ
545}
546
547
548/* With all GTY stuff done, we can move to anonymous namespace. */
549namespace {
550/* Quick mapping from a decl to its param descriptor. */
551
552hash_map<tree, gensum_param_desc *> *decl2desc;
553
dfea3d6f 554/* Countdown of allowed Alias analysis steps during summary building. */
ff6686d2
MJ
555
556int aa_walking_limit;
557
558/* This is a table in which for each basic block and parameter there is a
559 distance (offset + size) in that parameter which is dereferenced and
560 accessed in that BB. */
561HOST_WIDE_INT *bb_dereferences = NULL;
562/* How many by-reference parameters there are in the current function. */
563int by_ref_count;
564
565/* Bitmap of BBs that can cause the function to "stop" progressing by
566 returning, throwing externally, looping infinitely or calling a function
567 which might abort etc.. */
568bitmap final_bbs;
569
570/* Obstack to allocate various small structures required only when generating
571 summary for a function. */
572struct obstack gensum_obstack;
573
574/* Return false the function is apparently unsuitable for IPA-SRA based on it's
575 attributes, return true otherwise. NODE is the cgraph node of the current
576 function. */
577
578static bool
579ipa_sra_preliminary_function_checks (cgraph_node *node)
580{
87f94429 581 if (!node->can_change_signature)
ff6686d2
MJ
582 {
583 if (dump_file)
584 fprintf (dump_file, "Function cannot change signature.\n");
585 return false;
586 }
587
588 if (!tree_versionable_function_p (node->decl))
589 {
590 if (dump_file)
591 fprintf (dump_file, "Function is not versionable.\n");
592 return false;
593 }
594
595 if (!opt_for_fn (node->decl, optimize)
596 || !opt_for_fn (node->decl, flag_ipa_sra))
597 {
598 if (dump_file)
599 fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
600 "function.\n");
601 return false;
602 }
603
604 if (DECL_VIRTUAL_P (node->decl))
605 {
606 if (dump_file)
607 fprintf (dump_file, "Function is a virtual method.\n");
608 return false;
609 }
610
611 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
612 if (fun->stdarg)
613 {
614 if (dump_file)
615 fprintf (dump_file, "Function uses stdarg. \n");
616 return false;
617 }
618
619 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
620 {
621 if (dump_file)
622 fprintf (dump_file, "Function type has attributes. \n");
623 return false;
624 }
625
626 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
627 {
628 if (dump_file)
629 fprintf (dump_file, "Always inline function will be inlined "
630 "anyway. \n");
631 return false;
632 }
633
634 return true;
635}
636
637/* Print access tree starting at ACCESS to F. */
638
639static void
640dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
641{
642 fprintf (f, " ");
643 for (unsigned i = 0; i < indent; i++)
644 fprintf (f, " ");
645 fprintf (f, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
646 access->offset);
647 fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
648 fprintf (f, ", type: ");
649 print_generic_expr (f, access->type);
650 fprintf (f, ", alias_ptr_type: ");
651 print_generic_expr (f, access->alias_ptr_type);
652 fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
653 for (gensum_param_access *ch = access->first_child;
654 ch;
655 ch = ch->next_sibling)
656 dump_gensum_access (f, ch, indent + 2);
657}
658
659
660/* Print access tree starting at ACCESS to F. */
661
662static void
663dump_isra_access (FILE *f, param_access *access)
664{
665 fprintf (f, " * Access to unit offset: %u", access->unit_offset);
666 fprintf (f, ", unit size: %u", access->unit_size);
667 fprintf (f, ", type: ");
668 print_generic_expr (f, access->type);
669 fprintf (f, ", alias_ptr_type: ");
670 print_generic_expr (f, access->alias_ptr_type);
671 if (access->certain)
672 fprintf (f, ", certain");
673 else
674 fprintf (f, ", not-certain");
675 if (access->reverse)
676 fprintf (f, ", reverse");
677 fprintf (f, "\n");
678}
679
680/* Dump access tree starting at ACCESS to stderr. */
681
682DEBUG_FUNCTION void
683debug_isra_access (param_access *access)
684{
685 dump_isra_access (stderr, access);
686}
687
688/* Dump DESC to F. */
689
690static void
691dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
692{
693 if (desc->locally_unused)
694 fprintf (f, " unused with %i call_uses\n", desc->call_uses);
695 if (!desc->split_candidate)
696 {
697 fprintf (f, " not a candidate\n");
698 return;
699 }
700 if (desc->by_ref)
701 fprintf (f, " by_ref with %u pass throughs\n", desc->ptr_pt_count);
702
703 for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
704 dump_gensum_access (f, acc, 2);
705}
706
dfea3d6f 707/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
ff6686d2
MJ
708 F. */
709
710static void
711dump_gensum_param_descriptors (FILE *f, tree fndecl,
712 vec<gensum_param_desc> *param_descriptions)
713{
714 tree parm = DECL_ARGUMENTS (fndecl);
715 for (unsigned i = 0;
716 i < param_descriptions->length ();
717 ++i, parm = DECL_CHAIN (parm))
718 {
719 fprintf (f, " Descriptor for parameter %i ", i);
720 print_generic_expr (f, parm, TDF_UID);
721 fprintf (f, "\n");
722 dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
723 }
724}
725
726
727/* Dump DESC to F. */
728
729static void
730dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
731{
732 if (desc->locally_unused)
733 {
734 fprintf (f, " (locally) unused\n");
735 }
736 if (!desc->split_candidate)
737 {
738 fprintf (f, " not a candidate for splitting\n");
739 return;
740 }
741 fprintf (f, " param_size_limit: %u, size_reached: %u%s\n",
742 desc->param_size_limit, desc->size_reached,
743 desc->by_ref ? ", by_ref" : "");
744
745 for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
746 {
747 param_access *access = (*desc->accesses)[i];
748 dump_isra_access (f, access);
749 }
750}
751
dfea3d6f 752/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
ff6686d2
MJ
753 F. */
754
755static void
756dump_isra_param_descriptors (FILE *f, tree fndecl,
757 isra_func_summary *ifs)
758{
759 tree parm = DECL_ARGUMENTS (fndecl);
760 if (!ifs->m_parameters)
761 {
762 fprintf (f, " parameter descriptors not available\n");
763 return;
764 }
765
766 for (unsigned i = 0;
767 i < ifs->m_parameters->length ();
768 ++i, parm = DECL_CHAIN (parm))
769 {
770 fprintf (f, " Descriptor for parameter %i ", i);
771 print_generic_expr (f, parm, TDF_UID);
772 fprintf (f, "\n");
773 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
774 }
775}
776
777/* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
778 function fails return false, otherwise return true. SRC must fit into an
779 unsigned char. Used for purposes of transitive unused parameter
780 removal. */
781
782static bool
783add_src_to_param_flow (isra_param_flow *param_flow, int src)
784{
785 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
786 if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
787 return false;
788
789 param_flow->inputs[(int) param_flow->length] = src;
790 param_flow->length++;
791 return true;
792}
793
794/* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
795 it is the only input. Used for purposes of transitive parameter
796 splitting. */
797
798static void
799set_single_param_flow_source (isra_param_flow *param_flow, int src)
800{
801 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
802 if (param_flow->length == 0)
803 {
804 param_flow->inputs[0] = src;
805 param_flow->length = 1;
806 }
807 else if (param_flow->length == 1)
808 gcc_assert (param_flow->inputs[0] == src);
809 else
810 gcc_unreachable ();
811}
812
813/* Assert that there is only a single value in PARAM_FLOW's inputs and return
814 it. */
815
816static unsigned
817get_single_param_flow_source (const isra_param_flow *param_flow)
818{
819 gcc_assert (param_flow->length == 1);
820 return param_flow->inputs[0];
821}
822
823/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
1980ffec
MJ
824 in FUN represented with NODE and return a negative number if any of them is
825 used for something else than either an actual call argument, simple
826 arithmetic operation or debug statement. If there are no such uses, return
827 the number of actual arguments that this parameter eventually feeds to (or
828 zero if there is none). For any such parameter, mark PARM_NUM as one of its
829 sources. ANALYZED is a bitmap that tracks which SSA names we have already
830 started investigating. */
ff6686d2
MJ
831
832static int
1980ffec
MJ
833isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
834 int parm_num, bitmap analyzed)
ff6686d2
MJ
835{
836 int res = 0;
837 imm_use_iterator imm_iter;
838 gimple *stmt;
839
840 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
841 {
842 if (is_gimple_debug (stmt))
843 continue;
844
845 /* TODO: We could handle at least const builtin functions like arithmetic
846 operations below. */
847 if (is_gimple_call (stmt))
848 {
849 int all_uses = 0;
850 use_operand_p use_p;
851 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
852 all_uses++;
853
854 gcall *call = as_a <gcall *> (stmt);
855 unsigned arg_count;
856 if (gimple_call_internal_p (call)
857 || (arg_count = gimple_call_num_args (call)) == 0)
858 {
859 res = -1;
640296c3 860 break;
ff6686d2
MJ
861 }
862
863 cgraph_edge *cs = node->get_edge (stmt);
864 gcc_checking_assert (cs);
865 isra_call_summary *csum = call_sums->get_create (cs);
866 csum->init_inputs (arg_count);
867
868 int simple_uses = 0;
869 for (unsigned i = 0; i < arg_count; i++)
870 if (gimple_call_arg (call, i) == name)
871 {
872 if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
873 {
874 simple_uses = -1;
875 break;
876 }
877 simple_uses++;
878 }
879
880 if (simple_uses < 0
881 || all_uses != simple_uses)
882 {
883 res = -1;
640296c3 884 break;
ff6686d2
MJ
885 }
886 res += all_uses;
887 }
1980ffec
MJ
888 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
889 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
890 || gimple_code (stmt) == GIMPLE_PHI))
ff6686d2
MJ
891 {
892 tree lhs;
893 if (gimple_code (stmt) == GIMPLE_PHI)
894 lhs = gimple_phi_result (stmt);
895 else
896 lhs = gimple_assign_lhs (stmt);
897
898 if (TREE_CODE (lhs) != SSA_NAME)
899 {
900 res = -1;
640296c3 901 break;
ff6686d2
MJ
902 }
903 gcc_assert (!gimple_vdef (stmt));
904 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
905 {
1980ffec 906 int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
ff6686d2
MJ
907 analyzed);
908 if (tmp < 0)
909 {
910 res = tmp;
640296c3 911 break;
ff6686d2
MJ
912 }
913 res += tmp;
914 }
915 }
916 else
917 {
918 res = -1;
640296c3 919 break;
ff6686d2
MJ
920 }
921 }
922 return res;
923}
924
925/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
926 also described by NODE) and simple arithmetic calculations involving PARM
927 and return false if any of them is used for something else than either an
928 actual call argument, simple arithmetic operation or debug statement. If
929 there are no such uses, return true and store the number of actual arguments
930 that this parameter eventually feeds to (or zero if there is none) to
931 *CALL_USES_P. For any such parameter, mark PARM_NUM as one of its
932 sources.
933
934 This function is similar to ptr_parm_has_nonarg_uses but its results are
935 meant for unused parameter removal, as opposed to splitting of parameters
936 passed by reference or converting them to passed by value.
937 */
938
939static bool
940isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
941 int parm_num, int *call_uses_p)
942{
943 gcc_checking_assert (is_gimple_reg (parm));
944
945 tree name = ssa_default_def (fun, parm);
946 if (!name || has_zero_uses (name))
947 {
948 *call_uses_p = 0;
949 return false;
950 }
951
952 /* Edge summaries can only handle callers with fewer than 256 parameters. */
953 if (parm_num > UCHAR_MAX)
954 return true;
955
956 bitmap analyzed = BITMAP_ALLOC (NULL);
1980ffec
MJ
957 int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
958 analyzed);
ff6686d2
MJ
959 BITMAP_FREE (analyzed);
960 if (call_uses < 0)
961 return true;
962 *call_uses_p = call_uses;
963 return false;
964}
965
966/* Scan immediate uses of a default definition SSA name of a parameter PARM and
967 examine whether there are any nonarg uses that are not actual arguments or
968 otherwise infeasible uses. If so, return true, otherwise return false.
969 Create pass-through IPA flow records for any direct uses as argument calls
970 and if returning false, store their number into *PT_COUNT_P. NODE and FUN
971 must represent the function that is currently analyzed, PARM_NUM must be the
972 index of the analyzed parameter.
973
974 This function is similar to isra_track_scalar_param_local_uses but its
975 results are meant for splitting of parameters passed by reference or turning
976 them into bits passed by value, as opposed to generic unused parameter
977 removal.
978 */
979
980static bool
981ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
982 int parm_num, unsigned *pt_count_p)
983{
984 imm_use_iterator ui;
985 gimple *stmt;
986 tree name = ssa_default_def (fun, parm);
987 bool ret = false;
988 unsigned pt_count = 0;
989
990 if (!name || has_zero_uses (name))
991 return false;
992
993 /* Edge summaries can only handle callers with fewer than 256 parameters. */
994 if (parm_num > UCHAR_MAX)
995 return true;
996
997 FOR_EACH_IMM_USE_STMT (stmt, ui, name)
998 {
999 unsigned uses_ok = 0;
1000 use_operand_p use_p;
1001
1002 if (is_gimple_debug (stmt))
1003 continue;
1004
1005 if (gimple_assign_single_p (stmt))
1006 {
1007 tree rhs = gimple_assign_rhs1 (stmt);
1008 while (handled_component_p (rhs))
1009 rhs = TREE_OPERAND (rhs, 0);
1010 if (TREE_CODE (rhs) == MEM_REF
1011 && TREE_OPERAND (rhs, 0) == name
1012 && integer_zerop (TREE_OPERAND (rhs, 1))
1013 && types_compatible_p (TREE_TYPE (rhs),
1014 TREE_TYPE (TREE_TYPE (name)))
1015 && !TREE_THIS_VOLATILE (rhs))
1016 uses_ok++;
1017 }
1018 else if (is_gimple_call (stmt))
1019 {
1020 gcall *call = as_a <gcall *> (stmt);
1021 unsigned arg_count;
1022 if (gimple_call_internal_p (call)
1023 || (arg_count = gimple_call_num_args (call)) == 0)
1024 {
1025 ret = true;
640296c3 1026 break;
ff6686d2
MJ
1027 }
1028
1029 cgraph_edge *cs = node->get_edge (stmt);
1030 gcc_checking_assert (cs);
1031 isra_call_summary *csum = call_sums->get_create (cs);
1032 csum->init_inputs (arg_count);
1033
1034 for (unsigned i = 0; i < arg_count; ++i)
1035 {
1036 tree arg = gimple_call_arg (stmt, i);
1037
1038 if (arg == name)
1039 {
1040 /* TODO: Allow &MEM_REF[name + offset] here,
1041 ipa_param_body_adjustments::modify_call_stmt has to be
1042 adjusted too. */
1043 csum->m_arg_flow[i].pointer_pass_through = true;
1044 set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
1045 pt_count++;
1046 uses_ok++;
1047 continue;
1048 }
1049
1050 while (handled_component_p (arg))
1051 arg = TREE_OPERAND (arg, 0);
1052 if (TREE_CODE (arg) == MEM_REF
1053 && TREE_OPERAND (arg, 0) == name
1054 && integer_zerop (TREE_OPERAND (arg, 1))
1055 && types_compatible_p (TREE_TYPE (arg),
1056 TREE_TYPE (TREE_TYPE (name)))
1057 && !TREE_THIS_VOLATILE (arg))
1058 uses_ok++;
1059 }
1060 }
1061
1062 /* If the number of valid uses does not match the number of
1063 uses in this stmt there is an unhandled use. */
1064 unsigned all_uses = 0;
1065 FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
1066 all_uses++;
1067
1068 gcc_checking_assert (uses_ok <= all_uses);
1069 if (uses_ok != all_uses)
1070 {
1071 ret = true;
640296c3 1072 break;
ff6686d2
MJ
1073 }
1074 }
1075
1076 *pt_count_p = pt_count;
1077 return ret;
1078}
1079
1080/* Initialize vector of parameter descriptors of NODE. Return true if there
1081 are any candidates for splitting or unused aggregate parameter removal (the
1082 function may return false if there are candidates for removal of register
1083 parameters) and function body must be scanned. */
1084
1085static bool
1086create_parameter_descriptors (cgraph_node *node,
1087 vec<gensum_param_desc> *param_descriptions)
1088{
1089 function *fun = DECL_STRUCT_FUNCTION (node->decl);
1090 bool ret = false;
1091
1092 int num = 0;
1093 for (tree parm = DECL_ARGUMENTS (node->decl);
1094 parm;
1095 parm = DECL_CHAIN (parm), num++)
1096 {
1097 const char *msg;
1098 gensum_param_desc *desc = &(*param_descriptions)[num];
1099 /* param_descriptions vector is grown cleared in the caller. */
1100 desc->param_number = num;
1101 decl2desc->put (parm, desc);
1102
1103 if (dump_file && (dump_flags & TDF_DETAILS))
1104 print_generic_expr (dump_file, parm, TDF_UID);
1105
1106 int scalar_call_uses;
1107 tree type = TREE_TYPE (parm);
1108 if (TREE_THIS_VOLATILE (parm))
1109 {
1110 if (dump_file && (dump_flags & TDF_DETAILS))
1111 fprintf (dump_file, " not a candidate, is volatile\n");
1112 continue;
1113 }
1114 if (!is_gimple_reg_type (type) && is_va_list_type (type))
1115 {
1116 if (dump_file && (dump_flags & TDF_DETAILS))
1117 fprintf (dump_file, " not a candidate, is a va_list type\n");
1118 continue;
1119 }
1120 if (TREE_ADDRESSABLE (parm))
1121 {
1122 if (dump_file && (dump_flags & TDF_DETAILS))
1123 fprintf (dump_file, " not a candidate, is addressable\n");
1124 continue;
1125 }
1126 if (TREE_ADDRESSABLE (type))
1127 {
1128 if (dump_file && (dump_flags & TDF_DETAILS))
1129 fprintf (dump_file, " not a candidate, type cannot be split\n");
1130 continue;
1131 }
1132
1133 if (is_gimple_reg (parm)
1134 && !isra_track_scalar_param_local_uses (fun, node, parm, num,
1135 &scalar_call_uses))
1136 {
1137 if (dump_file && (dump_flags & TDF_DETAILS))
1138 fprintf (dump_file, " is a scalar with only %i call uses\n",
1139 scalar_call_uses);
1140
1141 desc->locally_unused = true;
1142 desc->call_uses = scalar_call_uses;
1143 }
1144
1145 if (POINTER_TYPE_P (type))
1146 {
1147 type = TREE_TYPE (type);
1148
1149 if (TREE_CODE (type) == FUNCTION_TYPE
1150 || TREE_CODE (type) == METHOD_TYPE)
1151 {
1152 if (dump_file && (dump_flags & TDF_DETAILS))
1153 fprintf (dump_file, " not a candidate, reference to "
1154 "a function\n");
1155 continue;
1156 }
1157 if (TYPE_VOLATILE (type))
1158 {
1159 if (dump_file && (dump_flags & TDF_DETAILS))
1160 fprintf (dump_file, " not a candidate, reference to "
1161 "a volatile type\n");
1162 continue;
1163 }
1164 if (TREE_CODE (type) == ARRAY_TYPE
1165 && TYPE_NONALIASED_COMPONENT (type))
1166 {
1167 if (dump_file && (dump_flags & TDF_DETAILS))
f8cb8bcd
JJ
1168 fprintf (dump_file, " not a candidate, reference to "
1169 "a nonaliased component array\n");
ff6686d2
MJ
1170 continue;
1171 }
1172 if (!is_gimple_reg (parm))
1173 {
1174 if (dump_file && (dump_flags & TDF_DETAILS))
1175 fprintf (dump_file, " not a candidate, a reference which is "
1176 "not a gimple register (probably addressable)\n");
1177 continue;
1178 }
1179 if (is_va_list_type (type))
1180 {
1181 if (dump_file && (dump_flags & TDF_DETAILS))
1182 fprintf (dump_file, " not a candidate, reference to "
1183 "a va list\n");
1184 continue;
1185 }
1186 if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
1187 &desc->ptr_pt_count))
1188 {
1189 if (dump_file && (dump_flags & TDF_DETAILS))
1190 fprintf (dump_file, " not a candidate, reference has "
1191 "nonarg uses\n");
1192 continue;
1193 }
1194 desc->by_ref = true;
1195 }
1196 else if (!AGGREGATE_TYPE_P (type))
1197 {
1198 /* This is in an else branch because scalars passed by reference are
1199 still candidates to be passed by value. */
1200 if (dump_file && (dump_flags & TDF_DETAILS))
1201 fprintf (dump_file, " not a candidate, not an aggregate\n");
1202 continue;
1203 }
1204
1205 if (!COMPLETE_TYPE_P (type))
1206 {
1207 if (dump_file && (dump_flags & TDF_DETAILS))
1208 fprintf (dump_file, " not a candidate, not a complete type\n");
1209 continue;
1210 }
1211 if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
1212 {
1213 if (dump_file && (dump_flags & TDF_DETAILS))
1214 fprintf (dump_file, " not a candidate, size not representable\n");
1215 continue;
1216 }
1217 unsigned HOST_WIDE_INT type_size
1218 = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
1219 if (type_size == 0
1220 || type_size >= ISRA_ARG_SIZE_LIMIT)
1221 {
1222 if (dump_file && (dump_flags & TDF_DETAILS))
1223 fprintf (dump_file, " not a candidate, has zero or huge size\n");
1224 continue;
1225 }
1226 if (type_internals_preclude_sra_p (type, &msg))
1227 {
1228 if (dump_file && (dump_flags & TDF_DETAILS))
1229 fprintf (dump_file, " not a candidate, %s\n", msg);
1230 continue;
1231 }
1232
1233 if (dump_file && (dump_flags & TDF_DETAILS))
1234 fprintf (dump_file, " is a candidate\n");
1235
1236 ret = true;
1237 desc->split_candidate = true;
1238 if (desc->by_ref)
1239 desc->deref_index = by_ref_count++;
1240 }
1241 return ret;
1242}
1243
1244/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1245 found, which happens if DECL is for a static chain. */
1246
1247static gensum_param_desc *
1248get_gensum_param_desc (tree decl)
1249{
1250 gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
1251 gensum_param_desc **slot = decl2desc->get (decl);
1252 if (!slot)
1253 /* This can happen for static chains which we cannot handle so far. */
1254 return NULL;
1255 gcc_checking_assert (*slot);
1256 return *slot;
1257}
1258
1259
1260/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1261 write REASON to the dump file if there is one. */
1262
1263static void
1264disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
1265{
1266 if (!desc->split_candidate)
1267 return;
1268
1269 if (dump_file && (dump_flags & TDF_DETAILS))
1270 fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
1271 desc->param_number, reason);
1272
1273 desc->split_candidate = false;
1274}
1275
1276/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1277 there is one. */
1278
1279static void
1280disqualify_split_candidate (tree decl, const char *reason)
1281{
1282 gensum_param_desc *desc = get_gensum_param_desc (decl);
1283 if (desc)
1284 disqualify_split_candidate (desc, reason);
1285}
1286
1287/* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1288 first, check that there are not too many of them already. If so, do not
1289 allocate anything and return NULL. */
1290
1291static gensum_param_access *
1292allocate_access (gensum_param_desc *desc,
1293 HOST_WIDE_INT offset, HOST_WIDE_INT size)
1294{
1295 if (desc->access_count
028d4092 1296 == (unsigned) param_ipa_sra_max_replacements)
ff6686d2
MJ
1297 {
1298 disqualify_split_candidate (desc, "Too many replacement candidates");
1299 return NULL;
1300 }
1301
1302 gensum_param_access *access
1303 = (gensum_param_access *) obstack_alloc (&gensum_obstack,
1304 sizeof (gensum_param_access));
1305 memset (access, 0, sizeof (*access));
1306 access->offset = offset;
1307 access->size = size;
1308 return access;
1309}
1310
1311/* In what context scan_expr_access has been called, whether it deals with a
9707b593
MJ
1312 load, a function argument, or a store. Please note that in rare
1313 circumstances when it is not clear if the access is a load or store,
1314 ISRA_CTX_STORE is used too. */
ff6686d2
MJ
1315
1316enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
1317
1318/* Return an access describing memory access to the variable described by DESC
1319 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
dfea3d6f 1320 at a certain tree level FIRST. Attempt to create it and put into the
ff6686d2
MJ
1321 appropriate place in the access tree if does not exist, but fail and return
1322 NULL if there are already too many accesses, if it would create a partially
dfea3d6f 1323 overlapping access or if an access would end up within a pre-existing
ff6686d2
MJ
1324 non-call access. */
1325
1326static gensum_param_access *
1327get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
1328 HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
1329{
1330 gensum_param_access *access = *first, **ptr = first;
1331
1332 if (!access)
1333 {
1334 /* No pre-existing access at this level, just create it. */
1335 gensum_param_access *a = allocate_access (desc, offset, size);
1336 if (!a)
1337 return NULL;
1338 *first = a;
1339 return *first;
1340 }
1341
1342 if (access->offset >= offset + size)
1343 {
1344 /* We want to squeeze it in front of the very first access, just do
1345 it. */
1346 gensum_param_access *r = allocate_access (desc, offset, size);
1347 if (!r)
1348 return NULL;
1349 r->next_sibling = access;
1350 *first = r;
1351 return r;
1352 }
1353
1354 /* Skip all accesses that have to come before us until the next sibling is
1355 already too far. */
1356 while (offset >= access->offset + access->size
1357 && access->next_sibling
1358 && access->next_sibling->offset < offset + size)
1359 {
1360 ptr = &access->next_sibling;
1361 access = access->next_sibling;
1362 }
1363
1364 /* At this point we know we do not belong before access. */
1365 gcc_assert (access->offset < offset + size);
1366
1367 if (access->offset == offset && access->size == size)
1368 /* We found what we were looking for. */
1369 return access;
1370
1371 if (access->offset <= offset
1372 && access->offset + access->size >= offset + size)
1373 {
1374 /* We fit into access which is larger than us. We need to find/create
1375 something below access. But we only allow nesting in call
1376 arguments. */
1377 if (access->nonarg)
1378 return NULL;
1379
1380 return get_access_1 (desc, &access->first_child, offset, size, ctx);
1381 }
1382
1383 if (offset <= access->offset
1384 && offset + size >= access->offset + access->size)
1385 /* We are actually bigger than access, which fully fits into us, take its
1386 place and make all accesses fitting into it its children. */
1387 {
1388 /* But first, we only allow nesting in call arguments so check if that is
1389 what we are trying to represent. */
1390 if (ctx != ISRA_CTX_ARG)
1391 return NULL;
1392
1393 gensum_param_access *r = allocate_access (desc, offset, size);
1394 if (!r)
1395 return NULL;
1396 r->first_child = access;
1397
1398 while (access->next_sibling
1399 && access->next_sibling->offset < offset + size)
1400 access = access->next_sibling;
1401 if (access->offset + access->size > offset + size)
1402 {
1403 /* This must be a different access, which are sorted, so the
1404 following must be true and this signals a partial overlap. */
1405 gcc_assert (access->offset > offset);
1406 return NULL;
1407 }
1408
1409 r->next_sibling = access->next_sibling;
1410 access->next_sibling = NULL;
1411 *ptr = r;
1412 return r;
1413 }
1414
1415 if (offset >= access->offset + access->size)
1416 {
1417 /* We belong after access. */
1418 gensum_param_access *r = allocate_access (desc, offset, size);
1419 if (!r)
1420 return NULL;
1421 r->next_sibling = access->next_sibling;
1422 access->next_sibling = r;
1423 return r;
1424 }
1425
1426 if (offset < access->offset)
1427 {
1428 /* We know the following, otherwise we would have created a
1429 super-access. */
1430 gcc_checking_assert (offset + size < access->offset + access->size);
1431 return NULL;
1432 }
1433
1434 if (offset + size > access->offset + access->size)
1435 {
1436 /* Likewise. */
1437 gcc_checking_assert (offset > access->offset);
1438 return NULL;
1439 }
1440
1441 gcc_unreachable ();
1442}
1443
1444/* Return an access describing memory access to the variable described by DESC
1445 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1446 to create if it does not exist, but fail and return NULL if there are
1447 already too many accesses, if it would create a partially overlapping access
dfea3d6f 1448 or if an access would end up in a non-call access. */
ff6686d2
MJ
1449
1450static gensum_param_access *
1451get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
1452 isra_scan_context ctx)
1453{
1454 gcc_checking_assert (desc->split_candidate);
1455
1456 gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
1457 size, ctx);
1458 if (!access)
1459 {
1460 disqualify_split_candidate (desc,
1461 "Bad access overlap or too many accesses");
1462 return NULL;
1463 }
1464
1465 switch (ctx)
1466 {
1467 case ISRA_CTX_STORE:
1468 gcc_assert (!desc->by_ref);
1469 /* Fall-through */
1470 case ISRA_CTX_LOAD:
1471 access->nonarg = true;
1472 break;
1473 case ISRA_CTX_ARG:
1474 break;
1475 }
1476
1477 return access;
1478}
1479
1480/* Verify that parameter access tree starting with ACCESS is in good shape.
dfea3d6f 1481 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
ff6686d2
MJ
1482 ACCESS or zero if there is none. */
1483
1484static bool
1485verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
1486 HOST_WIDE_INT parent_size)
1487{
1488 while (access)
1489 {
241a2c49 1490 gcc_assert (access->offset >= 0 && access->size >= 0);
ff6686d2
MJ
1491
1492 if (parent_size != 0)
1493 {
1494 if (access->offset < parent_offset)
1495 {
1496 error ("Access offset before parent offset");
1497 return true;
1498 }
1499 if (access->size >= parent_size)
1500 {
1501 error ("Access size greater or equal to its parent size");
1502 return true;
1503 }
1504 if (access->offset + access->size > parent_offset + parent_size)
1505 {
1506 error ("Access terminates outside of its parent");
1507 return true;
1508 }
1509 }
1510
1511 if (verify_access_tree_1 (access->first_child, access->offset,
1512 access->size))
1513 return true;
1514
1515 if (access->next_sibling
1516 && (access->next_sibling->offset < access->offset + access->size))
1517 {
1518 error ("Access overlaps with its sibling");
1519 return true;
1520 }
1521
1522 access = access->next_sibling;
1523 }
1524 return false;
1525}
1526
1527/* Verify that parameter access tree starting with ACCESS is in good shape,
1528 halt compilation and dump the tree to stderr if not. */
1529
1530DEBUG_FUNCTION void
1531isra_verify_access_tree (gensum_param_access *access)
1532{
1533 if (verify_access_tree_1 (access, 0, 0))
1534 {
1535 for (; access; access = access->next_sibling)
1536 dump_gensum_access (stderr, access, 2);
1537 internal_error ("IPA-SRA access verification failed");
1538 }
1539}
1540
1541
1542/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1543 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1544
1545static bool
1546asm_visit_addr (gimple *, tree op, tree, void *)
1547{
1548 op = get_base_address (op);
1549 if (op
1550 && TREE_CODE (op) == PARM_DECL)
1551 disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
1552
1553 return false;
1554}
1555
1556/* Mark a dereference of parameter identified by DESC of distance DIST in a
1557 basic block BB, unless the BB has already been marked as a potentially
1558 final. */
1559
1560static void
1561mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
1562 basic_block bb)
1563{
1564 gcc_assert (desc->by_ref);
1565 gcc_checking_assert (desc->split_candidate);
1566
1567 if (bitmap_bit_p (final_bbs, bb->index))
1568 return;
1569
1570 int idx = bb->index * by_ref_count + desc->deref_index;
1571 if (bb_dereferences[idx] < dist)
1572 bb_dereferences[idx] = dist;
1573}
1574
1575/* Return true, if any potential replacements should use NEW_TYPE as opposed to
1576 previously recorded OLD_TYPE. */
1577
1578static bool
1579type_prevails_p (tree old_type, tree new_type)
1580{
1581 if (old_type == new_type)
1582 return false;
1583
1584 /* Non-aggregates are always better. */
1585 if (!is_gimple_reg_type (old_type)
1586 && is_gimple_reg_type (new_type))
1587 return true;
1588 if (is_gimple_reg_type (old_type)
1589 && !is_gimple_reg_type (new_type))
1590 return false;
1591
1592 /* Prefer any complex or vector type over any other scalar type. */
1593 if (TREE_CODE (old_type) != COMPLEX_TYPE
1594 && TREE_CODE (old_type) != VECTOR_TYPE
1595 && (TREE_CODE (new_type) == COMPLEX_TYPE
1596 || TREE_CODE (new_type) == VECTOR_TYPE))
1597 return true;
1598 if ((TREE_CODE (old_type) == COMPLEX_TYPE
1599 || TREE_CODE (old_type) == VECTOR_TYPE)
1600 && TREE_CODE (new_type) != COMPLEX_TYPE
1601 && TREE_CODE (new_type) != VECTOR_TYPE)
1602 return false;
1603
1604 /* Use the integral type with the bigger precision. */
1605 if (INTEGRAL_TYPE_P (old_type)
1606 && INTEGRAL_TYPE_P (new_type))
1607 return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));
1608
1609 /* Attempt to disregard any integral type with non-full precision. */
1610 if (INTEGRAL_TYPE_P (old_type)
1611 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
1612 != TYPE_PRECISION (old_type)))
1613 return true;
1614 if (INTEGRAL_TYPE_P (new_type)
1615 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
1616 != TYPE_PRECISION (new_type)))
1617 return false;
1618 /* Stabilize the selection. */
1619 return TYPE_UID (old_type) < TYPE_UID (new_type);
1620}
1621
1622/* When scanning an expression which is a call argument, this structure
dfea3d6f 1623 specifies the call and the position of the argument. */
ff6686d2
MJ
1624
1625struct scan_call_info
1626{
1627 /* Call graph edge representing the call. */
1628 cgraph_edge *cs;
1629 /* Total number of arguments in the call. */
1630 unsigned argument_count;
1631 /* Number of the actual argument being scanned. */
1632 unsigned arg_idx;
1633};
1634
1635/* Record use of ACCESS which belongs to a parameter described by DESC in a
1636 call argument described by CALL_INFO. */
1637
1638static void
1639record_nonregister_call_use (gensum_param_desc *desc,
1640 scan_call_info *call_info,
1641 unsigned unit_offset, unsigned unit_size)
1642{
1643 isra_call_summary *csum = call_sums->get_create (call_info->cs);
1644 csum->init_inputs (call_info->argument_count);
1645
1646 isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1647 param_flow->aggregate_pass_through = true;
1648 set_single_param_flow_source (param_flow, desc->param_number);
1649 param_flow->unit_offset = unit_offset;
1650 param_flow->unit_size = unit_size;
1651 desc->call_uses++;
1652}
1653
1654/* Callback of walk_aliased_vdefs, just mark that there was a possible
1655 modification. */
1656
1657static bool
1658mark_maybe_modified (ao_ref *, tree, void *data)
1659{
1660 bool *maybe_modified = (bool *) data;
1661 *maybe_modified = true;
1662 return true;
1663}
1664
1665/* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1666 specifies whether EXPR is used in a load, store or as an argument call. BB
1667 must be the basic block in which expr resides. If CTX specifies call
dfea3d6f 1668 argument context, CALL_INFO must describe that call and argument position,
ff6686d2
MJ
1669 otherwise it is ignored. */
1670
1671static void
1672scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1673 basic_block bb, scan_call_info *call_info = NULL)
1674{
1675 poly_int64 poffset, psize, pmax_size;
1676 HOST_WIDE_INT offset, size, max_size;
1677 tree base;
1678 bool deref = false;
1679 bool reverse;
1680
1681 if (TREE_CODE (expr) == BIT_FIELD_REF
1682 || TREE_CODE (expr) == IMAGPART_EXPR
1683 || TREE_CODE (expr) == REALPART_EXPR)
1684 expr = TREE_OPERAND (expr, 0);
1685
1686 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1687
1688 if (TREE_CODE (base) == MEM_REF)
1689 {
1690 tree op = TREE_OPERAND (base, 0);
1691 if (TREE_CODE (op) != SSA_NAME
1692 || !SSA_NAME_IS_DEFAULT_DEF (op))
1693 return;
1694 base = SSA_NAME_VAR (op);
1695 if (!base)
1696 return;
1697 deref = true;
1698 }
1699 if (TREE_CODE (base) != PARM_DECL)
1700 return;
1701
1702 gensum_param_desc *desc = get_gensum_param_desc (base);
1703 if (!desc || !desc->split_candidate)
1704 return;
1705
1706 if (!poffset.is_constant (&offset)
1707 || !psize.is_constant (&size)
1708 || !pmax_size.is_constant (&max_size))
1709 {
1710 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1711 "access.");
1712 return;
1713 }
1714 if (size < 0 || size != max_size)
1715 {
1716 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1717 return;
1718 }
1719 if (TREE_CODE (expr) == COMPONENT_REF
1720 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1721 {
1722 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1723 return;
1724 }
5a4d0da4
MJ
1725 if (offset < 0)
1726 {
1727 disqualify_split_candidate (desc, "Encountered an access at a "
1728 "negative offset.");
1729 return;
1730 }
ff6686d2
MJ
1731 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1732 gcc_assert ((size % BITS_PER_UNIT) == 0);
1733 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1734 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1735 {
1736 disqualify_split_candidate (desc, "Encountered an access with too big "
1737 "offset or size");
1738 return;
1739 }
1740
1741 tree type = TREE_TYPE (expr);
1742 unsigned int exp_align = get_object_alignment (expr);
1743
1744 if (exp_align < TYPE_ALIGN (type))
1745 {
1746 disqualify_split_candidate (desc, "Underaligned access.");
1747 return;
1748 }
1749
1750 if (deref)
1751 {
1752 if (!desc->by_ref)
1753 {
1754 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1755 return;
1756 }
1757 else if (ctx == ISRA_CTX_STORE)
1758 {
1759 disqualify_split_candidate (desc, "Storing to data passed by "
1760 "reference.");
1761 return;
1762 }
1763
1764 if (!aa_walking_limit)
1765 {
1766 disqualify_split_candidate (desc, "Out of alias analysis step "
1767 "limit.");
1768 return;
1769 }
1770
1771 gcc_checking_assert (gimple_vuse (stmt));
1772 bool maybe_modified = false;
1773 ao_ref ar;
1774
1775 ao_ref_init (&ar, expr);
1776 bitmap visited = BITMAP_ALLOC (NULL);
1777 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1778 mark_maybe_modified, &maybe_modified,
1779 &visited, NULL, aa_walking_limit);
1780 BITMAP_FREE (visited);
1781 if (walked > 0)
1782 {
1783 gcc_assert (aa_walking_limit > walked);
1784 aa_walking_limit = aa_walking_limit - walked;
1785 }
1786 if (walked < 0)
1787 aa_walking_limit = 0;
1788 if (maybe_modified || walked < 0)
1789 {
1790 disqualify_split_candidate (desc, "Data passed by reference possibly "
1791 "modified through an alias.");
1792 return;
1793 }
1794 else
1795 mark_param_dereference (desc, offset + size, bb);
1796 }
1797 else
1798 /* Pointer parameters with direct uses should have been ruled out by
dfea3d6f 1799 analyzing SSA default def when looking at the parameters. */
ff6686d2
MJ
1800 gcc_assert (!desc->by_ref);
1801
1802 gensum_param_access *access = get_access (desc, offset, size, ctx);
1803 if (!access)
1804 return;
1805
1806 if (ctx == ISRA_CTX_ARG)
1807 {
1808 gcc_checking_assert (call_info);
1809
1810 if (!deref)
1811 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1812 size / BITS_PER_UNIT);
1813 else
1814 /* This is not a pass-through of a pointer, this is a use like any
1815 other. */
1816 access->nonarg = true;
1817 }
1818
1819 if (!access->type)
1820 {
1821 access->type = type;
1822 access->alias_ptr_type = reference_alias_ptr_type (expr);
1823 access->reverse = reverse;
1824 }
1825 else
1826 {
1827 if (exp_align < TYPE_ALIGN (access->type))
1828 {
1829 disqualify_split_candidate (desc, "Reference has lower alignment "
1830 "than a previous one.");
1831 return;
1832 }
1833 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1834 {
1835 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1836 return;
1837 }
1838 if (access->reverse != reverse)
1839 {
1840 disqualify_split_candidate (desc, "Both normal and reverse "
1841 "scalar storage order.");
1842 return;
1843 }
1844 if (!deref
1845 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1846 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1847 {
1848 /* We need the same aggregate type on all accesses to be able to
1849 distinguish transformation spots from pass-through arguments in
dfea3d6f
JJ
1850 the transformation phase. */
1851 disqualify_split_candidate (desc, "We do not support aggregate "
ff6686d2
MJ
1852 "type punning.");
1853 return;
1854 }
1855
1856 if (type_prevails_p (access->type, type))
1857 access->type = type;
1858 }
1859}
1860
1861/* Scan body function described by NODE and FUN and create access trees for
1862 parameters. */
1863
1864static void
1865scan_function (cgraph_node *node, struct function *fun)
1866{
1867 basic_block bb;
1868
1869 FOR_EACH_BB_FN (bb, fun)
1870 {
1871 gimple_stmt_iterator gsi;
1872 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1873 {
1874 gimple *stmt = gsi_stmt (gsi);
1875
1876 if (stmt_can_throw_external (fun, stmt))
1877 bitmap_set_bit (final_bbs, bb->index);
1878 switch (gimple_code (stmt))
1879 {
1880 case GIMPLE_RETURN:
1881 {
1882 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1883 if (t != NULL_TREE)
1884 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1885 bitmap_set_bit (final_bbs, bb->index);
1886 }
1887 break;
1888
1889 case GIMPLE_ASSIGN:
1890 if (gimple_assign_single_p (stmt)
1891 && !gimple_clobber_p (stmt))
1892 {
1893 tree rhs = gimple_assign_rhs1 (stmt);
1894 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1895 tree lhs = gimple_assign_lhs (stmt);
1896 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1897 }
1898 break;
1899
1900 case GIMPLE_CALL:
1901 {
1902 unsigned argument_count = gimple_call_num_args (stmt);
9707b593
MJ
1903 isra_scan_context ctx = ISRA_CTX_ARG;
1904 scan_call_info call_info, *call_info_p = &call_info;
1905 if (gimple_call_internal_p (stmt))
1906 {
1907 call_info_p = NULL;
1908 ctx = ISRA_CTX_LOAD;
1909 internal_fn ifn = gimple_call_internal_fn (stmt);
1910 if (internal_store_fn_p (ifn))
1911 ctx = ISRA_CTX_STORE;
1912 }
1913 else
1914 {
1915 call_info.cs = node->get_edge (stmt);
1916 call_info.argument_count = argument_count;
1917 }
ff6686d2
MJ
1918
1919 for (unsigned i = 0; i < argument_count; i++)
1920 {
1921 call_info.arg_idx = i;
1922 scan_expr_access (gimple_call_arg (stmt, i), stmt,
9707b593 1923 ctx, bb, call_info_p);
ff6686d2
MJ
1924 }
1925
1926 tree lhs = gimple_call_lhs (stmt);
1927 if (lhs)
1928 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1929 int flags = gimple_call_flags (stmt);
1930 if ((flags & (ECF_CONST | ECF_PURE)) == 0)
1931 bitmap_set_bit (final_bbs, bb->index);
1932 }
1933 break;
1934
1935 case GIMPLE_ASM:
1936 {
1937 gasm *asm_stmt = as_a <gasm *> (stmt);
1938 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1939 asm_visit_addr);
1940 bitmap_set_bit (final_bbs, bb->index);
1941
1942 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1943 {
1944 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1945 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1946 }
1947 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1948 {
1949 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1950 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1951 }
1952 }
1953 break;
1954
1955 default:
1956 break;
1957 }
1958 }
1959 }
1960}
1961
9b8741c9
MJ
1962/* Return true if SSA_NAME NAME of function described by FUN is only used in
1963 return statements, or if results of any operations it is involved in are
1964 only used in return statements. ANALYZED is a bitmap that tracks which SSA
1965 names we have already started investigating. */
ff6686d2
MJ
1966
1967static bool
9b8741c9 1968ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
ff6686d2
MJ
1969{
1970 bool res = true;
1971 imm_use_iterator imm_iter;
1972 gimple *stmt;
1973
1974 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1975 {
1976 if (is_gimple_debug (stmt))
1977 continue;
1978
1979 if (gimple_code (stmt) == GIMPLE_RETURN)
1980 {
1981 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1982 if (t != name)
1983 {
1984 res = false;
640296c3 1985 break;
ff6686d2
MJ
1986 }
1987 }
9b8741c9
MJ
1988 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
1989 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1990 || gimple_code (stmt) == GIMPLE_PHI))
ff6686d2
MJ
1991 {
1992 /* TODO: And perhaps for const function calls too? */
1993 tree lhs;
1994 if (gimple_code (stmt) == GIMPLE_PHI)
1995 lhs = gimple_phi_result (stmt);
1996 else
1997 lhs = gimple_assign_lhs (stmt);
1998
1999 if (TREE_CODE (lhs) != SSA_NAME)
2000 {
2001 res = false;
640296c3 2002 break;
ff6686d2
MJ
2003 }
2004 gcc_assert (!gimple_vdef (stmt));
2005 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
9b8741c9 2006 && !ssa_name_only_returned_p (fun, lhs, analyzed))
ff6686d2
MJ
2007 {
2008 res = false;
640296c3 2009 break;
ff6686d2
MJ
2010 }
2011 }
2012 else
2013 {
2014 res = false;
640296c3 2015 break;
ff6686d2
MJ
2016 }
2017 }
2018 return res;
2019}
2020
2021/* Inspect the uses of the return value of the call associated with CS, and if
2022 it is not used or if it is only used to construct the return value of the
2023 caller, mark it as such in call or caller summary. Also check for
2024 misaligned arguments. */
2025
2026static void
2027isra_analyze_call (cgraph_edge *cs)
2028{
2029 gcall *call_stmt = cs->call_stmt;
2030 unsigned count = gimple_call_num_args (call_stmt);
2031 isra_call_summary *csum = call_sums->get_create (cs);
2032
2033 for (unsigned i = 0; i < count; i++)
2034 {
2035 tree arg = gimple_call_arg (call_stmt, i);
2036 if (is_gimple_reg (arg))
2037 continue;
2038
2039 tree offset;
2040 poly_int64 bitsize, bitpos;
2041 machine_mode mode;
2042 int unsignedp, reversep, volatilep = 0;
2043 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2044 &unsignedp, &reversep, &volatilep);
2045 if (!multiple_p (bitpos, BITS_PER_UNIT))
2046 {
2047 csum->m_bit_aligned_arg = true;
2048 break;
2049 }
2050 }
2051
2052 tree lhs = gimple_call_lhs (call_stmt);
2053 if (lhs)
2054 {
2055 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2056 from this function (without being read anywhere). */
2057 if (TREE_CODE (lhs) == SSA_NAME)
2058 {
2059 bitmap analyzed = BITMAP_ALLOC (NULL);
9b8741c9
MJ
2060 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2061 lhs, analyzed))
ff6686d2
MJ
2062 csum->m_return_returned = true;
2063 BITMAP_FREE (analyzed);
2064 }
2065 }
2066 else
2067 csum->m_return_ignored = true;
2068}
2069
2070/* Look at all calls going out of NODE, described also by IFS and perform all
2071 analyses necessary for IPA-SRA that are not done at body scan time or done
2072 even when body is not scanned because the function is not a candidate. */
2073
2074static void
2075isra_analyze_all_outgoing_calls (cgraph_node *node)
2076{
2077 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2078 isra_analyze_call (cs);
2079 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2080 isra_analyze_call (cs);
2081}
2082
2083/* Dump a dereferences table with heading STR to file F. */
2084
2085static void
2086dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2087{
2088 basic_block bb;
2089
2090 fprintf (dump_file, "%s", str);
2091 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2092 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2093 {
2094 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2095 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2096 {
2097 int i;
2098 for (i = 0; i < by_ref_count; i++)
2099 {
2100 int idx = bb->index * by_ref_count + i;
2101 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2102 }
2103 }
2104 fprintf (f, "\n");
2105 }
2106 fprintf (dump_file, "\n");
2107}
2108
2109/* Propagate distances in bb_dereferences in the opposite direction than the
2110 control flow edges, in each step storing the maximum of the current value
2111 and the minimum of all successors. These steps are repeated until the table
2112 stabilizes. Note that BBs which might terminate the functions (according to
2113 final_bbs bitmap) never updated in this way. */
2114
2115static void
2116propagate_dereference_distances (struct function *fun)
2117{
2118 basic_block bb;
2119
2120 if (dump_file && (dump_flags & TDF_DETAILS))
2121 dump_dereferences_table (dump_file, fun,
2122 "Dereference table before propagation:\n");
2123
2124 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2125 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2126 FOR_EACH_BB_FN (bb, fun)
2127 {
2128 queue.quick_push (bb);
2129 bb->aux = bb;
2130 }
2131
2132 while (!queue.is_empty ())
2133 {
2134 edge_iterator ei;
2135 edge e;
2136 bool change = false;
2137 int i;
2138
2139 bb = queue.pop ();
2140 bb->aux = NULL;
2141
2142 if (bitmap_bit_p (final_bbs, bb->index))
2143 continue;
2144
2145 for (i = 0; i < by_ref_count; i++)
2146 {
2147 int idx = bb->index * by_ref_count + i;
2148 bool first = true;
2149 HOST_WIDE_INT inh = 0;
2150
2151 FOR_EACH_EDGE (e, ei, bb->succs)
2152 {
2153 int succ_idx = e->dest->index * by_ref_count + i;
2154
2155 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2156 continue;
2157
2158 if (first)
2159 {
2160 first = false;
2161 inh = bb_dereferences [succ_idx];
2162 }
2163 else if (bb_dereferences [succ_idx] < inh)
2164 inh = bb_dereferences [succ_idx];
2165 }
2166
2167 if (!first && bb_dereferences[idx] < inh)
2168 {
2169 bb_dereferences[idx] = inh;
2170 change = true;
2171 }
2172 }
2173
2174 if (change)
2175 FOR_EACH_EDGE (e, ei, bb->preds)
2176 {
2177 if (e->src->aux)
2178 continue;
2179
2180 e->src->aux = e->src;
2181 queue.quick_push (e->src);
2182 }
2183 }
2184
2185 if (dump_file && (dump_flags & TDF_DETAILS))
2186 dump_dereferences_table (dump_file, fun,
2187 "Dereference table after propagation:\n");
2188}
2189
2190/* Perform basic checks on ACCESS to PARM described by DESC and all its
2191 children, return true if the parameter cannot be split, otherwise return
2192 true and update *TOTAL_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be the
2193 index of the entry BB in the function of PARM. */
2194
2195static bool
2196check_gensum_access (tree parm, gensum_param_desc *desc,
2197 gensum_param_access *access,
2198 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2199 int entry_bb_index)
2200{
2201 if (access->nonarg)
2202 {
2203 *only_calls = false;
2204 *nonarg_acc_size += access->size;
2205
2206 if (access->first_child)
2207 {
2208 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2209 return true;
2210 }
2211 }
2212 /* Do not decompose a non-BLKmode param in a way that would create
2213 BLKmode params. Especially for by-reference passing (thus,
2214 pointer-type param) this is hardly worthwhile. */
2215 if (DECL_MODE (parm) != BLKmode
2216 && TYPE_MODE (access->type) == BLKmode)
2217 {
2218 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2219 return true;
2220 }
2221
2222 if (desc->by_ref)
2223 {
2224 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2225 if ((access->offset + access->size) > bb_dereferences[idx])
2226 {
2227 disqualify_split_candidate (desc, "Would create a possibly "
2228 "illegal dereference in a caller.");
2229 return true;
2230 }
2231 }
2232
2233 for (gensum_param_access *ch = access->first_child;
2234 ch;
2235 ch = ch->next_sibling)
2236 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2237 entry_bb_index))
2238 return true;
2239
2240 return false;
2241}
2242
2243/* Copy data from FROM and all of its children to a vector of accesses in IPA
2244 descriptor DESC. */
2245
2246static void
2247copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2248{
2249 param_access *to = ggc_cleared_alloc<param_access> ();
2250 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2251 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2252 to->unit_offset = from->offset / BITS_PER_UNIT;
2253 to->unit_size = from->size / BITS_PER_UNIT;
2254 to->type = from->type;
2255 to->alias_ptr_type = from->alias_ptr_type;
2256 to->certain = from->nonarg;
2257 to->reverse = from->reverse;
2258 vec_safe_push (desc->accesses, to);
2259
2260 for (gensum_param_access *ch = from->first_child;
2261 ch;
2262 ch = ch->next_sibling)
2263 copy_accesses_to_ipa_desc (ch, desc);
2264}
2265
2266/* Analyze function body scan results stored in param_accesses and
2267 param_accesses, detect possible transformations and store information of
2268 those in function summary. NODE, FUN and IFS are all various structures
2269 describing the currently analyzed function. */
2270
2271static void
2272process_scan_results (cgraph_node *node, struct function *fun,
2273 isra_func_summary *ifs,
2274 vec<gensum_param_desc> *param_descriptions)
2275{
2276 bool check_pass_throughs = false;
2277 bool dereferences_propagated = false;
2278 tree parm = DECL_ARGUMENTS (node->decl);
2279 unsigned param_count = param_descriptions->length();
2280
2281 for (unsigned desc_index = 0;
2282 desc_index < param_count;
2283 desc_index++, parm = DECL_CHAIN (parm))
2284 {
2285 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
581b519f 2286 if (!desc->split_candidate)
ff6686d2
MJ
2287 continue;
2288
2289 if (flag_checking)
2290 isra_verify_access_tree (desc->accesses);
2291
2292 if (!dereferences_propagated
2293 && desc->by_ref
2294 && desc->accesses)
2295 {
2296 propagate_dereference_distances (fun);
2297 dereferences_propagated = true;
2298 }
2299
2300 HOST_WIDE_INT nonarg_acc_size = 0;
2301 bool only_calls = true;
2302 bool check_failed = false;
2303
2304 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2305 for (gensum_param_access *acc = desc->accesses;
2306 acc;
2307 acc = acc->next_sibling)
2308 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2309 entry_bb_index))
2310 {
2311 check_failed = true;
2312 break;
2313 }
2314 if (check_failed)
2315 continue;
2316
2317 if (only_calls)
2318 desc->locally_unused = true;
2319
2320 HOST_WIDE_INT cur_param_size
2321 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2322 HOST_WIDE_INT param_size_limit;
2323 if (!desc->by_ref || optimize_function_for_size_p (fun))
2324 param_size_limit = cur_param_size;
2325 else
fdfd7f53
ML
2326 param_size_limit
2327 = opt_for_fn (node->decl,
2328 param_ipa_sra_ptr_growth_factor) * cur_param_size;
ff6686d2
MJ
2329 if (nonarg_acc_size > param_size_limit
2330 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2331 {
f8cb8bcd
JJ
2332 disqualify_split_candidate (desc, "Would result into a too big set "
2333 "of replacements.");
ff6686d2
MJ
2334 }
2335 else
2336 {
2337 /* create_parameter_descriptors makes sure unit sizes of all
2338 candidate parameters fit unsigned integers restricted to
2339 ISRA_ARG_SIZE_LIMIT. */
2340 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2341 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2342 if (desc->split_candidate && desc->ptr_pt_count)
2343 {
2344 gcc_assert (desc->by_ref);
2345 check_pass_throughs = true;
2346 }
2347 }
2348 }
2349
2350 /* When a pointer parameter is passed-through to a callee, in which it is
2351 only used to read only one or a few items, we can attempt to transform it
2352 to obtaining and passing through the items instead of the pointer. But we
2353 must take extra care that 1) we do not introduce any segfault by moving
2354 dereferences above control flow and that 2) the data is not modified
2355 through an alias in this function. The IPA analysis must not introduce
2356 any accesses candidates unless it can prove both.
2357
2358 The current solution is very crude as it consists of ensuring that the
2359 call postdominates entry BB and that the definition of VUSE of the call is
2360 default definition. TODO: For non-recursive callees in the same
2361 compilation unit we could do better by doing analysis in topological order
2362 an looking into access candidates of callees, using their alias_ptr_types
2363 to attempt real AA. We could also use the maximum known dereferenced
2364 offset in this function at IPA level.
2365
2366 TODO: Measure the overhead and the effect of just being pessimistic.
dfea3d6f 2367 Maybe this is only -O3 material?
ff6686d2
MJ
2368 */
2369 bool pdoms_calculated = false;
2370 if (check_pass_throughs)
2371 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2372 {
2373 gcall *call_stmt = cs->call_stmt;
2374 tree vuse = gimple_vuse (call_stmt);
2375
2376 /* If the callee is a const function, we don't get a VUSE. In such
2377 case there will be no memory accesses in the called function (or the
2378 const attribute is wrong) and then we just don't care. */
2379 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2380
2381 unsigned count = gimple_call_num_args (call_stmt);
2382 isra_call_summary *csum = call_sums->get_create (cs);
2383 csum->init_inputs (count);
763121cc 2384 csum->m_before_any_store = uses_memory_as_obtained;
ff6686d2
MJ
2385 for (unsigned argidx = 0; argidx < count; argidx++)
2386 {
2387 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2388 continue;
2389 unsigned pidx
2390 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2391 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2392 if (!desc->split_candidate)
2393 {
2394 csum->m_arg_flow[argidx].pointer_pass_through = false;
2395 continue;
2396 }
2397 if (!uses_memory_as_obtained)
2398 continue;
2399
2400 /* Post-dominator check placed last, hoping that it usually won't
2401 be needed. */
2402 if (!pdoms_calculated)
2403 {
2404 gcc_checking_assert (cfun);
ff6686d2
MJ
2405 connect_infinite_loops_to_exit ();
2406 calculate_dominance_info (CDI_POST_DOMINATORS);
2407 pdoms_calculated = true;
2408 }
2409 if (dominated_by_p (CDI_POST_DOMINATORS,
2410 gimple_bb (call_stmt),
2411 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2412 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2413 }
2414
2415 }
2416 if (pdoms_calculated)
2417 {
2418 free_dominance_info (CDI_POST_DOMINATORS);
2419 remove_fake_exit_edges ();
2420 }
2421
2422 /* TODO: Add early exit if we disqualified everything. This also requires
2423 that we either relax the restriction that
dfea3d6f 2424 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
ff6686d2
MJ
2425 or store the number of parameters to IPA-SRA function summary and use that
2426 when just removing params. */
2427
2428 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2429 ifs->m_parameters->quick_grow_cleared (param_count);
2430 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2431 {
2432 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2433 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2434
2435 d->param_size_limit = s->param_size_limit;
2436 d->size_reached = s->nonarg_acc_size;
2437 d->locally_unused = s->locally_unused;
2438 d->split_candidate = s->split_candidate;
2439 d->by_ref = s->by_ref;
2440
2441 for (gensum_param_access *acc = s->accesses;
2442 acc;
2443 acc = acc->next_sibling)
2444 copy_accesses_to_ipa_desc (acc, d);
2445 }
2446
2447 if (dump_file)
2448 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2449}
2450
2451/* Return true if there are any overlaps among certain accesses of DESC. If
dfea3d6f 2452 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
ff6686d2
MJ
2453 too. DESC is assumed to be a split candidate that is not locally
2454 unused. */
2455
2456static bool
2457overlapping_certain_accesses_p (isra_param_desc *desc,
2458 bool *certain_access_present_p)
2459{
2460 unsigned pclen = vec_safe_length (desc->accesses);
2461 for (unsigned i = 0; i < pclen; i++)
2462 {
2463 param_access *a1 = (*desc->accesses)[i];
2464
2465 if (!a1->certain)
2466 continue;
2467 if (certain_access_present_p)
2468 *certain_access_present_p = true;
2469 for (unsigned j = i + 1; j < pclen; j++)
2470 {
2471 param_access *a2 = (*desc->accesses)[j];
2472 if (a2->certain
2473 && a1->unit_offset < a2->unit_offset + a2->unit_size
2474 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2475 return true;
2476 }
2477 }
2478 return false;
2479}
2480
2481/* Check for any overlaps of certain param accesses among splitting candidates
2482 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2483 check that used splitting candidates have at least one certain access. */
2484
2485static void
2486verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2487{
2488 isra_func_summary *ifs = func_sums->get (node);
2489 if (!ifs || !ifs->m_candidate)
2490 return;
2491 unsigned param_count = vec_safe_length (ifs->m_parameters);
2492 for (unsigned pidx = 0; pidx < param_count; pidx++)
2493 {
2494 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2495 if (!desc->split_candidate || desc->locally_unused)
2496 continue;
2497
2498 bool certain_access_present = !certain_must_exist;
2499 if (overlapping_certain_accesses_p (desc, &certain_access_present))
e2b1923b 2500 internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
ff6686d2
MJ
2501 "which overlap", node->dump_name (), pidx);
2502 if (!certain_access_present)
2503 internal_error ("Function %s, parameter %u, is used but does not "
2504 "have any certain IPA-SRA access",
2505 node->dump_name (), pidx);
2506 }
2507}
2508
ff6686d2
MJ
2509/* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2510 this compilation unit and create summary structures describing IPA-SRA
2511 opportunities and constraints in them. */
2512
2513static void
2514ipa_sra_generate_summary (void)
2515{
2516 struct cgraph_node *node;
2517
2518 gcc_checking_assert (!func_sums);
2519 gcc_checking_assert (!call_sums);
2520 func_sums
78cd68c0 2521 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
ff6686d2
MJ
2522 ipa_sra_function_summaries (symtab, true));
2523 call_sums = new ipa_sra_call_summaries (symtab);
2524
2525 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2526 ipa_sra_summarize_function (node);
2527 return;
2528}
2529
dfea3d6f 2530/* Write intraprocedural analysis information about E and all of its outgoing
ff6686d2
MJ
2531 edges into a stream for LTO WPA. */
2532
2533static void
2534isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2535{
2536 isra_call_summary *csum = call_sums->get (e);
2537 unsigned input_count = csum->m_arg_flow.length ();
2538 streamer_write_uhwi (ob, input_count);
2539 for (unsigned i = 0; i < input_count; i++)
2540 {
2541 isra_param_flow *ipf = &csum->m_arg_flow[i];
2542 streamer_write_hwi (ob, ipf->length);
2543 bitpack_d bp = bitpack_create (ob->main_stream);
2544 for (int j = 0; j < ipf->length; j++)
2545 bp_pack_value (&bp, ipf->inputs[j], 8);
2546 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2547 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2548 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2549 streamer_write_bitpack (&bp);
2550 streamer_write_uhwi (ob, ipf->unit_offset);
2551 streamer_write_uhwi (ob, ipf->unit_size);
2552 }
2553 bitpack_d bp = bitpack_create (ob->main_stream);
2554 bp_pack_value (&bp, csum->m_return_ignored, 1);
2555 bp_pack_value (&bp, csum->m_return_returned, 1);
2556 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
763121cc 2557 bp_pack_value (&bp, csum->m_before_any_store, 1);
ff6686d2
MJ
2558 streamer_write_bitpack (&bp);
2559}
2560
dfea3d6f 2561/* Write intraprocedural analysis information about NODE and all of its outgoing
ff6686d2
MJ
2562 edges into a stream for LTO WPA. */
2563
2564static void
2565isra_write_node_summary (output_block *ob, cgraph_node *node)
2566{
2567 isra_func_summary *ifs = func_sums->get (node);
2568 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2569 int node_ref = lto_symtab_encoder_encode (encoder, node);
2570 streamer_write_uhwi (ob, node_ref);
2571
2572 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2573 streamer_write_uhwi (ob, param_desc_count);
2574 for (unsigned i = 0; i < param_desc_count; i++)
2575 {
2576 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2577 unsigned access_count = vec_safe_length (desc->accesses);
2578 streamer_write_uhwi (ob, access_count);
2579 for (unsigned j = 0; j < access_count; j++)
2580 {
2581 param_access *acc = (*desc->accesses)[j];
2582 stream_write_tree (ob, acc->type, true);
2583 stream_write_tree (ob, acc->alias_ptr_type, true);
2584 streamer_write_uhwi (ob, acc->unit_offset);
2585 streamer_write_uhwi (ob, acc->unit_size);
2586 bitpack_d bp = bitpack_create (ob->main_stream);
2587 bp_pack_value (&bp, acc->certain, 1);
2588 streamer_write_bitpack (&bp);
2589 }
2590 streamer_write_uhwi (ob, desc->param_size_limit);
2591 streamer_write_uhwi (ob, desc->size_reached);
2592 bitpack_d bp = bitpack_create (ob->main_stream);
2593 bp_pack_value (&bp, desc->locally_unused, 1);
2594 bp_pack_value (&bp, desc->split_candidate, 1);
2595 bp_pack_value (&bp, desc->by_ref, 1);
2596 streamer_write_bitpack (&bp);
2597 }
2598 bitpack_d bp = bitpack_create (ob->main_stream);
2599 bp_pack_value (&bp, ifs->m_candidate, 1);
2600 bp_pack_value (&bp, ifs->m_returns_value, 1);
2601 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2602 gcc_assert (!ifs->m_queued);
2603 streamer_write_bitpack (&bp);
2604
2605 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2606 isra_write_edge_summary (ob, e);
2607 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2608 isra_write_edge_summary (ob, e);
2609}
2610
dfea3d6f 2611/* Write intraprocedural analysis information into a stream for LTO WPA. */
ff6686d2
MJ
2612
2613static void
2614ipa_sra_write_summary (void)
2615{
2616 if (!func_sums || !call_sums)
2617 return;
2618
2619 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2620 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2621 ob->symbol = NULL;
2622
2623 unsigned int count = 0;
2624 lto_symtab_encoder_iterator lsei;
2625 for (lsei = lsei_start_function_in_partition (encoder);
2626 !lsei_end_p (lsei);
2627 lsei_next_function_in_partition (&lsei))
2628 {
2629 cgraph_node *node = lsei_cgraph_node (lsei);
2630 if (node->has_gimple_body_p ()
2631 && func_sums->get (node) != NULL)
2632 count++;
2633 }
2634 streamer_write_uhwi (ob, count);
2635
2636 /* Process all of the functions. */
2637 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2638 lsei_next_function_in_partition (&lsei))
2639 {
2640 cgraph_node *node = lsei_cgraph_node (lsei);
2641 if (node->has_gimple_body_p ()
2642 && func_sums->get (node) != NULL)
2643 isra_write_node_summary (ob, node);
2644 }
2645 streamer_write_char_stream (ob->main_stream, 0);
2646 produce_asm (ob, NULL);
2647 destroy_output_block (ob);
2648}
2649
dfea3d6f 2650/* Read intraprocedural analysis information about E and all of its outgoing
ff6686d2
MJ
2651 edges into a stream for LTO WPA. */
2652
2653static void
2654isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2655{
2656 isra_call_summary *csum = call_sums->get_create (cs);
2657 unsigned input_count = streamer_read_uhwi (ib);
2658 csum->init_inputs (input_count);
2659 for (unsigned i = 0; i < input_count; i++)
2660 {
2661 isra_param_flow *ipf = &csum->m_arg_flow[i];
2662 ipf->length = streamer_read_hwi (ib);
2663 bitpack_d bp = streamer_read_bitpack (ib);
2664 for (int j = 0; j < ipf->length; j++)
2665 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2666 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2667 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2668 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2669 ipf->unit_offset = streamer_read_uhwi (ib);
2670 ipf->unit_size = streamer_read_uhwi (ib);
2671 }
2672 bitpack_d bp = streamer_read_bitpack (ib);
2673 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2674 csum->m_return_returned = bp_unpack_value (&bp, 1);
2675 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
763121cc 2676 csum->m_before_any_store = bp_unpack_value (&bp, 1);
ff6686d2
MJ
2677}
2678
dfea3d6f 2679/* Read intraprocedural analysis information about NODE and all of its outgoing
ff6686d2
MJ
2680 edges into a stream for LTO WPA. */
2681
2682static void
2683isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2684 struct data_in *data_in)
2685{
2686 isra_func_summary *ifs = func_sums->get_create (node);
2687 unsigned param_desc_count = streamer_read_uhwi (ib);
2688 if (param_desc_count > 0)
2689 {
2690 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2691 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2692 }
2693 for (unsigned i = 0; i < param_desc_count; i++)
2694 {
2695 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2696 unsigned access_count = streamer_read_uhwi (ib);
2697 for (unsigned j = 0; j < access_count; j++)
2698 {
2699 param_access *acc = ggc_cleared_alloc<param_access> ();
2700 acc->type = stream_read_tree (ib, data_in);
2701 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2702 acc->unit_offset = streamer_read_uhwi (ib);
2703 acc->unit_size = streamer_read_uhwi (ib);
2704 bitpack_d bp = streamer_read_bitpack (ib);
2705 acc->certain = bp_unpack_value (&bp, 1);
2706 vec_safe_push (desc->accesses, acc);
2707 }
2708 desc->param_size_limit = streamer_read_uhwi (ib);
2709 desc->size_reached = streamer_read_uhwi (ib);
2710 bitpack_d bp = streamer_read_bitpack (ib);
2711 desc->locally_unused = bp_unpack_value (&bp, 1);
2712 desc->split_candidate = bp_unpack_value (&bp, 1);
2713 desc->by_ref = bp_unpack_value (&bp, 1);
2714 }
2715 bitpack_d bp = streamer_read_bitpack (ib);
2716 ifs->m_candidate = bp_unpack_value (&bp, 1);
2717 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2718 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2719 ifs->m_queued = 0;
2720
2721 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2722 isra_read_edge_summary (ib, e);
2723 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2724 isra_read_edge_summary (ib, e);
2725}
2726
2727/* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2728 data DATA. TODO: This function was copied almost verbatim from ipa-prop.c,
2729 it should be possible to unify them somehow. */
2730
2731static void
2732isra_read_summary_section (struct lto_file_decl_data *file_data,
2733 const char *data, size_t len)
2734{
2735 const struct lto_function_header *header =
2736 (const struct lto_function_header *) data;
2737 const int cfg_offset = sizeof (struct lto_function_header);
2738 const int main_offset = cfg_offset + header->cfg_size;
2739 const int string_offset = main_offset + header->main_size;
2740 struct data_in *data_in;
2741 unsigned int i;
2742 unsigned int count;
2743
2744 lto_input_block ib_main ((const char *) data + main_offset,
2745 header->main_size, file_data->mode_table);
2746
2747 data_in =
2748 lto_data_in_create (file_data, (const char *) data + string_offset,
2749 header->string_size, vNULL);
2750 count = streamer_read_uhwi (&ib_main);
2751
2752 for (i = 0; i < count; i++)
2753 {
2754 unsigned int index;
2755 struct cgraph_node *node;
2756 lto_symtab_encoder_t encoder;
2757
2758 index = streamer_read_uhwi (&ib_main);
2759 encoder = file_data->symtab_node_encoder;
2760 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2761 index));
2762 gcc_assert (node->definition);
2763 isra_read_node_info (&ib_main, node, data_in);
2764 }
2765 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2766 len);
2767 lto_data_in_delete (data_in);
2768}
2769
dfea3d6f 2770/* Read intraprocedural analysis information into a stream for LTO WPA. */
ff6686d2
MJ
2771
2772static void
2773ipa_sra_read_summary (void)
2774{
2775 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2776 struct lto_file_decl_data *file_data;
2777 unsigned int j = 0;
2778
2779 gcc_checking_assert (!func_sums);
2780 gcc_checking_assert (!call_sums);
2781 func_sums
78cd68c0 2782 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
ff6686d2
MJ
2783 ipa_sra_function_summaries (symtab, true));
2784 call_sums = new ipa_sra_call_summaries (symtab);
2785
2786 while ((file_data = file_data_vec[j++]))
2787 {
2788 size_t len;
3c56d8d8
ML
2789 const char *data
2790 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
ff6686d2
MJ
2791 if (data)
2792 isra_read_summary_section (file_data, data, len);
2793 }
2794}
2795
2796/* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2797
2798static void
2799ipa_sra_dump_all_summaries (FILE *f)
2800{
2801 cgraph_node *node;
2802 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2803 {
2804 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2805
2806 isra_func_summary *ifs = func_sums->get (node);
2807 if (!ifs)
717d278a
MJ
2808 fprintf (f, " Function does not have any associated IPA-SRA "
2809 "summary\n");
2810 else
ff6686d2 2811 {
717d278a
MJ
2812 if (!ifs->m_candidate)
2813 {
2814 fprintf (f, " Not a candidate function\n");
2815 continue;
2816 }
2817 if (ifs->m_returns_value)
2818 fprintf (f, " Returns value\n");
2819 if (vec_safe_is_empty (ifs->m_parameters))
2820 fprintf (f, " No parameter information. \n");
2821 else
2822 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2823 {
2824 fprintf (f, " Descriptor for parameter %i:\n", i);
2825 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2826 }
2827 fprintf (f, "\n");
ff6686d2 2828 }
ff6686d2
MJ
2829
2830 struct cgraph_edge *cs;
2831 for (cs = node->callees; cs; cs = cs->next_callee)
2832 {
2833 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2834 cs->callee->dump_name ());
2835 isra_call_summary *csum = call_sums->get (cs);
2836 if (csum)
2837 csum->dump (f);
2838 else
2839 fprintf (f, " Call summary is MISSING!\n");
2840 }
2841
2842 }
2843 fprintf (f, "\n\n");
2844}
2845
2846/* Perform function-scope viability tests that can be only made at IPA level
2847 and return false if the function is deemed unsuitable for IPA-SRA. */
2848
2849static bool
2850ipa_sra_ipa_function_checks (cgraph_node *node)
2851{
2852 if (!node->can_be_local_p ())
2853 {
2854 if (dump_file)
2855 fprintf (dump_file, "Function %s disqualified because it cannot be "
2856 "made local.\n", node->dump_name ());
2857 return false;
2858 }
87f94429 2859 if (!node->can_change_signature)
ff6686d2
MJ
2860 {
2861 if (dump_file)
2862 fprintf (dump_file, "Function can not change signature.\n");
2863 return false;
2864 }
2865
2866 return true;
2867}
2868
2869/* Issues found out by check_callers_for_issues. */
2870
2871struct caller_issues
2872{
b90061c6
MJ
2873 /* The candidate being considered. */
2874 cgraph_node *candidate;
ff6686d2
MJ
2875 /* There is a thunk among callers. */
2876 bool thunk;
2877 /* Call site with no available information. */
2878 bool unknown_callsite;
b90061c6
MJ
2879 /* Call from outside the the candidate's comdat group. */
2880 bool call_from_outside_comdat;
ff6686d2
MJ
2881 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
2882 bool bit_aligned_aggregate_argument;
2883};
2884
2885/* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2886 that apply. */
2887
2888static bool
2889check_for_caller_issues (struct cgraph_node *node, void *data)
2890{
2891 struct caller_issues *issues = (struct caller_issues *) data;
2892
2893 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2894 {
67f3791f 2895 if (cs->caller->thunk)
ff6686d2
MJ
2896 {
2897 issues->thunk = true;
2898 /* TODO: We should be able to process at least some types of
2899 thunks. */
2900 return true;
2901 }
b90061c6
MJ
2902 if (issues->candidate->calls_comdat_local
2903 && issues->candidate->same_comdat_group
2904 && !issues->candidate->in_same_comdat_group_p (cs->caller))
2905 {
2906 issues->call_from_outside_comdat = true;
2907 return true;
2908 }
ff6686d2
MJ
2909
2910 isra_call_summary *csum = call_sums->get (cs);
2911 if (!csum)
2912 {
2913 issues->unknown_callsite = true;
2914 return true;
2915 }
2916
2917 if (csum->m_bit_aligned_arg)
2918 issues->bit_aligned_aggregate_argument = true;
2919 }
2920 return false;
2921}
2922
2923/* Look at all incoming edges to NODE, including aliases and thunks and look
2924 for problems. Return true if NODE type should not be modified at all. */
2925
2926static bool
2927check_all_callers_for_issues (cgraph_node *node)
2928{
2929 struct caller_issues issues;
2930 memset (&issues, 0, sizeof (issues));
b90061c6 2931 issues.candidate = node;
ff6686d2
MJ
2932
2933 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2934 if (issues.unknown_callsite)
2935 {
2936 if (dump_file && (dump_flags & TDF_DETAILS))
2937 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2938 "all modifications.\n", node->dump_name ());
2939 return true;
2940 }
2941 /* TODO: We should be able to process at least some types of thunks. */
2942 if (issues.thunk)
2943 {
2944 if (dump_file && (dump_flags & TDF_DETAILS))
2945 fprintf (dump_file, "A call of %s is through thunk, which are not"
2946 " handled yet. Disabling all modifications.\n",
2947 node->dump_name ());
2948 return true;
2949 }
b90061c6
MJ
2950 if (issues.call_from_outside_comdat)
2951 {
2952 if (dump_file)
2953 fprintf (dump_file, "Function would become private comdat called "
2954 "outside of its comdat group.\n");
2955 return true;
2956 }
ff6686d2
MJ
2957
2958 if (issues.bit_aligned_aggregate_argument)
2959 {
2960 /* Let's only remove parameters/return values from such functions.
2961 TODO: We could only prevent splitting the problematic parameters if
2962 anybody thinks it is worth it. */
2963 if (dump_file && (dump_flags & TDF_DETAILS))
dfea3d6f 2964 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
ff6686d2
MJ
2965 " disabling parameter splitting.\n", node->dump_name ());
2966
2967 isra_func_summary *ifs = func_sums->get (node);
2968 gcc_checking_assert (ifs);
2969 unsigned param_count = vec_safe_length (ifs->m_parameters);
2970 for (unsigned i = 0; i < param_count; i++)
2971 (*ifs->m_parameters)[i].split_candidate = false;
2972 }
2973 return false;
2974}
2975
2976/* Find the access with corresponding OFFSET and SIZE among accesses in
2977 PARAM_DESC and return it or NULL if such an access is not there. */
2978
2979static param_access *
2980find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
2981{
2982 unsigned pclen = vec_safe_length (param_desc->accesses);
2983
2984 /* The search is linear but the number of stored accesses is bound by
2985 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
2986
2987 for (unsigned i = 0; i < pclen; i++)
2988 if ((*param_desc->accesses)[i]->unit_offset == offset
2989 && (*param_desc->accesses)[i]->unit_size == size)
2990 return (*param_desc->accesses)[i];
2991
2992 return NULL;
2993}
2994
2995/* Return iff the total size of definite replacement SIZE would violate the
2996 limit set for it in PARAM. */
2997
2998static bool
2999size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3000{
3001 unsigned limit = desc->param_size_limit;
3002 if (size > limit
3003 || (!desc->by_ref && size == limit))
3004 return true;
3005 return false;
3006}
3007
3008/* Increase reached size of DESC by SIZE or disqualify it if it would violate
3009 the set limit. IDX is the parameter number which is dumped when
3010 disqualifying. */
3011
3012static void
3013bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3014{
3015 unsigned after = desc->size_reached + size;
3016 if (size_would_violate_limit_p (desc, after))
3017 {
3018 if (dump_file && (dump_flags & TDF_DETAILS))
3019 fprintf (dump_file, " ...size limit reached, disqualifying "
3020 "candidate parameter %u\n", idx);
3021 desc->split_candidate = false;
3022 return;
3023 }
3024 desc->size_reached = after;
3025}
3026
3027/* Take all actions required to deal with an edge CS that represents a call to
3028 an unknown or un-analyzed function, for both parameter removal and
3029 splitting. */
3030
3031static void
3032process_edge_to_unknown_caller (cgraph_edge *cs)
3033{
3034 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3035 gcc_checking_assert (from_ifs);
3036 isra_call_summary *csum = call_sums->get (cs);
3037
3038 if (dump_file && (dump_flags & TDF_DETAILS))
3039 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3040 cs->caller->dump_name ());
3041
3042 unsigned args_count = csum->m_arg_flow.length ();
3043 for (unsigned i = 0; i < args_count; i++)
3044 {
3045 isra_param_flow *ipf = &csum->m_arg_flow[i];
3046
3047 if (ipf->pointer_pass_through)
3048 {
3049 isra_param_desc *param_desc
3050 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3051 param_desc->locally_unused = false;
3052 param_desc->split_candidate = false;
3053 continue;
3054 }
3055 if (ipf->aggregate_pass_through)
3056 {
3057 unsigned idx = get_single_param_flow_source (ipf);
3058 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3059
3060 param_desc->locally_unused = false;
3061 if (!param_desc->split_candidate)
3062 continue;
3063 gcc_assert (!param_desc->by_ref);
3064 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3065 ipf->unit_size);
3066 gcc_checking_assert (pacc);
3067 pacc->certain = true;
3068 if (overlapping_certain_accesses_p (param_desc, NULL))
3069 {
3070 if (dump_file && (dump_flags & TDF_DETAILS))
3071 fprintf (dump_file, " ...leading to overlap, "
3072 " disqualifying candidate parameter %u\n",
3073 idx);
3074 param_desc->split_candidate = false;
3075 }
3076 else
3077 bump_reached_size (param_desc, pacc->unit_size, idx);
3078 ipf->aggregate_pass_through = false;
3079 continue;
3080 }
3081
3082 for (int j = 0; j < ipf->length; j++)
3083 {
3084 int input_idx = ipf->inputs[j];
3085 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3086 }
3087 }
3088}
3089
3090/* Propagate parameter removal information through cross-SCC edge CS,
3091 i.e. decrease the use count in the caller parameter descriptor for each use
3092 in this call. */
3093
3094static void
3095param_removal_cross_scc_edge (cgraph_edge *cs)
3096{
3097 enum availability availability;
3098 cgraph_node *callee = cs->callee->function_symbol (&availability);
3099 isra_func_summary *to_ifs = func_sums->get (callee);
3100 if (!to_ifs || !to_ifs->m_candidate
3101 || (availability < AVAIL_AVAILABLE)
3102 || vec_safe_is_empty (to_ifs->m_parameters))
3103 {
3104 process_edge_to_unknown_caller (cs);
3105 return;
3106 }
3107 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3108 gcc_checking_assert (from_ifs);
3109
3110 isra_call_summary *csum = call_sums->get (cs);
3111 unsigned args_count = csum->m_arg_flow.length ();
3112 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3113
3114 for (unsigned i = 0; i < args_count; i++)
3115 {
3116 bool unused_in_callee;
3117 if (i < param_count)
3118 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3119 else
3120 unused_in_callee = false;
3121
3122 if (!unused_in_callee)
3123 {
3124 isra_param_flow *ipf = &csum->m_arg_flow[i];
3125 for (int j = 0; j < ipf->length; j++)
3126 {
3127 int input_idx = ipf->inputs[j];
3128 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3129 }
3130 }
3131 }
3132}
3133
3134/* Unless it is already there, push NODE which is also described by IFS to
3135 STACK. */
3136
3137static void
3138isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3139 vec<cgraph_node *> *stack)
3140{
3141 if (!ifs->m_queued)
3142 {
3143 ifs->m_queued = true;
3144 stack->safe_push (node);
3145 }
3146}
3147
3148/* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3149 used and push CALLER on STACK. */
3150
3151static void
3152isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3153 cgraph_node *caller, vec<cgraph_node *> *stack)
3154{
3155 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3156 {
3157 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3158 isra_push_node_to_stack (caller, from_ifs, stack);
3159 }
3160}
3161
3162
3163/* Propagate information that any parameter is not used only locally within a
6b6a8065 3164 SCC across CS to the caller, which must be in the same SCC as the
ff6686d2
MJ
3165 callee. Push any callers that need to be re-processed to STACK. */
3166
3167static void
3168propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3169{
3170 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3171 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3172 return;
3173
3174 isra_call_summary *csum = call_sums->get (cs);
3175 gcc_checking_assert (csum);
3176 unsigned args_count = csum->m_arg_flow.length ();
3177 enum availability availability;
3178 cgraph_node *callee = cs->callee->function_symbol (&availability);
3179 isra_func_summary *to_ifs = func_sums->get (callee);
3180
3181 unsigned param_count
3182 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3183 ? vec_safe_length (to_ifs->m_parameters) : 0;
3184 for (unsigned i = 0; i < args_count; i++)
3185 {
3186 if (i < param_count
3187 && (*to_ifs->m_parameters)[i].locally_unused)
3188 continue;
3189
3190 /* The argument is needed in the callee it, we must mark the parameter as
3191 used also in the caller and its callers within this SCC. */
3192 isra_param_flow *ipf = &csum->m_arg_flow[i];
3193 for (int j = 0; j < ipf->length; j++)
3194 {
3195 int input_idx = ipf->inputs[j];
3196 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3197 }
3198 }
3199}
3200
3201/* Propagate information that any parameter is not used only locally within a
3202 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3203 same SCC. Push any callers that need to be re-processed to STACK. */
3204
3205static bool
3206propagate_used_to_scc_callers (cgraph_node *node, void *data)
3207{
3208 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3209 cgraph_edge *cs;
3210 for (cs = node->callers; cs; cs = cs->next_caller)
3211 if (ipa_edge_within_scc (cs))
3212 propagate_used_across_scc_edge (cs, stack);
3213 return false;
3214}
3215
3216/* Return true iff all certain accesses in ARG_DESC are also present as
3217 certain accesses in PARAM_DESC. */
3218
3219static bool
3220all_callee_accesses_present_p (isra_param_desc *param_desc,
3221 isra_param_desc *arg_desc)
3222{
3223 unsigned aclen = vec_safe_length (arg_desc->accesses);
3224 for (unsigned j = 0; j < aclen; j++)
3225 {
3226 param_access *argacc = (*arg_desc->accesses)[j];
3227 if (!argacc->certain)
3228 continue;
3229 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3230 argacc->unit_size);
b9a15a83
MJ
3231 if (!pacc
3232 || !pacc->certain
3233 || !types_compatible_p (argacc->type, pacc->type))
ff6686d2
MJ
3234 return false;
3235 }
3236 return true;
3237}
3238
3239/* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3240 does not allow instantiating an auto_vec with a type defined within a
3241 function so it is a global type. */
3242enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3243
3244
1a315435
MJ
3245/* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3246 (which belongs to CALLER) if they would not violate some constraint there.
3247 If successful, return NULL, otherwise return the string reason for failure
3248 (which can be written to the dump file). DELTA_OFFSET is the known offset
3249 of the actual argument withing the formal parameter (so of ARG_DESCS within
3250 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3251 known. In case of success, set *CHANGE_P to true if propagation actually
3252 changed anything. */
ff6686d2
MJ
3253
3254static const char *
1a315435 3255pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
ff6686d2
MJ
3256 isra_param_desc *arg_desc,
3257 unsigned delta_offset, unsigned arg_size,
3258 bool *change_p)
3259{
3260 unsigned pclen = vec_safe_length (param_desc->accesses);
3261 unsigned aclen = vec_safe_length (arg_desc->accesses);
3262 unsigned prop_count = 0;
3263 unsigned prop_size = 0;
3264 bool change = false;
3265
3266 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3267 for (unsigned j = 0; j < aclen; j++)
3268 {
3269 param_access *argacc = (*arg_desc->accesses)[j];
3270 prop_kinds.safe_push (ACC_PROP_DONT);
3271
3272 if (arg_size > 0
3273 && argacc->unit_offset + argacc->unit_size > arg_size)
3274 return "callee access outsize size boundary";
3275
3276 if (!argacc->certain)
3277 continue;
3278
3279 unsigned offset = argacc->unit_offset + delta_offset;
3280 /* Given that accesses are initially stored according to increasing
3281 offset and decreasing size in case of equal offsets, the following
3282 searches could be written more efficiently if we kept the ordering
3283 when copying. But the number of accesses is capped at
3284 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3285 messy quickly, so let's improve on that only if necessary. */
3286
3287 bool exact_match = false;
3288 for (unsigned i = 0; i < pclen; i++)
3289 {
3290 /* Check for overlaps. */
3291 param_access *pacc = (*param_desc->accesses)[i];
3292 if (pacc->unit_offset == offset
3293 && pacc->unit_size == argacc->unit_size)
3294 {
3295 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3296 || !types_compatible_p (argacc->type, pacc->type))
3297 return "propagated access types would not match existing ones";
3298
3299 exact_match = true;
3300 if (!pacc->certain)
3301 {
3302 prop_kinds[j] = ACC_PROP_CERTAIN;
3303 prop_size += argacc->unit_size;
3304 change = true;
3305 }
3306 continue;
3307 }
3308
3309 if (offset < pacc->unit_offset + pacc->unit_size
3310 && offset + argacc->unit_size > pacc->unit_offset)
3311 {
3312 /* None permissible with load accesses, possible to fit into
3313 argument ones. */
3314 if (pacc->certain
3315 || offset < pacc->unit_offset
3316 || (offset + argacc->unit_size
3317 > pacc->unit_offset + pacc->unit_size))
3318 return "a propagated access would conflict in caller";
3319 }
3320 }
3321
3322 if (!exact_match)
3323 {
3324 prop_kinds[j] = ACC_PROP_COPY;
3325 prop_count++;
3326 prop_size += argacc->unit_size;
3327 change = true;
3328 }
3329 }
3330
3331 if (!change)
3332 return NULL;
3333
3334 if ((prop_count + pclen
1a315435 3335 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
ff6686d2
MJ
3336 || size_would_violate_limit_p (param_desc,
3337 param_desc->size_reached + prop_size))
3338 return "propagating accesses would violate the count or size limit";
3339
3340 *change_p = true;
3341 for (unsigned j = 0; j < aclen; j++)
3342 {
3343 if (prop_kinds[j] == ACC_PROP_COPY)
3344 {
3345 param_access *argacc = (*arg_desc->accesses)[j];
3346
3347 param_access *copy = ggc_cleared_alloc<param_access> ();
3348 copy->unit_offset = argacc->unit_offset + delta_offset;
3349 copy->unit_size = argacc->unit_size;
3350 copy->type = argacc->type;
3351 copy->alias_ptr_type = argacc->alias_ptr_type;
3352 copy->certain = true;
3353 vec_safe_push (param_desc->accesses, copy);
3354 }
3355 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3356 {
3357 param_access *argacc = (*arg_desc->accesses)[j];
3358 param_access *csp
3359 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3360 argacc->unit_size);
3361 csp->certain = true;
3362 }
3363 }
3364
3365 param_desc->size_reached += prop_size;
3366
3367 return NULL;
3368}
3369
3370/* Propagate parameter splitting information through call graph edge CS.
3371 Return true if any changes that might need to be propagated within SCCs have
3372 been made. The function also clears the aggregate_pass_through and
dfea3d6f 3373 pointer_pass_through in call summaries which do not need to be processed
ff6686d2
MJ
3374 again if this CS is revisited when iterating while changes are propagated
3375 within an SCC. */
3376
3377static bool
3378param_splitting_across_edge (cgraph_edge *cs)
3379{
3380 bool res = false;
3381 bool cross_scc = !ipa_edge_within_scc (cs);
3382 enum availability availability;
3383 cgraph_node *callee = cs->callee->function_symbol (&availability);
3384 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3385 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3386
3387 isra_call_summary *csum = call_sums->get (cs);
3388 gcc_checking_assert (csum);
3389 unsigned args_count = csum->m_arg_flow.length ();
3390 isra_func_summary *to_ifs = func_sums->get (callee);
3391 unsigned param_count
3392 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3393 ? vec_safe_length (to_ifs->m_parameters)
3394 : 0);
3395
3396 if (dump_file && (dump_flags & TDF_DETAILS))
6b6a8065 3397 fprintf (dump_file, "Splitting across %s->%s:\n",
ff6686d2
MJ
3398 cs->caller->dump_name (), callee->dump_name ());
3399
3400 unsigned i;
3401 for (i = 0; (i < args_count) && (i < param_count); i++)
3402 {
3403 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3404 isra_param_flow *ipf = &csum->m_arg_flow[i];
3405
3406 if (arg_desc->locally_unused)
3407 {
3408 if (dump_file && (dump_flags & TDF_DETAILS))
3409 fprintf (dump_file, " ->%u: unused in callee\n", i);
3410 ipf->pointer_pass_through = false;
3411 continue;
3412 }
3413
3414 if (ipf->pointer_pass_through)
3415 {
3416 int idx = get_single_param_flow_source (ipf);
3417 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3418 if (!param_desc->split_candidate)
3419 continue;
3420 gcc_assert (param_desc->by_ref);
3421
3422 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3423 {
3424 if (dump_file && (dump_flags & TDF_DETAILS))
3425 fprintf (dump_file, " %u->%u: not candidate or not by "
3426 "reference in callee\n", idx, i);
3427 param_desc->split_candidate = false;
3428 ipf->pointer_pass_through = false;
3429 res = true;
3430 }
3431 else if (!ipf->safe_to_import_accesses)
3432 {
763121cc
MJ
3433 if (!csum->m_before_any_store
3434 || !all_callee_accesses_present_p (param_desc, arg_desc))
ff6686d2
MJ
3435 {
3436 if (dump_file && (dump_flags & TDF_DETAILS))
3437 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3438 idx, i);
3439 param_desc->split_candidate = false;
3440 ipf->pointer_pass_through = false;
3441 res = true;
3442
3443 }
3444 else
3445 {
3446 if (dump_file && (dump_flags & TDF_DETAILS))
3447 fprintf (dump_file, " %u->%u: verified callee accesses "
3448 "present.\n", idx, i);
3449 if (cross_scc)
3450 ipf->pointer_pass_through = false;
3451 }
3452 }
3453 else
3454 {
3455 const char *pull_failure
1a315435
MJ
3456 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3457 0, 0, &res);
ff6686d2
MJ
3458 if (pull_failure)
3459 {
3460 if (dump_file && (dump_flags & TDF_DETAILS))
3461 fprintf (dump_file, " %u->%u: by_ref access pull "
3462 "failed: %s.\n", idx, i, pull_failure);
3463 param_desc->split_candidate = false;
3464 ipf->pointer_pass_through = false;
3465 res = true;
3466 }
3467 else
3468 {
3469 if (dump_file && (dump_flags & TDF_DETAILS))
3470 fprintf (dump_file, " %u->%u: by_ref access pull "
3471 "succeeded.\n", idx, i);
3472 if (cross_scc)
3473 ipf->pointer_pass_through = false;
3474 }
3475 }
3476 }
3477 else if (ipf->aggregate_pass_through)
3478 {
3479 int idx = get_single_param_flow_source (ipf);
3480 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3481 if (!param_desc->split_candidate)
3482 continue;
3483 gcc_assert (!param_desc->by_ref);
3484 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3485 ipf->unit_size);
3486 gcc_checking_assert (pacc);
3487
3488 if (pacc->certain)
3489 {
3490 if (dump_file && (dump_flags & TDF_DETAILS))
3491 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3492 ipf->aggregate_pass_through = false;
3493 }
3494 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3495 {
3496 if (dump_file && (dump_flags & TDF_DETAILS))
3497 fprintf (dump_file, " %u->%u: not candidate or by "
3498 "reference in callee\n", idx, i);
3499
3500 pacc->certain = true;
3501 if (overlapping_certain_accesses_p (param_desc, NULL))
3502 {
3503 if (dump_file && (dump_flags & TDF_DETAILS))
3504 fprintf (dump_file, " ...leading to overlap, "
3505 " disqualifying candidate parameter %u\n",
3506 idx);
3507 param_desc->split_candidate = false;
3508 }
3509 else
3510 bump_reached_size (param_desc, pacc->unit_size, idx);
3511
3512 ipf->aggregate_pass_through = false;
3513 res = true;
3514 }
3515 else
3516 {
3517 const char *pull_failure
1a315435 3518 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
ff6686d2
MJ
3519 ipf->unit_offset,
3520 ipf->unit_size, &res);
3521 if (pull_failure)
3522 {
3523 if (dump_file && (dump_flags & TDF_DETAILS))
3524 fprintf (dump_file, " %u->%u: arg access pull "
3525 "failed: %s.\n", idx, i, pull_failure);
3526
3527 ipf->aggregate_pass_through = false;
3528 pacc->certain = true;
3529
3530 if (overlapping_certain_accesses_p (param_desc, NULL))
3531 {
3532 if (dump_file && (dump_flags & TDF_DETAILS))
3533 fprintf (dump_file, " ...leading to overlap, "
3534 " disqualifying candidate parameter %u\n",
3535 idx);
3536 param_desc->split_candidate = false;
3537 }
3538 else
3539 bump_reached_size (param_desc, pacc->unit_size, idx);
3540
3541 res = true;
3542 }
3543 else
3544 {
3545 if (dump_file && (dump_flags & TDF_DETAILS))
3546 fprintf (dump_file, " %u->%u: arg access pull "
3547 "succeeded.\n", idx, i);
3548 if (cross_scc)
3549 ipf->aggregate_pass_through = false;
3550 }
3551 }
3552 }
3553 }
3554
3555 /* Handle argument-parameter count mismatches. */
3556 for (; (i < args_count); i++)
3557 {
3558 isra_param_flow *ipf = &csum->m_arg_flow[i];
3559
3560 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3561 {
3562 int idx = get_single_param_flow_source (ipf);
3563 ipf->pointer_pass_through = false;
3564 ipf->aggregate_pass_through = false;
3565 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3566 if (!param_desc->split_candidate)
3567 continue;
3568
3569 if (dump_file && (dump_flags & TDF_DETAILS))
3570 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3571 idx, i);
3572 param_desc->split_candidate = false;
3573 res = true;
3574 }
3575 }
3576 return res;
3577}
3578
3579/* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3580 callers ignore the return value, or come from the same SCC and use the
3581 return value only to compute their return value, return false, otherwise
3582 return true. */
3583
3584static bool
3585retval_used_p (cgraph_node *node, void *)
3586{
3587 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3588 {
3589 isra_call_summary *csum = call_sums->get (cs);
3590 gcc_checking_assert (csum);
3591 if (csum->m_return_ignored)
3592 continue;
3593 if (!csum->m_return_returned)
3594 return true;
3595
3596 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3597 if (!from_ifs || !from_ifs->m_candidate)
3598 return true;
3599
3600 if (!ipa_edge_within_scc (cs)
3601 && !from_ifs->m_return_ignored)
3602 return true;
3603 }
3604
3605 return false;
3606}
3607
3608/* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3609 modify parameter which originally had index BASE_INDEX, in the adjustment
3610 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
3611 PREV_ADJUSTMENT. If the parent clone is the original function,
3612 PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to BASE_INDEX. */
3613
3614
3615static void
3616push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3617 unsigned prev_clone_index,
3618 ipa_adjusted_param *prev_adjustment,
3619 vec<ipa_adjusted_param, va_gc> **new_params)
3620{
3621 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3622 if (desc->locally_unused)
3623 {
3624 if (dump_file)
3625 fprintf (dump_file, " Will remove parameter %u\n", base_index);
3626 return;
3627 }
3628
3629 if (!desc->split_candidate)
3630 {
3631 ipa_adjusted_param adj;
3632 if (prev_adjustment)
3633 {
3634 adj = *prev_adjustment;
3635 adj.prev_clone_adjustment = true;
3636 adj.prev_clone_index = prev_clone_index;
3637 }
3638 else
3639 {
3640 memset (&adj, 0, sizeof (adj));
3641 adj.op = IPA_PARAM_OP_COPY;
3642 adj.base_index = base_index;
3643 adj.prev_clone_index = prev_clone_index;
3644 }
3645 vec_safe_push ((*new_params), adj);
3646 return;
3647 }
3648
3649 if (dump_file)
3650 fprintf (dump_file, " Will split parameter %u\n", base_index);
3651
3652 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3653 unsigned aclen = vec_safe_length (desc->accesses);
3654 for (unsigned j = 0; j < aclen; j++)
3655 {
3656 param_access *pa = (*desc->accesses)[j];
3657 if (!pa->certain)
3658 continue;
3659 if (dump_file)
3660 fprintf (dump_file, " - component at byte offset %u, "
3661 "size %u\n", pa->unit_offset, pa->unit_size);
3662
3663 ipa_adjusted_param adj;
3664 memset (&adj, 0, sizeof (adj));
3665 adj.op = IPA_PARAM_OP_SPLIT;
3666 adj.base_index = base_index;
3667 adj.prev_clone_index = prev_clone_index;
3668 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3669 adj.reverse = pa->reverse;
3670 adj.type = pa->type;
3671 adj.alias_ptr_type = pa->alias_ptr_type;
3672 adj.unit_offset = pa->unit_offset;
3673 vec_safe_push ((*new_params), adj);
3674 }
3675}
3676
b90061c6
MJ
3677/* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
3678 flag of all callers of NODE. */
3679
3680static bool
3681mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
3682{
3683 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3684 cs->caller->calls_comdat_local = true;
3685 return false;
3686}
3687
ff6686d2 3688
dfea3d6f 3689/* Do final processing of results of IPA propagation regarding NODE, clone it
ff6686d2
MJ
3690 if appropriate. */
3691
3692static void
3693process_isra_node_results (cgraph_node *node,
3694 hash_map<const char *, unsigned> *clone_num_suffixes)
3695{
3696 isra_func_summary *ifs = func_sums->get (node);
3697 if (!ifs || !ifs->m_candidate)
3698 return;
3699
3700 auto_vec<bool, 16> surviving_params;
3701 bool check_surviving = false;
ae7a23a3
JH
3702 clone_info *cinfo = clone_info::get (node);
3703 if (cinfo && cinfo->param_adjustments)
ff6686d2
MJ
3704 {
3705 check_surviving = true;
ae7a23a3 3706 cinfo->param_adjustments->get_surviving_params (&surviving_params);
ff6686d2
MJ
3707 }
3708
3709 unsigned param_count = vec_safe_length (ifs->m_parameters);
3710 bool will_change_function = false;
3711 if (ifs->m_returns_value && ifs->m_return_ignored)
3712 will_change_function = true;
3713 else
3714 for (unsigned i = 0; i < param_count; i++)
3715 {
3716 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3717 if ((desc->locally_unused || desc->split_candidate)
3718 /* Make sure we do not clone just to attempt to remove an already
3719 removed unused argument. */
3720 && (!check_surviving
3721 || (i < surviving_params.length ()
3722 && surviving_params[i])))
3723 {
3724 will_change_function = true;
3725 break;
3726 }
3727 }
3728 if (!will_change_function)
3729 return;
3730
3731 if (dump_file)
3732 {
3733 fprintf (dump_file, "\nEvaluating analysis results for %s\n",
3734 node->dump_name ());
3735 if (ifs->m_returns_value && ifs->m_return_ignored)
3736 fprintf (dump_file, " Will remove return value.\n");
3737 }
3738
3739 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
ae7a23a3
JH
3740 if (ipa_param_adjustments *old_adjustments
3741 = cinfo ? cinfo->param_adjustments : NULL)
ff6686d2
MJ
3742 {
3743 unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
3744 for (unsigned i = 0; i < old_adj_len; i++)
3745 {
3746 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
3747 push_param_adjustments_for_index (ifs, old_adj->base_index, i,
3748 old_adj, &new_params);
3749 }
3750 }
3751 else
3752 for (unsigned i = 0; i < param_count; i++)
3753 push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);
3754
3755 ipa_param_adjustments *new_adjustments
3756 = (new (ggc_alloc <ipa_param_adjustments> ())
3757 ipa_param_adjustments (new_params, param_count,
3758 ifs->m_returns_value && ifs->m_return_ignored));
3759
3760 if (dump_file && (dump_flags & TDF_DETAILS))
3761 {
3762 fprintf (dump_file, "\n Created adjustments:\n");
3763 new_adjustments->dump (dump_file);
3764 }
3765
3766 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
3767 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
3768 node->decl)));
265af872 3769 auto_vec<cgraph_edge *> callers = node->collect_callers ();
ff6686d2
MJ
3770 cgraph_node *new_node
3771 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
3772 suffix_counter);
3773 suffix_counter++;
b90061c6
MJ
3774 if (node->calls_comdat_local && node->same_comdat_group)
3775 {
3776 new_node->add_to_same_comdat_group (node);
3777 new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
3778 NULL, true);
3779 }
ed649cda 3780 new_node->calls_comdat_local = node->calls_comdat_local;
ff6686d2
MJ
3781
3782 if (dump_file)
3783 fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
3784 callers.release ();
3785}
3786
3787/* Check which parameters of NODE described by IFS have survived until IPA-SRA
3788 and disable transformations for those which have not or which should not
3789 transformed because the associated debug counter reached its limit. Return
3790 true if none survived or if there were no candidates to begin with. */
3791
3792static bool
3793disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
3794{
3795 bool ret = true;
3796 unsigned len = vec_safe_length (ifs->m_parameters);
3797 if (!len)
3798 return true;
3799
3800 auto_vec<bool, 16> surviving_params;
3801 bool check_surviving = false;
ae7a23a3
JH
3802 clone_info *cinfo = clone_info::get (node);
3803 if (cinfo && cinfo->param_adjustments)
ff6686d2
MJ
3804 {
3805 check_surviving = true;
ae7a23a3 3806 cinfo->param_adjustments->get_surviving_params (&surviving_params);
ff6686d2
MJ
3807 }
3808 bool dumped_first = false;
3809 for (unsigned i = 0; i < len; i++)
3810 {
3811 isra_param_desc *desc = &(*ifs->m_parameters)[i];
3812 if (!dbg_cnt (ipa_sra_params))
3813 {
3814 desc->locally_unused = false;
3815 desc->split_candidate = false;
3816 }
3817 else if (check_surviving
3818 && (i >= surviving_params.length ()
3819 || !surviving_params[i]))
3820 {
3821 /* Even if the parameter was removed by a previous IPA pass, we do
3822 not clear locally_unused because if it really is unused, this
3823 information might be useful in callers. */
3824 desc->split_candidate = false;
3825
3826 if (dump_file && (dump_flags & TDF_DETAILS))
3827 {
3828 if (!dumped_first)
3829 {
3830 fprintf (dump_file,
3831 "The following parameters of %s are dead on "
3832 "arrival:", node->dump_name ());
3833 dumped_first = true;
3834 }
3835 fprintf (dump_file, " %u", i);
3836 }
3837 }
3838 else if (desc->locally_unused || desc->split_candidate)
3839 ret = false;
3840 }
3841
3842 if (dumped_first)
3843 fprintf (dump_file, "\n");
3844
3845 return ret;
3846}
3847
3848
3849/* Run the interprocedural part of IPA-SRA. */
3850
3851static unsigned int
3852ipa_sra_analysis (void)
3853{
3854 if (dump_file)
3855 {
3856 fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
3857 ipa_sra_dump_all_summaries (dump_file);
3858 }
3859
3860 gcc_checking_assert (func_sums);
3861 gcc_checking_assert (call_sums);
3862 cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
3863 auto_vec <cgraph_node *, 16> stack;
3864 int node_scc_count = ipa_reduced_postorder (order, true, NULL);
3865
3866 /* One sweep from callees to callers for parameter removal and splitting. */
3867 for (int i = 0; i < node_scc_count; i++)
3868 {
3869 cgraph_node *scc_rep = order[i];
3870 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3871 unsigned j;
3872
3873 /* Preliminary IPA function level checks and first step of parameter
3874 removal. */
3875 cgraph_node *v;
3876 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3877 {
3878 isra_func_summary *ifs = func_sums->get (v);
3879 if (!ifs || !ifs->m_candidate)
3880 continue;
3881 if (!ipa_sra_ipa_function_checks (v)
3882 || check_all_callers_for_issues (v))
3883 {
3884 ifs->zap ();
3885 continue;
3886 }
3887 if (disable_unavailable_parameters (v, ifs))
3888 continue;
3889 for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
3890 process_edge_to_unknown_caller (cs);
3891 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3892 if (!ipa_edge_within_scc (cs))
3893 param_removal_cross_scc_edge (cs);
3894 }
3895
6b6a8065
JJ
3896 /* Look at edges within the current SCC and propagate used-ness across
3897 them, pushing onto the stack all notes which might need to be
3898 revisited. */
ff6686d2
MJ
3899 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3900 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3901 &stack, true);
3902
3903 /* Keep revisiting and pushing until nothing changes. */
3904 while (!stack.is_empty ())
3905 {
3906 cgraph_node *v = stack.pop ();
3907 isra_func_summary *ifs = func_sums->get (v);
3908 gcc_checking_assert (ifs && ifs->m_queued);
3909 ifs->m_queued = false;
3910
3911 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3912 &stack, true);
3913 }
3914
3915 /* Parameter splitting. */
3916 bool repeat_scc_access_propagation;
3917 do
3918 {
3919 repeat_scc_access_propagation = false;
3920 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3921 {
3922 isra_func_summary *ifs = func_sums->get (v);
3923 if (!ifs
3924 || !ifs->m_candidate
3925 || vec_safe_is_empty (ifs->m_parameters))
3926 continue;
3927 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3928 if (param_splitting_across_edge (cs))
3929 repeat_scc_access_propagation = true;
3930 }
3931 }
3932 while (repeat_scc_access_propagation);
3933
3934 if (flag_checking)
3935 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3936 verify_splitting_accesses (v, true);
3937
3938 cycle_nodes.release ();
3939 }
3940
3941 /* One sweep from caller to callees for result removal. */
3942 for (int i = node_scc_count - 1; i >= 0 ; i--)
3943 {
3944 cgraph_node *scc_rep = order[i];
3945 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3946 unsigned j;
3947
3948 cgraph_node *v;
3949 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3950 {
3951 isra_func_summary *ifs = func_sums->get (v);
3952 if (!ifs || !ifs->m_candidate)
3953 continue;
3954
3955 bool return_needed
3956 = (ifs->m_returns_value
3957 && (!dbg_cnt (ipa_sra_retvalues)
3958 || v->call_for_symbol_and_aliases (retval_used_p,
3959 NULL, true)));
3960 ifs->m_return_ignored = !return_needed;
3961 if (return_needed)
3962 isra_push_node_to_stack (v, ifs, &stack);
3963 }
3964
3965 while (!stack.is_empty ())
3966 {
3967 cgraph_node *node = stack.pop ();
3968 isra_func_summary *ifs = func_sums->get (node);
3969 gcc_checking_assert (ifs && ifs->m_queued);
3970 ifs->m_queued = false;
3971
3972 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3973 if (ipa_edge_within_scc (cs)
3974 && call_sums->get (cs)->m_return_returned)
3975 {
3976 enum availability av;
3977 cgraph_node *callee = cs->callee->function_symbol (&av);
3978 isra_func_summary *to_ifs = func_sums->get (callee);
3979 if (to_ifs && to_ifs->m_return_ignored)
3980 {
3981 to_ifs->m_return_ignored = false;
3982 isra_push_node_to_stack (callee, to_ifs, &stack);
3983 }
3984 }
3985 }
3986 cycle_nodes.release ();
3987 }
3988
3989 ipa_free_postorder_info ();
3990 free (order);
3991
3992 if (dump_file)
3993 {
3994 if (dump_flags & TDF_DETAILS)
3995 {
3996 fprintf (dump_file, "\n========== IPA-SRA propagation final state "
3997 " ==========\n");
3998 ipa_sra_dump_all_summaries (dump_file);
3999 }
4000 fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4001 }
4002
4003 hash_map<const char *, unsigned> *clone_num_suffixes
4004 = new hash_map<const char *, unsigned>;
4005
4006 cgraph_node *node;
4007 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4008 process_isra_node_results (node, clone_num_suffixes);
4009
4010 delete clone_num_suffixes;
ddf628e4 4011 ggc_delete (func_sums);
ff6686d2 4012 func_sums = NULL;
78cd68c0 4013 delete call_sums;
ff6686d2
MJ
4014 call_sums = NULL;
4015
4016 if (dump_file)
4017 fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
4018 "==========\n\n");
4019 return 0;
4020}
4021
4022
4023const pass_data pass_data_ipa_sra =
4024{
4025 IPA_PASS, /* type */
4026 "sra", /* name */
4027 OPTGROUP_NONE, /* optinfo_flags */
4028 TV_IPA_SRA, /* tv_id */
4029 0, /* properties_required */
4030 0, /* properties_provided */
4031 0, /* properties_destroyed */
4032 0, /* todo_flags_start */
4033 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4034};
4035
4036class pass_ipa_sra : public ipa_opt_pass_d
4037{
4038public:
4039 pass_ipa_sra (gcc::context *ctxt)
4040 : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4041 ipa_sra_generate_summary, /* generate_summary */
4042 ipa_sra_write_summary, /* write_summary */
4043 ipa_sra_read_summary, /* read_summary */
4044 NULL , /* write_optimization_summary */
4045 NULL, /* read_optimization_summary */
4046 NULL, /* stmt_fixup */
4047 0, /* function_transform_todo_flags_start */
4048 NULL, /* function_transform */
4049 NULL) /* variable_transform */
4050 {}
4051
4052 /* opt_pass methods: */
4053 virtual bool gate (function *)
4054 {
4055 /* TODO: We should remove the optimize check after we ensure we never run
4056 IPA passes when not optimizing. */
4057 return (flag_ipa_sra && optimize);
4058 }
4059
4060 virtual unsigned int execute (function *) { return ipa_sra_analysis (); }
4061
4062}; // class pass_ipa_sra
4063
4064} // anon namespace
4065
40e67ab8
JH
4066/* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4067 create a summary structure describing IPA-SRA opportunities and constraints
4068 in it. */
4069
4070static void
4071ipa_sra_summarize_function (cgraph_node *node)
4072{
4073 if (dump_file)
4074 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
4075 node->order);
4076 if (!ipa_sra_preliminary_function_checks (node))
717d278a
MJ
4077 {
4078 isra_analyze_all_outgoing_calls (node);
4079 return;
4080 }
40e67ab8
JH
4081 gcc_obstack_init (&gensum_obstack);
4082 isra_func_summary *ifs = func_sums->get_create (node);
4083 ifs->m_candidate = true;
4084 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
4085 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
4086
4087 decl2desc = new hash_map<tree, gensum_param_desc *>;
4088 unsigned count = 0;
4089 for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
4090 count++;
4091
4092 if (count > 0)
4093 {
4094 auto_vec<gensum_param_desc, 16> param_descriptions (count);
4095 param_descriptions.reserve_exact (count);
4096 param_descriptions.quick_grow_cleared (count);
4097
4098 bool cfun_pushed = false;
4099 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
4100 if (create_parameter_descriptors (node, &param_descriptions))
4101 {
4102 push_cfun (fun);
4103 cfun_pushed = true;
4104 final_bbs = BITMAP_ALLOC (NULL);
4105 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
4106 by_ref_count
4107 * last_basic_block_for_fn (fun));
4108 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
4109 scan_function (node, fun);
4110
4111 if (dump_file)
4112 {
4113 dump_gensum_param_descriptors (dump_file, node->decl,
4114 &param_descriptions);
4115 fprintf (dump_file, "----------------------------------------\n");
4116 }
4117 }
4118 process_scan_results (node, fun, ifs, &param_descriptions);
4119
4120 if (cfun_pushed)
4121 pop_cfun ();
4122 if (bb_dereferences)
4123 {
4124 free (bb_dereferences);
4125 bb_dereferences = NULL;
4126 BITMAP_FREE (final_bbs);
4127 final_bbs = NULL;
4128 }
4129 }
4130 isra_analyze_all_outgoing_calls (node);
4131
4132 delete decl2desc;
4133 decl2desc = NULL;
4134 obstack_free (&gensum_obstack, NULL);
4135 if (dump_file)
4136 fprintf (dump_file, "\n\n");
4137 if (flag_checking)
4138 verify_splitting_accesses (node, false);
4139 return;
4140}
4141
ff6686d2
MJ
4142ipa_opt_pass_d *
4143make_pass_ipa_sra (gcc::context *ctxt)
4144{
4145 return new pass_ipa_sra (ctxt);
4146}
4147
4148
4149#include "gt-ipa-sra.h"